diff --git a/data_scaling/n800_1/pretrain/log.json b/data_scaling/n800_1/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..4c38d396ac53b942ea980fa78c643b93584d60b6 --- /dev/null +++ b/data_scaling/n800_1/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.05557951611027122, "train/loss": 0.99305494556427, "eval/hcp-train-subset/loss": 0.9893642029454631, "eval/hcp-val/loss": 0.9891314919917814, "eval/nsd-val/loss": 0.9897332950945823} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.08360476486980915, "train/loss": 0.9879086724948883, "eval/hcp-train-subset/loss": 0.9860379176755105, "eval/hcp-val/loss": 0.9858486287055477, "eval/nsd-val/loss": 0.9869759342362804} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.13070850917907306, "train/loss": 0.9839685311317444, "eval/hcp-train-subset/loss": 0.9797971796604895, "eval/hcp-val/loss": 0.9795017655818693, "eval/nsd-val/loss": 0.9826059197225878} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.2016446154242003, "train/loss": 0.9743436627578735, "eval/hcp-train-subset/loss": 0.964172520945149, "eval/hcp-val/loss": 0.9644979142373608, "eval/nsd-val/loss": 0.9570187022609096} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.23890099714775362, "train/loss": 0.9404326035785675, "eval/hcp-train-subset/loss": 0.9196922846378819, "eval/hcp-val/loss": 0.918797166116776, "eval/nsd-val/loss": 0.8908167241081115} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.16506562540462663, "train/loss": 0.9054659925174713, "eval/hcp-train-subset/loss": 0.8823485009131893, "eval/hcp-val/loss": 0.8816219981639616, "eval/nsd-val/loss": 0.8450277140063625} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.11810548107505169, "train/loss": 0.8771912380504608, "eval/hcp-train-subset/loss": 0.8667689888708053, "eval/hcp-val/loss": 0.8663207427147896, "eval/nsd-val/loss": 0.8338456499961114} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.0939128009967085, "train/loss": 0.8659757615566254, "eval/hcp-train-subset/loss": 0.8594264955289902, "eval/hcp-val/loss": 0.8585559350828971, "eval/nsd-val/loss": 0.8247244569563097} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.08507290438704633, "train/loss": 0.8581435841274262, "eval/hcp-train-subset/loss": 0.8539786281124238, "eval/hcp-val/loss": 0.8542331082205619, "eval/nsd-val/loss": 0.8204295135313465} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.07887237338038831, "train/loss": 0.8551882533836365, "eval/hcp-train-subset/loss": 0.850783743204609, "eval/hcp-val/loss": 0.8513946811999044, "eval/nsd-val/loss": 0.821854870165548} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.07482970760128772, "train/loss": 0.8514173165035248, "eval/hcp-train-subset/loss": 0.8496898624204821, "eval/hcp-val/loss": 0.8506327732916801, "eval/nsd-val/loss": 0.8197328227181588} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.07157703919796271, "train/loss": 0.8505740975475311, "eval/hcp-train-subset/loss": 0.8480175714338979, "eval/hcp-val/loss": 0.848658142551299, "eval/nsd-val/loss": 0.8157770825970557} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.07122849855242862, "train/loss": 0.8477365613365173, "eval/hcp-train-subset/loss": 0.8460793495178223, "eval/hcp-val/loss": 0.8474423981481983, "eval/nsd-val/loss": 0.8161180519288586} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.07037336775608523, "train/loss": 0.8454127600288391, "eval/hcp-train-subset/loss": 0.8448077786353326, "eval/hcp-val/loss": 0.846681265100356, "eval/nsd-val/loss": 0.8175623484196202} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.0692010689985899, "train/loss": 0.8454169969463349, "eval/hcp-train-subset/loss": 0.8449662894971909, "eval/hcp-val/loss": 0.8468272541799853, "eval/nsd-val/loss": 0.8184430156984637} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.07001059333931892, "train/loss": 0.8414112066650391, "eval/hcp-train-subset/loss": 0.8428080226144483, "eval/hcp-val/loss": 0.8454222208069216, "eval/nsd-val/loss": 0.8169159495061443} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.0690929715082803, "train/loss": 0.8425687613677979, "eval/hcp-train-subset/loss": 0.8432011537013515, "eval/hcp-val/loss": 0.8455820516232522, "eval/nsd-val/loss": 0.8186581115568837} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.07147697920561619, "train/loss": 0.838952526922226, "eval/hcp-train-subset/loss": 0.8412524490587173, "eval/hcp-val/loss": 0.8452303121166844, "eval/nsd-val/loss": 0.8169143334511788} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.0723840739856496, "train/loss": 0.8359483904266357, "eval/hcp-train-subset/loss": 0.8405202819455054, "eval/hcp-val/loss": 0.8443636288565974, "eval/nsd-val/loss": 0.8162706119398917} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.0721781014425444, "train/loss": 0.8358673351192475, "eval/hcp-train-subset/loss": 0.839336940357762, "eval/hcp-val/loss": 0.8444588165129384, "eval/nsd-val/loss": 0.8165250103319844} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.07288994475941535, "train/loss": 0.835715345067978, "eval/hcp-train-subset/loss": 0.839037379910869, "eval/hcp-val/loss": 0.8435696507653883, "eval/nsd-val/loss": 0.815791969337771} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.07289034460584927, "train/loss": 0.8371698464107513, "eval/hcp-train-subset/loss": 0.8381960363157334, "eval/hcp-val/loss": 0.8438360921798214, "eval/nsd-val/loss": 0.8164104973116229} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.07333650852567407, "train/loss": 0.8354075204849243, "eval/hcp-train-subset/loss": 0.8373544571861145, "eval/hcp-val/loss": 0.8423699813504373, "eval/nsd-val/loss": 0.8164845551213911} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.07437580657852715, "train/loss": 0.8337587834262848, "eval/hcp-train-subset/loss": 0.8364519092344469, "eval/hcp-val/loss": 0.842615528452781, "eval/nsd-val/loss": 0.8167225622361706} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.07449120901007486, "train/loss": 0.8348637602043152, "eval/hcp-train-subset/loss": 0.8367072622622213, "eval/hcp-val/loss": 0.8435706448170447, "eval/nsd-val/loss": 0.8130761221531899} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.07878431451486641, "train/loss": 0.8296564966201783, "eval/hcp-train-subset/loss": 0.8365503174643363, "eval/hcp-val/loss": 0.8435048980097617, "eval/nsd-val/loss": 0.8145029650580499} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.0795863535460365, "train/loss": 0.828511400976181, "eval/hcp-train-subset/loss": 0.8367028736299084, "eval/hcp-val/loss": 0.8438996797607791, "eval/nsd-val/loss": 0.8149711037835767} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.08170075104041227, "train/loss": 0.8273501449012757, "eval/hcp-train-subset/loss": 0.834711060408623, "eval/hcp-val/loss": 0.8423004832959944, "eval/nsd-val/loss": 0.8172057424822161} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.08111664744346819, "train/loss": 0.8269739177036285, "eval/hcp-train-subset/loss": 0.8341167905638295, "eval/hcp-val/loss": 0.8429100330798857, "eval/nsd-val/loss": 0.8139026520713684} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.08179530774231546, "train/loss": 0.8277935656642914, "eval/hcp-train-subset/loss": 0.834626043035138, "eval/hcp-val/loss": 0.8428561341377997, "eval/nsd-val/loss": 0.8168327991039522} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.08146715357632595, "train/loss": 0.827539805803299, "eval/hcp-train-subset/loss": 0.8335106411287861, "eval/hcp-val/loss": 0.8415466412421195, "eval/nsd-val/loss": 0.816599388276377} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.08588097601170233, "train/loss": 0.8214100947093964, "eval/hcp-train-subset/loss": 0.8331174360167596, "eval/hcp-val/loss": 0.8422541012687068, "eval/nsd-val/loss": 0.8180500336231724} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.08535786608303211, "train/loss": 0.8231316295433044, "eval/hcp-train-subset/loss": 0.8326770595965847, "eval/hcp-val/loss": 0.8416294224800602, "eval/nsd-val/loss": 0.8151384159441917} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.0882791002843501, "train/loss": 0.822666150007248, "eval/hcp-train-subset/loss": 0.8322985239567295, "eval/hcp-val/loss": 0.8415504424802719, "eval/nsd-val/loss": 0.8148358214286066} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.08837022423200433, "train/loss": 0.821098764591217, "eval/hcp-train-subset/loss": 0.831674769040077, "eval/hcp-val/loss": 0.841315173333691, "eval/nsd-val/loss": 0.8180714922566568} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.09068334608060927, "train/loss": 0.8176154763126373, "eval/hcp-train-subset/loss": 0.8315025346894418, "eval/hcp-val/loss": 0.84111263963484, "eval/nsd-val/loss": 0.8139828491595483} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.08915730219376704, "train/loss": 0.8207342241668701, "eval/hcp-train-subset/loss": 0.8293115842726922, "eval/hcp-val/loss": 0.8415125195057162, "eval/nsd-val/loss": 0.8150753465390974} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.09369547536907644, "train/loss": 0.8175116253089905, "eval/hcp-train-subset/loss": 0.8308738556600386, "eval/hcp-val/loss": 0.8433115789967198, "eval/nsd-val/loss": 0.8195496836016255} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.09421350403065433, "train/loss": 0.8166600114536285, "eval/hcp-train-subset/loss": 0.8307782959553504, "eval/hcp-val/loss": 0.8417664054901369, "eval/nsd-val/loss": 0.811880394335716} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.09475719253573586, "train/loss": 0.8164162064647674, "eval/hcp-train-subset/loss": 0.8273532486731007, "eval/hcp-val/loss": 0.8414645714144553, "eval/nsd-val/loss": 0.8155181705951691} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.09548490641342904, "train/loss": 0.8153189671707153, "eval/hcp-train-subset/loss": 0.8295405305201008, "eval/hcp-val/loss": 0.8413962673756384, "eval/nsd-val/loss": 0.8229321981630018} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.09763862677550518, "train/loss": 0.8153072679710388, "eval/hcp-train-subset/loss": 0.8279275278891286, "eval/hcp-val/loss": 0.8414190738431869, "eval/nsd-val/loss": 0.8147412375096352} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.09785731520358278, "train/loss": 0.8152214750385285, "eval/hcp-train-subset/loss": 0.8281701466729564, "eval/hcp-val/loss": 0.8409218653555839, "eval/nsd-val/loss": 0.8157080719547887} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.10040795413664823, "train/loss": 0.8131367872142792, "eval/hcp-train-subset/loss": 0.8269858764063928, "eval/hcp-val/loss": 0.8408920572650048, "eval/nsd-val/loss": 0.8212627153242787} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.1010346265930668, "train/loss": 0.8141529225158691, "eval/hcp-train-subset/loss": 0.8254972032962307, "eval/hcp-val/loss": 0.8414223405622667, "eval/nsd-val/loss": 0.8191746915540388} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.10471252677187076, "train/loss": 0.8092914191246032, "eval/hcp-train-subset/loss": 0.8248402332105944, "eval/hcp-val/loss": 0.84166403355137, "eval/nsd-val/loss": 0.817427340053743} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.10368217458762467, "train/loss": 0.812126314535141, "eval/hcp-train-subset/loss": 0.8225747068082133, "eval/hcp-val/loss": 0.8420727502915167, "eval/nsd-val/loss": 0.8223869492930751} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.10149215210095601, "train/loss": 0.8144456730842591, "eval/hcp-train-subset/loss": 0.8224465597060419, "eval/hcp-val/loss": 0.8415962803748346, "eval/nsd-val/loss": 0.8189505761669528} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.10918054434575042, "train/loss": 0.8088462692642212, "eval/hcp-train-subset/loss": 0.8230175875848339, "eval/hcp-val/loss": 0.8397474760009397, "eval/nsd-val/loss": 0.815961136933296} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.11018511715193641, "train/loss": 0.8051790238189698, "eval/hcp-train-subset/loss": 0.8224857286099465, "eval/hcp-val/loss": 0.8412057315149615, "eval/nsd-val/loss": 0.8188642349935347} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.10925591124815535, "train/loss": 0.8105888201904297, "eval/hcp-train-subset/loss": 0.8220570279705909, "eval/hcp-val/loss": 0.8412050662502166, "eval/nsd-val/loss": 0.8149831016217509} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.11290744710692753, "train/loss": 0.806661569852829, "eval/hcp-train-subset/loss": 0.8204413210192034, "eval/hcp-val/loss": 0.840014265429589, "eval/nsd-val/loss": 0.8171430258981643} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.11419626061670567, "train/loss": 0.8063465749073029, "eval/hcp-train-subset/loss": 0.818625912550957, "eval/hcp-val/loss": 0.8399063877521022, "eval/nsd-val/loss": 0.8184743244801799} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.11529029717057547, "train/loss": 0.8043197271251679, "eval/hcp-train-subset/loss": 0.8176988074856419, "eval/hcp-val/loss": 0.840355291481941, "eval/nsd-val/loss": 0.8152190331489809} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.11832258395407058, "train/loss": 0.8040062693023682, "eval/hcp-train-subset/loss": 0.817976551671182, "eval/hcp-val/loss": 0.8393003133035475, "eval/nsd-val/loss": 0.8187303841114044} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.12407177067141165, "train/loss": 0.8009278922843933, "eval/hcp-train-subset/loss": 0.8171338342851208, "eval/hcp-val/loss": 0.8388166600658048, "eval/nsd-val/loss": 0.8191565640511052} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.12280275832704256, "train/loss": 0.8031247965335846, "eval/hcp-train-subset/loss": 0.8160920393082404, "eval/hcp-val/loss": 0.8386069132435706, "eval/nsd-val/loss": 0.8182058651601115} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.12341527956756608, "train/loss": 0.8040184836006165, "eval/hcp-train-subset/loss": 0.8141984785756757, "eval/hcp-val/loss": 0.8387853926227938, "eval/nsd-val/loss": 0.8180994949033183} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.12807353566756569, "train/loss": 0.7995112971973419, "eval/hcp-train-subset/loss": 0.8148229304821261, "eval/hcp-val/loss": 0.8395799484945112, "eval/nsd-val/loss": 0.8211402027837692} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.12841425600930303, "train/loss": 0.8020426762676239, "eval/hcp-train-subset/loss": 0.8150227108309346, "eval/hcp-val/loss": 0.8397156144342115, "eval/nsd-val/loss": 0.8188519545139805} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.13047553640564422, "train/loss": 0.7996231863880158, "eval/hcp-train-subset/loss": 0.8125254646424325, "eval/hcp-val/loss": 0.8386093704931198, "eval/nsd-val/loss": 0.8230492607239754} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.13164468431218065, "train/loss": 0.7992665868282318, "eval/hcp-train-subset/loss": 0.8123393424095646, "eval/hcp-val/loss": 0.8386218836230617, "eval/nsd-val/loss": 0.8261439665671317} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.1338344741745655, "train/loss": 0.7981188384437561, "eval/hcp-train-subset/loss": 0.8113027407277015, "eval/hcp-val/loss": 0.8398673957394015, "eval/nsd-val/loss": 0.8188137527435057} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.1332845863843987, "train/loss": 0.7999420831108093, "eval/hcp-train-subset/loss": 0.8093091134102114, "eval/hcp-val/loss": 0.8399326051435163, "eval/nsd-val/loss": 0.8186195108198351} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.1387093625783863, "train/loss": 0.7990751268100739, "eval/hcp-train-subset/loss": 0.8092553990502511, "eval/hcp-val/loss": 0.8390589250672248, "eval/nsd-val/loss": 0.8189442773019114} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.14051592248136174, "train/loss": 0.7954003508663178, "eval/hcp-train-subset/loss": 0.8086359510498662, "eval/hcp-val/loss": 0.8399238932517267, "eval/nsd-val/loss": 0.82000304806617} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.1414362529695759, "train/loss": 0.7988574220561981, "eval/hcp-train-subset/loss": 0.8066393463842331, "eval/hcp-val/loss": 0.8400765570902056, "eval/nsd-val/loss": 0.8162286118153603} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.14441464539789017, "train/loss": 0.7964259827613831, "eval/hcp-train-subset/loss": 0.8041861124577061, "eval/hcp-val/loss": 0.8393447370298447, "eval/nsd-val/loss": 0.8202721428486609} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.1451759527690681, "train/loss": 0.7971121720600128, "eval/hcp-train-subset/loss": 0.8048486209684803, "eval/hcp-val/loss": 0.8393276006944718, "eval/nsd-val/loss": 0.823653555685474} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.1462490844327258, "train/loss": 0.796851992931366, "eval/hcp-train-subset/loss": 0.8028242453452079, "eval/hcp-val/loss": 0.838575416995633, "eval/nsd-val/loss": 0.8182649025993962} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.1497293102391608, "train/loss": 0.7943155593681336, "eval/hcp-train-subset/loss": 0.8017111487926976, "eval/hcp-val/loss": 0.8385916111930725, "eval/nsd-val/loss": 0.8215436012514176} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.15504983856453633, "train/loss": 0.7937070176124573, "eval/hcp-train-subset/loss": 0.8018990726240219, "eval/hcp-val/loss": 0.8390136207303693, "eval/nsd-val/loss": 0.8194304560461352} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.15181978244844838, "train/loss": 0.7953761101341248, "eval/hcp-train-subset/loss": 0.8009728904693357, "eval/hcp-val/loss": 0.8391698473884214, "eval/nsd-val/loss": 0.8197312066631932} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.155490698869207, "train/loss": 0.7949092643165588, "eval/hcp-train-subset/loss": 0.7985759310183986, "eval/hcp-val/loss": 0.8389917256370667, "eval/nsd-val/loss": 0.8179363031541148} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.15659724085778953, "train/loss": 0.7944628803634644, "eval/hcp-train-subset/loss": 0.7998040385784642, "eval/hcp-val/loss": 0.8390925209368428, "eval/nsd-val/loss": 0.8216909891174685} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.16094230558488853, "train/loss": 0.7916686376857758, "eval/hcp-train-subset/loss": 0.7981750945891103, "eval/hcp-val/loss": 0.8392559270704946, "eval/nsd-val/loss": 0.8210946984829441} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.16051101926761882, "train/loss": 0.7932169512271882, "eval/hcp-train-subset/loss": 0.7959404164744962, "eval/hcp-val/loss": 0.8392432380107141, "eval/nsd-val/loss": 0.8189780442945419} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.16298751503219602, "train/loss": 0.7912658083438874, "eval/hcp-train-subset/loss": 0.795267298336952, "eval/hcp-val/loss": 0.8392360364237139, "eval/nsd-val/loss": 0.8211044871038006} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.16573184080528072, "train/loss": 0.7913833130931854, "eval/hcp-train-subset/loss": 0.7942649376007819, "eval/hcp-val/loss": 0.8385033453664472, "eval/nsd-val/loss": 0.8219846592795464} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.16719040660685838, "train/loss": 0.791326928243637, "eval/hcp-train-subset/loss": 0.7934466725395571, "eval/hcp-val/loss": 0.8398152964730417, "eval/nsd-val/loss": 0.8221551381772564} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.17094690214112765, "train/loss": 0.7891566100311279, "eval/hcp-train-subset/loss": 0.7916518920852292, "eval/hcp-val/loss": 0.838543850568033, "eval/nsd-val/loss": 0.8224162084441031} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.16868178083987073, "train/loss": 0.7943092018890381, "eval/hcp-train-subset/loss": 0.7906762919118328, "eval/hcp-val/loss": 0.8392438811640586, "eval/nsd-val/loss": 0.8214806866261267} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.17022022906638368, "train/loss": 0.7928001017379761, "eval/hcp-train-subset/loss": 0.7900386933357485, "eval/hcp-val/loss": 0.8389725146755096, "eval/nsd-val/loss": 0.8214401237426265} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.1765348170329453, "train/loss": 0.7879824459171295, "eval/hcp-train-subset/loss": 0.7893926597410633, "eval/hcp-val/loss": 0.8393848961399447, "eval/nsd-val/loss": 0.8237024113055198} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.17360245220319012, "train/loss": 0.7922934934329987, "eval/hcp-train-subset/loss": 0.7889529495469986, "eval/hcp-val/loss": 0.8390007855430726, "eval/nsd-val/loss": 0.823797357659186} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.17532123481204495, "train/loss": 0.7917480660533905, "eval/hcp-train-subset/loss": 0.7878674951291853, "eval/hcp-val/loss": 0.8392348731717756, "eval/nsd-val/loss": 0.8220521202010493} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.17831588609253518, "train/loss": 0.7889138554191589, "eval/hcp-train-subset/loss": 0.7869097609673777, "eval/hcp-val/loss": 0.8379312897882154, "eval/nsd-val/loss": 0.8222112271093553} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.17415982190037813, "train/loss": 0.7927430392169953, "eval/hcp-train-subset/loss": 0.7868859825595733, "eval/hcp-val/loss": 0.8386571368863506, "eval/nsd-val/loss": 0.8219752061751581} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.17664138169499116, "train/loss": 0.7928441916465759, "eval/hcp-train-subset/loss": 0.7867067129381241, "eval/hcp-val/loss": 0.8387217300553476, "eval/nsd-val/loss": 0.8215359439772945} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.17880664980156777, "train/loss": 0.791288721666336, "eval/hcp-train-subset/loss": 0.7855959361599337, "eval/hcp-val/loss": 0.8387318176607932, "eval/nsd-val/loss": 0.8229192495346069} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.18144242683241904, "train/loss": 0.7913537672710419, "eval/hcp-train-subset/loss": 0.7847489147417007, "eval/hcp-val/loss": 0.8390965346367129, "eval/nsd-val/loss": 0.8219232876454631} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.18087576502406047, "train/loss": 0.7942271099281311, "eval/hcp-train-subset/loss": 0.7841088281523797, "eval/hcp-val/loss": 0.8386417223561194, "eval/nsd-val/loss": 0.8203512670532349} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.1837156662225189, "train/loss": 0.7933781177043915, "eval/hcp-train-subset/loss": 0.7838221384632972, "eval/hcp-val/loss": 0.8383968107161983, "eval/nsd-val/loss": 0.8219256727926193} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.18327060072574283, "train/loss": 0.7899825508594513, "eval/hcp-train-subset/loss": 0.7832534543929561, "eval/hcp-val/loss": 0.8383283451680215, "eval/nsd-val/loss": 0.8213330930279147} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.182818222246679, "train/loss": 0.7921518129062652, "eval/hcp-train-subset/loss": 0.7826513830692537, "eval/hcp-val/loss": 0.838212670818452, "eval/nsd-val/loss": 0.8229872247865123} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.18849768848949305, "train/loss": 0.788685396270752, "eval/hcp-train-subset/loss": 0.782187775258095, "eval/hcp-val/loss": 0.8382088301643249, "eval/nsd-val/loss": 0.8223608376518372} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.18502053510614141, "train/loss": 0.7946466433811188, "eval/hcp-train-subset/loss": 0.7820050351081356, "eval/hcp-val/loss": 0.8378799644208723, "eval/nsd-val/loss": 0.8217265134857547} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.18721441872580102, "train/loss": 0.7923817550182343, "eval/hcp-train-subset/loss": 0.7818257702935126, "eval/hcp-val/loss": 0.8377579594812086, "eval/nsd-val/loss": 0.8216637632539195} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.1897902801368067, "train/loss": 0.7927230242538452, "eval/hcp-train-subset/loss": 0.7822354849307768, "eval/hcp-val/loss": 0.8375381961945565, "eval/nsd-val/loss": 0.8215769529342651} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.1871258923842022, "train/loss": 0.7942694155883789, "eval/hcp-train-subset/loss": 0.7818839828814229, "eval/hcp-val/loss": 0.837602146210209, "eval/nsd-val/loss": 0.8218551470387366} diff --git a/data_scaling/n800_1/pretrain/log.txt b/data_scaling/n800_1/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..cbaa4feff268acca70266653a2614a3d32cdf798 --- /dev/null +++ b/data_scaling/n800_1/pretrain/log.txt @@ -0,0 +1,8258 @@ +pretraining fmri mae +start: 2026-01-17 20:35:24 +cwd: /admin/home/connor/fmri-fm +sha: 4c3ccfb0b63e4f01e9758042b5299530a6d93949, status: has uncommitted changes, branch: dev/clane9 +config: +name: data_scaling/n800_1/pretrain +notes: data scaling experiment n800_1 (seed=1644) +output_dir: experiments/data_scaling/output/data_scaling/n800_1/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..00799}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 1644 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +val transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..00799}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [8543, 6917, 6772, 3955, 6165, 1554, 1082, 5811, 6919, 3150] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +loading dataset: nsd-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1493, 4276, 245, 3092, 3905, 1862, 2362, 4411, 1138, 2824] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=0, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=True, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 12:44:10 lr: 0.000000 grad: 0.0275 (0.0275) loss: 0.9959 (0.9959) time: 7.3360 data: 5.9771 max mem: 8570 +Train: [0] [ 100/6250] eta: 0:18:43 lr: 0.000000 grad: 0.0135 (0.0160) loss: 0.9959 (0.9958) time: 0.1120 data: 0.0354 max mem: 9377 +Train: [0] [ 200/6250] eta: 0:15:35 lr: 0.000001 grad: 0.0127 (0.0148) loss: 0.9957 (0.9958) time: 0.1373 data: 0.0551 max mem: 9377 +Train: [0] [ 300/6250] eta: 0:14:38 lr: 0.000001 grad: 0.0124 (0.0142) loss: 0.9957 (0.9958) time: 0.1523 data: 0.0675 max mem: 9377 +Train: [0] [ 400/6250] eta: 0:14:25 lr: 0.000002 grad: 0.0124 (0.0138) loss: 0.9958 (0.9958) time: 0.1737 data: 0.0873 max mem: 9377 +Train: [0] [ 500/6250] eta: 0:14:12 lr: 0.000002 grad: 0.0127 (0.0136) loss: 0.9958 (0.9958) time: 0.1658 data: 0.0780 max mem: 9377 +Train: [0] [ 600/6250] eta: 0:14:13 lr: 0.000002 grad: 0.0122 (0.0134) loss: 0.9955 (0.9958) time: 0.1768 data: 0.0831 max mem: 9377 +Train: [0] [ 700/6250] eta: 0:14:04 lr: 0.000003 grad: 0.0127 (0.0133) loss: 0.9958 (0.9958) time: 0.1520 data: 0.0508 max mem: 9377 +Train: [0] [ 800/6250] eta: 0:13:55 lr: 0.000003 grad: 0.0130 (0.0133) loss: 0.9961 (0.9958) time: 0.1697 data: 0.0799 max mem: 9377 +Train: [0] [ 900/6250] eta: 0:13:40 lr: 0.000004 grad: 0.0132 (0.0133) loss: 0.9957 (0.9958) time: 0.1517 data: 0.0574 max mem: 9377 +Train: [0] [1000/6250] eta: 0:13:24 lr: 0.000004 grad: 0.0150 (0.0134) loss: 0.9955 (0.9958) time: 0.1582 data: 0.0564 max mem: 9377 +Train: [0] [1100/6250] eta: 0:13:05 lr: 0.000004 grad: 0.0147 (0.0136) loss: 0.9958 (0.9958) time: 0.1382 data: 0.0481 max mem: 9377 +Train: [0] [1200/6250] eta: 0:12:52 lr: 0.000005 grad: 0.0183 (0.0141) loss: 0.9954 (0.9958) time: 0.1578 data: 0.0590 max mem: 9377 +Train: [0] [1300/6250] eta: 0:12:34 lr: 0.000005 grad: 0.0280 (0.0147) loss: 0.9949 (0.9958) time: 0.1326 data: 0.0417 max mem: 9377 +Train: [0] [1400/6250] eta: 0:12:16 lr: 0.000006 grad: 0.0224 (0.0154) loss: 0.9958 (0.9958) time: 0.1512 data: 0.0678 max mem: 9377 +Train: [0] [1500/6250] eta: 0:11:58 lr: 0.000006 grad: 0.0247 (0.0160) loss: 0.9957 (0.9958) time: 0.1256 data: 0.0317 max mem: 9377 +Train: [0] [1600/6250] eta: 0:11:40 lr: 0.000006 grad: 0.0273 (0.0168) loss: 0.9952 (0.9957) time: 0.1448 data: 0.0534 max mem: 9377 +Train: [0] [1700/6250] eta: 0:11:23 lr: 0.000007 grad: 0.0295 (0.0179) loss: 0.9949 (0.9957) time: 0.1240 data: 0.0317 max mem: 9377 +Train: [0] [1800/6250] eta: 0:11:05 lr: 0.000007 grad: 0.0472 (0.0194) loss: 0.9954 (0.9957) time: 0.1456 data: 0.0500 max mem: 9377 +Train: [0] [1900/6250] eta: 0:10:49 lr: 0.000008 grad: 0.0365 (0.0208) loss: 0.9953 (0.9956) time: 0.1474 data: 0.0463 max mem: 9377 +Train: [0] [2000/6250] eta: 0:10:31 lr: 0.000008 grad: 0.0421 (0.0223) loss: 0.9945 (0.9956) time: 0.1295 data: 0.0246 max mem: 9377 +Train: [0] [2100/6250] eta: 0:10:16 lr: 0.000008 grad: 0.0439 (0.0237) loss: 0.9949 (0.9956) time: 0.1390 data: 0.0505 max mem: 9377 +Train: [0] [2200/6250] eta: 0:10:00 lr: 0.000009 grad: 0.0555 (0.0250) loss: 0.9942 (0.9955) time: 0.1299 data: 0.0373 max mem: 9377 +Train: [0] [2300/6250] eta: 0:09:45 lr: 0.000009 grad: 0.0490 (0.0262) loss: 0.9947 (0.9955) time: 0.1485 data: 0.0548 max mem: 9377 +Train: [0] [2400/6250] eta: 0:09:29 lr: 0.000010 grad: 0.0485 (0.0274) loss: 0.9941 (0.9954) time: 0.1318 data: 0.0432 max mem: 9377 +Train: [0] [2500/6250] eta: 0:09:13 lr: 0.000010 grad: 0.0481 (0.0287) loss: 0.9945 (0.9954) time: 0.1350 data: 0.0399 max mem: 9377 +Train: [0] [2600/6250] eta: 0:08:57 lr: 0.000010 grad: 0.0541 (0.0297) loss: 0.9936 (0.9953) time: 0.1274 data: 0.0307 max mem: 9377 +Train: [0] [2700/6250] eta: 0:08:42 lr: 0.000011 grad: 0.0671 (0.0309) loss: 0.9928 (0.9952) time: 0.1346 data: 0.0469 max mem: 9377 +Train: [0] [2800/6250] eta: 0:08:27 lr: 0.000011 grad: 0.0557 (0.0320) loss: 0.9938 (0.9952) time: 0.1509 data: 0.0616 max mem: 9377 +Train: [0] [2900/6250] eta: 0:08:12 lr: 0.000012 grad: 0.0563 (0.0330) loss: 0.9936 (0.9951) time: 0.1433 data: 0.0460 max mem: 9377 +Train: [0] [3000/6250] eta: 0:07:57 lr: 0.000012 grad: 0.0625 (0.0341) loss: 0.9931 (0.9951) time: 0.1465 data: 0.0602 max mem: 9377 +Train: [0] [3100/6250] eta: 0:07:42 lr: 0.000012 grad: 0.0688 (0.0352) loss: 0.9918 (0.9950) time: 0.1461 data: 0.0558 max mem: 9377 +Train: [0] [3200/6250] eta: 0:07:27 lr: 0.000013 grad: 0.0614 (0.0365) loss: 0.9930 (0.9950) time: 0.1497 data: 0.0554 max mem: 9377 +Train: [0] [3300/6250] eta: 0:07:13 lr: 0.000013 grad: 0.0594 (0.0375) loss: 0.9929 (0.9949) time: 0.1653 data: 0.0630 max mem: 9377 +Train: [0] [3400/6250] eta: 0:06:58 lr: 0.000014 grad: 0.0510 (0.0387) loss: 0.9940 (0.9948) time: 0.1461 data: 0.0620 max mem: 9377 +Train: [0] [3500/6250] eta: 0:06:44 lr: 0.000014 grad: 0.0713 (0.0396) loss: 0.9926 (0.9948) time: 0.1525 data: 0.0614 max mem: 9377 +Train: [0] [3600/6250] eta: 0:06:30 lr: 0.000014 grad: 0.0703 (0.0406) loss: 0.9918 (0.9947) time: 0.1657 data: 0.0722 max mem: 9377 +Train: [0] [3700/6250] eta: 0:06:15 lr: 0.000015 grad: 0.0633 (0.0415) loss: 0.9917 (0.9946) time: 0.1443 data: 0.0615 max mem: 9377 +Train: [0] [3800/6250] eta: 0:06:00 lr: 0.000015 grad: 0.0668 (0.0423) loss: 0.9924 (0.9946) time: 0.1426 data: 0.0578 max mem: 9377 +Train: [0] [3900/6250] eta: 0:05:46 lr: 0.000016 grad: 0.0671 (0.0432) loss: 0.9916 (0.9945) time: 0.1744 data: 0.0923 max mem: 9377 +Train: [0] [4000/6250] eta: 0:05:31 lr: 0.000016 grad: 0.0801 (0.0442) loss: 0.9913 (0.9944) time: 0.1567 data: 0.0643 max mem: 9377 +Train: [0] [4100/6250] eta: 0:05:17 lr: 0.000016 grad: 0.0705 (0.0451) loss: 0.9923 (0.9944) time: 0.1553 data: 0.0669 max mem: 9377 +Train: [0] [4200/6250] eta: 0:05:02 lr: 0.000017 grad: 0.0869 (0.0460) loss: 0.9906 (0.9943) time: 0.1676 data: 0.0795 max mem: 9377 +Train: [0] [4300/6250] eta: 0:04:47 lr: 0.000017 grad: 0.0777 (0.0466) loss: 0.9916 (0.9942) time: 0.1400 data: 0.0503 max mem: 9377 +Train: [0] [4400/6250] eta: 0:04:33 lr: 0.000018 grad: 0.0632 (0.0474) loss: 0.9912 (0.9942) time: 0.1549 data: 0.0705 max mem: 9377 +Train: [0] [4500/6250] eta: 0:04:18 lr: 0.000018 grad: 0.0784 (0.0482) loss: 0.9894 (0.9941) time: 0.1670 data: 0.0758 max mem: 9377 +Train: [0] [4600/6250] eta: 0:04:04 lr: 0.000018 grad: 0.0669 (0.0489) loss: 0.9924 (0.9940) time: 0.1400 data: 0.0484 max mem: 9377 +Train: [0] [4700/6250] eta: 0:03:49 lr: 0.000019 grad: 0.0782 (0.0495) loss: 0.9903 (0.9940) time: 0.1608 data: 0.0739 max mem: 9377 +Train: [0] [4800/6250] eta: 0:03:34 lr: 0.000019 grad: 0.0717 (0.0501) loss: 0.9915 (0.9939) time: 0.1547 data: 0.0612 max mem: 9377 +Train: [0] [4900/6250] eta: 0:03:20 lr: 0.000020 grad: 0.0690 (0.0506) loss: 0.9904 (0.9938) time: 0.1800 data: 0.0897 max mem: 9377 +Train: [0] [5000/6250] eta: 0:03:06 lr: 0.000020 grad: 0.0641 (0.0511) loss: 0.9908 (0.9938) time: 0.1595 data: 0.0746 max mem: 9377 +Train: [0] [5100/6250] eta: 0:02:51 lr: 0.000020 grad: 0.0647 (0.0515) loss: 0.9921 (0.9937) time: 0.1468 data: 0.0622 max mem: 9377 +Train: [0] [5200/6250] eta: 0:02:36 lr: 0.000021 grad: 0.0664 (0.0519) loss: 0.9910 (0.9937) time: 0.1418 data: 0.0583 max mem: 9377 +Train: [0] [5300/6250] eta: 0:02:22 lr: 0.000021 grad: 0.0643 (0.0524) loss: 0.9913 (0.9936) time: 0.1849 data: 0.1009 max mem: 9377 +Train: [0] [5400/6250] eta: 0:02:07 lr: 0.000022 grad: 0.0657 (0.0527) loss: 0.9894 (0.9935) time: 0.1537 data: 0.0614 max mem: 9377 +Train: [0] [5500/6250] eta: 0:01:52 lr: 0.000022 grad: 0.0668 (0.0531) loss: 0.9904 (0.9935) time: 0.1874 data: 0.1001 max mem: 9377 +Train: [0] [5600/6250] eta: 0:01:37 lr: 0.000022 grad: 0.0657 (0.0534) loss: 0.9909 (0.9934) time: 0.1491 data: 0.0528 max mem: 9377 +Train: [0] [5700/6250] eta: 0:01:22 lr: 0.000023 grad: 0.0666 (0.0537) loss: 0.9908 (0.9934) time: 0.1577 data: 0.0681 max mem: 9377 +Train: [0] [5800/6250] eta: 0:01:07 lr: 0.000023 grad: 0.0692 (0.0541) loss: 0.9912 (0.9933) time: 0.1589 data: 0.0715 max mem: 9377 +Train: [0] [5900/6250] eta: 0:00:52 lr: 0.000024 grad: 0.0670 (0.0544) loss: 0.9909 (0.9933) time: 0.1482 data: 0.0704 max mem: 9377 +Train: [0] [6000/6250] eta: 0:00:37 lr: 0.000024 grad: 0.0697 (0.0547) loss: 0.9896 (0.9932) time: 0.1598 data: 0.0767 max mem: 9377 +Train: [0] [6100/6250] eta: 0:00:22 lr: 0.000024 grad: 0.0689 (0.0551) loss: 0.9907 (0.9931) time: 0.1467 data: 0.0683 max mem: 9377 +Train: [0] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.0715 (0.0554) loss: 0.9903 (0.9931) time: 0.1779 data: 0.0915 max mem: 9377 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.0735 (0.0556) loss: 0.9905 (0.9931) time: 0.1572 data: 0.0704 max mem: 9377 +Train: [0] Total time: 0:15:49 (0.1519 s / it) +Averaged stats: lr: 0.000025 grad: 0.0735 (0.0556) loss: 0.9905 (0.9931) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:03:28 loss: 0.9891 (0.9891) time: 3.3707 data: 3.2992 max mem: 9377 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9900 (0.9894) time: 0.1483 data: 0.1229 max mem: 9377 +Eval (hcp-train-subset): [0] Total time: 0:00:14 (0.2339 s / it) +Averaged stats (hcp-train-subset): loss: 0.9900 (0.9894) +Eval (hcp-val): [0] [ 0/62] eta: 0:05:57 loss: 0.9848 (0.9848) time: 5.7702 data: 5.7412 max mem: 9377 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9900 (0.9891) time: 0.1785 data: 0.1520 max mem: 9377 +Eval (hcp-val): [0] Total time: 0:00:15 (0.2503 s / it) +Averaged stats (hcp-val): loss: 0.9900 (0.9891) +Eval (nsd-val): [0] [ 0/62] eta: 0:04:44 loss: 0.9866 (0.9866) time: 4.5959 data: 4.5626 max mem: 9377 +Eval (nsd-val): [0] [61/62] eta: 0:00:00 loss: 0.9903 (0.9897) time: 0.1256 data: 0.1006 max mem: 9377 +Eval (nsd-val): [0] Total time: 0:00:13 (0.2196 s / it) +Averaged stats (nsd-val): loss: 0.9903 (0.9897) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [1] [ 0/6250] eta: 9:07:34 lr: 0.000025 grad: 0.1400 (0.1400) loss: 0.9974 (0.9974) time: 5.2567 data: 5.1187 max mem: 9377 +Train: [1] [ 100/6250] eta: 0:21:24 lr: 0.000025 grad: 0.0912 (0.1012) loss: 0.9893 (0.9897) time: 0.1767 data: 0.0998 max mem: 9377 +Train: [1] [ 200/6250] eta: 0:21:00 lr: 0.000026 grad: 0.0742 (0.0946) loss: 0.9900 (0.9894) time: 0.2294 data: 0.1150 max mem: 9377 +Train: [1] [ 300/6250] eta: 0:20:33 lr: 0.000026 grad: 0.0762 (0.0882) loss: 0.9892 (0.9894) time: 0.2542 data: 0.1343 max mem: 9377 +Train: [1] [ 400/6250] eta: 0:20:06 lr: 0.000027 grad: 0.0710 (0.0851) loss: 0.9905 (0.9894) time: 0.2239 data: 0.1201 max mem: 9377 +Train: [1] [ 500/6250] eta: 0:20:02 lr: 0.000027 grad: 0.0642 (0.0838) loss: 0.9906 (0.9895) time: 0.2370 data: 0.1156 max mem: 9377 +Train: [1] [ 600/6250] eta: 0:19:40 lr: 0.000027 grad: 0.0726 (0.0824) loss: 0.9900 (0.9895) time: 0.1875 data: 0.0655 max mem: 9377 +Train: [1] [ 700/6250] eta: 0:19:02 lr: 0.000028 grad: 0.0822 (0.0820) loss: 0.9896 (0.9895) time: 0.1789 data: 0.0727 max mem: 9377 +Train: [1] [ 800/6250] eta: 0:18:31 lr: 0.000028 grad: 0.0710 (0.0813) loss: 0.9892 (0.9896) time: 0.1697 data: 0.0606 max mem: 9377 +Train: [1] [ 900/6250] eta: 0:18:02 lr: 0.000029 grad: 0.0804 (0.0815) loss: 0.9899 (0.9895) time: 0.1992 data: 0.0921 max mem: 9377 +Train: [1] [1000/6250] eta: 0:17:29 lr: 0.000029 grad: 0.0843 (0.0814) loss: 0.9900 (0.9895) time: 0.1957 data: 0.1108 max mem: 9377 +Train: [1] [1100/6250] eta: 0:16:56 lr: 0.000029 grad: 0.0688 (0.0813) loss: 0.9893 (0.9894) time: 0.1637 data: 0.0743 max mem: 9377 +Train: [1] [1200/6250] eta: 0:16:25 lr: 0.000030 grad: 0.0810 (0.0812) loss: 0.9893 (0.9894) time: 0.1551 data: 0.0524 max mem: 9377 +Train: [1] [1300/6250] eta: 0:15:54 lr: 0.000030 grad: 0.0799 (0.0813) loss: 0.9902 (0.9894) time: 0.1781 data: 0.0836 max mem: 9377 +Train: [1] [1400/6250] eta: 0:15:23 lr: 0.000031 grad: 0.0751 (0.0812) loss: 0.9891 (0.9894) time: 0.1541 data: 0.0700 max mem: 9377 +Train: [1] [1500/6250] eta: 0:14:59 lr: 0.000031 grad: 0.0824 (0.0808) loss: 0.9884 (0.9893) time: 0.1615 data: 0.0699 max mem: 9377 +Train: [1] [1600/6250] eta: 0:14:31 lr: 0.000031 grad: 0.0913 (0.0809) loss: 0.9892 (0.9893) time: 0.1550 data: 0.0669 max mem: 9377 +Train: [1] [1700/6250] eta: 0:14:06 lr: 0.000032 grad: 0.0759 (0.0808) loss: 0.9880 (0.9893) time: 0.1397 data: 0.0538 max mem: 9377 +Train: [1] [1800/6250] eta: 0:13:42 lr: 0.000032 grad: 0.0692 (0.0807) loss: 0.9885 (0.9893) time: 0.1534 data: 0.0668 max mem: 9377 +Train: [1] [1900/6250] eta: 0:13:18 lr: 0.000033 grad: 0.0724 (0.0805) loss: 0.9891 (0.9893) time: 0.1724 data: 0.0905 max mem: 9377 +Train: [1] [2000/6250] eta: 0:12:56 lr: 0.000033 grad: 0.0808 (0.0806) loss: 0.9880 (0.9892) time: 0.1692 data: 0.0797 max mem: 9377 +Train: [1] [2100/6250] eta: 0:12:34 lr: 0.000033 grad: 0.0828 (0.0805) loss: 0.9888 (0.9892) time: 0.2043 data: 0.1245 max mem: 9377 +Train: [1] [2200/6250] eta: 0:12:13 lr: 0.000034 grad: 0.0693 (0.0805) loss: 0.9890 (0.9892) time: 0.1640 data: 0.0727 max mem: 9377 +Train: [1] [2300/6250] eta: 0:11:51 lr: 0.000034 grad: 0.0758 (0.0805) loss: 0.9892 (0.9891) time: 0.1633 data: 0.0731 max mem: 9377 +Train: [1] [2400/6250] eta: 0:11:30 lr: 0.000035 grad: 0.0754 (0.0804) loss: 0.9885 (0.9891) time: 0.1711 data: 0.0550 max mem: 9377 +Train: [1] [2500/6250] eta: 0:11:09 lr: 0.000035 grad: 0.0731 (0.0805) loss: 0.9897 (0.9891) time: 0.1613 data: 0.0652 max mem: 9377 +Train: [1] [2600/6250] eta: 0:10:47 lr: 0.000035 grad: 0.0789 (0.0804) loss: 0.9903 (0.9891) time: 0.1674 data: 0.0879 max mem: 9377 +Train: [1] [2700/6250] eta: 0:10:28 lr: 0.000036 grad: 0.0820 (0.0807) loss: 0.9881 (0.9890) time: 0.1715 data: 0.0778 max mem: 9377 +Train: [1] [2800/6250] eta: 0:10:10 lr: 0.000036 grad: 0.0754 (0.0806) loss: 0.9885 (0.9890) time: 0.1445 data: 0.0540 max mem: 9377 +Train: [1] [2900/6250] eta: 0:09:52 lr: 0.000037 grad: 0.0723 (0.0806) loss: 0.9881 (0.9890) time: 0.1869 data: 0.0988 max mem: 9377 +Train: [1] [3000/6250] eta: 0:09:34 lr: 0.000037 grad: 0.0849 (0.0807) loss: 0.9871 (0.9890) time: 0.1841 data: 0.0941 max mem: 9377 +Train: [1] [3100/6250] eta: 0:09:17 lr: 0.000037 grad: 0.0716 (0.0807) loss: 0.9893 (0.9890) time: 0.1895 data: 0.0979 max mem: 9377 +Train: [1] [3200/6250] eta: 0:08:57 lr: 0.000038 grad: 0.0780 (0.0809) loss: 0.9877 (0.9889) time: 0.1530 data: 0.0631 max mem: 9377 +Train: [1] [3300/6250] eta: 0:08:39 lr: 0.000038 grad: 0.0723 (0.0810) loss: 0.9891 (0.9889) time: 0.1811 data: 0.0931 max mem: 9377 +Train: [1] [3400/6250] eta: 0:08:21 lr: 0.000039 grad: 0.0791 (0.0810) loss: 0.9883 (0.9889) time: 0.1723 data: 0.0811 max mem: 9377 +Train: [1] [3500/6250] eta: 0:08:02 lr: 0.000039 grad: 0.0704 (0.0811) loss: 0.9879 (0.9889) time: 0.1709 data: 0.0751 max mem: 9377 +Train: [1] [3600/6250] eta: 0:07:44 lr: 0.000039 grad: 0.0775 (0.0812) loss: 0.9890 (0.9888) time: 0.1830 data: 0.0991 max mem: 9377 +Train: [1] [3700/6250] eta: 0:07:25 lr: 0.000040 grad: 0.0825 (0.0814) loss: 0.9882 (0.9888) time: 0.1574 data: 0.0643 max mem: 9377 +Train: [1] [3800/6250] eta: 0:07:07 lr: 0.000040 grad: 0.0703 (0.0814) loss: 0.9893 (0.9887) time: 0.1440 data: 0.0528 max mem: 9377 +Train: [1] [3900/6250] eta: 0:06:49 lr: 0.000041 grad: 0.0945 (0.0816) loss: 0.9873 (0.9887) time: 0.1546 data: 0.0678 max mem: 9377 +Train: [1] [4000/6250] eta: 0:06:31 lr: 0.000041 grad: 0.0849 (0.0818) loss: 0.9875 (0.9886) time: 0.1708 data: 0.0846 max mem: 9377 +Train: [1] [4100/6250] eta: 0:06:12 lr: 0.000041 grad: 0.0792 (0.0818) loss: 0.9866 (0.9886) time: 0.1621 data: 0.0657 max mem: 9377 +Train: [1] [4200/6250] eta: 0:05:54 lr: 0.000042 grad: 0.0770 (0.0820) loss: 0.9873 (0.9885) time: 0.1544 data: 0.0783 max mem: 9377 +Train: [1] [4300/6250] eta: 0:05:36 lr: 0.000042 grad: 0.0736 (0.0821) loss: 0.9884 (0.9885) time: 0.1518 data: 0.0676 max mem: 9377 +Train: [1] [4400/6250] eta: 0:05:18 lr: 0.000043 grad: 0.0738 (0.0821) loss: 0.9876 (0.9885) time: 0.1718 data: 0.0823 max mem: 9377 +Train: [1] [4500/6250] eta: 0:05:01 lr: 0.000043 grad: 0.0820 (0.0821) loss: 0.9866 (0.9884) time: 0.1674 data: 0.0832 max mem: 9377 +Train: [1] [4600/6250] eta: 0:04:43 lr: 0.000043 grad: 0.0703 (0.0821) loss: 0.9888 (0.9884) time: 0.1434 data: 0.0528 max mem: 9377 +Train: [1] [4700/6250] eta: 0:04:26 lr: 0.000044 grad: 0.0776 (0.0821) loss: 0.9879 (0.9884) time: 0.1662 data: 0.0795 max mem: 9377 +Train: [1] [4800/6250] eta: 0:04:08 lr: 0.000044 grad: 0.0811 (0.0821) loss: 0.9865 (0.9884) time: 0.1396 data: 0.0459 max mem: 9377 +Train: [1] [4900/6250] eta: 0:03:51 lr: 0.000045 grad: 0.0836 (0.0823) loss: 0.9868 (0.9883) time: 0.1522 data: 0.0616 max mem: 9377 +Train: [1] [5000/6250] eta: 0:03:33 lr: 0.000045 grad: 0.0753 (0.0824) loss: 0.9883 (0.9883) time: 0.1612 data: 0.0765 max mem: 9377 +Train: [1] [5100/6250] eta: 0:03:16 lr: 0.000045 grad: 0.0840 (0.0824) loss: 0.9858 (0.9883) time: 0.1575 data: 0.0781 max mem: 9377 +Train: [1] [5200/6250] eta: 0:02:59 lr: 0.000046 grad: 0.1003 (0.0827) loss: 0.9846 (0.9882) time: 0.1675 data: 0.0772 max mem: 9377 +Train: [1] [5300/6250] eta: 0:02:42 lr: 0.000046 grad: 0.0923 (0.0829) loss: 0.9860 (0.9882) time: 0.1635 data: 0.0719 max mem: 9377 +Train: [1] [5400/6250] eta: 0:02:24 lr: 0.000047 grad: 0.0761 (0.0830) loss: 0.9851 (0.9881) time: 0.1633 data: 0.0650 max mem: 9377 +Train: [1] [5500/6250] eta: 0:02:07 lr: 0.000047 grad: 0.0796 (0.0832) loss: 0.9862 (0.9881) time: 0.1855 data: 0.0949 max mem: 9377 +Train: [1] [5600/6250] eta: 0:01:50 lr: 0.000047 grad: 0.0904 (0.0833) loss: 0.9851 (0.9881) time: 0.1759 data: 0.0861 max mem: 9377 +Train: [1] [5700/6250] eta: 0:01:33 lr: 0.000048 grad: 0.0775 (0.0833) loss: 0.9858 (0.9880) time: 0.1522 data: 0.0690 max mem: 9377 +Train: [1] [5800/6250] eta: 0:01:16 lr: 0.000048 grad: 0.0848 (0.0834) loss: 0.9877 (0.9880) time: 0.1607 data: 0.0756 max mem: 9377 +Train: [1] [5900/6250] eta: 0:00:59 lr: 0.000049 grad: 0.0723 (0.0834) loss: 0.9871 (0.9880) time: 0.1704 data: 0.0831 max mem: 9377 +Train: [1] [6000/6250] eta: 0:00:42 lr: 0.000049 grad: 0.0746 (0.0834) loss: 0.9874 (0.9880) time: 0.1391 data: 0.0542 max mem: 9377 +Train: [1] [6100/6250] eta: 0:00:25 lr: 0.000049 grad: 0.0661 (0.0835) loss: 0.9892 (0.9879) time: 0.1536 data: 0.0613 max mem: 9377 +Train: [1] [6200/6250] eta: 0:00:08 lr: 0.000050 grad: 0.0831 (0.0836) loss: 0.9858 (0.9879) time: 0.1676 data: 0.0862 max mem: 9377 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0785 (0.0836) loss: 0.9876 (0.9879) time: 0.1342 data: 0.0408 max mem: 9377 +Train: [1] Total time: 0:17:37 (0.1692 s / it) +Averaged stats: lr: 0.000050 grad: 0.0785 (0.0836) loss: 0.9876 (0.9879) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:06:04 loss: 0.9861 (0.9861) time: 5.8763 data: 5.8447 max mem: 9377 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9870 (0.9860) time: 0.1869 data: 0.1580 max mem: 9377 +Eval (hcp-train-subset): [1] Total time: 0:00:17 (0.2812 s / it) +Averaged stats (hcp-train-subset): loss: 0.9870 (0.9860) +Eval (hcp-val): [1] [ 0/62] eta: 0:04:02 loss: 0.9877 (0.9877) time: 3.9039 data: 3.8246 max mem: 9377 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9856 (0.9858) time: 0.1994 data: 0.1746 max mem: 9377 +Eval (hcp-val): [1] Total time: 0:00:19 (0.3134 s / it) +Averaged stats (hcp-val): loss: 0.9856 (0.9858) +Eval (nsd-val): [1] [ 0/62] eta: 0:07:05 loss: 0.9875 (0.9875) time: 6.8619 data: 6.8287 max mem: 9377 +Eval (nsd-val): [1] [61/62] eta: 0:00:00 loss: 0.9871 (0.9870) time: 0.1528 data: 0.1268 max mem: 9377 +Eval (nsd-val): [1] Total time: 0:00:18 (0.2960 s / it) +Averaged stats (nsd-val): loss: 0.9871 (0.9870) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [2] [ 0/6250] eta: 9:27:31 lr: 0.000050 grad: 0.2856 (0.2856) loss: 0.9923 (0.9923) time: 5.4482 data: 5.1893 max mem: 9377 +Train: [2] [ 100/6250] eta: 0:26:20 lr: 0.000050 grad: 0.0852 (0.0905) loss: 0.9865 (0.9867) time: 0.1582 data: 0.0600 max mem: 9377 +Train: [2] [ 200/6250] eta: 0:22:47 lr: 0.000051 grad: 0.0778 (0.0858) loss: 0.9888 (0.9868) time: 0.2182 data: 0.1289 max mem: 9377 +Train: [2] [ 300/6250] eta: 0:21:27 lr: 0.000051 grad: 0.0759 (0.0822) loss: 0.9885 (0.9873) time: 0.1970 data: 0.1130 max mem: 9377 +Train: [2] [ 400/6250] eta: 0:20:24 lr: 0.000052 grad: 0.0764 (0.0820) loss: 0.9855 (0.9873) time: 0.2137 data: 0.1068 max mem: 9377 +Train: [2] [ 500/6250] eta: 0:19:43 lr: 0.000052 grad: 0.0805 (0.0836) loss: 0.9857 (0.9869) time: 0.1818 data: 0.0650 max mem: 9377 +Train: [2] [ 600/6250] eta: 0:19:16 lr: 0.000052 grad: 0.0816 (0.0853) loss: 0.9854 (0.9866) time: 0.2105 data: 0.1073 max mem: 9377 +Train: [2] [ 700/6250] eta: 0:18:54 lr: 0.000053 grad: 0.0855 (0.0863) loss: 0.9864 (0.9864) time: 0.2385 data: 0.1168 max mem: 9377 +Train: [2] [ 800/6250] eta: 0:18:38 lr: 0.000053 grad: 0.0832 (0.0866) loss: 0.9870 (0.9863) time: 0.2164 data: 0.0911 max mem: 9377 +Train: [2] [ 900/6250] eta: 0:18:14 lr: 0.000054 grad: 0.0699 (0.0865) loss: 0.9865 (0.9863) time: 0.1905 data: 0.0838 max mem: 9377 +Train: [2] [1000/6250] eta: 0:17:35 lr: 0.000054 grad: 0.0816 (0.0871) loss: 0.9858 (0.9863) time: 0.1378 data: 0.0427 max mem: 9377 +Train: [2] [1100/6250] eta: 0:16:59 lr: 0.000054 grad: 0.0773 (0.0871) loss: 0.9860 (0.9863) time: 0.1562 data: 0.0573 max mem: 9377 +Train: [2] [1200/6250] eta: 0:16:27 lr: 0.000055 grad: 0.0813 (0.0878) loss: 0.9862 (0.9863) time: 0.1784 data: 0.0945 max mem: 9377 +Train: [2] [1300/6250] eta: 0:15:59 lr: 0.000055 grad: 0.0741 (0.0875) loss: 0.9865 (0.9863) time: 0.1674 data: 0.0704 max mem: 9377 +Train: [2] [1400/6250] eta: 0:15:31 lr: 0.000056 grad: 0.0902 (0.0875) loss: 0.9843 (0.9863) time: 0.1734 data: 0.0780 max mem: 9377 +Train: [2] [1500/6250] eta: 0:15:03 lr: 0.000056 grad: 0.0733 (0.0875) loss: 0.9865 (0.9862) time: 0.1484 data: 0.0597 max mem: 9377 +Train: [2] [1600/6250] eta: 0:14:37 lr: 0.000056 grad: 0.0850 (0.0875) loss: 0.9857 (0.9862) time: 0.1572 data: 0.0599 max mem: 9377 +Train: [2] [1700/6250] eta: 0:14:12 lr: 0.000057 grad: 0.0864 (0.0876) loss: 0.9871 (0.9862) time: 0.1659 data: 0.0850 max mem: 9377 +Train: [2] [1800/6250] eta: 0:13:48 lr: 0.000057 grad: 0.0824 (0.0875) loss: 0.9852 (0.9862) time: 0.1314 data: 0.0411 max mem: 9377 +Train: [2] [1900/6250] eta: 0:13:25 lr: 0.000058 grad: 0.0712 (0.0874) loss: 0.9863 (0.9862) time: 0.1979 data: 0.1107 max mem: 9377 +Train: [2] [2000/6250] eta: 0:13:01 lr: 0.000058 grad: 0.0782 (0.0872) loss: 0.9876 (0.9862) time: 0.1291 data: 0.0416 max mem: 9377 +Train: [2] [2100/6250] eta: 0:12:37 lr: 0.000058 grad: 0.0850 (0.0872) loss: 0.9857 (0.9862) time: 0.1584 data: 0.0703 max mem: 9377 +Train: [2] [2200/6250] eta: 0:12:14 lr: 0.000059 grad: 0.0923 (0.0871) loss: 0.9836 (0.9862) time: 0.1466 data: 0.0538 max mem: 9377 +Train: [2] [2300/6250] eta: 0:11:53 lr: 0.000059 grad: 0.0877 (0.0873) loss: 0.9852 (0.9862) time: 0.1769 data: 0.1005 max mem: 9377 +Train: [2] [2400/6250] eta: 0:11:32 lr: 0.000060 grad: 0.0853 (0.0874) loss: 0.9880 (0.9861) time: 0.1531 data: 0.0687 max mem: 9377 +Train: [2] [2500/6250] eta: 0:11:13 lr: 0.000060 grad: 0.0851 (0.0878) loss: 0.9854 (0.9861) time: 0.1734 data: 0.0849 max mem: 9377 +Train: [2] [2600/6250] eta: 0:10:53 lr: 0.000060 grad: 0.0926 (0.0883) loss: 0.9858 (0.9860) time: 0.1635 data: 0.0835 max mem: 9377 +Train: [2] [2700/6250] eta: 0:10:32 lr: 0.000061 grad: 0.0878 (0.0884) loss: 0.9870 (0.9860) time: 0.1541 data: 0.0679 max mem: 9377 +Train: [2] [2800/6250] eta: 0:10:13 lr: 0.000061 grad: 0.0837 (0.0886) loss: 0.9857 (0.9859) time: 0.1549 data: 0.0641 max mem: 9377 +Train: [2] [2900/6250] eta: 0:09:53 lr: 0.000062 grad: 0.0866 (0.0888) loss: 0.9848 (0.9859) time: 0.1654 data: 0.0726 max mem: 9377 +Train: [2] [3000/6250] eta: 0:09:33 lr: 0.000062 grad: 0.0921 (0.0892) loss: 0.9853 (0.9858) time: 0.1739 data: 0.0918 max mem: 9377 +Train: [2] [3100/6250] eta: 0:09:13 lr: 0.000062 grad: 0.0850 (0.0896) loss: 0.9846 (0.9858) time: 0.1460 data: 0.0649 max mem: 9377 +Train: [2] [3200/6250] eta: 0:08:54 lr: 0.000063 grad: 0.0870 (0.0899) loss: 0.9853 (0.9858) time: 0.1540 data: 0.0695 max mem: 9377 +Train: [2] [3300/6250] eta: 0:08:36 lr: 0.000063 grad: 0.0854 (0.0901) loss: 0.9851 (0.9857) time: 0.1621 data: 0.0761 max mem: 9377 +Train: [2] [3400/6250] eta: 0:08:17 lr: 0.000064 grad: 0.1018 (0.0905) loss: 0.9855 (0.9857) time: 0.1424 data: 0.0441 max mem: 9377 +Train: [2] [3500/6250] eta: 0:07:58 lr: 0.000064 grad: 0.1020 (0.0909) loss: 0.9819 (0.9857) time: 0.1465 data: 0.0621 max mem: 9377 +Train: [2] [3600/6250] eta: 0:07:40 lr: 0.000064 grad: 0.1038 (0.0915) loss: 0.9840 (0.9856) time: 0.1684 data: 0.0833 max mem: 9377 +Train: [2] [3700/6250] eta: 0:07:22 lr: 0.000065 grad: 0.1069 (0.0920) loss: 0.9855 (0.9856) time: 0.1507 data: 0.0566 max mem: 9377 +Train: [2] [3800/6250] eta: 0:07:03 lr: 0.000065 grad: 0.0991 (0.0927) loss: 0.9834 (0.9855) time: 0.1617 data: 0.0734 max mem: 9377 +Train: [2] [3900/6250] eta: 0:06:46 lr: 0.000066 grad: 0.0879 (0.0934) loss: 0.9852 (0.9855) time: 0.1728 data: 0.0809 max mem: 9377 +Train: [2] [4000/6250] eta: 0:06:28 lr: 0.000066 grad: 0.1088 (0.0943) loss: 0.9856 (0.9855) time: 0.1847 data: 0.0803 max mem: 9377 +Train: [2] [4100/6250] eta: 0:06:10 lr: 0.000066 grad: 0.1302 (0.0955) loss: 0.9877 (0.9854) time: 0.1222 data: 0.0391 max mem: 9377 +Train: [2] [4200/6250] eta: 0:05:52 lr: 0.000067 grad: 0.1003 (0.0963) loss: 0.9834 (0.9854) time: 0.1406 data: 0.0483 max mem: 9377 +Train: [2] [4300/6250] eta: 0:05:34 lr: 0.000067 grad: 0.1079 (0.0974) loss: 0.9810 (0.9853) time: 0.1574 data: 0.0697 max mem: 9377 +Train: [2] [4400/6250] eta: 0:05:16 lr: 0.000068 grad: 0.1165 (0.0984) loss: 0.9825 (0.9853) time: 0.1583 data: 0.0607 max mem: 9377 +Train: [2] [4500/6250] eta: 0:04:59 lr: 0.000068 grad: 0.1685 (0.1001) loss: 0.9834 (0.9852) time: 0.1479 data: 0.0590 max mem: 9377 +Train: [2] [4600/6250] eta: 0:04:43 lr: 0.000068 grad: 0.1018 (0.1015) loss: 0.9840 (0.9852) time: 0.1068 data: 0.0003 max mem: 9377 +Train: [2] [4700/6250] eta: 0:04:25 lr: 0.000069 grad: 0.1727 (0.1029) loss: 0.9829 (0.9851) time: 0.1976 data: 0.1126 max mem: 9377 +Train: [2] [4800/6250] eta: 0:04:08 lr: 0.000069 grad: 0.1686 (0.1045) loss: 0.9837 (0.9850) time: 0.2055 data: 0.1143 max mem: 9377 +Train: [2] [4900/6250] eta: 0:03:50 lr: 0.000070 grad: 0.1206 (0.1064) loss: 0.9814 (0.9849) time: 0.1612 data: 0.0759 max mem: 9377 +Train: [2] [5000/6250] eta: 0:03:33 lr: 0.000070 grad: 0.2205 (0.1086) loss: 0.9812 (0.9849) time: 0.1652 data: 0.0761 max mem: 9377 +Train: [2] [5100/6250] eta: 0:03:15 lr: 0.000070 grad: 0.2057 (0.1110) loss: 0.9782 (0.9848) time: 0.1510 data: 0.0699 max mem: 9377 +Train: [2] [5200/6250] eta: 0:02:58 lr: 0.000071 grad: 0.1254 (0.1125) loss: 0.9800 (0.9847) time: 0.1910 data: 0.1018 max mem: 9377 +Train: [2] [5300/6250] eta: 0:02:41 lr: 0.000071 grad: 0.1499 (0.1147) loss: 0.9806 (0.9846) time: 0.1626 data: 0.0688 max mem: 9377 +Train: [2] [5400/6250] eta: 0:02:24 lr: 0.000072 grad: 0.2400 (0.1169) loss: 0.9782 (0.9845) time: 0.1591 data: 0.0678 max mem: 9377 +Train: [2] [5500/6250] eta: 0:02:07 lr: 0.000072 grad: 0.1546 (0.1189) loss: 0.9824 (0.9845) time: 0.1473 data: 0.0655 max mem: 9377 +Train: [2] [5600/6250] eta: 0:01:50 lr: 0.000072 grad: 0.1096 (0.1208) loss: 0.9818 (0.9844) time: 0.1746 data: 0.0904 max mem: 9377 +Train: [2] [5700/6250] eta: 0:01:32 lr: 0.000073 grad: 0.1916 (0.1224) loss: 0.9815 (0.9843) time: 0.1528 data: 0.0713 max mem: 9377 +Train: [2] [5800/6250] eta: 0:01:15 lr: 0.000073 grad: 0.1730 (0.1238) loss: 0.9766 (0.9843) time: 0.1569 data: 0.0669 max mem: 9377 +Train: [2] [5900/6250] eta: 0:00:58 lr: 0.000074 grad: 0.1852 (0.1254) loss: 0.9798 (0.9842) time: 0.1616 data: 0.0769 max mem: 9377 +Train: [2] [6000/6250] eta: 0:00:42 lr: 0.000074 grad: 0.2526 (0.1273) loss: 0.9806 (0.9841) time: 0.1664 data: 0.0788 max mem: 9377 +Train: [2] [6100/6250] eta: 0:00:25 lr: 0.000074 grad: 0.1868 (0.1289) loss: 0.9819 (0.9841) time: 0.1681 data: 0.0882 max mem: 9377 +Train: [2] [6200/6250] eta: 0:00:08 lr: 0.000075 grad: 0.1878 (0.1301) loss: 0.9815 (0.9840) time: 0.1572 data: 0.0660 max mem: 9377 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.1989 (0.1307) loss: 0.9792 (0.9840) time: 0.1451 data: 0.0586 max mem: 9377 +Train: [2] Total time: 0:17:36 (0.1690 s / it) +Averaged stats: lr: 0.000075 grad: 0.1989 (0.1307) loss: 0.9792 (0.9840) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:04:39 loss: 0.9737 (0.9737) time: 4.5026 data: 4.4243 max mem: 9377 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9807 (0.9798) time: 0.2844 data: 0.2580 max mem: 9377 +Eval (hcp-train-subset): [2] Total time: 0:00:17 (0.2892 s / it) +Averaged stats (hcp-train-subset): loss: 0.9807 (0.9798) +Eval (hcp-val): [2] [ 0/62] eta: 0:05:22 loss: 0.9781 (0.9781) time: 5.2071 data: 5.1771 max mem: 9377 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9794 (0.9795) time: 0.1561 data: 0.1307 max mem: 9377 +Eval (hcp-val): [2] Total time: 0:00:16 (0.2596 s / it) +Averaged stats (hcp-val): loss: 0.9794 (0.9795) +Eval (nsd-val): [2] [ 0/62] eta: 0:03:28 loss: 0.9787 (0.9787) time: 3.3686 data: 3.3148 max mem: 9377 +Eval (nsd-val): [2] [61/62] eta: 0:00:00 loss: 0.9830 (0.9826) time: 0.1301 data: 0.1039 max mem: 9377 +Eval (nsd-val): [2] Total time: 0:00:15 (0.2547 s / it) +Averaged stats (nsd-val): loss: 0.9830 (0.9826) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [3] [ 0/6250] eta: 7:58:33 lr: 0.000075 grad: 0.4032 (0.4032) loss: 0.9888 (0.9888) time: 4.5941 data: 4.3627 max mem: 9377 +Train: [3] [ 100/6250] eta: 0:24:13 lr: 0.000075 grad: 0.1248 (0.2273) loss: 0.9807 (0.9804) time: 0.1973 data: 0.1023 max mem: 9377 +Train: [3] [ 200/6250] eta: 0:22:26 lr: 0.000076 grad: 0.1818 (0.2328) loss: 0.9796 (0.9796) time: 0.2153 data: 0.1327 max mem: 9377 +Train: [3] [ 300/6250] eta: 0:20:57 lr: 0.000076 grad: 0.1392 (0.2275) loss: 0.9811 (0.9791) time: 0.2120 data: 0.1175 max mem: 9377 +Train: [3] [ 400/6250] eta: 0:19:26 lr: 0.000077 grad: 0.1982 (0.2358) loss: 0.9790 (0.9786) time: 0.1766 data: 0.0720 max mem: 9377 +Train: [3] [ 500/6250] eta: 0:18:26 lr: 0.000077 grad: 0.1913 (0.2331) loss: 0.9799 (0.9785) time: 0.1616 data: 0.0522 max mem: 9377 +Train: [3] [ 600/6250] eta: 0:17:54 lr: 0.000077 grad: 0.1468 (0.2300) loss: 0.9794 (0.9783) time: 0.1757 data: 0.0808 max mem: 9377 +Train: [3] [ 700/6250] eta: 0:17:21 lr: 0.000078 grad: 0.1668 (0.2277) loss: 0.9790 (0.9783) time: 0.1863 data: 0.0811 max mem: 9377 +Train: [3] [ 800/6250] eta: 0:16:51 lr: 0.000078 grad: 0.2271 (0.2307) loss: 0.9811 (0.9783) time: 0.1890 data: 0.0804 max mem: 9377 +Train: [3] [ 900/6250] eta: 0:16:39 lr: 0.000079 grad: 0.1307 (0.2287) loss: 0.9784 (0.9784) time: 0.2197 data: 0.1040 max mem: 9377 +Train: [3] [1000/6250] eta: 0:16:17 lr: 0.000079 grad: 0.1751 (0.2274) loss: 0.9772 (0.9784) time: 0.1507 data: 0.0415 max mem: 9377 +Train: [3] [1100/6250] eta: 0:15:51 lr: 0.000079 grad: 0.1719 (0.2249) loss: 0.9764 (0.9783) time: 0.1731 data: 0.0841 max mem: 9377 +Train: [3] [1200/6250] eta: 0:15:27 lr: 0.000080 grad: 0.1849 (0.2235) loss: 0.9795 (0.9783) time: 0.1645 data: 0.0679 max mem: 9377 +Train: [3] [1300/6250] eta: 0:15:01 lr: 0.000080 grad: 0.2174 (0.2228) loss: 0.9775 (0.9783) time: 0.1568 data: 0.0631 max mem: 9377 +Train: [3] [1400/6250] eta: 0:14:32 lr: 0.000081 grad: 0.2404 (0.2237) loss: 0.9776 (0.9782) time: 0.1467 data: 0.0569 max mem: 9377 +Train: [3] [1500/6250] eta: 0:14:09 lr: 0.000081 grad: 0.1761 (0.2227) loss: 0.9767 (0.9781) time: 0.1483 data: 0.0610 max mem: 9377 +Train: [3] [1600/6250] eta: 0:13:46 lr: 0.000081 grad: 0.2117 (0.2216) loss: 0.9788 (0.9781) time: 0.1580 data: 0.0762 max mem: 9377 +Train: [3] [1700/6250] eta: 0:13:26 lr: 0.000082 grad: 0.2153 (0.2211) loss: 0.9805 (0.9781) time: 0.1934 data: 0.1069 max mem: 9377 +Train: [3] [1800/6250] eta: 0:13:02 lr: 0.000082 grad: 0.1630 (0.2202) loss: 0.9783 (0.9781) time: 0.1566 data: 0.0706 max mem: 9377 +Train: [3] [1900/6250] eta: 0:12:40 lr: 0.000083 grad: 0.2409 (0.2196) loss: 0.9786 (0.9780) time: 0.1749 data: 0.0865 max mem: 9377 +Train: [3] [2000/6250] eta: 0:12:18 lr: 0.000083 grad: 0.1521 (0.2179) loss: 0.9763 (0.9780) time: 0.1695 data: 0.0827 max mem: 9377 +Train: [3] [2100/6250] eta: 0:11:56 lr: 0.000083 grad: 0.1538 (0.2168) loss: 0.9712 (0.9778) time: 0.1416 data: 0.0566 max mem: 9377 +Train: [3] [2200/6250] eta: 0:11:38 lr: 0.000084 grad: 0.1600 (0.2157) loss: 0.9779 (0.9778) time: 0.1678 data: 0.0807 max mem: 9377 +Train: [3] [2300/6250] eta: 0:11:18 lr: 0.000084 grad: 0.1595 (0.2152) loss: 0.9744 (0.9777) time: 0.1572 data: 0.0661 max mem: 9377 +Train: [3] [2400/6250] eta: 0:10:59 lr: 0.000085 grad: 0.1757 (0.2144) loss: 0.9754 (0.9776) time: 0.1715 data: 0.0822 max mem: 9377 +Train: [3] [2500/6250] eta: 0:10:40 lr: 0.000085 grad: 0.1426 (0.2134) loss: 0.9766 (0.9776) time: 0.1767 data: 0.0883 max mem: 9377 +Train: [3] [2600/6250] eta: 0:10:21 lr: 0.000085 grad: 0.1702 (0.2137) loss: 0.9751 (0.9775) time: 0.1578 data: 0.0747 max mem: 9377 +Train: [3] [2700/6250] eta: 0:10:03 lr: 0.000086 grad: 0.1954 (0.2123) loss: 0.9772 (0.9775) time: 0.1426 data: 0.0612 max mem: 9377 +Train: [3] [2800/6250] eta: 0:09:44 lr: 0.000086 grad: 0.1560 (0.2122) loss: 0.9761 (0.9774) time: 0.1682 data: 0.0714 max mem: 9377 +Train: [3] [2900/6250] eta: 0:09:27 lr: 0.000087 grad: 0.1415 (0.2110) loss: 0.9751 (0.9773) time: 0.1636 data: 0.0688 max mem: 9377 +Train: [3] [3000/6250] eta: 0:09:09 lr: 0.000087 grad: 0.1779 (0.2101) loss: 0.9753 (0.9772) time: 0.1694 data: 0.0777 max mem: 9377 +Train: [3] [3100/6250] eta: 0:08:51 lr: 0.000087 grad: 0.1850 (0.2097) loss: 0.9757 (0.9771) time: 0.1431 data: 0.0619 max mem: 9377 +Train: [3] [3200/6250] eta: 0:08:33 lr: 0.000088 grad: 0.1515 (0.2094) loss: 0.9752 (0.9771) time: 0.1681 data: 0.0754 max mem: 9377 +Train: [3] [3300/6250] eta: 0:08:15 lr: 0.000088 grad: 0.1375 (0.2091) loss: 0.9743 (0.9770) time: 0.1586 data: 0.0702 max mem: 9377 +Train: [3] [3400/6250] eta: 0:07:57 lr: 0.000089 grad: 0.1641 (0.2086) loss: 0.9728 (0.9770) time: 0.1485 data: 0.0591 max mem: 9377 +Train: [3] [3500/6250] eta: 0:07:39 lr: 0.000089 grad: 0.1286 (0.2080) loss: 0.9722 (0.9769) time: 0.1623 data: 0.0766 max mem: 9377 +Train: [3] [3600/6250] eta: 0:07:22 lr: 0.000089 grad: 0.2568 (0.2080) loss: 0.9761 (0.9768) time: 0.1636 data: 0.0726 max mem: 9377 +Train: [3] [3700/6250] eta: 0:07:05 lr: 0.000090 grad: 0.1598 (0.2071) loss: 0.9775 (0.9767) time: 0.1430 data: 0.0502 max mem: 9377 +Train: [3] [3800/6250] eta: 0:06:48 lr: 0.000090 grad: 0.1571 (0.2068) loss: 0.9780 (0.9767) time: 0.1580 data: 0.0585 max mem: 9377 +Train: [3] [3900/6250] eta: 0:06:31 lr: 0.000091 grad: 0.1964 (0.2064) loss: 0.9782 (0.9767) time: 0.1574 data: 0.0712 max mem: 9377 +Train: [3] [4000/6250] eta: 0:06:13 lr: 0.000091 grad: 0.2199 (0.2061) loss: 0.9769 (0.9766) time: 0.1600 data: 0.0761 max mem: 9377 +Train: [3] [4100/6250] eta: 0:05:56 lr: 0.000091 grad: 0.1534 (0.2058) loss: 0.9757 (0.9766) time: 0.1614 data: 0.0641 max mem: 9377 +Train: [3] [4200/6250] eta: 0:05:39 lr: 0.000092 grad: 0.1500 (0.2051) loss: 0.9747 (0.9766) time: 0.1360 data: 0.0502 max mem: 9377 +Train: [3] [4300/6250] eta: 0:05:22 lr: 0.000092 grad: 0.1508 (0.2048) loss: 0.9749 (0.9765) time: 0.1497 data: 0.0657 max mem: 9377 +Train: [3] [4400/6250] eta: 0:05:05 lr: 0.000093 grad: 0.1919 (0.2041) loss: 0.9760 (0.9765) time: 0.1431 data: 0.0569 max mem: 9377 +Train: [3] [4500/6250] eta: 0:04:48 lr: 0.000093 grad: 0.1431 (0.2038) loss: 0.9741 (0.9765) time: 0.1863 data: 0.0979 max mem: 9377 +Train: [3] [4600/6250] eta: 0:04:32 lr: 0.000093 grad: 0.2249 (0.2039) loss: 0.9762 (0.9764) time: 0.1831 data: 0.1062 max mem: 9377 +Train: [3] [4700/6250] eta: 0:04:15 lr: 0.000094 grad: 0.1315 (0.2035) loss: 0.9726 (0.9763) time: 0.1695 data: 0.0854 max mem: 9377 +Train: [3] [4800/6250] eta: 0:03:58 lr: 0.000094 grad: 0.1317 (0.2034) loss: 0.9711 (0.9762) time: 0.1486 data: 0.0568 max mem: 9377 +Train: [3] [4900/6250] eta: 0:03:42 lr: 0.000095 grad: 0.1404 (0.2032) loss: 0.9720 (0.9761) time: 0.0906 data: 0.0002 max mem: 9377 +Train: [3] [5000/6250] eta: 0:03:25 lr: 0.000095 grad: 0.2202 (0.2029) loss: 0.9719 (0.9760) time: 0.1541 data: 0.0678 max mem: 9377 +Train: [3] [5100/6250] eta: 0:03:09 lr: 0.000095 grad: 0.1704 (0.2028) loss: 0.9698 (0.9759) time: 0.1502 data: 0.0693 max mem: 9377 +Train: [3] [5200/6250] eta: 0:02:52 lr: 0.000096 grad: 0.1699 (0.2024) loss: 0.9698 (0.9758) time: 0.1427 data: 0.0541 max mem: 9377 +Train: [3] [5300/6250] eta: 0:02:36 lr: 0.000096 grad: 0.1695 (0.2026) loss: 0.9694 (0.9757) time: 0.1317 data: 0.0466 max mem: 9377 +Train: [3] [5400/6250] eta: 0:02:19 lr: 0.000097 grad: 0.2007 (0.2026) loss: 0.9700 (0.9755) time: 0.1504 data: 0.0658 max mem: 9377 +Train: [3] [5500/6250] eta: 0:02:03 lr: 0.000097 grad: 0.1351 (0.2024) loss: 0.9682 (0.9754) time: 0.1643 data: 0.0762 max mem: 9377 +Train: [3] [5600/6250] eta: 0:01:46 lr: 0.000097 grad: 0.1503 (0.2023) loss: 0.9669 (0.9753) time: 0.1548 data: 0.0677 max mem: 9377 +Train: [3] [5700/6250] eta: 0:01:30 lr: 0.000098 grad: 0.1700 (0.2027) loss: 0.9681 (0.9751) time: 0.1470 data: 0.0602 max mem: 9377 +Train: [3] [5800/6250] eta: 0:01:13 lr: 0.000098 grad: 0.1308 (0.2024) loss: 0.9679 (0.9750) time: 0.1404 data: 0.0501 max mem: 9377 +Train: [3] [5900/6250] eta: 0:00:57 lr: 0.000099 grad: 0.2120 (0.2025) loss: 0.9688 (0.9749) time: 0.1672 data: 0.0800 max mem: 9377 +Train: [3] [6000/6250] eta: 0:00:40 lr: 0.000099 grad: 0.1711 (0.2024) loss: 0.9694 (0.9747) time: 0.1606 data: 0.0800 max mem: 9377 +Train: [3] [6100/6250] eta: 0:00:24 lr: 0.000099 grad: 0.1698 (0.2020) loss: 0.9672 (0.9746) time: 0.1828 data: 0.0873 max mem: 9377 +Train: [3] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.2310 (0.2019) loss: 0.9659 (0.9744) time: 0.1579 data: 0.0741 max mem: 9377 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1639 (0.2016) loss: 0.9637 (0.9743) time: 0.1680 data: 0.0784 max mem: 9377 +Train: [3] Total time: 0:17:12 (0.1651 s / it) +Averaged stats: lr: 0.000100 grad: 0.1639 (0.2016) loss: 0.9637 (0.9743) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:06:11 loss: 0.9678 (0.9678) time: 5.9843 data: 5.9541 max mem: 9377 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9653 (0.9642) time: 0.1515 data: 0.1263 max mem: 9377 +Eval (hcp-train-subset): [3] Total time: 0:00:18 (0.2964 s / it) +Averaged stats (hcp-train-subset): loss: 0.9653 (0.9642) +Eval (hcp-val): [3] [ 0/62] eta: 0:06:01 loss: 0.9579 (0.9579) time: 5.8314 data: 5.7943 max mem: 9377 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9648 (0.9645) time: 0.1518 data: 0.1263 max mem: 9377 +Eval (hcp-val): [3] Total time: 0:00:15 (0.2556 s / it) +Averaged stats (hcp-val): loss: 0.9648 (0.9645) +Eval (nsd-val): [3] [ 0/62] eta: 0:05:28 loss: 0.9473 (0.9473) time: 5.2904 data: 5.2588 max mem: 9377 +Eval (nsd-val): [3] [61/62] eta: 0:00:00 loss: 0.9592 (0.9570) time: 0.1166 data: 0.0916 max mem: 9377 +Eval (nsd-val): [3] Total time: 0:00:15 (0.2446 s / it) +Averaged stats (nsd-val): loss: 0.9592 (0.9570) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [4] [ 0/6250] eta: 7:39:34 lr: 0.000100 grad: 0.1980 (0.1980) loss: 0.9754 (0.9754) time: 4.4118 data: 4.1307 max mem: 9377 +Train: [4] [ 100/6250] eta: 0:23:33 lr: 0.000100 grad: 0.1997 (0.1946) loss: 0.9661 (0.9658) time: 0.2134 data: 0.1364 max mem: 9377 +Train: [4] [ 200/6250] eta: 0:22:25 lr: 0.000101 grad: 0.1781 (0.2098) loss: 0.9612 (0.9639) time: 0.2649 data: 0.1811 max mem: 9377 +Train: [4] [ 300/6250] eta: 0:20:45 lr: 0.000101 grad: 0.1464 (0.2118) loss: 0.9641 (0.9629) time: 0.1606 data: 0.0762 max mem: 9377 +Train: [4] [ 400/6250] eta: 0:19:18 lr: 0.000102 grad: 0.2203 (0.2154) loss: 0.9625 (0.9628) time: 0.1599 data: 0.0698 max mem: 9377 +Train: [4] [ 500/6250] eta: 0:18:33 lr: 0.000102 grad: 0.2032 (0.2155) loss: 0.9647 (0.9627) time: 0.1746 data: 0.0905 max mem: 9377 +Train: [4] [ 600/6250] eta: 0:18:13 lr: 0.000102 grad: 0.1724 (0.2086) loss: 0.9623 (0.9626) time: 0.2107 data: 0.1028 max mem: 9377 +Train: [4] [ 700/6250] eta: 0:17:51 lr: 0.000103 grad: 0.1789 (0.2066) loss: 0.9606 (0.9624) time: 0.1924 data: 0.1046 max mem: 9377 +Train: [4] [ 800/6250] eta: 0:17:37 lr: 0.000103 grad: 0.1681 (0.2069) loss: 0.9598 (0.9620) time: 0.2186 data: 0.1048 max mem: 9377 +Train: [4] [ 900/6250] eta: 0:17:06 lr: 0.000104 grad: 0.2111 (0.2075) loss: 0.9598 (0.9617) time: 0.1764 data: 0.0636 max mem: 9377 +Train: [4] [1000/6250] eta: 0:16:37 lr: 0.000104 grad: 0.2315 (0.2101) loss: 0.9572 (0.9615) time: 0.1839 data: 0.0737 max mem: 9377 +Train: [4] [1100/6250] eta: 0:16:09 lr: 0.000104 grad: 0.1769 (0.2118) loss: 0.9576 (0.9612) time: 0.1902 data: 0.1003 max mem: 9377 +Train: [4] [1200/6250] eta: 0:15:45 lr: 0.000105 grad: 0.2057 (0.2132) loss: 0.9573 (0.9608) time: 0.1901 data: 0.0955 max mem: 9377 +Train: [4] [1300/6250] eta: 0:15:14 lr: 0.000105 grad: 0.2444 (0.2152) loss: 0.9498 (0.9603) time: 0.1497 data: 0.0596 max mem: 9377 +Train: [4] [1400/6250] eta: 0:14:48 lr: 0.000106 grad: 0.2275 (0.2198) loss: 0.9517 (0.9599) time: 0.1557 data: 0.0712 max mem: 9377 +Train: [4] [1500/6250] eta: 0:14:21 lr: 0.000106 grad: 0.3286 (0.2252) loss: 0.9528 (0.9595) time: 0.1437 data: 0.0549 max mem: 9377 +Train: [4] [1600/6250] eta: 0:13:57 lr: 0.000106 grad: 0.2159 (0.2268) loss: 0.9546 (0.9590) time: 0.1738 data: 0.0861 max mem: 9377 +Train: [4] [1700/6250] eta: 0:13:33 lr: 0.000107 grad: 0.2144 (0.2287) loss: 0.9526 (0.9586) time: 0.1299 data: 0.0432 max mem: 9377 +Train: [4] [1800/6250] eta: 0:13:10 lr: 0.000107 grad: 0.3324 (0.2321) loss: 0.9467 (0.9580) time: 0.1775 data: 0.0916 max mem: 9377 +Train: [4] [1900/6250] eta: 0:12:46 lr: 0.000108 grad: 0.2292 (0.2326) loss: 0.9469 (0.9575) time: 0.1386 data: 0.0556 max mem: 9377 +Train: [4] [2000/6250] eta: 0:12:23 lr: 0.000108 grad: 0.2347 (0.2353) loss: 0.9488 (0.9570) time: 0.1271 data: 0.0394 max mem: 9377 +Train: [4] [2100/6250] eta: 0:12:03 lr: 0.000108 grad: 0.2961 (0.2373) loss: 0.9455 (0.9566) time: 0.1610 data: 0.0780 max mem: 9377 +Train: [4] [2200/6250] eta: 0:11:42 lr: 0.000109 grad: 0.2625 (0.2389) loss: 0.9486 (0.9561) time: 0.1621 data: 0.0779 max mem: 9377 +Train: [4] [2300/6250] eta: 0:11:21 lr: 0.000109 grad: 0.2113 (0.2412) loss: 0.9429 (0.9556) time: 0.1622 data: 0.0719 max mem: 9377 +Train: [4] [2400/6250] eta: 0:11:02 lr: 0.000110 grad: 0.1891 (0.2429) loss: 0.9427 (0.9551) time: 0.1662 data: 0.0696 max mem: 9377 +Train: [4] [2500/6250] eta: 0:10:43 lr: 0.000110 grad: 0.3038 (0.2438) loss: 0.9432 (0.9546) time: 0.1781 data: 0.0976 max mem: 9377 +Train: [4] [2600/6250] eta: 0:10:24 lr: 0.000110 grad: 0.2554 (0.2442) loss: 0.9438 (0.9542) time: 0.1090 data: 0.0002 max mem: 9377 +Train: [4] [2700/6250] eta: 0:10:05 lr: 0.000111 grad: 0.2551 (0.2443) loss: 0.9391 (0.9537) time: 0.1476 data: 0.0569 max mem: 9377 +Train: [4] [2800/6250] eta: 0:09:47 lr: 0.000111 grad: 0.2740 (0.2453) loss: 0.9424 (0.9532) time: 0.1731 data: 0.0781 max mem: 9377 +Train: [4] [2900/6250] eta: 0:09:29 lr: 0.000112 grad: 0.2716 (0.2470) loss: 0.9416 (0.9528) time: 0.1827 data: 0.1016 max mem: 9377 +Train: [4] [3000/6250] eta: 0:09:10 lr: 0.000112 grad: 0.2362 (0.2476) loss: 0.9380 (0.9523) time: 0.1515 data: 0.0646 max mem: 9377 +Train: [4] [3100/6250] eta: 0:08:52 lr: 0.000112 grad: 0.2260 (0.2477) loss: 0.9385 (0.9518) time: 0.1483 data: 0.0615 max mem: 9377 +Train: [4] [3200/6250] eta: 0:08:35 lr: 0.000113 grad: 0.2545 (0.2487) loss: 0.9420 (0.9514) time: 0.1892 data: 0.1074 max mem: 9377 +Train: [4] [3300/6250] eta: 0:08:18 lr: 0.000113 grad: 0.2511 (0.2493) loss: 0.9392 (0.9510) time: 0.1424 data: 0.0627 max mem: 9377 +Train: [4] [3400/6250] eta: 0:08:00 lr: 0.000114 grad: 0.2238 (0.2500) loss: 0.9383 (0.9505) time: 0.1591 data: 0.0677 max mem: 9377 +Train: [4] [3500/6250] eta: 0:07:42 lr: 0.000114 grad: 0.1935 (0.2505) loss: 0.9371 (0.9501) time: 0.1580 data: 0.0659 max mem: 9377 +Train: [4] [3600/6250] eta: 0:07:25 lr: 0.000114 grad: 0.2567 (0.2509) loss: 0.9343 (0.9497) time: 0.1455 data: 0.0554 max mem: 9377 +Train: [4] [3700/6250] eta: 0:07:07 lr: 0.000115 grad: 0.2004 (0.2503) loss: 0.9319 (0.9493) time: 0.1241 data: 0.0304 max mem: 9377 +Train: [4] [3800/6250] eta: 0:06:50 lr: 0.000115 grad: 0.1953 (0.2499) loss: 0.9305 (0.9489) time: 0.1483 data: 0.0570 max mem: 9377 +Train: [4] [3900/6250] eta: 0:06:32 lr: 0.000116 grad: 0.1990 (0.2499) loss: 0.9347 (0.9485) time: 0.1309 data: 0.0414 max mem: 9377 +Train: [4] [4000/6250] eta: 0:06:15 lr: 0.000116 grad: 0.2367 (0.2493) loss: 0.9325 (0.9481) time: 0.1603 data: 0.0747 max mem: 9377 +Train: [4] [4100/6250] eta: 0:05:57 lr: 0.000116 grad: 0.1830 (0.2490) loss: 0.9355 (0.9477) time: 0.1528 data: 0.0624 max mem: 9377 +Train: [4] [4200/6250] eta: 0:05:40 lr: 0.000117 grad: 0.1788 (0.2485) loss: 0.9301 (0.9474) time: 0.1621 data: 0.0730 max mem: 9377 +Train: [4] [4300/6250] eta: 0:05:24 lr: 0.000117 grad: 0.1640 (0.2477) loss: 0.9304 (0.9470) time: 0.1881 data: 0.1041 max mem: 9377 +Train: [4] [4400/6250] eta: 0:05:07 lr: 0.000118 grad: 0.2786 (0.2474) loss: 0.9345 (0.9466) time: 0.1591 data: 0.0756 max mem: 9377 +Train: [4] [4500/6250] eta: 0:04:50 lr: 0.000118 grad: 0.1815 (0.2470) loss: 0.9316 (0.9463) time: 0.1584 data: 0.0650 max mem: 9377 +Train: [4] [4600/6250] eta: 0:04:34 lr: 0.000118 grad: 0.2296 (0.2465) loss: 0.9316 (0.9459) time: 0.1864 data: 0.0994 max mem: 9377 +Train: [4] [4700/6250] eta: 0:04:17 lr: 0.000119 grad: 0.1708 (0.2454) loss: 0.9265 (0.9455) time: 0.1734 data: 0.0898 max mem: 9377 +Train: [4] [4800/6250] eta: 0:04:00 lr: 0.000119 grad: 0.2412 (0.2451) loss: 0.9283 (0.9452) time: 0.1480 data: 0.0473 max mem: 9377 +Train: [4] [4900/6250] eta: 0:03:43 lr: 0.000120 grad: 0.2243 (0.2451) loss: 0.9305 (0.9449) time: 0.1758 data: 0.0935 max mem: 9377 +Train: [4] [5000/6250] eta: 0:03:27 lr: 0.000120 grad: 0.2049 (0.2452) loss: 0.9243 (0.9445) time: 0.1505 data: 0.0608 max mem: 9377 +Train: [4] [5100/6250] eta: 0:03:10 lr: 0.000120 grad: 0.2110 (0.2444) loss: 0.9276 (0.9441) time: 0.1797 data: 0.0950 max mem: 9377 +Train: [4] [5200/6250] eta: 0:02:54 lr: 0.000121 grad: 0.2005 (0.2439) loss: 0.9217 (0.9437) time: 0.1043 data: 0.0003 max mem: 9377 +Train: [4] [5300/6250] eta: 0:02:37 lr: 0.000121 grad: 0.2402 (0.2434) loss: 0.9225 (0.9434) time: 0.1672 data: 0.0831 max mem: 9377 +Train: [4] [5400/6250] eta: 0:02:20 lr: 0.000122 grad: 0.2593 (0.2432) loss: 0.9283 (0.9430) time: 0.1513 data: 0.0490 max mem: 9377 +Train: [4] [5500/6250] eta: 0:02:03 lr: 0.000122 grad: 0.1921 (0.2430) loss: 0.9229 (0.9427) time: 0.1170 data: 0.0210 max mem: 9377 +Train: [4] [5600/6250] eta: 0:01:47 lr: 0.000122 grad: 0.2282 (0.2428) loss: 0.9225 (0.9424) time: 0.1467 data: 0.0533 max mem: 9377 +Train: [4] [5700/6250] eta: 0:01:30 lr: 0.000123 grad: 0.1681 (0.2421) loss: 0.9235 (0.9420) time: 0.1324 data: 0.0374 max mem: 9377 +Train: [4] [5800/6250] eta: 0:01:14 lr: 0.000123 grad: 0.2236 (0.2419) loss: 0.9256 (0.9417) time: 0.1355 data: 0.0368 max mem: 9377 +Train: [4] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.1823 (0.2414) loss: 0.9250 (0.9414) time: 0.1871 data: 0.0880 max mem: 9377 +Train: [4] [6000/6250] eta: 0:00:41 lr: 0.000124 grad: 0.1975 (0.2407) loss: 0.9217 (0.9411) time: 0.1738 data: 0.0882 max mem: 9377 +Train: [4] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.1660 (0.2398) loss: 0.9215 (0.9408) time: 0.1495 data: 0.0548 max mem: 9377 +Train: [4] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1736 (0.2392) loss: 0.9215 (0.9406) time: 0.1671 data: 0.0808 max mem: 9377 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1848 (0.2389) loss: 0.9225 (0.9404) time: 0.1533 data: 0.0539 max mem: 9377 +Train: [4] Total time: 0:17:18 (0.1662 s / it) +Averaged stats: lr: 0.000125 grad: 0.1848 (0.2389) loss: 0.9225 (0.9404) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:03:49 loss: 0.9187 (0.9187) time: 3.7064 data: 3.6029 max mem: 9377 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9174 (0.9197) time: 0.1636 data: 0.1380 max mem: 9377 +Eval (hcp-train-subset): [4] Total time: 0:00:17 (0.2839 s / it) +Averaged stats (hcp-train-subset): loss: 0.9174 (0.9197) +Making plots (hcp-train-subset): example=11 +Eval (hcp-val): [4] [ 0/62] eta: 0:05:13 loss: 0.9158 (0.9158) time: 5.0599 data: 5.0294 max mem: 9377 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9191 (0.9188) time: 0.1471 data: 0.1212 max mem: 9377 +Eval (hcp-val): [4] Total time: 0:00:15 (0.2487 s / it) +Averaged stats (hcp-val): loss: 0.9191 (0.9188) +Making plots (hcp-val): example=36 +Eval (nsd-val): [4] [ 0/62] eta: 0:05:34 loss: 0.8811 (0.8811) time: 5.3900 data: 5.3595 max mem: 9377 +Eval (nsd-val): [4] [61/62] eta: 0:00:00 loss: 0.8912 (0.8908) time: 0.1282 data: 0.1027 max mem: 9377 +Eval (nsd-val): [4] Total time: 0:00:14 (0.2401 s / it) +Averaged stats (nsd-val): loss: 0.8912 (0.8908) +Making plots (nsd-val): example=5 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00004.pth +Train: [5] [ 0/6250] eta: 10:08:29 lr: 0.000125 grad: 0.1425 (0.1425) loss: 0.9393 (0.9393) time: 5.8415 data: 5.7067 max mem: 9377 +Train: [5] [ 100/6250] eta: 0:22:59 lr: 0.000125 grad: 0.1828 (0.2388) loss: 0.9220 (0.9193) time: 0.1768 data: 0.0815 max mem: 9377 +Train: [5] [ 200/6250] eta: 0:20:51 lr: 0.000125 grad: 0.1755 (0.2246) loss: 0.9221 (0.9197) time: 0.2263 data: 0.1534 max mem: 9377 +Train: [5] [ 300/6250] eta: 0:19:25 lr: 0.000125 grad: 0.1645 (0.2124) loss: 0.9197 (0.9200) time: 0.1430 data: 0.0491 max mem: 9377 +Train: [5] [ 400/6250] eta: 0:18:17 lr: 0.000125 grad: 0.1764 (0.2070) loss: 0.9168 (0.9198) time: 0.1410 data: 0.0504 max mem: 9377 +Train: [5] [ 500/6250] eta: 0:17:35 lr: 0.000125 grad: 0.1515 (0.2038) loss: 0.9163 (0.9193) time: 0.1608 data: 0.0755 max mem: 9377 +Train: [5] [ 600/6250] eta: 0:16:53 lr: 0.000125 grad: 0.2136 (0.2018) loss: 0.9183 (0.9188) time: 0.1509 data: 0.0652 max mem: 9377 +Train: [5] [ 700/6250] eta: 0:16:40 lr: 0.000125 grad: 0.1546 (0.1990) loss: 0.9181 (0.9187) time: 0.1662 data: 0.0677 max mem: 9377 +Train: [5] [ 800/6250] eta: 0:16:16 lr: 0.000125 grad: 0.1908 (0.1981) loss: 0.9203 (0.9187) time: 0.1479 data: 0.0408 max mem: 9377 +Train: [5] [ 900/6250] eta: 0:15:50 lr: 0.000125 grad: 0.1505 (0.1946) loss: 0.9194 (0.9188) time: 0.1737 data: 0.0790 max mem: 9377 +Train: [5] [1000/6250] eta: 0:15:25 lr: 0.000125 grad: 0.2147 (0.1950) loss: 0.9200 (0.9188) time: 0.1650 data: 0.0637 max mem: 9377 +Train: [5] [1100/6250] eta: 0:15:06 lr: 0.000125 grad: 0.1839 (0.1943) loss: 0.9182 (0.9186) time: 0.1964 data: 0.0826 max mem: 9377 +Train: [5] [1200/6250] eta: 0:14:41 lr: 0.000125 grad: 0.1522 (0.1927) loss: 0.9143 (0.9185) time: 0.1407 data: 0.0410 max mem: 9377 +Train: [5] [1300/6250] eta: 0:14:18 lr: 0.000125 grad: 0.2107 (0.1926) loss: 0.9144 (0.9182) time: 0.1607 data: 0.0659 max mem: 9377 +Train: [5] [1400/6250] eta: 0:13:56 lr: 0.000125 grad: 0.1455 (0.1915) loss: 0.9130 (0.9180) time: 0.1206 data: 0.0270 max mem: 9377 +Train: [5] [1500/6250] eta: 0:13:33 lr: 0.000125 grad: 0.1609 (0.1905) loss: 0.9141 (0.9177) time: 0.1288 data: 0.0292 max mem: 9377 +Train: [5] [1600/6250] eta: 0:13:12 lr: 0.000125 grad: 0.1354 (0.1886) loss: 0.9145 (0.9174) time: 0.1670 data: 0.0720 max mem: 9377 +Train: [5] [1700/6250] eta: 0:12:50 lr: 0.000125 grad: 0.1519 (0.1886) loss: 0.9120 (0.9172) time: 0.1675 data: 0.0959 max mem: 9377 +Train: [5] [1800/6250] eta: 0:12:30 lr: 0.000125 grad: 0.1885 (0.1875) loss: 0.9160 (0.9171) time: 0.1721 data: 0.0881 max mem: 9377 +Train: [5] [1900/6250] eta: 0:12:08 lr: 0.000125 grad: 0.1769 (0.1861) loss: 0.9141 (0.9169) time: 0.1494 data: 0.0672 max mem: 9377 +Train: [5] [2000/6250] eta: 0:11:48 lr: 0.000125 grad: 0.1437 (0.1851) loss: 0.9136 (0.9168) time: 0.1216 data: 0.0344 max mem: 9377 +Train: [5] [2100/6250] eta: 0:11:28 lr: 0.000125 grad: 0.1423 (0.1839) loss: 0.9123 (0.9166) time: 0.1470 data: 0.0552 max mem: 9377 +Train: [5] [2200/6250] eta: 0:11:10 lr: 0.000125 grad: 0.1469 (0.1829) loss: 0.9092 (0.9164) time: 0.1411 data: 0.0517 max mem: 9377 +Train: [5] [2300/6250] eta: 0:10:51 lr: 0.000125 grad: 0.1896 (0.1821) loss: 0.9085 (0.9162) time: 0.1447 data: 0.0519 max mem: 9377 +Train: [5] [2400/6250] eta: 0:10:34 lr: 0.000125 grad: 0.1215 (0.1810) loss: 0.9128 (0.9161) time: 0.1493 data: 0.0560 max mem: 9377 +Train: [5] [2500/6250] eta: 0:10:16 lr: 0.000125 grad: 0.1310 (0.1801) loss: 0.9129 (0.9160) time: 0.1594 data: 0.0723 max mem: 9377 +Train: [5] [2600/6250] eta: 0:09:58 lr: 0.000125 grad: 0.1559 (0.1795) loss: 0.9122 (0.9159) time: 0.1580 data: 0.0581 max mem: 9377 +Train: [5] [2700/6250] eta: 0:09:41 lr: 0.000125 grad: 0.1559 (0.1786) loss: 0.9103 (0.9158) time: 0.1442 data: 0.0458 max mem: 9377 +Train: [5] [2800/6250] eta: 0:09:24 lr: 0.000125 grad: 0.1797 (0.1781) loss: 0.9067 (0.9156) time: 0.1661 data: 0.0725 max mem: 9377 +Train: [5] [2900/6250] eta: 0:09:07 lr: 0.000125 grad: 0.1543 (0.1773) loss: 0.9072 (0.9155) time: 0.1503 data: 0.0660 max mem: 9377 +Train: [5] [3000/6250] eta: 0:08:50 lr: 0.000125 grad: 0.1444 (0.1768) loss: 0.9081 (0.9153) time: 0.1343 data: 0.0457 max mem: 9377 +Train: [5] [3100/6250] eta: 0:08:34 lr: 0.000125 grad: 0.1593 (0.1761) loss: 0.9113 (0.9151) time: 0.1581 data: 0.0744 max mem: 9377 +Train: [5] [3200/6250] eta: 0:08:17 lr: 0.000125 grad: 0.1750 (0.1756) loss: 0.9124 (0.9150) time: 0.1329 data: 0.0497 max mem: 9377 +Train: [5] [3300/6250] eta: 0:08:00 lr: 0.000125 grad: 0.1316 (0.1753) loss: 0.9105 (0.9148) time: 0.1716 data: 0.0852 max mem: 9377 +Train: [5] [3400/6250] eta: 0:07:44 lr: 0.000125 grad: 0.1532 (0.1747) loss: 0.9113 (0.9146) time: 0.1470 data: 0.0657 max mem: 9377 +Train: [5] [3500/6250] eta: 0:07:27 lr: 0.000125 grad: 0.1363 (0.1741) loss: 0.9092 (0.9145) time: 0.1675 data: 0.0852 max mem: 9377 +Train: [5] [3600/6250] eta: 0:07:11 lr: 0.000125 grad: 0.1347 (0.1738) loss: 0.9070 (0.9143) time: 0.1749 data: 0.0912 max mem: 9377 +Train: [5] [3700/6250] eta: 0:06:55 lr: 0.000125 grad: 0.1462 (0.1733) loss: 0.9039 (0.9141) time: 0.1508 data: 0.0725 max mem: 9377 +Train: [5] [3800/6250] eta: 0:06:38 lr: 0.000125 grad: 0.1384 (0.1732) loss: 0.9038 (0.9138) time: 0.1759 data: 0.0956 max mem: 9377 +Train: [5] [3900/6250] eta: 0:06:22 lr: 0.000125 grad: 0.1447 (0.1728) loss: 0.9038 (0.9136) time: 0.1735 data: 0.0884 max mem: 9377 +Train: [5] [4000/6250] eta: 0:06:05 lr: 0.000125 grad: 0.1473 (0.1723) loss: 0.9078 (0.9134) time: 0.1562 data: 0.0675 max mem: 9377 +Train: [5] [4100/6250] eta: 0:05:49 lr: 0.000125 grad: 0.1688 (0.1721) loss: 0.9016 (0.9131) time: 0.1377 data: 0.0497 max mem: 9377 +Train: [5] [4200/6250] eta: 0:05:33 lr: 0.000125 grad: 0.1487 (0.1719) loss: 0.9003 (0.9128) time: 0.2147 data: 0.1236 max mem: 9377 +Train: [5] [4300/6250] eta: 0:05:16 lr: 0.000125 grad: 0.1362 (0.1719) loss: 0.8945 (0.9125) time: 0.1555 data: 0.0683 max mem: 9377 +Train: [5] [4400/6250] eta: 0:04:59 lr: 0.000125 grad: 0.1378 (0.1714) loss: 0.8960 (0.9121) time: 0.1475 data: 0.0588 max mem: 9377 +Train: [5] [4500/6250] eta: 0:04:43 lr: 0.000125 grad: 0.1554 (0.1711) loss: 0.8975 (0.9118) time: 0.1547 data: 0.0712 max mem: 9377 +Train: [5] [4600/6250] eta: 0:04:27 lr: 0.000125 grad: 0.1566 (0.1707) loss: 0.8987 (0.9115) time: 0.2731 data: 0.1881 max mem: 9377 +Train: [5] [4700/6250] eta: 0:04:10 lr: 0.000125 grad: 0.1428 (0.1703) loss: 0.8932 (0.9111) time: 0.1657 data: 0.0807 max mem: 9377 +Train: [5] [4800/6250] eta: 0:03:54 lr: 0.000125 grad: 0.1351 (0.1700) loss: 0.8957 (0.9108) time: 0.1393 data: 0.0533 max mem: 9377 +Train: [5] [4900/6250] eta: 0:03:37 lr: 0.000125 grad: 0.1299 (0.1696) loss: 0.8912 (0.9104) time: 0.1785 data: 0.0879 max mem: 9377 +Train: [5] [5000/6250] eta: 0:03:21 lr: 0.000125 grad: 0.1389 (0.1692) loss: 0.8889 (0.9101) time: 0.1476 data: 0.0630 max mem: 9377 +Train: [5] [5100/6250] eta: 0:03:05 lr: 0.000125 grad: 0.1241 (0.1688) loss: 0.8909 (0.9097) time: 0.1680 data: 0.0770 max mem: 9377 +Train: [5] [5200/6250] eta: 0:02:48 lr: 0.000125 grad: 0.1411 (0.1684) loss: 0.8896 (0.9093) time: 0.1503 data: 0.0659 max mem: 9377 +Train: [5] [5300/6250] eta: 0:02:32 lr: 0.000125 grad: 0.1391 (0.1682) loss: 0.8894 (0.9090) time: 0.1586 data: 0.0745 max mem: 9377 +Train: [5] [5400/6250] eta: 0:02:16 lr: 0.000125 grad: 0.1494 (0.1679) loss: 0.8886 (0.9086) time: 0.1543 data: 0.0623 max mem: 9377 +Train: [5] [5500/6250] eta: 0:02:00 lr: 0.000125 grad: 0.1289 (0.1674) loss: 0.8914 (0.9082) time: 0.1622 data: 0.0736 max mem: 9377 +Train: [5] [5600/6250] eta: 0:01:44 lr: 0.000125 grad: 0.1354 (0.1671) loss: 0.8863 (0.9079) time: 0.1763 data: 0.0792 max mem: 9377 +Train: [5] [5700/6250] eta: 0:01:28 lr: 0.000125 grad: 0.1567 (0.1672) loss: 0.8892 (0.9075) time: 0.1549 data: 0.0545 max mem: 9377 +Train: [5] [5800/6250] eta: 0:01:12 lr: 0.000125 grad: 0.1434 (0.1670) loss: 0.8845 (0.9071) time: 0.1799 data: 0.0909 max mem: 9377 +Train: [5] [5900/6250] eta: 0:00:56 lr: 0.000125 grad: 0.1434 (0.1666) loss: 0.8841 (0.9067) time: 0.1701 data: 0.0727 max mem: 9377 +Train: [5] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.1265 (0.1662) loss: 0.8789 (0.9064) time: 0.1794 data: 0.0905 max mem: 9377 +Train: [5] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.1147 (0.1657) loss: 0.8839 (0.9060) time: 0.1564 data: 0.0600 max mem: 9377 +Train: [5] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1343 (0.1652) loss: 0.8813 (0.9056) time: 0.1649 data: 0.0723 max mem: 9377 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1287 (0.1651) loss: 0.8813 (0.9055) time: 0.1637 data: 0.0764 max mem: 9377 +Train: [5] Total time: 0:16:48 (0.1614 s / it) +Averaged stats: lr: 0.000125 grad: 0.1287 (0.1651) loss: 0.8813 (0.9055) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:03:54 loss: 0.8858 (0.8858) time: 3.7822 data: 3.7042 max mem: 9377 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.8793 (0.8823) time: 0.1161 data: 0.0896 max mem: 9377 +Eval (hcp-train-subset): [5] Total time: 0:00:16 (0.2616 s / it) +Averaged stats (hcp-train-subset): loss: 0.8793 (0.8823) +Eval (hcp-val): [5] [ 0/62] eta: 0:06:03 loss: 0.8769 (0.8769) time: 5.8596 data: 5.8272 max mem: 9377 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.8805 (0.8816) time: 0.1444 data: 0.1191 max mem: 9377 +Eval (hcp-val): [5] Total time: 0:00:15 (0.2457 s / it) +Averaged stats (hcp-val): loss: 0.8805 (0.8816) +Eval (nsd-val): [5] [ 0/62] eta: 0:04:31 loss: 0.8330 (0.8330) time: 4.3722 data: 4.2889 max mem: 9377 +Eval (nsd-val): [5] [61/62] eta: 0:00:00 loss: 0.8444 (0.8450) time: 0.1368 data: 0.1095 max mem: 9377 +Eval (nsd-val): [5] Total time: 0:00:14 (0.2405 s / it) +Averaged stats (nsd-val): loss: 0.8444 (0.8450) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [6] [ 0/6250] eta: 7:55:18 lr: 0.000125 grad: 0.0944 (0.0944) loss: 0.9071 (0.9071) time: 4.5630 data: 4.2933 max mem: 9377 +Train: [6] [ 100/6250] eta: 0:22:52 lr: 0.000125 grad: 0.1265 (0.1621) loss: 0.8896 (0.8882) time: 0.1839 data: 0.0939 max mem: 9377 +Train: [6] [ 200/6250] eta: 0:21:16 lr: 0.000125 grad: 0.1346 (0.1487) loss: 0.8867 (0.8888) time: 0.1908 data: 0.1125 max mem: 9377 +Train: [6] [ 300/6250] eta: 0:19:50 lr: 0.000125 grad: 0.1255 (0.1445) loss: 0.8871 (0.8883) time: 0.1819 data: 0.0838 max mem: 9377 +Train: [6] [ 400/6250] eta: 0:18:36 lr: 0.000125 grad: 0.1497 (0.1415) loss: 0.8871 (0.8881) time: 0.1556 data: 0.0634 max mem: 9377 +Train: [6] [ 500/6250] eta: 0:17:39 lr: 0.000125 grad: 0.1252 (0.1397) loss: 0.8882 (0.8879) time: 0.1342 data: 0.0445 max mem: 9377 +Train: [6] [ 600/6250] eta: 0:17:17 lr: 0.000125 grad: 0.1306 (0.1393) loss: 0.8860 (0.8875) time: 0.1951 data: 0.0992 max mem: 9377 +Train: [6] [ 700/6250] eta: 0:16:34 lr: 0.000125 grad: 0.1112 (0.1370) loss: 0.8912 (0.8872) time: 0.1476 data: 0.0357 max mem: 9377 +Train: [6] [ 800/6250] eta: 0:16:16 lr: 0.000125 grad: 0.1161 (0.1363) loss: 0.8829 (0.8870) time: 0.1862 data: 0.0899 max mem: 9377 +Train: [6] [ 900/6250] eta: 0:15:55 lr: 0.000125 grad: 0.1246 (0.1362) loss: 0.8864 (0.8867) time: 0.1883 data: 0.0834 max mem: 9377 +Train: [6] [1000/6250] eta: 0:15:28 lr: 0.000125 grad: 0.1258 (0.1363) loss: 0.8837 (0.8865) time: 0.1664 data: 0.0694 max mem: 9377 +Train: [6] [1100/6250] eta: 0:15:00 lr: 0.000125 grad: 0.1187 (0.1357) loss: 0.8863 (0.8864) time: 0.1539 data: 0.0596 max mem: 9377 +Train: [6] [1200/6250] eta: 0:14:34 lr: 0.000125 grad: 0.1216 (0.1354) loss: 0.8865 (0.8862) time: 0.1645 data: 0.0684 max mem: 9377 +Train: [6] [1300/6250] eta: 0:14:09 lr: 0.000125 grad: 0.1111 (0.1347) loss: 0.8852 (0.8860) time: 0.1614 data: 0.0739 max mem: 9377 +Train: [6] [1400/6250] eta: 0:13:51 lr: 0.000125 grad: 0.1130 (0.1335) loss: 0.8850 (0.8859) time: 0.1418 data: 0.0460 max mem: 9377 +Train: [6] [1500/6250] eta: 0:13:29 lr: 0.000125 grad: 0.1159 (0.1325) loss: 0.8822 (0.8858) time: 0.1673 data: 0.0810 max mem: 9377 +Train: [6] [1600/6250] eta: 0:13:11 lr: 0.000125 grad: 0.1261 (0.1320) loss: 0.8854 (0.8858) time: 0.1482 data: 0.0483 max mem: 9377 +Train: [6] [1700/6250] eta: 0:12:50 lr: 0.000125 grad: 0.1136 (0.1323) loss: 0.8838 (0.8856) time: 0.1425 data: 0.0532 max mem: 9377 +Train: [6] [1800/6250] eta: 0:12:31 lr: 0.000125 grad: 0.1081 (0.1315) loss: 0.8830 (0.8855) time: 0.1765 data: 0.0916 max mem: 9377 +Train: [6] [1900/6250] eta: 0:12:11 lr: 0.000125 grad: 0.1149 (0.1307) loss: 0.8808 (0.8853) time: 0.1436 data: 0.0513 max mem: 9377 +Train: [6] [2000/6250] eta: 0:11:52 lr: 0.000125 grad: 0.1128 (0.1298) loss: 0.8792 (0.8852) time: 0.1567 data: 0.0727 max mem: 9377 +Train: [6] [2100/6250] eta: 0:11:33 lr: 0.000125 grad: 0.1177 (0.1296) loss: 0.8829 (0.8850) time: 0.1593 data: 0.0680 max mem: 9377 +Train: [6] [2200/6250] eta: 0:11:14 lr: 0.000125 grad: 0.1009 (0.1293) loss: 0.8819 (0.8849) time: 0.1507 data: 0.0660 max mem: 9377 +Train: [6] [2300/6250] eta: 0:10:56 lr: 0.000125 grad: 0.1117 (0.1287) loss: 0.8854 (0.8847) time: 0.1808 data: 0.0923 max mem: 9377 +Train: [6] [2400/6250] eta: 0:10:39 lr: 0.000125 grad: 0.1358 (0.1289) loss: 0.8769 (0.8845) time: 0.2231 data: 0.1368 max mem: 9377 +Train: [6] [2500/6250] eta: 0:10:20 lr: 0.000125 grad: 0.1147 (0.1286) loss: 0.8778 (0.8843) time: 0.1563 data: 0.0707 max mem: 9377 +Train: [6] [2600/6250] eta: 0:10:04 lr: 0.000125 grad: 0.1083 (0.1281) loss: 0.8766 (0.8841) time: 0.1490 data: 0.0657 max mem: 9377 +Train: [6] [2700/6250] eta: 0:09:47 lr: 0.000125 grad: 0.0979 (0.1276) loss: 0.8761 (0.8839) time: 0.1548 data: 0.0681 max mem: 9377 +Train: [6] [2800/6250] eta: 0:09:29 lr: 0.000125 grad: 0.1162 (0.1278) loss: 0.8781 (0.8836) time: 0.1659 data: 0.0695 max mem: 9377 +Train: [6] [2900/6250] eta: 0:09:10 lr: 0.000125 grad: 0.1040 (0.1273) loss: 0.8732 (0.8834) time: 0.1489 data: 0.0699 max mem: 9377 +Train: [6] [3000/6250] eta: 0:08:53 lr: 0.000125 grad: 0.1185 (0.1267) loss: 0.8770 (0.8832) time: 0.1236 data: 0.0455 max mem: 9377 +Train: [6] [3100/6250] eta: 0:08:35 lr: 0.000125 grad: 0.1053 (0.1262) loss: 0.8758 (0.8829) time: 0.1487 data: 0.0654 max mem: 9377 +Train: [6] [3200/6250] eta: 0:08:18 lr: 0.000125 grad: 0.1119 (0.1259) loss: 0.8763 (0.8828) time: 0.1643 data: 0.0788 max mem: 9377 +Train: [6] [3300/6250] eta: 0:08:03 lr: 0.000125 grad: 0.1073 (0.1257) loss: 0.8763 (0.8826) time: 0.1636 data: 0.0735 max mem: 9377 +Train: [6] [3400/6250] eta: 0:07:46 lr: 0.000125 grad: 0.1112 (0.1252) loss: 0.8738 (0.8823) time: 0.1594 data: 0.0801 max mem: 9377 +Train: [6] [3500/6250] eta: 0:07:29 lr: 0.000125 grad: 0.1148 (0.1251) loss: 0.8771 (0.8822) time: 0.1556 data: 0.0686 max mem: 9377 +Train: [6] [3600/6250] eta: 0:07:12 lr: 0.000125 grad: 0.1053 (0.1247) loss: 0.8723 (0.8820) time: 0.1479 data: 0.0601 max mem: 9377 +Train: [6] [3700/6250] eta: 0:06:56 lr: 0.000125 grad: 0.1063 (0.1242) loss: 0.8735 (0.8818) time: 0.1257 data: 0.0347 max mem: 9377 +Train: [6] [3800/6250] eta: 0:06:39 lr: 0.000125 grad: 0.1031 (0.1238) loss: 0.8758 (0.8816) time: 0.1535 data: 0.0652 max mem: 9377 +Train: [6] [3900/6250] eta: 0:06:23 lr: 0.000125 grad: 0.0949 (0.1234) loss: 0.8722 (0.8814) time: 0.1745 data: 0.0886 max mem: 9377 +Train: [6] [4000/6250] eta: 0:06:07 lr: 0.000125 grad: 0.1058 (0.1232) loss: 0.8736 (0.8812) time: 0.1663 data: 0.0792 max mem: 9377 +Train: [6] [4100/6250] eta: 0:05:50 lr: 0.000125 grad: 0.1047 (0.1231) loss: 0.8757 (0.8809) time: 0.1491 data: 0.0574 max mem: 9377 +Train: [6] [4200/6250] eta: 0:05:33 lr: 0.000125 grad: 0.1080 (0.1228) loss: 0.8751 (0.8807) time: 0.1691 data: 0.0869 max mem: 9377 +Train: [6] [4300/6250] eta: 0:05:17 lr: 0.000125 grad: 0.1015 (0.1225) loss: 0.8659 (0.8805) time: 0.1590 data: 0.0631 max mem: 9377 +Train: [6] [4400/6250] eta: 0:05:00 lr: 0.000125 grad: 0.1069 (0.1222) loss: 0.8718 (0.8803) time: 0.1619 data: 0.0667 max mem: 9377 +Train: [6] [4500/6250] eta: 0:04:44 lr: 0.000125 grad: 0.1152 (0.1220) loss: 0.8722 (0.8801) time: 0.1532 data: 0.0627 max mem: 9377 +Train: [6] [4600/6250] eta: 0:04:27 lr: 0.000125 grad: 0.0952 (0.1217) loss: 0.8674 (0.8799) time: 0.1703 data: 0.0842 max mem: 9377 +Train: [6] [4700/6250] eta: 0:04:11 lr: 0.000125 grad: 0.0975 (0.1213) loss: 0.8680 (0.8797) time: 0.1523 data: 0.0698 max mem: 9377 +Train: [6] [4800/6250] eta: 0:03:54 lr: 0.000125 grad: 0.1042 (0.1212) loss: 0.8706 (0.8795) time: 0.1508 data: 0.0645 max mem: 9377 +Train: [6] [4900/6250] eta: 0:03:38 lr: 0.000125 grad: 0.1011 (0.1210) loss: 0.8720 (0.8793) time: 0.1350 data: 0.0441 max mem: 9377 +Train: [6] [5000/6250] eta: 0:03:22 lr: 0.000125 grad: 0.0970 (0.1209) loss: 0.8714 (0.8792) time: 0.1773 data: 0.0924 max mem: 9377 +Train: [6] [5100/6250] eta: 0:03:05 lr: 0.000125 grad: 0.0969 (0.1206) loss: 0.8746 (0.8791) time: 0.1793 data: 0.0930 max mem: 9377 +Train: [6] [5200/6250] eta: 0:02:49 lr: 0.000125 grad: 0.1094 (0.1205) loss: 0.8681 (0.8789) time: 0.1484 data: 0.0543 max mem: 9377 +Train: [6] [5300/6250] eta: 0:02:33 lr: 0.000125 grad: 0.0943 (0.1204) loss: 0.8675 (0.8787) time: 0.1507 data: 0.0620 max mem: 9377 +Train: [6] [5400/6250] eta: 0:02:17 lr: 0.000125 grad: 0.1042 (0.1202) loss: 0.8704 (0.8786) time: 0.1399 data: 0.0492 max mem: 9377 +Train: [6] [5500/6250] eta: 0:02:00 lr: 0.000125 grad: 0.1006 (0.1200) loss: 0.8679 (0.8784) time: 0.1716 data: 0.0873 max mem: 9377 +Train: [6] [5600/6250] eta: 0:01:44 lr: 0.000125 grad: 0.1076 (0.1198) loss: 0.8644 (0.8783) time: 0.1429 data: 0.0511 max mem: 9377 +Train: [6] [5700/6250] eta: 0:01:28 lr: 0.000125 grad: 0.0967 (0.1195) loss: 0.8685 (0.8781) time: 0.1739 data: 0.0831 max mem: 9377 +Train: [6] [5800/6250] eta: 0:01:12 lr: 0.000125 grad: 0.0980 (0.1192) loss: 0.8662 (0.8779) time: 0.1552 data: 0.0629 max mem: 9377 +Train: [6] [5900/6250] eta: 0:00:56 lr: 0.000125 grad: 0.0968 (0.1190) loss: 0.8656 (0.8777) time: 0.1765 data: 0.0907 max mem: 9377 +Train: [6] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.1089 (0.1187) loss: 0.8686 (0.8776) time: 0.1783 data: 0.0919 max mem: 9377 +Train: [6] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.0937 (0.1185) loss: 0.8647 (0.8774) time: 0.1771 data: 0.0988 max mem: 9377 +Train: [6] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.0951 (0.1182) loss: 0.8667 (0.8773) time: 0.1721 data: 0.0849 max mem: 9377 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1057 (0.1181) loss: 0.8725 (0.8772) time: 0.1531 data: 0.0648 max mem: 9377 +Train: [6] Total time: 0:16:58 (0.1629 s / it) +Averaged stats: lr: 0.000125 grad: 0.1057 (0.1181) loss: 0.8725 (0.8772) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:04:15 loss: 0.8648 (0.8648) time: 4.1241 data: 4.0393 max mem: 9377 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.8670 (0.8668) time: 0.1294 data: 0.1032 max mem: 9377 +Eval (hcp-train-subset): [6] Total time: 0:00:14 (0.2406 s / it) +Averaged stats (hcp-train-subset): loss: 0.8670 (0.8668) +Eval (hcp-val): [6] [ 0/62] eta: 0:05:12 loss: 0.8637 (0.8637) time: 5.0374 data: 5.0079 max mem: 9377 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.8646 (0.8663) time: 0.1348 data: 0.1099 max mem: 9377 +Eval (hcp-val): [6] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (hcp-val): loss: 0.8646 (0.8663) +Eval (nsd-val): [6] [ 0/62] eta: 0:05:48 loss: 0.8263 (0.8263) time: 5.6170 data: 5.5861 max mem: 9377 +Eval (nsd-val): [6] [61/62] eta: 0:00:00 loss: 0.8324 (0.8338) time: 0.1138 data: 0.0880 max mem: 9377 +Eval (nsd-val): [6] Total time: 0:00:14 (0.2301 s / it) +Averaged stats (nsd-val): loss: 0.8324 (0.8338) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [7] [ 0/6250] eta: 11:09:05 lr: 0.000125 grad: 0.0804 (0.0804) loss: 0.8956 (0.8956) time: 6.4233 data: 6.2883 max mem: 9377 +Train: [7] [ 100/6250] eta: 0:24:17 lr: 0.000125 grad: 0.0954 (0.1154) loss: 0.8727 (0.8705) time: 0.1877 data: 0.0892 max mem: 9377 +Train: [7] [ 200/6250] eta: 0:20:45 lr: 0.000125 grad: 0.0961 (0.1116) loss: 0.8674 (0.8697) time: 0.1429 data: 0.0443 max mem: 9377 +Train: [7] [ 300/6250] eta: 0:19:28 lr: 0.000125 grad: 0.0939 (0.1093) loss: 0.8739 (0.8702) time: 0.1578 data: 0.0692 max mem: 9377 +Train: [7] [ 400/6250] eta: 0:18:47 lr: 0.000125 grad: 0.0911 (0.1051) loss: 0.8715 (0.8709) time: 0.1617 data: 0.0731 max mem: 9377 +Train: [7] [ 500/6250] eta: 0:17:45 lr: 0.000125 grad: 0.0946 (0.1044) loss: 0.8721 (0.8709) time: 0.1410 data: 0.0553 max mem: 9377 +Train: [7] [ 600/6250] eta: 0:17:01 lr: 0.000125 grad: 0.0966 (0.1044) loss: 0.8680 (0.8703) time: 0.1456 data: 0.0614 max mem: 9377 +Train: [7] [ 700/6250] eta: 0:16:30 lr: 0.000125 grad: 0.0894 (0.1031) loss: 0.8689 (0.8699) time: 0.2086 data: 0.1192 max mem: 9377 +Train: [7] [ 800/6250] eta: 0:16:05 lr: 0.000125 grad: 0.0895 (0.1031) loss: 0.8667 (0.8700) time: 0.1782 data: 0.0731 max mem: 9377 +Train: [7] [ 900/6250] eta: 0:15:51 lr: 0.000125 grad: 0.0839 (0.1025) loss: 0.8708 (0.8700) time: 0.1704 data: 0.0677 max mem: 9377 +Train: [7] [1000/6250] eta: 0:15:26 lr: 0.000125 grad: 0.0902 (0.1018) loss: 0.8707 (0.8702) time: 0.1442 data: 0.0498 max mem: 9377 +Train: [7] [1100/6250] eta: 0:14:59 lr: 0.000125 grad: 0.1051 (0.1018) loss: 0.8695 (0.8703) time: 0.1486 data: 0.0428 max mem: 9377 +Train: [7] [1200/6250] eta: 0:14:34 lr: 0.000125 grad: 0.0883 (0.1013) loss: 0.8710 (0.8703) time: 0.1637 data: 0.0787 max mem: 9377 +Train: [7] [1300/6250] eta: 0:14:09 lr: 0.000125 grad: 0.0951 (0.1009) loss: 0.8670 (0.8703) time: 0.1319 data: 0.0431 max mem: 9377 +Train: [7] [1400/6250] eta: 0:13:49 lr: 0.000125 grad: 0.1006 (0.1012) loss: 0.8688 (0.8702) time: 0.1642 data: 0.0744 max mem: 9377 +Train: [7] [1500/6250] eta: 0:13:32 lr: 0.000125 grad: 0.0896 (0.1007) loss: 0.8717 (0.8701) time: 0.1593 data: 0.0646 max mem: 9377 +Train: [7] [1600/6250] eta: 0:13:15 lr: 0.000125 grad: 0.0965 (0.1003) loss: 0.8700 (0.8700) time: 0.1787 data: 0.0843 max mem: 9377 +Train: [7] [1700/6250] eta: 0:12:57 lr: 0.000125 grad: 0.0908 (0.1000) loss: 0.8695 (0.8700) time: 0.1582 data: 0.0685 max mem: 9377 +Train: [7] [1800/6250] eta: 0:12:40 lr: 0.000125 grad: 0.0949 (0.0996) loss: 0.8699 (0.8700) time: 0.1747 data: 0.0821 max mem: 9377 +Train: [7] [1900/6250] eta: 0:12:22 lr: 0.000125 grad: 0.0928 (0.0993) loss: 0.8717 (0.8700) time: 0.1731 data: 0.0794 max mem: 9377 +Train: [7] [2000/6250] eta: 0:12:00 lr: 0.000125 grad: 0.0883 (0.0993) loss: 0.8696 (0.8699) time: 0.1568 data: 0.0603 max mem: 9377 +Train: [7] [2100/6250] eta: 0:11:42 lr: 0.000125 grad: 0.0876 (0.0989) loss: 0.8709 (0.8698) time: 0.1629 data: 0.0727 max mem: 9377 +Train: [7] [2200/6250] eta: 0:11:23 lr: 0.000125 grad: 0.0803 (0.0984) loss: 0.8710 (0.8698) time: 0.1696 data: 0.0846 max mem: 9377 +Train: [7] [2300/6250] eta: 0:11:05 lr: 0.000125 grad: 0.0882 (0.0984) loss: 0.8687 (0.8697) time: 0.1589 data: 0.0724 max mem: 9377 +Train: [7] [2400/6250] eta: 0:10:47 lr: 0.000125 grad: 0.0834 (0.0981) loss: 0.8685 (0.8696) time: 0.1553 data: 0.0594 max mem: 9377 +Train: [7] [2500/6250] eta: 0:10:28 lr: 0.000125 grad: 0.0917 (0.0981) loss: 0.8659 (0.8695) time: 0.1418 data: 0.0477 max mem: 9377 +Train: [7] [2600/6250] eta: 0:10:11 lr: 0.000125 grad: 0.0855 (0.0980) loss: 0.8677 (0.8695) time: 0.1472 data: 0.0638 max mem: 9377 +Train: [7] [2700/6250] eta: 0:09:53 lr: 0.000125 grad: 0.0926 (0.0981) loss: 0.8637 (0.8694) time: 0.1697 data: 0.0875 max mem: 9377 +Train: [7] [2800/6250] eta: 0:09:35 lr: 0.000125 grad: 0.0890 (0.0980) loss: 0.8673 (0.8693) time: 0.1841 data: 0.1051 max mem: 9377 +Train: [7] [2900/6250] eta: 0:09:18 lr: 0.000125 grad: 0.0935 (0.0978) loss: 0.8653 (0.8692) time: 0.1812 data: 0.0980 max mem: 9377 +Train: [7] [3000/6250] eta: 0:08:59 lr: 0.000125 grad: 0.0880 (0.0975) loss: 0.8691 (0.8691) time: 0.1288 data: 0.0421 max mem: 9377 +Train: [7] [3100/6250] eta: 0:08:41 lr: 0.000125 grad: 0.0997 (0.0973) loss: 0.8683 (0.8690) time: 0.1672 data: 0.0835 max mem: 9377 +Train: [7] [3200/6250] eta: 0:08:24 lr: 0.000125 grad: 0.0864 (0.0971) loss: 0.8642 (0.8690) time: 0.1732 data: 0.0975 max mem: 9377 +Train: [7] [3300/6250] eta: 0:08:07 lr: 0.000125 grad: 0.0797 (0.0968) loss: 0.8686 (0.8689) time: 0.1597 data: 0.0823 max mem: 9377 +Train: [7] [3400/6250] eta: 0:07:49 lr: 0.000125 grad: 0.1058 (0.0969) loss: 0.8641 (0.8689) time: 0.1463 data: 0.0579 max mem: 9377 +Train: [7] [3500/6250] eta: 0:07:32 lr: 0.000125 grad: 0.0807 (0.0967) loss: 0.8643 (0.8687) time: 0.1215 data: 0.0349 max mem: 9377 +Train: [7] [3600/6250] eta: 0:07:15 lr: 0.000125 grad: 0.0887 (0.0965) loss: 0.8626 (0.8686) time: 0.1528 data: 0.0575 max mem: 9377 +Train: [7] [3700/6250] eta: 0:06:58 lr: 0.000125 grad: 0.0884 (0.0964) loss: 0.8641 (0.8684) time: 0.1563 data: 0.0753 max mem: 9377 +Train: [7] [3800/6250] eta: 0:06:41 lr: 0.000125 grad: 0.0906 (0.0963) loss: 0.8610 (0.8683) time: 0.1707 data: 0.0791 max mem: 9377 +Train: [7] [3900/6250] eta: 0:06:25 lr: 0.000125 grad: 0.0836 (0.0961) loss: 0.8679 (0.8682) time: 0.1858 data: 0.0877 max mem: 9377 +Train: [7] [4000/6250] eta: 0:06:08 lr: 0.000125 grad: 0.0851 (0.0960) loss: 0.8591 (0.8680) time: 0.1781 data: 0.0948 max mem: 9377 +Train: [7] [4100/6250] eta: 0:05:52 lr: 0.000125 grad: 0.0913 (0.0959) loss: 0.8625 (0.8679) time: 0.1641 data: 0.0714 max mem: 9377 +Train: [7] [4200/6250] eta: 0:05:35 lr: 0.000125 grad: 0.0767 (0.0958) loss: 0.8614 (0.8678) time: 0.1598 data: 0.0661 max mem: 9377 +Train: [7] [4300/6250] eta: 0:05:19 lr: 0.000125 grad: 0.0882 (0.0957) loss: 0.8680 (0.8677) time: 0.1510 data: 0.0626 max mem: 9377 +Train: [7] [4400/6250] eta: 0:05:02 lr: 0.000125 grad: 0.0844 (0.0956) loss: 0.8627 (0.8676) time: 0.1842 data: 0.1026 max mem: 9377 +Train: [7] [4500/6250] eta: 0:04:46 lr: 0.000125 grad: 0.0833 (0.0956) loss: 0.8613 (0.8675) time: 0.1614 data: 0.0721 max mem: 9377 +Train: [7] [4600/6250] eta: 0:04:29 lr: 0.000125 grad: 0.0827 (0.0954) loss: 0.8644 (0.8674) time: 0.1558 data: 0.0649 max mem: 9377 +Train: [7] [4700/6250] eta: 0:04:13 lr: 0.000125 grad: 0.0871 (0.0952) loss: 0.8651 (0.8673) time: 0.1778 data: 0.0807 max mem: 9377 +Train: [7] [4800/6250] eta: 0:03:56 lr: 0.000125 grad: 0.0850 (0.0951) loss: 0.8610 (0.8672) time: 0.1659 data: 0.0820 max mem: 9377 +Train: [7] [4900/6250] eta: 0:03:40 lr: 0.000125 grad: 0.0834 (0.0950) loss: 0.8637 (0.8671) time: 0.1415 data: 0.0497 max mem: 9377 +Train: [7] [5000/6250] eta: 0:03:24 lr: 0.000125 grad: 0.0838 (0.0950) loss: 0.8593 (0.8669) time: 0.1591 data: 0.0758 max mem: 9377 +Train: [7] [5100/6250] eta: 0:03:07 lr: 0.000125 grad: 0.0805 (0.0948) loss: 0.8627 (0.8669) time: 0.1456 data: 0.0534 max mem: 9377 +Train: [7] [5200/6250] eta: 0:02:51 lr: 0.000125 grad: 0.0850 (0.0949) loss: 0.8653 (0.8668) time: 0.1467 data: 0.0567 max mem: 9377 +Train: [7] [5300/6250] eta: 0:02:34 lr: 0.000125 grad: 0.0814 (0.0947) loss: 0.8621 (0.8668) time: 0.2003 data: 0.1191 max mem: 9377 +Train: [7] [5400/6250] eta: 0:02:18 lr: 0.000125 grad: 0.1071 (0.0947) loss: 0.8621 (0.8667) time: 0.1519 data: 0.0596 max mem: 9377 +Train: [7] [5500/6250] eta: 0:02:02 lr: 0.000125 grad: 0.0874 (0.0946) loss: 0.8609 (0.8666) time: 0.1972 data: 0.0993 max mem: 9377 +Train: [7] [5600/6250] eta: 0:01:45 lr: 0.000125 grad: 0.0849 (0.0944) loss: 0.8615 (0.8665) time: 0.1620 data: 0.0657 max mem: 9377 +Train: [7] [5700/6250] eta: 0:01:29 lr: 0.000125 grad: 0.0853 (0.0943) loss: 0.8627 (0.8665) time: 0.1610 data: 0.0747 max mem: 9377 +Train: [7] [5800/6250] eta: 0:01:13 lr: 0.000125 grad: 0.0872 (0.0942) loss: 0.8657 (0.8664) time: 0.1757 data: 0.0862 max mem: 9377 +Train: [7] [5900/6250] eta: 0:00:57 lr: 0.000125 grad: 0.0959 (0.0943) loss: 0.8597 (0.8663) time: 0.1852 data: 0.0948 max mem: 9377 +Train: [7] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.0825 (0.0943) loss: 0.8592 (0.8662) time: 0.1860 data: 0.0867 max mem: 9377 +Train: [7] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.0928 (0.0941) loss: 0.8606 (0.8661) time: 0.1577 data: 0.0763 max mem: 9377 +Train: [7] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.0872 (0.0940) loss: 0.8588 (0.8660) time: 0.1684 data: 0.0765 max mem: 9377 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0776 (0.0939) loss: 0.8635 (0.8660) time: 0.1685 data: 0.0787 max mem: 9377 +Train: [7] Total time: 0:17:08 (0.1645 s / it) +Averaged stats: lr: 0.000125 grad: 0.0776 (0.0939) loss: 0.8635 (0.8660) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:05:53 loss: 0.8568 (0.8568) time: 5.7029 data: 5.6700 max mem: 9377 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.8593 (0.8594) time: 0.1517 data: 0.1261 max mem: 9377 +Eval (hcp-train-subset): [7] Total time: 0:00:15 (0.2460 s / it) +Averaged stats (hcp-train-subset): loss: 0.8593 (0.8594) +Eval (hcp-val): [7] [ 0/62] eta: 0:04:51 loss: 0.8560 (0.8560) time: 4.7031 data: 4.6709 max mem: 9377 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.8575 (0.8586) time: 0.1193 data: 0.0923 max mem: 9377 +Eval (hcp-val): [7] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-val): loss: 0.8575 (0.8586) +Eval (nsd-val): [7] [ 0/62] eta: 0:05:32 loss: 0.8164 (0.8164) time: 5.3636 data: 5.3300 max mem: 9377 +Eval (nsd-val): [7] [61/62] eta: 0:00:00 loss: 0.8235 (0.8247) time: 0.1214 data: 0.0964 max mem: 9377 +Eval (nsd-val): [7] Total time: 0:00:13 (0.2208 s / it) +Averaged stats (nsd-val): loss: 0.8235 (0.8247) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [8] [ 0/6250] eta: 7:19:10 lr: 0.000125 grad: 0.0760 (0.0760) loss: 0.8695 (0.8695) time: 4.2161 data: 3.9835 max mem: 9377 +Train: [8] [ 100/6250] eta: 0:23:28 lr: 0.000125 grad: 0.1145 (0.1313) loss: 0.8672 (0.8669) time: 0.1813 data: 0.0763 max mem: 9377 +Train: [8] [ 200/6250] eta: 0:20:49 lr: 0.000125 grad: 0.0819 (0.1111) loss: 0.8619 (0.8628) time: 0.2188 data: 0.1244 max mem: 9377 +Train: [8] [ 300/6250] eta: 0:19:51 lr: 0.000125 grad: 0.0813 (0.1059) loss: 0.8544 (0.8609) time: 0.1679 data: 0.0759 max mem: 9377 +Train: [8] [ 400/6250] eta: 0:19:03 lr: 0.000125 grad: 0.0837 (0.1012) loss: 0.8568 (0.8597) time: 0.1612 data: 0.0590 max mem: 9377 +Train: [8] [ 500/6250] eta: 0:18:38 lr: 0.000125 grad: 0.0942 (0.0993) loss: 0.8538 (0.8594) time: 0.1931 data: 0.1007 max mem: 9377 +Train: [8] [ 600/6250] eta: 0:17:40 lr: 0.000125 grad: 0.0852 (0.1015) loss: 0.8611 (0.8594) time: 0.1529 data: 0.0618 max mem: 9377 +Train: [8] [ 700/6250] eta: 0:17:13 lr: 0.000125 grad: 0.0830 (0.0996) loss: 0.8617 (0.8594) time: 0.1608 data: 0.0730 max mem: 9377 +Train: [8] [ 800/6250] eta: 0:16:39 lr: 0.000125 grad: 0.0832 (0.0983) loss: 0.8588 (0.8594) time: 0.1994 data: 0.0981 max mem: 9377 +Train: [8] [ 900/6250] eta: 0:16:22 lr: 0.000125 grad: 0.0857 (0.0978) loss: 0.8629 (0.8595) time: 0.1605 data: 0.0512 max mem: 9377 +Train: [8] [1000/6250] eta: 0:15:57 lr: 0.000125 grad: 0.0880 (0.0968) loss: 0.8595 (0.8595) time: 0.1382 data: 0.0295 max mem: 9377 +Train: [8] [1100/6250] eta: 0:15:29 lr: 0.000125 grad: 0.0903 (0.0962) loss: 0.8556 (0.8595) time: 0.1569 data: 0.0505 max mem: 9377 +Train: [8] [1200/6250] eta: 0:15:04 lr: 0.000125 grad: 0.0819 (0.0951) loss: 0.8588 (0.8594) time: 0.1487 data: 0.0592 max mem: 9377 +Train: [8] [1300/6250] eta: 0:14:35 lr: 0.000125 grad: 0.0789 (0.0945) loss: 0.8576 (0.8594) time: 0.1515 data: 0.0523 max mem: 9377 +Train: [8] [1400/6250] eta: 0:14:10 lr: 0.000125 grad: 0.0769 (0.0937) loss: 0.8596 (0.8594) time: 0.1487 data: 0.0667 max mem: 9377 +Train: [8] [1500/6250] eta: 0:13:48 lr: 0.000125 grad: 0.0854 (0.0932) loss: 0.8577 (0.8593) time: 0.1732 data: 0.0914 max mem: 9377 +Train: [8] [1600/6250] eta: 0:13:27 lr: 0.000125 grad: 0.0798 (0.0926) loss: 0.8585 (0.8593) time: 0.1501 data: 0.0521 max mem: 9377 +Train: [8] [1700/6250] eta: 0:13:07 lr: 0.000125 grad: 0.0803 (0.0923) loss: 0.8620 (0.8593) time: 0.1651 data: 0.0834 max mem: 9377 +Train: [8] [1800/6250] eta: 0:12:47 lr: 0.000125 grad: 0.0877 (0.0919) loss: 0.8562 (0.8593) time: 0.1742 data: 0.0851 max mem: 9377 +Train: [8] [1900/6250] eta: 0:12:28 lr: 0.000125 grad: 0.0844 (0.0913) loss: 0.8552 (0.8593) time: 0.1677 data: 0.0824 max mem: 9377 +Train: [8] [2000/6250] eta: 0:12:08 lr: 0.000125 grad: 0.0740 (0.0907) loss: 0.8627 (0.8593) time: 0.1575 data: 0.0647 max mem: 9377 +Train: [8] [2100/6250] eta: 0:11:49 lr: 0.000125 grad: 0.0763 (0.0901) loss: 0.8593 (0.8593) time: 0.1546 data: 0.0685 max mem: 9377 +Train: [8] [2200/6250] eta: 0:11:30 lr: 0.000125 grad: 0.0759 (0.0898) loss: 0.8575 (0.8594) time: 0.1632 data: 0.0711 max mem: 9377 +Train: [8] [2300/6250] eta: 0:11:13 lr: 0.000125 grad: 0.0783 (0.0894) loss: 0.8576 (0.8593) time: 0.1093 data: 0.0038 max mem: 9377 +Train: [8] [2400/6250] eta: 0:10:52 lr: 0.000125 grad: 0.0794 (0.0891) loss: 0.8578 (0.8593) time: 0.1305 data: 0.0443 max mem: 9377 +Train: [8] [2500/6250] eta: 0:10:33 lr: 0.000125 grad: 0.0779 (0.0887) loss: 0.8634 (0.8593) time: 0.1486 data: 0.0623 max mem: 9377 +Train: [8] [2600/6250] eta: 0:10:15 lr: 0.000125 grad: 0.0799 (0.0886) loss: 0.8555 (0.8593) time: 0.1565 data: 0.0712 max mem: 9377 +Train: [8] [2700/6250] eta: 0:09:57 lr: 0.000125 grad: 0.0764 (0.0883) loss: 0.8624 (0.8593) time: 0.1435 data: 0.0574 max mem: 9377 +Train: [8] [2800/6250] eta: 0:09:39 lr: 0.000125 grad: 0.0784 (0.0880) loss: 0.8555 (0.8593) time: 0.1656 data: 0.0813 max mem: 9377 +Train: [8] [2900/6250] eta: 0:09:20 lr: 0.000125 grad: 0.0789 (0.0879) loss: 0.8611 (0.8593) time: 0.1634 data: 0.0753 max mem: 9377 +Train: [8] [3000/6250] eta: 0:09:04 lr: 0.000125 grad: 0.0780 (0.0876) loss: 0.8609 (0.8594) time: 0.1482 data: 0.0563 max mem: 9377 +Train: [8] [3100/6250] eta: 0:08:46 lr: 0.000125 grad: 0.0773 (0.0874) loss: 0.8624 (0.8594) time: 0.1666 data: 0.0816 max mem: 9377 +Train: [8] [3200/6250] eta: 0:08:28 lr: 0.000125 grad: 0.0783 (0.0874) loss: 0.8576 (0.8594) time: 0.1820 data: 0.0969 max mem: 9377 +Train: [8] [3300/6250] eta: 0:08:10 lr: 0.000125 grad: 0.0813 (0.0871) loss: 0.8594 (0.8594) time: 0.1443 data: 0.0538 max mem: 9377 +Train: [8] [3400/6250] eta: 0:07:53 lr: 0.000125 grad: 0.0765 (0.0870) loss: 0.8542 (0.8593) time: 0.1612 data: 0.0805 max mem: 9377 +Train: [8] [3500/6250] eta: 0:07:35 lr: 0.000125 grad: 0.0762 (0.0874) loss: 0.8580 (0.8592) time: 0.1617 data: 0.0777 max mem: 9377 +Train: [8] [3600/6250] eta: 0:07:18 lr: 0.000125 grad: 0.0798 (0.0872) loss: 0.8577 (0.8592) time: 0.1726 data: 0.0835 max mem: 9377 +Train: [8] [3700/6250] eta: 0:07:01 lr: 0.000125 grad: 0.0794 (0.0872) loss: 0.8558 (0.8591) time: 0.1536 data: 0.0581 max mem: 9377 +Train: [8] [3800/6250] eta: 0:06:45 lr: 0.000125 grad: 0.0830 (0.0871) loss: 0.8579 (0.8590) time: 0.1736 data: 0.0863 max mem: 9377 +Train: [8] [3900/6250] eta: 0:06:28 lr: 0.000125 grad: 0.0811 (0.0870) loss: 0.8577 (0.8589) time: 0.1434 data: 0.0578 max mem: 9377 +Train: [8] [4000/6250] eta: 0:06:12 lr: 0.000125 grad: 0.0797 (0.0869) loss: 0.8578 (0.8589) time: 0.1593 data: 0.0803 max mem: 9377 +Train: [8] [4100/6250] eta: 0:05:54 lr: 0.000125 grad: 0.0775 (0.0869) loss: 0.8563 (0.8589) time: 0.1536 data: 0.0684 max mem: 9377 +Train: [8] [4200/6250] eta: 0:05:38 lr: 0.000125 grad: 0.0799 (0.0868) loss: 0.8600 (0.8589) time: 0.1794 data: 0.0922 max mem: 9377 +Train: [8] [4300/6250] eta: 0:05:21 lr: 0.000125 grad: 0.0762 (0.0866) loss: 0.8549 (0.8588) time: 0.1955 data: 0.1074 max mem: 9377 +Train: [8] [4400/6250] eta: 0:05:04 lr: 0.000125 grad: 0.0736 (0.0865) loss: 0.8573 (0.8588) time: 0.1357 data: 0.0464 max mem: 9377 +Train: [8] [4500/6250] eta: 0:04:47 lr: 0.000125 grad: 0.0771 (0.0863) loss: 0.8569 (0.8588) time: 0.1751 data: 0.0909 max mem: 9377 +Train: [8] [4600/6250] eta: 0:04:30 lr: 0.000125 grad: 0.0822 (0.0863) loss: 0.8581 (0.8588) time: 0.1559 data: 0.0735 max mem: 9377 +Train: [8] [4700/6250] eta: 0:04:14 lr: 0.000125 grad: 0.0757 (0.0861) loss: 0.8592 (0.8588) time: 0.1692 data: 0.0879 max mem: 9377 +Train: [8] [4800/6250] eta: 0:03:57 lr: 0.000125 grad: 0.0816 (0.0861) loss: 0.8581 (0.8588) time: 0.1781 data: 0.0916 max mem: 9377 +Train: [8] [4900/6250] eta: 0:03:40 lr: 0.000125 grad: 0.0773 (0.0860) loss: 0.8582 (0.8587) time: 0.1539 data: 0.0701 max mem: 9377 +Train: [8] [5000/6250] eta: 0:03:24 lr: 0.000125 grad: 0.0735 (0.0859) loss: 0.8569 (0.8587) time: 0.1383 data: 0.0536 max mem: 9377 +Train: [8] [5100/6250] eta: 0:03:08 lr: 0.000125 grad: 0.0781 (0.0862) loss: 0.8549 (0.8586) time: 0.1782 data: 0.0971 max mem: 9377 +Train: [8] [5200/6250] eta: 0:02:51 lr: 0.000124 grad: 0.0795 (0.0861) loss: 0.8560 (0.8586) time: 0.1578 data: 0.0618 max mem: 9377 +Train: [8] [5300/6250] eta: 0:02:35 lr: 0.000124 grad: 0.0751 (0.0859) loss: 0.8558 (0.8585) time: 0.1505 data: 0.0608 max mem: 9377 +Train: [8] [5400/6250] eta: 0:02:18 lr: 0.000124 grad: 0.0809 (0.0859) loss: 0.8537 (0.8585) time: 0.1325 data: 0.0476 max mem: 9377 +Train: [8] [5500/6250] eta: 0:02:02 lr: 0.000124 grad: 0.0780 (0.0858) loss: 0.8510 (0.8584) time: 0.1510 data: 0.0699 max mem: 9377 +Train: [8] [5600/6250] eta: 0:01:46 lr: 0.000124 grad: 0.0742 (0.0856) loss: 0.8576 (0.8584) time: 0.1589 data: 0.0702 max mem: 9377 +Train: [8] [5700/6250] eta: 0:01:29 lr: 0.000124 grad: 0.0754 (0.0855) loss: 0.8614 (0.8584) time: 0.1523 data: 0.0520 max mem: 9377 +Train: [8] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.0768 (0.0854) loss: 0.8562 (0.8583) time: 0.1427 data: 0.0461 max mem: 9377 +Train: [8] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.0806 (0.0853) loss: 0.8552 (0.8583) time: 0.1565 data: 0.0696 max mem: 9377 +Train: [8] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0763 (0.0852) loss: 0.8530 (0.8583) time: 0.1475 data: 0.0580 max mem: 9377 +Train: [8] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0831 (0.0852) loss: 0.8494 (0.8582) time: 0.1427 data: 0.0497 max mem: 9377 +Train: [8] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0762 (0.0851) loss: 0.8516 (0.8582) time: 0.1568 data: 0.0612 max mem: 9377 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0845 (0.0851) loss: 0.8585 (0.8581) time: 0.1535 data: 0.0593 max mem: 9377 +Train: [8] Total time: 0:17:02 (0.1636 s / it) +Averaged stats: lr: 0.000124 grad: 0.0845 (0.0851) loss: 0.8585 (0.8581) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:03:44 loss: 0.8551 (0.8551) time: 3.6213 data: 3.5232 max mem: 9377 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.8532 (0.8540) time: 0.1646 data: 0.1376 max mem: 9377 +Eval (hcp-train-subset): [8] Total time: 0:00:16 (0.2601 s / it) +Averaged stats (hcp-train-subset): loss: 0.8532 (0.8540) +Eval (hcp-val): [8] [ 0/62] eta: 0:05:50 loss: 0.8558 (0.8558) time: 5.6503 data: 5.6169 max mem: 9377 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8541 (0.8542) time: 0.1303 data: 0.1049 max mem: 9377 +Eval (hcp-val): [8] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (hcp-val): loss: 0.8541 (0.8542) +Eval (nsd-val): [8] [ 0/62] eta: 0:06:02 loss: 0.8079 (0.8079) time: 5.8407 data: 5.8091 max mem: 9377 +Eval (nsd-val): [8] [61/62] eta: 0:00:00 loss: 0.8197 (0.8204) time: 0.1150 data: 0.0880 max mem: 9377 +Eval (nsd-val): [8] Total time: 0:00:14 (0.2396 s / it) +Averaged stats (nsd-val): loss: 0.8197 (0.8204) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [9] [ 0/6250] eta: 9:20:39 lr: 0.000124 grad: 0.0600 (0.0600) loss: 0.8815 (0.8815) time: 5.3824 data: 5.2240 max mem: 9377 +Train: [9] [ 100/6250] eta: 0:21:49 lr: 0.000124 grad: 0.0776 (0.0840) loss: 0.8516 (0.8603) time: 0.1838 data: 0.0944 max mem: 9377 +Train: [9] [ 200/6250] eta: 0:19:09 lr: 0.000124 grad: 0.0772 (0.0875) loss: 0.8530 (0.8555) time: 0.1611 data: 0.0734 max mem: 9377 +Train: [9] [ 300/6250] eta: 0:17:47 lr: 0.000124 grad: 0.0752 (0.0863) loss: 0.8591 (0.8553) time: 0.1362 data: 0.0472 max mem: 9377 +Train: [9] [ 400/6250] eta: 0:17:44 lr: 0.000124 grad: 0.0761 (0.0850) loss: 0.8590 (0.8554) time: 0.1906 data: 0.1015 max mem: 9377 +Train: [9] [ 500/6250] eta: 0:17:14 lr: 0.000124 grad: 0.0716 (0.0847) loss: 0.8593 (0.8556) time: 0.1903 data: 0.1107 max mem: 9377 +Train: [9] [ 600/6250] eta: 0:16:48 lr: 0.000124 grad: 0.0739 (0.0843) loss: 0.8574 (0.8558) time: 0.1538 data: 0.0707 max mem: 9377 +Train: [9] [ 700/6250] eta: 0:16:29 lr: 0.000124 grad: 0.0741 (0.0834) loss: 0.8583 (0.8560) time: 0.2195 data: 0.1365 max mem: 9377 +Train: [9] [ 800/6250] eta: 0:16:01 lr: 0.000124 grad: 0.0759 (0.0828) loss: 0.8570 (0.8559) time: 0.1291 data: 0.0405 max mem: 9377 +Train: [9] [ 900/6250] eta: 0:15:45 lr: 0.000124 grad: 0.0738 (0.0822) loss: 0.8539 (0.8558) time: 0.1945 data: 0.1030 max mem: 9377 +Train: [9] [1000/6250] eta: 0:15:28 lr: 0.000124 grad: 0.0788 (0.0821) loss: 0.8539 (0.8556) time: 0.1759 data: 0.0751 max mem: 9377 +Train: [9] [1100/6250] eta: 0:15:08 lr: 0.000124 grad: 0.0705 (0.0816) loss: 0.8567 (0.8556) time: 0.1771 data: 0.0866 max mem: 9377 +Train: [9] [1200/6250] eta: 0:14:43 lr: 0.000124 grad: 0.0953 (0.0824) loss: 0.8506 (0.8555) time: 0.1414 data: 0.0423 max mem: 9377 +Train: [9] [1300/6250] eta: 0:14:18 lr: 0.000124 grad: 0.0699 (0.0818) loss: 0.8575 (0.8554) time: 0.1533 data: 0.0476 max mem: 9377 +Train: [9] [1400/6250] eta: 0:13:56 lr: 0.000124 grad: 0.0825 (0.0814) loss: 0.8544 (0.8555) time: 0.1570 data: 0.0719 max mem: 9377 +Train: [9] [1500/6250] eta: 0:13:35 lr: 0.000124 grad: 0.0863 (0.0820) loss: 0.8544 (0.8554) time: 0.1430 data: 0.0520 max mem: 9377 +Train: [9] [1600/6250] eta: 0:13:14 lr: 0.000124 grad: 0.0749 (0.0818) loss: 0.8585 (0.8555) time: 0.1619 data: 0.0731 max mem: 9377 +Train: [9] [1700/6250] eta: 0:12:53 lr: 0.000124 grad: 0.0783 (0.0815) loss: 0.8532 (0.8554) time: 0.1380 data: 0.0462 max mem: 9377 +Train: [9] [1800/6250] eta: 0:12:35 lr: 0.000124 grad: 0.0719 (0.0814) loss: 0.8554 (0.8554) time: 0.1924 data: 0.1098 max mem: 9377 +Train: [9] [1900/6250] eta: 0:12:13 lr: 0.000124 grad: 0.0765 (0.0811) loss: 0.8575 (0.8554) time: 0.1630 data: 0.0806 max mem: 9377 +Train: [9] [2000/6250] eta: 0:11:54 lr: 0.000124 grad: 0.0673 (0.0811) loss: 0.8565 (0.8554) time: 0.1550 data: 0.0781 max mem: 9377 +Train: [9] [2100/6250] eta: 0:11:46 lr: 0.000124 grad: 0.0705 (0.0808) loss: 0.8586 (0.8555) time: 0.3163 data: 0.2281 max mem: 9377 +Train: [9] [2200/6250] eta: 0:11:25 lr: 0.000124 grad: 0.0803 (0.0807) loss: 0.8553 (0.8556) time: 0.2150 data: 0.1386 max mem: 9377 +Train: [9] [2300/6250] eta: 0:11:07 lr: 0.000124 grad: 0.0737 (0.0805) loss: 0.8569 (0.8556) time: 0.1770 data: 0.1016 max mem: 9377 +Train: [9] [2400/6250] eta: 0:10:53 lr: 0.000124 grad: 0.0788 (0.0805) loss: 0.8591 (0.8557) time: 0.2404 data: 0.1517 max mem: 9377 +Train: [9] [2500/6250] eta: 0:10:35 lr: 0.000124 grad: 0.0696 (0.0802) loss: 0.8581 (0.8557) time: 0.1391 data: 0.0596 max mem: 9377 +Train: [9] [2600/6250] eta: 0:10:18 lr: 0.000124 grad: 0.0705 (0.0800) loss: 0.8593 (0.8559) time: 0.1831 data: 0.0912 max mem: 9377 +Train: [9] [2700/6250] eta: 0:10:01 lr: 0.000124 grad: 0.0746 (0.0799) loss: 0.8563 (0.8559) time: 0.1620 data: 0.0852 max mem: 9377 +Train: [9] [2800/6250] eta: 0:09:45 lr: 0.000124 grad: 0.0715 (0.0798) loss: 0.8588 (0.8560) time: 0.1618 data: 0.0757 max mem: 9377 +Train: [9] [2900/6250] eta: 0:09:27 lr: 0.000124 grad: 0.0774 (0.0797) loss: 0.8583 (0.8560) time: 0.1627 data: 0.0824 max mem: 9377 +Train: [9] [3000/6250] eta: 0:09:10 lr: 0.000124 grad: 0.0727 (0.0795) loss: 0.8574 (0.8560) time: 0.1632 data: 0.0778 max mem: 9377 +Train: [9] [3100/6250] eta: 0:08:53 lr: 0.000124 grad: 0.0744 (0.0794) loss: 0.8522 (0.8560) time: 0.1208 data: 0.0289 max mem: 9377 +Train: [9] [3200/6250] eta: 0:08:34 lr: 0.000124 grad: 0.0811 (0.0794) loss: 0.8567 (0.8559) time: 0.1357 data: 0.0427 max mem: 9377 +Train: [9] [3300/6250] eta: 0:08:15 lr: 0.000124 grad: 0.0749 (0.0794) loss: 0.8562 (0.8559) time: 0.1474 data: 0.0553 max mem: 9377 +Train: [9] [3400/6250] eta: 0:07:57 lr: 0.000124 grad: 0.0746 (0.0795) loss: 0.8548 (0.8558) time: 0.1537 data: 0.0731 max mem: 9377 +Train: [9] [3500/6250] eta: 0:07:39 lr: 0.000124 grad: 0.0753 (0.0795) loss: 0.8532 (0.8558) time: 0.1553 data: 0.0596 max mem: 9377 +Train: [9] [3600/6250] eta: 0:07:21 lr: 0.000124 grad: 0.0839 (0.0795) loss: 0.8559 (0.8558) time: 0.1617 data: 0.0754 max mem: 9377 +Train: [9] [3700/6250] eta: 0:07:04 lr: 0.000124 grad: 0.0759 (0.0795) loss: 0.8533 (0.8558) time: 0.1479 data: 0.0534 max mem: 9377 +Train: [9] [3800/6250] eta: 0:06:47 lr: 0.000124 grad: 0.0740 (0.0794) loss: 0.8546 (0.8557) time: 0.1574 data: 0.0645 max mem: 9377 +Train: [9] [3900/6250] eta: 0:06:31 lr: 0.000124 grad: 0.0782 (0.0795) loss: 0.8502 (0.8557) time: 0.1720 data: 0.0857 max mem: 9377 +Train: [9] [4000/6250] eta: 0:06:13 lr: 0.000124 grad: 0.0751 (0.0794) loss: 0.8537 (0.8556) time: 0.1638 data: 0.0699 max mem: 9377 +Train: [9] [4100/6250] eta: 0:05:56 lr: 0.000124 grad: 0.0738 (0.0794) loss: 0.8546 (0.8556) time: 0.1557 data: 0.0589 max mem: 9377 +Train: [9] [4200/6250] eta: 0:05:39 lr: 0.000124 grad: 0.0697 (0.0794) loss: 0.8538 (0.8556) time: 0.1726 data: 0.0872 max mem: 9377 +Train: [9] [4300/6250] eta: 0:05:22 lr: 0.000124 grad: 0.0721 (0.0793) loss: 0.8588 (0.8555) time: 0.1599 data: 0.0777 max mem: 9377 +Train: [9] [4400/6250] eta: 0:05:05 lr: 0.000124 grad: 0.0738 (0.0792) loss: 0.8559 (0.8555) time: 0.1755 data: 0.0915 max mem: 9377 +Train: [9] [4500/6250] eta: 0:04:48 lr: 0.000124 grad: 0.0749 (0.0791) loss: 0.8522 (0.8555) time: 0.1585 data: 0.0778 max mem: 9377 +Train: [9] [4600/6250] eta: 0:04:31 lr: 0.000124 grad: 0.0754 (0.0791) loss: 0.8569 (0.8555) time: 0.1417 data: 0.0508 max mem: 9377 +Train: [9] [4700/6250] eta: 0:04:15 lr: 0.000124 grad: 0.0788 (0.0790) loss: 0.8547 (0.8555) time: 0.1562 data: 0.0605 max mem: 9377 +Train: [9] [4800/6250] eta: 0:03:58 lr: 0.000124 grad: 0.0706 (0.0789) loss: 0.8566 (0.8554) time: 0.1822 data: 0.0986 max mem: 9377 +Train: [9] [4900/6250] eta: 0:03:41 lr: 0.000124 grad: 0.0714 (0.0789) loss: 0.8554 (0.8554) time: 0.1285 data: 0.0389 max mem: 9377 +Train: [9] [5000/6250] eta: 0:03:24 lr: 0.000124 grad: 0.0717 (0.0788) loss: 0.8536 (0.8554) time: 0.1510 data: 0.0598 max mem: 9377 +Train: [9] [5100/6250] eta: 0:03:08 lr: 0.000124 grad: 0.0817 (0.0789) loss: 0.8544 (0.8554) time: 0.1401 data: 0.0421 max mem: 9377 +Train: [9] [5200/6250] eta: 0:02:51 lr: 0.000124 grad: 0.0751 (0.0788) loss: 0.8571 (0.8554) time: 0.1508 data: 0.0558 max mem: 9377 +Train: [9] [5300/6250] eta: 0:02:35 lr: 0.000124 grad: 0.0721 (0.0788) loss: 0.8557 (0.8554) time: 0.1502 data: 0.0606 max mem: 9377 +Train: [9] [5400/6250] eta: 0:02:19 lr: 0.000124 grad: 0.0735 (0.0787) loss: 0.8536 (0.8553) time: 0.1662 data: 0.0854 max mem: 9377 +Train: [9] [5500/6250] eta: 0:02:02 lr: 0.000124 grad: 0.0797 (0.0787) loss: 0.8551 (0.8553) time: 0.1836 data: 0.0927 max mem: 9377 +Train: [9] [5600/6250] eta: 0:01:46 lr: 0.000124 grad: 0.0707 (0.0786) loss: 0.8579 (0.8553) time: 0.1590 data: 0.0705 max mem: 9377 +Train: [9] [5700/6250] eta: 0:01:29 lr: 0.000124 grad: 0.0734 (0.0786) loss: 0.8535 (0.8553) time: 0.1569 data: 0.0669 max mem: 9377 +Train: [9] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.0757 (0.0787) loss: 0.8569 (0.8554) time: 0.1664 data: 0.0763 max mem: 9377 +Train: [9] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.0793 (0.0789) loss: 0.8481 (0.8553) time: 0.1644 data: 0.0804 max mem: 9377 +Train: [9] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0810 (0.0789) loss: 0.8507 (0.8553) time: 0.1467 data: 0.0533 max mem: 9377 +Train: [9] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0733 (0.0788) loss: 0.8520 (0.8552) time: 0.1544 data: 0.0634 max mem: 9377 +Train: [9] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0729 (0.0788) loss: 0.8590 (0.8552) time: 0.1838 data: 0.0986 max mem: 9377 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0705 (0.0789) loss: 0.8570 (0.8552) time: 0.1808 data: 0.0952 max mem: 9377 +Train: [9] Total time: 0:17:03 (0.1638 s / it) +Averaged stats: lr: 0.000124 grad: 0.0705 (0.0789) loss: 0.8570 (0.8552) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:06:41 loss: 0.8483 (0.8483) time: 6.4702 data: 6.3935 max mem: 9377 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.8516 (0.8508) time: 0.1362 data: 0.1109 max mem: 9377 +Eval (hcp-train-subset): [9] Total time: 0:00:16 (0.2626 s / it) +Averaged stats (hcp-train-subset): loss: 0.8516 (0.8508) +Making plots (hcp-train-subset): example=1 +Eval (hcp-val): [9] [ 0/62] eta: 0:04:48 loss: 0.8486 (0.8486) time: 4.6607 data: 4.6293 max mem: 9377 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8516 (0.8514) time: 0.1338 data: 0.1086 max mem: 9377 +Eval (hcp-val): [9] Total time: 0:00:14 (0.2399 s / it) +Averaged stats (hcp-val): loss: 0.8516 (0.8514) +Making plots (hcp-val): example=3 +Eval (nsd-val): [9] [ 0/62] eta: 0:06:07 loss: 0.8121 (0.8121) time: 5.9321 data: 5.9014 max mem: 9377 +Eval (nsd-val): [9] [61/62] eta: 0:00:00 loss: 0.8206 (0.8219) time: 0.1486 data: 0.1231 max mem: 9377 +Eval (nsd-val): [9] Total time: 0:00:14 (0.2419 s / it) +Averaged stats (nsd-val): loss: 0.8206 (0.8219) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00009.pth +Train: [10] [ 0/6250] eta: 12:07:56 lr: 0.000124 grad: 0.0891 (0.0891) loss: 0.8507 (0.8507) time: 6.9882 data: 6.8903 max mem: 9377 +Train: [10] [ 100/6250] eta: 0:23:09 lr: 0.000124 grad: 0.0730 (0.0739) loss: 0.8613 (0.8655) time: 0.1697 data: 0.0734 max mem: 9377 +Train: [10] [ 200/6250] eta: 0:20:19 lr: 0.000124 grad: 0.0785 (0.0770) loss: 0.8552 (0.8614) time: 0.1648 data: 0.0748 max mem: 9377 +Train: [10] [ 300/6250] eta: 0:19:32 lr: 0.000124 grad: 0.0773 (0.0783) loss: 0.8522 (0.8589) time: 0.1252 data: 0.0246 max mem: 9377 +Train: [10] [ 400/6250] eta: 0:18:43 lr: 0.000124 grad: 0.0716 (0.0794) loss: 0.8513 (0.8569) time: 0.1653 data: 0.0649 max mem: 9377 +Train: [10] [ 500/6250] eta: 0:18:09 lr: 0.000124 grad: 0.0771 (0.0794) loss: 0.8485 (0.8553) time: 0.1834 data: 0.1013 max mem: 9377 +Train: [10] [ 600/6250] eta: 0:17:18 lr: 0.000124 grad: 0.0773 (0.0790) loss: 0.8458 (0.8545) time: 0.1383 data: 0.0560 max mem: 9377 +Train: [10] [ 700/6250] eta: 0:16:44 lr: 0.000124 grad: 0.0703 (0.0788) loss: 0.8535 (0.8540) time: 0.1698 data: 0.0910 max mem: 9377 +Train: [10] [ 800/6250] eta: 0:16:20 lr: 0.000124 grad: 0.0767 (0.0783) loss: 0.8528 (0.8538) time: 0.1644 data: 0.0763 max mem: 9377 +Train: [10] [ 900/6250] eta: 0:15:47 lr: 0.000124 grad: 0.0675 (0.0777) loss: 0.8544 (0.8537) time: 0.1540 data: 0.0522 max mem: 9377 +Train: [10] [1000/6250] eta: 0:15:28 lr: 0.000124 grad: 0.0705 (0.0776) loss: 0.8483 (0.8536) time: 0.1686 data: 0.0691 max mem: 9377 +Train: [10] [1100/6250] eta: 0:15:05 lr: 0.000124 grad: 0.0727 (0.0773) loss: 0.8559 (0.8537) time: 0.1727 data: 0.0659 max mem: 9377 +Train: [10] [1200/6250] eta: 0:14:43 lr: 0.000124 grad: 0.0704 (0.0769) loss: 0.8533 (0.8537) time: 0.1662 data: 0.0683 max mem: 9377 +Train: [10] [1300/6250] eta: 0:14:23 lr: 0.000124 grad: 0.0728 (0.0767) loss: 0.8549 (0.8537) time: 0.1562 data: 0.0649 max mem: 9377 +Train: [10] [1400/6250] eta: 0:13:57 lr: 0.000124 grad: 0.0693 (0.0765) loss: 0.8566 (0.8537) time: 0.1344 data: 0.0411 max mem: 9377 +Train: [10] [1500/6250] eta: 0:13:33 lr: 0.000124 grad: 0.0655 (0.0762) loss: 0.8585 (0.8537) time: 0.1608 data: 0.0669 max mem: 9377 +Train: [10] [1600/6250] eta: 0:13:10 lr: 0.000124 grad: 0.0740 (0.0760) loss: 0.8525 (0.8537) time: 0.1621 data: 0.0772 max mem: 9377 +Train: [10] [1700/6250] eta: 0:12:52 lr: 0.000124 grad: 0.0694 (0.0762) loss: 0.8499 (0.8537) time: 0.1940 data: 0.1103 max mem: 9377 +Train: [10] [1800/6250] eta: 0:12:34 lr: 0.000124 grad: 0.0734 (0.0761) loss: 0.8497 (0.8536) time: 0.1139 data: 0.0334 max mem: 9377 +Train: [10] [1900/6250] eta: 0:12:17 lr: 0.000124 grad: 0.0705 (0.0758) loss: 0.8508 (0.8536) time: 0.1360 data: 0.0563 max mem: 9377 +Train: [10] [2000/6250] eta: 0:12:01 lr: 0.000124 grad: 0.0756 (0.0759) loss: 0.8539 (0.8535) time: 0.1346 data: 0.0531 max mem: 9377 +Train: [10] [2100/6250] eta: 0:11:44 lr: 0.000124 grad: 0.0689 (0.0758) loss: 0.8507 (0.8534) time: 0.1825 data: 0.1000 max mem: 9377 +Train: [10] [2200/6250] eta: 0:11:27 lr: 0.000124 grad: 0.0692 (0.0757) loss: 0.8556 (0.8534) time: 0.1303 data: 0.0386 max mem: 9377 +Train: [10] [2300/6250] eta: 0:11:09 lr: 0.000124 grad: 0.0710 (0.0756) loss: 0.8540 (0.8534) time: 0.1691 data: 0.0872 max mem: 9377 +Train: [10] [2400/6250] eta: 0:10:51 lr: 0.000124 grad: 0.0703 (0.0753) loss: 0.8545 (0.8534) time: 0.1707 data: 0.0896 max mem: 9377 +Train: [10] [2500/6250] eta: 0:10:34 lr: 0.000124 grad: 0.0728 (0.0753) loss: 0.8499 (0.8533) time: 0.1890 data: 0.1061 max mem: 9377 +Train: [10] [2600/6250] eta: 0:10:14 lr: 0.000124 grad: 0.0740 (0.0753) loss: 0.8508 (0.8532) time: 0.1691 data: 0.0837 max mem: 9377 +Train: [10] [2700/6250] eta: 0:09:56 lr: 0.000124 grad: 0.0748 (0.0754) loss: 0.8550 (0.8532) time: 0.1617 data: 0.0746 max mem: 9377 +Train: [10] [2800/6250] eta: 0:09:38 lr: 0.000124 grad: 0.0690 (0.0754) loss: 0.8542 (0.8531) time: 0.1510 data: 0.0626 max mem: 9377 +Train: [10] [2900/6250] eta: 0:09:20 lr: 0.000124 grad: 0.0738 (0.0754) loss: 0.8437 (0.8529) time: 0.1552 data: 0.0762 max mem: 9377 +Train: [10] [3000/6250] eta: 0:09:03 lr: 0.000124 grad: 0.0732 (0.0755) loss: 0.8507 (0.8529) time: 0.1544 data: 0.0679 max mem: 9377 +Train: [10] [3100/6250] eta: 0:08:46 lr: 0.000124 grad: 0.0798 (0.0756) loss: 0.8466 (0.8528) time: 0.1726 data: 0.0876 max mem: 9377 +Train: [10] [3200/6250] eta: 0:08:29 lr: 0.000124 grad: 0.0727 (0.0756) loss: 0.8490 (0.8527) time: 0.1467 data: 0.0540 max mem: 9377 +Train: [10] [3300/6250] eta: 0:08:12 lr: 0.000124 grad: 0.0726 (0.0755) loss: 0.8499 (0.8526) time: 0.1435 data: 0.0558 max mem: 9377 +Train: [10] [3400/6250] eta: 0:07:55 lr: 0.000124 grad: 0.0829 (0.0755) loss: 0.8504 (0.8526) time: 0.1687 data: 0.0828 max mem: 9377 +Train: [10] [3500/6250] eta: 0:07:38 lr: 0.000124 grad: 0.0735 (0.0755) loss: 0.8491 (0.8525) time: 0.1353 data: 0.0394 max mem: 9377 +Train: [10] [3600/6250] eta: 0:07:21 lr: 0.000124 grad: 0.0716 (0.0756) loss: 0.8503 (0.8524) time: 0.1382 data: 0.0552 max mem: 9377 +Train: [10] [3700/6250] eta: 0:07:04 lr: 0.000124 grad: 0.0686 (0.0756) loss: 0.8449 (0.8523) time: 0.1288 data: 0.0288 max mem: 9377 +Train: [10] [3800/6250] eta: 0:06:46 lr: 0.000124 grad: 0.0747 (0.0755) loss: 0.8516 (0.8522) time: 0.1549 data: 0.0652 max mem: 9377 +Train: [10] [3900/6250] eta: 0:06:29 lr: 0.000124 grad: 0.0689 (0.0755) loss: 0.8489 (0.8521) time: 0.1419 data: 0.0531 max mem: 9377 +Train: [10] [4000/6250] eta: 0:06:12 lr: 0.000124 grad: 0.0740 (0.0754) loss: 0.8491 (0.8520) time: 0.1600 data: 0.0778 max mem: 9377 +Train: [10] [4100/6250] eta: 0:05:55 lr: 0.000124 grad: 0.0690 (0.0753) loss: 0.8508 (0.8520) time: 0.1723 data: 0.0904 max mem: 9377 +Train: [10] [4200/6250] eta: 0:05:38 lr: 0.000124 grad: 0.0650 (0.0752) loss: 0.8519 (0.8519) time: 0.1014 data: 0.0035 max mem: 9377 +Train: [10] [4300/6250] eta: 0:05:21 lr: 0.000124 grad: 0.0695 (0.0752) loss: 0.8485 (0.8518) time: 0.1754 data: 0.0912 max mem: 9377 +Train: [10] [4400/6250] eta: 0:05:04 lr: 0.000124 grad: 0.0713 (0.0752) loss: 0.8482 (0.8518) time: 0.1629 data: 0.0768 max mem: 9377 +Train: [10] [4500/6250] eta: 0:04:47 lr: 0.000124 grad: 0.0667 (0.0752) loss: 0.8489 (0.8517) time: 0.1647 data: 0.0796 max mem: 9377 +Train: [10] [4600/6250] eta: 0:04:31 lr: 0.000124 grad: 0.0690 (0.0751) loss: 0.8470 (0.8516) time: 0.1405 data: 0.0577 max mem: 9377 +Train: [10] [4700/6250] eta: 0:04:14 lr: 0.000124 grad: 0.0730 (0.0751) loss: 0.8470 (0.8515) time: 0.1571 data: 0.0554 max mem: 9377 +Train: [10] [4800/6250] eta: 0:03:57 lr: 0.000124 grad: 0.0722 (0.0750) loss: 0.8474 (0.8515) time: 0.1600 data: 0.0707 max mem: 9377 +Train: [10] [4900/6250] eta: 0:03:41 lr: 0.000124 grad: 0.0713 (0.0750) loss: 0.8503 (0.8514) time: 0.1577 data: 0.0655 max mem: 9377 +Train: [10] [5000/6250] eta: 0:03:24 lr: 0.000124 grad: 0.0745 (0.0750) loss: 0.8455 (0.8514) time: 0.1483 data: 0.0583 max mem: 9377 +Train: [10] [5100/6250] eta: 0:03:07 lr: 0.000124 grad: 0.0640 (0.0750) loss: 0.8532 (0.8514) time: 0.1315 data: 0.0470 max mem: 9377 +Train: [10] [5200/6250] eta: 0:02:51 lr: 0.000124 grad: 0.0696 (0.0749) loss: 0.8510 (0.8513) time: 0.1526 data: 0.0588 max mem: 9377 +Train: [10] [5300/6250] eta: 0:02:35 lr: 0.000124 grad: 0.0712 (0.0749) loss: 0.8526 (0.8513) time: 0.1607 data: 0.0690 max mem: 9377 +Train: [10] [5400/6250] eta: 0:02:18 lr: 0.000124 grad: 0.0670 (0.0749) loss: 0.8548 (0.8513) time: 0.1574 data: 0.0723 max mem: 9377 +Train: [10] [5500/6250] eta: 0:02:02 lr: 0.000124 grad: 0.0725 (0.0749) loss: 0.8509 (0.8513) time: 0.1567 data: 0.0700 max mem: 9377 +Train: [10] [5600/6250] eta: 0:01:45 lr: 0.000124 grad: 0.0711 (0.0750) loss: 0.8525 (0.8512) time: 0.1522 data: 0.0614 max mem: 9377 +Train: [10] [5700/6250] eta: 0:01:29 lr: 0.000124 grad: 0.0683 (0.0750) loss: 0.8585 (0.8513) time: 0.1570 data: 0.0596 max mem: 9377 +Train: [10] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.0743 (0.0750) loss: 0.8540 (0.8513) time: 0.1466 data: 0.0555 max mem: 9377 +Train: [10] [5900/6250] eta: 0:00:56 lr: 0.000124 grad: 0.0754 (0.0750) loss: 0.8504 (0.8513) time: 0.1510 data: 0.0625 max mem: 9377 +Train: [10] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0704 (0.0750) loss: 0.8566 (0.8514) time: 0.1494 data: 0.0589 max mem: 9377 +Train: [10] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0723 (0.0749) loss: 0.8507 (0.8514) time: 0.1339 data: 0.0474 max mem: 9377 +Train: [10] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0680 (0.0748) loss: 0.8490 (0.8514) time: 0.1432 data: 0.0457 max mem: 9377 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0716 (0.0748) loss: 0.8510 (0.8514) time: 0.1478 data: 0.0588 max mem: 9377 +Train: [10] Total time: 0:16:55 (0.1625 s / it) +Averaged stats: lr: 0.000124 grad: 0.0716 (0.0748) loss: 0.8510 (0.8514) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:06:29 loss: 0.8493 (0.8493) time: 6.2865 data: 6.2564 max mem: 9377 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.8472 (0.8497) time: 0.1428 data: 0.1176 max mem: 9377 +Eval (hcp-train-subset): [10] Total time: 0:00:15 (0.2526 s / it) +Averaged stats (hcp-train-subset): loss: 0.8472 (0.8497) +Eval (hcp-val): [10] [ 0/62] eta: 0:04:29 loss: 0.8496 (0.8496) time: 4.3413 data: 4.2307 max mem: 9377 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8494 (0.8506) time: 0.1600 data: 0.1331 max mem: 9377 +Eval (hcp-val): [10] Total time: 0:00:14 (0.2318 s / it) +Averaged stats (hcp-val): loss: 0.8494 (0.8506) +Eval (nsd-val): [10] [ 0/62] eta: 0:04:39 loss: 0.8090 (0.8090) time: 4.5085 data: 4.4727 max mem: 9377 +Eval (nsd-val): [10] [61/62] eta: 0:00:00 loss: 0.8193 (0.8197) time: 0.1407 data: 0.1154 max mem: 9377 +Eval (nsd-val): [10] Total time: 0:00:13 (0.2233 s / it) +Averaged stats (nsd-val): loss: 0.8193 (0.8197) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [11] [ 0/6250] eta: 9:32:02 lr: 0.000124 grad: 0.0627 (0.0627) loss: 0.8470 (0.8470) time: 5.4916 data: 5.3795 max mem: 9377 +Train: [11] [ 100/6250] eta: 0:22:44 lr: 0.000124 grad: 0.0671 (0.0830) loss: 0.8530 (0.8564) time: 0.1713 data: 0.0778 max mem: 9377 +Train: [11] [ 200/6250] eta: 0:20:22 lr: 0.000124 grad: 0.0686 (0.0799) loss: 0.8524 (0.8553) time: 0.1780 data: 0.0862 max mem: 9377 +Train: [11] [ 300/6250] eta: 0:18:47 lr: 0.000124 grad: 0.0684 (0.0771) loss: 0.8551 (0.8550) time: 0.1681 data: 0.0676 max mem: 9377 +Train: [11] [ 400/6250] eta: 0:17:48 lr: 0.000124 grad: 0.0652 (0.0752) loss: 0.8512 (0.8545) time: 0.1598 data: 0.0734 max mem: 9377 +Train: [11] [ 500/6250] eta: 0:17:31 lr: 0.000124 grad: 0.0700 (0.0763) loss: 0.8472 (0.8535) time: 0.1889 data: 0.0992 max mem: 9377 +Train: [11] [ 600/6250] eta: 0:16:55 lr: 0.000124 grad: 0.0720 (0.0751) loss: 0.8557 (0.8534) time: 0.1702 data: 0.0872 max mem: 9377 +Train: [11] [ 700/6250] eta: 0:16:28 lr: 0.000124 grad: 0.0703 (0.0744) loss: 0.8545 (0.8534) time: 0.1557 data: 0.0647 max mem: 9377 +Train: [11] [ 800/6250] eta: 0:16:02 lr: 0.000124 grad: 0.0653 (0.0740) loss: 0.8548 (0.8533) time: 0.1515 data: 0.0559 max mem: 9377 +Train: [11] [ 900/6250] eta: 0:15:42 lr: 0.000124 grad: 0.0692 (0.0738) loss: 0.8525 (0.8534) time: 0.1483 data: 0.0431 max mem: 9377 +Train: [11] [1000/6250] eta: 0:15:10 lr: 0.000124 grad: 0.0696 (0.0734) loss: 0.8560 (0.8533) time: 0.1167 data: 0.0189 max mem: 9377 +Train: [11] [1100/6250] eta: 0:14:41 lr: 0.000124 grad: 0.0676 (0.0729) loss: 0.8534 (0.8533) time: 0.1344 data: 0.0307 max mem: 9377 +Train: [11] [1200/6250] eta: 0:14:17 lr: 0.000124 grad: 0.0664 (0.0726) loss: 0.8489 (0.8533) time: 0.1659 data: 0.0776 max mem: 9377 +Train: [11] [1300/6250] eta: 0:13:51 lr: 0.000124 grad: 0.0647 (0.0723) loss: 0.8537 (0.8532) time: 0.1631 data: 0.0590 max mem: 9377 +Train: [11] [1400/6250] eta: 0:13:26 lr: 0.000124 grad: 0.0694 (0.0722) loss: 0.8513 (0.8532) time: 0.1461 data: 0.0557 max mem: 9377 +Train: [11] [1500/6250] eta: 0:13:04 lr: 0.000124 grad: 0.0681 (0.0719) loss: 0.8540 (0.8531) time: 0.1618 data: 0.0807 max mem: 9377 +Train: [11] [1600/6250] eta: 0:12:46 lr: 0.000124 grad: 0.0682 (0.0716) loss: 0.8530 (0.8531) time: 0.1682 data: 0.0847 max mem: 9377 +Train: [11] [1700/6250] eta: 0:12:32 lr: 0.000124 grad: 0.0669 (0.0714) loss: 0.8542 (0.8532) time: 0.1722 data: 0.0955 max mem: 9377 +Train: [11] [1800/6250] eta: 0:12:17 lr: 0.000124 grad: 0.0638 (0.0713) loss: 0.8529 (0.8531) time: 0.1907 data: 0.1126 max mem: 9377 +Train: [11] [1900/6250] eta: 0:11:58 lr: 0.000124 grad: 0.0653 (0.0712) loss: 0.8478 (0.8530) time: 0.1792 data: 0.0932 max mem: 9377 +Train: [11] [2000/6250] eta: 0:11:40 lr: 0.000124 grad: 0.0674 (0.0711) loss: 0.8534 (0.8529) time: 0.1583 data: 0.0788 max mem: 9377 +Train: [11] [2100/6250] eta: 0:11:23 lr: 0.000124 grad: 0.0676 (0.0710) loss: 0.8514 (0.8528) time: 0.1488 data: 0.0650 max mem: 9377 +Train: [11] [2200/6250] eta: 0:11:04 lr: 0.000124 grad: 0.0663 (0.0710) loss: 0.8510 (0.8527) time: 0.1625 data: 0.0855 max mem: 9377 +Train: [11] [2300/6250] eta: 0:10:49 lr: 0.000124 grad: 0.0655 (0.0708) loss: 0.8521 (0.8526) time: 0.1711 data: 0.0914 max mem: 9377 +Train: [11] [2400/6250] eta: 0:10:31 lr: 0.000124 grad: 0.0666 (0.0708) loss: 0.8509 (0.8525) time: 0.1557 data: 0.0702 max mem: 9377 +Train: [11] [2500/6250] eta: 0:10:14 lr: 0.000124 grad: 0.0698 (0.0709) loss: 0.8493 (0.8524) time: 0.1408 data: 0.0611 max mem: 9377 +Train: [11] [2600/6250] eta: 0:09:58 lr: 0.000124 grad: 0.0673 (0.0708) loss: 0.8517 (0.8524) time: 0.1722 data: 0.0848 max mem: 9377 +Train: [11] [2700/6250] eta: 0:09:41 lr: 0.000124 grad: 0.0656 (0.0708) loss: 0.8528 (0.8523) time: 0.1744 data: 0.0878 max mem: 9377 +Train: [11] [2800/6250] eta: 0:09:24 lr: 0.000124 grad: 0.0703 (0.0708) loss: 0.8502 (0.8522) time: 0.1593 data: 0.0697 max mem: 9377 +Train: [11] [2900/6250] eta: 0:09:06 lr: 0.000124 grad: 0.0674 (0.0708) loss: 0.8489 (0.8521) time: 0.1688 data: 0.0877 max mem: 9377 +Train: [11] [3000/6250] eta: 0:08:49 lr: 0.000124 grad: 0.0694 (0.0707) loss: 0.8438 (0.8521) time: 0.1506 data: 0.0661 max mem: 9377 +Train: [11] [3100/6250] eta: 0:08:32 lr: 0.000124 grad: 0.0682 (0.0707) loss: 0.8499 (0.8521) time: 0.1364 data: 0.0539 max mem: 9377 +Train: [11] [3200/6250] eta: 0:08:15 lr: 0.000124 grad: 0.0702 (0.0708) loss: 0.8497 (0.8520) time: 0.1573 data: 0.0711 max mem: 9377 +Train: [11] [3300/6250] eta: 0:07:59 lr: 0.000124 grad: 0.0707 (0.0709) loss: 0.8524 (0.8519) time: 0.1527 data: 0.0684 max mem: 9377 +Train: [11] [3400/6250] eta: 0:07:42 lr: 0.000124 grad: 0.0694 (0.0708) loss: 0.8461 (0.8518) time: 0.1702 data: 0.0895 max mem: 9377 +Train: [11] [3500/6250] eta: 0:07:26 lr: 0.000124 grad: 0.0679 (0.0708) loss: 0.8486 (0.8518) time: 0.1296 data: 0.0319 max mem: 9377 +Train: [11] [3600/6250] eta: 0:07:10 lr: 0.000124 grad: 0.0700 (0.0708) loss: 0.8513 (0.8518) time: 0.1464 data: 0.0588 max mem: 9377 +Train: [11] [3700/6250] eta: 0:06:53 lr: 0.000124 grad: 0.0745 (0.0709) loss: 0.8458 (0.8517) time: 0.1611 data: 0.0731 max mem: 9377 +Train: [11] [3800/6250] eta: 0:06:36 lr: 0.000124 grad: 0.0651 (0.0709) loss: 0.8473 (0.8516) time: 0.1353 data: 0.0459 max mem: 9377 +Train: [11] [3900/6250] eta: 0:06:20 lr: 0.000124 grad: 0.0687 (0.0709) loss: 0.8467 (0.8516) time: 0.1525 data: 0.0685 max mem: 9377 +Train: [11] [4000/6250] eta: 0:06:03 lr: 0.000123 grad: 0.0699 (0.0709) loss: 0.8468 (0.8515) time: 0.1461 data: 0.0621 max mem: 9377 +Train: [11] [4100/6250] eta: 0:05:47 lr: 0.000123 grad: 0.0688 (0.0710) loss: 0.8555 (0.8515) time: 0.1473 data: 0.0643 max mem: 9377 +Train: [11] [4200/6250] eta: 0:05:30 lr: 0.000123 grad: 0.0719 (0.0710) loss: 0.8497 (0.8514) time: 0.1599 data: 0.0755 max mem: 9377 +Train: [11] [4300/6250] eta: 0:05:14 lr: 0.000123 grad: 0.0709 (0.0710) loss: 0.8491 (0.8514) time: 0.1416 data: 0.0514 max mem: 9377 +Train: [11] [4400/6250] eta: 0:04:57 lr: 0.000123 grad: 0.0710 (0.0711) loss: 0.8441 (0.8513) time: 0.1137 data: 0.0229 max mem: 9377 +Train: [11] [4500/6250] eta: 0:04:41 lr: 0.000123 grad: 0.0708 (0.0711) loss: 0.8467 (0.8512) time: 0.1674 data: 0.0838 max mem: 9377 +Train: [11] [4600/6250] eta: 0:04:25 lr: 0.000123 grad: 0.0688 (0.0712) loss: 0.8543 (0.8512) time: 0.1428 data: 0.0618 max mem: 9377 +Train: [11] [4700/6250] eta: 0:04:08 lr: 0.000123 grad: 0.0699 (0.0712) loss: 0.8496 (0.8512) time: 0.1571 data: 0.0676 max mem: 9377 +Train: [11] [4800/6250] eta: 0:03:52 lr: 0.000123 grad: 0.0684 (0.0713) loss: 0.8505 (0.8511) time: 0.1570 data: 0.0671 max mem: 9377 +Train: [11] [4900/6250] eta: 0:03:36 lr: 0.000123 grad: 0.0697 (0.0713) loss: 0.8478 (0.8511) time: 0.1466 data: 0.0603 max mem: 9377 +Train: [11] [5000/6250] eta: 0:03:20 lr: 0.000123 grad: 0.0710 (0.0714) loss: 0.8491 (0.8510) time: 0.1765 data: 0.0856 max mem: 9377 +Train: [11] [5100/6250] eta: 0:03:04 lr: 0.000123 grad: 0.0674 (0.0714) loss: 0.8431 (0.8510) time: 0.1647 data: 0.0783 max mem: 9377 +Train: [11] [5200/6250] eta: 0:02:48 lr: 0.000123 grad: 0.0736 (0.0714) loss: 0.8503 (0.8509) time: 0.1775 data: 0.0873 max mem: 9377 +Train: [11] [5300/6250] eta: 0:02:31 lr: 0.000123 grad: 0.0683 (0.0714) loss: 0.8483 (0.8509) time: 0.1545 data: 0.0651 max mem: 9377 +Train: [11] [5400/6250] eta: 0:02:15 lr: 0.000123 grad: 0.0688 (0.0714) loss: 0.8524 (0.8509) time: 0.1248 data: 0.0347 max mem: 9377 +Train: [11] [5500/6250] eta: 0:01:59 lr: 0.000123 grad: 0.0702 (0.0714) loss: 0.8478 (0.8508) time: 0.1585 data: 0.0666 max mem: 9377 +Train: [11] [5600/6250] eta: 0:01:43 lr: 0.000123 grad: 0.0704 (0.0714) loss: 0.8498 (0.8508) time: 0.1743 data: 0.0927 max mem: 9377 +Train: [11] [5700/6250] eta: 0:01:27 lr: 0.000123 grad: 0.0702 (0.0715) loss: 0.8505 (0.8507) time: 0.1539 data: 0.0638 max mem: 9377 +Train: [11] [5800/6250] eta: 0:01:11 lr: 0.000123 grad: 0.0683 (0.0715) loss: 0.8489 (0.8507) time: 0.1535 data: 0.0728 max mem: 9377 +Train: [11] [5900/6250] eta: 0:00:55 lr: 0.000123 grad: 0.0701 (0.0715) loss: 0.8491 (0.8507) time: 0.1538 data: 0.0564 max mem: 9377 +Train: [11] [6000/6250] eta: 0:00:39 lr: 0.000123 grad: 0.0807 (0.0715) loss: 0.8490 (0.8506) time: 0.1426 data: 0.0469 max mem: 9377 +Train: [11] [6100/6250] eta: 0:00:23 lr: 0.000123 grad: 0.0716 (0.0715) loss: 0.8489 (0.8506) time: 0.1532 data: 0.0634 max mem: 9377 +Train: [11] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0638 (0.0715) loss: 0.8522 (0.8506) time: 0.1727 data: 0.0875 max mem: 9377 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0714 (0.0716) loss: 0.8470 (0.8506) time: 0.1479 data: 0.0565 max mem: 9377 +Train: [11] Total time: 0:16:43 (0.1606 s / it) +Averaged stats: lr: 0.000123 grad: 0.0714 (0.0716) loss: 0.8470 (0.8506) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:05:27 loss: 0.8478 (0.8478) time: 5.2768 data: 5.2470 max mem: 9377 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8475 (0.8480) time: 0.1427 data: 0.1175 max mem: 9377 +Eval (hcp-train-subset): [11] Total time: 0:00:14 (0.2387 s / it) +Averaged stats (hcp-train-subset): loss: 0.8475 (0.8480) +Eval (hcp-val): [11] [ 0/62] eta: 0:05:31 loss: 0.8451 (0.8451) time: 5.3402 data: 5.2768 max mem: 9377 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8480 (0.8487) time: 0.1295 data: 0.1043 max mem: 9377 +Eval (hcp-val): [11] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (hcp-val): loss: 0.8480 (0.8487) +Eval (nsd-val): [11] [ 0/62] eta: 0:03:37 loss: 0.8059 (0.8059) time: 3.5147 data: 3.4203 max mem: 9377 +Eval (nsd-val): [11] [61/62] eta: 0:00:00 loss: 0.8135 (0.8158) time: 0.1327 data: 0.1061 max mem: 9377 +Eval (nsd-val): [11] Total time: 0:00:14 (0.2266 s / it) +Averaged stats (nsd-val): loss: 0.8135 (0.8158) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [12] [ 0/6250] eta: 11:40:00 lr: 0.000123 grad: 0.0662 (0.0662) loss: 0.8690 (0.8690) time: 6.7201 data: 6.6233 max mem: 9377 +Train: [12] [ 100/6250] eta: 0:23:04 lr: 0.000123 grad: 0.0808 (0.0866) loss: 0.8471 (0.8526) time: 0.1798 data: 0.0935 max mem: 9377 +Train: [12] [ 200/6250] eta: 0:20:08 lr: 0.000123 grad: 0.0710 (0.0803) loss: 0.8465 (0.8494) time: 0.1944 data: 0.0982 max mem: 9377 +Train: [12] [ 300/6250] eta: 0:18:56 lr: 0.000123 grad: 0.0696 (0.0774) loss: 0.8453 (0.8488) time: 0.1581 data: 0.0710 max mem: 9377 +Train: [12] [ 400/6250] eta: 0:17:58 lr: 0.000123 grad: 0.0700 (0.0762) loss: 0.8494 (0.8484) time: 0.1849 data: 0.0953 max mem: 9377 +Train: [12] [ 500/6250] eta: 0:17:15 lr: 0.000123 grad: 0.0737 (0.0754) loss: 0.8450 (0.8481) time: 0.1615 data: 0.0763 max mem: 9377 +Train: [12] [ 600/6250] eta: 0:16:34 lr: 0.000123 grad: 0.0682 (0.0756) loss: 0.8468 (0.8478) time: 0.1603 data: 0.0688 max mem: 9377 +Train: [12] [ 700/6250] eta: 0:16:03 lr: 0.000123 grad: 0.0664 (0.0750) loss: 0.8479 (0.8476) time: 0.1615 data: 0.0798 max mem: 9377 +Train: [12] [ 800/6250] eta: 0:15:28 lr: 0.000123 grad: 0.0687 (0.0745) loss: 0.8431 (0.8475) time: 0.1465 data: 0.0566 max mem: 9377 +Train: [12] [ 900/6250] eta: 0:15:01 lr: 0.000123 grad: 0.0735 (0.0741) loss: 0.8454 (0.8473) time: 0.1452 data: 0.0571 max mem: 9377 +Train: [12] [1000/6250] eta: 0:14:30 lr: 0.000123 grad: 0.0666 (0.0739) loss: 0.8481 (0.8472) time: 0.1366 data: 0.0515 max mem: 9377 +Train: [12] [1100/6250] eta: 0:14:08 lr: 0.000123 grad: 0.0683 (0.0735) loss: 0.8462 (0.8473) time: 0.1613 data: 0.0646 max mem: 9377 +Train: [12] [1200/6250] eta: 0:13:42 lr: 0.000123 grad: 0.0710 (0.0733) loss: 0.8416 (0.8471) time: 0.1395 data: 0.0350 max mem: 9377 +Train: [12] [1300/6250] eta: 0:13:20 lr: 0.000123 grad: 0.0682 (0.0732) loss: 0.8434 (0.8470) time: 0.1300 data: 0.0252 max mem: 9377 +Train: [12] [1400/6250] eta: 0:12:59 lr: 0.000123 grad: 0.0681 (0.0731) loss: 0.8446 (0.8469) time: 0.1371 data: 0.0334 max mem: 9377 +Train: [12] [1500/6250] eta: 0:12:38 lr: 0.000123 grad: 0.0680 (0.0730) loss: 0.8465 (0.8468) time: 0.1461 data: 0.0579 max mem: 9377 +Train: [12] [1600/6250] eta: 0:12:20 lr: 0.000123 grad: 0.0721 (0.0728) loss: 0.8408 (0.8467) time: 0.1537 data: 0.0559 max mem: 9377 +Train: [12] [1700/6250] eta: 0:12:03 lr: 0.000123 grad: 0.0675 (0.0726) loss: 0.8495 (0.8467) time: 0.1600 data: 0.0719 max mem: 9377 +Train: [12] [1800/6250] eta: 0:11:49 lr: 0.000123 grad: 0.0687 (0.0725) loss: 0.8482 (0.8467) time: 0.1660 data: 0.0804 max mem: 9377 +Train: [12] [1900/6250] eta: 0:11:35 lr: 0.000123 grad: 0.0667 (0.0723) loss: 0.8474 (0.8467) time: 0.1639 data: 0.0793 max mem: 9377 +Train: [12] [2000/6250] eta: 0:11:20 lr: 0.000123 grad: 0.0704 (0.0722) loss: 0.8450 (0.8467) time: 0.1595 data: 0.0704 max mem: 9377 +Train: [12] [2100/6250] eta: 0:11:05 lr: 0.000123 grad: 0.0697 (0.0720) loss: 0.8476 (0.8468) time: 0.1370 data: 0.0519 max mem: 9377 +Train: [12] [2200/6250] eta: 0:10:48 lr: 0.000123 grad: 0.0634 (0.0720) loss: 0.8548 (0.8469) time: 0.1500 data: 0.0598 max mem: 9377 +Train: [12] [2300/6250] eta: 0:10:32 lr: 0.000123 grad: 0.0668 (0.0719) loss: 0.8480 (0.8471) time: 0.1298 data: 0.0386 max mem: 9377 +Train: [12] [2400/6250] eta: 0:10:14 lr: 0.000123 grad: 0.0683 (0.0718) loss: 0.8449 (0.8471) time: 0.1483 data: 0.0545 max mem: 9377 +Train: [12] [2500/6250] eta: 0:09:57 lr: 0.000123 grad: 0.0651 (0.0717) loss: 0.8497 (0.8472) time: 0.1493 data: 0.0548 max mem: 9377 +Train: [12] [2600/6250] eta: 0:09:41 lr: 0.000123 grad: 0.0722 (0.0717) loss: 0.8474 (0.8472) time: 0.1716 data: 0.0875 max mem: 9377 +Train: [12] [2700/6250] eta: 0:09:23 lr: 0.000123 grad: 0.0658 (0.0716) loss: 0.8499 (0.8473) time: 0.1532 data: 0.0636 max mem: 9377 +Train: [12] [2800/6250] eta: 0:09:07 lr: 0.000123 grad: 0.0687 (0.0715) loss: 0.8461 (0.8473) time: 0.1313 data: 0.0425 max mem: 9377 +Train: [12] [2900/6250] eta: 0:08:49 lr: 0.000123 grad: 0.0669 (0.0714) loss: 0.8481 (0.8473) time: 0.1444 data: 0.0581 max mem: 9377 +Train: [12] [3000/6250] eta: 0:08:33 lr: 0.000123 grad: 0.0668 (0.0714) loss: 0.8432 (0.8474) time: 0.1475 data: 0.0562 max mem: 9377 +Train: [12] [3100/6250] eta: 0:08:17 lr: 0.000123 grad: 0.0676 (0.0712) loss: 0.8479 (0.8475) time: 0.1678 data: 0.0816 max mem: 9377 +Train: [12] [3200/6250] eta: 0:08:01 lr: 0.000123 grad: 0.0686 (0.0713) loss: 0.8477 (0.8475) time: 0.1602 data: 0.0727 max mem: 9377 +Train: [12] [3300/6250] eta: 0:07:46 lr: 0.000123 grad: 0.0650 (0.0713) loss: 0.8504 (0.8475) time: 0.2080 data: 0.1207 max mem: 9377 +Train: [12] [3400/6250] eta: 0:07:30 lr: 0.000123 grad: 0.0686 (0.0712) loss: 0.8467 (0.8476) time: 0.1306 data: 0.0333 max mem: 9377 +Train: [12] [3500/6250] eta: 0:07:13 lr: 0.000123 grad: 0.0647 (0.0711) loss: 0.8474 (0.8476) time: 0.1313 data: 0.0389 max mem: 9377 +Train: [12] [3600/6250] eta: 0:06:58 lr: 0.000123 grad: 0.0690 (0.0712) loss: 0.8521 (0.8477) time: 0.1586 data: 0.0750 max mem: 9377 +Train: [12] [3700/6250] eta: 0:06:42 lr: 0.000123 grad: 0.0640 (0.0711) loss: 0.8497 (0.8478) time: 0.1445 data: 0.0590 max mem: 9377 +Train: [12] [3800/6250] eta: 0:06:26 lr: 0.000123 grad: 0.0655 (0.0711) loss: 0.8507 (0.8478) time: 0.1809 data: 0.0996 max mem: 9377 +Train: [12] [3900/6250] eta: 0:06:10 lr: 0.000123 grad: 0.0737 (0.0711) loss: 0.8466 (0.8478) time: 0.1558 data: 0.0755 max mem: 9377 +Train: [12] [4000/6250] eta: 0:05:54 lr: 0.000123 grad: 0.0659 (0.0711) loss: 0.8454 (0.8478) time: 0.1508 data: 0.0592 max mem: 9377 +Train: [12] [4100/6250] eta: 0:05:38 lr: 0.000123 grad: 0.0698 (0.0711) loss: 0.8435 (0.8478) time: 0.1618 data: 0.0711 max mem: 9377 +Train: [12] [4200/6250] eta: 0:05:22 lr: 0.000123 grad: 0.0695 (0.0710) loss: 0.8461 (0.8478) time: 0.1514 data: 0.0657 max mem: 9377 +Train: [12] [4300/6250] eta: 0:05:07 lr: 0.000123 grad: 0.0720 (0.0711) loss: 0.8457 (0.8478) time: 0.1829 data: 0.1000 max mem: 9377 +Train: [12] [4400/6250] eta: 0:04:51 lr: 0.000123 grad: 0.0698 (0.0711) loss: 0.8487 (0.8478) time: 0.1372 data: 0.0429 max mem: 9377 +Train: [12] [4500/6250] eta: 0:04:35 lr: 0.000123 grad: 0.0688 (0.0711) loss: 0.8472 (0.8477) time: 0.1568 data: 0.0717 max mem: 9377 +Train: [12] [4600/6250] eta: 0:04:20 lr: 0.000123 grad: 0.0743 (0.0711) loss: 0.8457 (0.8477) time: 0.1788 data: 0.0983 max mem: 9377 +Train: [12] [4700/6250] eta: 0:04:04 lr: 0.000123 grad: 0.0630 (0.0711) loss: 0.8538 (0.8477) time: 0.1452 data: 0.0625 max mem: 9377 +Train: [12] [4800/6250] eta: 0:03:48 lr: 0.000123 grad: 0.0668 (0.0711) loss: 0.8459 (0.8477) time: 0.1632 data: 0.0750 max mem: 9377 +Train: [12] [4900/6250] eta: 0:03:32 lr: 0.000123 grad: 0.0653 (0.0711) loss: 0.8550 (0.8478) time: 0.1636 data: 0.0763 max mem: 9377 +Train: [12] [5000/6250] eta: 0:03:16 lr: 0.000123 grad: 0.0702 (0.0711) loss: 0.8478 (0.8478) time: 0.1721 data: 0.0864 max mem: 9377 +Train: [12] [5100/6250] eta: 0:03:01 lr: 0.000123 grad: 0.0728 (0.0711) loss: 0.8506 (0.8478) time: 0.1641 data: 0.0746 max mem: 9377 +Train: [12] [5200/6250] eta: 0:02:45 lr: 0.000123 grad: 0.0748 (0.0711) loss: 0.8511 (0.8478) time: 0.1571 data: 0.0727 max mem: 9377 +Train: [12] [5300/6250] eta: 0:02:29 lr: 0.000123 grad: 0.0692 (0.0712) loss: 0.8441 (0.8478) time: 0.1730 data: 0.0898 max mem: 9377 +Train: [12] [5400/6250] eta: 0:02:13 lr: 0.000123 grad: 0.0689 (0.0711) loss: 0.8458 (0.8478) time: 0.1472 data: 0.0567 max mem: 9377 +Train: [12] [5500/6250] eta: 0:01:57 lr: 0.000123 grad: 0.0677 (0.0711) loss: 0.8493 (0.8478) time: 0.1595 data: 0.0681 max mem: 9377 +Train: [12] [5600/6250] eta: 0:01:42 lr: 0.000123 grad: 0.0668 (0.0711) loss: 0.8488 (0.8477) time: 0.1708 data: 0.0850 max mem: 9377 +Train: [12] [5700/6250] eta: 0:01:26 lr: 0.000123 grad: 0.0672 (0.0711) loss: 0.8476 (0.8478) time: 0.1411 data: 0.0427 max mem: 9377 +Train: [12] [5800/6250] eta: 0:01:10 lr: 0.000123 grad: 0.0638 (0.0711) loss: 0.8454 (0.8478) time: 0.1637 data: 0.0778 max mem: 9377 +Train: [12] [5900/6250] eta: 0:00:55 lr: 0.000123 grad: 0.0713 (0.0711) loss: 0.8432 (0.8478) time: 0.1788 data: 0.0966 max mem: 9377 +Train: [12] [6000/6250] eta: 0:00:39 lr: 0.000123 grad: 0.0686 (0.0711) loss: 0.8491 (0.8478) time: 0.1490 data: 0.0615 max mem: 9377 +Train: [12] [6100/6250] eta: 0:00:23 lr: 0.000123 grad: 0.0666 (0.0712) loss: 0.8481 (0.8478) time: 0.1496 data: 0.0612 max mem: 9377 +Train: [12] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0726 (0.0712) loss: 0.8464 (0.8477) time: 0.1550 data: 0.0666 max mem: 9377 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0660 (0.0712) loss: 0.8494 (0.8477) time: 0.1316 data: 0.0429 max mem: 9377 +Train: [12] Total time: 0:16:32 (0.1588 s / it) +Averaged stats: lr: 0.000123 grad: 0.0660 (0.0712) loss: 0.8494 (0.8477) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:05:49 loss: 0.8462 (0.8462) time: 5.6321 data: 5.6026 max mem: 9377 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8428 (0.8461) time: 0.1347 data: 0.1094 max mem: 9377 +Eval (hcp-train-subset): [12] Total time: 0:00:14 (0.2352 s / it) +Averaged stats (hcp-train-subset): loss: 0.8428 (0.8461) +Eval (hcp-val): [12] [ 0/62] eta: 0:06:27 loss: 0.8445 (0.8445) time: 6.2567 data: 6.2219 max mem: 9377 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8469 (0.8474) time: 0.1124 data: 0.0873 max mem: 9377 +Eval (hcp-val): [12] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (hcp-val): loss: 0.8469 (0.8474) +Eval (nsd-val): [12] [ 0/62] eta: 0:05:27 loss: 0.8079 (0.8079) time: 5.2832 data: 5.2530 max mem: 9377 +Eval (nsd-val): [12] [61/62] eta: 0:00:00 loss: 0.8147 (0.8161) time: 0.1257 data: 0.0991 max mem: 9377 +Eval (nsd-val): [12] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (nsd-val): loss: 0.8147 (0.8161) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [13] [ 0/6250] eta: 8:25:03 lr: 0.000123 grad: 0.0713 (0.0713) loss: 0.8378 (0.8378) time: 4.8485 data: 4.5301 max mem: 9377 +Train: [13] [ 100/6250] eta: 0:22:10 lr: 0.000123 grad: 0.0769 (0.0815) loss: 0.8512 (0.8465) time: 0.1742 data: 0.0703 max mem: 9377 +Train: [13] [ 200/6250] eta: 0:18:59 lr: 0.000123 grad: 0.0706 (0.0776) loss: 0.8365 (0.8441) time: 0.1636 data: 0.0694 max mem: 9377 +Train: [13] [ 300/6250] eta: 0:18:04 lr: 0.000123 grad: 0.0727 (0.0766) loss: 0.8455 (0.8435) time: 0.1820 data: 0.0947 max mem: 9377 +Train: [13] [ 400/6250] eta: 0:17:17 lr: 0.000123 grad: 0.0661 (0.0753) loss: 0.8404 (0.8438) time: 0.1877 data: 0.1054 max mem: 9377 +Train: [13] [ 500/6250] eta: 0:16:36 lr: 0.000123 grad: 0.0723 (0.0747) loss: 0.8447 (0.8438) time: 0.1383 data: 0.0466 max mem: 9377 +Train: [13] [ 600/6250] eta: 0:16:02 lr: 0.000123 grad: 0.0704 (0.0744) loss: 0.8425 (0.8441) time: 0.1256 data: 0.0281 max mem: 9377 +Train: [13] [ 700/6250] eta: 0:15:30 lr: 0.000123 grad: 0.0681 (0.0735) loss: 0.8435 (0.8443) time: 0.1383 data: 0.0532 max mem: 9377 +Train: [13] [ 800/6250] eta: 0:15:06 lr: 0.000123 grad: 0.0709 (0.0732) loss: 0.8419 (0.8445) time: 0.1593 data: 0.0764 max mem: 9377 +Train: [13] [ 900/6250] eta: 0:14:44 lr: 0.000123 grad: 0.0738 (0.0735) loss: 0.8478 (0.8446) time: 0.1471 data: 0.0573 max mem: 9377 +Train: [13] [1000/6250] eta: 0:14:21 lr: 0.000123 grad: 0.0739 (0.0735) loss: 0.8421 (0.8446) time: 0.1516 data: 0.0639 max mem: 9377 +Train: [13] [1100/6250] eta: 0:14:07 lr: 0.000123 grad: 0.0659 (0.0734) loss: 0.8428 (0.8445) time: 0.1981 data: 0.0979 max mem: 9377 +Train: [13] [1200/6250] eta: 0:13:54 lr: 0.000123 grad: 0.0692 (0.0735) loss: 0.8428 (0.8445) time: 0.1631 data: 0.0527 max mem: 9377 +Train: [13] [1300/6250] eta: 0:13:41 lr: 0.000123 grad: 0.0670 (0.0733) loss: 0.8440 (0.8443) time: 0.1946 data: 0.0904 max mem: 9377 +Train: [13] [1400/6250] eta: 0:13:23 lr: 0.000123 grad: 0.0655 (0.0730) loss: 0.8443 (0.8443) time: 0.1437 data: 0.0309 max mem: 9377 +Train: [13] [1500/6250] eta: 0:13:05 lr: 0.000123 grad: 0.0677 (0.0728) loss: 0.8462 (0.8443) time: 0.1668 data: 0.0685 max mem: 9377 +Train: [13] [1600/6250] eta: 0:12:47 lr: 0.000123 grad: 0.0686 (0.0727) loss: 0.8425 (0.8443) time: 0.1584 data: 0.0735 max mem: 9377 +Train: [13] [1700/6250] eta: 0:12:27 lr: 0.000123 grad: 0.0668 (0.0728) loss: 0.8447 (0.8443) time: 0.1535 data: 0.0656 max mem: 9377 +Train: [13] [1800/6250] eta: 0:12:11 lr: 0.000123 grad: 0.0765 (0.0727) loss: 0.8399 (0.8442) time: 0.0975 data: 0.0003 max mem: 9377 +Train: [13] [1900/6250] eta: 0:11:54 lr: 0.000123 grad: 0.0656 (0.0727) loss: 0.8499 (0.8443) time: 0.1526 data: 0.0516 max mem: 9377 +Train: [13] [2000/6250] eta: 0:11:35 lr: 0.000123 grad: 0.0692 (0.0726) loss: 0.8441 (0.8443) time: 0.1486 data: 0.0570 max mem: 9377 +Train: [13] [2100/6250] eta: 0:11:17 lr: 0.000123 grad: 0.0651 (0.0725) loss: 0.8449 (0.8443) time: 0.1539 data: 0.0657 max mem: 9377 +Train: [13] [2200/6250] eta: 0:10:59 lr: 0.000123 grad: 0.0678 (0.0724) loss: 0.8440 (0.8443) time: 0.1441 data: 0.0540 max mem: 9377 +Train: [13] [2300/6250] eta: 0:10:40 lr: 0.000123 grad: 0.0689 (0.0723) loss: 0.8464 (0.8443) time: 0.1383 data: 0.0445 max mem: 9377 +Train: [13] [2400/6250] eta: 0:10:22 lr: 0.000123 grad: 0.0729 (0.0723) loss: 0.8407 (0.8443) time: 0.1637 data: 0.0711 max mem: 9377 +Train: [13] [2500/6250] eta: 0:10:05 lr: 0.000123 grad: 0.0720 (0.0722) loss: 0.8446 (0.8442) time: 0.1575 data: 0.0646 max mem: 9377 +Train: [13] [2600/6250] eta: 0:09:47 lr: 0.000123 grad: 0.0714 (0.0721) loss: 0.8434 (0.8442) time: 0.1447 data: 0.0599 max mem: 9377 +Train: [13] [2700/6250] eta: 0:09:30 lr: 0.000123 grad: 0.0654 (0.0720) loss: 0.8457 (0.8442) time: 0.1454 data: 0.0524 max mem: 9377 +Train: [13] [2800/6250] eta: 0:09:13 lr: 0.000123 grad: 0.0667 (0.0720) loss: 0.8432 (0.8443) time: 0.1589 data: 0.0751 max mem: 9377 +Train: [13] [2900/6250] eta: 0:08:56 lr: 0.000123 grad: 0.0675 (0.0719) loss: 0.8465 (0.8443) time: 0.1603 data: 0.0745 max mem: 9377 +Train: [13] [3000/6250] eta: 0:08:39 lr: 0.000123 grad: 0.0678 (0.0718) loss: 0.8459 (0.8444) time: 0.1465 data: 0.0497 max mem: 9377 +Train: [13] [3100/6250] eta: 0:08:22 lr: 0.000123 grad: 0.0674 (0.0718) loss: 0.8446 (0.8444) time: 0.1420 data: 0.0425 max mem: 9377 +Train: [13] [3200/6250] eta: 0:08:06 lr: 0.000123 grad: 0.0658 (0.0717) loss: 0.8499 (0.8444) time: 0.1619 data: 0.0705 max mem: 9377 +Train: [13] [3300/6250] eta: 0:07:50 lr: 0.000123 grad: 0.0737 (0.0716) loss: 0.8479 (0.8445) time: 0.1390 data: 0.0435 max mem: 9377 +Train: [13] [3400/6250] eta: 0:07:35 lr: 0.000123 grad: 0.0635 (0.0716) loss: 0.8462 (0.8446) time: 0.1530 data: 0.0635 max mem: 9377 +Train: [13] [3500/6250] eta: 0:07:20 lr: 0.000123 grad: 0.0668 (0.0716) loss: 0.8456 (0.8446) time: 0.1834 data: 0.0880 max mem: 9377 +Train: [13] [3600/6250] eta: 0:07:05 lr: 0.000123 grad: 0.0647 (0.0715) loss: 0.8503 (0.8446) time: 0.1790 data: 0.0860 max mem: 9377 +Train: [13] [3700/6250] eta: 0:06:49 lr: 0.000122 grad: 0.0664 (0.0714) loss: 0.8469 (0.8447) time: 0.1497 data: 0.0643 max mem: 9377 +Train: [13] [3800/6250] eta: 0:06:33 lr: 0.000122 grad: 0.0675 (0.0713) loss: 0.8451 (0.8447) time: 0.1626 data: 0.0710 max mem: 9377 +Train: [13] [3900/6250] eta: 0:06:17 lr: 0.000122 grad: 0.0655 (0.0712) loss: 0.8418 (0.8448) time: 0.1692 data: 0.0799 max mem: 9377 +Train: [13] [4000/6250] eta: 0:06:00 lr: 0.000122 grad: 0.0631 (0.0712) loss: 0.8497 (0.8448) time: 0.1577 data: 0.0652 max mem: 9377 +Train: [13] [4100/6250] eta: 0:05:44 lr: 0.000122 grad: 0.0710 (0.0711) loss: 0.8432 (0.8448) time: 0.1573 data: 0.0573 max mem: 9377 +Train: [13] [4200/6250] eta: 0:05:28 lr: 0.000122 grad: 0.0706 (0.0711) loss: 0.8423 (0.8448) time: 0.1628 data: 0.0639 max mem: 9377 +Train: [13] [4300/6250] eta: 0:05:12 lr: 0.000122 grad: 0.0697 (0.0711) loss: 0.8483 (0.8448) time: 0.1555 data: 0.0701 max mem: 9377 +Train: [13] [4400/6250] eta: 0:04:56 lr: 0.000122 grad: 0.0649 (0.0710) loss: 0.8478 (0.8448) time: 0.1620 data: 0.0777 max mem: 9377 +Train: [13] [4500/6250] eta: 0:04:40 lr: 0.000122 grad: 0.0689 (0.0710) loss: 0.8401 (0.8448) time: 0.1502 data: 0.0559 max mem: 9377 +Train: [13] [4600/6250] eta: 0:04:24 lr: 0.000122 grad: 0.0706 (0.0710) loss: 0.8460 (0.8448) time: 0.1539 data: 0.0675 max mem: 9377 +Train: [13] [4700/6250] eta: 0:04:08 lr: 0.000122 grad: 0.0671 (0.0710) loss: 0.8442 (0.8449) time: 0.1378 data: 0.0476 max mem: 9377 +Train: [13] [4800/6250] eta: 0:03:52 lr: 0.000122 grad: 0.0689 (0.0710) loss: 0.8427 (0.8448) time: 0.1519 data: 0.0619 max mem: 9377 +Train: [13] [4900/6250] eta: 0:03:35 lr: 0.000122 grad: 0.0658 (0.0709) loss: 0.8496 (0.8449) time: 0.1702 data: 0.0740 max mem: 9377 +Train: [13] [5000/6250] eta: 0:03:19 lr: 0.000122 grad: 0.0703 (0.0709) loss: 0.8449 (0.8450) time: 0.1506 data: 0.0576 max mem: 9377 +Train: [13] [5100/6250] eta: 0:03:03 lr: 0.000122 grad: 0.0614 (0.0708) loss: 0.8490 (0.8451) time: 0.1413 data: 0.0513 max mem: 9377 +Train: [13] [5200/6250] eta: 0:02:47 lr: 0.000122 grad: 0.0681 (0.0708) loss: 0.8507 (0.8451) time: 0.1734 data: 0.0855 max mem: 9377 +Train: [13] [5300/6250] eta: 0:02:31 lr: 0.000122 grad: 0.0666 (0.0707) loss: 0.8488 (0.8452) time: 0.1576 data: 0.0692 max mem: 9377 +Train: [13] [5400/6250] eta: 0:02:15 lr: 0.000122 grad: 0.0680 (0.0707) loss: 0.8435 (0.8451) time: 0.1874 data: 0.1062 max mem: 9377 +Train: [13] [5500/6250] eta: 0:01:59 lr: 0.000122 grad: 0.0655 (0.0707) loss: 0.8431 (0.8451) time: 0.1553 data: 0.0705 max mem: 9377 +Train: [13] [5600/6250] eta: 0:01:43 lr: 0.000122 grad: 0.0667 (0.0706) loss: 0.8482 (0.8452) time: 0.1525 data: 0.0644 max mem: 9377 +Train: [13] [5700/6250] eta: 0:01:27 lr: 0.000122 grad: 0.0688 (0.0706) loss: 0.8481 (0.8452) time: 0.1434 data: 0.0624 max mem: 9377 +Train: [13] [5800/6250] eta: 0:01:11 lr: 0.000122 grad: 0.0680 (0.0706) loss: 0.8452 (0.8453) time: 0.1440 data: 0.0489 max mem: 9377 +Train: [13] [5900/6250] eta: 0:00:55 lr: 0.000122 grad: 0.0656 (0.0705) loss: 0.8469 (0.8453) time: 0.1580 data: 0.0736 max mem: 9377 +Train: [13] [6000/6250] eta: 0:00:39 lr: 0.000122 grad: 0.0614 (0.0705) loss: 0.8450 (0.8453) time: 0.1836 data: 0.1017 max mem: 9377 +Train: [13] [6100/6250] eta: 0:00:23 lr: 0.000122 grad: 0.0676 (0.0705) loss: 0.8479 (0.8453) time: 0.1715 data: 0.0770 max mem: 9377 +Train: [13] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.0623 (0.0704) loss: 0.8514 (0.8454) time: 0.1383 data: 0.0525 max mem: 9377 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0634 (0.0704) loss: 0.8515 (0.8454) time: 0.1914 data: 0.1160 max mem: 9377 +Train: [13] Total time: 0:16:40 (0.1601 s / it) +Averaged stats: lr: 0.000122 grad: 0.0634 (0.0704) loss: 0.8515 (0.8454) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:05:49 loss: 0.8453 (0.8453) time: 5.6335 data: 5.6024 max mem: 9377 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8439 (0.8448) time: 0.1561 data: 0.1296 max mem: 9377 +Eval (hcp-train-subset): [13] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (hcp-train-subset): loss: 0.8439 (0.8448) +Eval (hcp-val): [13] [ 0/62] eta: 0:04:43 loss: 0.8436 (0.8436) time: 4.5785 data: 4.5092 max mem: 9377 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8447 (0.8467) time: 0.1505 data: 0.1252 max mem: 9377 +Eval (hcp-val): [13] Total time: 0:00:14 (0.2279 s / it) +Averaged stats (hcp-val): loss: 0.8447 (0.8467) +Eval (nsd-val): [13] [ 0/62] eta: 0:03:58 loss: 0.8076 (0.8076) time: 3.8467 data: 3.7365 max mem: 9377 +Eval (nsd-val): [13] [61/62] eta: 0:00:00 loss: 0.8161 (0.8176) time: 0.1182 data: 0.0932 max mem: 9377 +Eval (nsd-val): [13] Total time: 0:00:13 (0.2121 s / it) +Averaged stats (nsd-val): loss: 0.8161 (0.8176) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [14] [ 0/6250] eta: 7:29:15 lr: 0.000122 grad: 0.0497 (0.0497) loss: 0.8775 (0.8775) time: 4.3129 data: 4.0456 max mem: 9377 +Train: [14] [ 100/6250] eta: 0:22:04 lr: 0.000122 grad: 0.0653 (0.0732) loss: 0.8478 (0.8518) time: 0.1726 data: 0.0704 max mem: 9377 +Train: [14] [ 200/6250] eta: 0:19:39 lr: 0.000122 grad: 0.0692 (0.0733) loss: 0.8334 (0.8479) time: 0.1861 data: 0.0972 max mem: 9377 +Train: [14] [ 300/6250] eta: 0:18:10 lr: 0.000122 grad: 0.0762 (0.0728) loss: 0.8531 (0.8465) time: 0.1479 data: 0.0601 max mem: 9377 +Train: [14] [ 400/6250] eta: 0:17:37 lr: 0.000122 grad: 0.0719 (0.0720) loss: 0.8389 (0.8454) time: 0.1751 data: 0.0794 max mem: 9377 +Train: [14] [ 500/6250] eta: 0:17:27 lr: 0.000122 grad: 0.0683 (0.0716) loss: 0.8504 (0.8456) time: 0.2086 data: 0.1170 max mem: 9377 +Train: [14] [ 600/6250] eta: 0:17:05 lr: 0.000122 grad: 0.0675 (0.0710) loss: 0.8447 (0.8459) time: 0.1935 data: 0.0941 max mem: 9377 +Train: [14] [ 700/6250] eta: 0:16:44 lr: 0.000122 grad: 0.0665 (0.0708) loss: 0.8474 (0.8458) time: 0.1716 data: 0.0697 max mem: 9377 +Train: [14] [ 800/6250] eta: 0:16:29 lr: 0.000122 grad: 0.0633 (0.0705) loss: 0.8485 (0.8459) time: 0.1904 data: 0.1056 max mem: 9377 +Train: [14] [ 900/6250] eta: 0:16:09 lr: 0.000122 grad: 0.0674 (0.0702) loss: 0.8502 (0.8461) time: 0.1573 data: 0.0541 max mem: 9377 +Train: [14] [1000/6250] eta: 0:15:44 lr: 0.000122 grad: 0.0656 (0.0700) loss: 0.8416 (0.8461) time: 0.1508 data: 0.0558 max mem: 9377 +Train: [14] [1100/6250] eta: 0:15:18 lr: 0.000122 grad: 0.0672 (0.0699) loss: 0.8476 (0.8462) time: 0.1820 data: 0.0782 max mem: 9377 +Train: [14] [1200/6250] eta: 0:15:01 lr: 0.000122 grad: 0.0640 (0.0698) loss: 0.8519 (0.8463) time: 0.1767 data: 0.0690 max mem: 9377 +Train: [14] [1300/6250] eta: 0:14:38 lr: 0.000122 grad: 0.0694 (0.0697) loss: 0.8474 (0.8465) time: 0.1456 data: 0.0395 max mem: 9377 +Train: [14] [1400/6250] eta: 0:14:13 lr: 0.000122 grad: 0.0688 (0.0697) loss: 0.8446 (0.8465) time: 0.1434 data: 0.0575 max mem: 9377 +Train: [14] [1500/6250] eta: 0:13:52 lr: 0.000122 grad: 0.0671 (0.0696) loss: 0.8416 (0.8463) time: 0.1911 data: 0.1010 max mem: 9377 +Train: [14] [1600/6250] eta: 0:13:29 lr: 0.000122 grad: 0.0672 (0.0694) loss: 0.8481 (0.8464) time: 0.1671 data: 0.0734 max mem: 9377 +Train: [14] [1700/6250] eta: 0:13:06 lr: 0.000122 grad: 0.0625 (0.0692) loss: 0.8493 (0.8465) time: 0.1580 data: 0.0723 max mem: 9377 +Train: [14] [1800/6250] eta: 0:12:49 lr: 0.000122 grad: 0.0667 (0.0691) loss: 0.8458 (0.8466) time: 0.1500 data: 0.0694 max mem: 9377 +Train: [14] [1900/6250] eta: 0:12:31 lr: 0.000122 grad: 0.0637 (0.0691) loss: 0.8476 (0.8466) time: 0.1639 data: 0.0786 max mem: 9377 +Train: [14] [2000/6250] eta: 0:12:13 lr: 0.000122 grad: 0.0618 (0.0690) loss: 0.8455 (0.8465) time: 0.1735 data: 0.0941 max mem: 9377 +Train: [14] [2100/6250] eta: 0:11:56 lr: 0.000122 grad: 0.0652 (0.0690) loss: 0.8492 (0.8466) time: 0.1801 data: 0.0939 max mem: 9377 +Train: [14] [2200/6250] eta: 0:11:40 lr: 0.000122 grad: 0.0673 (0.0688) loss: 0.8500 (0.8466) time: 0.1868 data: 0.1077 max mem: 9377 +Train: [14] [2300/6250] eta: 0:11:18 lr: 0.000122 grad: 0.0676 (0.0687) loss: 0.8423 (0.8466) time: 0.1593 data: 0.0647 max mem: 9377 +Train: [14] [2400/6250] eta: 0:10:57 lr: 0.000122 grad: 0.0678 (0.0686) loss: 0.8467 (0.8465) time: 0.1781 data: 0.0955 max mem: 9377 +Train: [14] [2500/6250] eta: 0:10:37 lr: 0.000122 grad: 0.0629 (0.0685) loss: 0.8490 (0.8466) time: 0.1334 data: 0.0449 max mem: 9377 +Train: [14] [2600/6250] eta: 0:10:20 lr: 0.000122 grad: 0.0666 (0.0684) loss: 0.8495 (0.8466) time: 0.1795 data: 0.0854 max mem: 9377 +Train: [14] [2700/6250] eta: 0:10:01 lr: 0.000122 grad: 0.0633 (0.0683) loss: 0.8445 (0.8466) time: 0.1334 data: 0.0495 max mem: 9377 +Train: [14] [2800/6250] eta: 0:09:43 lr: 0.000122 grad: 0.0676 (0.0683) loss: 0.8448 (0.8467) time: 0.1779 data: 0.0907 max mem: 9377 +Train: [14] [2900/6250] eta: 0:09:25 lr: 0.000122 grad: 0.0688 (0.0683) loss: 0.8502 (0.8467) time: 0.1817 data: 0.0941 max mem: 9377 +Train: [14] [3000/6250] eta: 0:09:06 lr: 0.000122 grad: 0.0659 (0.0683) loss: 0.8475 (0.8467) time: 0.1658 data: 0.0728 max mem: 9377 +Train: [14] [3100/6250] eta: 0:08:48 lr: 0.000122 grad: 0.0665 (0.0682) loss: 0.8457 (0.8466) time: 0.1352 data: 0.0420 max mem: 9377 +Train: [14] [3200/6250] eta: 0:08:30 lr: 0.000122 grad: 0.0635 (0.0682) loss: 0.8443 (0.8466) time: 0.1648 data: 0.0805 max mem: 9377 +Train: [14] [3300/6250] eta: 0:08:12 lr: 0.000122 grad: 0.0622 (0.0682) loss: 0.8504 (0.8466) time: 0.1478 data: 0.0547 max mem: 9377 +Train: [14] [3400/6250] eta: 0:07:54 lr: 0.000122 grad: 0.0682 (0.0682) loss: 0.8416 (0.8466) time: 0.1298 data: 0.0283 max mem: 9377 +Train: [14] [3500/6250] eta: 0:07:37 lr: 0.000122 grad: 0.0637 (0.0682) loss: 0.8449 (0.8465) time: 0.1705 data: 0.0907 max mem: 9377 +Train: [14] [3600/6250] eta: 0:07:19 lr: 0.000122 grad: 0.0624 (0.0681) loss: 0.8406 (0.8465) time: 0.1562 data: 0.0592 max mem: 9377 +Train: [14] [3700/6250] eta: 0:07:02 lr: 0.000122 grad: 0.0646 (0.0681) loss: 0.8456 (0.8465) time: 0.1754 data: 0.0841 max mem: 9377 +Train: [14] [3800/6250] eta: 0:06:45 lr: 0.000122 grad: 0.0643 (0.0680) loss: 0.8470 (0.8465) time: 0.1573 data: 0.0702 max mem: 9377 +Train: [14] [3900/6250] eta: 0:06:28 lr: 0.000122 grad: 0.0647 (0.0680) loss: 0.8470 (0.8465) time: 0.1715 data: 0.0828 max mem: 9377 +Train: [14] [4000/6250] eta: 0:06:10 lr: 0.000122 grad: 0.0635 (0.0680) loss: 0.8470 (0.8465) time: 0.1564 data: 0.0687 max mem: 9377 +Train: [14] [4100/6250] eta: 0:05:54 lr: 0.000122 grad: 0.0687 (0.0680) loss: 0.8438 (0.8465) time: 0.1538 data: 0.0616 max mem: 9377 +Train: [14] [4200/6250] eta: 0:05:36 lr: 0.000122 grad: 0.0629 (0.0680) loss: 0.8466 (0.8464) time: 0.1570 data: 0.0597 max mem: 9377 +Train: [14] [4300/6250] eta: 0:05:19 lr: 0.000122 grad: 0.0693 (0.0681) loss: 0.8433 (0.8464) time: 0.1539 data: 0.0610 max mem: 9377 +Train: [14] [4400/6250] eta: 0:05:02 lr: 0.000122 grad: 0.0660 (0.0681) loss: 0.8384 (0.8463) time: 0.1430 data: 0.0546 max mem: 9377 +Train: [14] [4500/6250] eta: 0:04:46 lr: 0.000122 grad: 0.0670 (0.0682) loss: 0.8455 (0.8463) time: 0.1450 data: 0.0651 max mem: 9377 +Train: [14] [4600/6250] eta: 0:04:29 lr: 0.000122 grad: 0.0715 (0.0682) loss: 0.8423 (0.8462) time: 0.1523 data: 0.0705 max mem: 9377 +Train: [14] [4700/6250] eta: 0:04:12 lr: 0.000122 grad: 0.0750 (0.0683) loss: 0.8441 (0.8461) time: 0.1504 data: 0.0650 max mem: 9377 +Train: [14] [4800/6250] eta: 0:03:56 lr: 0.000122 grad: 0.0704 (0.0684) loss: 0.8463 (0.8460) time: 0.1650 data: 0.0789 max mem: 9377 +Train: [14] [4900/6250] eta: 0:03:39 lr: 0.000122 grad: 0.0706 (0.0684) loss: 0.8425 (0.8460) time: 0.1458 data: 0.0599 max mem: 9377 +Train: [14] [5000/6250] eta: 0:03:23 lr: 0.000122 grad: 0.0698 (0.0684) loss: 0.8439 (0.8459) time: 0.1513 data: 0.0699 max mem: 9377 +Train: [14] [5100/6250] eta: 0:03:06 lr: 0.000122 grad: 0.0679 (0.0685) loss: 0.8432 (0.8458) time: 0.1403 data: 0.0467 max mem: 9377 +Train: [14] [5200/6250] eta: 0:02:50 lr: 0.000122 grad: 0.0682 (0.0685) loss: 0.8443 (0.8458) time: 0.1780 data: 0.0887 max mem: 9377 +Train: [14] [5300/6250] eta: 0:02:34 lr: 0.000122 grad: 0.0683 (0.0685) loss: 0.8489 (0.8458) time: 0.1460 data: 0.0626 max mem: 9377 +Train: [14] [5400/6250] eta: 0:02:17 lr: 0.000122 grad: 0.0701 (0.0686) loss: 0.8431 (0.8457) time: 0.1817 data: 0.0889 max mem: 9377 +Train: [14] [5500/6250] eta: 0:02:01 lr: 0.000122 grad: 0.0704 (0.0686) loss: 0.8368 (0.8457) time: 0.1396 data: 0.0479 max mem: 9377 +Train: [14] [5600/6250] eta: 0:01:45 lr: 0.000122 grad: 0.0695 (0.0687) loss: 0.8426 (0.8456) time: 0.1493 data: 0.0732 max mem: 9377 +Train: [14] [5700/6250] eta: 0:01:28 lr: 0.000122 grad: 0.0688 (0.0687) loss: 0.8448 (0.8456) time: 0.1640 data: 0.0830 max mem: 9377 +Train: [14] [5800/6250] eta: 0:01:12 lr: 0.000122 grad: 0.0694 (0.0688) loss: 0.8475 (0.8456) time: 0.1535 data: 0.0739 max mem: 9377 +Train: [14] [5900/6250] eta: 0:00:56 lr: 0.000122 grad: 0.0711 (0.0689) loss: 0.8446 (0.8455) time: 0.1377 data: 0.0553 max mem: 9377 +Train: [14] [6000/6250] eta: 0:00:40 lr: 0.000122 grad: 0.0714 (0.0690) loss: 0.8445 (0.8455) time: 0.1390 data: 0.0613 max mem: 9377 +Train: [14] [6100/6250] eta: 0:00:24 lr: 0.000122 grad: 0.0656 (0.0691) loss: 0.8465 (0.8455) time: 0.1117 data: 0.0260 max mem: 9377 +Train: [14] [6200/6250] eta: 0:00:08 lr: 0.000122 grad: 0.0738 (0.0692) loss: 0.8423 (0.8454) time: 0.1583 data: 0.0730 max mem: 9377 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0694 (0.0692) loss: 0.8465 (0.8454) time: 0.1600 data: 0.0769 max mem: 9377 +Train: [14] Total time: 0:16:55 (0.1624 s / it) +Averaged stats: lr: 0.000122 grad: 0.0694 (0.0692) loss: 0.8465 (0.8454) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:03:15 loss: 0.8450 (0.8450) time: 3.1516 data: 3.0775 max mem: 9377 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8418 (0.8450) time: 0.1498 data: 0.1246 max mem: 9377 +Eval (hcp-train-subset): [14] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-train-subset): loss: 0.8418 (0.8450) +Making plots (hcp-train-subset): example=34 +Eval (hcp-val): [14] [ 0/62] eta: 0:04:16 loss: 0.8451 (0.8451) time: 4.1360 data: 4.0637 max mem: 9377 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8464 (0.8468) time: 0.1535 data: 0.1281 max mem: 9377 +Eval (hcp-val): [14] Total time: 0:00:14 (0.2355 s / it) +Averaged stats (hcp-val): loss: 0.8464 (0.8468) +Making plots (hcp-val): example=4 +Eval (nsd-val): [14] [ 0/62] eta: 0:03:52 loss: 0.8121 (0.8121) time: 3.7501 data: 3.6854 max mem: 9377 +Eval (nsd-val): [14] [61/62] eta: 0:00:00 loss: 0.8178 (0.8184) time: 0.1461 data: 0.1209 max mem: 9377 +Eval (nsd-val): [14] Total time: 0:00:14 (0.2330 s / it) +Averaged stats (nsd-val): loss: 0.8178 (0.8184) +Making plots (nsd-val): example=11 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00014.pth +Train: [15] [ 0/6250] eta: 12:30:25 lr: 0.000122 grad: 0.0700 (0.0700) loss: 0.8753 (0.8753) time: 7.2041 data: 7.0965 max mem: 9377 +Train: [15] [ 100/6250] eta: 0:26:30 lr: 0.000122 grad: 0.0766 (0.0818) loss: 0.8399 (0.8427) time: 0.1989 data: 0.0876 max mem: 9377 +Train: [15] [ 200/6250] eta: 0:22:34 lr: 0.000122 grad: 0.0714 (0.0784) loss: 0.8365 (0.8410) time: 0.1790 data: 0.0832 max mem: 9377 +Train: [15] [ 300/6250] eta: 0:21:00 lr: 0.000122 grad: 0.0768 (0.0767) loss: 0.8347 (0.8417) time: 0.1898 data: 0.1005 max mem: 9377 +Train: [15] [ 400/6250] eta: 0:20:07 lr: 0.000122 grad: 0.0731 (0.0754) loss: 0.8338 (0.8410) time: 0.2078 data: 0.1112 max mem: 9377 +Train: [15] [ 500/6250] eta: 0:19:16 lr: 0.000122 grad: 0.0742 (0.0757) loss: 0.8361 (0.8403) time: 0.1846 data: 0.0879 max mem: 9377 +Train: [15] [ 600/6250] eta: 0:18:35 lr: 0.000122 grad: 0.0719 (0.0753) loss: 0.8395 (0.8400) time: 0.1746 data: 0.0830 max mem: 9377 +Train: [15] [ 700/6250] eta: 0:17:51 lr: 0.000122 grad: 0.0737 (0.0751) loss: 0.8374 (0.8398) time: 0.1634 data: 0.0658 max mem: 9377 +Train: [15] [ 800/6250] eta: 0:17:17 lr: 0.000122 grad: 0.0700 (0.0745) loss: 0.8388 (0.8398) time: 0.1562 data: 0.0782 max mem: 9377 +Train: [15] [ 900/6250] eta: 0:16:46 lr: 0.000122 grad: 0.0666 (0.0739) loss: 0.8415 (0.8398) time: 0.1867 data: 0.0937 max mem: 9377 +Train: [15] [1000/6250] eta: 0:16:08 lr: 0.000122 grad: 0.0647 (0.0733) loss: 0.8416 (0.8401) time: 0.1522 data: 0.0590 max mem: 9377 +Train: [15] [1100/6250] eta: 0:15:32 lr: 0.000121 grad: 0.0691 (0.0730) loss: 0.8436 (0.8402) time: 0.1372 data: 0.0550 max mem: 9377 +Train: [15] [1200/6250] eta: 0:14:58 lr: 0.000121 grad: 0.0673 (0.0727) loss: 0.8416 (0.8403) time: 0.1493 data: 0.0624 max mem: 9377 +Train: [15] [1300/6250] eta: 0:14:32 lr: 0.000121 grad: 0.0700 (0.0725) loss: 0.8411 (0.8403) time: 0.1266 data: 0.0395 max mem: 9377 +Train: [15] [1400/6250] eta: 0:14:06 lr: 0.000121 grad: 0.0659 (0.0722) loss: 0.8438 (0.8405) time: 0.1410 data: 0.0361 max mem: 9377 +Train: [15] [1500/6250] eta: 0:13:39 lr: 0.000121 grad: 0.0707 (0.0719) loss: 0.8414 (0.8406) time: 0.1357 data: 0.0454 max mem: 9377 +Train: [15] [1600/6250] eta: 0:13:16 lr: 0.000121 grad: 0.0666 (0.0718) loss: 0.8437 (0.8407) time: 0.1480 data: 0.0575 max mem: 9377 +Train: [15] [1700/6250] eta: 0:12:53 lr: 0.000121 grad: 0.0670 (0.0717) loss: 0.8438 (0.8408) time: 0.1576 data: 0.0743 max mem: 9377 +Train: [15] [1800/6250] eta: 0:12:31 lr: 0.000121 grad: 0.0709 (0.0716) loss: 0.8404 (0.8409) time: 0.1566 data: 0.0746 max mem: 9377 +Train: [15] [1900/6250] eta: 0:12:11 lr: 0.000121 grad: 0.0665 (0.0715) loss: 0.8437 (0.8410) time: 0.1691 data: 0.0878 max mem: 9377 +Train: [15] [2000/6250] eta: 0:11:50 lr: 0.000121 grad: 0.0642 (0.0713) loss: 0.8434 (0.8412) time: 0.1549 data: 0.0681 max mem: 9377 +Train: [15] [2100/6250] eta: 0:11:29 lr: 0.000121 grad: 0.0711 (0.0712) loss: 0.8412 (0.8414) time: 0.1313 data: 0.0419 max mem: 9377 +Train: [15] [2200/6250] eta: 0:11:11 lr: 0.000121 grad: 0.0671 (0.0712) loss: 0.8436 (0.8415) time: 0.1426 data: 0.0467 max mem: 9377 +Train: [15] [2300/6250] eta: 0:10:53 lr: 0.000121 grad: 0.0647 (0.0711) loss: 0.8452 (0.8416) time: 0.1463 data: 0.0654 max mem: 9377 +Train: [15] [2400/6250] eta: 0:10:36 lr: 0.000121 grad: 0.0651 (0.0709) loss: 0.8426 (0.8417) time: 0.1688 data: 0.0850 max mem: 9377 +Train: [15] [2500/6250] eta: 0:10:20 lr: 0.000121 grad: 0.0704 (0.0710) loss: 0.8464 (0.8418) time: 0.1610 data: 0.0638 max mem: 9377 +Train: [15] [2600/6250] eta: 0:10:03 lr: 0.000121 grad: 0.0670 (0.0710) loss: 0.8390 (0.8418) time: 0.1709 data: 0.0800 max mem: 9377 +Train: [15] [2700/6250] eta: 0:09:45 lr: 0.000121 grad: 0.0686 (0.0710) loss: 0.8435 (0.8418) time: 0.1506 data: 0.0722 max mem: 9377 +Train: [15] [2800/6250] eta: 0:09:28 lr: 0.000121 grad: 0.0675 (0.0710) loss: 0.8452 (0.8418) time: 0.1642 data: 0.0715 max mem: 9377 +Train: [15] [2900/6250] eta: 0:09:11 lr: 0.000121 grad: 0.0646 (0.0710) loss: 0.8428 (0.8418) time: 0.1697 data: 0.0795 max mem: 9377 +Train: [15] [3000/6250] eta: 0:08:54 lr: 0.000121 grad: 0.0657 (0.0709) loss: 0.8443 (0.8419) time: 0.1638 data: 0.0759 max mem: 9377 +Train: [15] [3100/6250] eta: 0:08:36 lr: 0.000121 grad: 0.0649 (0.0708) loss: 0.8409 (0.8419) time: 0.1539 data: 0.0651 max mem: 9377 +Train: [15] [3200/6250] eta: 0:08:19 lr: 0.000121 grad: 0.0728 (0.0708) loss: 0.8401 (0.8420) time: 0.1504 data: 0.0626 max mem: 9377 +Train: [15] [3300/6250] eta: 0:08:02 lr: 0.000121 grad: 0.0695 (0.0708) loss: 0.8439 (0.8420) time: 0.1389 data: 0.0451 max mem: 9377 +Train: [15] [3400/6250] eta: 0:07:45 lr: 0.000121 grad: 0.0656 (0.0707) loss: 0.8417 (0.8420) time: 0.1456 data: 0.0514 max mem: 9377 +Train: [15] [3500/6250] eta: 0:07:28 lr: 0.000121 grad: 0.0688 (0.0707) loss: 0.8400 (0.8420) time: 0.1397 data: 0.0511 max mem: 9377 +Train: [15] [3600/6250] eta: 0:07:12 lr: 0.000121 grad: 0.0711 (0.0707) loss: 0.8387 (0.8420) time: 0.1590 data: 0.0722 max mem: 9377 +Train: [15] [3700/6250] eta: 0:06:55 lr: 0.000121 grad: 0.0662 (0.0707) loss: 0.8384 (0.8420) time: 0.1692 data: 0.0869 max mem: 9377 +Train: [15] [3800/6250] eta: 0:06:39 lr: 0.000121 grad: 0.0665 (0.0706) loss: 0.8433 (0.8419) time: 0.1661 data: 0.0803 max mem: 9377 +Train: [15] [3900/6250] eta: 0:06:23 lr: 0.000121 grad: 0.0665 (0.0706) loss: 0.8362 (0.8419) time: 0.1645 data: 0.0795 max mem: 9377 +Train: [15] [4000/6250] eta: 0:06:07 lr: 0.000121 grad: 0.0647 (0.0706) loss: 0.8377 (0.8418) time: 0.1704 data: 0.0875 max mem: 9377 +Train: [15] [4100/6250] eta: 0:05:50 lr: 0.000121 grad: 0.0680 (0.0705) loss: 0.8344 (0.8418) time: 0.1440 data: 0.0583 max mem: 9377 +Train: [15] [4200/6250] eta: 0:05:34 lr: 0.000121 grad: 0.0683 (0.0706) loss: 0.8379 (0.8418) time: 0.1699 data: 0.0850 max mem: 9377 +Train: [15] [4300/6250] eta: 0:05:17 lr: 0.000121 grad: 0.0631 (0.0705) loss: 0.8387 (0.8418) time: 0.1266 data: 0.0454 max mem: 9377 +Train: [15] [4400/6250] eta: 0:05:01 lr: 0.000121 grad: 0.0674 (0.0705) loss: 0.8335 (0.8417) time: 0.1422 data: 0.0544 max mem: 9377 +Train: [15] [4500/6250] eta: 0:04:44 lr: 0.000121 grad: 0.0665 (0.0705) loss: 0.8436 (0.8417) time: 0.1683 data: 0.0859 max mem: 9377 +Train: [15] [4600/6250] eta: 0:04:28 lr: 0.000121 grad: 0.0664 (0.0704) loss: 0.8413 (0.8417) time: 0.1469 data: 0.0614 max mem: 9377 +Train: [15] [4700/6250] eta: 0:04:11 lr: 0.000121 grad: 0.0673 (0.0705) loss: 0.8344 (0.8416) time: 0.1423 data: 0.0439 max mem: 9377 +Train: [15] [4800/6250] eta: 0:03:55 lr: 0.000121 grad: 0.0743 (0.0705) loss: 0.8400 (0.8416) time: 0.1450 data: 0.0618 max mem: 9377 +Train: [15] [4900/6250] eta: 0:03:38 lr: 0.000121 grad: 0.0664 (0.0704) loss: 0.8446 (0.8416) time: 0.1652 data: 0.0774 max mem: 9377 +Train: [15] [5000/6250] eta: 0:03:22 lr: 0.000121 grad: 0.0674 (0.0704) loss: 0.8369 (0.8416) time: 0.1593 data: 0.0683 max mem: 9377 +Train: [15] [5100/6250] eta: 0:03:06 lr: 0.000121 grad: 0.0623 (0.0703) loss: 0.8432 (0.8416) time: 0.1838 data: 0.0961 max mem: 9377 +Train: [15] [5200/6250] eta: 0:02:49 lr: 0.000121 grad: 0.0666 (0.0703) loss: 0.8441 (0.8416) time: 0.1671 data: 0.0793 max mem: 9377 +Train: [15] [5300/6250] eta: 0:02:33 lr: 0.000121 grad: 0.0616 (0.0702) loss: 0.8425 (0.8416) time: 0.1591 data: 0.0719 max mem: 9377 +Train: [15] [5400/6250] eta: 0:02:17 lr: 0.000121 grad: 0.0630 (0.0701) loss: 0.8448 (0.8416) time: 0.1753 data: 0.0922 max mem: 9377 +Train: [15] [5500/6250] eta: 0:02:01 lr: 0.000121 grad: 0.0659 (0.0701) loss: 0.8362 (0.8416) time: 0.1577 data: 0.0633 max mem: 9377 +Train: [15] [5600/6250] eta: 0:01:44 lr: 0.000121 grad: 0.0663 (0.0701) loss: 0.8393 (0.8416) time: 0.1505 data: 0.0684 max mem: 9377 +Train: [15] [5700/6250] eta: 0:01:28 lr: 0.000121 grad: 0.0653 (0.0701) loss: 0.8416 (0.8416) time: 0.1844 data: 0.0955 max mem: 9377 +Train: [15] [5800/6250] eta: 0:01:12 lr: 0.000121 grad: 0.0711 (0.0702) loss: 0.8336 (0.8415) time: 0.1691 data: 0.0828 max mem: 9377 +Train: [15] [5900/6250] eta: 0:00:56 lr: 0.000121 grad: 0.0640 (0.0701) loss: 0.8411 (0.8415) time: 0.1714 data: 0.0849 max mem: 9377 +Train: [15] [6000/6250] eta: 0:00:40 lr: 0.000121 grad: 0.0656 (0.0701) loss: 0.8360 (0.8415) time: 0.1620 data: 0.0731 max mem: 9377 +Train: [15] [6100/6250] eta: 0:00:24 lr: 0.000121 grad: 0.0640 (0.0701) loss: 0.8422 (0.8414) time: 0.1557 data: 0.0666 max mem: 9377 +Train: [15] [6200/6250] eta: 0:00:08 lr: 0.000121 grad: 0.0682 (0.0700) loss: 0.8395 (0.8414) time: 0.1658 data: 0.0791 max mem: 9377 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.0671 (0.0700) loss: 0.8383 (0.8414) time: 0.1515 data: 0.0682 max mem: 9377 +Train: [15] Total time: 0:16:57 (0.1627 s / it) +Averaged stats: lr: 0.000121 grad: 0.0671 (0.0700) loss: 0.8383 (0.8414) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:06:17 loss: 0.8459 (0.8459) time: 6.0877 data: 6.0429 max mem: 9377 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8402 (0.8428) time: 0.1792 data: 0.1524 max mem: 9377 +Eval (hcp-train-subset): [15] Total time: 0:00:15 (0.2481 s / it) +Averaged stats (hcp-train-subset): loss: 0.8402 (0.8428) +Eval (hcp-val): [15] [ 0/62] eta: 0:05:46 loss: 0.8428 (0.8428) time: 5.5925 data: 5.5582 max mem: 9377 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8457 (0.8454) time: 0.1040 data: 0.0792 max mem: 9377 +Eval (hcp-val): [15] Total time: 0:00:14 (0.2292 s / it) +Averaged stats (hcp-val): loss: 0.8457 (0.8454) +Eval (nsd-val): [15] [ 0/62] eta: 0:05:40 loss: 0.8079 (0.8079) time: 5.4877 data: 5.4403 max mem: 9377 +Eval (nsd-val): [15] [61/62] eta: 0:00:00 loss: 0.8150 (0.8169) time: 0.1541 data: 0.1286 max mem: 9377 +Eval (nsd-val): [15] Total time: 0:00:15 (0.2445 s / it) +Averaged stats (nsd-val): loss: 0.8150 (0.8169) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [16] [ 0/6250] eta: 11:18:33 lr: 0.000121 grad: 0.0785 (0.0785) loss: 0.8282 (0.8282) time: 6.5142 data: 6.4074 max mem: 9377 +Train: [16] [ 100/6250] eta: 0:24:02 lr: 0.000121 grad: 0.0627 (0.0688) loss: 0.8394 (0.8516) time: 0.1748 data: 0.0700 max mem: 9377 +Train: [16] [ 200/6250] eta: 0:20:16 lr: 0.000121 grad: 0.0652 (0.0685) loss: 0.8453 (0.8497) time: 0.1642 data: 0.0743 max mem: 9377 +Train: [16] [ 300/6250] eta: 0:19:06 lr: 0.000121 grad: 0.0637 (0.0697) loss: 0.8466 (0.8484) time: 0.1572 data: 0.0616 max mem: 9377 +Train: [16] [ 400/6250] eta: 0:18:23 lr: 0.000121 grad: 0.0652 (0.0699) loss: 0.8483 (0.8478) time: 0.1697 data: 0.0721 max mem: 9377 +Train: [16] [ 500/6250] eta: 0:17:46 lr: 0.000121 grad: 0.0679 (0.0703) loss: 0.8524 (0.8476) time: 0.1667 data: 0.0686 max mem: 9377 +Train: [16] [ 600/6250] eta: 0:17:27 lr: 0.000121 grad: 0.0676 (0.0702) loss: 0.8417 (0.8474) time: 0.1651 data: 0.0755 max mem: 9377 +Train: [16] [ 700/6250] eta: 0:16:58 lr: 0.000121 grad: 0.0648 (0.0696) loss: 0.8443 (0.8471) time: 0.1627 data: 0.0637 max mem: 9377 +Train: [16] [ 800/6250] eta: 0:16:19 lr: 0.000121 grad: 0.0650 (0.0693) loss: 0.8425 (0.8469) time: 0.1418 data: 0.0484 max mem: 9377 +Train: [16] [ 900/6250] eta: 0:15:47 lr: 0.000121 grad: 0.0651 (0.0690) loss: 0.8455 (0.8469) time: 0.1643 data: 0.0815 max mem: 9377 +Train: [16] [1000/6250] eta: 0:15:22 lr: 0.000121 grad: 0.0664 (0.0687) loss: 0.8504 (0.8469) time: 0.1455 data: 0.0639 max mem: 9377 +Train: [16] [1100/6250] eta: 0:14:52 lr: 0.000121 grad: 0.0679 (0.0688) loss: 0.8403 (0.8467) time: 0.1332 data: 0.0478 max mem: 9377 +Train: [16] [1200/6250] eta: 0:14:27 lr: 0.000121 grad: 0.0651 (0.0687) loss: 0.8423 (0.8465) time: 0.1374 data: 0.0560 max mem: 9377 +Train: [16] [1300/6250] eta: 0:13:58 lr: 0.000121 grad: 0.0649 (0.0686) loss: 0.8471 (0.8464) time: 0.1412 data: 0.0403 max mem: 9377 +Train: [16] [1400/6250] eta: 0:13:36 lr: 0.000121 grad: 0.0594 (0.0685) loss: 0.8464 (0.8462) time: 0.1581 data: 0.0698 max mem: 9377 +Train: [16] [1500/6250] eta: 0:13:11 lr: 0.000121 grad: 0.0653 (0.0685) loss: 0.8436 (0.8461) time: 0.1418 data: 0.0421 max mem: 9377 +Train: [16] [1600/6250] eta: 0:12:50 lr: 0.000121 grad: 0.0694 (0.0685) loss: 0.8434 (0.8458) time: 0.1545 data: 0.0518 max mem: 9377 +Train: [16] [1700/6250] eta: 0:12:31 lr: 0.000121 grad: 0.0668 (0.0686) loss: 0.8453 (0.8457) time: 0.1467 data: 0.0569 max mem: 9377 +Train: [16] [1800/6250] eta: 0:12:12 lr: 0.000121 grad: 0.0689 (0.0687) loss: 0.8453 (0.8454) time: 0.1753 data: 0.0908 max mem: 9377 +Train: [16] [1900/6250] eta: 0:11:53 lr: 0.000121 grad: 0.0708 (0.0688) loss: 0.8420 (0.8451) time: 0.1557 data: 0.0598 max mem: 9377 +Train: [16] [2000/6250] eta: 0:11:36 lr: 0.000121 grad: 0.0677 (0.0687) loss: 0.8355 (0.8449) time: 0.1584 data: 0.0687 max mem: 9377 +Train: [16] [2100/6250] eta: 0:11:18 lr: 0.000121 grad: 0.0685 (0.0688) loss: 0.8412 (0.8446) time: 0.1411 data: 0.0489 max mem: 9377 +Train: [16] [2200/6250] eta: 0:10:59 lr: 0.000121 grad: 0.0655 (0.0688) loss: 0.8420 (0.8444) time: 0.1456 data: 0.0626 max mem: 9377 +Train: [16] [2300/6250] eta: 0:10:43 lr: 0.000121 grad: 0.0666 (0.0688) loss: 0.8375 (0.8442) time: 0.1584 data: 0.0779 max mem: 9377 +Train: [16] [2400/6250] eta: 0:10:27 lr: 0.000121 grad: 0.0653 (0.0688) loss: 0.8422 (0.8441) time: 0.1710 data: 0.0839 max mem: 9377 +Train: [16] [2500/6250] eta: 0:10:09 lr: 0.000121 grad: 0.0639 (0.0688) loss: 0.8416 (0.8440) time: 0.1444 data: 0.0653 max mem: 9377 +Train: [16] [2600/6250] eta: 0:09:51 lr: 0.000121 grad: 0.0702 (0.0688) loss: 0.8407 (0.8439) time: 0.1294 data: 0.0393 max mem: 9377 +Train: [16] [2700/6250] eta: 0:09:34 lr: 0.000121 grad: 0.0660 (0.0689) loss: 0.8438 (0.8439) time: 0.1458 data: 0.0548 max mem: 9377 +Train: [16] [2800/6250] eta: 0:09:18 lr: 0.000121 grad: 0.0625 (0.0688) loss: 0.8383 (0.8439) time: 0.1338 data: 0.0492 max mem: 9377 +Train: [16] [2900/6250] eta: 0:09:01 lr: 0.000121 grad: 0.0652 (0.0688) loss: 0.8449 (0.8439) time: 0.1511 data: 0.0654 max mem: 9377 +Train: [16] [3000/6250] eta: 0:08:43 lr: 0.000121 grad: 0.0667 (0.0687) loss: 0.8436 (0.8438) time: 0.1551 data: 0.0702 max mem: 9377 +Train: [16] [3100/6250] eta: 0:08:26 lr: 0.000121 grad: 0.0680 (0.0688) loss: 0.8398 (0.8438) time: 0.1458 data: 0.0530 max mem: 9377 +Train: [16] [3200/6250] eta: 0:08:09 lr: 0.000121 grad: 0.0628 (0.0688) loss: 0.8432 (0.8438) time: 0.1319 data: 0.0361 max mem: 9377 +Train: [16] [3300/6250] eta: 0:07:53 lr: 0.000121 grad: 0.0691 (0.0688) loss: 0.8373 (0.8438) time: 0.1587 data: 0.0656 max mem: 9377 +Train: [16] [3400/6250] eta: 0:07:37 lr: 0.000121 grad: 0.0680 (0.0688) loss: 0.8418 (0.8438) time: 0.1587 data: 0.0709 max mem: 9377 +Train: [16] [3500/6250] eta: 0:07:21 lr: 0.000120 grad: 0.0687 (0.0688) loss: 0.8376 (0.8438) time: 0.1517 data: 0.0678 max mem: 9377 +Train: [16] [3600/6250] eta: 0:07:04 lr: 0.000120 grad: 0.0648 (0.0688) loss: 0.8347 (0.8437) time: 0.1517 data: 0.0612 max mem: 9377 +Train: [16] [3700/6250] eta: 0:06:48 lr: 0.000120 grad: 0.0658 (0.0687) loss: 0.8475 (0.8438) time: 0.1273 data: 0.0391 max mem: 9377 +Train: [16] [3800/6250] eta: 0:06:31 lr: 0.000120 grad: 0.0664 (0.0687) loss: 0.8492 (0.8438) time: 0.1541 data: 0.0718 max mem: 9377 +Train: [16] [3900/6250] eta: 0:06:15 lr: 0.000120 grad: 0.0650 (0.0686) loss: 0.8462 (0.8438) time: 0.1438 data: 0.0638 max mem: 9377 +Train: [16] [4000/6250] eta: 0:05:59 lr: 0.000120 grad: 0.0677 (0.0686) loss: 0.8423 (0.8438) time: 0.1392 data: 0.0544 max mem: 9377 +Train: [16] [4100/6250] eta: 0:05:43 lr: 0.000120 grad: 0.0688 (0.0687) loss: 0.8476 (0.8438) time: 0.1752 data: 0.0851 max mem: 9377 +Train: [16] [4200/6250] eta: 0:05:27 lr: 0.000120 grad: 0.0630 (0.0687) loss: 0.8384 (0.8437) time: 0.1476 data: 0.0655 max mem: 9377 +Train: [16] [4300/6250] eta: 0:05:11 lr: 0.000120 grad: 0.0664 (0.0686) loss: 0.8423 (0.8436) time: 0.1682 data: 0.0929 max mem: 9377 +Train: [16] [4400/6250] eta: 0:04:55 lr: 0.000120 grad: 0.0641 (0.0686) loss: 0.8398 (0.8436) time: 0.1689 data: 0.0830 max mem: 9377 +Train: [16] [4500/6250] eta: 0:04:39 lr: 0.000120 grad: 0.0621 (0.0686) loss: 0.8396 (0.8435) time: 0.1613 data: 0.0772 max mem: 9377 +Train: [16] [4600/6250] eta: 0:04:23 lr: 0.000120 grad: 0.0686 (0.0686) loss: 0.8419 (0.8434) time: 0.1396 data: 0.0518 max mem: 9377 +Train: [16] [4700/6250] eta: 0:04:07 lr: 0.000120 grad: 0.0647 (0.0686) loss: 0.8423 (0.8434) time: 0.1395 data: 0.0534 max mem: 9377 +Train: [16] [4800/6250] eta: 0:03:51 lr: 0.000120 grad: 0.0664 (0.0686) loss: 0.8477 (0.8434) time: 0.1423 data: 0.0558 max mem: 9377 +Train: [16] [4900/6250] eta: 0:03:35 lr: 0.000120 grad: 0.0676 (0.0685) loss: 0.8413 (0.8433) time: 0.1186 data: 0.0303 max mem: 9377 +Train: [16] [5000/6250] eta: 0:03:19 lr: 0.000120 grad: 0.0704 (0.0686) loss: 0.8369 (0.8433) time: 0.1698 data: 0.0841 max mem: 9377 +Train: [16] [5100/6250] eta: 0:03:03 lr: 0.000120 grad: 0.0679 (0.0686) loss: 0.8357 (0.8432) time: 0.1594 data: 0.0694 max mem: 9377 +Train: [16] [5200/6250] eta: 0:02:47 lr: 0.000120 grad: 0.0694 (0.0687) loss: 0.8353 (0.8431) time: 0.1558 data: 0.0648 max mem: 9377 +Train: [16] [5300/6250] eta: 0:02:31 lr: 0.000120 grad: 0.0703 (0.0687) loss: 0.8397 (0.8430) time: 0.1470 data: 0.0613 max mem: 9377 +Train: [16] [5400/6250] eta: 0:02:15 lr: 0.000120 grad: 0.0689 (0.0687) loss: 0.8462 (0.8430) time: 0.1540 data: 0.0585 max mem: 9377 +Train: [16] [5500/6250] eta: 0:01:59 lr: 0.000120 grad: 0.0686 (0.0687) loss: 0.8403 (0.8430) time: 0.1588 data: 0.0681 max mem: 9377 +Train: [16] [5600/6250] eta: 0:01:43 lr: 0.000120 grad: 0.0738 (0.0688) loss: 0.8432 (0.8430) time: 0.1735 data: 0.0795 max mem: 9377 +Train: [16] [5700/6250] eta: 0:01:27 lr: 0.000120 grad: 0.0666 (0.0688) loss: 0.8350 (0.8429) time: 0.1954 data: 0.1071 max mem: 9377 +Train: [16] [5800/6250] eta: 0:01:11 lr: 0.000120 grad: 0.0676 (0.0689) loss: 0.8412 (0.8429) time: 0.1583 data: 0.0679 max mem: 9377 +Train: [16] [5900/6250] eta: 0:00:55 lr: 0.000120 grad: 0.0744 (0.0689) loss: 0.8364 (0.8428) time: 0.1602 data: 0.0739 max mem: 9377 +Train: [16] [6000/6250] eta: 0:00:39 lr: 0.000120 grad: 0.0726 (0.0690) loss: 0.8427 (0.8427) time: 0.1773 data: 0.0886 max mem: 9377 +Train: [16] [6100/6250] eta: 0:00:23 lr: 0.000120 grad: 0.0664 (0.0690) loss: 0.8396 (0.8427) time: 0.1608 data: 0.0709 max mem: 9377 +Train: [16] [6200/6250] eta: 0:00:07 lr: 0.000120 grad: 0.0673 (0.0691) loss: 0.8375 (0.8426) time: 0.1699 data: 0.0820 max mem: 9377 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.0710 (0.0691) loss: 0.8396 (0.8426) time: 0.1817 data: 0.0939 max mem: 9377 +Train: [16] Total time: 0:16:44 (0.1607 s / it) +Averaged stats: lr: 0.000120 grad: 0.0710 (0.0691) loss: 0.8396 (0.8426) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:04:52 loss: 0.8456 (0.8456) time: 4.7257 data: 4.6962 max mem: 9377 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8436 (0.8432) time: 0.1316 data: 0.1065 max mem: 9377 +Eval (hcp-train-subset): [16] Total time: 0:00:14 (0.2390 s / it) +Averaged stats (hcp-train-subset): loss: 0.8436 (0.8432) +Eval (hcp-val): [16] [ 0/62] eta: 0:05:51 loss: 0.8419 (0.8419) time: 5.6731 data: 5.6446 max mem: 9377 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8447 (0.8456) time: 0.1304 data: 0.1051 max mem: 9377 +Eval (hcp-val): [16] Total time: 0:00:14 (0.2368 s / it) +Averaged stats (hcp-val): loss: 0.8447 (0.8456) +Eval (nsd-val): [16] [ 0/62] eta: 0:04:10 loss: 0.8056 (0.8056) time: 4.0446 data: 3.9696 max mem: 9377 +Eval (nsd-val): [16] [61/62] eta: 0:00:00 loss: 0.8178 (0.8187) time: 0.1449 data: 0.1193 max mem: 9377 +Eval (nsd-val): [16] Total time: 0:00:14 (0.2397 s / it) +Averaged stats (nsd-val): loss: 0.8178 (0.8187) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [17] [ 0/6250] eta: 10:25:42 lr: 0.000120 grad: 0.0754 (0.0754) loss: 0.8505 (0.8505) time: 6.0068 data: 5.9016 max mem: 9377 +Train: [17] [ 100/6250] eta: 0:22:33 lr: 0.000120 grad: 0.0651 (0.0846) loss: 0.8539 (0.8511) time: 0.1638 data: 0.0615 max mem: 9377 +Train: [17] [ 200/6250] eta: 0:19:27 lr: 0.000120 grad: 0.0689 (0.0786) loss: 0.8413 (0.8468) time: 0.1572 data: 0.0634 max mem: 9377 +Train: [17] [ 300/6250] eta: 0:18:06 lr: 0.000120 grad: 0.0643 (0.0777) loss: 0.8419 (0.8434) time: 0.1578 data: 0.0636 max mem: 9377 +Train: [17] [ 400/6250] eta: 0:17:17 lr: 0.000120 grad: 0.0702 (0.0759) loss: 0.8383 (0.8425) time: 0.1345 data: 0.0308 max mem: 9377 +Train: [17] [ 500/6250] eta: 0:16:34 lr: 0.000120 grad: 0.0695 (0.0757) loss: 0.8443 (0.8420) time: 0.1742 data: 0.0738 max mem: 9377 +Train: [17] [ 600/6250] eta: 0:16:15 lr: 0.000120 grad: 0.0723 (0.0752) loss: 0.8452 (0.8415) time: 0.1863 data: 0.0939 max mem: 9377 +Train: [17] [ 700/6250] eta: 0:15:57 lr: 0.000120 grad: 0.0680 (0.0745) loss: 0.8407 (0.8413) time: 0.1615 data: 0.0748 max mem: 9377 +Train: [17] [ 800/6250] eta: 0:15:30 lr: 0.000120 grad: 0.0664 (0.0740) loss: 0.8371 (0.8409) time: 0.1661 data: 0.0808 max mem: 9377 +Train: [17] [ 900/6250] eta: 0:15:03 lr: 0.000120 grad: 0.0645 (0.0737) loss: 0.8398 (0.8410) time: 0.1403 data: 0.0459 max mem: 9377 +Train: [17] [1000/6250] eta: 0:14:51 lr: 0.000120 grad: 0.0697 (0.0735) loss: 0.8375 (0.8409) time: 0.1956 data: 0.1195 max mem: 9377 +Train: [17] [1100/6250] eta: 0:14:35 lr: 0.000120 grad: 0.0659 (0.0733) loss: 0.8397 (0.8408) time: 0.1841 data: 0.0892 max mem: 9377 +Train: [17] [1200/6250] eta: 0:14:20 lr: 0.000120 grad: 0.0708 (0.0736) loss: 0.8373 (0.8408) time: 0.1822 data: 0.0934 max mem: 9377 +Train: [17] [1300/6250] eta: 0:14:01 lr: 0.000120 grad: 0.0660 (0.0733) loss: 0.8428 (0.8407) time: 0.1598 data: 0.0537 max mem: 9377 +Train: [17] [1400/6250] eta: 0:13:48 lr: 0.000120 grad: 0.0653 (0.0730) loss: 0.8379 (0.8407) time: 0.1709 data: 0.0705 max mem: 9377 +Train: [17] [1500/6250] eta: 0:13:34 lr: 0.000120 grad: 0.0699 (0.0728) loss: 0.8371 (0.8407) time: 0.1587 data: 0.0587 max mem: 9377 +Train: [17] [1600/6250] eta: 0:13:14 lr: 0.000120 grad: 0.0713 (0.0726) loss: 0.8376 (0.8406) time: 0.1721 data: 0.0792 max mem: 9377 +Train: [17] [1700/6250] eta: 0:12:54 lr: 0.000120 grad: 0.0756 (0.0726) loss: 0.8421 (0.8405) time: 0.1586 data: 0.0624 max mem: 9377 +Train: [17] [1800/6250] eta: 0:12:36 lr: 0.000120 grad: 0.0726 (0.0727) loss: 0.8333 (0.8405) time: 0.1966 data: 0.1125 max mem: 9377 +Train: [17] [1900/6250] eta: 0:12:16 lr: 0.000120 grad: 0.0671 (0.0725) loss: 0.8426 (0.8404) time: 0.1662 data: 0.0729 max mem: 9377 +Train: [17] [2000/6250] eta: 0:11:58 lr: 0.000120 grad: 0.0670 (0.0725) loss: 0.8376 (0.8404) time: 0.1876 data: 0.0974 max mem: 9377 +Train: [17] [2100/6250] eta: 0:11:38 lr: 0.000120 grad: 0.0696 (0.0724) loss: 0.8383 (0.8404) time: 0.1654 data: 0.0832 max mem: 9377 +Train: [17] [2200/6250] eta: 0:11:20 lr: 0.000120 grad: 0.0652 (0.0724) loss: 0.8422 (0.8404) time: 0.1763 data: 0.0859 max mem: 9377 +Train: [17] [2300/6250] eta: 0:11:02 lr: 0.000120 grad: 0.0697 (0.0724) loss: 0.8370 (0.8404) time: 0.1554 data: 0.0674 max mem: 9377 +Train: [17] [2400/6250] eta: 0:10:44 lr: 0.000120 grad: 0.0659 (0.0722) loss: 0.8383 (0.8403) time: 0.1697 data: 0.0861 max mem: 9377 +Train: [17] [2500/6250] eta: 0:10:26 lr: 0.000120 grad: 0.0658 (0.0722) loss: 0.8399 (0.8404) time: 0.1555 data: 0.0677 max mem: 9377 +Train: [17] [2600/6250] eta: 0:10:08 lr: 0.000120 grad: 0.0678 (0.0721) loss: 0.8394 (0.8403) time: 0.1679 data: 0.0779 max mem: 9377 +Train: [17] [2700/6250] eta: 0:09:50 lr: 0.000120 grad: 0.0719 (0.0721) loss: 0.8360 (0.8402) time: 0.1593 data: 0.0753 max mem: 9377 +Train: [17] [2800/6250] eta: 0:09:33 lr: 0.000120 grad: 0.0700 (0.0720) loss: 0.8381 (0.8402) time: 0.1590 data: 0.0751 max mem: 9377 +Train: [17] [2900/6250] eta: 0:09:15 lr: 0.000120 grad: 0.0653 (0.0720) loss: 0.8409 (0.8402) time: 0.1450 data: 0.0599 max mem: 9377 +Train: [17] [3000/6250] eta: 0:08:58 lr: 0.000120 grad: 0.0688 (0.0720) loss: 0.8356 (0.8401) time: 0.1770 data: 0.0873 max mem: 9377 +Train: [17] [3100/6250] eta: 0:08:39 lr: 0.000120 grad: 0.0727 (0.0720) loss: 0.8393 (0.8401) time: 0.1509 data: 0.0548 max mem: 9377 +Train: [17] [3200/6250] eta: 0:08:21 lr: 0.000120 grad: 0.0676 (0.0718) loss: 0.8387 (0.8401) time: 0.1085 data: 0.0209 max mem: 9377 +Train: [17] [3300/6250] eta: 0:08:03 lr: 0.000120 grad: 0.0642 (0.0718) loss: 0.8401 (0.8400) time: 0.1493 data: 0.0578 max mem: 9377 +Train: [17] [3400/6250] eta: 0:07:46 lr: 0.000120 grad: 0.0678 (0.0717) loss: 0.8366 (0.8400) time: 0.1469 data: 0.0630 max mem: 9377 +Train: [17] [3500/6250] eta: 0:07:29 lr: 0.000120 grad: 0.0664 (0.0717) loss: 0.8391 (0.8399) time: 0.1723 data: 0.0832 max mem: 9377 +Train: [17] [3600/6250] eta: 0:07:12 lr: 0.000120 grad: 0.0683 (0.0717) loss: 0.8438 (0.8398) time: 0.1698 data: 0.0819 max mem: 9377 +Train: [17] [3700/6250] eta: 0:06:56 lr: 0.000120 grad: 0.0748 (0.0716) loss: 0.8371 (0.8397) time: 0.1857 data: 0.0997 max mem: 9377 +Train: [17] [3800/6250] eta: 0:06:39 lr: 0.000120 grad: 0.0747 (0.0717) loss: 0.8358 (0.8396) time: 0.1701 data: 0.0848 max mem: 9377 +Train: [17] [3900/6250] eta: 0:06:22 lr: 0.000120 grad: 0.0646 (0.0717) loss: 0.8368 (0.8395) time: 0.1459 data: 0.0590 max mem: 9377 +Train: [17] [4000/6250] eta: 0:06:04 lr: 0.000120 grad: 0.0701 (0.0717) loss: 0.8377 (0.8394) time: 0.1292 data: 0.0344 max mem: 9377 +Train: [17] [4100/6250] eta: 0:05:48 lr: 0.000120 grad: 0.0693 (0.0717) loss: 0.8390 (0.8393) time: 0.1546 data: 0.0762 max mem: 9377 +Train: [17] [4200/6250] eta: 0:05:31 lr: 0.000120 grad: 0.0683 (0.0717) loss: 0.8360 (0.8392) time: 0.1665 data: 0.0838 max mem: 9377 +Train: [17] [4300/6250] eta: 0:05:15 lr: 0.000120 grad: 0.0701 (0.0717) loss: 0.8388 (0.8391) time: 0.1240 data: 0.0346 max mem: 9377 +Train: [17] [4400/6250] eta: 0:04:58 lr: 0.000120 grad: 0.0696 (0.0718) loss: 0.8338 (0.8390) time: 0.1369 data: 0.0547 max mem: 9377 +Train: [17] [4500/6250] eta: 0:04:42 lr: 0.000120 grad: 0.0723 (0.0718) loss: 0.8394 (0.8389) time: 0.2075 data: 0.1217 max mem: 9377 +Train: [17] [4600/6250] eta: 0:04:26 lr: 0.000120 grad: 0.0702 (0.0718) loss: 0.8358 (0.8388) time: 0.1584 data: 0.0648 max mem: 9377 +Train: [17] [4700/6250] eta: 0:04:10 lr: 0.000120 grad: 0.0664 (0.0718) loss: 0.8395 (0.8387) time: 0.1438 data: 0.0463 max mem: 9377 +Train: [17] [4800/6250] eta: 0:03:54 lr: 0.000120 grad: 0.0668 (0.0718) loss: 0.8394 (0.8387) time: 0.1795 data: 0.0926 max mem: 9377 +Train: [17] [4900/6250] eta: 0:03:38 lr: 0.000119 grad: 0.0683 (0.0718) loss: 0.8353 (0.8387) time: 0.1638 data: 0.0717 max mem: 9377 +Train: [17] [5000/6250] eta: 0:03:22 lr: 0.000119 grad: 0.0728 (0.0718) loss: 0.8375 (0.8387) time: 0.1571 data: 0.0703 max mem: 9377 +Train: [17] [5100/6250] eta: 0:03:06 lr: 0.000119 grad: 0.0695 (0.0718) loss: 0.8367 (0.8387) time: 0.1584 data: 0.0726 max mem: 9377 +Train: [17] [5200/6250] eta: 0:02:49 lr: 0.000119 grad: 0.0654 (0.0718) loss: 0.8393 (0.8387) time: 0.1783 data: 0.0930 max mem: 9377 +Train: [17] [5300/6250] eta: 0:02:33 lr: 0.000119 grad: 0.0695 (0.0718) loss: 0.8400 (0.8388) time: 0.1448 data: 0.0569 max mem: 9377 +Train: [17] [5400/6250] eta: 0:02:17 lr: 0.000119 grad: 0.0687 (0.0717) loss: 0.8396 (0.8388) time: 0.1612 data: 0.0748 max mem: 9377 +Train: [17] [5500/6250] eta: 0:02:01 lr: 0.000119 grad: 0.0655 (0.0717) loss: 0.8409 (0.8388) time: 0.1533 data: 0.0728 max mem: 9377 +Train: [17] [5600/6250] eta: 0:01:45 lr: 0.000119 grad: 0.0690 (0.0717) loss: 0.8400 (0.8389) time: 0.1667 data: 0.0749 max mem: 9377 +Train: [17] [5700/6250] eta: 0:01:28 lr: 0.000119 grad: 0.0653 (0.0716) loss: 0.8382 (0.8389) time: 0.1538 data: 0.0664 max mem: 9377 +Train: [17] [5800/6250] eta: 0:01:12 lr: 0.000119 grad: 0.0737 (0.0716) loss: 0.8378 (0.8389) time: 0.1594 data: 0.0655 max mem: 9377 +Train: [17] [5900/6250] eta: 0:00:56 lr: 0.000119 grad: 0.0638 (0.0716) loss: 0.8382 (0.8389) time: 0.1645 data: 0.0785 max mem: 9377 +Train: [17] [6000/6250] eta: 0:00:40 lr: 0.000119 grad: 0.0654 (0.0715) loss: 0.8397 (0.8389) time: 0.1702 data: 0.0834 max mem: 9377 +Train: [17] [6100/6250] eta: 0:00:24 lr: 0.000119 grad: 0.0678 (0.0715) loss: 0.8407 (0.8389) time: 0.2437 data: 0.1691 max mem: 9377 +Train: [17] [6200/6250] eta: 0:00:08 lr: 0.000119 grad: 0.0637 (0.0715) loss: 0.8443 (0.8390) time: 0.2129 data: 0.1319 max mem: 9377 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.0703 (0.0715) loss: 0.8396 (0.8390) time: 0.1682 data: 0.0852 max mem: 9377 +Train: [17] Total time: 0:16:58 (0.1629 s / it) +Averaged stats: lr: 0.000119 grad: 0.0703 (0.0715) loss: 0.8396 (0.8390) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:05:14 loss: 0.8430 (0.8430) time: 5.0771 data: 5.0463 max mem: 9377 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8405 (0.8413) time: 0.1178 data: 0.0926 max mem: 9377 +Eval (hcp-train-subset): [17] Total time: 0:00:13 (0.2257 s / it) +Averaged stats (hcp-train-subset): loss: 0.8405 (0.8413) +Eval (hcp-val): [17] [ 0/62] eta: 0:03:20 loss: 0.8427 (0.8427) time: 3.2395 data: 3.1795 max mem: 9377 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8460 (0.8452) time: 0.1235 data: 0.0980 max mem: 9377 +Eval (hcp-val): [17] Total time: 0:00:13 (0.2231 s / it) +Averaged stats (hcp-val): loss: 0.8460 (0.8452) +Eval (nsd-val): [17] [ 0/62] eta: 0:03:18 loss: 0.8087 (0.8087) time: 3.1987 data: 3.1147 max mem: 9377 +Eval (nsd-val): [17] [61/62] eta: 0:00:00 loss: 0.8143 (0.8169) time: 0.1436 data: 0.1181 max mem: 9377 +Eval (nsd-val): [17] Total time: 0:00:14 (0.2290 s / it) +Averaged stats (nsd-val): loss: 0.8143 (0.8169) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [18] [ 0/6250] eta: 9:47:02 lr: 0.000119 grad: 0.1079 (0.1079) loss: 0.8458 (0.8458) time: 5.6356 data: 5.4656 max mem: 9377 +Train: [18] [ 100/6250] eta: 0:22:35 lr: 0.000119 grad: 0.0721 (0.0758) loss: 0.8449 (0.8459) time: 0.1659 data: 0.0658 max mem: 9377 +Train: [18] [ 200/6250] eta: 0:19:17 lr: 0.000119 grad: 0.0663 (0.0738) loss: 0.8425 (0.8422) time: 0.1621 data: 0.0607 max mem: 9377 +Train: [18] [ 300/6250] eta: 0:17:54 lr: 0.000119 grad: 0.0648 (0.0719) loss: 0.8403 (0.8416) time: 0.1779 data: 0.0907 max mem: 9377 +Train: [18] [ 400/6250] eta: 0:16:50 lr: 0.000119 grad: 0.0664 (0.0714) loss: 0.8416 (0.8405) time: 0.1442 data: 0.0515 max mem: 9377 +Train: [18] [ 500/6250] eta: 0:16:21 lr: 0.000119 grad: 0.0633 (0.0708) loss: 0.8413 (0.8401) time: 0.1406 data: 0.0225 max mem: 9377 +Train: [18] [ 600/6250] eta: 0:16:31 lr: 0.000119 grad: 0.0662 (0.0704) loss: 0.8351 (0.8394) time: 0.1890 data: 0.0866 max mem: 9377 +Train: [18] [ 700/6250] eta: 0:16:31 lr: 0.000119 grad: 0.0684 (0.0700) loss: 0.8366 (0.8389) time: 0.2357 data: 0.1283 max mem: 9377 +Train: [18] [ 800/6250] eta: 0:16:05 lr: 0.000119 grad: 0.0689 (0.0698) loss: 0.8349 (0.8386) time: 0.1491 data: 0.0593 max mem: 9377 +Train: [18] [ 900/6250] eta: 0:15:40 lr: 0.000119 grad: 0.0669 (0.0697) loss: 0.8396 (0.8382) time: 0.1931 data: 0.0978 max mem: 9377 +Train: [18] [1000/6250] eta: 0:15:15 lr: 0.000119 grad: 0.0631 (0.0696) loss: 0.8396 (0.8382) time: 0.1455 data: 0.0460 max mem: 9377 +Train: [18] [1100/6250] eta: 0:15:02 lr: 0.000119 grad: 0.0670 (0.0696) loss: 0.8344 (0.8380) time: 0.1597 data: 0.0684 max mem: 9377 +Train: [18] [1200/6250] eta: 0:14:43 lr: 0.000119 grad: 0.0661 (0.0696) loss: 0.8356 (0.8378) time: 0.1398 data: 0.0422 max mem: 9377 +Train: [18] [1300/6250] eta: 0:14:21 lr: 0.000119 grad: 0.0665 (0.0698) loss: 0.8446 (0.8376) time: 0.1593 data: 0.0785 max mem: 9377 +Train: [18] [1400/6250] eta: 0:13:59 lr: 0.000119 grad: 0.0676 (0.0700) loss: 0.8359 (0.8374) time: 0.1632 data: 0.0671 max mem: 9377 +Train: [18] [1500/6250] eta: 0:13:44 lr: 0.000119 grad: 0.0672 (0.0699) loss: 0.8389 (0.8373) time: 0.1664 data: 0.0679 max mem: 9377 +Train: [18] [1600/6250] eta: 0:13:25 lr: 0.000119 grad: 0.0680 (0.0700) loss: 0.8370 (0.8372) time: 0.1691 data: 0.0740 max mem: 9377 +Train: [18] [1700/6250] eta: 0:13:03 lr: 0.000119 grad: 0.0656 (0.0699) loss: 0.8388 (0.8372) time: 0.1507 data: 0.0560 max mem: 9377 +Train: [18] [1800/6250] eta: 0:12:43 lr: 0.000119 grad: 0.0672 (0.0699) loss: 0.8328 (0.8371) time: 0.1541 data: 0.0644 max mem: 9377 +Train: [18] [1900/6250] eta: 0:12:25 lr: 0.000119 grad: 0.0726 (0.0699) loss: 0.8327 (0.8370) time: 0.1641 data: 0.0804 max mem: 9377 +Train: [18] [2000/6250] eta: 0:12:05 lr: 0.000119 grad: 0.0668 (0.0700) loss: 0.8290 (0.8369) time: 0.1573 data: 0.0725 max mem: 9377 +Train: [18] [2100/6250] eta: 0:11:47 lr: 0.000119 grad: 0.0687 (0.0701) loss: 0.8349 (0.8367) time: 0.1693 data: 0.0860 max mem: 9377 +Train: [18] [2200/6250] eta: 0:11:28 lr: 0.000119 grad: 0.0752 (0.0702) loss: 0.8297 (0.8366) time: 0.1627 data: 0.0717 max mem: 9377 +Train: [18] [2300/6250] eta: 0:11:10 lr: 0.000119 grad: 0.0656 (0.0705) loss: 0.8310 (0.8365) time: 0.1610 data: 0.0830 max mem: 9377 +Train: [18] [2400/6250] eta: 0:10:52 lr: 0.000119 grad: 0.0698 (0.0705) loss: 0.8344 (0.8364) time: 0.1797 data: 0.0963 max mem: 9377 +Train: [18] [2500/6250] eta: 0:10:35 lr: 0.000119 grad: 0.0653 (0.0705) loss: 0.8380 (0.8363) time: 0.1770 data: 0.0961 max mem: 9377 +Train: [18] [2600/6250] eta: 0:10:17 lr: 0.000119 grad: 0.0664 (0.0705) loss: 0.8384 (0.8363) time: 0.1222 data: 0.0423 max mem: 9377 +Train: [18] [2700/6250] eta: 0:09:58 lr: 0.000119 grad: 0.0676 (0.0706) loss: 0.8341 (0.8363) time: 0.1808 data: 0.0929 max mem: 9377 +Train: [18] [2800/6250] eta: 0:09:40 lr: 0.000119 grad: 0.0692 (0.0707) loss: 0.8344 (0.8362) time: 0.1475 data: 0.0614 max mem: 9377 +Train: [18] [2900/6250] eta: 0:09:22 lr: 0.000119 grad: 0.0689 (0.0708) loss: 0.8352 (0.8361) time: 0.1638 data: 0.0713 max mem: 9377 +Train: [18] [3000/6250] eta: 0:09:04 lr: 0.000119 grad: 0.0734 (0.0708) loss: 0.8339 (0.8361) time: 0.1600 data: 0.0685 max mem: 9377 +Train: [18] [3100/6250] eta: 0:08:46 lr: 0.000119 grad: 0.0724 (0.0710) loss: 0.8311 (0.8360) time: 0.1631 data: 0.0796 max mem: 9377 +Train: [18] [3200/6250] eta: 0:08:28 lr: 0.000119 grad: 0.0730 (0.0710) loss: 0.8322 (0.8359) time: 0.1400 data: 0.0468 max mem: 9377 +Train: [18] [3300/6250] eta: 0:08:10 lr: 0.000119 grad: 0.0738 (0.0712) loss: 0.8362 (0.8358) time: 0.1389 data: 0.0389 max mem: 9377 +Train: [18] [3400/6250] eta: 0:07:53 lr: 0.000119 grad: 0.0719 (0.0712) loss: 0.8353 (0.8357) time: 0.1665 data: 0.0834 max mem: 9377 +Train: [18] [3500/6250] eta: 0:07:35 lr: 0.000119 grad: 0.0706 (0.0713) loss: 0.8313 (0.8357) time: 0.1634 data: 0.0815 max mem: 9377 +Train: [18] [3600/6250] eta: 0:07:17 lr: 0.000119 grad: 0.0658 (0.0714) loss: 0.8355 (0.8357) time: 0.1598 data: 0.0749 max mem: 9377 +Train: [18] [3700/6250] eta: 0:07:00 lr: 0.000119 grad: 0.0725 (0.0715) loss: 0.8369 (0.8357) time: 0.1548 data: 0.0705 max mem: 9377 +Train: [18] [3800/6250] eta: 0:06:42 lr: 0.000119 grad: 0.0726 (0.0715) loss: 0.8342 (0.8357) time: 0.1349 data: 0.0492 max mem: 9377 +Train: [18] [3900/6250] eta: 0:06:25 lr: 0.000119 grad: 0.0723 (0.0716) loss: 0.8370 (0.8357) time: 0.1384 data: 0.0490 max mem: 9377 +Train: [18] [4000/6250] eta: 0:06:09 lr: 0.000119 grad: 0.0685 (0.0716) loss: 0.8367 (0.8357) time: 0.1534 data: 0.0621 max mem: 9377 +Train: [18] [4100/6250] eta: 0:05:52 lr: 0.000119 grad: 0.0717 (0.0716) loss: 0.8360 (0.8357) time: 0.1578 data: 0.0664 max mem: 9377 +Train: [18] [4200/6250] eta: 0:05:36 lr: 0.000119 grad: 0.0764 (0.0717) loss: 0.8343 (0.8358) time: 0.1628 data: 0.0684 max mem: 9377 +Train: [18] [4300/6250] eta: 0:05:19 lr: 0.000119 grad: 0.0717 (0.0717) loss: 0.8383 (0.8358) time: 0.1686 data: 0.0768 max mem: 9377 +Train: [18] [4400/6250] eta: 0:05:03 lr: 0.000119 grad: 0.0701 (0.0717) loss: 0.8392 (0.8358) time: 0.1499 data: 0.0548 max mem: 9377 +Train: [18] [4500/6250] eta: 0:04:46 lr: 0.000119 grad: 0.0729 (0.0718) loss: 0.8354 (0.8359) time: 0.1400 data: 0.0463 max mem: 9377 +Train: [18] [4600/6250] eta: 0:04:30 lr: 0.000119 grad: 0.0690 (0.0718) loss: 0.8380 (0.8359) time: 0.1670 data: 0.0833 max mem: 9377 +Train: [18] [4700/6250] eta: 0:04:13 lr: 0.000119 grad: 0.0673 (0.0718) loss: 0.8364 (0.8359) time: 0.1773 data: 0.0877 max mem: 9377 +Train: [18] [4800/6250] eta: 0:03:57 lr: 0.000119 grad: 0.0745 (0.0719) loss: 0.8387 (0.8359) time: 0.1517 data: 0.0603 max mem: 9377 +Train: [18] [4900/6250] eta: 0:03:40 lr: 0.000119 grad: 0.0636 (0.0719) loss: 0.8453 (0.8360) time: 0.1491 data: 0.0505 max mem: 9377 +Train: [18] [5000/6250] eta: 0:03:24 lr: 0.000119 grad: 0.0712 (0.0719) loss: 0.8396 (0.8359) time: 0.1502 data: 0.0611 max mem: 9377 +Train: [18] [5100/6250] eta: 0:03:07 lr: 0.000119 grad: 0.0702 (0.0719) loss: 0.8354 (0.8360) time: 0.1485 data: 0.0595 max mem: 9377 +Train: [18] [5200/6250] eta: 0:02:51 lr: 0.000119 grad: 0.0689 (0.0719) loss: 0.8395 (0.8359) time: 0.1543 data: 0.0647 max mem: 9377 +Train: [18] [5300/6250] eta: 0:02:34 lr: 0.000119 grad: 0.0741 (0.0720) loss: 0.8379 (0.8360) time: 0.1526 data: 0.0638 max mem: 9377 +Train: [18] [5400/6250] eta: 0:02:18 lr: 0.000119 grad: 0.0647 (0.0720) loss: 0.8431 (0.8360) time: 0.1564 data: 0.0722 max mem: 9377 +Train: [18] [5500/6250] eta: 0:02:02 lr: 0.000119 grad: 0.0710 (0.0721) loss: 0.8391 (0.8361) time: 0.1657 data: 0.0758 max mem: 9377 +Train: [18] [5600/6250] eta: 0:01:45 lr: 0.000119 grad: 0.0708 (0.0721) loss: 0.8349 (0.8361) time: 0.1505 data: 0.0572 max mem: 9377 +Train: [18] [5700/6250] eta: 0:01:29 lr: 0.000119 grad: 0.0773 (0.0722) loss: 0.8388 (0.8361) time: 0.1451 data: 0.0605 max mem: 9377 +Train: [18] [5800/6250] eta: 0:01:13 lr: 0.000118 grad: 0.0748 (0.0722) loss: 0.8322 (0.8360) time: 0.1374 data: 0.0486 max mem: 9377 +Train: [18] [5900/6250] eta: 0:00:56 lr: 0.000118 grad: 0.0742 (0.0723) loss: 0.8283 (0.8360) time: 0.1615 data: 0.0779 max mem: 9377 +Train: [18] [6000/6250] eta: 0:00:40 lr: 0.000118 grad: 0.0733 (0.0723) loss: 0.8277 (0.8359) time: 0.2086 data: 0.1286 max mem: 9377 +Train: [18] [6100/6250] eta: 0:00:24 lr: 0.000118 grad: 0.0716 (0.0724) loss: 0.8378 (0.8359) time: 0.1314 data: 0.0455 max mem: 9377 +Train: [18] [6200/6250] eta: 0:00:08 lr: 0.000118 grad: 0.0677 (0.0724) loss: 0.8400 (0.8359) time: 0.1524 data: 0.0741 max mem: 9377 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.0724 (0.0724) loss: 0.8393 (0.8359) time: 0.1548 data: 0.0727 max mem: 9377 +Train: [18] Total time: 0:17:01 (0.1635 s / it) +Averaged stats: lr: 0.000118 grad: 0.0724 (0.0724) loss: 0.8393 (0.8359) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:04:55 loss: 0.8444 (0.8444) time: 4.7605 data: 4.7291 max mem: 9377 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8390 (0.8405) time: 0.1343 data: 0.1093 max mem: 9377 +Eval (hcp-train-subset): [18] Total time: 0:00:14 (0.2287 s / it) +Averaged stats (hcp-train-subset): loss: 0.8390 (0.8405) +Eval (hcp-val): [18] [ 0/62] eta: 0:04:39 loss: 0.8418 (0.8418) time: 4.5114 data: 4.4161 max mem: 9377 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8456 (0.8444) time: 0.1343 data: 0.1073 max mem: 9377 +Eval (hcp-val): [18] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (hcp-val): loss: 0.8456 (0.8444) +Eval (nsd-val): [18] [ 0/62] eta: 0:04:07 loss: 0.8068 (0.8068) time: 3.9869 data: 3.9050 max mem: 9377 +Eval (nsd-val): [18] [61/62] eta: 0:00:00 loss: 0.8140 (0.8163) time: 0.1339 data: 0.1085 max mem: 9377 +Eval (nsd-val): [18] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (nsd-val): loss: 0.8140 (0.8163) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [19] [ 0/6250] eta: 9:50:31 lr: 0.000118 grad: 0.0633 (0.0633) loss: 0.8131 (0.8131) time: 5.6690 data: 5.4974 max mem: 9377 +Train: [19] [ 100/6250] eta: 0:22:11 lr: 0.000118 grad: 0.0710 (0.0776) loss: 0.8420 (0.8406) time: 0.1483 data: 0.0535 max mem: 9377 +Train: [19] [ 200/6250] eta: 0:19:22 lr: 0.000118 grad: 0.0629 (0.0764) loss: 0.8372 (0.8388) time: 0.1724 data: 0.0782 max mem: 9377 +Train: [19] [ 300/6250] eta: 0:18:20 lr: 0.000118 grad: 0.0645 (0.0754) loss: 0.8460 (0.8406) time: 0.1727 data: 0.0743 max mem: 9377 +Train: [19] [ 400/6250] eta: 0:17:55 lr: 0.000118 grad: 0.0657 (0.0737) loss: 0.8350 (0.8409) time: 0.1524 data: 0.0476 max mem: 9377 +Train: [19] [ 500/6250] eta: 0:17:18 lr: 0.000118 grad: 0.0658 (0.0729) loss: 0.8384 (0.8409) time: 0.1813 data: 0.0782 max mem: 9377 +Train: [19] [ 600/6250] eta: 0:16:56 lr: 0.000118 grad: 0.0703 (0.0726) loss: 0.8338 (0.8401) time: 0.1654 data: 0.0713 max mem: 9377 +Train: [19] [ 700/6250] eta: 0:16:38 lr: 0.000118 grad: 0.0638 (0.0720) loss: 0.8439 (0.8397) time: 0.1742 data: 0.0830 max mem: 9377 +Train: [19] [ 800/6250] eta: 0:16:11 lr: 0.000118 grad: 0.0666 (0.0715) loss: 0.8401 (0.8394) time: 0.1391 data: 0.0497 max mem: 9377 +Train: [19] [ 900/6250] eta: 0:15:47 lr: 0.000118 grad: 0.0716 (0.0714) loss: 0.8378 (0.8391) time: 0.1581 data: 0.0634 max mem: 9377 +Train: [19] [1000/6250] eta: 0:15:28 lr: 0.000118 grad: 0.0666 (0.0715) loss: 0.8401 (0.8389) time: 0.1640 data: 0.0567 max mem: 9377 +Train: [19] [1100/6250] eta: 0:15:10 lr: 0.000118 grad: 0.0647 (0.0714) loss: 0.8428 (0.8386) time: 0.1679 data: 0.0663 max mem: 9377 +Train: [19] [1200/6250] eta: 0:14:48 lr: 0.000118 grad: 0.0711 (0.0715) loss: 0.8381 (0.8385) time: 0.1744 data: 0.0810 max mem: 9377 +Train: [19] [1300/6250] eta: 0:14:27 lr: 0.000118 grad: 0.0704 (0.0714) loss: 0.8332 (0.8383) time: 0.1636 data: 0.0640 max mem: 9377 +Train: [19] [1400/6250] eta: 0:13:59 lr: 0.000118 grad: 0.0674 (0.0714) loss: 0.8376 (0.8382) time: 0.1284 data: 0.0389 max mem: 9377 +Train: [19] [1500/6250] eta: 0:13:37 lr: 0.000118 grad: 0.0670 (0.0714) loss: 0.8371 (0.8381) time: 0.1584 data: 0.0648 max mem: 9377 +Train: [19] [1600/6250] eta: 0:13:15 lr: 0.000118 grad: 0.0690 (0.0715) loss: 0.8386 (0.8380) time: 0.1293 data: 0.0316 max mem: 9377 +Train: [19] [1700/6250] eta: 0:12:53 lr: 0.000118 grad: 0.0709 (0.0715) loss: 0.8337 (0.8378) time: 0.1405 data: 0.0475 max mem: 9377 +Train: [19] [1800/6250] eta: 0:12:31 lr: 0.000118 grad: 0.0628 (0.0715) loss: 0.8404 (0.8378) time: 0.1572 data: 0.0543 max mem: 9377 +Train: [19] [1900/6250] eta: 0:12:08 lr: 0.000118 grad: 0.0644 (0.0713) loss: 0.8429 (0.8379) time: 0.1437 data: 0.0581 max mem: 9377 +Train: [19] [2000/6250] eta: 0:11:49 lr: 0.000118 grad: 0.0695 (0.0712) loss: 0.8380 (0.8379) time: 0.1767 data: 0.0903 max mem: 9377 +Train: [19] [2100/6250] eta: 0:11:29 lr: 0.000118 grad: 0.0634 (0.0710) loss: 0.8401 (0.8379) time: 0.1496 data: 0.0604 max mem: 9377 +Train: [19] [2200/6250] eta: 0:11:11 lr: 0.000118 grad: 0.0674 (0.0709) loss: 0.8364 (0.8378) time: 0.1333 data: 0.0519 max mem: 9377 +Train: [19] [2300/6250] eta: 0:10:53 lr: 0.000118 grad: 0.0661 (0.0707) loss: 0.8387 (0.8379) time: 0.1450 data: 0.0525 max mem: 9377 +Train: [19] [2400/6250] eta: 0:10:35 lr: 0.000118 grad: 0.0624 (0.0706) loss: 0.8418 (0.8380) time: 0.1566 data: 0.0693 max mem: 9377 +Train: [19] [2500/6250] eta: 0:10:18 lr: 0.000118 grad: 0.0641 (0.0705) loss: 0.8451 (0.8380) time: 0.1757 data: 0.0820 max mem: 9377 +Train: [19] [2600/6250] eta: 0:10:00 lr: 0.000118 grad: 0.0681 (0.0705) loss: 0.8415 (0.8381) time: 0.1490 data: 0.0563 max mem: 9377 +Train: [19] [2700/6250] eta: 0:09:43 lr: 0.000118 grad: 0.0662 (0.0704) loss: 0.8417 (0.8381) time: 0.1503 data: 0.0616 max mem: 9377 +Train: [19] [2800/6250] eta: 0:09:27 lr: 0.000118 grad: 0.0666 (0.0704) loss: 0.8393 (0.8381) time: 0.1708 data: 0.0832 max mem: 9377 +Train: [19] [2900/6250] eta: 0:09:09 lr: 0.000118 grad: 0.0695 (0.0705) loss: 0.8395 (0.8381) time: 0.1656 data: 0.0737 max mem: 9377 +Train: [19] [3000/6250] eta: 0:08:53 lr: 0.000118 grad: 0.0690 (0.0705) loss: 0.8403 (0.8381) time: 0.1623 data: 0.0718 max mem: 9377 +Train: [19] [3100/6250] eta: 0:08:36 lr: 0.000118 grad: 0.0716 (0.0706) loss: 0.8328 (0.8380) time: 0.1893 data: 0.0906 max mem: 9377 +Train: [19] [3200/6250] eta: 0:08:19 lr: 0.000118 grad: 0.0680 (0.0707) loss: 0.8324 (0.8378) time: 0.1515 data: 0.0655 max mem: 9377 +Train: [19] [3300/6250] eta: 0:08:03 lr: 0.000118 grad: 0.0711 (0.0707) loss: 0.8286 (0.8377) time: 0.1733 data: 0.0853 max mem: 9377 +Train: [19] [3400/6250] eta: 0:07:47 lr: 0.000118 grad: 0.0714 (0.0708) loss: 0.8316 (0.8375) time: 0.1680 data: 0.0723 max mem: 9377 +Train: [19] [3500/6250] eta: 0:07:31 lr: 0.000118 grad: 0.0747 (0.0709) loss: 0.8325 (0.8374) time: 0.1822 data: 0.0916 max mem: 9377 +Train: [19] [3600/6250] eta: 0:07:15 lr: 0.000118 grad: 0.0681 (0.0709) loss: 0.8336 (0.8373) time: 0.1837 data: 0.0892 max mem: 9377 +Train: [19] [3700/6250] eta: 0:06:58 lr: 0.000118 grad: 0.0711 (0.0710) loss: 0.8280 (0.8371) time: 0.1550 data: 0.0548 max mem: 9377 +Train: [19] [3800/6250] eta: 0:06:41 lr: 0.000118 grad: 0.0730 (0.0710) loss: 0.8317 (0.8370) time: 0.1420 data: 0.0521 max mem: 9377 +Train: [19] [3900/6250] eta: 0:06:25 lr: 0.000118 grad: 0.0748 (0.0711) loss: 0.8373 (0.8369) time: 0.1718 data: 0.0815 max mem: 9377 +Train: [19] [4000/6250] eta: 0:06:09 lr: 0.000118 grad: 0.0683 (0.0711) loss: 0.8291 (0.8368) time: 0.1748 data: 0.0847 max mem: 9377 +Train: [19] [4100/6250] eta: 0:05:52 lr: 0.000118 grad: 0.0715 (0.0711) loss: 0.8325 (0.8367) time: 0.1537 data: 0.0682 max mem: 9377 +Train: [19] [4200/6250] eta: 0:05:35 lr: 0.000118 grad: 0.0769 (0.0712) loss: 0.8280 (0.8366) time: 0.1464 data: 0.0515 max mem: 9377 +Train: [19] [4300/6250] eta: 0:05:19 lr: 0.000118 grad: 0.0733 (0.0713) loss: 0.8319 (0.8364) time: 0.1502 data: 0.0529 max mem: 9377 +Train: [19] [4400/6250] eta: 0:05:02 lr: 0.000118 grad: 0.0711 (0.0712) loss: 0.8368 (0.8364) time: 0.1684 data: 0.0732 max mem: 9377 +Train: [19] [4500/6250] eta: 0:04:46 lr: 0.000118 grad: 0.0724 (0.0713) loss: 0.8353 (0.8363) time: 0.1467 data: 0.0550 max mem: 9377 +Train: [19] [4600/6250] eta: 0:04:29 lr: 0.000118 grad: 0.0720 (0.0714) loss: 0.8346 (0.8363) time: 0.1479 data: 0.0519 max mem: 9377 +Train: [19] [4700/6250] eta: 0:04:12 lr: 0.000118 grad: 0.0733 (0.0714) loss: 0.8348 (0.8362) time: 0.1543 data: 0.0642 max mem: 9377 +Train: [19] [4800/6250] eta: 0:03:56 lr: 0.000118 grad: 0.0716 (0.0714) loss: 0.8343 (0.8362) time: 0.1537 data: 0.0624 max mem: 9377 +Train: [19] [4900/6250] eta: 0:03:40 lr: 0.000118 grad: 0.0711 (0.0715) loss: 0.8316 (0.8361) time: 0.1611 data: 0.0646 max mem: 9377 +Train: [19] [5000/6250] eta: 0:03:23 lr: 0.000118 grad: 0.0682 (0.0715) loss: 0.8333 (0.8361) time: 0.1525 data: 0.0629 max mem: 9377 +Train: [19] [5100/6250] eta: 0:03:07 lr: 0.000118 grad: 0.0720 (0.0716) loss: 0.8370 (0.8361) time: 0.1447 data: 0.0601 max mem: 9377 +Train: [19] [5200/6250] eta: 0:02:50 lr: 0.000118 grad: 0.0712 (0.0716) loss: 0.8329 (0.8361) time: 0.1507 data: 0.0617 max mem: 9377 +Train: [19] [5300/6250] eta: 0:02:34 lr: 0.000118 grad: 0.0705 (0.0717) loss: 0.8311 (0.8361) time: 0.1684 data: 0.0827 max mem: 9377 +Train: [19] [5400/6250] eta: 0:02:18 lr: 0.000118 grad: 0.0736 (0.0718) loss: 0.8349 (0.8361) time: 0.1334 data: 0.0449 max mem: 9377 +Train: [19] [5500/6250] eta: 0:02:01 lr: 0.000118 grad: 0.0711 (0.0718) loss: 0.8378 (0.8360) time: 0.1488 data: 0.0459 max mem: 9377 +Train: [19] [5600/6250] eta: 0:01:45 lr: 0.000118 grad: 0.0711 (0.0718) loss: 0.8321 (0.8360) time: 0.1679 data: 0.0829 max mem: 9377 +Train: [19] [5700/6250] eta: 0:01:29 lr: 0.000118 grad: 0.0712 (0.0718) loss: 0.8351 (0.8360) time: 0.1479 data: 0.0598 max mem: 9377 +Train: [19] [5800/6250] eta: 0:01:12 lr: 0.000118 grad: 0.0727 (0.0719) loss: 0.8311 (0.8359) time: 0.1450 data: 0.0483 max mem: 9377 +Train: [19] [5900/6250] eta: 0:00:56 lr: 0.000118 grad: 0.0757 (0.0720) loss: 0.8334 (0.8359) time: 0.1455 data: 0.0678 max mem: 9377 +Train: [19] [6000/6250] eta: 0:00:40 lr: 0.000118 grad: 0.0778 (0.0720) loss: 0.8339 (0.8359) time: 0.1794 data: 0.0900 max mem: 9377 +Train: [19] [6100/6250] eta: 0:00:24 lr: 0.000117 grad: 0.0727 (0.0721) loss: 0.8357 (0.8359) time: 0.1489 data: 0.0624 max mem: 9377 +Train: [19] [6200/6250] eta: 0:00:08 lr: 0.000117 grad: 0.0709 (0.0722) loss: 0.8374 (0.8359) time: 0.1716 data: 0.0814 max mem: 9377 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.0709 (0.0722) loss: 0.8383 (0.8359) time: 0.1675 data: 0.0769 max mem: 9377 +Train: [19] Total time: 0:16:58 (0.1630 s / it) +Averaged stats: lr: 0.000117 grad: 0.0709 (0.0722) loss: 0.8383 (0.8359) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:04:59 loss: 0.8457 (0.8457) time: 4.8357 data: 4.8053 max mem: 9377 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8383 (0.8393) time: 0.1549 data: 0.1299 max mem: 9377 +Eval (hcp-train-subset): [19] Total time: 0:00:14 (0.2274 s / it) +Averaged stats (hcp-train-subset): loss: 0.8383 (0.8393) +Making plots (hcp-train-subset): example=12 +Eval (hcp-val): [19] [ 0/62] eta: 0:04:15 loss: 0.8413 (0.8413) time: 4.1146 data: 4.0540 max mem: 9377 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8443 (0.8445) time: 0.1781 data: 0.1519 max mem: 9377 +Eval (hcp-val): [19] Total time: 0:00:16 (0.2589 s / it) +Averaged stats (hcp-val): loss: 0.8443 (0.8445) +Making plots (hcp-val): example=2 +Eval (nsd-val): [19] [ 0/62] eta: 0:05:12 loss: 0.8061 (0.8061) time: 5.0403 data: 5.0059 max mem: 9377 +Eval (nsd-val): [19] [61/62] eta: 0:00:00 loss: 0.8152 (0.8165) time: 0.1572 data: 0.1287 max mem: 9377 +Eval (nsd-val): [19] Total time: 0:00:15 (0.2497 s / it) +Averaged stats (nsd-val): loss: 0.8152 (0.8165) +Making plots (nsd-val): example=55 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 8:48:09 lr: 0.000117 grad: 0.0515 (0.0515) loss: 0.8691 (0.8691) time: 5.0702 data: 4.7817 max mem: 9377 +Train: [20] [ 100/6250] eta: 0:25:43 lr: 0.000117 grad: 0.0712 (0.0704) loss: 0.8473 (0.8448) time: 0.1896 data: 0.0871 max mem: 9377 +Train: [20] [ 200/6250] eta: 0:22:12 lr: 0.000117 grad: 0.0674 (0.0694) loss: 0.8367 (0.8430) time: 0.2050 data: 0.1152 max mem: 9377 +Train: [20] [ 300/6250] eta: 0:20:42 lr: 0.000117 grad: 0.0750 (0.0705) loss: 0.8386 (0.8407) time: 0.1861 data: 0.0813 max mem: 9377 +Train: [20] [ 400/6250] eta: 0:19:40 lr: 0.000117 grad: 0.0695 (0.0711) loss: 0.8375 (0.8399) time: 0.1699 data: 0.0777 max mem: 9377 +Train: [20] [ 500/6250] eta: 0:19:08 lr: 0.000117 grad: 0.0704 (0.0717) loss: 0.8315 (0.8391) time: 0.2093 data: 0.1310 max mem: 9377 +Train: [20] [ 600/6250] eta: 0:18:25 lr: 0.000117 grad: 0.0690 (0.0719) loss: 0.8336 (0.8387) time: 0.1580 data: 0.0648 max mem: 9377 +Train: [20] [ 700/6250] eta: 0:17:46 lr: 0.000117 grad: 0.0695 (0.0723) loss: 0.8343 (0.8382) time: 0.1480 data: 0.0626 max mem: 9377 +Train: [20] [ 800/6250] eta: 0:17:12 lr: 0.000117 grad: 0.0834 (0.0730) loss: 0.8320 (0.8375) time: 0.1725 data: 0.0845 max mem: 9377 +Train: [20] [ 900/6250] eta: 0:16:33 lr: 0.000117 grad: 0.0695 (0.0733) loss: 0.8321 (0.8370) time: 0.1568 data: 0.0651 max mem: 9377 +Train: [20] [1000/6250] eta: 0:16:04 lr: 0.000117 grad: 0.0742 (0.0731) loss: 0.8377 (0.8369) time: 0.1493 data: 0.0626 max mem: 9377 +Train: [20] [1100/6250] eta: 0:15:35 lr: 0.000117 grad: 0.0644 (0.0730) loss: 0.8414 (0.8370) time: 0.1525 data: 0.0624 max mem: 9377 +Train: [20] [1200/6250] eta: 0:15:04 lr: 0.000117 grad: 0.0720 (0.0731) loss: 0.8350 (0.8368) time: 0.1586 data: 0.0697 max mem: 9377 +Train: [20] [1300/6250] eta: 0:14:35 lr: 0.000117 grad: 0.0744 (0.0733) loss: 0.8331 (0.8366) time: 0.1561 data: 0.0617 max mem: 9377 +Train: [20] [1400/6250] eta: 0:14:07 lr: 0.000117 grad: 0.0703 (0.0734) loss: 0.8311 (0.8364) time: 0.1541 data: 0.0724 max mem: 9377 +Train: [20] [1500/6250] eta: 0:13:40 lr: 0.000117 grad: 0.0702 (0.0733) loss: 0.8388 (0.8361) time: 0.1599 data: 0.0722 max mem: 9377 +Train: [20] [1600/6250] eta: 0:13:17 lr: 0.000117 grad: 0.0686 (0.0733) loss: 0.8338 (0.8360) time: 0.1544 data: 0.0592 max mem: 9377 +Train: [20] [1700/6250] eta: 0:12:57 lr: 0.000117 grad: 0.0697 (0.0732) loss: 0.8374 (0.8360) time: 0.1295 data: 0.0294 max mem: 9377 +Train: [20] [1800/6250] eta: 0:12:35 lr: 0.000117 grad: 0.0702 (0.0731) loss: 0.8355 (0.8360) time: 0.1345 data: 0.0378 max mem: 9377 +Train: [20] [1900/6250] eta: 0:12:12 lr: 0.000117 grad: 0.0715 (0.0732) loss: 0.8365 (0.8360) time: 0.1424 data: 0.0581 max mem: 9377 +Train: [20] [2000/6250] eta: 0:11:51 lr: 0.000117 grad: 0.0722 (0.0732) loss: 0.8365 (0.8360) time: 0.1342 data: 0.0245 max mem: 9377 +Train: [20] [2100/6250] eta: 0:11:30 lr: 0.000117 grad: 0.0690 (0.0732) loss: 0.8369 (0.8359) time: 0.1487 data: 0.0538 max mem: 9377 +Train: [20] [2200/6250] eta: 0:11:11 lr: 0.000117 grad: 0.0673 (0.0732) loss: 0.8399 (0.8359) time: 0.1455 data: 0.0570 max mem: 9377 +Train: [20] [2300/6250] eta: 0:10:52 lr: 0.000117 grad: 0.0700 (0.0732) loss: 0.8388 (0.8359) time: 0.1686 data: 0.0802 max mem: 9377 +Train: [20] [2400/6250] eta: 0:10:35 lr: 0.000117 grad: 0.0747 (0.0731) loss: 0.8372 (0.8360) time: 0.1663 data: 0.0801 max mem: 9377 +Train: [20] [2500/6250] eta: 0:10:18 lr: 0.000117 grad: 0.0749 (0.0733) loss: 0.8344 (0.8361) time: 0.1324 data: 0.0374 max mem: 9377 +Train: [20] [2600/6250] eta: 0:10:00 lr: 0.000117 grad: 0.0685 (0.0733) loss: 0.8409 (0.8362) time: 0.1828 data: 0.1044 max mem: 9377 +Train: [20] [2700/6250] eta: 0:09:43 lr: 0.000117 grad: 0.0746 (0.0733) loss: 0.8381 (0.8362) time: 0.1807 data: 0.0988 max mem: 9377 +Train: [20] [2800/6250] eta: 0:09:25 lr: 0.000117 grad: 0.0716 (0.0734) loss: 0.8356 (0.8363) time: 0.1686 data: 0.0765 max mem: 9377 +Train: [20] [2900/6250] eta: 0:09:08 lr: 0.000117 grad: 0.0663 (0.0734) loss: 0.8435 (0.8363) time: 0.1369 data: 0.0421 max mem: 9377 +Train: [20] [3000/6250] eta: 0:08:51 lr: 0.000117 grad: 0.0692 (0.0733) loss: 0.8352 (0.8363) time: 0.1631 data: 0.0780 max mem: 9377 +Train: [20] [3100/6250] eta: 0:08:35 lr: 0.000117 grad: 0.0690 (0.0733) loss: 0.8405 (0.8363) time: 0.1658 data: 0.0820 max mem: 9377 +Train: [20] [3200/6250] eta: 0:08:18 lr: 0.000117 grad: 0.0681 (0.0733) loss: 0.8395 (0.8364) time: 0.1468 data: 0.0615 max mem: 9377 +Train: [20] [3300/6250] eta: 0:08:01 lr: 0.000117 grad: 0.0640 (0.0732) loss: 0.8367 (0.8365) time: 0.1434 data: 0.0483 max mem: 9377 +Train: [20] [3400/6250] eta: 0:07:44 lr: 0.000117 grad: 0.0689 (0.0731) loss: 0.8383 (0.8365) time: 0.1528 data: 0.0654 max mem: 9377 +Train: [20] [3500/6250] eta: 0:07:27 lr: 0.000117 grad: 0.0731 (0.0731) loss: 0.8337 (0.8365) time: 0.1580 data: 0.0753 max mem: 9377 +Train: [20] [3600/6250] eta: 0:07:10 lr: 0.000117 grad: 0.0736 (0.0731) loss: 0.8350 (0.8365) time: 0.1475 data: 0.0608 max mem: 9377 +Train: [20] [3700/6250] eta: 0:06:54 lr: 0.000117 grad: 0.0680 (0.0731) loss: 0.8394 (0.8365) time: 0.1821 data: 0.0919 max mem: 9377 +Train: [20] [3800/6250] eta: 0:06:37 lr: 0.000117 grad: 0.0723 (0.0732) loss: 0.8376 (0.8365) time: 0.1754 data: 0.0903 max mem: 9377 +Train: [20] [3900/6250] eta: 0:06:21 lr: 0.000117 grad: 0.0663 (0.0732) loss: 0.8376 (0.8365) time: 0.1553 data: 0.0665 max mem: 9377 +Train: [20] [4000/6250] eta: 0:06:05 lr: 0.000117 grad: 0.0706 (0.0732) loss: 0.8363 (0.8365) time: 0.1594 data: 0.0743 max mem: 9377 +Train: [20] [4100/6250] eta: 0:05:48 lr: 0.000117 grad: 0.0705 (0.0732) loss: 0.8317 (0.8364) time: 0.1784 data: 0.0843 max mem: 9377 +Train: [20] [4200/6250] eta: 0:05:32 lr: 0.000117 grad: 0.0721 (0.0732) loss: 0.8285 (0.8363) time: 0.1451 data: 0.0602 max mem: 9377 +Train: [20] [4300/6250] eta: 0:05:16 lr: 0.000117 grad: 0.0664 (0.0732) loss: 0.8340 (0.8362) time: 0.1842 data: 0.0906 max mem: 9377 +Train: [20] [4400/6250] eta: 0:04:59 lr: 0.000117 grad: 0.0739 (0.0733) loss: 0.8362 (0.8362) time: 0.1429 data: 0.0516 max mem: 9377 +Train: [20] [4500/6250] eta: 0:04:43 lr: 0.000117 grad: 0.0716 (0.0732) loss: 0.8370 (0.8361) time: 0.1814 data: 0.0974 max mem: 9377 +Train: [20] [4600/6250] eta: 0:04:26 lr: 0.000117 grad: 0.0686 (0.0732) loss: 0.8342 (0.8361) time: 0.1819 data: 0.1008 max mem: 9377 +Train: [20] [4700/6250] eta: 0:04:10 lr: 0.000117 grad: 0.0671 (0.0732) loss: 0.8404 (0.8361) time: 0.1586 data: 0.0684 max mem: 9377 +Train: [20] [4800/6250] eta: 0:03:54 lr: 0.000117 grad: 0.0681 (0.0732) loss: 0.8322 (0.8361) time: 0.1092 data: 0.0004 max mem: 9377 +Train: [20] [4900/6250] eta: 0:03:38 lr: 0.000117 grad: 0.0727 (0.0732) loss: 0.8336 (0.8361) time: 0.2103 data: 0.1186 max mem: 9377 +Train: [20] [5000/6250] eta: 0:03:21 lr: 0.000117 grad: 0.0673 (0.0732) loss: 0.8349 (0.8360) time: 0.1344 data: 0.0289 max mem: 9377 +Train: [20] [5100/6250] eta: 0:03:05 lr: 0.000117 grad: 0.0692 (0.0732) loss: 0.8306 (0.8360) time: 0.1800 data: 0.1004 max mem: 9377 +Train: [20] [5200/6250] eta: 0:02:49 lr: 0.000117 grad: 0.0718 (0.0732) loss: 0.8340 (0.8359) time: 0.1572 data: 0.0718 max mem: 9377 +Train: [20] [5300/6250] eta: 0:02:32 lr: 0.000117 grad: 0.0671 (0.0731) loss: 0.8381 (0.8359) time: 0.1357 data: 0.0547 max mem: 9377 +Train: [20] [5400/6250] eta: 0:02:16 lr: 0.000117 grad: 0.0699 (0.0731) loss: 0.8363 (0.8359) time: 0.1425 data: 0.0455 max mem: 9377 +Train: [20] [5500/6250] eta: 0:02:00 lr: 0.000117 grad: 0.0703 (0.0731) loss: 0.8382 (0.8359) time: 0.1444 data: 0.0574 max mem: 9377 +Train: [20] [5600/6250] eta: 0:01:44 lr: 0.000117 grad: 0.0670 (0.0731) loss: 0.8330 (0.8358) time: 0.1667 data: 0.0806 max mem: 9377 +Train: [20] [5700/6250] eta: 0:01:28 lr: 0.000117 grad: 0.0690 (0.0730) loss: 0.8382 (0.8358) time: 0.1428 data: 0.0479 max mem: 9377 +Train: [20] [5800/6250] eta: 0:01:12 lr: 0.000117 grad: 0.0661 (0.0729) loss: 0.8340 (0.8358) time: 0.1710 data: 0.0891 max mem: 9377 +Train: [20] [5900/6250] eta: 0:00:56 lr: 0.000117 grad: 0.0660 (0.0729) loss: 0.8347 (0.8358) time: 0.1413 data: 0.0529 max mem: 9377 +Train: [20] [6000/6250] eta: 0:00:40 lr: 0.000116 grad: 0.0686 (0.0729) loss: 0.8340 (0.8358) time: 0.2117 data: 0.1310 max mem: 9377 +Train: [20] [6100/6250] eta: 0:00:24 lr: 0.000116 grad: 0.0656 (0.0729) loss: 0.8329 (0.8358) time: 0.1556 data: 0.0689 max mem: 9377 +Train: [20] [6200/6250] eta: 0:00:08 lr: 0.000116 grad: 0.0748 (0.0729) loss: 0.8324 (0.8357) time: 0.1707 data: 0.0851 max mem: 9377 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.0691 (0.0729) loss: 0.8340 (0.8357) time: 0.1763 data: 0.0889 max mem: 9377 +Train: [20] Total time: 0:16:56 (0.1626 s / it) +Averaged stats: lr: 0.000116 grad: 0.0691 (0.0729) loss: 0.8340 (0.8357) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:03:47 loss: 0.8427 (0.8427) time: 3.6622 data: 3.5869 max mem: 9377 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8375 (0.8390) time: 0.1507 data: 0.1251 max mem: 9377 +Eval (hcp-train-subset): [20] Total time: 0:00:14 (0.2416 s / it) +Averaged stats (hcp-train-subset): loss: 0.8375 (0.8390) +Eval (hcp-val): [20] [ 0/62] eta: 0:03:57 loss: 0.8388 (0.8388) time: 3.8305 data: 3.7533 max mem: 9377 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8423 (0.8436) time: 0.1370 data: 0.1114 max mem: 9377 +Eval (hcp-val): [20] Total time: 0:00:14 (0.2302 s / it) +Averaged stats (hcp-val): loss: 0.8423 (0.8436) +Eval (nsd-val): [20] [ 0/62] eta: 0:04:16 loss: 0.8047 (0.8047) time: 4.1430 data: 4.0685 max mem: 9377 +Eval (nsd-val): [20] [61/62] eta: 0:00:00 loss: 0.8146 (0.8158) time: 0.1150 data: 0.0899 max mem: 9377 +Eval (nsd-val): [20] Total time: 0:00:13 (0.2184 s / it) +Averaged stats (nsd-val): loss: 0.8146 (0.8158) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [21] [ 0/6250] eta: 7:44:14 lr: 0.000116 grad: 0.0443 (0.0443) loss: 0.8818 (0.8818) time: 4.4567 data: 4.2089 max mem: 9377 +Train: [21] [ 100/6250] eta: 0:22:28 lr: 0.000116 grad: 0.0879 (0.0891) loss: 0.8149 (0.8368) time: 0.1676 data: 0.0618 max mem: 9377 +Train: [21] [ 200/6250] eta: 0:19:19 lr: 0.000116 grad: 0.0680 (0.0818) loss: 0.8380 (0.8361) time: 0.1744 data: 0.0760 max mem: 9377 +Train: [21] [ 300/6250] eta: 0:17:57 lr: 0.000116 grad: 0.0649 (0.0778) loss: 0.8385 (0.8371) time: 0.1712 data: 0.0798 max mem: 9377 +Train: [21] [ 400/6250] eta: 0:17:00 lr: 0.000116 grad: 0.0684 (0.0769) loss: 0.8408 (0.8370) time: 0.1592 data: 0.0695 max mem: 9377 +Train: [21] [ 500/6250] eta: 0:16:12 lr: 0.000116 grad: 0.0683 (0.0758) loss: 0.8354 (0.8373) time: 0.1266 data: 0.0207 max mem: 9377 +Train: [21] [ 600/6250] eta: 0:16:02 lr: 0.000116 grad: 0.0692 (0.0749) loss: 0.8402 (0.8376) time: 0.1607 data: 0.0704 max mem: 9377 +Train: [21] [ 700/6250] eta: 0:15:44 lr: 0.000116 grad: 0.0668 (0.0744) loss: 0.8419 (0.8379) time: 0.1756 data: 0.0854 max mem: 9377 +Train: [21] [ 800/6250] eta: 0:15:19 lr: 0.000116 grad: 0.0721 (0.0740) loss: 0.8380 (0.8381) time: 0.1695 data: 0.0814 max mem: 9377 +Train: [21] [ 900/6250] eta: 0:14:56 lr: 0.000116 grad: 0.0633 (0.0737) loss: 0.8410 (0.8382) time: 0.1744 data: 0.0859 max mem: 9377 +Train: [21] [1000/6250] eta: 0:14:30 lr: 0.000116 grad: 0.0673 (0.0736) loss: 0.8408 (0.8384) time: 0.1489 data: 0.0584 max mem: 9377 +Train: [21] [1100/6250] eta: 0:14:07 lr: 0.000116 grad: 0.0674 (0.0733) loss: 0.8407 (0.8384) time: 0.1569 data: 0.0636 max mem: 9377 +Train: [21] [1200/6250] eta: 0:13:46 lr: 0.000116 grad: 0.0714 (0.0735) loss: 0.8359 (0.8382) time: 0.1839 data: 0.0982 max mem: 9377 +Train: [21] [1300/6250] eta: 0:13:27 lr: 0.000116 grad: 0.0673 (0.0736) loss: 0.8384 (0.8379) time: 0.1332 data: 0.0439 max mem: 9377 +Train: [21] [1400/6250] eta: 0:13:09 lr: 0.000116 grad: 0.0666 (0.0735) loss: 0.8337 (0.8376) time: 0.1753 data: 0.0781 max mem: 9377 +Train: [21] [1500/6250] eta: 0:12:49 lr: 0.000116 grad: 0.0708 (0.0735) loss: 0.8378 (0.8375) time: 0.1480 data: 0.0619 max mem: 9377 +Train: [21] [1600/6250] eta: 0:12:37 lr: 0.000116 grad: 0.0745 (0.0735) loss: 0.8338 (0.8374) time: 0.1653 data: 0.0673 max mem: 9377 +Train: [21] [1700/6250] eta: 0:12:21 lr: 0.000116 grad: 0.0731 (0.0735) loss: 0.8338 (0.8373) time: 0.1596 data: 0.0603 max mem: 9377 +Train: [21] [1800/6250] eta: 0:12:04 lr: 0.000116 grad: 0.0762 (0.0735) loss: 0.8405 (0.8373) time: 0.1380 data: 0.0487 max mem: 9377 +Train: [21] [1900/6250] eta: 0:11:48 lr: 0.000116 grad: 0.0722 (0.0733) loss: 0.8356 (0.8373) time: 0.1298 data: 0.0410 max mem: 9377 +Train: [21] [2000/6250] eta: 0:11:30 lr: 0.000116 grad: 0.0642 (0.0732) loss: 0.8362 (0.8372) time: 0.1505 data: 0.0682 max mem: 9377 +Train: [21] [2100/6250] eta: 0:11:11 lr: 0.000116 grad: 0.0721 (0.0730) loss: 0.8365 (0.8372) time: 0.1275 data: 0.0325 max mem: 9377 +Train: [21] [2200/6250] eta: 0:10:55 lr: 0.000116 grad: 0.0780 (0.0730) loss: 0.8370 (0.8371) time: 0.1575 data: 0.0798 max mem: 9377 +Train: [21] [2300/6250] eta: 0:10:38 lr: 0.000116 grad: 0.0689 (0.0730) loss: 0.8354 (0.8371) time: 0.1566 data: 0.0744 max mem: 9377 +Train: [21] [2400/6250] eta: 0:10:20 lr: 0.000116 grad: 0.0670 (0.0729) loss: 0.8437 (0.8372) time: 0.1582 data: 0.0734 max mem: 9377 +Train: [21] [2500/6250] eta: 0:10:04 lr: 0.000116 grad: 0.0696 (0.0729) loss: 0.8396 (0.8372) time: 0.1725 data: 0.0875 max mem: 9377 +Train: [21] [2600/6250] eta: 0:09:47 lr: 0.000116 grad: 0.0694 (0.0728) loss: 0.8366 (0.8372) time: 0.1674 data: 0.0798 max mem: 9377 +Train: [21] [2700/6250] eta: 0:09:30 lr: 0.000116 grad: 0.0720 (0.0728) loss: 0.8347 (0.8372) time: 0.1312 data: 0.0358 max mem: 9377 +Train: [21] [2800/6250] eta: 0:09:14 lr: 0.000116 grad: 0.0741 (0.0728) loss: 0.8295 (0.8372) time: 0.1465 data: 0.0526 max mem: 9377 +Train: [21] [2900/6250] eta: 0:08:57 lr: 0.000116 grad: 0.0718 (0.0727) loss: 0.8333 (0.8372) time: 0.1940 data: 0.1073 max mem: 9377 +Train: [21] [3000/6250] eta: 0:08:40 lr: 0.000116 grad: 0.0732 (0.0727) loss: 0.8409 (0.8372) time: 0.1431 data: 0.0444 max mem: 9377 +Train: [21] [3100/6250] eta: 0:08:24 lr: 0.000116 grad: 0.0692 (0.0727) loss: 0.8397 (0.8373) time: 0.1354 data: 0.0447 max mem: 9377 +Train: [21] [3200/6250] eta: 0:08:08 lr: 0.000116 grad: 0.0733 (0.0727) loss: 0.8365 (0.8373) time: 0.1405 data: 0.0473 max mem: 9377 +Train: [21] [3300/6250] eta: 0:07:51 lr: 0.000116 grad: 0.0673 (0.0727) loss: 0.8371 (0.8372) time: 0.1442 data: 0.0549 max mem: 9377 +Train: [21] [3400/6250] eta: 0:07:35 lr: 0.000116 grad: 0.0738 (0.0727) loss: 0.8337 (0.8372) time: 0.1613 data: 0.0789 max mem: 9377 +Train: [21] [3500/6250] eta: 0:07:19 lr: 0.000116 grad: 0.0766 (0.0727) loss: 0.8360 (0.8372) time: 0.1755 data: 0.0894 max mem: 9377 +Train: [21] [3600/6250] eta: 0:07:03 lr: 0.000116 grad: 0.0695 (0.0727) loss: 0.8406 (0.8372) time: 0.1538 data: 0.0586 max mem: 9377 +Train: [21] [3700/6250] eta: 0:06:47 lr: 0.000116 grad: 0.0744 (0.0727) loss: 0.8375 (0.8372) time: 0.1564 data: 0.0688 max mem: 9377 +Train: [21] [3800/6250] eta: 0:06:30 lr: 0.000116 grad: 0.0730 (0.0728) loss: 0.8314 (0.8371) time: 0.1572 data: 0.0724 max mem: 9377 +Train: [21] [3900/6250] eta: 0:06:15 lr: 0.000116 grad: 0.0757 (0.0728) loss: 0.8322 (0.8371) time: 0.1713 data: 0.0915 max mem: 9377 +Train: [21] [4000/6250] eta: 0:05:59 lr: 0.000116 grad: 0.0701 (0.0728) loss: 0.8415 (0.8371) time: 0.1597 data: 0.0669 max mem: 9377 +Train: [21] [4100/6250] eta: 0:05:42 lr: 0.000116 grad: 0.0645 (0.0728) loss: 0.8384 (0.8371) time: 0.1599 data: 0.0703 max mem: 9377 +Train: [21] [4200/6250] eta: 0:05:26 lr: 0.000116 grad: 0.0708 (0.0727) loss: 0.8428 (0.8372) time: 0.1403 data: 0.0577 max mem: 9377 +Train: [21] [4300/6250] eta: 0:05:10 lr: 0.000116 grad: 0.0689 (0.0727) loss: 0.8433 (0.8373) time: 0.1542 data: 0.0629 max mem: 9377 +Train: [21] [4400/6250] eta: 0:04:54 lr: 0.000116 grad: 0.0704 (0.0727) loss: 0.8359 (0.8373) time: 0.1362 data: 0.0563 max mem: 9377 +Train: [21] [4500/6250] eta: 0:04:37 lr: 0.000116 grad: 0.0730 (0.0726) loss: 0.8363 (0.8373) time: 0.1581 data: 0.0726 max mem: 9377 +Train: [21] [4600/6250] eta: 0:04:21 lr: 0.000116 grad: 0.0679 (0.0727) loss: 0.8311 (0.8373) time: 0.1229 data: 0.0397 max mem: 9377 +Train: [21] [4700/6250] eta: 0:04:06 lr: 0.000116 grad: 0.0727 (0.0727) loss: 0.8388 (0.8373) time: 0.1560 data: 0.0725 max mem: 9377 +Train: [21] [4800/6250] eta: 0:03:50 lr: 0.000116 grad: 0.0660 (0.0727) loss: 0.8386 (0.8373) time: 0.1108 data: 0.0198 max mem: 9377 +Train: [21] [4900/6250] eta: 0:03:34 lr: 0.000116 grad: 0.0753 (0.0728) loss: 0.8399 (0.8373) time: 0.1471 data: 0.0686 max mem: 9377 +Train: [21] [5000/6250] eta: 0:03:18 lr: 0.000116 grad: 0.0713 (0.0729) loss: 0.8348 (0.8372) time: 0.1567 data: 0.0590 max mem: 9377 +Train: [21] [5100/6250] eta: 0:03:02 lr: 0.000116 grad: 0.0708 (0.0729) loss: 0.8402 (0.8373) time: 0.1364 data: 0.0437 max mem: 9377 +Train: [21] [5200/6250] eta: 0:02:46 lr: 0.000116 grad: 0.0787 (0.0729) loss: 0.8390 (0.8372) time: 0.1429 data: 0.0572 max mem: 9377 +Train: [21] [5300/6250] eta: 0:02:30 lr: 0.000116 grad: 0.0709 (0.0729) loss: 0.8382 (0.8372) time: 0.1590 data: 0.0649 max mem: 9377 +Train: [21] [5400/6250] eta: 0:02:14 lr: 0.000116 grad: 0.0693 (0.0729) loss: 0.8332 (0.8372) time: 0.1617 data: 0.0663 max mem: 9377 +Train: [21] [5500/6250] eta: 0:01:58 lr: 0.000116 grad: 0.0723 (0.0729) loss: 0.8373 (0.8372) time: 0.1988 data: 0.1085 max mem: 9377 +Train: [21] [5600/6250] eta: 0:01:42 lr: 0.000115 grad: 0.0694 (0.0729) loss: 0.8375 (0.8372) time: 0.1345 data: 0.0532 max mem: 9377 +Train: [21] [5700/6250] eta: 0:01:27 lr: 0.000115 grad: 0.0661 (0.0729) loss: 0.8429 (0.8372) time: 0.1338 data: 0.0360 max mem: 9377 +Train: [21] [5800/6250] eta: 0:01:11 lr: 0.000115 grad: 0.0706 (0.0729) loss: 0.8315 (0.8371) time: 0.1520 data: 0.0640 max mem: 9377 +Train: [21] [5900/6250] eta: 0:00:55 lr: 0.000115 grad: 0.0673 (0.0729) loss: 0.8435 (0.8372) time: 0.1447 data: 0.0567 max mem: 9377 +Train: [21] [6000/6250] eta: 0:00:39 lr: 0.000115 grad: 0.0701 (0.0729) loss: 0.8366 (0.8372) time: 0.1851 data: 0.0991 max mem: 9377 +Train: [21] [6100/6250] eta: 0:00:23 lr: 0.000115 grad: 0.0717 (0.0729) loss: 0.8353 (0.8372) time: 0.1604 data: 0.0776 max mem: 9377 +Train: [21] [6200/6250] eta: 0:00:07 lr: 0.000115 grad: 0.0779 (0.0729) loss: 0.8370 (0.8372) time: 0.1432 data: 0.0599 max mem: 9377 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.0717 (0.0729) loss: 0.8359 (0.8372) time: 0.1789 data: 0.0944 max mem: 9377 +Train: [21] Total time: 0:16:39 (0.1599 s / it) +Averaged stats: lr: 0.000115 grad: 0.0717 (0.0729) loss: 0.8359 (0.8372) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:05:23 loss: 0.8385 (0.8385) time: 5.2228 data: 5.1805 max mem: 9377 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8367 (0.8382) time: 0.1231 data: 0.0976 max mem: 9377 +Eval (hcp-train-subset): [21] Total time: 0:00:14 (0.2344 s / it) +Averaged stats (hcp-train-subset): loss: 0.8367 (0.8382) +Eval (hcp-val): [21] [ 0/62] eta: 0:03:48 loss: 0.8415 (0.8415) time: 3.6844 data: 3.5859 max mem: 9377 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8436 (0.8438) time: 0.1190 data: 0.0937 max mem: 9377 +Eval (hcp-val): [21] Total time: 0:00:13 (0.2225 s / it) +Averaged stats (hcp-val): loss: 0.8436 (0.8438) +Eval (nsd-val): [21] [ 0/62] eta: 0:04:50 loss: 0.8059 (0.8059) time: 4.6870 data: 4.6553 max mem: 9377 +Eval (nsd-val): [21] [61/62] eta: 0:00:00 loss: 0.8145 (0.8164) time: 0.1402 data: 0.1147 max mem: 9377 +Eval (nsd-val): [21] Total time: 0:00:13 (0.2208 s / it) +Averaged stats (nsd-val): loss: 0.8145 (0.8164) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [22] [ 0/6250] eta: 7:14:12 lr: 0.000115 grad: 0.0518 (0.0518) loss: 0.8624 (0.8624) time: 4.1684 data: 3.9370 max mem: 9377 +Train: [22] [ 100/6250] eta: 0:21:24 lr: 0.000115 grad: 0.0702 (0.0840) loss: 0.8476 (0.8469) time: 0.1738 data: 0.0741 max mem: 9377 +Train: [22] [ 200/6250] eta: 0:18:54 lr: 0.000115 grad: 0.0713 (0.0814) loss: 0.8386 (0.8423) time: 0.1736 data: 0.0765 max mem: 9377 +Train: [22] [ 300/6250] eta: 0:17:30 lr: 0.000115 grad: 0.0728 (0.0810) loss: 0.8329 (0.8390) time: 0.1472 data: 0.0492 max mem: 9377 +Train: [22] [ 400/6250] eta: 0:16:34 lr: 0.000115 grad: 0.0763 (0.0796) loss: 0.8282 (0.8376) time: 0.1386 data: 0.0436 max mem: 9377 +Train: [22] [ 500/6250] eta: 0:16:03 lr: 0.000115 grad: 0.0732 (0.0782) loss: 0.8321 (0.8365) time: 0.1634 data: 0.0628 max mem: 9377 +Train: [22] [ 600/6250] eta: 0:15:35 lr: 0.000115 grad: 0.0655 (0.0772) loss: 0.8350 (0.8363) time: 0.1591 data: 0.0621 max mem: 9377 +Train: [22] [ 700/6250] eta: 0:15:30 lr: 0.000115 grad: 0.0657 (0.0761) loss: 0.8372 (0.8365) time: 0.1947 data: 0.0980 max mem: 9377 +Train: [22] [ 800/6250] eta: 0:15:12 lr: 0.000115 grad: 0.0639 (0.0755) loss: 0.8441 (0.8368) time: 0.1926 data: 0.1024 max mem: 9377 +Train: [22] [ 900/6250] eta: 0:14:52 lr: 0.000115 grad: 0.0689 (0.0749) loss: 0.8388 (0.8369) time: 0.1825 data: 0.0872 max mem: 9377 +Train: [22] [1000/6250] eta: 0:14:38 lr: 0.000115 grad: 0.0676 (0.0744) loss: 0.8393 (0.8371) time: 0.1679 data: 0.0652 max mem: 9377 +Train: [22] [1100/6250] eta: 0:14:19 lr: 0.000115 grad: 0.0680 (0.0741) loss: 0.8397 (0.8372) time: 0.1397 data: 0.0540 max mem: 9377 +Train: [22] [1200/6250] eta: 0:13:59 lr: 0.000115 grad: 0.0678 (0.0738) loss: 0.8345 (0.8370) time: 0.1596 data: 0.0751 max mem: 9377 +Train: [22] [1300/6250] eta: 0:13:46 lr: 0.000115 grad: 0.0674 (0.0737) loss: 0.8352 (0.8369) time: 0.1752 data: 0.0943 max mem: 9377 +Train: [22] [1400/6250] eta: 0:13:27 lr: 0.000115 grad: 0.0691 (0.0738) loss: 0.8382 (0.8368) time: 0.1510 data: 0.0675 max mem: 9377 +Train: [22] [1500/6250] eta: 0:13:08 lr: 0.000115 grad: 0.0733 (0.0738) loss: 0.8332 (0.8367) time: 0.1631 data: 0.0836 max mem: 9377 +Train: [22] [1600/6250] eta: 0:12:49 lr: 0.000115 grad: 0.0633 (0.0736) loss: 0.8415 (0.8367) time: 0.1467 data: 0.0575 max mem: 9377 +Train: [22] [1700/6250] eta: 0:12:33 lr: 0.000115 grad: 0.0707 (0.0735) loss: 0.8369 (0.8366) time: 0.1699 data: 0.0711 max mem: 9377 +Train: [22] [1800/6250] eta: 0:12:14 lr: 0.000115 grad: 0.0697 (0.0734) loss: 0.8334 (0.8366) time: 0.1354 data: 0.0367 max mem: 9377 +Train: [22] [1900/6250] eta: 0:11:57 lr: 0.000115 grad: 0.0691 (0.0733) loss: 0.8400 (0.8365) time: 0.1609 data: 0.0703 max mem: 9377 +Train: [22] [2000/6250] eta: 0:11:39 lr: 0.000115 grad: 0.0722 (0.0733) loss: 0.8401 (0.8366) time: 0.1716 data: 0.0900 max mem: 9377 +Train: [22] [2100/6250] eta: 0:11:19 lr: 0.000115 grad: 0.0702 (0.0733) loss: 0.8360 (0.8365) time: 0.1477 data: 0.0519 max mem: 9377 +Train: [22] [2200/6250] eta: 0:11:02 lr: 0.000115 grad: 0.0718 (0.0732) loss: 0.8360 (0.8365) time: 0.1858 data: 0.0960 max mem: 9377 +Train: [22] [2300/6250] eta: 0:10:43 lr: 0.000115 grad: 0.0692 (0.0731) loss: 0.8334 (0.8363) time: 0.1416 data: 0.0507 max mem: 9377 +Train: [22] [2400/6250] eta: 0:10:27 lr: 0.000115 grad: 0.0667 (0.0731) loss: 0.8380 (0.8362) time: 0.1186 data: 0.0274 max mem: 9377 +Train: [22] [2500/6250] eta: 0:10:09 lr: 0.000115 grad: 0.0751 (0.0731) loss: 0.8285 (0.8361) time: 0.1647 data: 0.0774 max mem: 9377 +Train: [22] [2600/6250] eta: 0:09:51 lr: 0.000115 grad: 0.0687 (0.0731) loss: 0.8346 (0.8361) time: 0.1677 data: 0.0824 max mem: 9377 +Train: [22] [2700/6250] eta: 0:09:34 lr: 0.000115 grad: 0.0672 (0.0730) loss: 0.8359 (0.8360) time: 0.1690 data: 0.0854 max mem: 9377 +Train: [22] [2800/6250] eta: 0:09:16 lr: 0.000115 grad: 0.0701 (0.0730) loss: 0.8341 (0.8360) time: 0.1553 data: 0.0703 max mem: 9377 +Train: [22] [2900/6250] eta: 0:08:59 lr: 0.000115 grad: 0.0743 (0.0729) loss: 0.8368 (0.8360) time: 0.1338 data: 0.0396 max mem: 9377 +Train: [22] [3000/6250] eta: 0:08:43 lr: 0.000115 grad: 0.0699 (0.0729) loss: 0.8345 (0.8360) time: 0.1942 data: 0.1103 max mem: 9377 +Train: [22] [3100/6250] eta: 0:08:25 lr: 0.000115 grad: 0.0693 (0.0729) loss: 0.8347 (0.8358) time: 0.1551 data: 0.0685 max mem: 9377 +Train: [22] [3200/6250] eta: 0:08:09 lr: 0.000115 grad: 0.0708 (0.0729) loss: 0.8340 (0.8358) time: 0.1601 data: 0.0748 max mem: 9377 +Train: [22] [3300/6250] eta: 0:07:53 lr: 0.000115 grad: 0.0697 (0.0730) loss: 0.8347 (0.8358) time: 0.1562 data: 0.0723 max mem: 9377 +Train: [22] [3400/6250] eta: 0:07:35 lr: 0.000115 grad: 0.0725 (0.0731) loss: 0.8360 (0.8358) time: 0.1380 data: 0.0535 max mem: 9377 +Train: [22] [3500/6250] eta: 0:07:19 lr: 0.000115 grad: 0.0689 (0.0732) loss: 0.8339 (0.8358) time: 0.1599 data: 0.0747 max mem: 9377 +Train: [22] [3600/6250] eta: 0:07:03 lr: 0.000115 grad: 0.0686 (0.0731) loss: 0.8381 (0.8358) time: 0.1487 data: 0.0608 max mem: 9377 +Train: [22] [3700/6250] eta: 0:06:47 lr: 0.000115 grad: 0.0682 (0.0731) loss: 0.8359 (0.8359) time: 0.1928 data: 0.1000 max mem: 9377 +Train: [22] [3800/6250] eta: 0:06:29 lr: 0.000115 grad: 0.0710 (0.0731) loss: 0.8370 (0.8359) time: 0.1510 data: 0.0617 max mem: 9377 +Train: [22] [3900/6250] eta: 0:06:13 lr: 0.000115 grad: 0.0713 (0.0731) loss: 0.8360 (0.8359) time: 0.1531 data: 0.0672 max mem: 9377 +Train: [22] [4000/6250] eta: 0:05:57 lr: 0.000115 grad: 0.0763 (0.0730) loss: 0.8305 (0.8359) time: 0.1654 data: 0.0777 max mem: 9377 +Train: [22] [4100/6250] eta: 0:05:41 lr: 0.000115 grad: 0.0659 (0.0730) loss: 0.8372 (0.8359) time: 0.1332 data: 0.0408 max mem: 9377 +Train: [22] [4200/6250] eta: 0:05:24 lr: 0.000115 grad: 0.0677 (0.0730) loss: 0.8390 (0.8359) time: 0.1287 data: 0.0475 max mem: 9377 +Train: [22] [4300/6250] eta: 0:05:09 lr: 0.000115 grad: 0.0681 (0.0730) loss: 0.8407 (0.8360) time: 0.1576 data: 0.0683 max mem: 9377 +Train: [22] [4400/6250] eta: 0:04:53 lr: 0.000115 grad: 0.0665 (0.0730) loss: 0.8371 (0.8360) time: 0.1478 data: 0.0565 max mem: 9377 +Train: [22] [4500/6250] eta: 0:04:36 lr: 0.000115 grad: 0.0691 (0.0729) loss: 0.8411 (0.8360) time: 0.1516 data: 0.0635 max mem: 9377 +Train: [22] [4600/6250] eta: 0:04:20 lr: 0.000115 grad: 0.0672 (0.0729) loss: 0.8382 (0.8361) time: 0.1351 data: 0.0472 max mem: 9377 +Train: [22] [4700/6250] eta: 0:04:05 lr: 0.000115 grad: 0.0729 (0.0729) loss: 0.8367 (0.8361) time: 0.2055 data: 0.1192 max mem: 9377 +Train: [22] [4800/6250] eta: 0:03:49 lr: 0.000115 grad: 0.0668 (0.0728) loss: 0.8452 (0.8362) time: 0.1798 data: 0.0921 max mem: 9377 +Train: [22] [4900/6250] eta: 0:03:34 lr: 0.000114 grad: 0.0740 (0.0728) loss: 0.8371 (0.8362) time: 0.1532 data: 0.0554 max mem: 9377 +Train: [22] [5000/6250] eta: 0:03:18 lr: 0.000114 grad: 0.0719 (0.0728) loss: 0.8383 (0.8362) time: 0.1510 data: 0.0515 max mem: 9377 +Train: [22] [5100/6250] eta: 0:03:02 lr: 0.000114 grad: 0.0714 (0.0729) loss: 0.8286 (0.8362) time: 0.1535 data: 0.0575 max mem: 9377 +Train: [22] [5200/6250] eta: 0:02:46 lr: 0.000114 grad: 0.0703 (0.0729) loss: 0.8300 (0.8362) time: 0.1520 data: 0.0553 max mem: 9377 +Train: [22] [5300/6250] eta: 0:02:30 lr: 0.000114 grad: 0.0718 (0.0729) loss: 0.8364 (0.8361) time: 0.1629 data: 0.0799 max mem: 9377 +Train: [22] [5400/6250] eta: 0:02:14 lr: 0.000114 grad: 0.0710 (0.0729) loss: 0.8317 (0.8361) time: 0.1517 data: 0.0556 max mem: 9377 +Train: [22] [5500/6250] eta: 0:01:58 lr: 0.000114 grad: 0.0720 (0.0729) loss: 0.8329 (0.8361) time: 0.1405 data: 0.0548 max mem: 9377 +Train: [22] [5600/6250] eta: 0:01:43 lr: 0.000114 grad: 0.0719 (0.0730) loss: 0.8262 (0.8360) time: 0.1473 data: 0.0573 max mem: 9377 +Train: [22] [5700/6250] eta: 0:01:27 lr: 0.000114 grad: 0.0762 (0.0730) loss: 0.8348 (0.8359) time: 0.1619 data: 0.0797 max mem: 9377 +Train: [22] [5800/6250] eta: 0:01:11 lr: 0.000114 grad: 0.0723 (0.0731) loss: 0.8316 (0.8358) time: 0.1570 data: 0.0652 max mem: 9377 +Train: [22] [5900/6250] eta: 0:00:55 lr: 0.000114 grad: 0.0743 (0.0732) loss: 0.8326 (0.8357) time: 0.2090 data: 0.1306 max mem: 9377 +Train: [22] [6000/6250] eta: 0:00:39 lr: 0.000114 grad: 0.0753 (0.0733) loss: 0.8326 (0.8356) time: 0.1620 data: 0.0755 max mem: 9377 +Train: [22] [6100/6250] eta: 0:00:23 lr: 0.000114 grad: 0.0685 (0.0733) loss: 0.8347 (0.8355) time: 0.1549 data: 0.0653 max mem: 9377 +Train: [22] [6200/6250] eta: 0:00:07 lr: 0.000114 grad: 0.0694 (0.0733) loss: 0.8343 (0.8355) time: 0.1405 data: 0.0617 max mem: 9377 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.0762 (0.0733) loss: 0.8295 (0.8354) time: 0.1734 data: 0.0941 max mem: 9377 +Train: [22] Total time: 0:16:36 (0.1595 s / it) +Averaged stats: lr: 0.000114 grad: 0.0762 (0.0733) loss: 0.8295 (0.8354) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:06:20 loss: 0.8377 (0.8377) time: 6.1403 data: 6.1081 max mem: 9377 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8348 (0.8374) time: 0.1218 data: 0.0951 max mem: 9377 +Eval (hcp-train-subset): [22] Total time: 0:00:14 (0.2373 s / it) +Averaged stats (hcp-train-subset): loss: 0.8348 (0.8374) +Eval (hcp-val): [22] [ 0/62] eta: 0:03:49 loss: 0.8422 (0.8422) time: 3.6951 data: 3.6324 max mem: 9377 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8397 (0.8424) time: 0.1390 data: 0.1134 max mem: 9377 +Eval (hcp-val): [22] Total time: 0:00:14 (0.2274 s / it) +Averaged stats (hcp-val): loss: 0.8397 (0.8424) +Eval (nsd-val): [22] [ 0/62] eta: 0:05:33 loss: 0.8108 (0.8108) time: 5.3866 data: 5.3527 max mem: 9377 +Eval (nsd-val): [22] [61/62] eta: 0:00:00 loss: 0.8143 (0.8165) time: 0.1382 data: 0.1122 max mem: 9377 +Eval (nsd-val): [22] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (nsd-val): loss: 0.8143 (0.8165) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [23] [ 0/6250] eta: 7:50:18 lr: 0.000114 grad: 0.0472 (0.0472) loss: 0.8817 (0.8817) time: 4.5150 data: 4.2057 max mem: 9377 +Train: [23] [ 100/6250] eta: 0:21:43 lr: 0.000114 grad: 0.0680 (0.0797) loss: 0.8517 (0.8453) time: 0.1569 data: 0.0588 max mem: 9377 +Train: [23] [ 200/6250] eta: 0:18:41 lr: 0.000114 grad: 0.0731 (0.0818) loss: 0.8411 (0.8403) time: 0.1446 data: 0.0434 max mem: 9377 +Train: [23] [ 300/6250] eta: 0:17:20 lr: 0.000114 grad: 0.0658 (0.0788) loss: 0.8403 (0.8400) time: 0.1461 data: 0.0374 max mem: 9377 +Train: [23] [ 400/6250] eta: 0:16:51 lr: 0.000114 grad: 0.0635 (0.0766) loss: 0.8464 (0.8405) time: 0.1761 data: 0.0798 max mem: 9377 +Train: [23] [ 500/6250] eta: 0:16:29 lr: 0.000114 grad: 0.0692 (0.0756) loss: 0.8389 (0.8407) time: 0.1551 data: 0.0450 max mem: 9377 +Train: [23] [ 600/6250] eta: 0:16:12 lr: 0.000114 grad: 0.0681 (0.0757) loss: 0.8366 (0.8401) time: 0.1938 data: 0.1136 max mem: 9377 +Train: [23] [ 700/6250] eta: 0:15:54 lr: 0.000114 grad: 0.0685 (0.0754) loss: 0.8333 (0.8394) time: 0.1816 data: 0.0904 max mem: 9377 +Train: [23] [ 800/6250] eta: 0:15:35 lr: 0.000114 grad: 0.0650 (0.0748) loss: 0.8304 (0.8389) time: 0.1769 data: 0.0869 max mem: 9377 +Train: [23] [ 900/6250] eta: 0:15:17 lr: 0.000114 grad: 0.0658 (0.0743) loss: 0.8363 (0.8386) time: 0.1871 data: 0.0891 max mem: 9377 +Train: [23] [1000/6250] eta: 0:14:58 lr: 0.000114 grad: 0.0644 (0.0740) loss: 0.8420 (0.8385) time: 0.1576 data: 0.0650 max mem: 9377 +Train: [23] [1100/6250] eta: 0:14:35 lr: 0.000114 grad: 0.0660 (0.0737) loss: 0.8313 (0.8382) time: 0.1577 data: 0.0574 max mem: 9377 +Train: [23] [1200/6250] eta: 0:14:11 lr: 0.000114 grad: 0.0698 (0.0737) loss: 0.8275 (0.8376) time: 0.1411 data: 0.0442 max mem: 9377 +Train: [23] [1300/6250] eta: 0:14:00 lr: 0.000114 grad: 0.0770 (0.0737) loss: 0.8283 (0.8373) time: 0.1676 data: 0.0800 max mem: 9377 +Train: [23] [1400/6250] eta: 0:13:44 lr: 0.000114 grad: 0.0661 (0.0736) loss: 0.8367 (0.8371) time: 0.1507 data: 0.0649 max mem: 9377 +Train: [23] [1500/6250] eta: 0:13:23 lr: 0.000114 grad: 0.0719 (0.0735) loss: 0.8285 (0.8367) time: 0.1478 data: 0.0599 max mem: 9377 +Train: [23] [1600/6250] eta: 0:13:04 lr: 0.000114 grad: 0.0696 (0.0735) loss: 0.8363 (0.8365) time: 0.1733 data: 0.0748 max mem: 9377 +Train: [23] [1700/6250] eta: 0:12:47 lr: 0.000114 grad: 0.0772 (0.0737) loss: 0.8342 (0.8362) time: 0.1824 data: 0.0923 max mem: 9377 +Train: [23] [1800/6250] eta: 0:12:29 lr: 0.000114 grad: 0.0693 (0.0736) loss: 0.8340 (0.8360) time: 0.1715 data: 0.0886 max mem: 9377 +Train: [23] [1900/6250] eta: 0:12:10 lr: 0.000114 grad: 0.0693 (0.0736) loss: 0.8346 (0.8359) time: 0.1608 data: 0.0633 max mem: 9377 +Train: [23] [2000/6250] eta: 0:11:50 lr: 0.000114 grad: 0.0752 (0.0738) loss: 0.8326 (0.8357) time: 0.1397 data: 0.0527 max mem: 9377 +Train: [23] [2100/6250] eta: 0:11:30 lr: 0.000114 grad: 0.0740 (0.0740) loss: 0.8320 (0.8355) time: 0.1451 data: 0.0573 max mem: 9377 +Train: [23] [2200/6250] eta: 0:11:12 lr: 0.000114 grad: 0.0752 (0.0740) loss: 0.8343 (0.8353) time: 0.1595 data: 0.0807 max mem: 9377 +Train: [23] [2300/6250] eta: 0:10:53 lr: 0.000114 grad: 0.0751 (0.0741) loss: 0.8309 (0.8352) time: 0.1400 data: 0.0491 max mem: 9377 +Train: [23] [2400/6250] eta: 0:10:36 lr: 0.000114 grad: 0.0741 (0.0741) loss: 0.8280 (0.8351) time: 0.1605 data: 0.0695 max mem: 9377 +Train: [23] [2500/6250] eta: 0:10:18 lr: 0.000114 grad: 0.0747 (0.0741) loss: 0.8374 (0.8350) time: 0.1470 data: 0.0592 max mem: 9377 +Train: [23] [2600/6250] eta: 0:10:00 lr: 0.000114 grad: 0.0725 (0.0740) loss: 0.8343 (0.8349) time: 0.1450 data: 0.0537 max mem: 9377 +Train: [23] [2700/6250] eta: 0:09:41 lr: 0.000114 grad: 0.0698 (0.0740) loss: 0.8357 (0.8349) time: 0.1426 data: 0.0544 max mem: 9377 +Train: [23] [2800/6250] eta: 0:09:23 lr: 0.000114 grad: 0.0681 (0.0740) loss: 0.8358 (0.8348) time: 0.1362 data: 0.0510 max mem: 9377 +Train: [23] [2900/6250] eta: 0:09:06 lr: 0.000114 grad: 0.0775 (0.0742) loss: 0.8313 (0.8347) time: 0.1542 data: 0.0685 max mem: 9377 +Train: [23] [3000/6250] eta: 0:08:48 lr: 0.000114 grad: 0.0714 (0.0741) loss: 0.8252 (0.8346) time: 0.1476 data: 0.0572 max mem: 9377 +Train: [23] [3100/6250] eta: 0:08:31 lr: 0.000114 grad: 0.0665 (0.0742) loss: 0.8417 (0.8347) time: 0.1538 data: 0.0652 max mem: 9377 +Train: [23] [3200/6250] eta: 0:08:15 lr: 0.000114 grad: 0.0744 (0.0741) loss: 0.8306 (0.8346) time: 0.1803 data: 0.0929 max mem: 9377 +Train: [23] [3300/6250] eta: 0:07:59 lr: 0.000114 grad: 0.0728 (0.0741) loss: 0.8346 (0.8345) time: 0.1772 data: 0.0919 max mem: 9377 +Train: [23] [3400/6250] eta: 0:07:42 lr: 0.000114 grad: 0.0699 (0.0741) loss: 0.8334 (0.8345) time: 0.1680 data: 0.0829 max mem: 9377 +Train: [23] [3500/6250] eta: 0:07:25 lr: 0.000114 grad: 0.0722 (0.0741) loss: 0.8326 (0.8344) time: 0.1800 data: 0.0957 max mem: 9377 +Train: [23] [3600/6250] eta: 0:07:08 lr: 0.000114 grad: 0.0690 (0.0741) loss: 0.8344 (0.8343) time: 0.1539 data: 0.0649 max mem: 9377 +Train: [23] [3700/6250] eta: 0:06:51 lr: 0.000114 grad: 0.0730 (0.0740) loss: 0.8308 (0.8343) time: 0.1625 data: 0.0746 max mem: 9377 +Train: [23] [3800/6250] eta: 0:06:35 lr: 0.000114 grad: 0.0693 (0.0740) loss: 0.8339 (0.8342) time: 0.1694 data: 0.0825 max mem: 9377 +Train: [23] [3900/6250] eta: 0:06:18 lr: 0.000114 grad: 0.0728 (0.0740) loss: 0.8327 (0.8342) time: 0.1496 data: 0.0609 max mem: 9377 +Train: [23] [4000/6250] eta: 0:06:01 lr: 0.000113 grad: 0.0739 (0.0740) loss: 0.8283 (0.8341) time: 0.1549 data: 0.0652 max mem: 9377 +Train: [23] [4100/6250] eta: 0:05:45 lr: 0.000113 grad: 0.0754 (0.0740) loss: 0.8305 (0.8340) time: 0.1645 data: 0.0779 max mem: 9377 +Train: [23] [4200/6250] eta: 0:05:29 lr: 0.000113 grad: 0.0793 (0.0741) loss: 0.8270 (0.8339) time: 0.1973 data: 0.1119 max mem: 9377 +Train: [23] [4300/6250] eta: 0:05:12 lr: 0.000113 grad: 0.0689 (0.0742) loss: 0.8286 (0.8338) time: 0.1268 data: 0.0348 max mem: 9377 +Train: [23] [4400/6250] eta: 0:04:56 lr: 0.000113 grad: 0.0716 (0.0741) loss: 0.8271 (0.8337) time: 0.2241 data: 0.1384 max mem: 9377 +Train: [23] [4500/6250] eta: 0:04:40 lr: 0.000113 grad: 0.0735 (0.0741) loss: 0.8305 (0.8337) time: 0.1513 data: 0.0513 max mem: 9377 +Train: [23] [4600/6250] eta: 0:04:24 lr: 0.000113 grad: 0.0767 (0.0742) loss: 0.8290 (0.8337) time: 0.1528 data: 0.0661 max mem: 9377 +Train: [23] [4700/6250] eta: 0:04:07 lr: 0.000113 grad: 0.0721 (0.0742) loss: 0.8356 (0.8337) time: 0.1567 data: 0.0660 max mem: 9377 +Train: [23] [4800/6250] eta: 0:03:51 lr: 0.000113 grad: 0.0772 (0.0743) loss: 0.8290 (0.8336) time: 0.1367 data: 0.0507 max mem: 9377 +Train: [23] [4900/6250] eta: 0:03:35 lr: 0.000113 grad: 0.0729 (0.0743) loss: 0.8282 (0.8336) time: 0.1509 data: 0.0586 max mem: 9377 +Train: [23] [5000/6250] eta: 0:03:19 lr: 0.000113 grad: 0.0756 (0.0743) loss: 0.8314 (0.8336) time: 0.1479 data: 0.0621 max mem: 9377 +Train: [23] [5100/6250] eta: 0:03:03 lr: 0.000113 grad: 0.0694 (0.0744) loss: 0.8347 (0.8335) time: 0.1581 data: 0.0724 max mem: 9377 +Train: [23] [5200/6250] eta: 0:02:47 lr: 0.000113 grad: 0.0749 (0.0744) loss: 0.8374 (0.8335) time: 0.1440 data: 0.0520 max mem: 9377 +Train: [23] [5300/6250] eta: 0:02:31 lr: 0.000113 grad: 0.0657 (0.0744) loss: 0.8391 (0.8335) time: 0.1550 data: 0.0645 max mem: 9377 +Train: [23] [5400/6250] eta: 0:02:15 lr: 0.000113 grad: 0.0733 (0.0743) loss: 0.8341 (0.8336) time: 0.1975 data: 0.1124 max mem: 9377 +Train: [23] [5500/6250] eta: 0:01:59 lr: 0.000113 grad: 0.0723 (0.0743) loss: 0.8351 (0.8336) time: 0.1567 data: 0.0656 max mem: 9377 +Train: [23] [5600/6250] eta: 0:01:43 lr: 0.000113 grad: 0.0720 (0.0743) loss: 0.8346 (0.8337) time: 0.1567 data: 0.0715 max mem: 9377 +Train: [23] [5700/6250] eta: 0:01:27 lr: 0.000113 grad: 0.0705 (0.0744) loss: 0.8366 (0.8337) time: 0.1760 data: 0.0888 max mem: 9377 +Train: [23] [5800/6250] eta: 0:01:11 lr: 0.000113 grad: 0.0751 (0.0745) loss: 0.8326 (0.8337) time: 0.1681 data: 0.0785 max mem: 9377 +Train: [23] [5900/6250] eta: 0:00:55 lr: 0.000113 grad: 0.0717 (0.0744) loss: 0.8344 (0.8337) time: 0.1655 data: 0.0745 max mem: 9377 +Train: [23] [6000/6250] eta: 0:00:39 lr: 0.000113 grad: 0.0732 (0.0744) loss: 0.8387 (0.8337) time: 0.1558 data: 0.0734 max mem: 9377 +Train: [23] [6100/6250] eta: 0:00:23 lr: 0.000113 grad: 0.0659 (0.0744) loss: 0.8370 (0.8337) time: 0.1559 data: 0.0695 max mem: 9377 +Train: [23] [6200/6250] eta: 0:00:07 lr: 0.000113 grad: 0.0738 (0.0744) loss: 0.8334 (0.8338) time: 0.1856 data: 0.0935 max mem: 9377 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.0712 (0.0744) loss: 0.8346 (0.8338) time: 0.2022 data: 0.1054 max mem: 9377 +Train: [23] Total time: 0:16:44 (0.1607 s / it) +Averaged stats: lr: 0.000113 grad: 0.0712 (0.0744) loss: 0.8346 (0.8338) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:06:21 loss: 0.8387 (0.8387) time: 6.1592 data: 6.1269 max mem: 9377 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8356 (0.8365) time: 0.1406 data: 0.1151 max mem: 9377 +Eval (hcp-train-subset): [23] Total time: 0:00:15 (0.2506 s / it) +Averaged stats (hcp-train-subset): loss: 0.8356 (0.8365) +Eval (hcp-val): [23] [ 0/62] eta: 0:05:18 loss: 0.8419 (0.8419) time: 5.1447 data: 5.1063 max mem: 9377 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8414 (0.8426) time: 0.1615 data: 0.1357 max mem: 9377 +Eval (hcp-val): [23] Total time: 0:00:15 (0.2494 s / it) +Averaged stats (hcp-val): loss: 0.8414 (0.8426) +Eval (nsd-val): [23] [ 0/62] eta: 0:05:10 loss: 0.8066 (0.8066) time: 5.0006 data: 4.9696 max mem: 9377 +Eval (nsd-val): [23] [61/62] eta: 0:00:00 loss: 0.8141 (0.8167) time: 0.1603 data: 0.1327 max mem: 9377 +Eval (nsd-val): [23] Total time: 0:00:16 (0.2594 s / it) +Averaged stats (nsd-val): loss: 0.8141 (0.8167) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 11:11:43 lr: 0.000113 grad: 0.1163 (0.1163) loss: 0.8368 (0.8368) time: 6.4485 data: 6.2486 max mem: 9377 +Train: [24] [ 100/6250] eta: 0:24:47 lr: 0.000113 grad: 0.0822 (0.0833) loss: 0.8173 (0.8380) time: 0.2029 data: 0.0913 max mem: 9377 +Train: [24] [ 200/6250] eta: 0:21:14 lr: 0.000113 grad: 0.0682 (0.0786) loss: 0.8339 (0.8377) time: 0.1770 data: 0.0760 max mem: 9377 +Train: [24] [ 300/6250] eta: 0:19:56 lr: 0.000113 grad: 0.0697 (0.0771) loss: 0.8343 (0.8367) time: 0.1899 data: 0.0846 max mem: 9377 +Train: [24] [ 400/6250] eta: 0:18:44 lr: 0.000113 grad: 0.0678 (0.0758) loss: 0.8343 (0.8366) time: 0.1448 data: 0.0487 max mem: 9377 +Train: [24] [ 500/6250] eta: 0:17:50 lr: 0.000113 grad: 0.0668 (0.0751) loss: 0.8342 (0.8366) time: 0.1461 data: 0.0448 max mem: 9377 +Train: [24] [ 600/6250] eta: 0:17:15 lr: 0.000113 grad: 0.0719 (0.0747) loss: 0.8343 (0.8366) time: 0.1920 data: 0.1128 max mem: 9377 +Train: [24] [ 700/6250] eta: 0:16:43 lr: 0.000113 grad: 0.0716 (0.0742) loss: 0.8388 (0.8367) time: 0.1530 data: 0.0664 max mem: 9377 +Train: [24] [ 800/6250] eta: 0:16:21 lr: 0.000113 grad: 0.0660 (0.0737) loss: 0.8377 (0.8369) time: 0.1975 data: 0.1059 max mem: 9377 +Train: [24] [ 900/6250] eta: 0:15:51 lr: 0.000113 grad: 0.0627 (0.0732) loss: 0.8431 (0.8371) time: 0.1656 data: 0.0831 max mem: 9377 +Train: [24] [1000/6250] eta: 0:15:29 lr: 0.000113 grad: 0.0665 (0.0728) loss: 0.8352 (0.8372) time: 0.1604 data: 0.0756 max mem: 9377 +Train: [24] [1100/6250] eta: 0:15:07 lr: 0.000113 grad: 0.0719 (0.0725) loss: 0.8329 (0.8373) time: 0.1712 data: 0.0801 max mem: 9377 +Train: [24] [1200/6250] eta: 0:14:40 lr: 0.000113 grad: 0.0633 (0.0723) loss: 0.8290 (0.8370) time: 0.1554 data: 0.0615 max mem: 9377 +Train: [24] [1300/6250] eta: 0:14:26 lr: 0.000113 grad: 0.0656 (0.0722) loss: 0.8432 (0.8370) time: 0.1768 data: 0.0843 max mem: 9377 +Train: [24] [1400/6250] eta: 0:14:07 lr: 0.000113 grad: 0.0713 (0.0721) loss: 0.8388 (0.8370) time: 0.1746 data: 0.0831 max mem: 9377 +Train: [24] [1500/6250] eta: 0:13:50 lr: 0.000113 grad: 0.0735 (0.0722) loss: 0.8372 (0.8370) time: 0.1793 data: 0.0934 max mem: 9377 +Train: [24] [1600/6250] eta: 0:13:30 lr: 0.000113 grad: 0.0709 (0.0724) loss: 0.8351 (0.8368) time: 0.1819 data: 0.0884 max mem: 9377 +Train: [24] [1700/6250] eta: 0:13:13 lr: 0.000113 grad: 0.0705 (0.0724) loss: 0.8339 (0.8368) time: 0.1712 data: 0.0765 max mem: 9377 +Train: [24] [1800/6250] eta: 0:12:53 lr: 0.000113 grad: 0.0711 (0.0724) loss: 0.8379 (0.8368) time: 0.1656 data: 0.0677 max mem: 9377 +Train: [24] [1900/6250] eta: 0:12:31 lr: 0.000113 grad: 0.0745 (0.0724) loss: 0.8351 (0.8367) time: 0.1467 data: 0.0549 max mem: 9377 +Train: [24] [2000/6250] eta: 0:12:09 lr: 0.000113 grad: 0.0702 (0.0725) loss: 0.8423 (0.8367) time: 0.1405 data: 0.0435 max mem: 9377 +Train: [24] [2100/6250] eta: 0:11:49 lr: 0.000113 grad: 0.0690 (0.0725) loss: 0.8394 (0.8367) time: 0.1621 data: 0.0741 max mem: 9377 +Train: [24] [2200/6250] eta: 0:11:28 lr: 0.000113 grad: 0.0694 (0.0725) loss: 0.8398 (0.8368) time: 0.1355 data: 0.0416 max mem: 9377 +Train: [24] [2300/6250] eta: 0:11:09 lr: 0.000113 grad: 0.0718 (0.0726) loss: 0.8385 (0.8368) time: 0.1663 data: 0.0806 max mem: 9377 +Train: [24] [2400/6250] eta: 0:10:50 lr: 0.000113 grad: 0.0704 (0.0726) loss: 0.8404 (0.8369) time: 0.1692 data: 0.0790 max mem: 9377 +Train: [24] [2500/6250] eta: 0:10:31 lr: 0.000113 grad: 0.0669 (0.0726) loss: 0.8402 (0.8369) time: 0.1459 data: 0.0599 max mem: 9377 +Train: [24] [2600/6250] eta: 0:10:12 lr: 0.000113 grad: 0.0724 (0.0727) loss: 0.8397 (0.8370) time: 0.1582 data: 0.0666 max mem: 9377 +Train: [24] [2700/6250] eta: 0:09:53 lr: 0.000113 grad: 0.0733 (0.0727) loss: 0.8391 (0.8370) time: 0.1195 data: 0.0284 max mem: 9377 +Train: [24] [2800/6250] eta: 0:09:35 lr: 0.000113 grad: 0.0690 (0.0728) loss: 0.8321 (0.8369) time: 0.1521 data: 0.0652 max mem: 9377 +Train: [24] [2900/6250] eta: 0:09:18 lr: 0.000112 grad: 0.0730 (0.0728) loss: 0.8348 (0.8369) time: 0.1090 data: 0.0141 max mem: 9377 +Train: [24] [3000/6250] eta: 0:09:01 lr: 0.000112 grad: 0.0709 (0.0729) loss: 0.8339 (0.8368) time: 0.1987 data: 0.1224 max mem: 9377 +Train: [24] [3100/6250] eta: 0:08:44 lr: 0.000112 grad: 0.0741 (0.0730) loss: 0.8321 (0.8368) time: 0.1642 data: 0.0752 max mem: 9377 +Train: [24] [3200/6250] eta: 0:08:27 lr: 0.000112 grad: 0.0696 (0.0731) loss: 0.8349 (0.8367) time: 0.1904 data: 0.0996 max mem: 9377 +Train: [24] [3300/6250] eta: 0:08:08 lr: 0.000112 grad: 0.0710 (0.0732) loss: 0.8324 (0.8366) time: 0.1601 data: 0.0760 max mem: 9377 +Train: [24] [3400/6250] eta: 0:07:51 lr: 0.000112 grad: 0.0703 (0.0732) loss: 0.8368 (0.8365) time: 0.1668 data: 0.0803 max mem: 9377 +Train: [24] [3500/6250] eta: 0:07:33 lr: 0.000112 grad: 0.0758 (0.0734) loss: 0.8347 (0.8364) time: 0.1513 data: 0.0591 max mem: 9377 +Train: [24] [3600/6250] eta: 0:07:16 lr: 0.000112 grad: 0.0808 (0.0736) loss: 0.8292 (0.8363) time: 0.1375 data: 0.0449 max mem: 9377 +Train: [24] [3700/6250] eta: 0:06:59 lr: 0.000112 grad: 0.0737 (0.0736) loss: 0.8346 (0.8362) time: 0.1576 data: 0.0689 max mem: 9377 +Train: [24] [3800/6250] eta: 0:06:41 lr: 0.000112 grad: 0.0747 (0.0737) loss: 0.8282 (0.8360) time: 0.1541 data: 0.0658 max mem: 9377 +Train: [24] [3900/6250] eta: 0:06:25 lr: 0.000112 grad: 0.0668 (0.0738) loss: 0.8360 (0.8359) time: 0.1460 data: 0.0542 max mem: 9377 +Train: [24] [4000/6250] eta: 0:06:08 lr: 0.000112 grad: 0.0760 (0.0738) loss: 0.8257 (0.8358) time: 0.1594 data: 0.0673 max mem: 9377 +Train: [24] [4100/6250] eta: 0:05:51 lr: 0.000112 grad: 0.0660 (0.0739) loss: 0.8336 (0.8357) time: 0.1716 data: 0.0815 max mem: 9377 +Train: [24] [4200/6250] eta: 0:05:34 lr: 0.000112 grad: 0.0712 (0.0739) loss: 0.8325 (0.8356) time: 0.1508 data: 0.0624 max mem: 9377 +Train: [24] [4300/6250] eta: 0:05:17 lr: 0.000112 grad: 0.0681 (0.0739) loss: 0.8374 (0.8356) time: 0.1681 data: 0.0826 max mem: 9377 +Train: [24] [4400/6250] eta: 0:05:01 lr: 0.000112 grad: 0.0671 (0.0739) loss: 0.8375 (0.8355) time: 0.1724 data: 0.0793 max mem: 9377 +Train: [24] [4500/6250] eta: 0:04:45 lr: 0.000112 grad: 0.0746 (0.0739) loss: 0.8297 (0.8354) time: 0.1776 data: 0.0865 max mem: 9377 +Train: [24] [4600/6250] eta: 0:04:29 lr: 0.000112 grad: 0.0736 (0.0740) loss: 0.8333 (0.8353) time: 0.1651 data: 0.0845 max mem: 9377 +Train: [24] [4700/6250] eta: 0:04:12 lr: 0.000112 grad: 0.0731 (0.0740) loss: 0.8301 (0.8353) time: 0.1594 data: 0.0618 max mem: 9377 +Train: [24] [4800/6250] eta: 0:03:56 lr: 0.000112 grad: 0.0706 (0.0740) loss: 0.8348 (0.8352) time: 0.1795 data: 0.0928 max mem: 9377 +Train: [24] [4900/6250] eta: 0:03:39 lr: 0.000112 grad: 0.0704 (0.0740) loss: 0.8330 (0.8352) time: 0.1864 data: 0.1006 max mem: 9377 +Train: [24] [5000/6250] eta: 0:03:23 lr: 0.000112 grad: 0.0741 (0.0741) loss: 0.8308 (0.8352) time: 0.1549 data: 0.0609 max mem: 9377 +Train: [24] [5100/6250] eta: 0:03:06 lr: 0.000112 grad: 0.0691 (0.0740) loss: 0.8359 (0.8352) time: 0.1412 data: 0.0597 max mem: 9377 +Train: [24] [5200/6250] eta: 0:02:50 lr: 0.000112 grad: 0.0679 (0.0740) loss: 0.8336 (0.8352) time: 0.1353 data: 0.0380 max mem: 9377 +Train: [24] [5300/6250] eta: 0:02:34 lr: 0.000112 grad: 0.0740 (0.0740) loss: 0.8314 (0.8352) time: 0.1459 data: 0.0550 max mem: 9377 +Train: [24] [5400/6250] eta: 0:02:17 lr: 0.000112 grad: 0.0684 (0.0740) loss: 0.8350 (0.8352) time: 0.1611 data: 0.0686 max mem: 9377 +Train: [24] [5500/6250] eta: 0:02:01 lr: 0.000112 grad: 0.0665 (0.0740) loss: 0.8396 (0.8352) time: 0.1248 data: 0.0339 max mem: 9377 +Train: [24] [5600/6250] eta: 0:01:45 lr: 0.000112 grad: 0.0801 (0.0740) loss: 0.8330 (0.8352) time: 0.1780 data: 0.0896 max mem: 9377 +Train: [24] [5700/6250] eta: 0:01:29 lr: 0.000112 grad: 0.0752 (0.0740) loss: 0.8326 (0.8352) time: 0.1711 data: 0.0734 max mem: 9377 +Train: [24] [5800/6250] eta: 0:01:12 lr: 0.000112 grad: 0.0729 (0.0741) loss: 0.8323 (0.8352) time: 0.1605 data: 0.0633 max mem: 9377 +Train: [24] [5900/6250] eta: 0:00:56 lr: 0.000112 grad: 0.0774 (0.0741) loss: 0.8338 (0.8351) time: 0.1738 data: 0.0840 max mem: 9377 +Train: [24] [6000/6250] eta: 0:00:40 lr: 0.000112 grad: 0.0773 (0.0742) loss: 0.8293 (0.8351) time: 0.1675 data: 0.0814 max mem: 9377 +Train: [24] [6100/6250] eta: 0:00:24 lr: 0.000112 grad: 0.0781 (0.0743) loss: 0.8275 (0.8350) time: 0.1677 data: 0.0781 max mem: 9377 +Train: [24] [6200/6250] eta: 0:00:08 lr: 0.000112 grad: 0.0829 (0.0745) loss: 0.8265 (0.8349) time: 0.1669 data: 0.0875 max mem: 9377 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.0745 (0.0745) loss: 0.8314 (0.8349) time: 0.1873 data: 0.0974 max mem: 9377 +Train: [24] Total time: 0:17:03 (0.1638 s / it) +Averaged stats: lr: 0.000112 grad: 0.0745 (0.0745) loss: 0.8314 (0.8349) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:03:36 loss: 0.8383 (0.8383) time: 3.4909 data: 3.3991 max mem: 9377 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8335 (0.8367) time: 0.1406 data: 0.1139 max mem: 9377 +Eval (hcp-train-subset): [24] Total time: 0:00:15 (0.2442 s / it) +Averaged stats (hcp-train-subset): loss: 0.8335 (0.8367) +Making plots (hcp-train-subset): example=29 +Eval (hcp-val): [24] [ 0/62] eta: 0:04:20 loss: 0.8379 (0.8379) time: 4.1993 data: 4.1124 max mem: 9377 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8431 (0.8436) time: 0.1478 data: 0.1222 max mem: 9377 +Eval (hcp-val): [24] Total time: 0:00:14 (0.2409 s / it) +Averaged stats (hcp-val): loss: 0.8431 (0.8436) +Making plots (hcp-val): example=27 +Eval (nsd-val): [24] [ 0/62] eta: 0:04:02 loss: 0.8018 (0.8018) time: 3.9134 data: 3.8657 max mem: 9377 +Eval (nsd-val): [24] [61/62] eta: 0:00:00 loss: 0.8131 (0.8131) time: 0.1379 data: 0.1119 max mem: 9377 +Eval (nsd-val): [24] Total time: 0:00:14 (0.2419 s / it) +Averaged stats (nsd-val): loss: 0.8131 (0.8131) +Making plots (nsd-val): example=33 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00024.pth +Train: [25] [ 0/6250] eta: 11:00:55 lr: 0.000112 grad: 0.0624 (0.0624) loss: 0.8427 (0.8427) time: 6.3450 data: 6.2005 max mem: 9377 +Train: [25] [ 100/6250] eta: 0:23:54 lr: 0.000112 grad: 0.0758 (0.1004) loss: 0.8382 (0.8356) time: 0.1957 data: 0.0962 max mem: 9377 +Train: [25] [ 200/6250] eta: 0:20:27 lr: 0.000112 grad: 0.0699 (0.0909) loss: 0.8319 (0.8337) time: 0.1449 data: 0.0389 max mem: 9377 +Train: [25] [ 300/6250] eta: 0:18:59 lr: 0.000112 grad: 0.0723 (0.0871) loss: 0.8402 (0.8328) time: 0.1370 data: 0.0454 max mem: 9377 +Train: [25] [ 400/6250] eta: 0:18:21 lr: 0.000112 grad: 0.0691 (0.0843) loss: 0.8383 (0.8321) time: 0.1777 data: 0.0791 max mem: 9377 +Train: [25] [ 500/6250] eta: 0:17:37 lr: 0.000112 grad: 0.0797 (0.0836) loss: 0.8405 (0.8322) time: 0.1605 data: 0.0488 max mem: 9377 +Train: [25] [ 600/6250] eta: 0:17:35 lr: 0.000112 grad: 0.0676 (0.0821) loss: 0.8345 (0.8325) time: 0.2086 data: 0.1249 max mem: 9377 +Train: [25] [ 700/6250] eta: 0:17:07 lr: 0.000112 grad: 0.0673 (0.0809) loss: 0.8402 (0.8332) time: 0.1841 data: 0.0936 max mem: 9377 +Train: [25] [ 800/6250] eta: 0:16:47 lr: 0.000112 grad: 0.0688 (0.0795) loss: 0.8382 (0.8337) time: 0.2050 data: 0.1238 max mem: 9377 +Train: [25] [ 900/6250] eta: 0:16:21 lr: 0.000112 grad: 0.0674 (0.0792) loss: 0.8385 (0.8335) time: 0.1724 data: 0.0848 max mem: 9377 +Train: [25] [1000/6250] eta: 0:15:56 lr: 0.000112 grad: 0.0716 (0.0783) loss: 0.8326 (0.8337) time: 0.1775 data: 0.0835 max mem: 9377 +Train: [25] [1100/6250] eta: 0:15:34 lr: 0.000112 grad: 0.0697 (0.0778) loss: 0.8415 (0.8339) time: 0.1802 data: 0.0861 max mem: 9377 +Train: [25] [1200/6250] eta: 0:15:14 lr: 0.000112 grad: 0.0717 (0.0775) loss: 0.8353 (0.8340) time: 0.2326 data: 0.1497 max mem: 9377 +Train: [25] [1300/6250] eta: 0:15:00 lr: 0.000112 grad: 0.0672 (0.0772) loss: 0.8311 (0.8341) time: 0.1635 data: 0.0748 max mem: 9377 +Train: [25] [1400/6250] eta: 0:14:41 lr: 0.000112 grad: 0.0670 (0.0768) loss: 0.8350 (0.8343) time: 0.1814 data: 0.1006 max mem: 9377 +Train: [25] [1500/6250] eta: 0:14:22 lr: 0.000112 grad: 0.0696 (0.0767) loss: 0.8381 (0.8341) time: 0.1410 data: 0.0526 max mem: 9377 +Train: [25] [1600/6250] eta: 0:13:56 lr: 0.000111 grad: 0.0769 (0.0770) loss: 0.8291 (0.8340) time: 0.1593 data: 0.0748 max mem: 9377 +Train: [25] [1700/6250] eta: 0:13:37 lr: 0.000111 grad: 0.0762 (0.0770) loss: 0.8286 (0.8337) time: 0.1906 data: 0.1073 max mem: 9377 +Train: [25] [1800/6250] eta: 0:13:17 lr: 0.000111 grad: 0.0703 (0.0769) loss: 0.8342 (0.8337) time: 0.1741 data: 0.0721 max mem: 9377 +Train: [25] [1900/6250] eta: 0:12:56 lr: 0.000111 grad: 0.0738 (0.0768) loss: 0.8383 (0.8335) time: 0.1474 data: 0.0567 max mem: 9377 +Train: [25] [2000/6250] eta: 0:12:34 lr: 0.000111 grad: 0.0761 (0.0768) loss: 0.8273 (0.8334) time: 0.1755 data: 0.0747 max mem: 9377 +Train: [25] [2100/6250] eta: 0:12:10 lr: 0.000111 grad: 0.0731 (0.0768) loss: 0.8285 (0.8331) time: 0.1377 data: 0.0499 max mem: 9377 +Train: [25] [2200/6250] eta: 0:11:49 lr: 0.000111 grad: 0.0715 (0.0767) loss: 0.8268 (0.8330) time: 0.1422 data: 0.0444 max mem: 9377 +Train: [25] [2300/6250] eta: 0:11:28 lr: 0.000111 grad: 0.0749 (0.0766) loss: 0.8363 (0.8328) time: 0.1446 data: 0.0549 max mem: 9377 +Train: [25] [2400/6250] eta: 0:11:11 lr: 0.000111 grad: 0.0717 (0.0766) loss: 0.8265 (0.8326) time: 0.1782 data: 0.1043 max mem: 9377 +Train: [25] [2500/6250] eta: 0:10:51 lr: 0.000111 grad: 0.0753 (0.0767) loss: 0.8232 (0.8323) time: 0.1896 data: 0.1120 max mem: 9377 +Train: [25] [2600/6250] eta: 0:10:33 lr: 0.000111 grad: 0.0733 (0.0769) loss: 0.8351 (0.8322) time: 0.1678 data: 0.0813 max mem: 9377 +Train: [25] [2700/6250] eta: 0:10:17 lr: 0.000111 grad: 0.0749 (0.0769) loss: 0.8298 (0.8320) time: 0.1971 data: 0.1121 max mem: 9377 +Train: [25] [2800/6250] eta: 0:09:57 lr: 0.000111 grad: 0.0741 (0.0771) loss: 0.8308 (0.8318) time: 0.1617 data: 0.0764 max mem: 9377 +Train: [25] [2900/6250] eta: 0:09:38 lr: 0.000111 grad: 0.0768 (0.0772) loss: 0.8280 (0.8315) time: 0.1617 data: 0.0728 max mem: 9377 +Train: [25] [3000/6250] eta: 0:09:19 lr: 0.000111 grad: 0.0746 (0.0774) loss: 0.8291 (0.8313) time: 0.1533 data: 0.0667 max mem: 9377 +Train: [25] [3100/6250] eta: 0:09:01 lr: 0.000111 grad: 0.0758 (0.0775) loss: 0.8301 (0.8311) time: 0.1891 data: 0.1111 max mem: 9377 +Train: [25] [3200/6250] eta: 0:08:43 lr: 0.000111 grad: 0.0791 (0.0775) loss: 0.8266 (0.8309) time: 0.1720 data: 0.0932 max mem: 9377 +Train: [25] [3300/6250] eta: 0:08:24 lr: 0.000111 grad: 0.0766 (0.0776) loss: 0.8256 (0.8307) time: 0.1434 data: 0.0544 max mem: 9377 +Train: [25] [3400/6250] eta: 0:08:06 lr: 0.000111 grad: 0.0758 (0.0777) loss: 0.8306 (0.8306) time: 0.1594 data: 0.0748 max mem: 9377 +Train: [25] [3500/6250] eta: 0:07:48 lr: 0.000111 grad: 0.0734 (0.0778) loss: 0.8277 (0.8304) time: 0.1660 data: 0.0769 max mem: 9377 +Train: [25] [3600/6250] eta: 0:07:30 lr: 0.000111 grad: 0.0712 (0.0779) loss: 0.8289 (0.8303) time: 0.1546 data: 0.0687 max mem: 9377 +Train: [25] [3700/6250] eta: 0:07:12 lr: 0.000111 grad: 0.0724 (0.0780) loss: 0.8290 (0.8303) time: 0.1650 data: 0.0874 max mem: 9377 +Train: [25] [3800/6250] eta: 0:06:54 lr: 0.000111 grad: 0.0745 (0.0780) loss: 0.8315 (0.8302) time: 0.1697 data: 0.0882 max mem: 9377 +Train: [25] [3900/6250] eta: 0:06:36 lr: 0.000111 grad: 0.0745 (0.0781) loss: 0.8272 (0.8301) time: 0.1471 data: 0.0584 max mem: 9377 +Train: [25] [4000/6250] eta: 0:06:19 lr: 0.000111 grad: 0.0805 (0.0782) loss: 0.8231 (0.8300) time: 0.1733 data: 0.0883 max mem: 9377 +Train: [25] [4100/6250] eta: 0:06:01 lr: 0.000111 grad: 0.0869 (0.0783) loss: 0.8224 (0.8299) time: 0.1680 data: 0.0839 max mem: 9377 +Train: [25] [4200/6250] eta: 0:05:44 lr: 0.000111 grad: 0.0700 (0.0783) loss: 0.8269 (0.8298) time: 0.1512 data: 0.0667 max mem: 9377 +Train: [25] [4300/6250] eta: 0:05:27 lr: 0.000111 grad: 0.0764 (0.0784) loss: 0.8249 (0.8297) time: 0.1612 data: 0.0708 max mem: 9377 +Train: [25] [4400/6250] eta: 0:05:10 lr: 0.000111 grad: 0.0819 (0.0784) loss: 0.8271 (0.8296) time: 0.1595 data: 0.0744 max mem: 9377 +Train: [25] [4500/6250] eta: 0:04:52 lr: 0.000111 grad: 0.0787 (0.0784) loss: 0.8274 (0.8296) time: 0.1470 data: 0.0620 max mem: 9377 +Train: [25] [4600/6250] eta: 0:04:35 lr: 0.000111 grad: 0.0778 (0.0785) loss: 0.8279 (0.8295) time: 0.1537 data: 0.0631 max mem: 9377 +Train: [25] [4700/6250] eta: 0:04:18 lr: 0.000111 grad: 0.0756 (0.0785) loss: 0.8265 (0.8295) time: 0.1578 data: 0.0710 max mem: 9377 +Train: [25] [4800/6250] eta: 0:04:01 lr: 0.000111 grad: 0.0722 (0.0786) loss: 0.8319 (0.8295) time: 0.1607 data: 0.0754 max mem: 9377 +Train: [25] [4900/6250] eta: 0:03:44 lr: 0.000111 grad: 0.0772 (0.0786) loss: 0.8286 (0.8294) time: 0.1550 data: 0.0707 max mem: 9377 +Train: [25] [5000/6250] eta: 0:03:27 lr: 0.000111 grad: 0.0757 (0.0787) loss: 0.8304 (0.8295) time: 0.1584 data: 0.0648 max mem: 9377 +Train: [25] [5100/6250] eta: 0:03:11 lr: 0.000111 grad: 0.0734 (0.0786) loss: 0.8305 (0.8295) time: 0.1810 data: 0.0925 max mem: 9377 +Train: [25] [5200/6250] eta: 0:02:54 lr: 0.000111 grad: 0.0775 (0.0787) loss: 0.8281 (0.8296) time: 0.1822 data: 0.1008 max mem: 9377 +Train: [25] [5300/6250] eta: 0:02:38 lr: 0.000111 grad: 0.0778 (0.0786) loss: 0.8269 (0.8295) time: 0.1549 data: 0.0666 max mem: 9377 +Train: [25] [5400/6250] eta: 0:02:21 lr: 0.000111 grad: 0.0756 (0.0787) loss: 0.8267 (0.8296) time: 0.1741 data: 0.0806 max mem: 9377 +Train: [25] [5500/6250] eta: 0:02:04 lr: 0.000111 grad: 0.0825 (0.0787) loss: 0.8279 (0.8296) time: 0.1831 data: 0.0969 max mem: 9377 +Train: [25] [5600/6250] eta: 0:01:48 lr: 0.000111 grad: 0.0762 (0.0788) loss: 0.8251 (0.8295) time: 0.1432 data: 0.0574 max mem: 9377 +Train: [25] [5700/6250] eta: 0:01:31 lr: 0.000111 grad: 0.0804 (0.0788) loss: 0.8286 (0.8295) time: 0.1649 data: 0.0797 max mem: 9377 +Train: [25] [5800/6250] eta: 0:01:14 lr: 0.000111 grad: 0.0786 (0.0789) loss: 0.8251 (0.8295) time: 0.1714 data: 0.0813 max mem: 9377 +Train: [25] [5900/6250] eta: 0:00:58 lr: 0.000111 grad: 0.0760 (0.0789) loss: 0.8340 (0.8295) time: 0.1785 data: 0.0889 max mem: 9377 +Train: [25] [6000/6250] eta: 0:00:41 lr: 0.000111 grad: 0.0742 (0.0788) loss: 0.8389 (0.8296) time: 0.1467 data: 0.0633 max mem: 9377 +Train: [25] [6100/6250] eta: 0:00:24 lr: 0.000111 grad: 0.0724 (0.0788) loss: 0.8358 (0.8297) time: 0.1642 data: 0.0810 max mem: 9377 +Train: [25] [6200/6250] eta: 0:00:08 lr: 0.000111 grad: 0.0759 (0.0788) loss: 0.8303 (0.8296) time: 0.1753 data: 0.0824 max mem: 9377 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.0772 (0.0788) loss: 0.8299 (0.8297) time: 0.1869 data: 0.1044 max mem: 9377 +Train: [25] Total time: 0:17:26 (0.1674 s / it) +Averaged stats: lr: 0.000111 grad: 0.0772 (0.0788) loss: 0.8299 (0.8297) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:05:21 loss: 0.8446 (0.8446) time: 5.1916 data: 5.1603 max mem: 9377 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8327 (0.8366) time: 0.1291 data: 0.1039 max mem: 9377 +Eval (hcp-train-subset): [25] Total time: 0:00:15 (0.2468 s / it) +Averaged stats (hcp-train-subset): loss: 0.8327 (0.8366) +Eval (hcp-val): [25] [ 0/62] eta: 0:05:45 loss: 0.8405 (0.8405) time: 5.5649 data: 5.5338 max mem: 9377 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8424 (0.8435) time: 0.1376 data: 0.1117 max mem: 9377 +Eval (hcp-val): [25] Total time: 0:00:15 (0.2436 s / it) +Averaged stats (hcp-val): loss: 0.8424 (0.8435) +Eval (nsd-val): [25] [ 0/62] eta: 0:03:55 loss: 0.8020 (0.8020) time: 3.7935 data: 3.7170 max mem: 9377 +Eval (nsd-val): [25] [61/62] eta: 0:00:00 loss: 0.8131 (0.8145) time: 0.1445 data: 0.1191 max mem: 9377 +Eval (nsd-val): [25] Total time: 0:00:14 (0.2356 s / it) +Averaged stats (nsd-val): loss: 0.8131 (0.8145) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [26] [ 0/6250] eta: 11:40:27 lr: 0.000111 grad: 0.0671 (0.0671) loss: 0.8336 (0.8336) time: 6.7243 data: 6.5996 max mem: 9377 +Train: [26] [ 100/6250] eta: 0:22:40 lr: 0.000111 grad: 0.0918 (0.0915) loss: 0.8260 (0.8344) time: 0.1731 data: 0.0676 max mem: 9377 +Train: [26] [ 200/6250] eta: 0:19:34 lr: 0.000110 grad: 0.0807 (0.0880) loss: 0.8294 (0.8329) time: 0.1426 data: 0.0287 max mem: 9377 +Train: [26] [ 300/6250] eta: 0:18:11 lr: 0.000110 grad: 0.0711 (0.0844) loss: 0.8391 (0.8328) time: 0.1652 data: 0.0600 max mem: 9377 +Train: [26] [ 400/6250] eta: 0:17:22 lr: 0.000110 grad: 0.0670 (0.0821) loss: 0.8342 (0.8329) time: 0.1553 data: 0.0580 max mem: 9377 +Train: [26] [ 500/6250] eta: 0:16:40 lr: 0.000110 grad: 0.0702 (0.0803) loss: 0.8384 (0.8335) time: 0.1686 data: 0.0780 max mem: 9377 +Train: [26] [ 600/6250] eta: 0:16:04 lr: 0.000110 grad: 0.0686 (0.0788) loss: 0.8375 (0.8339) time: 0.1727 data: 0.0722 max mem: 9377 +Train: [26] [ 700/6250] eta: 0:15:48 lr: 0.000110 grad: 0.0731 (0.0780) loss: 0.8294 (0.8343) time: 0.1687 data: 0.0755 max mem: 9377 +Train: [26] [ 800/6250] eta: 0:15:28 lr: 0.000110 grad: 0.0687 (0.0774) loss: 0.8389 (0.8342) time: 0.1808 data: 0.0894 max mem: 9377 +Train: [26] [ 900/6250] eta: 0:15:05 lr: 0.000110 grad: 0.0700 (0.0771) loss: 0.8379 (0.8337) time: 0.1813 data: 0.0893 max mem: 9377 +Train: [26] [1000/6250] eta: 0:14:38 lr: 0.000110 grad: 0.0764 (0.0769) loss: 0.8382 (0.8335) time: 0.1662 data: 0.0722 max mem: 9377 +Train: [26] [1100/6250] eta: 0:14:18 lr: 0.000110 grad: 0.0749 (0.0768) loss: 0.8272 (0.8334) time: 0.1516 data: 0.0662 max mem: 9377 +Train: [26] [1200/6250] eta: 0:13:58 lr: 0.000110 grad: 0.0774 (0.0770) loss: 0.8253 (0.8330) time: 0.1427 data: 0.0509 max mem: 9377 +Train: [26] [1300/6250] eta: 0:13:39 lr: 0.000110 grad: 0.0789 (0.0769) loss: 0.8242 (0.8327) time: 0.1503 data: 0.0521 max mem: 9377 +Train: [26] [1400/6250] eta: 0:13:25 lr: 0.000110 grad: 0.0688 (0.0767) loss: 0.8363 (0.8326) time: 0.2343 data: 0.1557 max mem: 9377 +Train: [26] [1500/6250] eta: 0:13:07 lr: 0.000110 grad: 0.0776 (0.0766) loss: 0.8329 (0.8326) time: 0.1423 data: 0.0554 max mem: 9377 +Train: [26] [1600/6250] eta: 0:12:52 lr: 0.000110 grad: 0.0690 (0.0766) loss: 0.8300 (0.8326) time: 0.1969 data: 0.1062 max mem: 9377 +Train: [26] [1700/6250] eta: 0:12:30 lr: 0.000110 grad: 0.0772 (0.0767) loss: 0.8295 (0.8324) time: 0.1424 data: 0.0529 max mem: 9377 +Train: [26] [1800/6250] eta: 0:12:13 lr: 0.000110 grad: 0.0766 (0.0770) loss: 0.8305 (0.8322) time: 0.1593 data: 0.0681 max mem: 9377 +Train: [26] [1900/6250] eta: 0:11:54 lr: 0.000110 grad: 0.0767 (0.0771) loss: 0.8275 (0.8320) time: 0.1454 data: 0.0499 max mem: 9377 +Train: [26] [2000/6250] eta: 0:11:34 lr: 0.000110 grad: 0.0800 (0.0772) loss: 0.8334 (0.8319) time: 0.1286 data: 0.0213 max mem: 9377 +Train: [26] [2100/6250] eta: 0:11:15 lr: 0.000110 grad: 0.0765 (0.0774) loss: 0.8297 (0.8317) time: 0.1483 data: 0.0566 max mem: 9377 +Train: [26] [2200/6250] eta: 0:10:55 lr: 0.000110 grad: 0.0809 (0.0776) loss: 0.8255 (0.8315) time: 0.1531 data: 0.0633 max mem: 9377 +Train: [26] [2300/6250] eta: 0:10:37 lr: 0.000110 grad: 0.0830 (0.0776) loss: 0.8275 (0.8313) time: 0.1435 data: 0.0600 max mem: 9377 +Train: [26] [2400/6250] eta: 0:10:20 lr: 0.000110 grad: 0.0741 (0.0777) loss: 0.8289 (0.8311) time: 0.1450 data: 0.0526 max mem: 9377 +Train: [26] [2500/6250] eta: 0:10:04 lr: 0.000110 grad: 0.0762 (0.0779) loss: 0.8239 (0.8308) time: 0.1536 data: 0.0656 max mem: 9377 +Train: [26] [2600/6250] eta: 0:09:51 lr: 0.000110 grad: 0.0776 (0.0780) loss: 0.8328 (0.8306) time: 0.2242 data: 0.1364 max mem: 9377 +Train: [26] [2700/6250] eta: 0:09:32 lr: 0.000110 grad: 0.0757 (0.0781) loss: 0.8314 (0.8305) time: 0.1503 data: 0.0617 max mem: 9377 +Train: [26] [2800/6250] eta: 0:09:16 lr: 0.000110 grad: 0.0754 (0.0781) loss: 0.8287 (0.8303) time: 0.1738 data: 0.0882 max mem: 9377 +Train: [26] [2900/6250] eta: 0:08:59 lr: 0.000110 grad: 0.0785 (0.0782) loss: 0.8249 (0.8302) time: 0.1688 data: 0.0890 max mem: 9377 +Train: [26] [3000/6250] eta: 0:08:43 lr: 0.000110 grad: 0.0861 (0.0783) loss: 0.8268 (0.8302) time: 0.1976 data: 0.1155 max mem: 9377 +Train: [26] [3100/6250] eta: 0:08:28 lr: 0.000110 grad: 0.0688 (0.0782) loss: 0.8348 (0.8302) time: 0.1378 data: 0.0515 max mem: 9377 +Train: [26] [3200/6250] eta: 0:08:11 lr: 0.000110 grad: 0.0745 (0.0782) loss: 0.8277 (0.8302) time: 0.1459 data: 0.0454 max mem: 9377 +Train: [26] [3300/6250] eta: 0:07:55 lr: 0.000110 grad: 0.0739 (0.0782) loss: 0.8312 (0.8301) time: 0.1766 data: 0.0917 max mem: 9377 +Train: [26] [3400/6250] eta: 0:07:40 lr: 0.000110 grad: 0.0757 (0.0781) loss: 0.8317 (0.8301) time: 0.1255 data: 0.0300 max mem: 9377 +Train: [26] [3500/6250] eta: 0:07:23 lr: 0.000110 grad: 0.0777 (0.0781) loss: 0.8285 (0.8301) time: 0.1694 data: 0.0921 max mem: 9377 +Train: [26] [3600/6250] eta: 0:07:06 lr: 0.000110 grad: 0.0714 (0.0781) loss: 0.8310 (0.8301) time: 0.1529 data: 0.0703 max mem: 9377 +Train: [26] [3700/6250] eta: 0:06:50 lr: 0.000110 grad: 0.0749 (0.0780) loss: 0.8353 (0.8301) time: 0.1673 data: 0.0807 max mem: 9377 +Train: [26] [3800/6250] eta: 0:06:33 lr: 0.000110 grad: 0.0714 (0.0780) loss: 0.8252 (0.8302) time: 0.1697 data: 0.0820 max mem: 9377 +Train: [26] [3900/6250] eta: 0:06:17 lr: 0.000110 grad: 0.0746 (0.0780) loss: 0.8286 (0.8301) time: 0.1484 data: 0.0595 max mem: 9377 +Train: [26] [4000/6250] eta: 0:06:00 lr: 0.000110 grad: 0.0729 (0.0780) loss: 0.8283 (0.8302) time: 0.1572 data: 0.0765 max mem: 9377 +Train: [26] [4100/6250] eta: 0:05:44 lr: 0.000110 grad: 0.0787 (0.0781) loss: 0.8265 (0.8301) time: 0.1415 data: 0.0574 max mem: 9377 +Train: [26] [4200/6250] eta: 0:05:28 lr: 0.000110 grad: 0.0769 (0.0781) loss: 0.8340 (0.8301) time: 0.1577 data: 0.0686 max mem: 9377 +Train: [26] [4300/6250] eta: 0:05:12 lr: 0.000110 grad: 0.0797 (0.0781) loss: 0.8314 (0.8301) time: 0.2052 data: 0.1232 max mem: 9377 +Train: [26] [4400/6250] eta: 0:04:56 lr: 0.000110 grad: 0.0813 (0.0782) loss: 0.8337 (0.8301) time: 0.1775 data: 0.0845 max mem: 9377 +Train: [26] [4500/6250] eta: 0:04:40 lr: 0.000110 grad: 0.0745 (0.0782) loss: 0.8274 (0.8301) time: 0.1477 data: 0.0618 max mem: 9377 +Train: [26] [4600/6250] eta: 0:04:24 lr: 0.000110 grad: 0.0760 (0.0782) loss: 0.8281 (0.8300) time: 0.1520 data: 0.0638 max mem: 9377 +Train: [26] [4700/6250] eta: 0:04:08 lr: 0.000110 grad: 0.0706 (0.0782) loss: 0.8363 (0.8300) time: 0.1809 data: 0.0910 max mem: 9377 +Train: [26] [4800/6250] eta: 0:03:52 lr: 0.000109 grad: 0.0792 (0.0783) loss: 0.8346 (0.8300) time: 0.1455 data: 0.0592 max mem: 9377 +Train: [26] [4900/6250] eta: 0:03:36 lr: 0.000109 grad: 0.0742 (0.0783) loss: 0.8268 (0.8299) time: 0.1616 data: 0.0782 max mem: 9377 +Train: [26] [5000/6250] eta: 0:03:20 lr: 0.000109 grad: 0.0759 (0.0783) loss: 0.8298 (0.8298) time: 0.1882 data: 0.0985 max mem: 9377 +Train: [26] [5100/6250] eta: 0:03:04 lr: 0.000109 grad: 0.0797 (0.0784) loss: 0.8217 (0.8297) time: 0.1683 data: 0.0710 max mem: 9377 +Train: [26] [5200/6250] eta: 0:02:48 lr: 0.000109 grad: 0.0761 (0.0785) loss: 0.8240 (0.8296) time: 0.1405 data: 0.0559 max mem: 9377 +Train: [26] [5300/6250] eta: 0:02:32 lr: 0.000109 grad: 0.0748 (0.0785) loss: 0.8299 (0.8295) time: 0.1678 data: 0.0844 max mem: 9377 +Train: [26] [5400/6250] eta: 0:02:16 lr: 0.000109 grad: 0.0843 (0.0787) loss: 0.8197 (0.8294) time: 0.2176 data: 0.1306 max mem: 9377 +Train: [26] [5500/6250] eta: 0:02:00 lr: 0.000109 grad: 0.0841 (0.0787) loss: 0.8262 (0.8293) time: 0.1313 data: 0.0390 max mem: 9377 +Train: [26] [5600/6250] eta: 0:01:44 lr: 0.000109 grad: 0.0826 (0.0788) loss: 0.8249 (0.8292) time: 0.1868 data: 0.1033 max mem: 9377 +Train: [26] [5700/6250] eta: 0:01:28 lr: 0.000109 grad: 0.0794 (0.0789) loss: 0.8277 (0.8291) time: 0.1665 data: 0.0795 max mem: 9377 +Train: [26] [5800/6250] eta: 0:01:12 lr: 0.000109 grad: 0.0851 (0.0790) loss: 0.8324 (0.8290) time: 0.1633 data: 0.0683 max mem: 9377 +Train: [26] [5900/6250] eta: 0:00:56 lr: 0.000109 grad: 0.0835 (0.0791) loss: 0.8239 (0.8289) time: 0.1532 data: 0.0677 max mem: 9377 +Train: [26] [6000/6250] eta: 0:00:40 lr: 0.000109 grad: 0.0861 (0.0793) loss: 0.8269 (0.8288) time: 0.1667 data: 0.0869 max mem: 9377 +Train: [26] [6100/6250] eta: 0:00:24 lr: 0.000109 grad: 0.0797 (0.0794) loss: 0.8283 (0.8287) time: 0.1637 data: 0.0819 max mem: 9377 +Train: [26] [6200/6250] eta: 0:00:08 lr: 0.000109 grad: 0.0834 (0.0795) loss: 0.8296 (0.8286) time: 0.1924 data: 0.1085 max mem: 9377 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.0814 (0.0796) loss: 0.8227 (0.8285) time: 0.1765 data: 0.0875 max mem: 9377 +Train: [26] Total time: 0:16:52 (0.1620 s / it) +Averaged stats: lr: 0.000109 grad: 0.0814 (0.0796) loss: 0.8227 (0.8285) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:05:01 loss: 0.8394 (0.8394) time: 4.8618 data: 4.8289 max mem: 9377 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8327 (0.8367) time: 0.1557 data: 0.1305 max mem: 9377 +Eval (hcp-train-subset): [26] Total time: 0:00:14 (0.2419 s / it) +Averaged stats (hcp-train-subset): loss: 0.8327 (0.8367) +Eval (hcp-val): [26] [ 0/62] eta: 0:05:09 loss: 0.8428 (0.8428) time: 4.9900 data: 4.9582 max mem: 9377 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8432 (0.8439) time: 0.1537 data: 0.1285 max mem: 9377 +Eval (hcp-val): [26] Total time: 0:00:14 (0.2379 s / it) +Averaged stats (hcp-val): loss: 0.8432 (0.8439) +Eval (nsd-val): [26] [ 0/62] eta: 0:04:49 loss: 0.8043 (0.8043) time: 4.6665 data: 4.5748 max mem: 9377 +Eval (nsd-val): [26] [61/62] eta: 0:00:00 loss: 0.8146 (0.8150) time: 0.1440 data: 0.1168 max mem: 9377 +Eval (nsd-val): [26] Total time: 0:00:14 (0.2400 s / it) +Averaged stats (nsd-val): loss: 0.8146 (0.8150) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [27] [ 0/6250] eta: 12:11:30 lr: 0.000109 grad: 0.1429 (0.1429) loss: 0.8090 (0.8090) time: 7.0225 data: 6.8874 max mem: 9377 +Train: [27] [ 100/6250] eta: 0:23:07 lr: 0.000109 grad: 0.0711 (0.0834) loss: 0.8441 (0.8446) time: 0.1812 data: 0.0802 max mem: 9377 +Train: [27] [ 200/6250] eta: 0:19:46 lr: 0.000109 grad: 0.0748 (0.0809) loss: 0.8322 (0.8390) time: 0.1300 data: 0.0194 max mem: 9377 +Train: [27] [ 300/6250] eta: 0:18:40 lr: 0.000109 grad: 0.0750 (0.0798) loss: 0.8315 (0.8377) time: 0.1830 data: 0.0857 max mem: 9377 +Train: [27] [ 400/6250] eta: 0:17:43 lr: 0.000109 grad: 0.0731 (0.0794) loss: 0.8337 (0.8361) time: 0.1582 data: 0.0552 max mem: 9377 +Train: [27] [ 500/6250] eta: 0:17:02 lr: 0.000109 grad: 0.0745 (0.0791) loss: 0.8263 (0.8345) time: 0.1419 data: 0.0473 max mem: 9377 +Train: [27] [ 600/6250] eta: 0:16:18 lr: 0.000109 grad: 0.0761 (0.0787) loss: 0.8302 (0.8336) time: 0.1436 data: 0.0518 max mem: 9377 +Train: [27] [ 700/6250] eta: 0:15:56 lr: 0.000109 grad: 0.0778 (0.0785) loss: 0.8283 (0.8329) time: 0.2058 data: 0.1292 max mem: 9377 +Train: [27] [ 800/6250] eta: 0:15:37 lr: 0.000109 grad: 0.0754 (0.0783) loss: 0.8312 (0.8323) time: 0.1605 data: 0.0700 max mem: 9377 +Train: [27] [ 900/6250] eta: 0:15:18 lr: 0.000109 grad: 0.0729 (0.0782) loss: 0.8322 (0.8322) time: 0.1769 data: 0.0732 max mem: 9377 +Train: [27] [1000/6250] eta: 0:14:53 lr: 0.000109 grad: 0.0697 (0.0777) loss: 0.8295 (0.8322) time: 0.1651 data: 0.0829 max mem: 9377 +Train: [27] [1100/6250] eta: 0:14:37 lr: 0.000109 grad: 0.0744 (0.0775) loss: 0.8277 (0.8321) time: 0.1883 data: 0.1058 max mem: 9377 +Train: [27] [1200/6250] eta: 0:14:17 lr: 0.000109 grad: 0.0737 (0.0778) loss: 0.8223 (0.8319) time: 0.1700 data: 0.0732 max mem: 9377 +Train: [27] [1300/6250] eta: 0:13:52 lr: 0.000109 grad: 0.0702 (0.0776) loss: 0.8313 (0.8317) time: 0.1353 data: 0.0460 max mem: 9377 +Train: [27] [1400/6250] eta: 0:13:31 lr: 0.000109 grad: 0.0764 (0.0777) loss: 0.8274 (0.8313) time: 0.1551 data: 0.0714 max mem: 9377 +Train: [27] [1500/6250] eta: 0:13:11 lr: 0.000109 grad: 0.0804 (0.0779) loss: 0.8248 (0.8309) time: 0.1853 data: 0.1079 max mem: 9377 +Train: [27] [1600/6250] eta: 0:12:54 lr: 0.000109 grad: 0.0729 (0.0779) loss: 0.8226 (0.8306) time: 0.1413 data: 0.0533 max mem: 9377 +Train: [27] [1700/6250] eta: 0:12:36 lr: 0.000109 grad: 0.0859 (0.0784) loss: 0.8183 (0.8301) time: 0.1489 data: 0.0622 max mem: 9377 +Train: [27] [1800/6250] eta: 0:12:17 lr: 0.000109 grad: 0.0789 (0.0786) loss: 0.8212 (0.8297) time: 0.1444 data: 0.0661 max mem: 9377 +Train: [27] [1900/6250] eta: 0:11:59 lr: 0.000109 grad: 0.0830 (0.0789) loss: 0.8194 (0.8292) time: 0.1510 data: 0.0551 max mem: 9377 +Train: [27] [2000/6250] eta: 0:11:41 lr: 0.000109 grad: 0.0902 (0.0794) loss: 0.8167 (0.8286) time: 0.1538 data: 0.0630 max mem: 9377 +Train: [27] [2100/6250] eta: 0:11:22 lr: 0.000109 grad: 0.0842 (0.0798) loss: 0.8235 (0.8282) time: 0.1297 data: 0.0376 max mem: 9377 +Train: [27] [2200/6250] eta: 0:11:02 lr: 0.000109 grad: 0.0767 (0.0802) loss: 0.8225 (0.8278) time: 0.1453 data: 0.0629 max mem: 9377 +Train: [27] [2300/6250] eta: 0:10:44 lr: 0.000109 grad: 0.0789 (0.0804) loss: 0.8253 (0.8275) time: 0.1833 data: 0.1049 max mem: 9377 +Train: [27] [2400/6250] eta: 0:10:28 lr: 0.000109 grad: 0.0794 (0.0807) loss: 0.8194 (0.8272) time: 0.1622 data: 0.0817 max mem: 9377 +Train: [27] [2500/6250] eta: 0:10:11 lr: 0.000109 grad: 0.0827 (0.0808) loss: 0.8260 (0.8270) time: 0.1729 data: 0.0857 max mem: 9377 +Train: [27] [2600/6250] eta: 0:09:54 lr: 0.000109 grad: 0.0871 (0.0810) loss: 0.8196 (0.8268) time: 0.1639 data: 0.0733 max mem: 9377 +Train: [27] [2700/6250] eta: 0:09:37 lr: 0.000109 grad: 0.0796 (0.0811) loss: 0.8309 (0.8267) time: 0.1477 data: 0.0585 max mem: 9377 +Train: [27] [2800/6250] eta: 0:09:21 lr: 0.000109 grad: 0.0794 (0.0811) loss: 0.8194 (0.8267) time: 0.1389 data: 0.0479 max mem: 9377 +Train: [27] [2900/6250] eta: 0:09:05 lr: 0.000109 grad: 0.0768 (0.0813) loss: 0.8197 (0.8266) time: 0.1231 data: 0.0402 max mem: 9377 +Train: [27] [3000/6250] eta: 0:08:48 lr: 0.000109 grad: 0.0826 (0.0813) loss: 0.8237 (0.8266) time: 0.1541 data: 0.0748 max mem: 9377 +Train: [27] [3100/6250] eta: 0:08:32 lr: 0.000108 grad: 0.0750 (0.0814) loss: 0.8316 (0.8266) time: 0.1811 data: 0.0992 max mem: 9377 +Train: [27] [3200/6250] eta: 0:08:15 lr: 0.000108 grad: 0.0793 (0.0815) loss: 0.8173 (0.8266) time: 0.1677 data: 0.0854 max mem: 9377 +Train: [27] [3300/6250] eta: 0:07:59 lr: 0.000108 grad: 0.0817 (0.0815) loss: 0.8247 (0.8266) time: 0.1816 data: 0.1021 max mem: 9377 +Train: [27] [3400/6250] eta: 0:07:42 lr: 0.000108 grad: 0.0781 (0.0814) loss: 0.8284 (0.8266) time: 0.1284 data: 0.0397 max mem: 9377 +Train: [27] [3500/6250] eta: 0:07:25 lr: 0.000108 grad: 0.0823 (0.0815) loss: 0.8262 (0.8266) time: 0.1633 data: 0.0731 max mem: 9377 +Train: [27] [3600/6250] eta: 0:07:08 lr: 0.000108 grad: 0.0753 (0.0815) loss: 0.8279 (0.8266) time: 0.1571 data: 0.0715 max mem: 9377 +Train: [27] [3700/6250] eta: 0:06:53 lr: 0.000108 grad: 0.0819 (0.0815) loss: 0.8146 (0.8266) time: 0.2302 data: 0.1527 max mem: 9377 +Train: [27] [3800/6250] eta: 0:06:36 lr: 0.000108 grad: 0.0747 (0.0815) loss: 0.8259 (0.8266) time: 0.1432 data: 0.0533 max mem: 9377 +Train: [27] [3900/6250] eta: 0:06:20 lr: 0.000108 grad: 0.0760 (0.0815) loss: 0.8317 (0.8267) time: 0.1707 data: 0.0844 max mem: 9377 +Train: [27] [4000/6250] eta: 0:06:03 lr: 0.000108 grad: 0.0839 (0.0814) loss: 0.8311 (0.8267) time: 0.1444 data: 0.0572 max mem: 9377 +Train: [27] [4100/6250] eta: 0:05:47 lr: 0.000108 grad: 0.0784 (0.0814) loss: 0.8258 (0.8268) time: 0.1301 data: 0.0278 max mem: 9377 +Train: [27] [4200/6250] eta: 0:05:30 lr: 0.000108 grad: 0.0772 (0.0815) loss: 0.8319 (0.8268) time: 0.1607 data: 0.0682 max mem: 9377 +Train: [27] [4300/6250] eta: 0:05:13 lr: 0.000108 grad: 0.0799 (0.0815) loss: 0.8236 (0.8267) time: 0.1530 data: 0.0599 max mem: 9377 +Train: [27] [4400/6250] eta: 0:04:57 lr: 0.000108 grad: 0.0744 (0.0816) loss: 0.8350 (0.8267) time: 0.1423 data: 0.0520 max mem: 9377 +Train: [27] [4500/6250] eta: 0:04:40 lr: 0.000108 grad: 0.0727 (0.0815) loss: 0.8327 (0.8268) time: 0.1766 data: 0.0939 max mem: 9377 +Train: [27] [4600/6250] eta: 0:04:24 lr: 0.000108 grad: 0.0822 (0.0815) loss: 0.8256 (0.8269) time: 0.1475 data: 0.0672 max mem: 9377 +Train: [27] [4700/6250] eta: 0:04:08 lr: 0.000108 grad: 0.0797 (0.0815) loss: 0.8252 (0.8269) time: 0.1663 data: 0.0737 max mem: 9377 +Train: [27] [4800/6250] eta: 0:03:52 lr: 0.000108 grad: 0.0775 (0.0815) loss: 0.8369 (0.8270) time: 0.1394 data: 0.0457 max mem: 9377 +Train: [27] [4900/6250] eta: 0:03:36 lr: 0.000108 grad: 0.0832 (0.0815) loss: 0.8297 (0.8270) time: 0.1397 data: 0.0568 max mem: 9377 +Train: [27] [5000/6250] eta: 0:03:19 lr: 0.000108 grad: 0.0802 (0.0815) loss: 0.8319 (0.8271) time: 0.1591 data: 0.0791 max mem: 9377 +Train: [27] [5100/6250] eta: 0:03:03 lr: 0.000108 grad: 0.0738 (0.0815) loss: 0.8386 (0.8271) time: 0.1405 data: 0.0550 max mem: 9377 +Train: [27] [5200/6250] eta: 0:02:47 lr: 0.000108 grad: 0.0813 (0.0816) loss: 0.8246 (0.8271) time: 0.1483 data: 0.0588 max mem: 9377 +Train: [27] [5300/6250] eta: 0:02:31 lr: 0.000108 grad: 0.0853 (0.0816) loss: 0.8308 (0.8272) time: 0.1520 data: 0.0726 max mem: 9377 +Train: [27] [5400/6250] eta: 0:02:15 lr: 0.000108 grad: 0.0814 (0.0817) loss: 0.8248 (0.8272) time: 0.1598 data: 0.0782 max mem: 9377 +Train: [27] [5500/6250] eta: 0:01:59 lr: 0.000108 grad: 0.0791 (0.0817) loss: 0.8277 (0.8272) time: 0.1732 data: 0.0881 max mem: 9377 +Train: [27] [5600/6250] eta: 0:01:43 lr: 0.000108 grad: 0.0794 (0.0817) loss: 0.8343 (0.8273) time: 0.1480 data: 0.0565 max mem: 9377 +Train: [27] [5700/6250] eta: 0:01:27 lr: 0.000108 grad: 0.0756 (0.0816) loss: 0.8287 (0.8273) time: 0.1242 data: 0.0418 max mem: 9377 +Train: [27] [5800/6250] eta: 0:01:11 lr: 0.000108 grad: 0.0780 (0.0816) loss: 0.8330 (0.8273) time: 0.1515 data: 0.0702 max mem: 9377 +Train: [27] [5900/6250] eta: 0:00:55 lr: 0.000108 grad: 0.0820 (0.0816) loss: 0.8337 (0.8273) time: 0.1646 data: 0.0803 max mem: 9377 +Train: [27] [6000/6250] eta: 0:00:39 lr: 0.000108 grad: 0.0829 (0.0816) loss: 0.8324 (0.8273) time: 0.1553 data: 0.0789 max mem: 9377 +Train: [27] [6100/6250] eta: 0:00:23 lr: 0.000108 grad: 0.0826 (0.0816) loss: 0.8188 (0.8273) time: 0.1604 data: 0.0786 max mem: 9377 +Train: [27] [6200/6250] eta: 0:00:08 lr: 0.000108 grad: 0.0789 (0.0817) loss: 0.8305 (0.8274) time: 0.1714 data: 0.0812 max mem: 9377 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.0804 (0.0817) loss: 0.8287 (0.8274) time: 0.1650 data: 0.0826 max mem: 9377 +Train: [27] Total time: 0:16:48 (0.1613 s / it) +Averaged stats: lr: 0.000108 grad: 0.0804 (0.0817) loss: 0.8287 (0.8274) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:04:57 loss: 0.8412 (0.8412) time: 4.8039 data: 4.7292 max mem: 9377 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8307 (0.8347) time: 0.1289 data: 0.1037 max mem: 9377 +Eval (hcp-train-subset): [27] Total time: 0:00:15 (0.2431 s / it) +Averaged stats (hcp-train-subset): loss: 0.8307 (0.8347) +Eval (hcp-val): [27] [ 0/62] eta: 0:06:00 loss: 0.8399 (0.8399) time: 5.8107 data: 5.7788 max mem: 9377 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8419 (0.8423) time: 0.1249 data: 0.0994 max mem: 9377 +Eval (hcp-val): [27] Total time: 0:00:14 (0.2417 s / it) +Averaged stats (hcp-val): loss: 0.8419 (0.8423) +Eval (nsd-val): [27] [ 0/62] eta: 0:03:22 loss: 0.8085 (0.8085) time: 3.2738 data: 3.1795 max mem: 9377 +Eval (nsd-val): [27] [61/62] eta: 0:00:00 loss: 0.8174 (0.8172) time: 0.1260 data: 0.1008 max mem: 9377 +Eval (nsd-val): [27] Total time: 0:00:14 (0.2340 s / it) +Averaged stats (nsd-val): loss: 0.8174 (0.8172) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [28] [ 0/6250] eta: 11:34:04 lr: 0.000108 grad: 0.0656 (0.0656) loss: 0.8644 (0.8644) time: 6.6631 data: 6.5547 max mem: 9377 +Train: [28] [ 100/6250] eta: 0:22:56 lr: 0.000108 grad: 0.0927 (0.1040) loss: 0.8264 (0.8323) time: 0.1780 data: 0.0719 max mem: 9377 +Train: [28] [ 200/6250] eta: 0:19:30 lr: 0.000108 grad: 0.0778 (0.0950) loss: 0.8372 (0.8314) time: 0.1416 data: 0.0431 max mem: 9377 +Train: [28] [ 300/6250] eta: 0:18:16 lr: 0.000108 grad: 0.0725 (0.0894) loss: 0.8286 (0.8308) time: 0.1615 data: 0.0641 max mem: 9377 +Train: [28] [ 400/6250] eta: 0:17:29 lr: 0.000108 grad: 0.0759 (0.0868) loss: 0.8351 (0.8308) time: 0.1565 data: 0.0473 max mem: 9377 +Train: [28] [ 500/6250] eta: 0:16:46 lr: 0.000108 grad: 0.0775 (0.0851) loss: 0.8300 (0.8304) time: 0.1329 data: 0.0444 max mem: 9377 +Train: [28] [ 600/6250] eta: 0:16:14 lr: 0.000108 grad: 0.0727 (0.0840) loss: 0.8375 (0.8304) time: 0.1704 data: 0.0815 max mem: 9377 +Train: [28] [ 700/6250] eta: 0:15:59 lr: 0.000108 grad: 0.0696 (0.0829) loss: 0.8378 (0.8304) time: 0.1695 data: 0.0750 max mem: 9377 +Train: [28] [ 800/6250] eta: 0:15:31 lr: 0.000108 grad: 0.0763 (0.0821) loss: 0.8328 (0.8305) time: 0.1396 data: 0.0393 max mem: 9377 +Train: [28] [ 900/6250] eta: 0:15:07 lr: 0.000108 grad: 0.0692 (0.0816) loss: 0.8265 (0.8301) time: 0.1577 data: 0.0712 max mem: 9377 +Train: [28] [1000/6250] eta: 0:14:46 lr: 0.000108 grad: 0.0799 (0.0815) loss: 0.8278 (0.8297) time: 0.1621 data: 0.0734 max mem: 9377 +Train: [28] [1100/6250] eta: 0:14:26 lr: 0.000108 grad: 0.0723 (0.0813) loss: 0.8251 (0.8292) time: 0.1732 data: 0.0810 max mem: 9377 +Train: [28] [1200/6250] eta: 0:14:01 lr: 0.000108 grad: 0.0746 (0.0808) loss: 0.8332 (0.8291) time: 0.1330 data: 0.0423 max mem: 9377 +Train: [28] [1300/6250] eta: 0:13:37 lr: 0.000107 grad: 0.0772 (0.0808) loss: 0.8258 (0.8288) time: 0.1500 data: 0.0621 max mem: 9377 +Train: [28] [1400/6250] eta: 0:13:16 lr: 0.000107 grad: 0.0753 (0.0806) loss: 0.8289 (0.8284) time: 0.1635 data: 0.0755 max mem: 9377 +Train: [28] [1500/6250] eta: 0:13:00 lr: 0.000107 grad: 0.0761 (0.0806) loss: 0.8264 (0.8281) time: 0.1685 data: 0.0841 max mem: 9377 +Train: [28] [1600/6250] eta: 0:12:43 lr: 0.000107 grad: 0.0775 (0.0806) loss: 0.8187 (0.8278) time: 0.1553 data: 0.0649 max mem: 9377 +Train: [28] [1700/6250] eta: 0:12:25 lr: 0.000107 grad: 0.0802 (0.0807) loss: 0.8274 (0.8276) time: 0.1461 data: 0.0595 max mem: 9377 +Train: [28] [1800/6250] eta: 0:12:08 lr: 0.000107 grad: 0.0780 (0.0808) loss: 0.8304 (0.8275) time: 0.1425 data: 0.0603 max mem: 9377 +Train: [28] [1900/6250] eta: 0:11:50 lr: 0.000107 grad: 0.0758 (0.0808) loss: 0.8250 (0.8275) time: 0.1637 data: 0.0751 max mem: 9377 +Train: [28] [2000/6250] eta: 0:11:32 lr: 0.000107 grad: 0.0751 (0.0809) loss: 0.8296 (0.8274) time: 0.1666 data: 0.0795 max mem: 9377 +Train: [28] [2100/6250] eta: 0:11:13 lr: 0.000107 grad: 0.0760 (0.0808) loss: 0.8291 (0.8274) time: 0.1505 data: 0.0561 max mem: 9377 +Train: [28] [2200/6250] eta: 0:10:54 lr: 0.000107 grad: 0.0813 (0.0808) loss: 0.8162 (0.8273) time: 0.1556 data: 0.0682 max mem: 9377 +Train: [28] [2300/6250] eta: 0:10:35 lr: 0.000107 grad: 0.0829 (0.0808) loss: 0.8238 (0.8272) time: 0.1545 data: 0.0693 max mem: 9377 +Train: [28] [2400/6250] eta: 0:10:18 lr: 0.000107 grad: 0.0797 (0.0809) loss: 0.8277 (0.8273) time: 0.1541 data: 0.0663 max mem: 9377 +Train: [28] [2500/6250] eta: 0:10:01 lr: 0.000107 grad: 0.0828 (0.0810) loss: 0.8292 (0.8274) time: 0.1700 data: 0.0865 max mem: 9377 +Train: [28] [2600/6250] eta: 0:09:45 lr: 0.000107 grad: 0.0752 (0.0809) loss: 0.8257 (0.8274) time: 0.1542 data: 0.0713 max mem: 9377 +Train: [28] [2700/6250] eta: 0:09:28 lr: 0.000107 grad: 0.0749 (0.0808) loss: 0.8305 (0.8275) time: 0.1491 data: 0.0623 max mem: 9377 +Train: [28] [2800/6250] eta: 0:09:12 lr: 0.000107 grad: 0.0746 (0.0808) loss: 0.8339 (0.8275) time: 0.1593 data: 0.0688 max mem: 9377 +Train: [28] [2900/6250] eta: 0:08:56 lr: 0.000107 grad: 0.0785 (0.0807) loss: 0.8277 (0.8276) time: 0.1721 data: 0.0837 max mem: 9377 +Train: [28] [3000/6250] eta: 0:08:40 lr: 0.000107 grad: 0.0763 (0.0807) loss: 0.8317 (0.8275) time: 0.1579 data: 0.0650 max mem: 9377 +Train: [28] [3100/6250] eta: 0:08:24 lr: 0.000107 grad: 0.0752 (0.0806) loss: 0.8292 (0.8275) time: 0.1666 data: 0.0820 max mem: 9377 +Train: [28] [3200/6250] eta: 0:08:07 lr: 0.000107 grad: 0.0798 (0.0807) loss: 0.8240 (0.8274) time: 0.1484 data: 0.0517 max mem: 9377 +Train: [28] [3300/6250] eta: 0:07:52 lr: 0.000107 grad: 0.0790 (0.0807) loss: 0.8306 (0.8275) time: 0.1845 data: 0.1002 max mem: 9377 +Train: [28] [3400/6250] eta: 0:07:36 lr: 0.000107 grad: 0.0752 (0.0806) loss: 0.8340 (0.8275) time: 0.1148 data: 0.0138 max mem: 9377 +Train: [28] [3500/6250] eta: 0:07:19 lr: 0.000107 grad: 0.0792 (0.0807) loss: 0.8296 (0.8275) time: 0.1512 data: 0.0700 max mem: 9377 +Train: [28] [3600/6250] eta: 0:07:04 lr: 0.000107 grad: 0.0746 (0.0806) loss: 0.8315 (0.8275) time: 0.1589 data: 0.0680 max mem: 9377 +Train: [28] [3700/6250] eta: 0:06:47 lr: 0.000107 grad: 0.0756 (0.0806) loss: 0.8294 (0.8276) time: 0.1456 data: 0.0487 max mem: 9377 +Train: [28] [3800/6250] eta: 0:06:31 lr: 0.000107 grad: 0.0790 (0.0805) loss: 0.8258 (0.8277) time: 0.1489 data: 0.0641 max mem: 9377 +Train: [28] [3900/6250] eta: 0:06:15 lr: 0.000107 grad: 0.0754 (0.0805) loss: 0.8303 (0.8277) time: 0.1686 data: 0.0771 max mem: 9377 +Train: [28] [4000/6250] eta: 0:05:59 lr: 0.000107 grad: 0.0755 (0.0805) loss: 0.8321 (0.8277) time: 0.1549 data: 0.0736 max mem: 9377 +Train: [28] [4100/6250] eta: 0:05:43 lr: 0.000107 grad: 0.0820 (0.0804) loss: 0.8331 (0.8278) time: 0.1586 data: 0.0625 max mem: 9377 +Train: [28] [4200/6250] eta: 0:05:26 lr: 0.000107 grad: 0.0758 (0.0804) loss: 0.8247 (0.8278) time: 0.1487 data: 0.0536 max mem: 9377 +Train: [28] [4300/6250] eta: 0:05:10 lr: 0.000107 grad: 0.0685 (0.0803) loss: 0.8322 (0.8278) time: 0.1598 data: 0.0734 max mem: 9377 +Train: [28] [4400/6250] eta: 0:04:54 lr: 0.000107 grad: 0.0691 (0.0803) loss: 0.8369 (0.8278) time: 0.1848 data: 0.0943 max mem: 9377 +Train: [28] [4500/6250] eta: 0:04:38 lr: 0.000107 grad: 0.0771 (0.0803) loss: 0.8301 (0.8279) time: 0.1528 data: 0.0555 max mem: 9377 +Train: [28] [4600/6250] eta: 0:04:22 lr: 0.000107 grad: 0.0757 (0.0803) loss: 0.8323 (0.8279) time: 0.1578 data: 0.0666 max mem: 9377 +Train: [28] [4700/6250] eta: 0:04:06 lr: 0.000107 grad: 0.0766 (0.0804) loss: 0.8347 (0.8279) time: 0.1693 data: 0.0893 max mem: 9377 +Train: [28] [4800/6250] eta: 0:03:50 lr: 0.000107 grad: 0.0739 (0.0804) loss: 0.8249 (0.8278) time: 0.1420 data: 0.0498 max mem: 9377 +Train: [28] [4900/6250] eta: 0:03:34 lr: 0.000107 grad: 0.0776 (0.0804) loss: 0.8233 (0.8278) time: 0.1706 data: 0.0856 max mem: 9377 +Train: [28] [5000/6250] eta: 0:03:18 lr: 0.000107 grad: 0.0800 (0.0805) loss: 0.8237 (0.8277) time: 0.1454 data: 0.0623 max mem: 9377 +Train: [28] [5100/6250] eta: 0:03:02 lr: 0.000107 grad: 0.0765 (0.0805) loss: 0.8266 (0.8276) time: 0.1694 data: 0.0813 max mem: 9377 +Train: [28] [5200/6250] eta: 0:02:46 lr: 0.000107 grad: 0.0787 (0.0806) loss: 0.8258 (0.8276) time: 0.1410 data: 0.0455 max mem: 9377 +Train: [28] [5300/6250] eta: 0:02:30 lr: 0.000107 grad: 0.0819 (0.0807) loss: 0.8303 (0.8275) time: 0.1386 data: 0.0560 max mem: 9377 +Train: [28] [5400/6250] eta: 0:02:15 lr: 0.000107 grad: 0.0833 (0.0807) loss: 0.8193 (0.8274) time: 0.1498 data: 0.0651 max mem: 9377 +Train: [28] [5500/6250] eta: 0:01:59 lr: 0.000107 grad: 0.0820 (0.0808) loss: 0.8243 (0.8274) time: 0.1671 data: 0.0718 max mem: 9377 +Train: [28] [5600/6250] eta: 0:01:43 lr: 0.000106 grad: 0.0820 (0.0808) loss: 0.8258 (0.8273) time: 0.1632 data: 0.0765 max mem: 9377 +Train: [28] [5700/6250] eta: 0:01:27 lr: 0.000106 grad: 0.0785 (0.0808) loss: 0.8274 (0.8272) time: 0.1540 data: 0.0617 max mem: 9377 +Train: [28] [5800/6250] eta: 0:01:11 lr: 0.000106 grad: 0.0818 (0.0809) loss: 0.8294 (0.8271) time: 0.1645 data: 0.0765 max mem: 9377 +Train: [28] [5900/6250] eta: 0:00:55 lr: 0.000106 grad: 0.0861 (0.0810) loss: 0.8226 (0.8271) time: 0.1572 data: 0.0718 max mem: 9377 +Train: [28] [6000/6250] eta: 0:00:39 lr: 0.000106 grad: 0.0798 (0.0810) loss: 0.8248 (0.8270) time: 0.1618 data: 0.0729 max mem: 9377 +Train: [28] [6100/6250] eta: 0:00:23 lr: 0.000106 grad: 0.0895 (0.0811) loss: 0.8205 (0.8270) time: 0.1696 data: 0.0814 max mem: 9377 +Train: [28] [6200/6250] eta: 0:00:07 lr: 0.000106 grad: 0.0780 (0.0811) loss: 0.8299 (0.8270) time: 0.1956 data: 0.1104 max mem: 9377 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.0739 (0.0811) loss: 0.8291 (0.8270) time: 0.1666 data: 0.0707 max mem: 9377 +Train: [28] Total time: 0:16:45 (0.1609 s / it) +Averaged stats: lr: 0.000106 grad: 0.0739 (0.0811) loss: 0.8291 (0.8270) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:06:10 loss: 0.8384 (0.8384) time: 5.9801 data: 5.9475 max mem: 9377 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8335 (0.8341) time: 0.1203 data: 0.0948 max mem: 9377 +Eval (hcp-train-subset): [28] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (hcp-train-subset): loss: 0.8335 (0.8341) +Eval (hcp-val): [28] [ 0/62] eta: 0:04:54 loss: 0.8412 (0.8412) time: 4.7440 data: 4.6307 max mem: 9377 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8409 (0.8429) time: 0.1169 data: 0.0894 max mem: 9377 +Eval (hcp-val): [28] Total time: 0:00:15 (0.2506 s / it) +Averaged stats (hcp-val): loss: 0.8409 (0.8429) +Eval (nsd-val): [28] [ 0/62] eta: 0:06:26 loss: 0.8018 (0.8018) time: 6.2373 data: 6.2056 max mem: 9377 +Eval (nsd-val): [28] [61/62] eta: 0:00:00 loss: 0.8140 (0.8139) time: 0.1271 data: 0.1014 max mem: 9377 +Eval (nsd-val): [28] Total time: 0:00:14 (0.2409 s / it) +Averaged stats (nsd-val): loss: 0.8140 (0.8139) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 7:28:20 lr: 0.000106 grad: 0.0605 (0.0605) loss: 0.8994 (0.8994) time: 4.3040 data: 3.9796 max mem: 9377 +Train: [29] [ 100/6250] eta: 0:21:56 lr: 0.000106 grad: 0.0827 (0.0971) loss: 0.8343 (0.8363) time: 0.1558 data: 0.0529 max mem: 9377 +Train: [29] [ 200/6250] eta: 0:18:11 lr: 0.000106 grad: 0.0783 (0.0899) loss: 0.8279 (0.8349) time: 0.1483 data: 0.0521 max mem: 9377 +Train: [29] [ 300/6250] eta: 0:17:05 lr: 0.000106 grad: 0.0844 (0.0894) loss: 0.8231 (0.8321) time: 0.1507 data: 0.0529 max mem: 9377 +Train: [29] [ 400/6250] eta: 0:16:13 lr: 0.000106 grad: 0.0745 (0.0876) loss: 0.8272 (0.8302) time: 0.1439 data: 0.0468 max mem: 9377 +Train: [29] [ 500/6250] eta: 0:15:42 lr: 0.000106 grad: 0.0756 (0.0862) loss: 0.8353 (0.8302) time: 0.1747 data: 0.0815 max mem: 9377 +Train: [29] [ 600/6250] eta: 0:15:10 lr: 0.000106 grad: 0.0781 (0.0854) loss: 0.8323 (0.8296) time: 0.1547 data: 0.0537 max mem: 9377 +Train: [29] [ 700/6250] eta: 0:15:01 lr: 0.000106 grad: 0.0769 (0.0849) loss: 0.8285 (0.8294) time: 0.1990 data: 0.1110 max mem: 9377 +Train: [29] [ 800/6250] eta: 0:14:43 lr: 0.000106 grad: 0.0734 (0.0840) loss: 0.8289 (0.8295) time: 0.1570 data: 0.0670 max mem: 9377 +Train: [29] [ 900/6250] eta: 0:14:28 lr: 0.000106 grad: 0.0742 (0.0833) loss: 0.8273 (0.8293) time: 0.1842 data: 0.0949 max mem: 9377 +Train: [29] [1000/6250] eta: 0:14:06 lr: 0.000106 grad: 0.0742 (0.0827) loss: 0.8301 (0.8290) time: 0.1651 data: 0.0791 max mem: 9377 +Train: [29] [1100/6250] eta: 0:13:49 lr: 0.000106 grad: 0.0786 (0.0824) loss: 0.8172 (0.8288) time: 0.1629 data: 0.0797 max mem: 9377 +Train: [29] [1200/6250] eta: 0:13:27 lr: 0.000106 grad: 0.0788 (0.0822) loss: 0.8245 (0.8285) time: 0.1110 data: 0.0177 max mem: 9377 +Train: [29] [1300/6250] eta: 0:13:09 lr: 0.000106 grad: 0.0764 (0.0819) loss: 0.8317 (0.8283) time: 0.1538 data: 0.0599 max mem: 9377 +Train: [29] [1400/6250] eta: 0:12:47 lr: 0.000106 grad: 0.0789 (0.0816) loss: 0.8291 (0.8283) time: 0.1309 data: 0.0486 max mem: 9377 +Train: [29] [1500/6250] eta: 0:12:29 lr: 0.000106 grad: 0.0781 (0.0815) loss: 0.8269 (0.8281) time: 0.1707 data: 0.0942 max mem: 9377 +Train: [29] [1600/6250] eta: 0:12:16 lr: 0.000106 grad: 0.0734 (0.0814) loss: 0.8315 (0.8281) time: 0.1497 data: 0.0704 max mem: 9377 +Train: [29] [1700/6250] eta: 0:11:59 lr: 0.000106 grad: 0.0794 (0.0814) loss: 0.8234 (0.8279) time: 0.1246 data: 0.0417 max mem: 9377 +Train: [29] [1800/6250] eta: 0:11:41 lr: 0.000106 grad: 0.0728 (0.0813) loss: 0.8293 (0.8279) time: 0.1374 data: 0.0487 max mem: 9377 +Train: [29] [1900/6250] eta: 0:11:26 lr: 0.000106 grad: 0.0776 (0.0812) loss: 0.8314 (0.8280) time: 0.1562 data: 0.0650 max mem: 9377 +Train: [29] [2000/6250] eta: 0:11:12 lr: 0.000106 grad: 0.0790 (0.0813) loss: 0.8318 (0.8281) time: 0.1610 data: 0.0688 max mem: 9377 +Train: [29] [2100/6250] eta: 0:10:57 lr: 0.000106 grad: 0.0801 (0.0812) loss: 0.8205 (0.8281) time: 0.1503 data: 0.0565 max mem: 9377 +Train: [29] [2200/6250] eta: 0:10:40 lr: 0.000106 grad: 0.0749 (0.0811) loss: 0.8306 (0.8281) time: 0.1573 data: 0.0750 max mem: 9377 +Train: [29] [2300/6250] eta: 0:10:25 lr: 0.000106 grad: 0.0736 (0.0810) loss: 0.8276 (0.8282) time: 0.1566 data: 0.0687 max mem: 9377 +Train: [29] [2400/6250] eta: 0:10:08 lr: 0.000106 grad: 0.0802 (0.0810) loss: 0.8235 (0.8282) time: 0.1438 data: 0.0619 max mem: 9377 +Train: [29] [2500/6250] eta: 0:09:50 lr: 0.000106 grad: 0.0752 (0.0809) loss: 0.8350 (0.8283) time: 0.1461 data: 0.0509 max mem: 9377 +Train: [29] [2600/6250] eta: 0:09:34 lr: 0.000106 grad: 0.0791 (0.0809) loss: 0.8250 (0.8283) time: 0.1425 data: 0.0554 max mem: 9377 +Train: [29] [2700/6250] eta: 0:09:18 lr: 0.000106 grad: 0.0810 (0.0809) loss: 0.8289 (0.8283) time: 0.1446 data: 0.0562 max mem: 9377 +Train: [29] [2800/6250] eta: 0:09:02 lr: 0.000106 grad: 0.0774 (0.0809) loss: 0.8254 (0.8283) time: 0.1662 data: 0.0817 max mem: 9377 +Train: [29] [2900/6250] eta: 0:08:45 lr: 0.000106 grad: 0.0800 (0.0809) loss: 0.8275 (0.8283) time: 0.1505 data: 0.0623 max mem: 9377 +Train: [29] [3000/6250] eta: 0:08:29 lr: 0.000106 grad: 0.0807 (0.0810) loss: 0.8282 (0.8284) time: 0.1389 data: 0.0517 max mem: 9377 +Train: [29] [3100/6250] eta: 0:08:14 lr: 0.000106 grad: 0.0783 (0.0810) loss: 0.8300 (0.8284) time: 0.1679 data: 0.0796 max mem: 9377 +Train: [29] [3200/6250] eta: 0:07:59 lr: 0.000106 grad: 0.0811 (0.0810) loss: 0.8346 (0.8284) time: 0.1828 data: 0.0966 max mem: 9377 +Train: [29] [3300/6250] eta: 0:07:42 lr: 0.000106 grad: 0.0809 (0.0811) loss: 0.8286 (0.8284) time: 0.1237 data: 0.0360 max mem: 9377 +Train: [29] [3400/6250] eta: 0:07:26 lr: 0.000106 grad: 0.0800 (0.0811) loss: 0.8276 (0.8284) time: 0.1530 data: 0.0690 max mem: 9377 +Train: [29] [3500/6250] eta: 0:07:11 lr: 0.000105 grad: 0.0751 (0.0812) loss: 0.8272 (0.8283) time: 0.1432 data: 0.0563 max mem: 9377 +Train: [29] [3600/6250] eta: 0:06:56 lr: 0.000105 grad: 0.0830 (0.0812) loss: 0.8253 (0.8284) time: 0.1734 data: 0.0855 max mem: 9377 +Train: [29] [3700/6250] eta: 0:06:40 lr: 0.000105 grad: 0.0760 (0.0812) loss: 0.8319 (0.8283) time: 0.1571 data: 0.0700 max mem: 9377 +Train: [29] [3800/6250] eta: 0:06:23 lr: 0.000105 grad: 0.0787 (0.0812) loss: 0.8246 (0.8283) time: 0.1500 data: 0.0631 max mem: 9377 +Train: [29] [3900/6250] eta: 0:06:08 lr: 0.000105 grad: 0.0853 (0.0813) loss: 0.8225 (0.8283) time: 0.1602 data: 0.0756 max mem: 9377 +Train: [29] [4000/6250] eta: 0:05:52 lr: 0.000105 grad: 0.0770 (0.0813) loss: 0.8285 (0.8282) time: 0.1443 data: 0.0631 max mem: 9377 +Train: [29] [4100/6250] eta: 0:05:36 lr: 0.000105 grad: 0.0831 (0.0814) loss: 0.8284 (0.8282) time: 0.1369 data: 0.0509 max mem: 9377 +Train: [29] [4200/6250] eta: 0:05:21 lr: 0.000105 grad: 0.0802 (0.0814) loss: 0.8225 (0.8281) time: 0.1631 data: 0.0710 max mem: 9377 +Train: [29] [4300/6250] eta: 0:05:05 lr: 0.000105 grad: 0.0784 (0.0814) loss: 0.8286 (0.8281) time: 0.1309 data: 0.0442 max mem: 9377 +Train: [29] [4400/6250] eta: 0:04:49 lr: 0.000105 grad: 0.0780 (0.0814) loss: 0.8298 (0.8282) time: 0.1440 data: 0.0535 max mem: 9377 +Train: [29] [4500/6250] eta: 0:04:34 lr: 0.000105 grad: 0.0763 (0.0815) loss: 0.8306 (0.8282) time: 0.1582 data: 0.0695 max mem: 9377 +Train: [29] [4600/6250] eta: 0:04:18 lr: 0.000105 grad: 0.0800 (0.0815) loss: 0.8256 (0.8282) time: 0.1411 data: 0.0547 max mem: 9377 +Train: [29] [4700/6250] eta: 0:04:02 lr: 0.000105 grad: 0.0773 (0.0815) loss: 0.8258 (0.8282) time: 0.1600 data: 0.0704 max mem: 9377 +Train: [29] [4800/6250] eta: 0:03:46 lr: 0.000105 grad: 0.0780 (0.0815) loss: 0.8234 (0.8281) time: 0.1189 data: 0.0263 max mem: 9377 +Train: [29] [4900/6250] eta: 0:03:31 lr: 0.000105 grad: 0.0770 (0.0815) loss: 0.8239 (0.8281) time: 0.1451 data: 0.0528 max mem: 9377 +Train: [29] [5000/6250] eta: 0:03:15 lr: 0.000105 grad: 0.0854 (0.0817) loss: 0.8288 (0.8280) time: 0.1640 data: 0.0761 max mem: 9377 +Train: [29] [5100/6250] eta: 0:02:59 lr: 0.000105 grad: 0.0821 (0.0817) loss: 0.8266 (0.8280) time: 0.1595 data: 0.0673 max mem: 9377 +Train: [29] [5200/6250] eta: 0:02:44 lr: 0.000105 grad: 0.0782 (0.0818) loss: 0.8314 (0.8280) time: 0.1370 data: 0.0492 max mem: 9377 +Train: [29] [5300/6250] eta: 0:02:28 lr: 0.000105 grad: 0.0800 (0.0818) loss: 0.8318 (0.8280) time: 0.1545 data: 0.0686 max mem: 9377 +Train: [29] [5400/6250] eta: 0:02:12 lr: 0.000105 grad: 0.0849 (0.0818) loss: 0.8237 (0.8280) time: 0.1431 data: 0.0563 max mem: 9377 +Train: [29] [5500/6250] eta: 0:01:57 lr: 0.000105 grad: 0.0768 (0.0817) loss: 0.8291 (0.8280) time: 0.1877 data: 0.1036 max mem: 9377 +Train: [29] [5600/6250] eta: 0:01:41 lr: 0.000105 grad: 0.0742 (0.0818) loss: 0.8283 (0.8280) time: 0.1670 data: 0.0806 max mem: 9377 +Train: [29] [5700/6250] eta: 0:01:25 lr: 0.000105 grad: 0.0816 (0.0818) loss: 0.8251 (0.8280) time: 0.1467 data: 0.0610 max mem: 9377 +Train: [29] [5800/6250] eta: 0:01:10 lr: 0.000105 grad: 0.0783 (0.0818) loss: 0.8270 (0.8280) time: 0.1750 data: 0.0898 max mem: 9377 +Train: [29] [5900/6250] eta: 0:00:54 lr: 0.000105 grad: 0.0825 (0.0818) loss: 0.8258 (0.8279) time: 0.1772 data: 0.0853 max mem: 9377 +Train: [29] [6000/6250] eta: 0:00:39 lr: 0.000105 grad: 0.0782 (0.0818) loss: 0.8244 (0.8279) time: 0.1684 data: 0.0800 max mem: 9377 +Train: [29] [6100/6250] eta: 0:00:23 lr: 0.000105 grad: 0.0871 (0.0818) loss: 0.8255 (0.8278) time: 0.1366 data: 0.0513 max mem: 9377 +Train: [29] [6200/6250] eta: 0:00:07 lr: 0.000105 grad: 0.0817 (0.0818) loss: 0.8267 (0.8278) time: 0.1558 data: 0.0611 max mem: 9377 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.0792 (0.0818) loss: 0.8248 (0.8278) time: 0.1882 data: 0.0999 max mem: 9377 +Train: [29] Total time: 0:16:26 (0.1578 s / it) +Averaged stats: lr: 0.000105 grad: 0.0792 (0.0818) loss: 0.8248 (0.8278) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:05:17 loss: 0.8380 (0.8380) time: 5.1261 data: 5.0952 max mem: 9377 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8333 (0.8346) time: 0.1293 data: 0.1045 max mem: 9377 +Eval (hcp-train-subset): [29] Total time: 0:00:13 (0.2201 s / it) +Averaged stats (hcp-train-subset): loss: 0.8333 (0.8346) +Making plots (hcp-train-subset): example=8 +Eval (hcp-val): [29] [ 0/62] eta: 0:04:57 loss: 0.8436 (0.8436) time: 4.7946 data: 4.7625 max mem: 9377 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8421 (0.8429) time: 0.1378 data: 0.1122 max mem: 9377 +Eval (hcp-val): [29] Total time: 0:00:14 (0.2290 s / it) +Averaged stats (hcp-val): loss: 0.8421 (0.8429) +Making plots (hcp-val): example=37 +Eval (nsd-val): [29] [ 0/62] eta: 0:05:56 loss: 0.8098 (0.8098) time: 5.7446 data: 5.7130 max mem: 9377 +Eval (nsd-val): [29] [61/62] eta: 0:00:00 loss: 0.8173 (0.8168) time: 0.1054 data: 0.0782 max mem: 9377 +Eval (nsd-val): [29] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (nsd-val): loss: 0.8173 (0.8168) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00029.pth +Train: [30] [ 0/6250] eta: 11:33:59 lr: 0.000105 grad: 0.1627 (0.1627) loss: 0.8606 (0.8606) time: 6.6624 data: 6.5531 max mem: 9377 +Train: [30] [ 100/6250] eta: 0:21:13 lr: 0.000105 grad: 0.0733 (0.0845) loss: 0.8433 (0.8461) time: 0.1515 data: 0.0494 max mem: 9377 +Train: [30] [ 200/6250] eta: 0:17:57 lr: 0.000105 grad: 0.0704 (0.0803) loss: 0.8428 (0.8408) time: 0.1363 data: 0.0372 max mem: 9377 +Train: [30] [ 300/6250] eta: 0:16:58 lr: 0.000105 grad: 0.0796 (0.0802) loss: 0.8304 (0.8384) time: 0.1622 data: 0.0604 max mem: 9377 +Train: [30] [ 400/6250] eta: 0:16:14 lr: 0.000105 grad: 0.0756 (0.0797) loss: 0.8241 (0.8358) time: 0.1242 data: 0.0136 max mem: 9377 +Train: [30] [ 500/6250] eta: 0:15:42 lr: 0.000105 grad: 0.0772 (0.0800) loss: 0.8215 (0.8334) time: 0.1607 data: 0.0655 max mem: 9377 +Train: [30] [ 600/6250] eta: 0:15:09 lr: 0.000105 grad: 0.0830 (0.0798) loss: 0.8240 (0.8320) time: 0.1521 data: 0.0505 max mem: 9377 +Train: [30] [ 700/6250] eta: 0:15:06 lr: 0.000105 grad: 0.0691 (0.0792) loss: 0.8342 (0.8317) time: 0.1592 data: 0.0717 max mem: 9377 +Train: [30] [ 800/6250] eta: 0:14:53 lr: 0.000105 grad: 0.0798 (0.0791) loss: 0.8298 (0.8315) time: 0.1705 data: 0.0770 max mem: 9377 +Train: [30] [ 900/6250] eta: 0:14:34 lr: 0.000105 grad: 0.0807 (0.0789) loss: 0.8288 (0.8311) time: 0.1447 data: 0.0574 max mem: 9377 +Train: [30] [1000/6250] eta: 0:14:12 lr: 0.000105 grad: 0.0756 (0.0785) loss: 0.8262 (0.8309) time: 0.1456 data: 0.0626 max mem: 9377 +Train: [30] [1100/6250] eta: 0:13:51 lr: 0.000105 grad: 0.0741 (0.0787) loss: 0.8228 (0.8305) time: 0.1415 data: 0.0534 max mem: 9377 +Train: [30] [1200/6250] eta: 0:13:39 lr: 0.000105 grad: 0.0774 (0.0789) loss: 0.8207 (0.8299) time: 0.1923 data: 0.1000 max mem: 9377 +Train: [30] [1300/6250] eta: 0:13:27 lr: 0.000105 grad: 0.0801 (0.0790) loss: 0.8230 (0.8295) time: 0.1526 data: 0.0613 max mem: 9377 +Train: [30] [1400/6250] eta: 0:13:09 lr: 0.000104 grad: 0.0834 (0.0793) loss: 0.8249 (0.8292) time: 0.1652 data: 0.0725 max mem: 9377 +Train: [30] [1500/6250] eta: 0:13:00 lr: 0.000104 grad: 0.0788 (0.0794) loss: 0.8217 (0.8290) time: 0.1801 data: 0.0842 max mem: 9377 +Train: [30] [1600/6250] eta: 0:12:46 lr: 0.000104 grad: 0.0760 (0.0795) loss: 0.8283 (0.8288) time: 0.1813 data: 0.0965 max mem: 9377 +Train: [30] [1700/6250] eta: 0:12:31 lr: 0.000104 grad: 0.0776 (0.0794) loss: 0.8351 (0.8286) time: 0.1868 data: 0.0929 max mem: 9377 +Train: [30] [1800/6250] eta: 0:12:16 lr: 0.000104 grad: 0.0823 (0.0797) loss: 0.8270 (0.8286) time: 0.1717 data: 0.0851 max mem: 9377 +Train: [30] [1900/6250] eta: 0:12:00 lr: 0.000104 grad: 0.0790 (0.0796) loss: 0.8208 (0.8284) time: 0.1668 data: 0.0779 max mem: 9377 +Train: [30] [2000/6250] eta: 0:11:47 lr: 0.000104 grad: 0.0773 (0.0798) loss: 0.8291 (0.8285) time: 0.1691 data: 0.0746 max mem: 9377 +Train: [30] [2100/6250] eta: 0:11:30 lr: 0.000104 grad: 0.0778 (0.0798) loss: 0.8254 (0.8285) time: 0.1635 data: 0.0698 max mem: 9377 +Train: [30] [2200/6250] eta: 0:11:11 lr: 0.000104 grad: 0.0758 (0.0798) loss: 0.8268 (0.8285) time: 0.1445 data: 0.0539 max mem: 9377 +Train: [30] [2300/6250] eta: 0:10:51 lr: 0.000104 grad: 0.0813 (0.0799) loss: 0.8263 (0.8285) time: 0.1459 data: 0.0513 max mem: 9377 +Train: [30] [2400/6250] eta: 0:10:33 lr: 0.000104 grad: 0.0795 (0.0801) loss: 0.8250 (0.8285) time: 0.1758 data: 0.0850 max mem: 9377 +Train: [30] [2500/6250] eta: 0:10:16 lr: 0.000104 grad: 0.0792 (0.0802) loss: 0.8280 (0.8284) time: 0.1612 data: 0.0666 max mem: 9377 +Train: [30] [2600/6250] eta: 0:09:59 lr: 0.000104 grad: 0.0787 (0.0804) loss: 0.8323 (0.8284) time: 0.1458 data: 0.0535 max mem: 9377 +Train: [30] [2700/6250] eta: 0:09:42 lr: 0.000104 grad: 0.0791 (0.0805) loss: 0.8291 (0.8284) time: 0.1497 data: 0.0596 max mem: 9377 +Train: [30] [2800/6250] eta: 0:09:25 lr: 0.000104 grad: 0.0855 (0.0806) loss: 0.8186 (0.8283) time: 0.1645 data: 0.0736 max mem: 9377 +Train: [30] [2900/6250] eta: 0:09:08 lr: 0.000104 grad: 0.0821 (0.0807) loss: 0.8220 (0.8283) time: 0.1741 data: 0.0855 max mem: 9377 +Train: [30] [3000/6250] eta: 0:08:52 lr: 0.000104 grad: 0.0878 (0.0808) loss: 0.8226 (0.8282) time: 0.1577 data: 0.0702 max mem: 9377 +Train: [30] [3100/6250] eta: 0:08:35 lr: 0.000104 grad: 0.0824 (0.0809) loss: 0.8270 (0.8282) time: 0.1681 data: 0.0764 max mem: 9377 +Train: [30] [3200/6250] eta: 0:08:18 lr: 0.000104 grad: 0.0817 (0.0810) loss: 0.8309 (0.8282) time: 0.1536 data: 0.0589 max mem: 9377 +Train: [30] [3300/6250] eta: 0:08:00 lr: 0.000104 grad: 0.0799 (0.0811) loss: 0.8273 (0.8281) time: 0.1534 data: 0.0681 max mem: 9377 +Train: [30] [3400/6250] eta: 0:07:43 lr: 0.000104 grad: 0.0826 (0.0812) loss: 0.8294 (0.8281) time: 0.1499 data: 0.0581 max mem: 9377 +Train: [30] [3500/6250] eta: 0:07:26 lr: 0.000104 grad: 0.0795 (0.0813) loss: 0.8297 (0.8281) time: 0.1798 data: 0.1004 max mem: 9377 +Train: [30] [3600/6250] eta: 0:07:09 lr: 0.000104 grad: 0.0822 (0.0814) loss: 0.8255 (0.8281) time: 0.1063 data: 0.0136 max mem: 9377 +Train: [30] [3700/6250] eta: 0:06:53 lr: 0.000104 grad: 0.0778 (0.0814) loss: 0.8255 (0.8281) time: 0.1657 data: 0.0741 max mem: 9377 +Train: [30] [3800/6250] eta: 0:06:36 lr: 0.000104 grad: 0.0818 (0.0814) loss: 0.8261 (0.8281) time: 0.1608 data: 0.0699 max mem: 9377 +Train: [30] [3900/6250] eta: 0:06:19 lr: 0.000104 grad: 0.0809 (0.0815) loss: 0.8229 (0.8281) time: 0.1427 data: 0.0530 max mem: 9377 +Train: [30] [4000/6250] eta: 0:06:03 lr: 0.000104 grad: 0.0822 (0.0815) loss: 0.8240 (0.8280) time: 0.1708 data: 0.0884 max mem: 9377 +Train: [30] [4100/6250] eta: 0:05:46 lr: 0.000104 grad: 0.0831 (0.0815) loss: 0.8243 (0.8279) time: 0.1587 data: 0.0709 max mem: 9377 +Train: [30] [4200/6250] eta: 0:05:29 lr: 0.000104 grad: 0.0791 (0.0816) loss: 0.8261 (0.8278) time: 0.1542 data: 0.0667 max mem: 9377 +Train: [30] [4300/6250] eta: 0:05:13 lr: 0.000104 grad: 0.0850 (0.0816) loss: 0.8265 (0.8278) time: 0.1514 data: 0.0743 max mem: 9377 +Train: [30] [4400/6250] eta: 0:04:57 lr: 0.000104 grad: 0.0742 (0.0816) loss: 0.8262 (0.8278) time: 0.1427 data: 0.0600 max mem: 9377 +Train: [30] [4500/6250] eta: 0:04:41 lr: 0.000104 grad: 0.0804 (0.0816) loss: 0.8269 (0.8278) time: 0.1539 data: 0.0668 max mem: 9377 +Train: [30] [4600/6250] eta: 0:04:25 lr: 0.000104 grad: 0.0893 (0.0816) loss: 0.8220 (0.8278) time: 0.1478 data: 0.0590 max mem: 9377 +Train: [30] [4700/6250] eta: 0:04:08 lr: 0.000104 grad: 0.0801 (0.0816) loss: 0.8276 (0.8277) time: 0.1574 data: 0.0711 max mem: 9377 +Train: [30] [4800/6250] eta: 0:03:52 lr: 0.000104 grad: 0.0762 (0.0815) loss: 0.8263 (0.8277) time: 0.1448 data: 0.0506 max mem: 9377 +Train: [30] [4900/6250] eta: 0:03:36 lr: 0.000104 grad: 0.0755 (0.0815) loss: 0.8255 (0.8278) time: 0.1588 data: 0.0810 max mem: 9377 +Train: [30] [5000/6250] eta: 0:03:20 lr: 0.000104 grad: 0.0761 (0.0815) loss: 0.8341 (0.8278) time: 0.1534 data: 0.0673 max mem: 9377 +Train: [30] [5100/6250] eta: 0:03:03 lr: 0.000104 grad: 0.0767 (0.0815) loss: 0.8320 (0.8278) time: 0.1396 data: 0.0548 max mem: 9377 +Train: [30] [5200/6250] eta: 0:02:47 lr: 0.000104 grad: 0.0750 (0.0814) loss: 0.8330 (0.8278) time: 0.1547 data: 0.0630 max mem: 9377 +Train: [30] [5300/6250] eta: 0:02:31 lr: 0.000104 grad: 0.0736 (0.0814) loss: 0.8302 (0.8278) time: 0.1503 data: 0.0595 max mem: 9377 +Train: [30] [5400/6250] eta: 0:02:15 lr: 0.000103 grad: 0.0773 (0.0814) loss: 0.8242 (0.8278) time: 0.1766 data: 0.0877 max mem: 9377 +Train: [30] [5500/6250] eta: 0:01:59 lr: 0.000103 grad: 0.0829 (0.0814) loss: 0.8283 (0.8277) time: 0.1269 data: 0.0378 max mem: 9377 +Train: [30] [5600/6250] eta: 0:01:43 lr: 0.000103 grad: 0.0786 (0.0814) loss: 0.8244 (0.8277) time: 0.1366 data: 0.0406 max mem: 9377 +Train: [30] [5700/6250] eta: 0:01:27 lr: 0.000103 grad: 0.0832 (0.0814) loss: 0.8284 (0.8277) time: 0.1388 data: 0.0395 max mem: 9377 +Train: [30] [5800/6250] eta: 0:01:11 lr: 0.000103 grad: 0.0842 (0.0814) loss: 0.8243 (0.8276) time: 0.1406 data: 0.0497 max mem: 9377 +Train: [30] [5900/6250] eta: 0:00:55 lr: 0.000103 grad: 0.0868 (0.0814) loss: 0.8229 (0.8276) time: 0.1697 data: 0.0798 max mem: 9377 +Train: [30] [6000/6250] eta: 0:00:39 lr: 0.000103 grad: 0.0759 (0.0814) loss: 0.8198 (0.8276) time: 0.1558 data: 0.0712 max mem: 9377 +Train: [30] [6100/6250] eta: 0:00:23 lr: 0.000103 grad: 0.0729 (0.0814) loss: 0.8240 (0.8276) time: 0.1280 data: 0.0449 max mem: 9377 +Train: [30] [6200/6250] eta: 0:00:07 lr: 0.000103 grad: 0.0805 (0.0814) loss: 0.8235 (0.8276) time: 0.1526 data: 0.0665 max mem: 9377 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.0851 (0.0815) loss: 0.8296 (0.8275) time: 0.1413 data: 0.0557 max mem: 9377 +Train: [30] Total time: 0:16:43 (0.1605 s / it) +Averaged stats: lr: 0.000103 grad: 0.0851 (0.0815) loss: 0.8296 (0.8275) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:05:01 loss: 0.8374 (0.8374) time: 4.8630 data: 4.8287 max mem: 9377 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8318 (0.8335) time: 0.1420 data: 0.1150 max mem: 9377 +Eval (hcp-train-subset): [30] Total time: 0:00:14 (0.2325 s / it) +Averaged stats (hcp-train-subset): loss: 0.8318 (0.8335) +Eval (hcp-val): [30] [ 0/62] eta: 0:06:04 loss: 0.8430 (0.8430) time: 5.8780 data: 5.8471 max mem: 9377 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8404 (0.8415) time: 0.1114 data: 0.0861 max mem: 9377 +Eval (hcp-val): [30] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (hcp-val): loss: 0.8404 (0.8415) +Eval (nsd-val): [30] [ 0/62] eta: 0:06:04 loss: 0.8045 (0.8045) time: 5.8842 data: 5.8526 max mem: 9377 +Eval (nsd-val): [30] [61/62] eta: 0:00:00 loss: 0.8165 (0.8166) time: 0.1174 data: 0.0902 max mem: 9377 +Eval (nsd-val): [30] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (nsd-val): loss: 0.8165 (0.8166) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [31] [ 0/6250] eta: 11:10:07 lr: 0.000103 grad: 0.0875 (0.0875) loss: 0.8132 (0.8132) time: 6.4332 data: 6.3051 max mem: 9377 +Train: [31] [ 100/6250] eta: 0:21:30 lr: 0.000103 grad: 0.0795 (0.0982) loss: 0.8312 (0.8334) time: 0.1464 data: 0.0392 max mem: 9377 +Train: [31] [ 200/6250] eta: 0:18:11 lr: 0.000103 grad: 0.0716 (0.0875) loss: 0.8257 (0.8316) time: 0.1430 data: 0.0372 max mem: 9377 +Train: [31] [ 300/6250] eta: 0:17:14 lr: 0.000103 grad: 0.0758 (0.0848) loss: 0.8228 (0.8308) time: 0.1773 data: 0.0824 max mem: 9377 +Train: [31] [ 400/6250] eta: 0:16:23 lr: 0.000103 grad: 0.0798 (0.0838) loss: 0.8244 (0.8290) time: 0.1771 data: 0.0712 max mem: 9377 +Train: [31] [ 500/6250] eta: 0:15:56 lr: 0.000103 grad: 0.0831 (0.0842) loss: 0.8244 (0.8276) time: 0.1609 data: 0.0527 max mem: 9377 +Train: [31] [ 600/6250] eta: 0:15:29 lr: 0.000103 grad: 0.0762 (0.0833) loss: 0.8239 (0.8272) time: 0.1330 data: 0.0442 max mem: 9377 +Train: [31] [ 700/6250] eta: 0:15:14 lr: 0.000103 grad: 0.0837 (0.0833) loss: 0.8200 (0.8268) time: 0.1800 data: 0.0910 max mem: 9377 +Train: [31] [ 800/6250] eta: 0:15:10 lr: 0.000103 grad: 0.0809 (0.0832) loss: 0.8244 (0.8259) time: 0.1553 data: 0.0619 max mem: 9377 +Train: [31] [ 900/6250] eta: 0:14:59 lr: 0.000103 grad: 0.0802 (0.0831) loss: 0.8233 (0.8253) time: 0.1671 data: 0.0741 max mem: 9377 +Train: [31] [1000/6250] eta: 0:14:44 lr: 0.000103 grad: 0.0798 (0.0834) loss: 0.8211 (0.8251) time: 0.1656 data: 0.0803 max mem: 9377 +Train: [31] [1100/6250] eta: 0:14:30 lr: 0.000103 grad: 0.0811 (0.0834) loss: 0.8250 (0.8248) time: 0.1631 data: 0.0639 max mem: 9377 +Train: [31] [1200/6250] eta: 0:14:13 lr: 0.000103 grad: 0.0829 (0.0834) loss: 0.8190 (0.8245) time: 0.1435 data: 0.0580 max mem: 9377 +Train: [31] [1300/6250] eta: 0:13:58 lr: 0.000103 grad: 0.0845 (0.0836) loss: 0.8163 (0.8241) time: 0.1747 data: 0.0927 max mem: 9377 +Train: [31] [1400/6250] eta: 0:13:38 lr: 0.000103 grad: 0.0799 (0.0836) loss: 0.8234 (0.8237) time: 0.1666 data: 0.0779 max mem: 9377 +Train: [31] [1500/6250] eta: 0:13:17 lr: 0.000103 grad: 0.0797 (0.0834) loss: 0.8185 (0.8235) time: 0.1436 data: 0.0579 max mem: 9377 +Train: [31] [1600/6250] eta: 0:13:02 lr: 0.000103 grad: 0.0790 (0.0834) loss: 0.8234 (0.8233) time: 0.1542 data: 0.0717 max mem: 9377 +Train: [31] [1700/6250] eta: 0:12:45 lr: 0.000103 grad: 0.0771 (0.0832) loss: 0.8199 (0.8234) time: 0.1586 data: 0.0731 max mem: 9377 +Train: [31] [1800/6250] eta: 0:12:27 lr: 0.000103 grad: 0.0776 (0.0830) loss: 0.8249 (0.8234) time: 0.1587 data: 0.0836 max mem: 9377 +Train: [31] [1900/6250] eta: 0:12:09 lr: 0.000103 grad: 0.0821 (0.0829) loss: 0.8165 (0.8233) time: 0.1807 data: 0.0863 max mem: 9377 +Train: [31] [2000/6250] eta: 0:11:52 lr: 0.000103 grad: 0.0808 (0.0829) loss: 0.8240 (0.8233) time: 0.1882 data: 0.1011 max mem: 9377 +Train: [31] [2100/6250] eta: 0:11:33 lr: 0.000103 grad: 0.0811 (0.0829) loss: 0.8192 (0.8232) time: 0.1628 data: 0.0676 max mem: 9377 +Train: [31] [2200/6250] eta: 0:11:17 lr: 0.000103 grad: 0.0858 (0.0829) loss: 0.8163 (0.8231) time: 0.1472 data: 0.0487 max mem: 9377 +Train: [31] [2300/6250] eta: 0:10:58 lr: 0.000103 grad: 0.0778 (0.0829) loss: 0.8219 (0.8231) time: 0.1492 data: 0.0618 max mem: 9377 +Train: [31] [2400/6250] eta: 0:10:39 lr: 0.000103 grad: 0.0818 (0.0831) loss: 0.8197 (0.8230) time: 0.1455 data: 0.0571 max mem: 9377 +Train: [31] [2500/6250] eta: 0:10:21 lr: 0.000103 grad: 0.0852 (0.0832) loss: 0.8231 (0.8229) time: 0.1582 data: 0.0676 max mem: 9377 +Train: [31] [2600/6250] eta: 0:10:05 lr: 0.000103 grad: 0.0831 (0.0833) loss: 0.8252 (0.8229) time: 0.1717 data: 0.0815 max mem: 9377 +Train: [31] [2700/6250] eta: 0:09:46 lr: 0.000103 grad: 0.0832 (0.0834) loss: 0.8242 (0.8229) time: 0.1492 data: 0.0594 max mem: 9377 +Train: [31] [2800/6250] eta: 0:09:28 lr: 0.000103 grad: 0.0808 (0.0835) loss: 0.8262 (0.8228) time: 0.1625 data: 0.0760 max mem: 9377 +Train: [31] [2900/6250] eta: 0:09:11 lr: 0.000103 grad: 0.0902 (0.0836) loss: 0.8187 (0.8227) time: 0.1506 data: 0.0611 max mem: 9377 +Train: [31] [3000/6250] eta: 0:08:53 lr: 0.000103 grad: 0.0859 (0.0838) loss: 0.8226 (0.8226) time: 0.1404 data: 0.0436 max mem: 9377 +Train: [31] [3100/6250] eta: 0:08:36 lr: 0.000103 grad: 0.0783 (0.0839) loss: 0.8245 (0.8225) time: 0.1410 data: 0.0420 max mem: 9377 +Train: [31] [3200/6250] eta: 0:08:19 lr: 0.000102 grad: 0.0873 (0.0840) loss: 0.8221 (0.8224) time: 0.1903 data: 0.1089 max mem: 9377 +Train: [31] [3300/6250] eta: 0:08:02 lr: 0.000102 grad: 0.0872 (0.0842) loss: 0.8218 (0.8223) time: 0.1434 data: 0.0583 max mem: 9377 +Train: [31] [3400/6250] eta: 0:07:45 lr: 0.000102 grad: 0.0874 (0.0844) loss: 0.8209 (0.8222) time: 0.1487 data: 0.0631 max mem: 9377 +Train: [31] [3500/6250] eta: 0:07:28 lr: 0.000102 grad: 0.0872 (0.0845) loss: 0.8150 (0.8221) time: 0.1352 data: 0.0484 max mem: 9377 +Train: [31] [3600/6250] eta: 0:07:11 lr: 0.000102 grad: 0.0912 (0.0847) loss: 0.8171 (0.8220) time: 0.1397 data: 0.0538 max mem: 9377 +Train: [31] [3700/6250] eta: 0:06:55 lr: 0.000102 grad: 0.0835 (0.0847) loss: 0.8185 (0.8219) time: 0.1673 data: 0.0795 max mem: 9377 +Train: [31] [3800/6250] eta: 0:06:38 lr: 0.000102 grad: 0.0922 (0.0848) loss: 0.8183 (0.8219) time: 0.1642 data: 0.0762 max mem: 9377 +Train: [31] [3900/6250] eta: 0:06:21 lr: 0.000102 grad: 0.0845 (0.0849) loss: 0.8177 (0.8218) time: 0.1421 data: 0.0542 max mem: 9377 +Train: [31] [4000/6250] eta: 0:06:04 lr: 0.000102 grad: 0.0853 (0.0849) loss: 0.8191 (0.8217) time: 0.1528 data: 0.0727 max mem: 9377 +Train: [31] [4100/6250] eta: 0:05:48 lr: 0.000102 grad: 0.0820 (0.0849) loss: 0.8167 (0.8217) time: 0.1712 data: 0.0875 max mem: 9377 +Train: [31] [4200/6250] eta: 0:05:32 lr: 0.000102 grad: 0.0887 (0.0850) loss: 0.8236 (0.8216) time: 0.1562 data: 0.0704 max mem: 9377 +Train: [31] [4300/6250] eta: 0:05:15 lr: 0.000102 grad: 0.0898 (0.0850) loss: 0.8170 (0.8216) time: 0.1273 data: 0.0331 max mem: 9377 +Train: [31] [4400/6250] eta: 0:04:58 lr: 0.000102 grad: 0.0825 (0.0851) loss: 0.8184 (0.8215) time: 0.1444 data: 0.0518 max mem: 9377 +Train: [31] [4500/6250] eta: 0:04:42 lr: 0.000102 grad: 0.0843 (0.0852) loss: 0.8287 (0.8215) time: 0.1760 data: 0.0973 max mem: 9377 +Train: [31] [4600/6250] eta: 0:04:26 lr: 0.000102 grad: 0.0845 (0.0852) loss: 0.8235 (0.8214) time: 0.2232 data: 0.1344 max mem: 9377 +Train: [31] [4700/6250] eta: 0:04:10 lr: 0.000102 grad: 0.0843 (0.0853) loss: 0.8233 (0.8214) time: 0.1672 data: 0.0857 max mem: 9377 +Train: [31] [4800/6250] eta: 0:03:54 lr: 0.000102 grad: 0.0888 (0.0854) loss: 0.8207 (0.8213) time: 0.2242 data: 0.1418 max mem: 9377 +Train: [31] [4900/6250] eta: 0:03:37 lr: 0.000102 grad: 0.0856 (0.0855) loss: 0.8202 (0.8212) time: 0.1542 data: 0.0674 max mem: 9377 +Train: [31] [5000/6250] eta: 0:03:21 lr: 0.000102 grad: 0.0890 (0.0856) loss: 0.8163 (0.8212) time: 0.1551 data: 0.0676 max mem: 9377 +Train: [31] [5100/6250] eta: 0:03:04 lr: 0.000102 grad: 0.0819 (0.0857) loss: 0.8195 (0.8211) time: 0.1234 data: 0.0359 max mem: 9377 +Train: [31] [5200/6250] eta: 0:02:48 lr: 0.000102 grad: 0.0862 (0.0858) loss: 0.8190 (0.8211) time: 0.1708 data: 0.0831 max mem: 9377 +Train: [31] [5300/6250] eta: 0:02:32 lr: 0.000102 grad: 0.0867 (0.0858) loss: 0.8192 (0.8211) time: 0.1521 data: 0.0601 max mem: 9377 +Train: [31] [5400/6250] eta: 0:02:16 lr: 0.000102 grad: 0.0881 (0.0859) loss: 0.8225 (0.8211) time: 0.1307 data: 0.0458 max mem: 9377 +Train: [31] [5500/6250] eta: 0:02:00 lr: 0.000102 grad: 0.0810 (0.0859) loss: 0.8212 (0.8211) time: 0.1321 data: 0.0542 max mem: 9377 +Train: [31] [5600/6250] eta: 0:01:44 lr: 0.000102 grad: 0.0870 (0.0859) loss: 0.8259 (0.8211) time: 0.1532 data: 0.0754 max mem: 9377 +Train: [31] [5700/6250] eta: 0:01:28 lr: 0.000102 grad: 0.0838 (0.0859) loss: 0.8215 (0.8212) time: 0.2267 data: 0.1443 max mem: 9377 +Train: [31] [5800/6250] eta: 0:01:12 lr: 0.000102 grad: 0.0851 (0.0859) loss: 0.8228 (0.8212) time: 0.1560 data: 0.0717 max mem: 9377 +Train: [31] [5900/6250] eta: 0:00:56 lr: 0.000102 grad: 0.0830 (0.0859) loss: 0.8236 (0.8213) time: 0.1443 data: 0.0668 max mem: 9377 +Train: [31] [6000/6250] eta: 0:00:40 lr: 0.000102 grad: 0.0813 (0.0859) loss: 0.8336 (0.8213) time: 0.1656 data: 0.0871 max mem: 9377 +Train: [31] [6100/6250] eta: 0:00:24 lr: 0.000102 grad: 0.0830 (0.0859) loss: 0.8208 (0.8213) time: 0.1610 data: 0.0756 max mem: 9377 +Train: [31] [6200/6250] eta: 0:00:08 lr: 0.000102 grad: 0.0879 (0.0859) loss: 0.8202 (0.8214) time: 0.1447 data: 0.0532 max mem: 9377 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.0817 (0.0859) loss: 0.8241 (0.8214) time: 0.1537 data: 0.0664 max mem: 9377 +Train: [31] Total time: 0:16:49 (0.1615 s / it) +Averaged stats: lr: 0.000102 grad: 0.0817 (0.0859) loss: 0.8241 (0.8214) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:05:14 loss: 0.8351 (0.8351) time: 5.0651 data: 5.0332 max mem: 9377 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8338 (0.8331) time: 0.1434 data: 0.1165 max mem: 9377 +Eval (hcp-train-subset): [31] Total time: 0:00:14 (0.2384 s / it) +Averaged stats (hcp-train-subset): loss: 0.8338 (0.8331) +Eval (hcp-val): [31] [ 0/62] eta: 0:04:50 loss: 0.8394 (0.8394) time: 4.6854 data: 4.5975 max mem: 9377 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8394 (0.8423) time: 0.1413 data: 0.1159 max mem: 9377 +Eval (hcp-val): [31] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (hcp-val): loss: 0.8394 (0.8423) +Eval (nsd-val): [31] [ 0/62] eta: 0:03:22 loss: 0.8096 (0.8096) time: 3.2659 data: 3.2010 max mem: 9377 +Eval (nsd-val): [31] [61/62] eta: 0:00:00 loss: 0.8164 (0.8181) time: 0.1242 data: 0.0990 max mem: 9377 +Eval (nsd-val): [31] Total time: 0:00:13 (0.2123 s / it) +Averaged stats (nsd-val): loss: 0.8164 (0.8181) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [32] [ 0/6250] eta: 11:32:49 lr: 0.000102 grad: 0.1316 (0.1316) loss: 0.8787 (0.8787) time: 6.6511 data: 6.4681 max mem: 9377 +Train: [32] [ 100/6250] eta: 0:24:40 lr: 0.000102 grad: 0.0861 (0.1061) loss: 0.8284 (0.8337) time: 0.1816 data: 0.0708 max mem: 9377 +Train: [32] [ 200/6250] eta: 0:21:54 lr: 0.000102 grad: 0.0754 (0.0953) loss: 0.8351 (0.8323) time: 0.1906 data: 0.0843 max mem: 9377 +Train: [32] [ 300/6250] eta: 0:19:58 lr: 0.000102 grad: 0.0737 (0.0887) loss: 0.8308 (0.8323) time: 0.1800 data: 0.0679 max mem: 9377 +Train: [32] [ 400/6250] eta: 0:19:03 lr: 0.000102 grad: 0.0845 (0.0866) loss: 0.8367 (0.8323) time: 0.1929 data: 0.0945 max mem: 9377 +Train: [32] [ 500/6250] eta: 0:18:08 lr: 0.000102 grad: 0.0781 (0.0853) loss: 0.8339 (0.8320) time: 0.1472 data: 0.0597 max mem: 9377 +Train: [32] [ 600/6250] eta: 0:17:29 lr: 0.000102 grad: 0.0838 (0.0848) loss: 0.8224 (0.8316) time: 0.1494 data: 0.0373 max mem: 9377 +Train: [32] [ 700/6250] eta: 0:17:04 lr: 0.000102 grad: 0.0743 (0.0840) loss: 0.8305 (0.8312) time: 0.2041 data: 0.1095 max mem: 9377 +Train: [32] [ 800/6250] eta: 0:16:45 lr: 0.000101 grad: 0.0818 (0.0834) loss: 0.8298 (0.8308) time: 0.1953 data: 0.1087 max mem: 9377 +Train: [32] [ 900/6250] eta: 0:16:20 lr: 0.000101 grad: 0.0734 (0.0827) loss: 0.8317 (0.8304) time: 0.1394 data: 0.0471 max mem: 9377 +Train: [32] [1000/6250] eta: 0:15:53 lr: 0.000101 grad: 0.0757 (0.0825) loss: 0.8240 (0.8298) time: 0.1557 data: 0.0716 max mem: 9377 +Train: [32] [1100/6250] eta: 0:15:31 lr: 0.000101 grad: 0.0797 (0.0821) loss: 0.8238 (0.8295) time: 0.1800 data: 0.0891 max mem: 9377 +Train: [32] [1200/6250] eta: 0:15:05 lr: 0.000101 grad: 0.0706 (0.0818) loss: 0.8237 (0.8292) time: 0.1728 data: 0.0893 max mem: 9377 +Train: [32] [1300/6250] eta: 0:14:42 lr: 0.000101 grad: 0.0771 (0.0817) loss: 0.8201 (0.8288) time: 0.1852 data: 0.0938 max mem: 9377 +Train: [32] [1400/6250] eta: 0:14:18 lr: 0.000101 grad: 0.0780 (0.0817) loss: 0.8266 (0.8284) time: 0.1442 data: 0.0375 max mem: 9377 +Train: [32] [1500/6250] eta: 0:13:58 lr: 0.000101 grad: 0.0796 (0.0817) loss: 0.8258 (0.8280) time: 0.2023 data: 0.1150 max mem: 9377 +Train: [32] [1600/6250] eta: 0:13:37 lr: 0.000101 grad: 0.0844 (0.0819) loss: 0.8171 (0.8274) time: 0.1884 data: 0.0961 max mem: 9377 +Train: [32] [1700/6250] eta: 0:13:16 lr: 0.000101 grad: 0.0798 (0.0821) loss: 0.8220 (0.8268) time: 0.1708 data: 0.0846 max mem: 9377 +Train: [32] [1800/6250] eta: 0:12:56 lr: 0.000101 grad: 0.0774 (0.0821) loss: 0.8240 (0.8264) time: 0.1570 data: 0.0638 max mem: 9377 +Train: [32] [1900/6250] eta: 0:12:36 lr: 0.000101 grad: 0.0773 (0.0822) loss: 0.8208 (0.8261) time: 0.1626 data: 0.0765 max mem: 9377 +Train: [32] [2000/6250] eta: 0:12:18 lr: 0.000101 grad: 0.0797 (0.0825) loss: 0.8245 (0.8259) time: 0.1458 data: 0.0530 max mem: 9377 +Train: [32] [2100/6250] eta: 0:11:59 lr: 0.000101 grad: 0.0885 (0.0826) loss: 0.8247 (0.8257) time: 0.1734 data: 0.0817 max mem: 9377 +Train: [32] [2200/6250] eta: 0:11:38 lr: 0.000101 grad: 0.0795 (0.0827) loss: 0.8264 (0.8256) time: 0.1557 data: 0.0608 max mem: 9377 +Train: [32] [2300/6250] eta: 0:11:17 lr: 0.000101 grad: 0.0813 (0.0829) loss: 0.8280 (0.8255) time: 0.1553 data: 0.0568 max mem: 9377 +Train: [32] [2400/6250] eta: 0:10:58 lr: 0.000101 grad: 0.0897 (0.0832) loss: 0.8157 (0.8253) time: 0.1544 data: 0.0675 max mem: 9377 +Train: [32] [2500/6250] eta: 0:10:39 lr: 0.000101 grad: 0.0803 (0.0835) loss: 0.8194 (0.8251) time: 0.1719 data: 0.0811 max mem: 9377 +Train: [32] [2600/6250] eta: 0:10:22 lr: 0.000101 grad: 0.0839 (0.0837) loss: 0.8182 (0.8249) time: 0.1931 data: 0.1145 max mem: 9377 +Train: [32] [2700/6250] eta: 0:10:01 lr: 0.000101 grad: 0.0790 (0.0840) loss: 0.8181 (0.8248) time: 0.1412 data: 0.0495 max mem: 9377 +Train: [32] [2800/6250] eta: 0:09:43 lr: 0.000101 grad: 0.0820 (0.0843) loss: 0.8181 (0.8246) time: 0.1626 data: 0.0746 max mem: 9377 +Train: [32] [2900/6250] eta: 0:09:25 lr: 0.000101 grad: 0.0822 (0.0844) loss: 0.8232 (0.8244) time: 0.1592 data: 0.0690 max mem: 9377 +Train: [32] [3000/6250] eta: 0:09:07 lr: 0.000101 grad: 0.0838 (0.0847) loss: 0.8206 (0.8242) time: 0.1454 data: 0.0642 max mem: 9377 +Train: [32] [3100/6250] eta: 0:08:48 lr: 0.000101 grad: 0.0843 (0.0848) loss: 0.8206 (0.8241) time: 0.1340 data: 0.0385 max mem: 9377 +Train: [32] [3200/6250] eta: 0:08:31 lr: 0.000101 grad: 0.0818 (0.0848) loss: 0.8176 (0.8239) time: 0.1401 data: 0.0512 max mem: 9377 +Train: [32] [3300/6250] eta: 0:08:13 lr: 0.000101 grad: 0.0836 (0.0849) loss: 0.8203 (0.8239) time: 0.1453 data: 0.0579 max mem: 9377 +Train: [32] [3400/6250] eta: 0:07:55 lr: 0.000101 grad: 0.0810 (0.0850) loss: 0.8241 (0.8238) time: 0.1593 data: 0.0705 max mem: 9377 +Train: [32] [3500/6250] eta: 0:07:37 lr: 0.000101 grad: 0.0850 (0.0852) loss: 0.8216 (0.8237) time: 0.1610 data: 0.0779 max mem: 9377 +Train: [32] [3600/6250] eta: 0:07:20 lr: 0.000101 grad: 0.0897 (0.0853) loss: 0.8216 (0.8236) time: 0.1378 data: 0.0503 max mem: 9377 +Train: [32] [3700/6250] eta: 0:07:03 lr: 0.000101 grad: 0.0903 (0.0855) loss: 0.8124 (0.8235) time: 0.1612 data: 0.0722 max mem: 9377 +Train: [32] [3800/6250] eta: 0:06:45 lr: 0.000101 grad: 0.0795 (0.0856) loss: 0.8270 (0.8234) time: 0.1606 data: 0.0709 max mem: 9377 +Train: [32] [3900/6250] eta: 0:06:29 lr: 0.000101 grad: 0.0870 (0.0857) loss: 0.8184 (0.8233) time: 0.1699 data: 0.0854 max mem: 9377 +Train: [32] [4000/6250] eta: 0:06:11 lr: 0.000101 grad: 0.0853 (0.0857) loss: 0.8235 (0.8232) time: 0.1432 data: 0.0558 max mem: 9377 +Train: [32] [4100/6250] eta: 0:05:54 lr: 0.000101 grad: 0.0847 (0.0858) loss: 0.8195 (0.8231) time: 0.1402 data: 0.0500 max mem: 9377 +Train: [32] [4200/6250] eta: 0:05:37 lr: 0.000101 grad: 0.0795 (0.0858) loss: 0.8257 (0.8231) time: 0.1549 data: 0.0673 max mem: 9377 +Train: [32] [4300/6250] eta: 0:05:20 lr: 0.000101 grad: 0.0787 (0.0859) loss: 0.8209 (0.8230) time: 0.1637 data: 0.0750 max mem: 9377 +Train: [32] [4400/6250] eta: 0:05:04 lr: 0.000101 grad: 0.0807 (0.0859) loss: 0.8200 (0.8230) time: 0.2103 data: 0.1268 max mem: 9377 +Train: [32] [4500/6250] eta: 0:04:47 lr: 0.000101 grad: 0.0829 (0.0859) loss: 0.8158 (0.8229) time: 0.1801 data: 0.1036 max mem: 9377 +Train: [32] [4600/6250] eta: 0:04:30 lr: 0.000101 grad: 0.0758 (0.0859) loss: 0.8268 (0.8229) time: 0.1529 data: 0.0665 max mem: 9377 +Train: [32] [4700/6250] eta: 0:04:14 lr: 0.000100 grad: 0.0743 (0.0860) loss: 0.8262 (0.8229) time: 0.2514 data: 0.1656 max mem: 9377 +Train: [32] [4800/6250] eta: 0:03:57 lr: 0.000100 grad: 0.0792 (0.0860) loss: 0.8203 (0.8229) time: 0.1645 data: 0.0664 max mem: 9377 +Train: [32] [4900/6250] eta: 0:03:41 lr: 0.000100 grad: 0.0801 (0.0859) loss: 0.8201 (0.8228) time: 0.1727 data: 0.0896 max mem: 9377 +Train: [32] [5000/6250] eta: 0:03:24 lr: 0.000100 grad: 0.0840 (0.0859) loss: 0.8180 (0.8228) time: 0.1548 data: 0.0572 max mem: 9377 +Train: [32] [5100/6250] eta: 0:03:07 lr: 0.000100 grad: 0.0836 (0.0859) loss: 0.8256 (0.8228) time: 0.1438 data: 0.0587 max mem: 9377 +Train: [32] [5200/6250] eta: 0:02:51 lr: 0.000100 grad: 0.0794 (0.0859) loss: 0.8264 (0.8227) time: 0.1502 data: 0.0694 max mem: 9377 +Train: [32] [5300/6250] eta: 0:02:34 lr: 0.000100 grad: 0.0830 (0.0859) loss: 0.8150 (0.8228) time: 0.1637 data: 0.0876 max mem: 9377 +Train: [32] [5400/6250] eta: 0:02:18 lr: 0.000100 grad: 0.0793 (0.0858) loss: 0.8250 (0.8228) time: 0.1466 data: 0.0615 max mem: 9377 +Train: [32] [5500/6250] eta: 0:02:02 lr: 0.000100 grad: 0.0812 (0.0857) loss: 0.8245 (0.8228) time: 0.1985 data: 0.1125 max mem: 9377 +Train: [32] [5600/6250] eta: 0:01:45 lr: 0.000100 grad: 0.0820 (0.0857) loss: 0.8255 (0.8229) time: 0.1709 data: 0.0820 max mem: 9377 +Train: [32] [5700/6250] eta: 0:01:29 lr: 0.000100 grad: 0.0808 (0.0857) loss: 0.8223 (0.8229) time: 0.1747 data: 0.0861 max mem: 9377 +Train: [32] [5800/6250] eta: 0:01:13 lr: 0.000100 grad: 0.0770 (0.0856) loss: 0.8229 (0.8229) time: 0.1863 data: 0.0937 max mem: 9377 +Train: [32] [5900/6250] eta: 0:00:56 lr: 0.000100 grad: 0.0836 (0.0856) loss: 0.8239 (0.8230) time: 0.1693 data: 0.0827 max mem: 9377 +Train: [32] [6000/6250] eta: 0:00:40 lr: 0.000100 grad: 0.0845 (0.0855) loss: 0.8301 (0.8231) time: 0.1754 data: 0.0784 max mem: 9377 +Train: [32] [6100/6250] eta: 0:00:24 lr: 0.000100 grad: 0.0777 (0.0854) loss: 0.8263 (0.8231) time: 0.1761 data: 0.0921 max mem: 9377 +Train: [32] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.0817 (0.0854) loss: 0.8293 (0.8231) time: 0.1662 data: 0.0719 max mem: 9377 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.0789 (0.0854) loss: 0.8265 (0.8231) time: 0.1692 data: 0.0773 max mem: 9377 +Train: [32] Total time: 0:17:09 (0.1647 s / it) +Averaged stats: lr: 0.000100 grad: 0.0789 (0.0854) loss: 0.8265 (0.8231) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:04:53 loss: 0.8383 (0.8383) time: 4.7386 data: 4.6745 max mem: 9377 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8332 (0.8327) time: 0.1424 data: 0.1168 max mem: 9377 +Eval (hcp-train-subset): [32] Total time: 0:00:16 (0.2582 s / it) +Averaged stats (hcp-train-subset): loss: 0.8332 (0.8327) +Eval (hcp-val): [32] [ 0/62] eta: 0:04:18 loss: 0.8423 (0.8423) time: 4.1651 data: 4.0753 max mem: 9377 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8396 (0.8416) time: 0.1296 data: 0.1038 max mem: 9377 +Eval (hcp-val): [32] Total time: 0:00:16 (0.2582 s / it) +Averaged stats (hcp-val): loss: 0.8396 (0.8416) +Eval (nsd-val): [32] [ 0/62] eta: 0:05:31 loss: 0.8025 (0.8025) time: 5.3426 data: 5.3115 max mem: 9377 +Eval (nsd-val): [32] [61/62] eta: 0:00:00 loss: 0.8133 (0.8151) time: 0.1635 data: 0.1362 max mem: 9377 +Eval (nsd-val): [32] Total time: 0:00:15 (0.2534 s / it) +Averaged stats (nsd-val): loss: 0.8133 (0.8151) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 12:30:34 lr: 0.000100 grad: 0.3524 (0.3524) loss: 0.7510 (0.7510) time: 7.2055 data: 7.0957 max mem: 9377 +Train: [33] [ 100/6250] eta: 0:23:35 lr: 0.000100 grad: 0.0819 (0.1054) loss: 0.8311 (0.8309) time: 0.1543 data: 0.0409 max mem: 9377 +Train: [33] [ 200/6250] eta: 0:20:46 lr: 0.000100 grad: 0.0811 (0.0948) loss: 0.8391 (0.8296) time: 0.1659 data: 0.0587 max mem: 9377 +Train: [33] [ 300/6250] eta: 0:19:26 lr: 0.000100 grad: 0.0818 (0.0911) loss: 0.8239 (0.8288) time: 0.1642 data: 0.0564 max mem: 9377 +Train: [33] [ 400/6250] eta: 0:18:20 lr: 0.000100 grad: 0.0813 (0.0896) loss: 0.8249 (0.8278) time: 0.1394 data: 0.0364 max mem: 9377 +Train: [33] [ 500/6250] eta: 0:17:38 lr: 0.000100 grad: 0.0876 (0.0891) loss: 0.8206 (0.8274) time: 0.1642 data: 0.0681 max mem: 9377 +Train: [33] [ 600/6250] eta: 0:16:57 lr: 0.000100 grad: 0.0864 (0.0891) loss: 0.8321 (0.8272) time: 0.1569 data: 0.0663 max mem: 9377 +Train: [33] [ 700/6250] eta: 0:16:30 lr: 0.000100 grad: 0.0883 (0.0897) loss: 0.8292 (0.8269) time: 0.1798 data: 0.0956 max mem: 9377 +Train: [33] [ 800/6250] eta: 0:16:10 lr: 0.000100 grad: 0.0873 (0.0896) loss: 0.8229 (0.8264) time: 0.1790 data: 0.0856 max mem: 9377 +Train: [33] [ 900/6250] eta: 0:15:49 lr: 0.000100 grad: 0.0822 (0.0894) loss: 0.8261 (0.8263) time: 0.1961 data: 0.1047 max mem: 9377 +Train: [33] [1000/6250] eta: 0:15:28 lr: 0.000100 grad: 0.0841 (0.0893) loss: 0.8226 (0.8260) time: 0.1872 data: 0.0957 max mem: 9377 +Train: [33] [1100/6250] eta: 0:15:07 lr: 0.000100 grad: 0.0840 (0.0891) loss: 0.8254 (0.8259) time: 0.1686 data: 0.0851 max mem: 9377 +Train: [33] [1200/6250] eta: 0:14:43 lr: 0.000100 grad: 0.0874 (0.0891) loss: 0.8152 (0.8255) time: 0.1586 data: 0.0565 max mem: 9377 +Train: [33] [1300/6250] eta: 0:14:19 lr: 0.000100 grad: 0.0837 (0.0890) loss: 0.8214 (0.8255) time: 0.1505 data: 0.0585 max mem: 9377 +Train: [33] [1400/6250] eta: 0:13:55 lr: 0.000100 grad: 0.0776 (0.0886) loss: 0.8292 (0.8255) time: 0.1568 data: 0.0665 max mem: 9377 +Train: [33] [1500/6250] eta: 0:13:34 lr: 0.000100 grad: 0.0768 (0.0887) loss: 0.8283 (0.8254) time: 0.1730 data: 0.0838 max mem: 9377 +Train: [33] [1600/6250] eta: 0:13:16 lr: 0.000100 grad: 0.0855 (0.0886) loss: 0.8214 (0.8252) time: 0.1574 data: 0.0653 max mem: 9377 +Train: [33] [1700/6250] eta: 0:12:57 lr: 0.000100 grad: 0.0868 (0.0886) loss: 0.8235 (0.8251) time: 0.2002 data: 0.1181 max mem: 9377 +Train: [33] [1800/6250] eta: 0:12:37 lr: 0.000100 grad: 0.0874 (0.0885) loss: 0.8215 (0.8250) time: 0.1585 data: 0.0570 max mem: 9377 +Train: [33] [1900/6250] eta: 0:12:18 lr: 0.000100 grad: 0.0818 (0.0885) loss: 0.8306 (0.8250) time: 0.1848 data: 0.0910 max mem: 9377 +Train: [33] [2000/6250] eta: 0:12:02 lr: 0.000100 grad: 0.0833 (0.0885) loss: 0.8283 (0.8250) time: 0.1904 data: 0.0979 max mem: 9377 +Train: [33] [2100/6250] eta: 0:11:44 lr: 0.000100 grad: 0.0879 (0.0887) loss: 0.8308 (0.8251) time: 0.1474 data: 0.0578 max mem: 9377 +Train: [33] [2200/6250] eta: 0:11:25 lr: 0.000099 grad: 0.0865 (0.0887) loss: 0.8283 (0.8250) time: 0.1733 data: 0.0759 max mem: 9377 +Train: [33] [2300/6250] eta: 0:11:08 lr: 0.000099 grad: 0.0853 (0.0887) loss: 0.8225 (0.8248) time: 0.2347 data: 0.0893 max mem: 9377 +Train: [33] [2400/6250] eta: 0:10:47 lr: 0.000099 grad: 0.0854 (0.0888) loss: 0.8129 (0.8247) time: 0.1547 data: 0.0578 max mem: 9377 +Train: [33] [2500/6250] eta: 0:10:28 lr: 0.000099 grad: 0.0882 (0.0888) loss: 0.8277 (0.8245) time: 0.1487 data: 0.0578 max mem: 9377 +Train: [33] [2600/6250] eta: 0:10:10 lr: 0.000099 grad: 0.0836 (0.0888) loss: 0.8145 (0.8243) time: 0.1729 data: 0.0921 max mem: 9377 +Train: [33] [2700/6250] eta: 0:09:52 lr: 0.000099 grad: 0.0860 (0.0888) loss: 0.8155 (0.8240) time: 0.1576 data: 0.0650 max mem: 9377 +Train: [33] [2800/6250] eta: 0:09:38 lr: 0.000099 grad: 0.0897 (0.0889) loss: 0.8210 (0.8238) time: 0.1929 data: 0.1082 max mem: 9377 +Train: [33] [2900/6250] eta: 0:09:20 lr: 0.000099 grad: 0.0833 (0.0889) loss: 0.8180 (0.8237) time: 0.1752 data: 0.0925 max mem: 9377 +Train: [33] [3000/6250] eta: 0:09:04 lr: 0.000099 grad: 0.0895 (0.0890) loss: 0.8183 (0.8236) time: 0.2032 data: 0.1283 max mem: 9377 +Train: [33] [3100/6250] eta: 0:08:46 lr: 0.000099 grad: 0.0897 (0.0890) loss: 0.8273 (0.8234) time: 0.1640 data: 0.0740 max mem: 9377 +Train: [33] [3200/6250] eta: 0:08:27 lr: 0.000099 grad: 0.0819 (0.0891) loss: 0.8174 (0.8233) time: 0.1628 data: 0.0748 max mem: 9377 +Train: [33] [3300/6250] eta: 0:08:10 lr: 0.000099 grad: 0.0879 (0.0891) loss: 0.8248 (0.8231) time: 0.1768 data: 0.0830 max mem: 9377 +Train: [33] [3400/6250] eta: 0:07:53 lr: 0.000099 grad: 0.0847 (0.0890) loss: 0.8235 (0.8230) time: 0.1688 data: 0.0814 max mem: 9377 +Train: [33] [3500/6250] eta: 0:07:36 lr: 0.000099 grad: 0.0805 (0.0890) loss: 0.8222 (0.8230) time: 0.1881 data: 0.1063 max mem: 9377 +Train: [33] [3600/6250] eta: 0:07:19 lr: 0.000099 grad: 0.0845 (0.0891) loss: 0.8237 (0.8230) time: 0.1236 data: 0.0212 max mem: 9377 +Train: [33] [3700/6250] eta: 0:07:01 lr: 0.000099 grad: 0.0868 (0.0891) loss: 0.8162 (0.8229) time: 0.1393 data: 0.0486 max mem: 9377 +Train: [33] [3800/6250] eta: 0:06:44 lr: 0.000099 grad: 0.0887 (0.0890) loss: 0.8280 (0.8228) time: 0.1464 data: 0.0549 max mem: 9377 +Train: [33] [3900/6250] eta: 0:06:27 lr: 0.000099 grad: 0.0845 (0.0889) loss: 0.8258 (0.8228) time: 0.1583 data: 0.0682 max mem: 9377 +Train: [33] [4000/6250] eta: 0:06:10 lr: 0.000099 grad: 0.0888 (0.0889) loss: 0.8203 (0.8228) time: 0.1503 data: 0.0506 max mem: 9377 +Train: [33] [4100/6250] eta: 0:05:53 lr: 0.000099 grad: 0.0854 (0.0888) loss: 0.8204 (0.8229) time: 0.1632 data: 0.0795 max mem: 9377 +Train: [33] [4200/6250] eta: 0:05:36 lr: 0.000099 grad: 0.0881 (0.0889) loss: 0.8211 (0.8229) time: 0.1598 data: 0.0735 max mem: 9377 +Train: [33] [4300/6250] eta: 0:05:19 lr: 0.000099 grad: 0.0802 (0.0888) loss: 0.8201 (0.8228) time: 0.1648 data: 0.0750 max mem: 9377 +Train: [33] [4400/6250] eta: 0:05:03 lr: 0.000099 grad: 0.0846 (0.0889) loss: 0.8137 (0.8228) time: 0.1648 data: 0.0794 max mem: 9377 +Train: [33] [4500/6250] eta: 0:04:46 lr: 0.000099 grad: 0.0872 (0.0888) loss: 0.8242 (0.8227) time: 0.1581 data: 0.0723 max mem: 9377 +Train: [33] [4600/6250] eta: 0:04:29 lr: 0.000099 grad: 0.0838 (0.0888) loss: 0.8158 (0.8226) time: 0.1438 data: 0.0566 max mem: 9377 +Train: [33] [4700/6250] eta: 0:04:12 lr: 0.000099 grad: 0.0878 (0.0888) loss: 0.8214 (0.8226) time: 0.1403 data: 0.0484 max mem: 9377 +Train: [33] [4800/6250] eta: 0:03:56 lr: 0.000099 grad: 0.0850 (0.0887) loss: 0.8159 (0.8226) time: 0.1239 data: 0.0343 max mem: 9377 +Train: [33] [4900/6250] eta: 0:03:39 lr: 0.000099 grad: 0.0864 (0.0887) loss: 0.8243 (0.8226) time: 0.1391 data: 0.0514 max mem: 9377 +Train: [33] [5000/6250] eta: 0:03:23 lr: 0.000099 grad: 0.0870 (0.0887) loss: 0.8257 (0.8225) time: 0.1346 data: 0.0382 max mem: 9377 +Train: [33] [5100/6250] eta: 0:03:06 lr: 0.000099 grad: 0.0871 (0.0887) loss: 0.8205 (0.8225) time: 0.1478 data: 0.0586 max mem: 9377 +Train: [33] [5200/6250] eta: 0:02:50 lr: 0.000099 grad: 0.0778 (0.0887) loss: 0.8281 (0.8226) time: 0.1708 data: 0.0877 max mem: 9377 +Train: [33] [5300/6250] eta: 0:02:33 lr: 0.000099 grad: 0.0800 (0.0886) loss: 0.8263 (0.8226) time: 0.1590 data: 0.0792 max mem: 9377 +Train: [33] [5400/6250] eta: 0:02:17 lr: 0.000099 grad: 0.0854 (0.0886) loss: 0.8170 (0.8226) time: 0.1860 data: 0.0980 max mem: 9377 +Train: [33] [5500/6250] eta: 0:02:01 lr: 0.000099 grad: 0.0811 (0.0886) loss: 0.8242 (0.8226) time: 0.1685 data: 0.0814 max mem: 9377 +Train: [33] [5600/6250] eta: 0:01:45 lr: 0.000099 grad: 0.0905 (0.0885) loss: 0.8231 (0.8226) time: 0.1568 data: 0.0707 max mem: 9377 +Train: [33] [5700/6250] eta: 0:01:29 lr: 0.000099 grad: 0.0791 (0.0884) loss: 0.8203 (0.8227) time: 0.1841 data: 0.1028 max mem: 9377 +Train: [33] [5800/6250] eta: 0:01:13 lr: 0.000099 grad: 0.0767 (0.0884) loss: 0.8270 (0.8227) time: 0.1673 data: 0.0723 max mem: 9377 +Train: [33] [5900/6250] eta: 0:00:57 lr: 0.000098 grad: 0.0816 (0.0883) loss: 0.8224 (0.8227) time: 0.1890 data: 0.1101 max mem: 9377 +Train: [33] [6000/6250] eta: 0:00:40 lr: 0.000098 grad: 0.0791 (0.0883) loss: 0.8170 (0.8227) time: 0.1814 data: 0.0862 max mem: 9377 +Train: [33] [6100/6250] eta: 0:00:24 lr: 0.000098 grad: 0.0811 (0.0883) loss: 0.8312 (0.8227) time: 0.1595 data: 0.0756 max mem: 9377 +Train: [33] [6200/6250] eta: 0:00:08 lr: 0.000098 grad: 0.0827 (0.0883) loss: 0.8241 (0.8227) time: 0.1706 data: 0.0834 max mem: 9377 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.0869 (0.0883) loss: 0.8202 (0.8227) time: 0.1622 data: 0.0753 max mem: 9377 +Train: [33] Total time: 0:17:10 (0.1649 s / it) +Averaged stats: lr: 0.000098 grad: 0.0869 (0.0883) loss: 0.8202 (0.8227) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:04:34 loss: 0.8343 (0.8343) time: 4.4325 data: 4.3382 max mem: 9377 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8315 (0.8323) time: 0.1561 data: 0.1291 max mem: 9377 +Eval (hcp-train-subset): [33] Total time: 0:00:15 (0.2509 s / it) +Averaged stats (hcp-train-subset): loss: 0.8315 (0.8323) +Eval (hcp-val): [33] [ 0/62] eta: 0:04:08 loss: 0.8452 (0.8452) time: 4.0134 data: 3.9161 max mem: 9377 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8422 (0.8416) time: 0.1454 data: 0.1193 max mem: 9377 +Eval (hcp-val): [33] Total time: 0:00:15 (0.2524 s / it) +Averaged stats (hcp-val): loss: 0.8422 (0.8416) +Eval (nsd-val): [33] [ 0/62] eta: 0:06:26 loss: 0.8059 (0.8059) time: 6.2382 data: 6.2065 max mem: 9377 +Eval (nsd-val): [33] [61/62] eta: 0:00:00 loss: 0.8138 (0.8148) time: 0.1346 data: 0.1092 max mem: 9377 +Eval (nsd-val): [33] Total time: 0:00:15 (0.2485 s / it) +Averaged stats (nsd-val): loss: 0.8138 (0.8148) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [34] [ 0/6250] eta: 12:07:34 lr: 0.000098 grad: 0.0976 (0.0976) loss: 0.8273 (0.8273) time: 6.9847 data: 6.8596 max mem: 9377 +Train: [34] [ 100/6250] eta: 0:23:27 lr: 0.000098 grad: 0.0815 (0.0964) loss: 0.8398 (0.8421) time: 0.1757 data: 0.0837 max mem: 9377 +Train: [34] [ 200/6250] eta: 0:20:30 lr: 0.000098 grad: 0.0857 (0.0938) loss: 0.8212 (0.8338) time: 0.1586 data: 0.0546 max mem: 9377 +Train: [34] [ 300/6250] eta: 0:19:09 lr: 0.000098 grad: 0.0815 (0.0898) loss: 0.8181 (0.8318) time: 0.1712 data: 0.0706 max mem: 9377 +Train: [34] [ 400/6250] eta: 0:18:00 lr: 0.000098 grad: 0.0879 (0.0891) loss: 0.8224 (0.8301) time: 0.1541 data: 0.0526 max mem: 9377 +Train: [34] [ 500/6250] eta: 0:17:16 lr: 0.000098 grad: 0.0814 (0.0881) loss: 0.8256 (0.8290) time: 0.1396 data: 0.0462 max mem: 9377 +Train: [34] [ 600/6250] eta: 0:16:40 lr: 0.000098 grad: 0.0820 (0.0879) loss: 0.8162 (0.8270) time: 0.1721 data: 0.0785 max mem: 9377 +Train: [34] [ 700/6250] eta: 0:16:08 lr: 0.000098 grad: 0.0791 (0.0876) loss: 0.8213 (0.8260) time: 0.1709 data: 0.0883 max mem: 9377 +Train: [34] [ 800/6250] eta: 0:15:50 lr: 0.000098 grad: 0.0847 (0.0871) loss: 0.8195 (0.8256) time: 0.1594 data: 0.0676 max mem: 9377 +Train: [34] [ 900/6250] eta: 0:15:25 lr: 0.000098 grad: 0.0788 (0.0867) loss: 0.8239 (0.8256) time: 0.1256 data: 0.0310 max mem: 9377 +Train: [34] [1000/6250] eta: 0:14:59 lr: 0.000098 grad: 0.0830 (0.0868) loss: 0.8277 (0.8256) time: 0.1653 data: 0.0828 max mem: 9377 +Train: [34] [1100/6250] eta: 0:14:33 lr: 0.000098 grad: 0.0770 (0.0866) loss: 0.8320 (0.8252) time: 0.1448 data: 0.0510 max mem: 9377 +Train: [34] [1200/6250] eta: 0:14:14 lr: 0.000098 grad: 0.0802 (0.0862) loss: 0.8234 (0.8250) time: 0.1709 data: 0.0761 max mem: 9377 +Train: [34] [1300/6250] eta: 0:13:51 lr: 0.000098 grad: 0.0847 (0.0862) loss: 0.8216 (0.8246) time: 0.1502 data: 0.0653 max mem: 9377 +Train: [34] [1400/6250] eta: 0:13:31 lr: 0.000098 grad: 0.0816 (0.0862) loss: 0.8289 (0.8242) time: 0.1704 data: 0.0818 max mem: 9377 +Train: [34] [1500/6250] eta: 0:13:08 lr: 0.000098 grad: 0.0817 (0.0868) loss: 0.8255 (0.8238) time: 0.1508 data: 0.0602 max mem: 9377 +Train: [34] [1600/6250] eta: 0:12:46 lr: 0.000098 grad: 0.0812 (0.0867) loss: 0.8175 (0.8236) time: 0.1448 data: 0.0528 max mem: 9377 +Train: [34] [1700/6250] eta: 0:12:32 lr: 0.000098 grad: 0.0827 (0.0867) loss: 0.8169 (0.8232) time: 0.1585 data: 0.0622 max mem: 9377 +Train: [34] [1800/6250] eta: 0:12:13 lr: 0.000098 grad: 0.0971 (0.0867) loss: 0.8180 (0.8229) time: 0.1588 data: 0.0687 max mem: 9377 +Train: [34] [1900/6250] eta: 0:11:57 lr: 0.000098 grad: 0.0843 (0.0868) loss: 0.8191 (0.8227) time: 0.1644 data: 0.0820 max mem: 9377 +Train: [34] [2000/6250] eta: 0:11:38 lr: 0.000098 grad: 0.0796 (0.0868) loss: 0.8250 (0.8227) time: 0.1803 data: 0.0815 max mem: 9377 +Train: [34] [2100/6250] eta: 0:11:19 lr: 0.000098 grad: 0.0876 (0.0868) loss: 0.8229 (0.8226) time: 0.1531 data: 0.0629 max mem: 9377 +Train: [34] [2200/6250] eta: 0:11:01 lr: 0.000098 grad: 0.0826 (0.0868) loss: 0.8224 (0.8223) time: 0.1387 data: 0.0399 max mem: 9377 +Train: [34] [2300/6250] eta: 0:10:43 lr: 0.000098 grad: 0.0842 (0.0868) loss: 0.8196 (0.8222) time: 0.1649 data: 0.0813 max mem: 9377 +Train: [34] [2400/6250] eta: 0:10:24 lr: 0.000098 grad: 0.0878 (0.0868) loss: 0.8192 (0.8221) time: 0.1619 data: 0.0707 max mem: 9377 +Train: [34] [2500/6250] eta: 0:10:05 lr: 0.000098 grad: 0.0871 (0.0867) loss: 0.8304 (0.8221) time: 0.1392 data: 0.0507 max mem: 9377 +Train: [34] [2600/6250] eta: 0:09:46 lr: 0.000098 grad: 0.0833 (0.0867) loss: 0.8219 (0.8222) time: 0.1388 data: 0.0478 max mem: 9377 +Train: [34] [2700/6250] eta: 0:09:28 lr: 0.000098 grad: 0.0907 (0.0867) loss: 0.8208 (0.8221) time: 0.1355 data: 0.0465 max mem: 9377 +Train: [34] [2800/6250] eta: 0:09:10 lr: 0.000098 grad: 0.0900 (0.0869) loss: 0.8132 (0.8220) time: 0.1433 data: 0.0423 max mem: 9377 +Train: [34] [2900/6250] eta: 0:08:55 lr: 0.000098 grad: 0.0834 (0.0870) loss: 0.8181 (0.8219) time: 0.1839 data: 0.0962 max mem: 9377 +Train: [34] [3000/6250] eta: 0:08:38 lr: 0.000098 grad: 0.0904 (0.0870) loss: 0.8179 (0.8219) time: 0.1537 data: 0.0597 max mem: 9377 +Train: [34] [3100/6250] eta: 0:08:22 lr: 0.000098 grad: 0.0851 (0.0871) loss: 0.8214 (0.8218) time: 0.1730 data: 0.0898 max mem: 9377 +Train: [34] [3200/6250] eta: 0:08:07 lr: 0.000098 grad: 0.0965 (0.0872) loss: 0.8210 (0.8217) time: 0.2251 data: 0.1369 max mem: 9377 +Train: [34] [3300/6250] eta: 0:07:49 lr: 0.000097 grad: 0.0931 (0.0874) loss: 0.8174 (0.8215) time: 0.1467 data: 0.0553 max mem: 9377 +Train: [34] [3400/6250] eta: 0:07:33 lr: 0.000097 grad: 0.0993 (0.0876) loss: 0.8165 (0.8213) time: 0.1595 data: 0.0743 max mem: 9377 +Train: [34] [3500/6250] eta: 0:07:16 lr: 0.000097 grad: 0.0907 (0.0877) loss: 0.8272 (0.8211) time: 0.1698 data: 0.0867 max mem: 9377 +Train: [34] [3600/6250] eta: 0:07:00 lr: 0.000097 grad: 0.0856 (0.0877) loss: 0.8256 (0.8211) time: 0.1236 data: 0.0322 max mem: 9377 +Train: [34] [3700/6250] eta: 0:06:44 lr: 0.000097 grad: 0.0853 (0.0877) loss: 0.8220 (0.8211) time: 0.1602 data: 0.0747 max mem: 9377 +Train: [34] [3800/6250] eta: 0:06:28 lr: 0.000097 grad: 0.0824 (0.0877) loss: 0.8237 (0.8211) time: 0.1387 data: 0.0461 max mem: 9377 +Train: [34] [3900/6250] eta: 0:06:12 lr: 0.000097 grad: 0.0872 (0.0877) loss: 0.8283 (0.8212) time: 0.1487 data: 0.0595 max mem: 9377 +Train: [34] [4000/6250] eta: 0:05:56 lr: 0.000097 grad: 0.0892 (0.0877) loss: 0.8287 (0.8212) time: 0.1681 data: 0.0817 max mem: 9377 +Train: [34] [4100/6250] eta: 0:05:40 lr: 0.000097 grad: 0.0827 (0.0878) loss: 0.8268 (0.8212) time: 0.1669 data: 0.0802 max mem: 9377 +Train: [34] [4200/6250] eta: 0:05:24 lr: 0.000097 grad: 0.0853 (0.0877) loss: 0.8296 (0.8213) time: 0.1586 data: 0.0634 max mem: 9377 +Train: [34] [4300/6250] eta: 0:05:08 lr: 0.000097 grad: 0.0876 (0.0876) loss: 0.8230 (0.8214) time: 0.1228 data: 0.0408 max mem: 9377 +Train: [34] [4400/6250] eta: 0:04:52 lr: 0.000097 grad: 0.0834 (0.0876) loss: 0.8188 (0.8215) time: 0.1756 data: 0.0915 max mem: 9377 +Train: [34] [4500/6250] eta: 0:04:36 lr: 0.000097 grad: 0.0889 (0.0876) loss: 0.8158 (0.8215) time: 0.1281 data: 0.0374 max mem: 9377 +Train: [34] [4600/6250] eta: 0:04:20 lr: 0.000097 grad: 0.0854 (0.0876) loss: 0.8228 (0.8216) time: 0.1504 data: 0.0604 max mem: 9377 +Train: [34] [4700/6250] eta: 0:04:04 lr: 0.000097 grad: 0.0829 (0.0875) loss: 0.8238 (0.8216) time: 0.1859 data: 0.0948 max mem: 9377 +Train: [34] [4800/6250] eta: 0:03:49 lr: 0.000097 grad: 0.0903 (0.0875) loss: 0.8241 (0.8216) time: 0.1620 data: 0.0773 max mem: 9377 +Train: [34] [4900/6250] eta: 0:03:33 lr: 0.000097 grad: 0.0845 (0.0876) loss: 0.8135 (0.8215) time: 0.1989 data: 0.1077 max mem: 9377 +Train: [34] [5000/6250] eta: 0:03:18 lr: 0.000097 grad: 0.1003 (0.0877) loss: 0.8175 (0.8214) time: 0.1534 data: 0.0715 max mem: 9377 +Train: [34] [5100/6250] eta: 0:03:02 lr: 0.000097 grad: 0.0912 (0.0878) loss: 0.8188 (0.8214) time: 0.1751 data: 0.0896 max mem: 9377 +Train: [34] [5200/6250] eta: 0:02:46 lr: 0.000097 grad: 0.0921 (0.0879) loss: 0.8170 (0.8213) time: 0.1742 data: 0.0831 max mem: 9377 +Train: [34] [5300/6250] eta: 0:02:30 lr: 0.000097 grad: 0.0881 (0.0879) loss: 0.8214 (0.8213) time: 0.1866 data: 0.0940 max mem: 9377 +Train: [34] [5400/6250] eta: 0:02:15 lr: 0.000097 grad: 0.0841 (0.0881) loss: 0.8184 (0.8212) time: 0.1892 data: 0.0997 max mem: 9377 +Train: [34] [5500/6250] eta: 0:01:59 lr: 0.000097 grad: 0.0851 (0.0881) loss: 0.8205 (0.8212) time: 0.1559 data: 0.0666 max mem: 9377 +Train: [34] [5600/6250] eta: 0:01:43 lr: 0.000097 grad: 0.0870 (0.0882) loss: 0.8232 (0.8212) time: 0.2166 data: 0.1409 max mem: 9377 +Train: [34] [5700/6250] eta: 0:01:27 lr: 0.000097 grad: 0.0873 (0.0882) loss: 0.8220 (0.8212) time: 0.1563 data: 0.0753 max mem: 9377 +Train: [34] [5800/6250] eta: 0:01:11 lr: 0.000097 grad: 0.0879 (0.0882) loss: 0.8214 (0.8211) time: 0.1700 data: 0.0904 max mem: 9377 +Train: [34] [5900/6250] eta: 0:00:55 lr: 0.000097 grad: 0.0984 (0.0883) loss: 0.8192 (0.8211) time: 0.1666 data: 0.0795 max mem: 9377 +Train: [34] [6000/6250] eta: 0:00:39 lr: 0.000097 grad: 0.0803 (0.0883) loss: 0.8289 (0.8211) time: 0.1408 data: 0.0548 max mem: 9377 +Train: [34] [6100/6250] eta: 0:00:23 lr: 0.000097 grad: 0.0860 (0.0883) loss: 0.8172 (0.8211) time: 0.1927 data: 0.1087 max mem: 9377 +Train: [34] [6200/6250] eta: 0:00:08 lr: 0.000097 grad: 0.0870 (0.0883) loss: 0.8235 (0.8211) time: 0.1738 data: 0.0845 max mem: 9377 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.0915 (0.0884) loss: 0.8143 (0.8211) time: 0.1799 data: 0.0911 max mem: 9377 +Train: [34] Total time: 0:16:48 (0.1614 s / it) +Averaged stats: lr: 0.000097 grad: 0.0915 (0.0884) loss: 0.8143 (0.8211) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:03:50 loss: 0.8341 (0.8341) time: 3.7199 data: 3.6301 max mem: 9377 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8273 (0.8317) time: 0.1439 data: 0.1185 max mem: 9377 +Eval (hcp-train-subset): [34] Total time: 0:00:15 (0.2469 s / it) +Averaged stats (hcp-train-subset): loss: 0.8273 (0.8317) +Making plots (hcp-train-subset): example=22 +Eval (hcp-val): [34] [ 0/62] eta: 0:05:22 loss: 0.8418 (0.8418) time: 5.1988 data: 5.1679 max mem: 9377 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8394 (0.8413) time: 0.1355 data: 0.1102 max mem: 9377 +Eval (hcp-val): [34] Total time: 0:00:15 (0.2467 s / it) +Averaged stats (hcp-val): loss: 0.8394 (0.8413) +Making plots (hcp-val): example=22 +Eval (nsd-val): [34] [ 0/62] eta: 0:06:41 loss: 0.8101 (0.8101) time: 6.4737 data: 6.4424 max mem: 9377 +Eval (nsd-val): [34] [61/62] eta: 0:00:00 loss: 0.8165 (0.8181) time: 0.1364 data: 0.1109 max mem: 9377 +Eval (nsd-val): [34] Total time: 0:00:15 (0.2473 s / it) +Averaged stats (nsd-val): loss: 0.8165 (0.8181) +Making plots (nsd-val): example=5 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00034.pth +Train: [35] [ 0/6250] eta: 12:47:27 lr: 0.000097 grad: nan (nan) loss: 0.8253 (0.8253) time: 7.3675 data: 7.2285 max mem: 9377 +Train: [35] [ 100/6250] eta: 0:24:34 lr: 0.000097 grad: 0.0875 (0.1083) loss: 0.8299 (0.8324) time: 0.1739 data: 0.0609 max mem: 9377 +Train: [35] [ 200/6250] eta: 0:21:14 lr: 0.000097 grad: 0.0827 (0.1005) loss: 0.8349 (0.8308) time: 0.1697 data: 0.0712 max mem: 9377 +Train: [35] [ 300/6250] eta: 0:19:31 lr: 0.000097 grad: 0.0850 (0.0967) loss: 0.8243 (0.8293) time: 0.1866 data: 0.0864 max mem: 9377 +Train: [35] [ 400/6250] eta: 0:18:32 lr: 0.000097 grad: 0.0840 (0.0946) loss: 0.8177 (0.8274) time: 0.1609 data: 0.0566 max mem: 9377 +Train: [35] [ 500/6250] eta: 0:17:59 lr: 0.000097 grad: 0.0804 (0.0924) loss: 0.8227 (0.8265) time: 0.1954 data: 0.0811 max mem: 9377 +Train: [35] [ 600/6250] eta: 0:17:12 lr: 0.000097 grad: 0.0821 (0.0914) loss: 0.8271 (0.8258) time: 0.1581 data: 0.0578 max mem: 9377 +Train: [35] [ 700/6250] eta: 0:17:04 lr: 0.000096 grad: 0.0807 (0.0904) loss: 0.8267 (0.8254) time: 0.1654 data: 0.0822 max mem: 9377 +Train: [35] [ 800/6250] eta: 0:16:41 lr: 0.000096 grad: 0.0840 (0.0897) loss: 0.8261 (0.8250) time: 0.1873 data: 0.1000 max mem: 9377 +Train: [35] [ 900/6250] eta: 0:16:15 lr: 0.000096 grad: 0.0793 (0.0889) loss: 0.8206 (0.8250) time: 0.1802 data: 0.0849 max mem: 9377 +Train: [35] [1000/6250] eta: 0:15:41 lr: 0.000096 grad: 0.0851 (0.0887) loss: 0.8216 (0.8247) time: 0.1366 data: 0.0459 max mem: 9377 +Train: [35] [1100/6250] eta: 0:15:13 lr: 0.000096 grad: 0.0818 (0.0883) loss: 0.8160 (0.8242) time: 0.1662 data: 0.0717 max mem: 9377 +Train: [35] [1200/6250] eta: 0:14:46 lr: 0.000096 grad: 0.0893 (0.0885) loss: 0.8262 (0.8237) time: 0.1618 data: 0.0683 max mem: 9377 +Train: [35] [1300/6250] eta: 0:14:20 lr: 0.000096 grad: 0.0837 (0.0885) loss: 0.8205 (0.8233) time: 0.1506 data: 0.0564 max mem: 9377 +Train: [35] [1400/6250] eta: 0:13:53 lr: 0.000096 grad: 0.0832 (0.0885) loss: 0.8225 (0.8229) time: 0.1229 data: 0.0331 max mem: 9377 +Train: [35] [1500/6250] eta: 0:13:27 lr: 0.000096 grad: 0.0884 (0.0888) loss: 0.8102 (0.8225) time: 0.1502 data: 0.0582 max mem: 9377 +Train: [35] [1600/6250] eta: 0:13:04 lr: 0.000096 grad: 0.0895 (0.0890) loss: 0.8149 (0.8220) time: 0.1252 data: 0.0337 max mem: 9377 +Train: [35] [1700/6250] eta: 0:12:44 lr: 0.000096 grad: 0.0826 (0.0891) loss: 0.8213 (0.8219) time: 0.1507 data: 0.0660 max mem: 9377 +Train: [35] [1800/6250] eta: 0:12:25 lr: 0.000096 grad: 0.0814 (0.0889) loss: 0.8190 (0.8217) time: 0.1280 data: 0.0480 max mem: 9377 +Train: [35] [1900/6250] eta: 0:12:06 lr: 0.000096 grad: 0.0869 (0.0890) loss: 0.8166 (0.8213) time: 0.1550 data: 0.0683 max mem: 9377 +Train: [35] [2000/6250] eta: 0:11:50 lr: 0.000096 grad: 0.0860 (0.0891) loss: 0.8102 (0.8210) time: 0.1784 data: 0.1014 max mem: 9377 +Train: [35] [2100/6250] eta: 0:11:36 lr: 0.000096 grad: 0.0877 (0.0890) loss: 0.8184 (0.8208) time: 0.2605 data: 0.1648 max mem: 9377 +Train: [35] [2200/6250] eta: 0:11:18 lr: 0.000096 grad: 0.0909 (0.0890) loss: 0.8110 (0.8206) time: 0.1554 data: 0.0630 max mem: 9377 +Train: [35] [2300/6250] eta: 0:11:01 lr: 0.000096 grad: 0.0963 (0.0892) loss: 0.8095 (0.8204) time: 0.1716 data: 0.0849 max mem: 9377 +Train: [35] [2400/6250] eta: 0:10:42 lr: 0.000096 grad: 0.0874 (0.0894) loss: 0.8116 (0.8201) time: 0.1626 data: 0.0805 max mem: 9377 +Train: [35] [2500/6250] eta: 0:10:24 lr: 0.000096 grad: 0.0868 (0.0895) loss: 0.8134 (0.8200) time: 0.1638 data: 0.0830 max mem: 9377 +Train: [35] [2600/6250] eta: 0:10:08 lr: 0.000096 grad: 0.0883 (0.0896) loss: 0.8137 (0.8198) time: 0.1631 data: 0.0817 max mem: 9377 +Train: [35] [2700/6250] eta: 0:09:52 lr: 0.000096 grad: 0.0826 (0.0897) loss: 0.8168 (0.8197) time: 0.1528 data: 0.0707 max mem: 9377 +Train: [35] [2800/6250] eta: 0:09:36 lr: 0.000096 grad: 0.0850 (0.0898) loss: 0.8188 (0.8196) time: 0.2021 data: 0.1136 max mem: 9377 +Train: [35] [2900/6250] eta: 0:09:19 lr: 0.000096 grad: 0.0914 (0.0899) loss: 0.8156 (0.8196) time: 0.1791 data: 0.1025 max mem: 9377 +Train: [35] [3000/6250] eta: 0:09:03 lr: 0.000096 grad: 0.0888 (0.0901) loss: 0.8102 (0.8194) time: 0.1997 data: 0.1256 max mem: 9377 +Train: [35] [3100/6250] eta: 0:08:46 lr: 0.000096 grad: 0.0915 (0.0901) loss: 0.8121 (0.8194) time: 0.1640 data: 0.0797 max mem: 9377 +Train: [35] [3200/6250] eta: 0:08:29 lr: 0.000096 grad: 0.0863 (0.0901) loss: 0.8153 (0.8193) time: 0.1737 data: 0.0923 max mem: 9377 +Train: [35] [3300/6250] eta: 0:08:12 lr: 0.000096 grad: 0.0865 (0.0901) loss: 0.8190 (0.8192) time: 0.1680 data: 0.0819 max mem: 9377 +Train: [35] [3400/6250] eta: 0:07:56 lr: 0.000096 grad: 0.0856 (0.0902) loss: 0.8149 (0.8191) time: 0.1619 data: 0.0741 max mem: 9377 +Train: [35] [3500/6250] eta: 0:07:39 lr: 0.000096 grad: 0.0859 (0.0902) loss: 0.8189 (0.8190) time: 0.1607 data: 0.0720 max mem: 9377 +Train: [35] [3600/6250] eta: 0:07:21 lr: 0.000096 grad: 0.0829 (0.0902) loss: 0.8182 (0.8190) time: 0.1618 data: 0.0689 max mem: 9377 +Train: [35] [3700/6250] eta: 0:07:04 lr: 0.000096 grad: 0.0830 (0.0902) loss: 0.8235 (0.8189) time: 0.1437 data: 0.0527 max mem: 9377 +Train: [35] [3800/6250] eta: 0:06:47 lr: 0.000096 grad: 0.0827 (0.0902) loss: 0.8188 (0.8188) time: 0.1587 data: 0.0651 max mem: 9377 +Train: [35] [3900/6250] eta: 0:06:29 lr: 0.000096 grad: 0.0837 (0.0901) loss: 0.8206 (0.8188) time: 0.1454 data: 0.0591 max mem: 9377 +Train: [35] [4000/6250] eta: 0:06:12 lr: 0.000096 grad: 0.0829 (0.0902) loss: 0.8181 (0.8187) time: 0.1474 data: 0.0583 max mem: 9377 +Train: [35] [4100/6250] eta: 0:05:55 lr: 0.000096 grad: 0.0826 (0.0901) loss: 0.8141 (0.8187) time: 0.1531 data: 0.0646 max mem: 9377 +Train: [35] [4200/6250] eta: 0:05:38 lr: 0.000096 grad: 0.0825 (0.0901) loss: 0.8161 (0.8186) time: 0.1864 data: 0.0985 max mem: 9377 +Train: [35] [4300/6250] eta: 0:05:21 lr: 0.000095 grad: 0.0879 (0.0901) loss: 0.8182 (0.8185) time: 0.1303 data: 0.0460 max mem: 9377 +Train: [35] [4400/6250] eta: 0:05:05 lr: 0.000095 grad: 0.0826 (0.0903) loss: 0.8137 (0.8185) time: 0.1666 data: 0.0821 max mem: 9377 +Train: [35] [4500/6250] eta: 0:04:48 lr: 0.000095 grad: 0.0862 (0.0902) loss: 0.8133 (0.8184) time: 0.1527 data: 0.0630 max mem: 9377 +Train: [35] [4600/6250] eta: 0:04:32 lr: 0.000095 grad: 0.0870 (0.0902) loss: 0.8118 (0.8183) time: 0.1858 data: 0.1102 max mem: 9377 +Train: [35] [4700/6250] eta: 0:04:15 lr: 0.000095 grad: 0.0867 (0.0902) loss: 0.8184 (0.8183) time: 0.1708 data: 0.0814 max mem: 9377 +Train: [35] [4800/6250] eta: 0:03:59 lr: 0.000095 grad: 0.0868 (0.0902) loss: 0.8207 (0.8183) time: 0.1602 data: 0.0773 max mem: 9377 +Train: [35] [4900/6250] eta: 0:03:42 lr: 0.000095 grad: 0.0912 (0.0901) loss: 0.8185 (0.8183) time: 0.1407 data: 0.0451 max mem: 9377 +Train: [35] [5000/6250] eta: 0:03:25 lr: 0.000095 grad: 0.0850 (0.0901) loss: 0.8224 (0.8183) time: 0.1451 data: 0.0478 max mem: 9377 +Train: [35] [5100/6250] eta: 0:03:09 lr: 0.000095 grad: 0.0816 (0.0901) loss: 0.8188 (0.8183) time: 0.1473 data: 0.0568 max mem: 9377 +Train: [35] [5200/6250] eta: 0:02:52 lr: 0.000095 grad: 0.0873 (0.0901) loss: 0.8204 (0.8183) time: 0.1498 data: 0.0551 max mem: 9377 +Train: [35] [5300/6250] eta: 0:02:36 lr: 0.000095 grad: 0.0965 (0.0901) loss: 0.8097 (0.8182) time: 0.1562 data: 0.0646 max mem: 9377 +Train: [35] [5400/6250] eta: 0:02:19 lr: 0.000095 grad: 0.0882 (0.0901) loss: 0.8179 (0.8182) time: 0.1539 data: 0.0689 max mem: 9377 +Train: [35] [5500/6250] eta: 0:02:03 lr: 0.000095 grad: 0.0937 (0.0902) loss: 0.8173 (0.8182) time: 0.1748 data: 0.0874 max mem: 9377 +Train: [35] [5600/6250] eta: 0:01:46 lr: 0.000095 grad: 0.0941 (0.0903) loss: 0.8160 (0.8181) time: 0.1633 data: 0.0755 max mem: 9377 +Train: [35] [5700/6250] eta: 0:01:30 lr: 0.000095 grad: 0.0928 (0.0903) loss: 0.8098 (0.8180) time: 0.1618 data: 0.0883 max mem: 9377 +Train: [35] [5800/6250] eta: 0:01:13 lr: 0.000095 grad: 0.0946 (0.0903) loss: 0.8106 (0.8180) time: 0.1629 data: 0.0808 max mem: 9377 +Train: [35] [5900/6250] eta: 0:00:57 lr: 0.000095 grad: 0.0928 (0.0904) loss: 0.8176 (0.8179) time: 0.1820 data: 0.1055 max mem: 9377 +Train: [35] [6000/6250] eta: 0:00:41 lr: 0.000095 grad: 0.0911 (0.0905) loss: 0.8112 (0.8178) time: 0.1952 data: 0.0983 max mem: 9377 +Train: [35] [6100/6250] eta: 0:00:24 lr: 0.000095 grad: 0.0929 (0.0905) loss: 0.8175 (0.8177) time: 0.1788 data: 0.0916 max mem: 9377 +Train: [35] [6200/6250] eta: 0:00:08 lr: 0.000095 grad: 0.0943 (0.0906) loss: 0.8160 (0.8177) time: 0.1914 data: 0.0963 max mem: 9377 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.0973 (0.0907) loss: 0.8125 (0.8176) time: 0.1699 data: 0.0842 max mem: 9377 +Train: [35] Total time: 0:17:15 (0.1657 s / it) +Averaged stats: lr: 0.000095 grad: 0.0973 (0.0907) loss: 0.8125 (0.8176) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:05:53 loss: 0.8361 (0.8361) time: 5.7033 data: 5.6677 max mem: 9377 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8335 (0.8315) time: 0.1454 data: 0.1202 max mem: 9377 +Eval (hcp-train-subset): [35] Total time: 0:00:15 (0.2504 s / it) +Averaged stats (hcp-train-subset): loss: 0.8335 (0.8315) +Eval (hcp-val): [35] [ 0/62] eta: 0:03:44 loss: 0.8385 (0.8385) time: 3.6130 data: 3.5245 max mem: 9377 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8402 (0.8411) time: 0.1417 data: 0.1164 max mem: 9377 +Eval (hcp-val): [35] Total time: 0:00:14 (0.2408 s / it) +Averaged stats (hcp-val): loss: 0.8402 (0.8411) +Eval (nsd-val): [35] [ 0/62] eta: 0:05:36 loss: 0.8042 (0.8042) time: 5.4255 data: 5.3950 max mem: 9377 +Eval (nsd-val): [35] [61/62] eta: 0:00:00 loss: 0.8135 (0.8140) time: 0.1589 data: 0.1294 max mem: 9377 +Eval (nsd-val): [35] Total time: 0:00:14 (0.2417 s / it) +Averaged stats (nsd-val): loss: 0.8135 (0.8140) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [36] [ 0/6250] eta: 12:19:21 lr: 0.000095 grad: 0.0892 (0.0892) loss: 0.8704 (0.8704) time: 7.0979 data: 6.9940 max mem: 9377 +Train: [36] [ 100/6250] eta: 0:23:10 lr: 0.000095 grad: 0.0856 (0.0916) loss: 0.8304 (0.8397) time: 0.1715 data: 0.0726 max mem: 9377 +Train: [36] [ 200/6250] eta: 0:19:48 lr: 0.000095 grad: 0.0732 (0.0865) loss: 0.8386 (0.8371) time: 0.1636 data: 0.0632 max mem: 9377 +Train: [36] [ 300/6250] eta: 0:17:58 lr: 0.000095 grad: 0.0832 (0.0865) loss: 0.8330 (0.8356) time: 0.1475 data: 0.0600 max mem: 9377 +Train: [36] [ 400/6250] eta: 0:17:02 lr: 0.000095 grad: 0.0852 (0.0864) loss: 0.8326 (0.8334) time: 0.1687 data: 0.0757 max mem: 9377 +Train: [36] [ 500/6250] eta: 0:16:22 lr: 0.000095 grad: 0.0866 (0.0872) loss: 0.8307 (0.8320) time: 0.1494 data: 0.0595 max mem: 9377 +Train: [36] [ 600/6250] eta: 0:16:04 lr: 0.000095 grad: 0.0832 (0.0873) loss: 0.8239 (0.8310) time: 0.1642 data: 0.0670 max mem: 9377 +Train: [36] [ 700/6250] eta: 0:15:49 lr: 0.000095 grad: 0.0845 (0.0875) loss: 0.8280 (0.8300) time: 0.1515 data: 0.0592 max mem: 9377 +Train: [36] [ 800/6250] eta: 0:15:39 lr: 0.000095 grad: 0.0828 (0.0874) loss: 0.8303 (0.8295) time: 0.1668 data: 0.0748 max mem: 9377 +Train: [36] [ 900/6250] eta: 0:15:16 lr: 0.000095 grad: 0.0867 (0.0871) loss: 0.8235 (0.8290) time: 0.1759 data: 0.0880 max mem: 9377 +Train: [36] [1000/6250] eta: 0:14:53 lr: 0.000095 grad: 0.0770 (0.0865) loss: 0.8359 (0.8288) time: 0.1914 data: 0.1119 max mem: 9377 +Train: [36] [1100/6250] eta: 0:14:34 lr: 0.000095 grad: 0.0795 (0.0862) loss: 0.8290 (0.8286) time: 0.1669 data: 0.0737 max mem: 9377 +Train: [36] [1200/6250] eta: 0:14:22 lr: 0.000095 grad: 0.0911 (0.0865) loss: 0.8177 (0.8281) time: 0.1831 data: 0.0778 max mem: 9377 +Train: [36] [1300/6250] eta: 0:14:02 lr: 0.000095 grad: 0.0847 (0.0865) loss: 0.8256 (0.8277) time: 0.1633 data: 0.0733 max mem: 9377 +Train: [36] [1400/6250] eta: 0:13:42 lr: 0.000095 grad: 0.0822 (0.0863) loss: 0.8176 (0.8271) time: 0.1627 data: 0.0813 max mem: 9377 +Train: [36] [1500/6250] eta: 0:13:21 lr: 0.000095 grad: 0.0857 (0.0865) loss: 0.8151 (0.8265) time: 0.1613 data: 0.0742 max mem: 9377 +Train: [36] [1600/6250] eta: 0:12:58 lr: 0.000094 grad: 0.0822 (0.0865) loss: 0.8240 (0.8261) time: 0.1208 data: 0.0264 max mem: 9377 +Train: [36] [1700/6250] eta: 0:12:40 lr: 0.000094 grad: 0.0873 (0.0868) loss: 0.8154 (0.8255) time: 0.1730 data: 0.0898 max mem: 9377 +Train: [36] [1800/6250] eta: 0:12:20 lr: 0.000094 grad: 0.0806 (0.0869) loss: 0.8173 (0.8250) time: 0.1415 data: 0.0554 max mem: 9377 +Train: [36] [1900/6250] eta: 0:12:05 lr: 0.000094 grad: 0.0810 (0.0871) loss: 0.8155 (0.8245) time: 0.1661 data: 0.0761 max mem: 9377 +Train: [36] [2000/6250] eta: 0:11:48 lr: 0.000094 grad: 0.0858 (0.0872) loss: 0.8191 (0.8241) time: 0.1736 data: 0.0948 max mem: 9377 +Train: [36] [2100/6250] eta: 0:11:28 lr: 0.000094 grad: 0.0807 (0.0873) loss: 0.8206 (0.8237) time: 0.1380 data: 0.0560 max mem: 9377 +Train: [36] [2200/6250] eta: 0:11:09 lr: 0.000094 grad: 0.0882 (0.0875) loss: 0.8180 (0.8233) time: 0.1609 data: 0.0668 max mem: 9377 +Train: [36] [2300/6250] eta: 0:10:53 lr: 0.000094 grad: 0.0919 (0.0876) loss: 0.8148 (0.8229) time: 0.1565 data: 0.0706 max mem: 9377 +Train: [36] [2400/6250] eta: 0:10:32 lr: 0.000094 grad: 0.0863 (0.0876) loss: 0.8185 (0.8228) time: 0.1360 data: 0.0450 max mem: 9377 +Train: [36] [2500/6250] eta: 0:10:14 lr: 0.000094 grad: 0.0825 (0.0876) loss: 0.8168 (0.8225) time: 0.1484 data: 0.0589 max mem: 9377 +Train: [36] [2600/6250] eta: 0:09:55 lr: 0.000094 grad: 0.0872 (0.0876) loss: 0.8204 (0.8224) time: 0.1315 data: 0.0422 max mem: 9377 +Train: [36] [2700/6250] eta: 0:09:37 lr: 0.000094 grad: 0.0946 (0.0877) loss: 0.8127 (0.8222) time: 0.1528 data: 0.0622 max mem: 9377 +Train: [36] [2800/6250] eta: 0:09:20 lr: 0.000094 grad: 0.0864 (0.0878) loss: 0.8218 (0.8220) time: 0.1633 data: 0.0830 max mem: 9377 +Train: [36] [2900/6250] eta: 0:09:03 lr: 0.000094 grad: 0.0945 (0.0879) loss: 0.8112 (0.8219) time: 0.1731 data: 0.0851 max mem: 9377 +Train: [36] [3000/6250] eta: 0:08:46 lr: 0.000094 grad: 0.0832 (0.0880) loss: 0.8194 (0.8217) time: 0.1501 data: 0.0666 max mem: 9377 +Train: [36] [3100/6250] eta: 0:08:30 lr: 0.000094 grad: 0.0838 (0.0880) loss: 0.8186 (0.8216) time: 0.1616 data: 0.0732 max mem: 9377 +Train: [36] [3200/6250] eta: 0:08:14 lr: 0.000094 grad: 0.0874 (0.0881) loss: 0.8167 (0.8215) time: 0.1592 data: 0.0831 max mem: 9377 +Train: [36] [3300/6250] eta: 0:07:59 lr: 0.000094 grad: 0.0882 (0.0881) loss: 0.8246 (0.8216) time: 0.2158 data: 0.1335 max mem: 9377 +Train: [36] [3400/6250] eta: 0:07:41 lr: 0.000094 grad: 0.0851 (0.0881) loss: 0.8252 (0.8215) time: 0.1563 data: 0.0749 max mem: 9377 +Train: [36] [3500/6250] eta: 0:07:25 lr: 0.000094 grad: 0.0870 (0.0882) loss: 0.8248 (0.8214) time: 0.1283 data: 0.0454 max mem: 9377 +Train: [36] [3600/6250] eta: 0:07:09 lr: 0.000094 grad: 0.0864 (0.0882) loss: 0.8231 (0.8214) time: 0.1458 data: 0.0567 max mem: 9377 +Train: [36] [3700/6250] eta: 0:06:52 lr: 0.000094 grad: 0.0901 (0.0882) loss: 0.8158 (0.8213) time: 0.1155 data: 0.0176 max mem: 9377 +Train: [36] [3800/6250] eta: 0:06:36 lr: 0.000094 grad: 0.0848 (0.0883) loss: 0.8179 (0.8213) time: 0.1680 data: 0.0849 max mem: 9377 +Train: [36] [3900/6250] eta: 0:06:19 lr: 0.000094 grad: 0.0862 (0.0883) loss: 0.8163 (0.8212) time: 0.1329 data: 0.0427 max mem: 9377 +Train: [36] [4000/6250] eta: 0:06:04 lr: 0.000094 grad: 0.0916 (0.0884) loss: 0.8173 (0.8211) time: 0.1812 data: 0.0901 max mem: 9377 +Train: [36] [4100/6250] eta: 0:05:47 lr: 0.000094 grad: 0.0888 (0.0885) loss: 0.8194 (0.8211) time: 0.1645 data: 0.0799 max mem: 9377 +Train: [36] [4200/6250] eta: 0:05:31 lr: 0.000094 grad: 0.0910 (0.0885) loss: 0.8147 (0.8211) time: 0.1292 data: 0.0453 max mem: 9377 +Train: [36] [4300/6250] eta: 0:05:15 lr: 0.000094 grad: 0.0849 (0.0886) loss: 0.8249 (0.8212) time: 0.1551 data: 0.0653 max mem: 9377 +Train: [36] [4400/6250] eta: 0:04:59 lr: 0.000094 grad: 0.0918 (0.0886) loss: 0.8194 (0.8212) time: 0.1675 data: 0.0794 max mem: 9377 +Train: [36] [4500/6250] eta: 0:04:42 lr: 0.000094 grad: 0.0881 (0.0886) loss: 0.8260 (0.8212) time: 0.1536 data: 0.0695 max mem: 9377 +Train: [36] [4600/6250] eta: 0:04:26 lr: 0.000094 grad: 0.0856 (0.0886) loss: 0.8198 (0.8212) time: 0.1542 data: 0.0675 max mem: 9377 +Train: [36] [4700/6250] eta: 0:04:10 lr: 0.000094 grad: 0.0853 (0.0887) loss: 0.8165 (0.8211) time: 0.1705 data: 0.0856 max mem: 9377 +Train: [36] [4800/6250] eta: 0:03:53 lr: 0.000094 grad: 0.0861 (0.0887) loss: 0.8183 (0.8211) time: 0.1502 data: 0.0667 max mem: 9377 +Train: [36] [4900/6250] eta: 0:03:37 lr: 0.000094 grad: 0.0848 (0.0887) loss: 0.8263 (0.8211) time: 0.1289 data: 0.0380 max mem: 9377 +Train: [36] [5000/6250] eta: 0:03:20 lr: 0.000094 grad: 0.0803 (0.0887) loss: 0.8190 (0.8211) time: 0.1538 data: 0.0527 max mem: 9377 +Train: [36] [5100/6250] eta: 0:03:04 lr: 0.000093 grad: 0.0874 (0.0887) loss: 0.8205 (0.8211) time: 0.1761 data: 0.0868 max mem: 9377 +Train: [36] [5200/6250] eta: 0:02:48 lr: 0.000093 grad: 0.0864 (0.0888) loss: 0.8183 (0.8210) time: 0.1558 data: 0.0710 max mem: 9377 +Train: [36] [5300/6250] eta: 0:02:32 lr: 0.000093 grad: 0.0897 (0.0889) loss: 0.8160 (0.8209) time: 0.1347 data: 0.0465 max mem: 9377 +Train: [36] [5400/6250] eta: 0:02:16 lr: 0.000093 grad: 0.0868 (0.0890) loss: 0.8053 (0.8208) time: 0.1637 data: 0.0768 max mem: 9377 +Train: [36] [5500/6250] eta: 0:02:00 lr: 0.000093 grad: 0.0929 (0.0890) loss: 0.8223 (0.8207) time: 0.1501 data: 0.0617 max mem: 9377 +Train: [36] [5600/6250] eta: 0:01:44 lr: 0.000093 grad: 0.0920 (0.0891) loss: 0.8169 (0.8207) time: 0.1656 data: 0.0809 max mem: 9377 +Train: [36] [5700/6250] eta: 0:01:28 lr: 0.000093 grad: 0.0846 (0.0891) loss: 0.8197 (0.8207) time: 0.1414 data: 0.0570 max mem: 9377 +Train: [36] [5800/6250] eta: 0:01:12 lr: 0.000093 grad: 0.0883 (0.0892) loss: 0.8175 (0.8207) time: 0.1481 data: 0.0682 max mem: 9377 +Train: [36] [5900/6250] eta: 0:00:56 lr: 0.000093 grad: 0.0801 (0.0892) loss: 0.8332 (0.8207) time: 0.1780 data: 0.0933 max mem: 9377 +Train: [36] [6000/6250] eta: 0:00:40 lr: 0.000093 grad: 0.0853 (0.0892) loss: 0.8187 (0.8206) time: 0.1230 data: 0.0319 max mem: 9377 +Train: [36] [6100/6250] eta: 0:00:24 lr: 0.000093 grad: 0.0815 (0.0892) loss: 0.8277 (0.8207) time: 0.1473 data: 0.0642 max mem: 9377 +Train: [36] [6200/6250] eta: 0:00:08 lr: 0.000093 grad: 0.0851 (0.0892) loss: 0.8237 (0.8207) time: 0.1707 data: 0.0880 max mem: 9377 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.0854 (0.0892) loss: 0.8313 (0.8207) time: 0.1601 data: 0.0724 max mem: 9377 +Train: [36] Total time: 0:16:50 (0.1617 s / it) +Averaged stats: lr: 0.000093 grad: 0.0854 (0.0892) loss: 0.8313 (0.8207) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:03:39 loss: 0.8364 (0.8364) time: 3.5366 data: 3.4506 max mem: 9377 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8270 (0.8293) time: 0.1408 data: 0.1136 max mem: 9377 +Eval (hcp-train-subset): [36] Total time: 0:00:14 (0.2349 s / it) +Averaged stats (hcp-train-subset): loss: 0.8270 (0.8293) +Eval (hcp-val): [36] [ 0/62] eta: 0:05:46 loss: 0.8368 (0.8368) time: 5.5929 data: 5.5624 max mem: 9377 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8395 (0.8415) time: 0.1284 data: 0.1003 max mem: 9377 +Eval (hcp-val): [36] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (hcp-val): loss: 0.8395 (0.8415) +Eval (nsd-val): [36] [ 0/62] eta: 0:03:30 loss: 0.8037 (0.8037) time: 3.3955 data: 3.3182 max mem: 9377 +Eval (nsd-val): [36] [61/62] eta: 0:00:00 loss: 0.8150 (0.8151) time: 0.1261 data: 0.1008 max mem: 9377 +Eval (nsd-val): [36] Total time: 0:00:14 (0.2315 s / it) +Averaged stats (nsd-val): loss: 0.8150 (0.8151) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 11:21:13 lr: 0.000093 grad: 0.1203 (0.1203) loss: 0.8465 (0.8465) time: 6.5397 data: 6.4259 max mem: 9377 +Train: [37] [ 100/6250] eta: 0:21:36 lr: 0.000093 grad: 0.0932 (0.1049) loss: 0.8325 (0.8350) time: 0.1235 data: 0.0021 max mem: 9377 +Train: [37] [ 200/6250] eta: 0:18:48 lr: 0.000093 grad: 0.0956 (0.1020) loss: 0.8136 (0.8276) time: 0.1575 data: 0.0598 max mem: 9377 +Train: [37] [ 300/6250] eta: 0:17:18 lr: 0.000093 grad: 0.0932 (0.1010) loss: 0.8190 (0.8241) time: 0.1540 data: 0.0506 max mem: 9377 +Train: [37] [ 400/6250] eta: 0:16:23 lr: 0.000093 grad: 0.0920 (0.0989) loss: 0.8071 (0.8221) time: 0.1519 data: 0.0592 max mem: 9377 +Train: [37] [ 500/6250] eta: 0:15:49 lr: 0.000093 grad: 0.0827 (0.0963) loss: 0.8311 (0.8223) time: 0.1364 data: 0.0283 max mem: 9377 +Train: [37] [ 600/6250] eta: 0:15:21 lr: 0.000093 grad: 0.0876 (0.0948) loss: 0.8244 (0.8225) time: 0.1847 data: 0.0916 max mem: 9377 +Train: [37] [ 700/6250] eta: 0:15:02 lr: 0.000093 grad: 0.0882 (0.0941) loss: 0.8209 (0.8224) time: 0.1627 data: 0.0721 max mem: 9377 +Train: [37] [ 800/6250] eta: 0:14:48 lr: 0.000093 grad: 0.0831 (0.0934) loss: 0.8211 (0.8221) time: 0.1694 data: 0.0887 max mem: 9377 +Train: [37] [ 900/6250] eta: 0:14:36 lr: 0.000093 grad: 0.0945 (0.0930) loss: 0.8172 (0.8220) time: 0.1871 data: 0.0929 max mem: 9377 +Train: [37] [1000/6250] eta: 0:14:17 lr: 0.000093 grad: 0.0911 (0.0931) loss: 0.8224 (0.8215) time: 0.1479 data: 0.0606 max mem: 9377 +Train: [37] [1100/6250] eta: 0:13:53 lr: 0.000093 grad: 0.0894 (0.0929) loss: 0.8162 (0.8212) time: 0.1518 data: 0.0662 max mem: 9377 +Train: [37] [1200/6250] eta: 0:13:40 lr: 0.000093 grad: 0.0917 (0.0929) loss: 0.8136 (0.8208) time: 0.1613 data: 0.0651 max mem: 9377 +Train: [37] [1300/6250] eta: 0:13:24 lr: 0.000093 grad: 0.0829 (0.0927) loss: 0.8221 (0.8207) time: 0.1758 data: 0.0831 max mem: 9377 +Train: [37] [1400/6250] eta: 0:13:03 lr: 0.000093 grad: 0.0877 (0.0932) loss: 0.8235 (0.8206) time: 0.1500 data: 0.0657 max mem: 9377 +Train: [37] [1500/6250] eta: 0:12:44 lr: 0.000093 grad: 0.0883 (0.0932) loss: 0.8095 (0.8203) time: 0.1576 data: 0.0651 max mem: 9377 +Train: [37] [1600/6250] eta: 0:12:24 lr: 0.000093 grad: 0.0892 (0.0931) loss: 0.8206 (0.8202) time: 0.1410 data: 0.0596 max mem: 9377 +Train: [37] [1700/6250] eta: 0:12:05 lr: 0.000093 grad: 0.0874 (0.0929) loss: 0.8215 (0.8202) time: 0.1411 data: 0.0523 max mem: 9377 +Train: [37] [1800/6250] eta: 0:11:48 lr: 0.000093 grad: 0.0869 (0.0929) loss: 0.8235 (0.8200) time: 0.1613 data: 0.0731 max mem: 9377 +Train: [37] [1900/6250] eta: 0:11:34 lr: 0.000093 grad: 0.0947 (0.0931) loss: 0.8129 (0.8198) time: 0.1973 data: 0.1135 max mem: 9377 +Train: [37] [2000/6250] eta: 0:11:22 lr: 0.000093 grad: 0.0974 (0.0932) loss: 0.8041 (0.8195) time: 0.1785 data: 0.0914 max mem: 9377 +Train: [37] [2100/6250] eta: 0:11:10 lr: 0.000093 grad: 0.0978 (0.0933) loss: 0.8158 (0.8193) time: 0.1756 data: 0.0779 max mem: 9377 +Train: [37] [2200/6250] eta: 0:10:58 lr: 0.000093 grad: 0.0927 (0.0934) loss: 0.8158 (0.8189) time: 0.1844 data: 0.0796 max mem: 9377 +Train: [37] [2300/6250] eta: 0:10:43 lr: 0.000092 grad: 0.0848 (0.0934) loss: 0.8173 (0.8187) time: 0.1540 data: 0.0597 max mem: 9377 +Train: [37] [2400/6250] eta: 0:10:28 lr: 0.000092 grad: 0.0860 (0.0933) loss: 0.8152 (0.8185) time: 0.1564 data: 0.0617 max mem: 9377 +Train: [37] [2500/6250] eta: 0:10:11 lr: 0.000092 grad: 0.0869 (0.0932) loss: 0.8163 (0.8184) time: 0.1440 data: 0.0553 max mem: 9377 +Train: [37] [2600/6250] eta: 0:09:53 lr: 0.000092 grad: 0.0903 (0.0932) loss: 0.8164 (0.8182) time: 0.1381 data: 0.0471 max mem: 9377 +Train: [37] [2700/6250] eta: 0:09:36 lr: 0.000092 grad: 0.0886 (0.0933) loss: 0.8103 (0.8180) time: 0.1472 data: 0.0493 max mem: 9377 +Train: [37] [2800/6250] eta: 0:09:19 lr: 0.000092 grad: 0.0901 (0.0933) loss: 0.8132 (0.8179) time: 0.1586 data: 0.0766 max mem: 9377 +Train: [37] [2900/6250] eta: 0:09:03 lr: 0.000092 grad: 0.0995 (0.0934) loss: 0.8095 (0.8178) time: 0.1337 data: 0.0452 max mem: 9377 +Train: [37] [3000/6250] eta: 0:08:46 lr: 0.000092 grad: 0.0892 (0.0934) loss: 0.8096 (0.8176) time: 0.1576 data: 0.0662 max mem: 9377 +Train: [37] [3100/6250] eta: 0:08:30 lr: 0.000092 grad: 0.0918 (0.0935) loss: 0.8173 (0.8175) time: 0.1724 data: 0.0745 max mem: 9377 +Train: [37] [3200/6250] eta: 0:08:14 lr: 0.000092 grad: 0.0910 (0.0935) loss: 0.8151 (0.8175) time: 0.1686 data: 0.0773 max mem: 9377 +Train: [37] [3300/6250] eta: 0:07:58 lr: 0.000092 grad: 0.0957 (0.0935) loss: 0.8140 (0.8175) time: 0.1602 data: 0.0765 max mem: 9377 +Train: [37] [3400/6250] eta: 0:07:42 lr: 0.000092 grad: 0.0953 (0.0935) loss: 0.8175 (0.8175) time: 0.1930 data: 0.1093 max mem: 9377 +Train: [37] [3500/6250] eta: 0:07:26 lr: 0.000092 grad: 0.0933 (0.0935) loss: 0.8176 (0.8175) time: 0.1675 data: 0.0788 max mem: 9377 +Train: [37] [3600/6250] eta: 0:07:10 lr: 0.000092 grad: 0.0873 (0.0935) loss: 0.8207 (0.8175) time: 0.1599 data: 0.0594 max mem: 9377 +Train: [37] [3700/6250] eta: 0:06:54 lr: 0.000092 grad: 0.1001 (0.0935) loss: 0.8145 (0.8175) time: 0.1875 data: 0.1031 max mem: 9377 +Train: [37] [3800/6250] eta: 0:06:37 lr: 0.000092 grad: 0.0864 (0.0935) loss: 0.8201 (0.8175) time: 0.1458 data: 0.0612 max mem: 9377 +Train: [37] [3900/6250] eta: 0:06:20 lr: 0.000092 grad: 0.0862 (0.0935) loss: 0.8204 (0.8176) time: 0.1529 data: 0.0628 max mem: 9377 +Train: [37] [4000/6250] eta: 0:06:04 lr: 0.000092 grad: 0.0884 (0.0935) loss: 0.8267 (0.8176) time: 0.1399 data: 0.0537 max mem: 9377 +Train: [37] [4100/6250] eta: 0:05:48 lr: 0.000092 grad: 0.0919 (0.0935) loss: 0.8176 (0.8175) time: 0.1654 data: 0.0741 max mem: 9377 +Train: [37] [4200/6250] eta: 0:05:31 lr: 0.000092 grad: 0.0906 (0.0935) loss: 0.8177 (0.8175) time: 0.1561 data: 0.0627 max mem: 9377 +Train: [37] [4300/6250] eta: 0:05:15 lr: 0.000092 grad: 0.0877 (0.0935) loss: 0.8237 (0.8175) time: 0.1646 data: 0.0753 max mem: 9377 +Train: [37] [4400/6250] eta: 0:04:59 lr: 0.000092 grad: 0.0876 (0.0935) loss: 0.8152 (0.8175) time: 0.1730 data: 0.0874 max mem: 9377 +Train: [37] [4500/6250] eta: 0:04:43 lr: 0.000092 grad: 0.0926 (0.0935) loss: 0.8183 (0.8176) time: 0.1401 data: 0.0452 max mem: 9377 +Train: [37] [4600/6250] eta: 0:04:26 lr: 0.000092 grad: 0.0872 (0.0934) loss: 0.8232 (0.8176) time: 0.1456 data: 0.0506 max mem: 9377 +Train: [37] [4700/6250] eta: 0:04:10 lr: 0.000092 grad: 0.0899 (0.0935) loss: 0.8175 (0.8176) time: 0.1673 data: 0.0882 max mem: 9377 +Train: [37] [4800/6250] eta: 0:03:53 lr: 0.000092 grad: 0.0940 (0.0935) loss: 0.8239 (0.8176) time: 0.1533 data: 0.0594 max mem: 9377 +Train: [37] [4900/6250] eta: 0:03:37 lr: 0.000092 grad: 0.0873 (0.0934) loss: 0.8175 (0.8176) time: 0.1519 data: 0.0674 max mem: 9377 +Train: [37] [5000/6250] eta: 0:03:21 lr: 0.000092 grad: 0.0902 (0.0934) loss: 0.8182 (0.8176) time: 0.1520 data: 0.0684 max mem: 9377 +Train: [37] [5100/6250] eta: 0:03:05 lr: 0.000092 grad: 0.0896 (0.0934) loss: 0.8155 (0.8176) time: 0.1691 data: 0.0834 max mem: 9377 +Train: [37] [5200/6250] eta: 0:02:48 lr: 0.000092 grad: 0.0901 (0.0934) loss: 0.8168 (0.8176) time: 0.1519 data: 0.0577 max mem: 9377 +Train: [37] [5300/6250] eta: 0:02:32 lr: 0.000092 grad: 0.0892 (0.0934) loss: 0.8195 (0.8176) time: 0.1372 data: 0.0532 max mem: 9377 +Train: [37] [5400/6250] eta: 0:02:16 lr: 0.000092 grad: 0.0930 (0.0935) loss: 0.8194 (0.8176) time: 0.1348 data: 0.0444 max mem: 9377 +Train: [37] [5500/6250] eta: 0:02:00 lr: 0.000092 grad: 0.0953 (0.0935) loss: 0.8142 (0.8176) time: 0.1423 data: 0.0547 max mem: 9377 +Train: [37] [5600/6250] eta: 0:01:44 lr: 0.000092 grad: 0.0902 (0.0935) loss: 0.8138 (0.8176) time: 0.1849 data: 0.0969 max mem: 9377 +Train: [37] [5700/6250] eta: 0:01:28 lr: 0.000091 grad: 0.0893 (0.0935) loss: 0.8183 (0.8175) time: 0.1840 data: 0.1032 max mem: 9377 +Train: [37] [5800/6250] eta: 0:01:12 lr: 0.000091 grad: 0.1060 (0.0936) loss: 0.8132 (0.8175) time: 0.1545 data: 0.0621 max mem: 9377 +Train: [37] [5900/6250] eta: 0:00:56 lr: 0.000091 grad: 0.0930 (0.0937) loss: 0.8229 (0.8175) time: 0.1784 data: 0.0934 max mem: 9377 +Train: [37] [6000/6250] eta: 0:00:40 lr: 0.000091 grad: 0.0898 (0.0937) loss: 0.8182 (0.8175) time: 0.1731 data: 0.0815 max mem: 9377 +Train: [37] [6100/6250] eta: 0:00:24 lr: 0.000091 grad: 0.0979 (0.0937) loss: 0.8154 (0.8175) time: 0.1752 data: 0.0865 max mem: 9377 +Train: [37] [6200/6250] eta: 0:00:08 lr: 0.000091 grad: 0.0904 (0.0937) loss: 0.8160 (0.8175) time: 0.1483 data: 0.0549 max mem: 9377 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.0921 (0.0937) loss: 0.8161 (0.8175) time: 0.1536 data: 0.0596 max mem: 9377 +Train: [37] Total time: 0:16:51 (0.1619 s / it) +Averaged stats: lr: 0.000091 grad: 0.0921 (0.0937) loss: 0.8161 (0.8175) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:05:56 loss: 0.8359 (0.8359) time: 5.7541 data: 5.7238 max mem: 9377 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8295 (0.8309) time: 0.1413 data: 0.1158 max mem: 9377 +Eval (hcp-train-subset): [37] Total time: 0:00:14 (0.2292 s / it) +Averaged stats (hcp-train-subset): loss: 0.8295 (0.8309) +Eval (hcp-val): [37] [ 0/62] eta: 0:03:45 loss: 0.8387 (0.8387) time: 3.6393 data: 3.5384 max mem: 9377 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8428 (0.8433) time: 0.1295 data: 0.1023 max mem: 9377 +Eval (hcp-val): [37] Total time: 0:00:14 (0.2274 s / it) +Averaged stats (hcp-val): loss: 0.8428 (0.8433) +Eval (nsd-val): [37] [ 0/62] eta: 0:06:12 loss: 0.8092 (0.8092) time: 6.0015 data: 5.9695 max mem: 9377 +Eval (nsd-val): [37] [61/62] eta: 0:00:00 loss: 0.8168 (0.8195) time: 0.1339 data: 0.1062 max mem: 9377 +Eval (nsd-val): [37] Total time: 0:00:15 (0.2438 s / it) +Averaged stats (nsd-val): loss: 0.8168 (0.8195) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 11:05:42 lr: 0.000091 grad: 0.0597 (0.0597) loss: 0.8596 (0.8596) time: 6.3908 data: 6.2678 max mem: 9377 +Train: [38] [ 100/6250] eta: 0:23:26 lr: 0.000091 grad: 0.0940 (0.1082) loss: 0.8281 (0.8353) time: 0.1890 data: 0.0824 max mem: 9377 +Train: [38] [ 200/6250] eta: 0:20:35 lr: 0.000091 grad: 0.0874 (0.1027) loss: 0.8258 (0.8284) time: 0.2017 data: 0.0931 max mem: 9377 +Train: [38] [ 300/6250] eta: 0:18:12 lr: 0.000091 grad: 0.0834 (0.1033) loss: 0.8206 (0.8237) time: 0.1331 data: 0.0434 max mem: 9377 +Train: [38] [ 400/6250] eta: 0:17:11 lr: 0.000091 grad: 0.0790 (0.1002) loss: 0.8285 (0.8221) time: 0.1281 data: 0.0338 max mem: 9377 +Train: [38] [ 500/6250] eta: 0:16:32 lr: 0.000091 grad: 0.0858 (0.0976) loss: 0.8295 (0.8220) time: 0.1718 data: 0.0788 max mem: 9377 +Train: [38] [ 600/6250] eta: 0:16:02 lr: 0.000091 grad: 0.0891 (0.0966) loss: 0.8201 (0.8215) time: 0.1793 data: 0.0893 max mem: 9377 +Train: [38] [ 700/6250] eta: 0:15:34 lr: 0.000091 grad: 0.0877 (0.0958) loss: 0.8155 (0.8211) time: 0.1590 data: 0.0638 max mem: 9377 +Train: [38] [ 800/6250] eta: 0:15:19 lr: 0.000091 grad: 0.0848 (0.0948) loss: 0.8218 (0.8207) time: 0.1863 data: 0.0861 max mem: 9377 +Train: [38] [ 900/6250] eta: 0:15:05 lr: 0.000091 grad: 0.0805 (0.0941) loss: 0.8287 (0.8206) time: 0.1616 data: 0.0742 max mem: 9377 +Train: [38] [1000/6250] eta: 0:14:43 lr: 0.000091 grad: 0.0796 (0.0936) loss: 0.8248 (0.8206) time: 0.1669 data: 0.0804 max mem: 9377 +Train: [38] [1100/6250] eta: 0:14:28 lr: 0.000091 grad: 0.0886 (0.0932) loss: 0.8255 (0.8205) time: 0.1556 data: 0.0650 max mem: 9377 +Train: [38] [1200/6250] eta: 0:14:20 lr: 0.000091 grad: 0.0897 (0.0928) loss: 0.8223 (0.8205) time: 0.1618 data: 0.0799 max mem: 9377 +Train: [38] [1300/6250] eta: 0:14:04 lr: 0.000091 grad: 0.0883 (0.0926) loss: 0.8159 (0.8203) time: 0.1714 data: 0.0801 max mem: 9377 +Train: [38] [1400/6250] eta: 0:13:43 lr: 0.000091 grad: 0.0886 (0.0926) loss: 0.8234 (0.8201) time: 0.1476 data: 0.0487 max mem: 9377 +Train: [38] [1500/6250] eta: 0:13:23 lr: 0.000091 grad: 0.0881 (0.0924) loss: 0.8120 (0.8200) time: 0.1709 data: 0.0695 max mem: 9377 +Train: [38] [1600/6250] eta: 0:13:01 lr: 0.000091 grad: 0.0906 (0.0923) loss: 0.8157 (0.8198) time: 0.1591 data: 0.0720 max mem: 9377 +Train: [38] [1700/6250] eta: 0:12:41 lr: 0.000091 grad: 0.0902 (0.0923) loss: 0.8213 (0.8196) time: 0.1765 data: 0.0966 max mem: 9377 +Train: [38] [1800/6250] eta: 0:12:22 lr: 0.000091 grad: 0.0981 (0.0926) loss: 0.8157 (0.8192) time: 0.1450 data: 0.0578 max mem: 9377 +Train: [38] [1900/6250] eta: 0:12:07 lr: 0.000091 grad: 0.0905 (0.0926) loss: 0.8101 (0.8188) time: 0.1994 data: 0.1203 max mem: 9377 +Train: [38] [2000/6250] eta: 0:11:51 lr: 0.000091 grad: 0.0945 (0.0926) loss: 0.8127 (0.8185) time: 0.1670 data: 0.0835 max mem: 9377 +Train: [38] [2100/6250] eta: 0:11:34 lr: 0.000091 grad: 0.0917 (0.0927) loss: 0.8061 (0.8181) time: 0.1672 data: 0.0762 max mem: 9377 +Train: [38] [2200/6250] eta: 0:11:16 lr: 0.000091 grad: 0.0925 (0.0928) loss: 0.8196 (0.8177) time: 0.1863 data: 0.0976 max mem: 9377 +Train: [38] [2300/6250] eta: 0:10:58 lr: 0.000091 grad: 0.1033 (0.0930) loss: 0.8114 (0.8174) time: 0.1584 data: 0.0705 max mem: 9377 +Train: [38] [2400/6250] eta: 0:10:39 lr: 0.000091 grad: 0.0951 (0.0930) loss: 0.8096 (0.8172) time: 0.1546 data: 0.0705 max mem: 9377 +Train: [38] [2500/6250] eta: 0:10:21 lr: 0.000091 grad: 0.0849 (0.0930) loss: 0.8108 (0.8170) time: 0.1545 data: 0.0584 max mem: 9377 +Train: [38] [2600/6250] eta: 0:10:02 lr: 0.000091 grad: 0.0988 (0.0930) loss: 0.8130 (0.8168) time: 0.1492 data: 0.0558 max mem: 9377 +Train: [38] [2700/6250] eta: 0:09:44 lr: 0.000091 grad: 0.0915 (0.0932) loss: 0.8176 (0.8166) time: 0.1560 data: 0.0631 max mem: 9377 +Train: [38] [2800/6250] eta: 0:09:27 lr: 0.000091 grad: 0.0905 (0.0932) loss: 0.8202 (0.8165) time: 0.1571 data: 0.0666 max mem: 9377 +Train: [38] [2900/6250] eta: 0:09:10 lr: 0.000090 grad: 0.0915 (0.0932) loss: 0.8208 (0.8164) time: 0.1612 data: 0.0727 max mem: 9377 +Train: [38] [3000/6250] eta: 0:08:52 lr: 0.000090 grad: 0.0908 (0.0933) loss: 0.8122 (0.8163) time: 0.1541 data: 0.0639 max mem: 9377 +Train: [38] [3100/6250] eta: 0:08:35 lr: 0.000090 grad: 0.0913 (0.0933) loss: 0.8178 (0.8163) time: 0.1641 data: 0.0714 max mem: 9377 +Train: [38] [3200/6250] eta: 0:08:18 lr: 0.000090 grad: 0.0919 (0.0935) loss: 0.8122 (0.8162) time: 0.1702 data: 0.0837 max mem: 9377 +Train: [38] [3300/6250] eta: 0:08:02 lr: 0.000090 grad: 0.0889 (0.0936) loss: 0.8217 (0.8162) time: 0.1661 data: 0.0806 max mem: 9377 +Train: [38] [3400/6250] eta: 0:07:45 lr: 0.000090 grad: 0.0925 (0.0937) loss: 0.8131 (0.8161) time: 0.1774 data: 0.0864 max mem: 9377 +Train: [38] [3500/6250] eta: 0:07:27 lr: 0.000090 grad: 0.0880 (0.0938) loss: 0.8125 (0.8160) time: 0.1494 data: 0.0683 max mem: 9377 +Train: [38] [3600/6250] eta: 0:07:10 lr: 0.000090 grad: 0.0905 (0.0938) loss: 0.8197 (0.8160) time: 0.1575 data: 0.0755 max mem: 9377 +Train: [38] [3700/6250] eta: 0:06:53 lr: 0.000090 grad: 0.0930 (0.0938) loss: 0.8186 (0.8160) time: 0.1631 data: 0.0749 max mem: 9377 +Train: [38] [3800/6250] eta: 0:06:37 lr: 0.000090 grad: 0.0945 (0.0939) loss: 0.8158 (0.8160) time: 0.1767 data: 0.0902 max mem: 9377 +Train: [38] [3900/6250] eta: 0:06:20 lr: 0.000090 grad: 0.0914 (0.0939) loss: 0.8125 (0.8160) time: 0.1391 data: 0.0495 max mem: 9377 +Train: [38] [4000/6250] eta: 0:06:03 lr: 0.000090 grad: 0.0950 (0.0940) loss: 0.8219 (0.8160) time: 0.1652 data: 0.0798 max mem: 9377 +Train: [38] [4100/6250] eta: 0:05:47 lr: 0.000090 grad: 0.0902 (0.0940) loss: 0.8224 (0.8160) time: 0.1727 data: 0.0773 max mem: 9377 +Train: [38] [4200/6250] eta: 0:05:31 lr: 0.000090 grad: 0.0980 (0.0940) loss: 0.8168 (0.8161) time: 0.1519 data: 0.0656 max mem: 9377 +Train: [38] [4300/6250] eta: 0:05:14 lr: 0.000090 grad: 0.0978 (0.0941) loss: 0.8095 (0.8160) time: 0.1765 data: 0.0943 max mem: 9377 +Train: [38] [4400/6250] eta: 0:04:58 lr: 0.000090 grad: 0.0939 (0.0942) loss: 0.8161 (0.8160) time: 0.1533 data: 0.0623 max mem: 9377 +Train: [38] [4500/6250] eta: 0:04:42 lr: 0.000090 grad: 0.0886 (0.0942) loss: 0.8156 (0.8160) time: 0.1502 data: 0.0642 max mem: 9377 +Train: [38] [4600/6250] eta: 0:04:25 lr: 0.000090 grad: 0.0946 (0.0942) loss: 0.8177 (0.8160) time: 0.1507 data: 0.0511 max mem: 9377 +Train: [38] [4700/6250] eta: 0:04:09 lr: 0.000090 grad: 0.0888 (0.0942) loss: 0.8147 (0.8160) time: 0.1480 data: 0.0613 max mem: 9377 +Train: [38] [4800/6250] eta: 0:03:53 lr: 0.000090 grad: 0.0958 (0.0943) loss: 0.8175 (0.8160) time: 0.1569 data: 0.0717 max mem: 9377 +Train: [38] [4900/6250] eta: 0:03:36 lr: 0.000090 grad: 0.0898 (0.0943) loss: 0.8214 (0.8161) time: 0.1778 data: 0.0839 max mem: 9377 +Train: [38] [5000/6250] eta: 0:03:20 lr: 0.000090 grad: 0.0947 (0.0943) loss: 0.8197 (0.8162) time: 0.1798 data: 0.0864 max mem: 9377 +Train: [38] [5100/6250] eta: 0:03:04 lr: 0.000090 grad: 0.0876 (0.0943) loss: 0.8169 (0.8163) time: 0.1574 data: 0.0756 max mem: 9377 +Train: [38] [5200/6250] eta: 0:02:48 lr: 0.000090 grad: 0.0900 (0.0943) loss: 0.8162 (0.8163) time: 0.1555 data: 0.0641 max mem: 9377 +Train: [38] [5300/6250] eta: 0:02:32 lr: 0.000090 grad: 0.0856 (0.0942) loss: 0.8282 (0.8164) time: 0.1515 data: 0.0601 max mem: 9377 +Train: [38] [5400/6250] eta: 0:02:16 lr: 0.000090 grad: 0.0955 (0.0941) loss: 0.8202 (0.8166) time: 0.1386 data: 0.0448 max mem: 9377 +Train: [38] [5500/6250] eta: 0:02:00 lr: 0.000090 grad: 0.0950 (0.0941) loss: 0.8172 (0.8166) time: 0.1666 data: 0.0765 max mem: 9377 +Train: [38] [5600/6250] eta: 0:01:44 lr: 0.000090 grad: 0.1007 (0.0941) loss: 0.8132 (0.8167) time: 0.1619 data: 0.0738 max mem: 9377 +Train: [38] [5700/6250] eta: 0:01:28 lr: 0.000090 grad: 0.0888 (0.0941) loss: 0.8195 (0.8167) time: 0.1707 data: 0.0844 max mem: 9377 +Train: [38] [5800/6250] eta: 0:01:12 lr: 0.000090 grad: 0.0920 (0.0941) loss: 0.8168 (0.8168) time: 0.1750 data: 0.0913 max mem: 9377 +Train: [38] [5900/6250] eta: 0:00:56 lr: 0.000090 grad: 0.0954 (0.0941) loss: 0.8139 (0.8167) time: 0.2028 data: 0.1125 max mem: 9377 +Train: [38] [6000/6250] eta: 0:00:40 lr: 0.000090 grad: 0.0920 (0.0941) loss: 0.8117 (0.8167) time: 0.1676 data: 0.0618 max mem: 9377 +Train: [38] [6100/6250] eta: 0:00:24 lr: 0.000090 grad: 0.0940 (0.0942) loss: 0.8152 (0.8167) time: 0.1478 data: 0.0525 max mem: 9377 +Train: [38] [6200/6250] eta: 0:00:08 lr: 0.000089 grad: 0.0938 (0.0942) loss: 0.8169 (0.8167) time: 0.1920 data: 0.1062 max mem: 9377 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.0962 (0.0942) loss: 0.8138 (0.8167) time: 0.1533 data: 0.0671 max mem: 9377 +Train: [38] Total time: 0:17:00 (0.1633 s / it) +Averaged stats: lr: 0.000089 grad: 0.0962 (0.0942) loss: 0.8138 (0.8167) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:05:08 loss: 0.8357 (0.8357) time: 4.9701 data: 4.9395 max mem: 9377 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8319 (0.8308) time: 0.1356 data: 0.1104 max mem: 9377 +Eval (hcp-train-subset): [38] Total time: 0:00:14 (0.2349 s / it) +Averaged stats (hcp-train-subset): loss: 0.8319 (0.8308) +Eval (hcp-val): [38] [ 0/62] eta: 0:05:02 loss: 0.8414 (0.8414) time: 4.8765 data: 4.8097 max mem: 9377 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8408 (0.8418) time: 0.1259 data: 0.0999 max mem: 9377 +Eval (hcp-val): [38] Total time: 0:00:13 (0.2247 s / it) +Averaged stats (hcp-val): loss: 0.8408 (0.8418) +Eval (nsd-val): [38] [ 0/62] eta: 0:05:16 loss: 0.7975 (0.7975) time: 5.1014 data: 5.0693 max mem: 9377 +Eval (nsd-val): [38] [61/62] eta: 0:00:00 loss: 0.8111 (0.8119) time: 0.1273 data: 0.1019 max mem: 9377 +Eval (nsd-val): [38] Total time: 0:00:13 (0.2240 s / it) +Averaged stats (nsd-val): loss: 0.8111 (0.8119) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [39] [ 0/6250] eta: 8:01:15 lr: 0.000089 grad: 0.0721 (0.0721) loss: 0.8639 (0.8639) time: 4.6201 data: 4.2600 max mem: 9377 +Train: [39] [ 100/6250] eta: 0:23:00 lr: 0.000089 grad: 0.0943 (0.1125) loss: 0.8280 (0.8258) time: 0.1501 data: 0.0379 max mem: 9377 +Train: [39] [ 200/6250] eta: 0:19:49 lr: 0.000089 grad: 0.0935 (0.1050) loss: 0.8333 (0.8254) time: 0.1769 data: 0.0740 max mem: 9377 +Train: [39] [ 300/6250] eta: 0:18:07 lr: 0.000089 grad: 0.0929 (0.1034) loss: 0.8236 (0.8239) time: 0.1535 data: 0.0564 max mem: 9377 +Train: [39] [ 400/6250] eta: 0:17:02 lr: 0.000089 grad: 0.0950 (0.1024) loss: 0.8263 (0.8226) time: 0.1531 data: 0.0449 max mem: 9377 +Train: [39] [ 500/6250] eta: 0:16:26 lr: 0.000089 grad: 0.0866 (0.1000) loss: 0.8191 (0.8218) time: 0.1473 data: 0.0500 max mem: 9377 +Train: [39] [ 600/6250] eta: 0:16:00 lr: 0.000089 grad: 0.0919 (0.0993) loss: 0.8268 (0.8216) time: 0.1650 data: 0.0763 max mem: 9377 +Train: [39] [ 700/6250] eta: 0:15:46 lr: 0.000089 grad: 0.0915 (0.0990) loss: 0.8169 (0.8211) time: 0.1696 data: 0.0768 max mem: 9377 +Train: [39] [ 800/6250] eta: 0:15:23 lr: 0.000089 grad: 0.0842 (0.0977) loss: 0.8269 (0.8211) time: 0.1626 data: 0.0696 max mem: 9377 +Train: [39] [ 900/6250] eta: 0:15:10 lr: 0.000089 grad: 0.0828 (0.0965) loss: 0.8295 (0.8211) time: 0.1487 data: 0.0573 max mem: 9377 +Train: [39] [1000/6250] eta: 0:14:50 lr: 0.000089 grad: 0.0905 (0.0955) loss: 0.8200 (0.8212) time: 0.1582 data: 0.0692 max mem: 9377 +Train: [39] [1100/6250] eta: 0:14:32 lr: 0.000089 grad: 0.0904 (0.0951) loss: 0.8120 (0.8210) time: 0.1592 data: 0.0784 max mem: 9377 +Train: [39] [1200/6250] eta: 0:14:12 lr: 0.000089 grad: 0.0882 (0.0951) loss: 0.8200 (0.8207) time: 0.1612 data: 0.0699 max mem: 9377 +Train: [39] [1300/6250] eta: 0:13:55 lr: 0.000089 grad: 0.0872 (0.0948) loss: 0.8205 (0.8205) time: 0.1622 data: 0.0668 max mem: 9377 +Train: [39] [1400/6250] eta: 0:13:37 lr: 0.000089 grad: 0.0907 (0.0946) loss: 0.8121 (0.8201) time: 0.1603 data: 0.0706 max mem: 9377 +Train: [39] [1500/6250] eta: 0:13:16 lr: 0.000089 grad: 0.0973 (0.0946) loss: 0.8090 (0.8195) time: 0.1244 data: 0.0326 max mem: 9377 +Train: [39] [1600/6250] eta: 0:12:56 lr: 0.000089 grad: 0.0918 (0.0947) loss: 0.8184 (0.8190) time: 0.1806 data: 0.0883 max mem: 9377 +Train: [39] [1700/6250] eta: 0:12:36 lr: 0.000089 grad: 0.0940 (0.0947) loss: 0.8129 (0.8186) time: 0.1561 data: 0.0683 max mem: 9377 +Train: [39] [1800/6250] eta: 0:12:14 lr: 0.000089 grad: 0.0953 (0.0949) loss: 0.8121 (0.8182) time: 0.1521 data: 0.0572 max mem: 9377 +Train: [39] [1900/6250] eta: 0:11:55 lr: 0.000089 grad: 0.0994 (0.0951) loss: 0.8167 (0.8179) time: 0.1575 data: 0.0781 max mem: 9377 +Train: [39] [2000/6250] eta: 0:11:38 lr: 0.000089 grad: 0.0940 (0.0951) loss: 0.8139 (0.8174) time: 0.1643 data: 0.0787 max mem: 9377 +Train: [39] [2100/6250] eta: 0:11:22 lr: 0.000089 grad: 0.0905 (0.0953) loss: 0.8137 (0.8170) time: 0.1622 data: 0.0785 max mem: 9377 +Train: [39] [2200/6250] eta: 0:11:05 lr: 0.000089 grad: 0.0989 (0.0956) loss: 0.8027 (0.8165) time: 0.1513 data: 0.0667 max mem: 9377 +Train: [39] [2300/6250] eta: 0:10:52 lr: 0.000089 grad: 0.0973 (0.0957) loss: 0.8072 (0.8162) time: 0.1837 data: 0.0984 max mem: 9377 +Train: [39] [2400/6250] eta: 0:10:38 lr: 0.000089 grad: 0.0919 (0.0958) loss: 0.8147 (0.8159) time: 0.1909 data: 0.0927 max mem: 9377 +Train: [39] [2500/6250] eta: 0:10:22 lr: 0.000089 grad: 0.0995 (0.0960) loss: 0.8126 (0.8156) time: 0.1698 data: 0.0786 max mem: 9377 +Train: [39] [2600/6250] eta: 0:10:05 lr: 0.000089 grad: 0.0959 (0.0961) loss: 0.8087 (0.8154) time: 0.1732 data: 0.0843 max mem: 9377 +Train: [39] [2700/6250] eta: 0:09:48 lr: 0.000089 grad: 0.0909 (0.0961) loss: 0.8142 (0.8152) time: 0.1691 data: 0.0785 max mem: 9377 +Train: [39] [2800/6250] eta: 0:09:30 lr: 0.000089 grad: 0.0907 (0.0960) loss: 0.8171 (0.8151) time: 0.1742 data: 0.0803 max mem: 9377 +Train: [39] [2900/6250] eta: 0:09:13 lr: 0.000089 grad: 0.0913 (0.0959) loss: 0.8119 (0.8151) time: 0.1655 data: 0.0683 max mem: 9377 +Train: [39] [3000/6250] eta: 0:08:55 lr: 0.000089 grad: 0.0993 (0.0959) loss: 0.8137 (0.8151) time: 0.1424 data: 0.0549 max mem: 9377 +Train: [39] [3100/6250] eta: 0:08:38 lr: 0.000089 grad: 0.0923 (0.0958) loss: 0.8193 (0.8151) time: 0.1676 data: 0.0706 max mem: 9377 +Train: [39] [3200/6250] eta: 0:08:21 lr: 0.000089 grad: 0.0908 (0.0958) loss: 0.8143 (0.8151) time: 0.1517 data: 0.0636 max mem: 9377 +Train: [39] [3300/6250] eta: 0:08:04 lr: 0.000088 grad: 0.0963 (0.0959) loss: 0.8145 (0.8150) time: 0.1772 data: 0.0871 max mem: 9377 +Train: [39] [3400/6250] eta: 0:07:47 lr: 0.000088 grad: 0.0939 (0.0960) loss: 0.8139 (0.8149) time: 0.1532 data: 0.0592 max mem: 9377 +Train: [39] [3500/6250] eta: 0:07:30 lr: 0.000088 grad: 0.0918 (0.0959) loss: 0.8149 (0.8149) time: 0.1469 data: 0.0609 max mem: 9377 +Train: [39] [3600/6250] eta: 0:07:14 lr: 0.000088 grad: 0.0930 (0.0959) loss: 0.8028 (0.8149) time: 0.1713 data: 0.0784 max mem: 9377 +Train: [39] [3700/6250] eta: 0:06:57 lr: 0.000088 grad: 0.0882 (0.0958) loss: 0.8209 (0.8150) time: 0.1518 data: 0.0592 max mem: 9377 +Train: [39] [3800/6250] eta: 0:06:41 lr: 0.000088 grad: 0.0890 (0.0957) loss: 0.8200 (0.8151) time: 0.1454 data: 0.0608 max mem: 9377 +Train: [39] [3900/6250] eta: 0:06:24 lr: 0.000088 grad: 0.0915 (0.0956) loss: 0.8152 (0.8152) time: 0.1525 data: 0.0630 max mem: 9377 +Train: [39] [4000/6250] eta: 0:06:07 lr: 0.000088 grad: 0.0905 (0.0956) loss: 0.8195 (0.8153) time: 0.1575 data: 0.0638 max mem: 9377 +Train: [39] [4100/6250] eta: 0:05:50 lr: 0.000088 grad: 0.0881 (0.0956) loss: 0.8245 (0.8154) time: 0.1299 data: 0.0402 max mem: 9377 +Train: [39] [4200/6250] eta: 0:05:33 lr: 0.000088 grad: 0.0922 (0.0954) loss: 0.8217 (0.8155) time: 0.1627 data: 0.0780 max mem: 9377 +Train: [39] [4300/6250] eta: 0:05:16 lr: 0.000088 grad: 0.0886 (0.0953) loss: 0.8138 (0.8156) time: 0.1502 data: 0.0532 max mem: 9377 +Train: [39] [4400/6250] eta: 0:05:00 lr: 0.000088 grad: 0.0926 (0.0953) loss: 0.8155 (0.8156) time: 0.1412 data: 0.0444 max mem: 9377 +Train: [39] [4500/6250] eta: 0:04:43 lr: 0.000088 grad: 0.0865 (0.0953) loss: 0.8233 (0.8156) time: 0.1218 data: 0.0332 max mem: 9377 +Train: [39] [4600/6250] eta: 0:04:27 lr: 0.000088 grad: 0.0901 (0.0953) loss: 0.8208 (0.8157) time: 0.1522 data: 0.0631 max mem: 9377 +Train: [39] [4700/6250] eta: 0:04:10 lr: 0.000088 grad: 0.0947 (0.0952) loss: 0.8178 (0.8157) time: 0.1561 data: 0.0689 max mem: 9377 +Train: [39] [4800/6250] eta: 0:03:54 lr: 0.000088 grad: 0.0942 (0.0952) loss: 0.8179 (0.8157) time: 0.1632 data: 0.0702 max mem: 9377 +Train: [39] [4900/6250] eta: 0:03:38 lr: 0.000088 grad: 0.0896 (0.0952) loss: 0.8191 (0.8157) time: 0.1435 data: 0.0600 max mem: 9377 +Train: [39] [5000/6250] eta: 0:03:21 lr: 0.000088 grad: 0.0926 (0.0951) loss: 0.8226 (0.8158) time: 0.1589 data: 0.0720 max mem: 9377 +Train: [39] [5100/6250] eta: 0:03:05 lr: 0.000088 grad: 0.0912 (0.0951) loss: 0.8105 (0.8158) time: 0.1670 data: 0.0807 max mem: 9377 +Train: [39] [5200/6250] eta: 0:02:49 lr: 0.000088 grad: 0.0880 (0.0951) loss: 0.8205 (0.8159) time: 0.1356 data: 0.0423 max mem: 9377 +Train: [39] [5300/6250] eta: 0:02:33 lr: 0.000088 grad: 0.0869 (0.0950) loss: 0.8176 (0.8159) time: 0.1624 data: 0.0725 max mem: 9377 +Train: [39] [5400/6250] eta: 0:02:17 lr: 0.000088 grad: 0.0920 (0.0950) loss: 0.8225 (0.8160) time: 0.1698 data: 0.0769 max mem: 9377 +Train: [39] [5500/6250] eta: 0:02:01 lr: 0.000088 grad: 0.0975 (0.0950) loss: 0.8193 (0.8160) time: 0.1732 data: 0.0965 max mem: 9377 +Train: [39] [5600/6250] eta: 0:01:45 lr: 0.000088 grad: 0.0895 (0.0950) loss: 0.8196 (0.8160) time: 0.1716 data: 0.0832 max mem: 9377 +Train: [39] [5700/6250] eta: 0:01:29 lr: 0.000088 grad: 0.0930 (0.0950) loss: 0.8173 (0.8161) time: 0.1596 data: 0.0697 max mem: 9377 +Train: [39] [5800/6250] eta: 0:01:12 lr: 0.000088 grad: 0.0925 (0.0949) loss: 0.8101 (0.8161) time: 0.1711 data: 0.0839 max mem: 9377 +Train: [39] [5900/6250] eta: 0:00:56 lr: 0.000088 grad: 0.0870 (0.0948) loss: 0.8232 (0.8162) time: 0.1546 data: 0.0710 max mem: 9377 +Train: [39] [6000/6250] eta: 0:00:40 lr: 0.000088 grad: 0.0902 (0.0948) loss: 0.8225 (0.8163) time: 0.1924 data: 0.1025 max mem: 9377 +Train: [39] [6100/6250] eta: 0:00:24 lr: 0.000088 grad: 0.0906 (0.0948) loss: 0.8253 (0.8163) time: 0.1754 data: 0.0825 max mem: 9377 +Train: [39] [6200/6250] eta: 0:00:08 lr: 0.000088 grad: 0.0891 (0.0948) loss: 0.8226 (0.8164) time: 0.1489 data: 0.0559 max mem: 9377 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.0898 (0.0948) loss: 0.8227 (0.8164) time: 0.1581 data: 0.0682 max mem: 9377 +Train: [39] Total time: 0:16:57 (0.1629 s / it) +Averaged stats: lr: 0.000088 grad: 0.0898 (0.0948) loss: 0.8227 (0.8164) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:05:20 loss: 0.8328 (0.8328) time: 5.1678 data: 5.1085 max mem: 9377 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8249 (0.8274) time: 0.1440 data: 0.1173 max mem: 9377 +Eval (hcp-train-subset): [39] Total time: 0:00:16 (0.2648 s / it) +Averaged stats (hcp-train-subset): loss: 0.8249 (0.8274) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [39] [ 0/62] eta: 0:06:26 loss: 0.8396 (0.8396) time: 6.2267 data: 6.1914 max mem: 9377 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8404 (0.8415) time: 0.1471 data: 0.1195 max mem: 9377 +Eval (hcp-val): [39] Total time: 0:00:15 (0.2548 s / it) +Averaged stats (hcp-val): loss: 0.8404 (0.8415) +Making plots (hcp-val): example=55 +Eval (nsd-val): [39] [ 0/62] eta: 0:05:30 loss: 0.8043 (0.8043) time: 5.3249 data: 5.2794 max mem: 9377 +Eval (nsd-val): [39] [61/62] eta: 0:00:00 loss: 0.8162 (0.8155) time: 0.1411 data: 0.1135 max mem: 9377 +Eval (nsd-val): [39] Total time: 0:00:16 (0.2633 s / it) +Averaged stats (nsd-val): loss: 0.8162 (0.8155) +Making plots (nsd-val): example=49 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 12:51:59 lr: 0.000088 grad: 0.1262 (0.1262) loss: 0.8469 (0.8469) time: 7.4112 data: 7.3083 max mem: 9377 +Train: [40] [ 100/6250] eta: 0:23:56 lr: 0.000088 grad: 0.1046 (0.1240) loss: 0.8310 (0.8263) time: 0.1530 data: 0.0422 max mem: 9377 +Train: [40] [ 200/6250] eta: 0:20:50 lr: 0.000088 grad: 0.0826 (0.1097) loss: 0.8338 (0.8253) time: 0.1721 data: 0.0689 max mem: 9377 +Train: [40] [ 300/6250] eta: 0:19:45 lr: 0.000088 grad: 0.0735 (0.1039) loss: 0.8327 (0.8239) time: 0.1958 data: 0.0958 max mem: 9377 +Train: [40] [ 400/6250] eta: 0:18:44 lr: 0.000087 grad: 0.0874 (0.1001) loss: 0.8168 (0.8227) time: 0.1671 data: 0.0738 max mem: 9377 +Train: [40] [ 500/6250] eta: 0:17:56 lr: 0.000087 grad: 0.0852 (0.0980) loss: 0.8215 (0.8223) time: 0.1818 data: 0.0917 max mem: 9377 +Train: [40] [ 600/6250] eta: 0:17:12 lr: 0.000087 grad: 0.0856 (0.0962) loss: 0.8195 (0.8218) time: 0.1505 data: 0.0520 max mem: 9377 +Train: [40] [ 700/6250] eta: 0:16:36 lr: 0.000087 grad: 0.0937 (0.0959) loss: 0.8199 (0.8213) time: 0.1733 data: 0.0796 max mem: 9377 +Train: [40] [ 800/6250] eta: 0:16:10 lr: 0.000087 grad: 0.0889 (0.0953) loss: 0.8118 (0.8211) time: 0.1708 data: 0.0769 max mem: 9377 +Train: [40] [ 900/6250] eta: 0:15:49 lr: 0.000087 grad: 0.0907 (0.0945) loss: 0.8172 (0.8207) time: 0.1903 data: 0.1037 max mem: 9377 +Train: [40] [1000/6250] eta: 0:15:24 lr: 0.000087 grad: 0.0884 (0.0940) loss: 0.8138 (0.8203) time: 0.2009 data: 0.1149 max mem: 9377 +Train: [40] [1100/6250] eta: 0:14:56 lr: 0.000087 grad: 0.0955 (0.0938) loss: 0.8205 (0.8200) time: 0.1640 data: 0.0816 max mem: 9377 +Train: [40] [1200/6250] eta: 0:14:32 lr: 0.000087 grad: 0.0900 (0.0938) loss: 0.8118 (0.8192) time: 0.1612 data: 0.0770 max mem: 9377 +Train: [40] [1300/6250] eta: 0:14:12 lr: 0.000087 grad: 0.0877 (0.0938) loss: 0.8174 (0.8188) time: 0.1720 data: 0.0807 max mem: 9377 +Train: [40] [1400/6250] eta: 0:13:47 lr: 0.000087 grad: 0.0918 (0.0937) loss: 0.8195 (0.8183) time: 0.1453 data: 0.0541 max mem: 9377 +Train: [40] [1500/6250] eta: 0:13:24 lr: 0.000087 grad: 0.0885 (0.0938) loss: 0.8217 (0.8178) time: 0.1432 data: 0.0554 max mem: 9377 +Train: [40] [1600/6250] eta: 0:13:05 lr: 0.000087 grad: 0.0894 (0.0937) loss: 0.8106 (0.8176) time: 0.1892 data: 0.1042 max mem: 9377 +Train: [40] [1700/6250] eta: 0:12:42 lr: 0.000087 grad: 0.0854 (0.0938) loss: 0.8140 (0.8173) time: 0.1526 data: 0.0682 max mem: 9377 +Train: [40] [1800/6250] eta: 0:12:25 lr: 0.000087 grad: 0.0926 (0.0939) loss: 0.8067 (0.8170) time: 0.1206 data: 0.0348 max mem: 9377 +Train: [40] [1900/6250] eta: 0:12:10 lr: 0.000087 grad: 0.0816 (0.0938) loss: 0.8212 (0.8168) time: 0.2260 data: 0.1503 max mem: 9377 +Train: [40] [2000/6250] eta: 0:11:52 lr: 0.000087 grad: 0.0869 (0.0938) loss: 0.8208 (0.8167) time: 0.1621 data: 0.0826 max mem: 9377 +Train: [40] [2100/6250] eta: 0:11:34 lr: 0.000087 grad: 0.0848 (0.0937) loss: 0.8222 (0.8167) time: 0.1576 data: 0.0776 max mem: 9377 +Train: [40] [2200/6250] eta: 0:11:16 lr: 0.000087 grad: 0.0884 (0.0936) loss: 0.8149 (0.8165) time: 0.1627 data: 0.0875 max mem: 9377 +Train: [40] [2300/6250] eta: 0:10:56 lr: 0.000087 grad: 0.0846 (0.0936) loss: 0.8201 (0.8164) time: 0.1423 data: 0.0532 max mem: 9377 +Train: [40] [2400/6250] eta: 0:10:39 lr: 0.000087 grad: 0.0878 (0.0935) loss: 0.8166 (0.8165) time: 0.1490 data: 0.0563 max mem: 9377 +Train: [40] [2500/6250] eta: 0:10:22 lr: 0.000087 grad: 0.0894 (0.0935) loss: 0.8159 (0.8165) time: 0.1405 data: 0.0464 max mem: 9377 +Train: [40] [2600/6250] eta: 0:10:07 lr: 0.000087 grad: 0.0900 (0.0934) loss: 0.8121 (0.8166) time: 0.1703 data: 0.0789 max mem: 9377 +Train: [40] [2700/6250] eta: 0:09:50 lr: 0.000087 grad: 0.0914 (0.0935) loss: 0.8103 (0.8166) time: 0.1301 data: 0.0404 max mem: 9377 +Train: [40] [2800/6250] eta: 0:09:32 lr: 0.000087 grad: 0.0941 (0.0936) loss: 0.8211 (0.8166) time: 0.1635 data: 0.0807 max mem: 9377 +Train: [40] [2900/6250] eta: 0:09:13 lr: 0.000087 grad: 0.0918 (0.0936) loss: 0.8154 (0.8166) time: 0.1333 data: 0.0391 max mem: 9377 +Train: [40] [3000/6250] eta: 0:08:55 lr: 0.000087 grad: 0.0913 (0.0936) loss: 0.8200 (0.8165) time: 0.1489 data: 0.0572 max mem: 9377 +Train: [40] [3100/6250] eta: 0:08:39 lr: 0.000087 grad: 0.0967 (0.0937) loss: 0.8138 (0.8165) time: 0.1215 data: 0.0364 max mem: 9377 +Train: [40] [3200/6250] eta: 0:08:22 lr: 0.000087 grad: 0.0954 (0.0937) loss: 0.8121 (0.8165) time: 0.1722 data: 0.0841 max mem: 9377 +Train: [40] [3300/6250] eta: 0:08:05 lr: 0.000087 grad: 0.0938 (0.0939) loss: 0.8166 (0.8163) time: 0.1633 data: 0.0774 max mem: 9377 +Train: [40] [3400/6250] eta: 0:07:49 lr: 0.000087 grad: 0.1016 (0.0941) loss: 0.8168 (0.8162) time: 0.1137 data: 0.0280 max mem: 9377 +Train: [40] [3500/6250] eta: 0:07:33 lr: 0.000087 grad: 0.0988 (0.0942) loss: 0.8147 (0.8162) time: 0.1650 data: 0.0836 max mem: 9377 +Train: [40] [3600/6250] eta: 0:07:17 lr: 0.000087 grad: 0.0912 (0.0943) loss: 0.8174 (0.8161) time: 0.1405 data: 0.0547 max mem: 9377 +Train: [40] [3700/6250] eta: 0:07:00 lr: 0.000086 grad: 0.0953 (0.0944) loss: 0.8164 (0.8161) time: 0.1444 data: 0.0494 max mem: 9377 +Train: [40] [3800/6250] eta: 0:06:43 lr: 0.000086 grad: 0.1010 (0.0945) loss: 0.8152 (0.8160) time: 0.1595 data: 0.0693 max mem: 9377 +Train: [40] [3900/6250] eta: 0:06:26 lr: 0.000086 grad: 0.0996 (0.0946) loss: 0.8084 (0.8159) time: 0.1728 data: 0.0888 max mem: 9377 +Train: [40] [4000/6250] eta: 0:06:08 lr: 0.000086 grad: 0.0962 (0.0947) loss: 0.8157 (0.8159) time: 0.1562 data: 0.0576 max mem: 9377 +Train: [40] [4100/6250] eta: 0:05:52 lr: 0.000086 grad: 0.0972 (0.0948) loss: 0.8098 (0.8158) time: 0.1762 data: 0.0943 max mem: 9377 +Train: [40] [4200/6250] eta: 0:05:35 lr: 0.000086 grad: 0.0989 (0.0950) loss: 0.8056 (0.8157) time: 0.1623 data: 0.0796 max mem: 9377 +Train: [40] [4300/6250] eta: 0:05:19 lr: 0.000086 grad: 0.0992 (0.0950) loss: 0.8179 (0.8156) time: 0.1652 data: 0.0808 max mem: 9377 +Train: [40] [4400/6250] eta: 0:05:02 lr: 0.000086 grad: 0.0996 (0.0951) loss: 0.8132 (0.8156) time: 0.1390 data: 0.0569 max mem: 9377 +Train: [40] [4500/6250] eta: 0:04:45 lr: 0.000086 grad: 0.0911 (0.0951) loss: 0.8109 (0.8155) time: 0.1533 data: 0.0697 max mem: 9377 +Train: [40] [4600/6250] eta: 0:04:29 lr: 0.000086 grad: 0.0971 (0.0953) loss: 0.8118 (0.8154) time: 0.1791 data: 0.0891 max mem: 9377 +Train: [40] [4700/6250] eta: 0:04:12 lr: 0.000086 grad: 0.0983 (0.0954) loss: 0.8160 (0.8153) time: 0.1319 data: 0.0508 max mem: 9377 +Train: [40] [4800/6250] eta: 0:03:56 lr: 0.000086 grad: 0.0914 (0.0955) loss: 0.8115 (0.8153) time: 0.1591 data: 0.0716 max mem: 9377 +Train: [40] [4900/6250] eta: 0:03:39 lr: 0.000086 grad: 0.0969 (0.0955) loss: 0.8147 (0.8152) time: 0.1660 data: 0.0780 max mem: 9377 +Train: [40] [5000/6250] eta: 0:03:23 lr: 0.000086 grad: 0.0997 (0.0956) loss: 0.8149 (0.8151) time: 0.1639 data: 0.0786 max mem: 9377 +Train: [40] [5100/6250] eta: 0:03:07 lr: 0.000086 grad: 0.0930 (0.0955) loss: 0.8125 (0.8151) time: 0.1433 data: 0.0526 max mem: 9377 +Train: [40] [5200/6250] eta: 0:02:50 lr: 0.000086 grad: 0.0921 (0.0955) loss: 0.8127 (0.8151) time: 0.1656 data: 0.0735 max mem: 9377 +Train: [40] [5300/6250] eta: 0:02:34 lr: 0.000086 grad: 0.0942 (0.0955) loss: 0.8124 (0.8151) time: 0.1994 data: 0.1059 max mem: 9377 +Train: [40] [5400/6250] eta: 0:02:18 lr: 0.000086 grad: 0.0920 (0.0955) loss: 0.8166 (0.8151) time: 0.1140 data: 0.0211 max mem: 9377 +Train: [40] [5500/6250] eta: 0:02:01 lr: 0.000086 grad: 0.1031 (0.0955) loss: 0.8138 (0.8151) time: 0.1699 data: 0.0886 max mem: 9377 +Train: [40] [5600/6250] eta: 0:01:45 lr: 0.000086 grad: 0.0921 (0.0955) loss: 0.8175 (0.8151) time: 0.1614 data: 0.0724 max mem: 9377 +Train: [40] [5700/6250] eta: 0:01:29 lr: 0.000086 grad: 0.0938 (0.0956) loss: 0.8156 (0.8151) time: 0.1893 data: 0.0995 max mem: 9377 +Train: [40] [5800/6250] eta: 0:01:13 lr: 0.000086 grad: 0.0943 (0.0955) loss: 0.8177 (0.8151) time: 0.1499 data: 0.0660 max mem: 9377 +Train: [40] [5900/6250] eta: 0:00:57 lr: 0.000086 grad: 0.0867 (0.0955) loss: 0.8141 (0.8151) time: 0.1552 data: 0.0674 max mem: 9377 +Train: [40] [6000/6250] eta: 0:00:40 lr: 0.000086 grad: 0.0873 (0.0955) loss: 0.8208 (0.8151) time: 0.1608 data: 0.0692 max mem: 9377 +Train: [40] [6100/6250] eta: 0:00:24 lr: 0.000086 grad: 0.0896 (0.0954) loss: 0.8231 (0.8152) time: 0.1805 data: 0.0971 max mem: 9377 +Train: [40] [6200/6250] eta: 0:00:08 lr: 0.000086 grad: 0.0868 (0.0955) loss: 0.8348 (0.8153) time: 0.1665 data: 0.0803 max mem: 9377 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.0893 (0.0955) loss: 0.8270 (0.8153) time: 0.1946 data: 0.1040 max mem: 9377 +Train: [40] Total time: 0:17:07 (0.1644 s / it) +Averaged stats: lr: 0.000086 grad: 0.0893 (0.0955) loss: 0.8270 (0.8153) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:05:47 loss: 0.8341 (0.8341) time: 5.5980 data: 5.5645 max mem: 9377 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8252 (0.8295) time: 0.1598 data: 0.1343 max mem: 9377 +Eval (hcp-train-subset): [40] Total time: 0:00:15 (0.2511 s / it) +Averaged stats (hcp-train-subset): loss: 0.8252 (0.8295) +Eval (hcp-val): [40] [ 0/62] eta: 0:06:02 loss: 0.8374 (0.8374) time: 5.8507 data: 5.8182 max mem: 9377 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8389 (0.8414) time: 0.1344 data: 0.1091 max mem: 9377 +Eval (hcp-val): [40] Total time: 0:00:15 (0.2424 s / it) +Averaged stats (hcp-val): loss: 0.8389 (0.8414) +Eval (nsd-val): [40] [ 0/62] eta: 0:06:30 loss: 0.8120 (0.8120) time: 6.3044 data: 6.2726 max mem: 9377 +Eval (nsd-val): [40] [61/62] eta: 0:00:00 loss: 0.8196 (0.8229) time: 0.1331 data: 0.1058 max mem: 9377 +Eval (nsd-val): [40] Total time: 0:00:15 (0.2502 s / it) +Averaged stats (nsd-val): loss: 0.8196 (0.8229) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 10:01:29 lr: 0.000086 grad: 0.2273 (0.2273) loss: 0.7906 (0.7906) time: 5.7744 data: 5.3451 max mem: 9377 +Train: [41] [ 100/6250] eta: 0:23:39 lr: 0.000086 grad: 0.1065 (0.1225) loss: 0.8371 (0.8312) time: 0.1785 data: 0.0708 max mem: 9377 +Train: [41] [ 200/6250] eta: 0:19:56 lr: 0.000086 grad: 0.0922 (0.1126) loss: 0.8167 (0.8255) time: 0.1647 data: 0.0579 max mem: 9377 +Train: [41] [ 300/6250] eta: 0:18:27 lr: 0.000086 grad: 0.0912 (0.1080) loss: 0.8165 (0.8221) time: 0.1996 data: 0.1059 max mem: 9377 +Train: [41] [ 400/6250] eta: 0:17:15 lr: 0.000086 grad: 0.0998 (0.1067) loss: 0.8141 (0.8212) time: 0.1491 data: 0.0510 max mem: 9377 +Train: [41] [ 500/6250] eta: 0:16:45 lr: 0.000086 grad: 0.0893 (0.1034) loss: 0.8235 (0.8217) time: 0.1259 data: 0.0285 max mem: 9377 +Train: [41] [ 600/6250] eta: 0:16:25 lr: 0.000086 grad: 0.0939 (0.1024) loss: 0.8252 (0.8217) time: 0.1496 data: 0.0495 max mem: 9377 +Train: [41] [ 700/6250] eta: 0:16:08 lr: 0.000085 grad: 0.0944 (0.1017) loss: 0.8139 (0.8208) time: 0.1840 data: 0.0861 max mem: 9377 +Train: [41] [ 800/6250] eta: 0:15:40 lr: 0.000085 grad: 0.0932 (0.1006) loss: 0.8140 (0.8204) time: 0.1360 data: 0.0349 max mem: 9377 +Train: [41] [ 900/6250] eta: 0:15:28 lr: 0.000085 grad: 0.0967 (0.1000) loss: 0.8179 (0.8202) time: 0.1784 data: 0.0836 max mem: 9377 +Train: [41] [1000/6250] eta: 0:15:09 lr: 0.000085 grad: 0.0943 (0.0994) loss: 0.8175 (0.8199) time: 0.1730 data: 0.0850 max mem: 9377 +Train: [41] [1100/6250] eta: 0:14:55 lr: 0.000085 grad: 0.0928 (0.0987) loss: 0.8117 (0.8196) time: 0.1852 data: 0.0939 max mem: 9377 +Train: [41] [1200/6250] eta: 0:14:35 lr: 0.000085 grad: 0.0939 (0.0984) loss: 0.8106 (0.8194) time: 0.1718 data: 0.0828 max mem: 9377 +Train: [41] [1300/6250] eta: 0:14:19 lr: 0.000085 grad: 0.0840 (0.0981) loss: 0.8223 (0.8193) time: 0.1787 data: 0.0885 max mem: 9377 +Train: [41] [1400/6250] eta: 0:14:03 lr: 0.000085 grad: 0.0893 (0.0975) loss: 0.8188 (0.8192) time: 0.1725 data: 0.0727 max mem: 9377 +Train: [41] [1500/6250] eta: 0:13:41 lr: 0.000085 grad: 0.0911 (0.0972) loss: 0.8156 (0.8191) time: 0.1582 data: 0.0647 max mem: 9377 +Train: [41] [1600/6250] eta: 0:13:20 lr: 0.000085 grad: 0.0964 (0.0970) loss: 0.8102 (0.8190) time: 0.1703 data: 0.0793 max mem: 9377 +Train: [41] [1700/6250] eta: 0:12:59 lr: 0.000085 grad: 0.0937 (0.0968) loss: 0.8149 (0.8188) time: 0.1505 data: 0.0567 max mem: 9377 +Train: [41] [1800/6250] eta: 0:12:41 lr: 0.000085 grad: 0.0937 (0.0966) loss: 0.8118 (0.8186) time: 0.1661 data: 0.0844 max mem: 9377 +Train: [41] [1900/6250] eta: 0:12:20 lr: 0.000085 grad: 0.0938 (0.0965) loss: 0.8149 (0.8184) time: 0.1608 data: 0.0600 max mem: 9377 +Train: [41] [2000/6250] eta: 0:12:01 lr: 0.000085 grad: 0.0893 (0.0965) loss: 0.8195 (0.8183) time: 0.1921 data: 0.1010 max mem: 9377 +Train: [41] [2100/6250] eta: 0:11:44 lr: 0.000085 grad: 0.0862 (0.0964) loss: 0.8253 (0.8182) time: 0.1706 data: 0.0761 max mem: 9377 +Train: [41] [2200/6250] eta: 0:11:27 lr: 0.000085 grad: 0.0912 (0.0964) loss: 0.8096 (0.8180) time: 0.1983 data: 0.1089 max mem: 9377 +Train: [41] [2300/6250] eta: 0:11:07 lr: 0.000085 grad: 0.0857 (0.0964) loss: 0.8225 (0.8178) time: 0.1474 data: 0.0569 max mem: 9377 +Train: [41] [2400/6250] eta: 0:10:49 lr: 0.000085 grad: 0.0902 (0.0963) loss: 0.8132 (0.8176) time: 0.1827 data: 0.0987 max mem: 9377 +Train: [41] [2500/6250] eta: 0:10:30 lr: 0.000085 grad: 0.0880 (0.0963) loss: 0.8154 (0.8174) time: 0.1521 data: 0.0498 max mem: 9377 +Train: [41] [2600/6250] eta: 0:10:12 lr: 0.000085 grad: 0.0939 (0.0962) loss: 0.8118 (0.8173) time: 0.1634 data: 0.0708 max mem: 9377 +Train: [41] [2700/6250] eta: 0:09:53 lr: 0.000085 grad: 0.0872 (0.0962) loss: 0.8093 (0.8172) time: 0.1525 data: 0.0729 max mem: 9377 +Train: [41] [2800/6250] eta: 0:09:35 lr: 0.000085 grad: 0.0936 (0.0961) loss: 0.8127 (0.8172) time: 0.1440 data: 0.0506 max mem: 9377 +Train: [41] [2900/6250] eta: 0:09:16 lr: 0.000085 grad: 0.0986 (0.0963) loss: 0.8165 (0.8171) time: 0.1359 data: 0.0486 max mem: 9377 +Train: [41] [3000/6250] eta: 0:08:58 lr: 0.000085 grad: 0.0907 (0.0962) loss: 0.8142 (0.8170) time: 0.1706 data: 0.0819 max mem: 9377 +Train: [41] [3100/6250] eta: 0:08:40 lr: 0.000085 grad: 0.0921 (0.0962) loss: 0.8141 (0.8169) time: 0.1459 data: 0.0568 max mem: 9377 +Train: [41] [3200/6250] eta: 0:08:23 lr: 0.000085 grad: 0.0948 (0.0962) loss: 0.8121 (0.8168) time: 0.1863 data: 0.0985 max mem: 9377 +Train: [41] [3300/6250] eta: 0:08:05 lr: 0.000085 grad: 0.0931 (0.0963) loss: 0.8185 (0.8167) time: 0.1584 data: 0.0703 max mem: 9377 +Train: [41] [3400/6250] eta: 0:07:48 lr: 0.000085 grad: 0.0912 (0.0963) loss: 0.8207 (0.8166) time: 0.1710 data: 0.0874 max mem: 9377 +Train: [41] [3500/6250] eta: 0:07:31 lr: 0.000085 grad: 0.0993 (0.0965) loss: 0.8118 (0.8165) time: 0.1431 data: 0.0583 max mem: 9377 +Train: [41] [3600/6250] eta: 0:07:14 lr: 0.000085 grad: 0.0967 (0.0966) loss: 0.8152 (0.8164) time: 0.1519 data: 0.0619 max mem: 9377 +Train: [41] [3700/6250] eta: 0:06:57 lr: 0.000085 grad: 0.0938 (0.0966) loss: 0.8121 (0.8163) time: 0.1529 data: 0.0566 max mem: 9377 +Train: [41] [3800/6250] eta: 0:06:40 lr: 0.000085 grad: 0.0956 (0.0967) loss: 0.8129 (0.8162) time: 0.1601 data: 0.0752 max mem: 9377 +Train: [41] [3900/6250] eta: 0:06:23 lr: 0.000084 grad: 0.0925 (0.0966) loss: 0.8166 (0.8162) time: 0.1469 data: 0.0596 max mem: 9377 +Train: [41] [4000/6250] eta: 0:06:07 lr: 0.000084 grad: 0.1006 (0.0968) loss: 0.8097 (0.8161) time: 0.1743 data: 0.0907 max mem: 9377 +Train: [41] [4100/6250] eta: 0:05:51 lr: 0.000084 grad: 0.0948 (0.0967) loss: 0.8105 (0.8160) time: 0.1699 data: 0.0661 max mem: 9377 +Train: [41] [4200/6250] eta: 0:05:34 lr: 0.000084 grad: 0.0902 (0.0968) loss: 0.8214 (0.8160) time: 0.1978 data: 0.1080 max mem: 9377 +Train: [41] [4300/6250] eta: 0:05:17 lr: 0.000084 grad: 0.0990 (0.0969) loss: 0.8115 (0.8160) time: 0.1302 data: 0.0435 max mem: 9377 +Train: [41] [4400/6250] eta: 0:05:00 lr: 0.000084 grad: 0.0997 (0.0970) loss: 0.8092 (0.8159) time: 0.1570 data: 0.0702 max mem: 9377 +Train: [41] [4500/6250] eta: 0:04:44 lr: 0.000084 grad: 0.0980 (0.0970) loss: 0.8133 (0.8159) time: 0.1385 data: 0.0487 max mem: 9377 +Train: [41] [4600/6250] eta: 0:04:27 lr: 0.000084 grad: 0.1026 (0.0971) loss: 0.8087 (0.8159) time: 0.1639 data: 0.0723 max mem: 9377 +Train: [41] [4700/6250] eta: 0:04:11 lr: 0.000084 grad: 0.0962 (0.0972) loss: 0.8137 (0.8159) time: 0.1456 data: 0.0476 max mem: 9377 +Train: [41] [4800/6250] eta: 0:03:54 lr: 0.000084 grad: 0.0984 (0.0972) loss: 0.8059 (0.8158) time: 0.1568 data: 0.0721 max mem: 9377 +Train: [41] [4900/6250] eta: 0:03:38 lr: 0.000084 grad: 0.0999 (0.0973) loss: 0.8200 (0.8158) time: 0.1713 data: 0.0900 max mem: 9377 +Train: [41] [5000/6250] eta: 0:03:22 lr: 0.000084 grad: 0.0992 (0.0973) loss: 0.8112 (0.8157) time: 0.1641 data: 0.0817 max mem: 9377 +Train: [41] [5100/6250] eta: 0:03:06 lr: 0.000084 grad: 0.0977 (0.0974) loss: 0.8105 (0.8156) time: 0.1459 data: 0.0565 max mem: 9377 +Train: [41] [5200/6250] eta: 0:02:50 lr: 0.000084 grad: 0.0963 (0.0975) loss: 0.8190 (0.8155) time: 0.1620 data: 0.0780 max mem: 9377 +Train: [41] [5300/6250] eta: 0:02:34 lr: 0.000084 grad: 0.1017 (0.0976) loss: 0.8103 (0.8154) time: 0.1573 data: 0.0702 max mem: 9377 +Train: [41] [5400/6250] eta: 0:02:17 lr: 0.000084 grad: 0.0948 (0.0975) loss: 0.8129 (0.8154) time: 0.1595 data: 0.0662 max mem: 9377 +Train: [41] [5500/6250] eta: 0:02:01 lr: 0.000084 grad: 0.0975 (0.0976) loss: 0.8114 (0.8154) time: 0.1707 data: 0.0727 max mem: 9377 +Train: [41] [5600/6250] eta: 0:01:45 lr: 0.000084 grad: 0.1010 (0.0976) loss: 0.8151 (0.8153) time: 0.1543 data: 0.0591 max mem: 9377 +Train: [41] [5700/6250] eta: 0:01:29 lr: 0.000084 grad: 0.0954 (0.0976) loss: 0.8166 (0.8153) time: 0.1516 data: 0.0663 max mem: 9377 +Train: [41] [5800/6250] eta: 0:01:13 lr: 0.000084 grad: 0.0977 (0.0976) loss: 0.8159 (0.8153) time: 0.1680 data: 0.0881 max mem: 9377 +Train: [41] [5900/6250] eta: 0:00:57 lr: 0.000084 grad: 0.0958 (0.0976) loss: 0.8192 (0.8153) time: 0.1618 data: 0.0665 max mem: 9377 +Train: [41] [6000/6250] eta: 0:00:40 lr: 0.000084 grad: 0.0935 (0.0976) loss: 0.8138 (0.8153) time: 0.1712 data: 0.0741 max mem: 9377 +Train: [41] [6100/6250] eta: 0:00:24 lr: 0.000084 grad: 0.0930 (0.0976) loss: 0.8140 (0.8153) time: 0.1590 data: 0.0632 max mem: 9377 +Train: [41] [6200/6250] eta: 0:00:08 lr: 0.000084 grad: 0.0935 (0.0976) loss: 0.8207 (0.8153) time: 0.1314 data: 0.0312 max mem: 9377 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.0957 (0.0976) loss: 0.8138 (0.8153) time: 0.1802 data: 0.0815 max mem: 9377 +Train: [41] Total time: 0:17:08 (0.1645 s / it) +Averaged stats: lr: 0.000084 grad: 0.0957 (0.0976) loss: 0.8138 (0.8153) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:05:25 loss: 0.8294 (0.8294) time: 5.2478 data: 5.2180 max mem: 9377 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8280 (0.8279) time: 0.1416 data: 0.1143 max mem: 9377 +Eval (hcp-train-subset): [41] Total time: 0:00:14 (0.2328 s / it) +Averaged stats (hcp-train-subset): loss: 0.8280 (0.8279) +Eval (hcp-val): [41] [ 0/62] eta: 0:04:07 loss: 0.8404 (0.8404) time: 3.9961 data: 3.8904 max mem: 9377 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8388 (0.8414) time: 0.1278 data: 0.1024 max mem: 9377 +Eval (hcp-val): [41] Total time: 0:00:14 (0.2337 s / it) +Averaged stats (hcp-val): loss: 0.8388 (0.8414) +Eval (nsd-val): [41] [ 0/62] eta: 0:04:13 loss: 0.8012 (0.8012) time: 4.0853 data: 4.0148 max mem: 9377 +Eval (nsd-val): [41] [61/62] eta: 0:00:00 loss: 0.8134 (0.8147) time: 0.1292 data: 0.1030 max mem: 9377 +Eval (nsd-val): [41] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (nsd-val): loss: 0.8134 (0.8147) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [42] [ 0/6250] eta: 10:56:15 lr: 0.000084 grad: 0.3853 (0.3853) loss: 0.8438 (0.8438) time: 6.3001 data: 6.1444 max mem: 9377 +Train: [42] [ 100/6250] eta: 0:22:33 lr: 0.000084 grad: 0.0920 (0.1245) loss: 0.8276 (0.8357) time: 0.1637 data: 0.0524 max mem: 9377 +Train: [42] [ 200/6250] eta: 0:19:27 lr: 0.000084 grad: 0.1005 (0.1118) loss: 0.8228 (0.8275) time: 0.1672 data: 0.0644 max mem: 9377 +Train: [42] [ 300/6250] eta: 0:17:49 lr: 0.000084 grad: 0.0975 (0.1081) loss: 0.8138 (0.8237) time: 0.1450 data: 0.0402 max mem: 9377 +Train: [42] [ 400/6250] eta: 0:16:48 lr: 0.000084 grad: 0.0902 (0.1063) loss: 0.8225 (0.8222) time: 0.1452 data: 0.0503 max mem: 9377 +Train: [42] [ 500/6250] eta: 0:16:14 lr: 0.000084 grad: 0.0988 (0.1048) loss: 0.8106 (0.8209) time: 0.1505 data: 0.0539 max mem: 9377 +Train: [42] [ 600/6250] eta: 0:15:40 lr: 0.000084 grad: 0.0900 (0.1035) loss: 0.8189 (0.8197) time: 0.1557 data: 0.0647 max mem: 9377 +Train: [42] [ 700/6250] eta: 0:15:13 lr: 0.000084 grad: 0.0932 (0.1021) loss: 0.8172 (0.8190) time: 0.1475 data: 0.0476 max mem: 9377 +Train: [42] [ 800/6250] eta: 0:14:57 lr: 0.000084 grad: 0.0896 (0.1009) loss: 0.8261 (0.8191) time: 0.1665 data: 0.0818 max mem: 9377 +Train: [42] [ 900/6250] eta: 0:14:52 lr: 0.000083 grad: 0.0915 (0.0999) loss: 0.8257 (0.8194) time: 0.1983 data: 0.1048 max mem: 9377 +Train: [42] [1000/6250] eta: 0:14:39 lr: 0.000083 grad: 0.0896 (0.0992) loss: 0.8194 (0.8195) time: 0.1904 data: 0.0968 max mem: 9377 +Train: [42] [1100/6250] eta: 0:14:23 lr: 0.000083 grad: 0.0884 (0.0986) loss: 0.8157 (0.8194) time: 0.1785 data: 0.0893 max mem: 9377 +Train: [42] [1200/6250] eta: 0:14:07 lr: 0.000083 grad: 0.0892 (0.0982) loss: 0.8166 (0.8192) time: 0.1885 data: 0.0950 max mem: 9377 +Train: [42] [1300/6250] eta: 0:13:54 lr: 0.000083 grad: 0.0926 (0.0980) loss: 0.8195 (0.8190) time: 0.1898 data: 0.0912 max mem: 9377 +Train: [42] [1400/6250] eta: 0:13:40 lr: 0.000083 grad: 0.0940 (0.0976) loss: 0.8120 (0.8189) time: 0.2134 data: 0.1299 max mem: 9377 +Train: [42] [1500/6250] eta: 0:13:23 lr: 0.000083 grad: 0.0918 (0.0973) loss: 0.8175 (0.8188) time: 0.1874 data: 0.0970 max mem: 9377 +Train: [42] [1600/6250] eta: 0:13:04 lr: 0.000083 grad: 0.0946 (0.0971) loss: 0.8170 (0.8186) time: 0.1634 data: 0.0689 max mem: 9377 +Train: [42] [1700/6250] eta: 0:12:45 lr: 0.000083 grad: 0.0912 (0.0969) loss: 0.8148 (0.8183) time: 0.1666 data: 0.0693 max mem: 9377 +Train: [42] [1800/6250] eta: 0:12:24 lr: 0.000083 grad: 0.0961 (0.0967) loss: 0.8154 (0.8182) time: 0.1657 data: 0.0758 max mem: 9377 +Train: [42] [1900/6250] eta: 0:12:03 lr: 0.000083 grad: 0.0908 (0.0966) loss: 0.8229 (0.8182) time: 0.1570 data: 0.0726 max mem: 9377 +Train: [42] [2000/6250] eta: 0:11:43 lr: 0.000083 grad: 0.0889 (0.0964) loss: 0.8190 (0.8181) time: 0.1347 data: 0.0520 max mem: 9377 +Train: [42] [2100/6250] eta: 0:11:26 lr: 0.000083 grad: 0.0891 (0.0964) loss: 0.8191 (0.8180) time: 0.1635 data: 0.0718 max mem: 9377 +Train: [42] [2200/6250] eta: 0:11:09 lr: 0.000083 grad: 0.0925 (0.0964) loss: 0.8208 (0.8179) time: 0.1672 data: 0.0827 max mem: 9377 +Train: [42] [2300/6250] eta: 0:10:51 lr: 0.000083 grad: 0.0961 (0.0963) loss: 0.8193 (0.8178) time: 0.1655 data: 0.0860 max mem: 9377 +Train: [42] [2400/6250] eta: 0:10:34 lr: 0.000083 grad: 0.0970 (0.0964) loss: 0.8113 (0.8177) time: 0.1531 data: 0.0715 max mem: 9377 +Train: [42] [2500/6250] eta: 0:10:21 lr: 0.000083 grad: 0.0991 (0.0965) loss: 0.8093 (0.8175) time: 0.1557 data: 0.0535 max mem: 9377 +Train: [42] [2600/6250] eta: 0:10:07 lr: 0.000083 grad: 0.0977 (0.0966) loss: 0.8086 (0.8173) time: 0.1666 data: 0.0699 max mem: 9377 +Train: [42] [2700/6250] eta: 0:09:50 lr: 0.000083 grad: 0.0920 (0.0967) loss: 0.8148 (0.8172) time: 0.1690 data: 0.0669 max mem: 9377 +Train: [42] [2800/6250] eta: 0:09:32 lr: 0.000083 grad: 0.0974 (0.0968) loss: 0.8191 (0.8171) time: 0.1495 data: 0.0581 max mem: 9377 +Train: [42] [2900/6250] eta: 0:09:14 lr: 0.000083 grad: 0.0983 (0.0969) loss: 0.8047 (0.8169) time: 0.1524 data: 0.0598 max mem: 9377 +Train: [42] [3000/6250] eta: 0:08:57 lr: 0.000083 grad: 0.0965 (0.0970) loss: 0.8163 (0.8168) time: 0.1689 data: 0.0828 max mem: 9377 +Train: [42] [3100/6250] eta: 0:08:41 lr: 0.000083 grad: 0.0956 (0.0970) loss: 0.8141 (0.8168) time: 0.1772 data: 0.0935 max mem: 9377 +Train: [42] [3200/6250] eta: 0:08:25 lr: 0.000083 grad: 0.0876 (0.0970) loss: 0.8192 (0.8167) time: 0.1936 data: 0.1052 max mem: 9377 +Train: [42] [3300/6250] eta: 0:08:09 lr: 0.000083 grad: 0.1004 (0.0971) loss: 0.8147 (0.8166) time: 0.2063 data: 0.1247 max mem: 9377 +Train: [42] [3400/6250] eta: 0:07:52 lr: 0.000083 grad: 0.1033 (0.0973) loss: 0.8184 (0.8165) time: 0.1624 data: 0.0731 max mem: 9377 +Train: [42] [3500/6250] eta: 0:07:38 lr: 0.000083 grad: 0.0972 (0.0974) loss: 0.8169 (0.8165) time: 0.0953 data: 0.0063 max mem: 9377 +Train: [42] [3600/6250] eta: 0:07:19 lr: 0.000083 grad: 0.1048 (0.0974) loss: 0.8077 (0.8163) time: 0.1003 data: 0.0003 max mem: 9377 +Train: [42] [3700/6250] eta: 0:07:02 lr: 0.000083 grad: 0.0988 (0.0975) loss: 0.8131 (0.8162) time: 0.1434 data: 0.0551 max mem: 9377 +Train: [42] [3800/6250] eta: 0:06:44 lr: 0.000083 grad: 0.0975 (0.0976) loss: 0.8168 (0.8161) time: 0.1199 data: 0.0297 max mem: 9377 +Train: [42] [3900/6250] eta: 0:06:27 lr: 0.000083 grad: 0.0993 (0.0975) loss: 0.8154 (0.8160) time: 0.1586 data: 0.0724 max mem: 9377 +Train: [42] [4000/6250] eta: 0:06:11 lr: 0.000083 grad: 0.0957 (0.0976) loss: 0.8157 (0.8160) time: 0.1670 data: 0.0750 max mem: 9377 +Train: [42] [4100/6250] eta: 0:05:54 lr: 0.000082 grad: 0.0990 (0.0976) loss: 0.8120 (0.8160) time: 0.1702 data: 0.0763 max mem: 9377 +Train: [42] [4200/6250] eta: 0:05:37 lr: 0.000082 grad: 0.0931 (0.0976) loss: 0.8176 (0.8160) time: 0.1505 data: 0.0543 max mem: 9377 +Train: [42] [4300/6250] eta: 0:05:21 lr: 0.000082 grad: 0.0918 (0.0977) loss: 0.8205 (0.8160) time: 0.1569 data: 0.0674 max mem: 9377 +Train: [42] [4400/6250] eta: 0:05:04 lr: 0.000082 grad: 0.0929 (0.0978) loss: 0.8158 (0.8161) time: 0.1552 data: 0.0593 max mem: 9377 +Train: [42] [4500/6250] eta: 0:04:47 lr: 0.000082 grad: 0.0863 (0.0977) loss: 0.8207 (0.8162) time: 0.1613 data: 0.0810 max mem: 9377 +Train: [42] [4600/6250] eta: 0:04:31 lr: 0.000082 grad: 0.0853 (0.0976) loss: 0.8237 (0.8162) time: 0.1669 data: 0.0821 max mem: 9377 +Train: [42] [4700/6250] eta: 0:04:14 lr: 0.000082 grad: 0.0963 (0.0976) loss: 0.8168 (0.8162) time: 0.1522 data: 0.0645 max mem: 9377 +Train: [42] [4800/6250] eta: 0:03:58 lr: 0.000082 grad: 0.0947 (0.0976) loss: 0.8158 (0.8163) time: 0.1706 data: 0.0765 max mem: 9377 +Train: [42] [4900/6250] eta: 0:03:41 lr: 0.000082 grad: 0.0924 (0.0975) loss: 0.8144 (0.8163) time: 0.1439 data: 0.0519 max mem: 9377 +Train: [42] [5000/6250] eta: 0:03:24 lr: 0.000082 grad: 0.0903 (0.0975) loss: 0.8119 (0.8162) time: 0.1747 data: 0.0923 max mem: 9377 +Train: [42] [5100/6250] eta: 0:03:08 lr: 0.000082 grad: 0.0921 (0.0974) loss: 0.8204 (0.8162) time: 0.1313 data: 0.0366 max mem: 9377 +Train: [42] [5200/6250] eta: 0:02:52 lr: 0.000082 grad: 0.0930 (0.0974) loss: 0.8159 (0.8162) time: 0.1797 data: 0.0900 max mem: 9377 +Train: [42] [5300/6250] eta: 0:02:35 lr: 0.000082 grad: 0.1016 (0.0974) loss: 0.8118 (0.8161) time: 0.1672 data: 0.0831 max mem: 9377 +Train: [42] [5400/6250] eta: 0:02:19 lr: 0.000082 grad: 0.0965 (0.0975) loss: 0.8018 (0.8160) time: 0.1450 data: 0.0565 max mem: 9377 +Train: [42] [5500/6250] eta: 0:02:02 lr: 0.000082 grad: 0.0973 (0.0976) loss: 0.8093 (0.8158) time: 0.1734 data: 0.0976 max mem: 9377 +Train: [42] [5600/6250] eta: 0:01:46 lr: 0.000082 grad: 0.0945 (0.0977) loss: 0.8105 (0.8158) time: 0.1904 data: 0.1085 max mem: 9377 +Train: [42] [5700/6250] eta: 0:01:30 lr: 0.000082 grad: 0.0903 (0.0977) loss: 0.8146 (0.8157) time: 0.2158 data: 0.1329 max mem: 9377 +Train: [42] [5800/6250] eta: 0:01:13 lr: 0.000082 grad: 0.0897 (0.0977) loss: 0.8167 (0.8156) time: 0.1793 data: 0.0936 max mem: 9377 +Train: [42] [5900/6250] eta: 0:00:57 lr: 0.000082 grad: 0.0985 (0.0978) loss: 0.8176 (0.8155) time: 0.1768 data: 0.0919 max mem: 9377 +Train: [42] [6000/6250] eta: 0:00:41 lr: 0.000082 grad: 0.0985 (0.0978) loss: 0.8227 (0.8154) time: 0.1777 data: 0.0899 max mem: 9377 +Train: [42] [6100/6250] eta: 0:00:24 lr: 0.000082 grad: 0.0956 (0.0978) loss: 0.8156 (0.8154) time: 0.1448 data: 0.0538 max mem: 9377 +Train: [42] [6200/6250] eta: 0:00:08 lr: 0.000082 grad: 0.1011 (0.0978) loss: 0.8133 (0.8153) time: 0.1488 data: 0.0591 max mem: 9377 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.0985 (0.0979) loss: 0.8174 (0.8152) time: 0.1766 data: 0.0896 max mem: 9377 +Train: [42] Total time: 0:17:12 (0.1652 s / it) +Averaged stats: lr: 0.000082 grad: 0.0985 (0.0979) loss: 0.8174 (0.8152) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:05:18 loss: 0.8258 (0.8258) time: 5.1341 data: 5.1033 max mem: 9377 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8269 (0.8282) time: 0.1341 data: 0.1089 max mem: 9377 +Eval (hcp-train-subset): [42] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (hcp-train-subset): loss: 0.8269 (0.8282) +Eval (hcp-val): [42] [ 0/62] eta: 0:04:38 loss: 0.8391 (0.8391) time: 4.4990 data: 4.4273 max mem: 9377 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8382 (0.8409) time: 0.1234 data: 0.0980 max mem: 9377 +Eval (hcp-val): [42] Total time: 0:00:14 (0.2310 s / it) +Averaged stats (hcp-val): loss: 0.8382 (0.8409) +Eval (nsd-val): [42] [ 0/62] eta: 0:03:52 loss: 0.8040 (0.8040) time: 3.7466 data: 3.6444 max mem: 9377 +Eval (nsd-val): [42] [61/62] eta: 0:00:00 loss: 0.8151 (0.8157) time: 0.1181 data: 0.0924 max mem: 9377 +Eval (nsd-val): [42] Total time: 0:00:14 (0.2318 s / it) +Averaged stats (nsd-val): loss: 0.8151 (0.8157) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [43] [ 0/6250] eta: 7:54:32 lr: 0.000082 grad: 0.0590 (0.0590) loss: 0.8630 (0.8630) time: 4.5557 data: 4.3277 max mem: 9377 +Train: [43] [ 100/6250] eta: 0:21:49 lr: 0.000082 grad: 0.1037 (0.1141) loss: 0.8236 (0.8308) time: 0.1688 data: 0.0694 max mem: 9377 +Train: [43] [ 200/6250] eta: 0:19:25 lr: 0.000082 grad: 0.0980 (0.1112) loss: 0.8121 (0.8236) time: 0.2252 data: 0.1279 max mem: 9377 +Train: [43] [ 300/6250] eta: 0:18:24 lr: 0.000082 grad: 0.0980 (0.1077) loss: 0.8197 (0.8214) time: 0.2003 data: 0.0966 max mem: 9377 +Train: [43] [ 400/6250] eta: 0:17:33 lr: 0.000082 grad: 0.1043 (0.1066) loss: 0.8190 (0.8201) time: 0.1769 data: 0.0906 max mem: 9377 +Train: [43] [ 500/6250] eta: 0:16:50 lr: 0.000082 grad: 0.0916 (0.1042) loss: 0.8216 (0.8204) time: 0.1322 data: 0.0475 max mem: 9377 +Train: [43] [ 600/6250] eta: 0:16:18 lr: 0.000082 grad: 0.0897 (0.1028) loss: 0.8210 (0.8205) time: 0.1626 data: 0.0720 max mem: 9377 +Train: [43] [ 700/6250] eta: 0:16:00 lr: 0.000082 grad: 0.0918 (0.1017) loss: 0.8240 (0.8209) time: 0.1134 data: 0.0002 max mem: 9377 +Train: [43] [ 800/6250] eta: 0:15:26 lr: 0.000082 grad: 0.0855 (0.1009) loss: 0.8196 (0.8206) time: 0.1443 data: 0.0582 max mem: 9377 +Train: [43] [ 900/6250] eta: 0:15:17 lr: 0.000082 grad: 0.0894 (0.1002) loss: 0.8236 (0.8206) time: 0.1727 data: 0.0873 max mem: 9377 +Train: [43] [1000/6250] eta: 0:14:54 lr: 0.000081 grad: 0.0907 (0.0999) loss: 0.8183 (0.8202) time: 0.1502 data: 0.0681 max mem: 9377 +Train: [43] [1100/6250] eta: 0:14:32 lr: 0.000081 grad: 0.0873 (0.0992) loss: 0.8195 (0.8200) time: 0.1423 data: 0.0580 max mem: 9377 +Train: [43] [1200/6250] eta: 0:14:13 lr: 0.000081 grad: 0.0932 (0.0991) loss: 0.8158 (0.8197) time: 0.1499 data: 0.0519 max mem: 9377 +Train: [43] [1300/6250] eta: 0:13:55 lr: 0.000081 grad: 0.0891 (0.0986) loss: 0.8166 (0.8196) time: 0.1568 data: 0.0663 max mem: 9377 +Train: [43] [1400/6250] eta: 0:13:33 lr: 0.000081 grad: 0.0963 (0.0984) loss: 0.8197 (0.8193) time: 0.1541 data: 0.0627 max mem: 9377 +Train: [43] [1500/6250] eta: 0:13:13 lr: 0.000081 grad: 0.0898 (0.0984) loss: 0.8122 (0.8189) time: 0.1675 data: 0.0651 max mem: 9377 +Train: [43] [1600/6250] eta: 0:12:52 lr: 0.000081 grad: 0.0913 (0.0984) loss: 0.8142 (0.8185) time: 0.1569 data: 0.0602 max mem: 9377 +Train: [43] [1700/6250] eta: 0:12:32 lr: 0.000081 grad: 0.0938 (0.0982) loss: 0.8145 (0.8183) time: 0.1357 data: 0.0448 max mem: 9377 +Train: [43] [1800/6250] eta: 0:12:14 lr: 0.000081 grad: 0.0962 (0.0981) loss: 0.8144 (0.8180) time: 0.1638 data: 0.0768 max mem: 9377 +Train: [43] [1900/6250] eta: 0:11:55 lr: 0.000081 grad: 0.0954 (0.0981) loss: 0.8174 (0.8178) time: 0.1678 data: 0.0800 max mem: 9377 +Train: [43] [2000/6250] eta: 0:11:37 lr: 0.000081 grad: 0.0936 (0.0980) loss: 0.8171 (0.8176) time: 0.1677 data: 0.0827 max mem: 9377 +Train: [43] [2100/6250] eta: 0:11:22 lr: 0.000081 grad: 0.0935 (0.0979) loss: 0.8088 (0.8173) time: 0.1383 data: 0.0508 max mem: 9377 +Train: [43] [2200/6250] eta: 0:11:05 lr: 0.000081 grad: 0.0893 (0.0981) loss: 0.8160 (0.8173) time: 0.1604 data: 0.0720 max mem: 9377 +Train: [43] [2300/6250] eta: 0:10:48 lr: 0.000081 grad: 0.0957 (0.0981) loss: 0.8057 (0.8171) time: 0.1613 data: 0.0684 max mem: 9377 +Train: [43] [2400/6250] eta: 0:10:32 lr: 0.000081 grad: 0.0938 (0.0982) loss: 0.8100 (0.8168) time: 0.1573 data: 0.0696 max mem: 9377 +Train: [43] [2500/6250] eta: 0:10:15 lr: 0.000081 grad: 0.1040 (0.0984) loss: 0.8123 (0.8166) time: 0.1461 data: 0.0557 max mem: 9377 +Train: [43] [2600/6250] eta: 0:09:58 lr: 0.000081 grad: 0.1036 (0.0986) loss: 0.8105 (0.8163) time: 0.1645 data: 0.0735 max mem: 9377 +Train: [43] [2700/6250] eta: 0:09:41 lr: 0.000081 grad: 0.1058 (0.0988) loss: 0.8121 (0.8160) time: 0.1648 data: 0.0763 max mem: 9377 +Train: [43] [2800/6250] eta: 0:09:22 lr: 0.000081 grad: 0.0999 (0.0990) loss: 0.8157 (0.8158) time: 0.1347 data: 0.0419 max mem: 9377 +Train: [43] [2900/6250] eta: 0:09:05 lr: 0.000081 grad: 0.0922 (0.0991) loss: 0.8106 (0.8156) time: 0.1531 data: 0.0597 max mem: 9377 +Train: [43] [3000/6250] eta: 0:08:46 lr: 0.000081 grad: 0.0943 (0.0991) loss: 0.8221 (0.8155) time: 0.1391 data: 0.0528 max mem: 9377 +Train: [43] [3100/6250] eta: 0:08:28 lr: 0.000081 grad: 0.1019 (0.0993) loss: 0.8046 (0.8153) time: 0.1440 data: 0.0653 max mem: 9377 +Train: [43] [3200/6250] eta: 0:08:11 lr: 0.000081 grad: 0.1075 (0.0994) loss: 0.8047 (0.8152) time: 0.1566 data: 0.0602 max mem: 9377 +Train: [43] [3300/6250] eta: 0:07:55 lr: 0.000081 grad: 0.0974 (0.0996) loss: 0.8140 (0.8151) time: 0.1419 data: 0.0504 max mem: 9377 +Train: [43] [3400/6250] eta: 0:07:38 lr: 0.000081 grad: 0.1032 (0.0996) loss: 0.8108 (0.8150) time: 0.1463 data: 0.0597 max mem: 9377 +Train: [43] [3500/6250] eta: 0:07:22 lr: 0.000081 grad: 0.1058 (0.0997) loss: 0.8137 (0.8149) time: 0.1178 data: 0.0202 max mem: 9377 +Train: [43] [3600/6250] eta: 0:07:05 lr: 0.000081 grad: 0.0965 (0.0998) loss: 0.8103 (0.8148) time: 0.1294 data: 0.0350 max mem: 9377 +Train: [43] [3700/6250] eta: 0:06:49 lr: 0.000081 grad: 0.0938 (0.0998) loss: 0.8180 (0.8147) time: 0.1608 data: 0.0817 max mem: 9377 +Train: [43] [3800/6250] eta: 0:06:32 lr: 0.000081 grad: 0.0969 (0.0998) loss: 0.8110 (0.8146) time: 0.1341 data: 0.0407 max mem: 9377 +Train: [43] [3900/6250] eta: 0:06:16 lr: 0.000081 grad: 0.0945 (0.0999) loss: 0.8124 (0.8145) time: 0.1645 data: 0.0696 max mem: 9377 +Train: [43] [4000/6250] eta: 0:06:00 lr: 0.000081 grad: 0.1029 (0.1000) loss: 0.8038 (0.8144) time: 0.1721 data: 0.0804 max mem: 9377 +Train: [43] [4100/6250] eta: 0:05:43 lr: 0.000081 grad: 0.0979 (0.1001) loss: 0.8125 (0.8143) time: 0.1286 data: 0.0345 max mem: 9377 +Train: [43] [4200/6250] eta: 0:05:27 lr: 0.000080 grad: 0.0980 (0.1001) loss: 0.8110 (0.8142) time: 0.1540 data: 0.0690 max mem: 9377 +Train: [43] [4300/6250] eta: 0:05:11 lr: 0.000080 grad: 0.0955 (0.1001) loss: 0.8062 (0.8142) time: 0.1526 data: 0.0652 max mem: 9377 +Train: [43] [4400/6250] eta: 0:04:55 lr: 0.000080 grad: 0.0962 (0.1001) loss: 0.8160 (0.8141) time: 0.1586 data: 0.0716 max mem: 9377 +Train: [43] [4500/6250] eta: 0:04:39 lr: 0.000080 grad: 0.1044 (0.1001) loss: 0.8146 (0.8140) time: 0.1400 data: 0.0503 max mem: 9377 +Train: [43] [4600/6250] eta: 0:04:23 lr: 0.000080 grad: 0.0917 (0.1001) loss: 0.8204 (0.8140) time: 0.1352 data: 0.0487 max mem: 9377 +Train: [43] [4700/6250] eta: 0:04:06 lr: 0.000080 grad: 0.0994 (0.1001) loss: 0.8093 (0.8139) time: 0.1590 data: 0.0750 max mem: 9377 +Train: [43] [4800/6250] eta: 0:03:50 lr: 0.000080 grad: 0.1042 (0.1001) loss: 0.8082 (0.8139) time: 0.1468 data: 0.0557 max mem: 9377 +Train: [43] [4900/6250] eta: 0:03:34 lr: 0.000080 grad: 0.1027 (0.1002) loss: 0.8044 (0.8138) time: 0.1692 data: 0.0944 max mem: 9377 +Train: [43] [5000/6250] eta: 0:03:19 lr: 0.000080 grad: 0.1031 (0.1002) loss: 0.8121 (0.8137) time: 0.1334 data: 0.0393 max mem: 9377 +Train: [43] [5100/6250] eta: 0:03:02 lr: 0.000080 grad: 0.0921 (0.1002) loss: 0.8164 (0.8136) time: 0.1481 data: 0.0620 max mem: 9377 +Train: [43] [5200/6250] eta: 0:02:46 lr: 0.000080 grad: 0.0970 (0.1003) loss: 0.8123 (0.8136) time: 0.1691 data: 0.0868 max mem: 9377 +Train: [43] [5300/6250] eta: 0:02:30 lr: 0.000080 grad: 0.0970 (0.1003) loss: 0.8135 (0.8135) time: 0.1584 data: 0.0713 max mem: 9377 +Train: [43] [5400/6250] eta: 0:02:15 lr: 0.000080 grad: 0.1037 (0.1003) loss: 0.8077 (0.8134) time: 0.1912 data: 0.1009 max mem: 9377 +Train: [43] [5500/6250] eta: 0:01:59 lr: 0.000080 grad: 0.0940 (0.1003) loss: 0.8131 (0.8134) time: 0.1799 data: 0.0868 max mem: 9377 +Train: [43] [5600/6250] eta: 0:01:43 lr: 0.000080 grad: 0.1034 (0.1003) loss: 0.8132 (0.8134) time: 0.1664 data: 0.0799 max mem: 9377 +Train: [43] [5700/6250] eta: 0:01:27 lr: 0.000080 grad: 0.1016 (0.1003) loss: 0.8172 (0.8133) time: 0.2632 data: 0.1835 max mem: 9377 +Train: [43] [5800/6250] eta: 0:01:11 lr: 0.000080 grad: 0.1043 (0.1004) loss: 0.8140 (0.8132) time: 0.1865 data: 0.0948 max mem: 9377 +Train: [43] [5900/6250] eta: 0:00:55 lr: 0.000080 grad: 0.0975 (0.1004) loss: 0.8183 (0.8132) time: 0.1774 data: 0.0935 max mem: 9377 +Train: [43] [6000/6250] eta: 0:00:39 lr: 0.000080 grad: 0.0986 (0.1004) loss: 0.8124 (0.8132) time: 0.1576 data: 0.0694 max mem: 9377 +Train: [43] [6100/6250] eta: 0:00:23 lr: 0.000080 grad: 0.1050 (0.1004) loss: 0.8117 (0.8131) time: 0.1514 data: 0.0536 max mem: 9377 +Train: [43] [6200/6250] eta: 0:00:07 lr: 0.000080 grad: 0.0919 (0.1004) loss: 0.8149 (0.8131) time: 0.1817 data: 0.0895 max mem: 9377 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.0934 (0.1004) loss: 0.8194 (0.8131) time: 0.1622 data: 0.0716 max mem: 9377 +Train: [43] Total time: 0:16:44 (0.1607 s / it) +Averaged stats: lr: 0.000080 grad: 0.0934 (0.1004) loss: 0.8194 (0.8131) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:06:04 loss: 0.8325 (0.8325) time: 5.8833 data: 5.8502 max mem: 9377 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8257 (0.8270) time: 0.1467 data: 0.1190 max mem: 9377 +Eval (hcp-train-subset): [43] Total time: 0:00:15 (0.2566 s / it) +Averaged stats (hcp-train-subset): loss: 0.8257 (0.8270) +Eval (hcp-val): [43] [ 0/62] eta: 0:04:14 loss: 0.8377 (0.8377) time: 4.1126 data: 4.0279 max mem: 9377 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8382 (0.8409) time: 0.1449 data: 0.1192 max mem: 9377 +Eval (hcp-val): [43] Total time: 0:00:14 (0.2418 s / it) +Averaged stats (hcp-val): loss: 0.8382 (0.8409) +Eval (nsd-val): [43] [ 0/62] eta: 0:05:16 loss: 0.8100 (0.8100) time: 5.1080 data: 5.0753 max mem: 9377 +Eval (nsd-val): [43] [61/62] eta: 0:00:00 loss: 0.8206 (0.8213) time: 0.1402 data: 0.1129 max mem: 9377 +Eval (nsd-val): [43] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (nsd-val): loss: 0.8206 (0.8213) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [44] [ 0/6250] eta: 12:20:02 lr: 0.000080 grad: 0.0987 (0.0987) loss: 0.8040 (0.8040) time: 7.1044 data: 7.0028 max mem: 9377 +Train: [44] [ 100/6250] eta: 0:22:54 lr: 0.000080 grad: 0.0824 (0.1169) loss: 0.8341 (0.8299) time: 0.1660 data: 0.0602 max mem: 9377 +Train: [44] [ 200/6250] eta: 0:19:03 lr: 0.000080 grad: 0.0886 (0.1107) loss: 0.8279 (0.8283) time: 0.1601 data: 0.0615 max mem: 9377 +Train: [44] [ 300/6250] eta: 0:17:36 lr: 0.000080 grad: 0.0926 (0.1077) loss: 0.8259 (0.8257) time: 0.1504 data: 0.0569 max mem: 9377 +Train: [44] [ 400/6250] eta: 0:16:49 lr: 0.000080 grad: 0.0967 (0.1043) loss: 0.8173 (0.8249) time: 0.1691 data: 0.0651 max mem: 9377 +Train: [44] [ 500/6250] eta: 0:16:23 lr: 0.000080 grad: 0.0861 (0.1026) loss: 0.8233 (0.8235) time: 0.2078 data: 0.1143 max mem: 9377 +Train: [44] [ 600/6250] eta: 0:15:54 lr: 0.000080 grad: 0.0924 (0.1022) loss: 0.8190 (0.8222) time: 0.1554 data: 0.0631 max mem: 9377 +Train: [44] [ 700/6250] eta: 0:15:41 lr: 0.000080 grad: 0.0936 (0.1011) loss: 0.8152 (0.8217) time: 0.1453 data: 0.0520 max mem: 9377 +Train: [44] [ 800/6250] eta: 0:15:20 lr: 0.000080 grad: 0.1003 (0.1012) loss: 0.8167 (0.8209) time: 0.1436 data: 0.0543 max mem: 9377 +Train: [44] [ 900/6250] eta: 0:15:19 lr: 0.000080 grad: 0.0935 (0.1005) loss: 0.8178 (0.8204) time: 0.1866 data: 0.1006 max mem: 9377 +Train: [44] [1000/6250] eta: 0:15:01 lr: 0.000080 grad: 0.0893 (0.0999) loss: 0.8107 (0.8202) time: 0.1584 data: 0.0672 max mem: 9377 +Train: [44] [1100/6250] eta: 0:14:45 lr: 0.000079 grad: 0.0884 (0.0995) loss: 0.8209 (0.8197) time: 0.1378 data: 0.0539 max mem: 9377 +Train: [44] [1200/6250] eta: 0:14:29 lr: 0.000079 grad: 0.0941 (0.0998) loss: 0.8198 (0.8192) time: 0.1700 data: 0.0769 max mem: 9377 +Train: [44] [1300/6250] eta: 0:14:13 lr: 0.000079 grad: 0.1018 (0.0998) loss: 0.8056 (0.8188) time: 0.1741 data: 0.0835 max mem: 9377 +Train: [44] [1400/6250] eta: 0:13:55 lr: 0.000079 grad: 0.0982 (0.1002) loss: 0.8118 (0.8184) time: 0.1693 data: 0.0831 max mem: 9377 +Train: [44] [1500/6250] eta: 0:13:35 lr: 0.000079 grad: 0.0962 (0.1001) loss: 0.8119 (0.8181) time: 0.1726 data: 0.0889 max mem: 9377 +Train: [44] [1600/6250] eta: 0:13:13 lr: 0.000079 grad: 0.0977 (0.1004) loss: 0.8165 (0.8177) time: 0.1737 data: 0.0868 max mem: 9377 +Train: [44] [1700/6250] eta: 0:12:52 lr: 0.000079 grad: 0.1001 (0.1004) loss: 0.8077 (0.8173) time: 0.1482 data: 0.0548 max mem: 9377 +Train: [44] [1800/6250] eta: 0:12:32 lr: 0.000079 grad: 0.0949 (0.1003) loss: 0.8163 (0.8171) time: 0.1648 data: 0.0758 max mem: 9377 +Train: [44] [1900/6250] eta: 0:12:10 lr: 0.000079 grad: 0.0958 (0.1001) loss: 0.8156 (0.8169) time: 0.1470 data: 0.0530 max mem: 9377 +Train: [44] [2000/6250] eta: 0:11:51 lr: 0.000079 grad: 0.0958 (0.1001) loss: 0.8067 (0.8166) time: 0.1620 data: 0.0721 max mem: 9377 +Train: [44] [2100/6250] eta: 0:11:35 lr: 0.000079 grad: 0.0953 (0.1003) loss: 0.8142 (0.8163) time: 0.2009 data: 0.1134 max mem: 9377 +Train: [44] [2200/6250] eta: 0:11:19 lr: 0.000079 grad: 0.1034 (0.1004) loss: 0.8109 (0.8161) time: 0.1710 data: 0.0787 max mem: 9377 +Train: [44] [2300/6250] eta: 0:11:05 lr: 0.000079 grad: 0.0976 (0.1006) loss: 0.8118 (0.8158) time: 0.1944 data: 0.1139 max mem: 9377 +Train: [44] [2400/6250] eta: 0:10:49 lr: 0.000079 grad: 0.0992 (0.1007) loss: 0.8153 (0.8156) time: 0.1775 data: 0.0826 max mem: 9377 +Train: [44] [2500/6250] eta: 0:10:35 lr: 0.000079 grad: 0.0981 (0.1008) loss: 0.8077 (0.8155) time: 0.1895 data: 0.0838 max mem: 9377 +Train: [44] [2600/6250] eta: 0:10:21 lr: 0.000079 grad: 0.0987 (0.1007) loss: 0.8166 (0.8153) time: 0.1876 data: 0.0956 max mem: 9377 +Train: [44] [2700/6250] eta: 0:10:06 lr: 0.000079 grad: 0.1011 (0.1009) loss: 0.8063 (0.8151) time: 0.1998 data: 0.1030 max mem: 9377 +Train: [44] [2800/6250] eta: 0:09:49 lr: 0.000079 grad: 0.1049 (0.1009) loss: 0.8044 (0.8150) time: 0.1644 data: 0.0686 max mem: 9377 +Train: [44] [2900/6250] eta: 0:09:31 lr: 0.000079 grad: 0.0969 (0.1010) loss: 0.8136 (0.8149) time: 0.1556 data: 0.0622 max mem: 9377 +Train: [44] [3000/6250] eta: 0:09:13 lr: 0.000079 grad: 0.1018 (0.1010) loss: 0.8156 (0.8149) time: 0.1407 data: 0.0376 max mem: 9377 +Train: [44] [3100/6250] eta: 0:08:56 lr: 0.000079 grad: 0.0925 (0.1010) loss: 0.8147 (0.8148) time: 0.1990 data: 0.1123 max mem: 9377 +Train: [44] [3200/6250] eta: 0:08:38 lr: 0.000079 grad: 0.1013 (0.1010) loss: 0.8057 (0.8148) time: 0.1412 data: 0.0509 max mem: 9377 +Train: [44] [3300/6250] eta: 0:08:20 lr: 0.000079 grad: 0.0983 (0.1010) loss: 0.8132 (0.8148) time: 0.1681 data: 0.0820 max mem: 9377 +Train: [44] [3400/6250] eta: 0:08:04 lr: 0.000079 grad: 0.0957 (0.1009) loss: 0.8232 (0.8149) time: 0.1492 data: 0.0634 max mem: 9377 +Train: [44] [3500/6250] eta: 0:07:45 lr: 0.000079 grad: 0.0997 (0.1008) loss: 0.8128 (0.8149) time: 0.1449 data: 0.0582 max mem: 9377 +Train: [44] [3600/6250] eta: 0:07:28 lr: 0.000079 grad: 0.0918 (0.1007) loss: 0.8243 (0.8150) time: 0.1587 data: 0.0689 max mem: 9377 +Train: [44] [3700/6250] eta: 0:07:11 lr: 0.000079 grad: 0.0942 (0.1006) loss: 0.8193 (0.8150) time: 0.2068 data: 0.1172 max mem: 9377 +Train: [44] [3800/6250] eta: 0:06:53 lr: 0.000079 grad: 0.0963 (0.1007) loss: 0.8159 (0.8150) time: 0.1312 data: 0.0356 max mem: 9377 +Train: [44] [3900/6250] eta: 0:06:36 lr: 0.000079 grad: 0.0937 (0.1008) loss: 0.8158 (0.8149) time: 0.1812 data: 0.1005 max mem: 9377 +Train: [44] [4000/6250] eta: 0:06:18 lr: 0.000079 grad: 0.0966 (0.1009) loss: 0.8137 (0.8148) time: 0.1701 data: 0.0820 max mem: 9377 +Train: [44] [4100/6250] eta: 0:06:01 lr: 0.000079 grad: 0.0977 (0.1009) loss: 0.8185 (0.8147) time: 0.1697 data: 0.0851 max mem: 9377 +Train: [44] [4200/6250] eta: 0:05:44 lr: 0.000078 grad: 0.0945 (0.1010) loss: 0.8187 (0.8147) time: 0.1529 data: 0.0631 max mem: 9377 +Train: [44] [4300/6250] eta: 0:05:27 lr: 0.000078 grad: 0.1000 (0.1009) loss: 0.8149 (0.8148) time: 0.1605 data: 0.0675 max mem: 9377 +Train: [44] [4400/6250] eta: 0:05:10 lr: 0.000078 grad: 0.1029 (0.1010) loss: 0.8106 (0.8147) time: 0.1684 data: 0.0828 max mem: 9377 +Train: [44] [4500/6250] eta: 0:04:53 lr: 0.000078 grad: 0.1016 (0.1010) loss: 0.8248 (0.8147) time: 0.1744 data: 0.0844 max mem: 9377 +Train: [44] [4600/6250] eta: 0:04:36 lr: 0.000078 grad: 0.1018 (0.1010) loss: 0.8123 (0.8147) time: 0.1727 data: 0.0888 max mem: 9377 +Train: [44] [4700/6250] eta: 0:04:19 lr: 0.000078 grad: 0.0987 (0.1010) loss: 0.8143 (0.8146) time: 0.1498 data: 0.0664 max mem: 9377 +Train: [44] [4800/6250] eta: 0:04:02 lr: 0.000078 grad: 0.0953 (0.1010) loss: 0.8171 (0.8146) time: 0.1574 data: 0.0717 max mem: 9377 +Train: [44] [4900/6250] eta: 0:03:45 lr: 0.000078 grad: 0.0955 (0.1010) loss: 0.8233 (0.8146) time: 0.1495 data: 0.0587 max mem: 9377 +Train: [44] [5000/6250] eta: 0:03:29 lr: 0.000078 grad: 0.0962 (0.1010) loss: 0.8142 (0.8146) time: 0.1598 data: 0.0736 max mem: 9377 +Train: [44] [5100/6250] eta: 0:03:12 lr: 0.000078 grad: 0.0919 (0.1010) loss: 0.8080 (0.8146) time: 0.1692 data: 0.0849 max mem: 9377 +Train: [44] [5200/6250] eta: 0:02:55 lr: 0.000078 grad: 0.0917 (0.1009) loss: 0.8089 (0.8145) time: 0.1508 data: 0.0552 max mem: 9377 +Train: [44] [5300/6250] eta: 0:02:38 lr: 0.000078 grad: 0.0974 (0.1009) loss: 0.8170 (0.8146) time: 0.1424 data: 0.0563 max mem: 9377 +Train: [44] [5400/6250] eta: 0:02:21 lr: 0.000078 grad: 0.0979 (0.1008) loss: 0.8170 (0.8145) time: 0.1913 data: 0.1068 max mem: 9377 +Train: [44] [5500/6250] eta: 0:02:04 lr: 0.000078 grad: 0.0965 (0.1009) loss: 0.8068 (0.8145) time: 0.1533 data: 0.0661 max mem: 9377 +Train: [44] [5600/6250] eta: 0:01:48 lr: 0.000078 grad: 0.0958 (0.1008) loss: 0.8159 (0.8145) time: 0.1729 data: 0.0920 max mem: 9377 +Train: [44] [5700/6250] eta: 0:01:31 lr: 0.000078 grad: 0.0999 (0.1008) loss: 0.8094 (0.8144) time: 0.1519 data: 0.0658 max mem: 9377 +Train: [44] [5800/6250] eta: 0:01:14 lr: 0.000078 grad: 0.1082 (0.1009) loss: 0.8093 (0.8144) time: 0.1367 data: 0.0588 max mem: 9377 +Train: [44] [5900/6250] eta: 0:00:58 lr: 0.000078 grad: 0.0987 (0.1009) loss: 0.8164 (0.8143) time: 0.1605 data: 0.0718 max mem: 9377 +Train: [44] [6000/6250] eta: 0:00:41 lr: 0.000078 grad: 0.0963 (0.1009) loss: 0.8120 (0.8143) time: 0.1644 data: 0.0760 max mem: 9377 +Train: [44] [6100/6250] eta: 0:00:24 lr: 0.000078 grad: 0.1007 (0.1009) loss: 0.8058 (0.8142) time: 0.1435 data: 0.0534 max mem: 9377 +Train: [44] [6200/6250] eta: 0:00:08 lr: 0.000078 grad: 0.1040 (0.1010) loss: 0.8124 (0.8142) time: 0.1305 data: 0.0364 max mem: 9377 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.1035 (0.1010) loss: 0.8156 (0.8142) time: 0.1354 data: 0.0315 max mem: 9377 +Train: [44] Total time: 0:17:18 (0.1662 s / it) +Averaged stats: lr: 0.000078 grad: 0.1035 (0.1010) loss: 0.8156 (0.8142) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:04:32 loss: 0.8339 (0.8339) time: 4.3964 data: 4.3066 max mem: 9377 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8236 (0.8255) time: 0.1540 data: 0.1265 max mem: 9377 +Eval (hcp-train-subset): [44] Total time: 0:00:14 (0.2389 s / it) +Averaged stats (hcp-train-subset): loss: 0.8236 (0.8255) +Making plots (hcp-train-subset): example=56 +Eval (hcp-val): [44] [ 0/62] eta: 0:04:40 loss: 0.8412 (0.8412) time: 4.5217 data: 4.4574 max mem: 9377 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8408 (0.8414) time: 0.1606 data: 0.1348 max mem: 9377 +Eval (hcp-val): [44] Total time: 0:00:15 (0.2539 s / it) +Averaged stats (hcp-val): loss: 0.8408 (0.8414) +Making plots (hcp-val): example=17 +Eval (nsd-val): [44] [ 0/62] eta: 0:04:00 loss: 0.8109 (0.8109) time: 3.8836 data: 3.8107 max mem: 9377 +Eval (nsd-val): [44] [61/62] eta: 0:00:00 loss: 0.8176 (0.8192) time: 0.1599 data: 0.1323 max mem: 9377 +Eval (nsd-val): [44] Total time: 0:00:15 (0.2547 s / it) +Averaged stats (nsd-val): loss: 0.8176 (0.8192) +Making plots (nsd-val): example=60 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00044.pth +Train: [45] [ 0/6250] eta: 13:17:01 lr: 0.000078 grad: 0.3100 (0.3100) loss: 0.7476 (0.7476) time: 7.6515 data: 7.4720 max mem: 9377 +Train: [45] [ 100/6250] eta: 0:24:47 lr: 0.000078 grad: 0.1103 (0.1347) loss: 0.8306 (0.8283) time: 0.1636 data: 0.0542 max mem: 9377 +Train: [45] [ 200/6250] eta: 0:20:54 lr: 0.000078 grad: 0.1061 (0.1206) loss: 0.8182 (0.8260) time: 0.1516 data: 0.0503 max mem: 9377 +Train: [45] [ 300/6250] eta: 0:19:17 lr: 0.000078 grad: 0.0998 (0.1158) loss: 0.8193 (0.8224) time: 0.1711 data: 0.0643 max mem: 9377 +Train: [45] [ 400/6250] eta: 0:18:10 lr: 0.000078 grad: 0.0972 (0.1123) loss: 0.8133 (0.8205) time: 0.1504 data: 0.0567 max mem: 9377 +Train: [45] [ 500/6250] eta: 0:17:23 lr: 0.000078 grad: 0.0887 (0.1095) loss: 0.8103 (0.8188) time: 0.1676 data: 0.0774 max mem: 9377 +Train: [45] [ 600/6250] eta: 0:16:43 lr: 0.000078 grad: 0.0978 (0.1079) loss: 0.8150 (0.8185) time: 0.1325 data: 0.0437 max mem: 9377 +Train: [45] [ 700/6250] eta: 0:16:12 lr: 0.000078 grad: 0.0979 (0.1067) loss: 0.8177 (0.8180) time: 0.1526 data: 0.0690 max mem: 9377 +Train: [45] [ 800/6250] eta: 0:15:57 lr: 0.000078 grad: 0.0939 (0.1054) loss: 0.8205 (0.8182) time: 0.1734 data: 0.0777 max mem: 9377 +Train: [45] [ 900/6250] eta: 0:15:36 lr: 0.000078 grad: 0.0929 (0.1047) loss: 0.8185 (0.8180) time: 0.1967 data: 0.1044 max mem: 9377 +Train: [45] [1000/6250] eta: 0:15:11 lr: 0.000078 grad: 0.0911 (0.1039) loss: 0.8244 (0.8179) time: 0.1726 data: 0.0855 max mem: 9377 +Train: [45] [1100/6250] eta: 0:14:44 lr: 0.000077 grad: 0.0957 (0.1032) loss: 0.8101 (0.8176) time: 0.1587 data: 0.0745 max mem: 9377 +Train: [45] [1200/6250] eta: 0:14:20 lr: 0.000077 grad: 0.0930 (0.1027) loss: 0.8179 (0.8174) time: 0.1779 data: 0.0888 max mem: 9377 +Train: [45] [1300/6250] eta: 0:14:07 lr: 0.000077 grad: 0.0997 (0.1029) loss: 0.8150 (0.8170) time: 0.1698 data: 0.0741 max mem: 9377 +Train: [45] [1400/6250] eta: 0:13:54 lr: 0.000077 grad: 0.0962 (0.1029) loss: 0.8107 (0.8166) time: 0.1936 data: 0.0951 max mem: 9377 +Train: [45] [1500/6250] eta: 0:13:32 lr: 0.000077 grad: 0.0975 (0.1025) loss: 0.8145 (0.8164) time: 0.1546 data: 0.0624 max mem: 9377 +Train: [45] [1600/6250] eta: 0:13:10 lr: 0.000077 grad: 0.0976 (0.1026) loss: 0.8165 (0.8161) time: 0.1492 data: 0.0551 max mem: 9377 +Train: [45] [1700/6250] eta: 0:12:51 lr: 0.000077 grad: 0.1003 (0.1025) loss: 0.8123 (0.8156) time: 0.1774 data: 0.0976 max mem: 9377 +Train: [45] [1800/6250] eta: 0:12:30 lr: 0.000077 grad: 0.1009 (0.1024) loss: 0.8103 (0.8154) time: 0.1431 data: 0.0579 max mem: 9377 +Train: [45] [1900/6250] eta: 0:12:12 lr: 0.000077 grad: 0.1004 (0.1026) loss: 0.8037 (0.8150) time: 0.1279 data: 0.0251 max mem: 9377 +Train: [45] [2000/6250] eta: 0:11:53 lr: 0.000077 grad: 0.1007 (0.1026) loss: 0.8064 (0.8146) time: 0.1250 data: 0.0377 max mem: 9377 +Train: [45] [2100/6250] eta: 0:11:38 lr: 0.000077 grad: 0.0992 (0.1026) loss: 0.8114 (0.8142) time: 0.1605 data: 0.0774 max mem: 9377 +Train: [45] [2200/6250] eta: 0:11:21 lr: 0.000077 grad: 0.1025 (0.1029) loss: 0.8062 (0.8139) time: 0.1648 data: 0.0828 max mem: 9377 +Train: [45] [2300/6250] eta: 0:11:03 lr: 0.000077 grad: 0.1069 (0.1030) loss: 0.8081 (0.8135) time: 0.1792 data: 0.0934 max mem: 9377 +Train: [45] [2400/6250] eta: 0:10:44 lr: 0.000077 grad: 0.0940 (0.1030) loss: 0.8097 (0.8133) time: 0.1625 data: 0.0773 max mem: 9377 +Train: [45] [2500/6250] eta: 0:10:25 lr: 0.000077 grad: 0.0988 (0.1029) loss: 0.8147 (0.8131) time: 0.1639 data: 0.0747 max mem: 9377 +Train: [45] [2600/6250] eta: 0:10:10 lr: 0.000077 grad: 0.0958 (0.1029) loss: 0.8052 (0.8130) time: 0.1914 data: 0.1022 max mem: 9377 +Train: [45] [2700/6250] eta: 0:09:52 lr: 0.000077 grad: 0.0999 (0.1029) loss: 0.8120 (0.8128) time: 0.1625 data: 0.0738 max mem: 9377 +Train: [45] [2800/6250] eta: 0:09:34 lr: 0.000077 grad: 0.1040 (0.1029) loss: 0.8012 (0.8125) time: 0.1667 data: 0.0609 max mem: 9377 +Train: [45] [2900/6250] eta: 0:09:18 lr: 0.000077 grad: 0.1000 (0.1030) loss: 0.8086 (0.8123) time: 0.1999 data: 0.1102 max mem: 9377 +Train: [45] [3000/6250] eta: 0:08:59 lr: 0.000077 grad: 0.1066 (0.1029) loss: 0.8073 (0.8121) time: 0.1629 data: 0.0631 max mem: 9377 +Train: [45] [3100/6250] eta: 0:08:42 lr: 0.000077 grad: 0.1096 (0.1030) loss: 0.8004 (0.8118) time: 0.1597 data: 0.0761 max mem: 9377 +Train: [45] [3200/6250] eta: 0:08:25 lr: 0.000077 grad: 0.0973 (0.1029) loss: 0.7966 (0.8117) time: 0.1505 data: 0.0615 max mem: 9377 +Train: [45] [3300/6250] eta: 0:08:08 lr: 0.000077 grad: 0.1021 (0.1029) loss: 0.8058 (0.8115) time: 0.1639 data: 0.0843 max mem: 9377 +Train: [45] [3400/6250] eta: 0:07:50 lr: 0.000077 grad: 0.1041 (0.1030) loss: 0.8041 (0.8113) time: 0.1519 data: 0.0635 max mem: 9377 +Train: [45] [3500/6250] eta: 0:07:34 lr: 0.000077 grad: 0.1104 (0.1029) loss: 0.7954 (0.8111) time: 0.1577 data: 0.0720 max mem: 9377 +Train: [45] [3600/6250] eta: 0:07:16 lr: 0.000077 grad: 0.0991 (0.1030) loss: 0.8142 (0.8110) time: 0.1536 data: 0.0647 max mem: 9377 +Train: [45] [3700/6250] eta: 0:07:00 lr: 0.000077 grad: 0.0985 (0.1030) loss: 0.8122 (0.8109) time: 0.1396 data: 0.0592 max mem: 9377 +Train: [45] [3800/6250] eta: 0:06:43 lr: 0.000077 grad: 0.1005 (0.1030) loss: 0.8056 (0.8108) time: 0.1635 data: 0.0759 max mem: 9377 +Train: [45] [3900/6250] eta: 0:06:25 lr: 0.000077 grad: 0.1047 (0.1030) loss: 0.8059 (0.8108) time: 0.1681 data: 0.0860 max mem: 9377 +Train: [45] [4000/6250] eta: 0:06:09 lr: 0.000077 grad: 0.1042 (0.1031) loss: 0.8011 (0.8107) time: 0.1561 data: 0.0688 max mem: 9377 +Train: [45] [4100/6250] eta: 0:05:52 lr: 0.000077 grad: 0.0979 (0.1032) loss: 0.8065 (0.8106) time: 0.1625 data: 0.0754 max mem: 9377 +Train: [45] [4200/6250] eta: 0:05:35 lr: 0.000076 grad: 0.1032 (0.1033) loss: 0.8080 (0.8104) time: 0.1557 data: 0.0630 max mem: 9377 +Train: [45] [4300/6250] eta: 0:05:18 lr: 0.000076 grad: 0.0993 (0.1035) loss: 0.7992 (0.8103) time: 0.1692 data: 0.0785 max mem: 9377 +Train: [45] [4400/6250] eta: 0:05:02 lr: 0.000076 grad: 0.1082 (0.1036) loss: 0.8092 (0.8101) time: 0.1702 data: 0.0822 max mem: 9377 +Train: [45] [4500/6250] eta: 0:04:45 lr: 0.000076 grad: 0.0992 (0.1036) loss: 0.8044 (0.8100) time: 0.1618 data: 0.0628 max mem: 9377 +Train: [45] [4600/6250] eta: 0:04:28 lr: 0.000076 grad: 0.1001 (0.1037) loss: 0.8065 (0.8099) time: 0.1391 data: 0.0525 max mem: 9377 +Train: [45] [4700/6250] eta: 0:04:12 lr: 0.000076 grad: 0.0973 (0.1037) loss: 0.8134 (0.8098) time: 0.1518 data: 0.0579 max mem: 9377 +Train: [45] [4800/6250] eta: 0:03:55 lr: 0.000076 grad: 0.1028 (0.1038) loss: 0.8079 (0.8097) time: 0.1648 data: 0.0786 max mem: 9377 +Train: [45] [4900/6250] eta: 0:03:39 lr: 0.000076 grad: 0.0967 (0.1038) loss: 0.8150 (0.8097) time: 0.1390 data: 0.0498 max mem: 9377 +Train: [45] [5000/6250] eta: 0:03:22 lr: 0.000076 grad: 0.1105 (0.1039) loss: 0.8026 (0.8096) time: 0.1342 data: 0.0433 max mem: 9377 +Train: [45] [5100/6250] eta: 0:03:06 lr: 0.000076 grad: 0.0969 (0.1039) loss: 0.8123 (0.8096) time: 0.1583 data: 0.0675 max mem: 9377 +Train: [45] [5200/6250] eta: 0:02:50 lr: 0.000076 grad: 0.1034 (0.1040) loss: 0.8094 (0.8096) time: 0.1681 data: 0.0722 max mem: 9377 +Train: [45] [5300/6250] eta: 0:02:33 lr: 0.000076 grad: 0.1036 (0.1041) loss: 0.8068 (0.8096) time: 0.1269 data: 0.0301 max mem: 9377 +Train: [45] [5400/6250] eta: 0:02:17 lr: 0.000076 grad: 0.1019 (0.1041) loss: 0.8109 (0.8096) time: 0.1566 data: 0.0632 max mem: 9377 +Train: [45] [5500/6250] eta: 0:02:01 lr: 0.000076 grad: 0.1015 (0.1042) loss: 0.8107 (0.8095) time: 0.1734 data: 0.0915 max mem: 9377 +Train: [45] [5600/6250] eta: 0:01:45 lr: 0.000076 grad: 0.1068 (0.1042) loss: 0.7981 (0.8094) time: 0.1765 data: 0.0973 max mem: 9377 +Train: [45] [5700/6250] eta: 0:01:29 lr: 0.000076 grad: 0.0938 (0.1043) loss: 0.8130 (0.8094) time: 0.1660 data: 0.0829 max mem: 9377 +Train: [45] [5800/6250] eta: 0:01:13 lr: 0.000076 grad: 0.1056 (0.1044) loss: 0.8153 (0.8094) time: 0.1684 data: 0.0819 max mem: 9377 +Train: [45] [5900/6250] eta: 0:00:57 lr: 0.000076 grad: 0.1097 (0.1045) loss: 0.8087 (0.8094) time: 0.1607 data: 0.0713 max mem: 9377 +Train: [45] [6000/6250] eta: 0:00:40 lr: 0.000076 grad: 0.1069 (0.1045) loss: 0.8095 (0.8094) time: 0.1781 data: 0.0844 max mem: 9377 +Train: [45] [6100/6250] eta: 0:00:24 lr: 0.000076 grad: 0.1100 (0.1046) loss: 0.8024 (0.8094) time: 0.1491 data: 0.0550 max mem: 9377 +Train: [45] [6200/6250] eta: 0:00:08 lr: 0.000076 grad: 0.1044 (0.1047) loss: 0.8077 (0.8093) time: 0.1669 data: 0.0876 max mem: 9377 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.1052 (0.1047) loss: 0.8048 (0.8093) time: 0.1593 data: 0.0692 max mem: 9377 +Train: [45] Total time: 0:17:09 (0.1647 s / it) +Averaged stats: lr: 0.000076 grad: 0.1052 (0.1047) loss: 0.8048 (0.8093) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:03:46 loss: 0.8337 (0.8337) time: 3.6576 data: 3.5553 max mem: 9377 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8232 (0.8248) time: 0.1427 data: 0.1169 max mem: 9377 +Eval (hcp-train-subset): [45] Total time: 0:00:15 (0.2420 s / it) +Averaged stats (hcp-train-subset): loss: 0.8232 (0.8248) +Eval (hcp-val): [45] [ 0/62] eta: 0:04:46 loss: 0.8432 (0.8432) time: 4.6271 data: 4.5453 max mem: 9377 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8411 (0.8417) time: 0.1342 data: 0.1086 max mem: 9377 +Eval (hcp-val): [45] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (hcp-val): loss: 0.8411 (0.8417) +Eval (nsd-val): [45] [ 0/62] eta: 0:05:58 loss: 0.8080 (0.8080) time: 5.7869 data: 5.7552 max mem: 9377 +Eval (nsd-val): [45] [61/62] eta: 0:00:00 loss: 0.8161 (0.8174) time: 0.1444 data: 0.1133 max mem: 9377 +Eval (nsd-val): [45] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (nsd-val): loss: 0.8161 (0.8174) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 12:19:58 lr: 0.000076 grad: 0.1494 (0.1494) loss: 0.8325 (0.8325) time: 7.1038 data: 6.9694 max mem: 9377 +Train: [46] [ 100/6250] eta: 0:22:23 lr: 0.000076 grad: 0.0922 (0.1227) loss: 0.8194 (0.8239) time: 0.1711 data: 0.0526 max mem: 9377 +Train: [46] [ 200/6250] eta: 0:18:51 lr: 0.000076 grad: 0.1113 (0.1187) loss: 0.8099 (0.8204) time: 0.1489 data: 0.0444 max mem: 9377 +Train: [46] [ 300/6250] eta: 0:17:35 lr: 0.000076 grad: 0.1117 (0.1172) loss: 0.8179 (0.8172) time: 0.1652 data: 0.0693 max mem: 9377 +Train: [46] [ 400/6250] eta: 0:16:56 lr: 0.000076 grad: 0.1022 (0.1143) loss: 0.8214 (0.8167) time: 0.1601 data: 0.0658 max mem: 9377 +Train: [46] [ 500/6250] eta: 0:16:30 lr: 0.000076 grad: 0.1013 (0.1122) loss: 0.8148 (0.8160) time: 0.1722 data: 0.0803 max mem: 9377 +Train: [46] [ 600/6250] eta: 0:16:02 lr: 0.000076 grad: 0.1028 (0.1106) loss: 0.8116 (0.8149) time: 0.1370 data: 0.0447 max mem: 9377 +Train: [46] [ 700/6250] eta: 0:15:41 lr: 0.000076 grad: 0.1055 (0.1096) loss: 0.8069 (0.8142) time: 0.1471 data: 0.0459 max mem: 9377 +Train: [46] [ 800/6250] eta: 0:15:24 lr: 0.000076 grad: 0.0987 (0.1085) loss: 0.8149 (0.8140) time: 0.1893 data: 0.0964 max mem: 9377 +Train: [46] [ 900/6250] eta: 0:15:12 lr: 0.000076 grad: 0.0950 (0.1074) loss: 0.8090 (0.8140) time: 0.1969 data: 0.1068 max mem: 9377 +Train: [46] [1000/6250] eta: 0:14:57 lr: 0.000076 grad: 0.0979 (0.1065) loss: 0.8103 (0.8140) time: 0.1656 data: 0.0706 max mem: 9377 +Train: [46] [1100/6250] eta: 0:14:38 lr: 0.000075 grad: 0.0940 (0.1059) loss: 0.8212 (0.8139) time: 0.1746 data: 0.0851 max mem: 9377 +Train: [46] [1200/6250] eta: 0:14:19 lr: 0.000075 grad: 0.0946 (0.1051) loss: 0.8130 (0.8141) time: 0.1690 data: 0.0737 max mem: 9377 +Train: [46] [1300/6250] eta: 0:14:02 lr: 0.000075 grad: 0.0927 (0.1049) loss: 0.8179 (0.8143) time: 0.1745 data: 0.0816 max mem: 9377 +Train: [46] [1400/6250] eta: 0:13:45 lr: 0.000075 grad: 0.1004 (0.1043) loss: 0.8219 (0.8144) time: 0.1714 data: 0.0866 max mem: 9377 +Train: [46] [1500/6250] eta: 0:13:28 lr: 0.000075 grad: 0.0920 (0.1039) loss: 0.8097 (0.8144) time: 0.1778 data: 0.0840 max mem: 9377 +Train: [46] [1600/6250] eta: 0:13:09 lr: 0.000075 grad: 0.0981 (0.1035) loss: 0.8196 (0.8144) time: 0.1453 data: 0.0566 max mem: 9377 +Train: [46] [1700/6250] eta: 0:12:50 lr: 0.000075 grad: 0.0969 (0.1031) loss: 0.8108 (0.8144) time: 0.1675 data: 0.0728 max mem: 9377 +Train: [46] [1800/6250] eta: 0:12:30 lr: 0.000075 grad: 0.0957 (0.1029) loss: 0.8131 (0.8145) time: 0.1660 data: 0.0776 max mem: 9377 +Train: [46] [1900/6250] eta: 0:12:10 lr: 0.000075 grad: 0.0946 (0.1026) loss: 0.8193 (0.8146) time: 0.1444 data: 0.0402 max mem: 9377 +Train: [46] [2000/6250] eta: 0:11:51 lr: 0.000075 grad: 0.1026 (0.1024) loss: 0.8112 (0.8147) time: 0.1559 data: 0.0675 max mem: 9377 +Train: [46] [2100/6250] eta: 0:11:32 lr: 0.000075 grad: 0.0910 (0.1022) loss: 0.8169 (0.8147) time: 0.1656 data: 0.0768 max mem: 9377 +Train: [46] [2200/6250] eta: 0:11:17 lr: 0.000075 grad: 0.0964 (0.1020) loss: 0.8137 (0.8147) time: 0.1974 data: 0.1071 max mem: 9377 +Train: [46] [2300/6250] eta: 0:11:01 lr: 0.000075 grad: 0.0965 (0.1019) loss: 0.8080 (0.8147) time: 0.2025 data: 0.1154 max mem: 9377 +Train: [46] [2400/6250] eta: 0:10:46 lr: 0.000075 grad: 0.0952 (0.1018) loss: 0.8190 (0.8146) time: 0.2563 data: 0.1729 max mem: 9377 +Train: [46] [2500/6250] eta: 0:10:27 lr: 0.000075 grad: 0.0927 (0.1018) loss: 0.8205 (0.8145) time: 0.1694 data: 0.0762 max mem: 9377 +Train: [46] [2600/6250] eta: 0:10:11 lr: 0.000075 grad: 0.0993 (0.1018) loss: 0.8124 (0.8143) time: 0.1662 data: 0.0591 max mem: 9377 +Train: [46] [2700/6250] eta: 0:09:55 lr: 0.000075 grad: 0.1012 (0.1019) loss: 0.8165 (0.8142) time: 0.1602 data: 0.0585 max mem: 9377 +Train: [46] [2800/6250] eta: 0:09:38 lr: 0.000075 grad: 0.1001 (0.1019) loss: 0.8032 (0.8141) time: 0.1551 data: 0.0518 max mem: 9377 +Train: [46] [2900/6250] eta: 0:09:21 lr: 0.000075 grad: 0.0947 (0.1019) loss: 0.8080 (0.8140) time: 0.1693 data: 0.0853 max mem: 9377 +Train: [46] [3000/6250] eta: 0:09:04 lr: 0.000075 grad: 0.1025 (0.1019) loss: 0.8029 (0.8138) time: 0.1649 data: 0.0603 max mem: 9377 +Train: [46] [3100/6250] eta: 0:08:45 lr: 0.000075 grad: 0.1002 (0.1020) loss: 0.8100 (0.8138) time: 0.1397 data: 0.0456 max mem: 9377 +Train: [46] [3200/6250] eta: 0:08:28 lr: 0.000075 grad: 0.1049 (0.1020) loss: 0.8075 (0.8137) time: 0.1546 data: 0.0693 max mem: 9377 +Train: [46] [3300/6250] eta: 0:08:10 lr: 0.000075 grad: 0.0995 (0.1020) loss: 0.8138 (0.8137) time: 0.1428 data: 0.0483 max mem: 9377 +Train: [46] [3400/6250] eta: 0:07:52 lr: 0.000075 grad: 0.0993 (0.1021) loss: 0.8145 (0.8136) time: 0.1710 data: 0.0916 max mem: 9377 +Train: [46] [3500/6250] eta: 0:07:35 lr: 0.000075 grad: 0.1076 (0.1023) loss: 0.8166 (0.8135) time: 0.1537 data: 0.0691 max mem: 9377 +Train: [46] [3600/6250] eta: 0:07:18 lr: 0.000075 grad: 0.1062 (0.1025) loss: 0.8080 (0.8134) time: 0.1619 data: 0.0834 max mem: 9377 +Train: [46] [3700/6250] eta: 0:07:01 lr: 0.000075 grad: 0.0980 (0.1026) loss: 0.8143 (0.8133) time: 0.1517 data: 0.0670 max mem: 9377 +Train: [46] [3800/6250] eta: 0:06:44 lr: 0.000075 grad: 0.0975 (0.1027) loss: 0.8127 (0.8133) time: 0.1716 data: 0.0862 max mem: 9377 +Train: [46] [3900/6250] eta: 0:06:27 lr: 0.000075 grad: 0.1052 (0.1028) loss: 0.8096 (0.8132) time: 0.1716 data: 0.0854 max mem: 9377 +Train: [46] [4000/6250] eta: 0:06:10 lr: 0.000075 grad: 0.1082 (0.1028) loss: 0.8116 (0.8131) time: 0.1635 data: 0.0804 max mem: 9377 +Train: [46] [4100/6250] eta: 0:05:54 lr: 0.000075 grad: 0.1007 (0.1029) loss: 0.8045 (0.8130) time: 0.1789 data: 0.1016 max mem: 9377 +Train: [46] [4200/6250] eta: 0:05:38 lr: 0.000074 grad: 0.1037 (0.1029) loss: 0.8040 (0.8128) time: 0.2192 data: 0.1329 max mem: 9377 +Train: [46] [4300/6250] eta: 0:05:20 lr: 0.000074 grad: 0.0981 (0.1030) loss: 0.8111 (0.8127) time: 0.1492 data: 0.0627 max mem: 9377 +Train: [46] [4400/6250] eta: 0:05:04 lr: 0.000074 grad: 0.0967 (0.1029) loss: 0.8150 (0.8127) time: 0.1429 data: 0.0621 max mem: 9377 +Train: [46] [4500/6250] eta: 0:04:47 lr: 0.000074 grad: 0.1062 (0.1029) loss: 0.8081 (0.8127) time: 0.1097 data: 0.0213 max mem: 9377 +Train: [46] [4600/6250] eta: 0:04:30 lr: 0.000074 grad: 0.0929 (0.1029) loss: 0.8101 (0.8126) time: 0.1670 data: 0.0770 max mem: 9377 +Train: [46] [4700/6250] eta: 0:04:14 lr: 0.000074 grad: 0.1029 (0.1029) loss: 0.8151 (0.8126) time: 0.1882 data: 0.1014 max mem: 9377 +Train: [46] [4800/6250] eta: 0:03:57 lr: 0.000074 grad: 0.1063 (0.1029) loss: 0.8075 (0.8125) time: 0.2097 data: 0.1262 max mem: 9377 +Train: [46] [4900/6250] eta: 0:03:40 lr: 0.000074 grad: 0.1052 (0.1030) loss: 0.8055 (0.8124) time: 0.1173 data: 0.0306 max mem: 9377 +Train: [46] [5000/6250] eta: 0:03:24 lr: 0.000074 grad: 0.1059 (0.1031) loss: 0.8059 (0.8123) time: 0.2006 data: 0.1106 max mem: 9377 +Train: [46] [5100/6250] eta: 0:03:08 lr: 0.000074 grad: 0.1053 (0.1031) loss: 0.8043 (0.8123) time: 0.1550 data: 0.0697 max mem: 9377 +Train: [46] [5200/6250] eta: 0:02:51 lr: 0.000074 grad: 0.1087 (0.1032) loss: 0.7981 (0.8122) time: 0.1738 data: 0.0876 max mem: 9377 +Train: [46] [5300/6250] eta: 0:02:35 lr: 0.000074 grad: 0.0987 (0.1032) loss: 0.8108 (0.8121) time: 0.1430 data: 0.0513 max mem: 9377 +Train: [46] [5400/6250] eta: 0:02:19 lr: 0.000074 grad: 0.1062 (0.1033) loss: 0.8056 (0.8121) time: 0.1979 data: 0.1259 max mem: 9377 +Train: [46] [5500/6250] eta: 0:02:02 lr: 0.000074 grad: 0.0982 (0.1036) loss: 0.8176 (0.8121) time: 0.1410 data: 0.0595 max mem: 9377 +Train: [46] [5600/6250] eta: 0:01:46 lr: 0.000074 grad: 0.0988 (0.1036) loss: 0.8096 (0.8121) time: 0.1917 data: 0.1126 max mem: 9377 +Train: [46] [5700/6250] eta: 0:01:30 lr: 0.000074 grad: 0.0970 (0.1036) loss: 0.8151 (0.8121) time: 0.1396 data: 0.0622 max mem: 9377 +Train: [46] [5800/6250] eta: 0:01:13 lr: 0.000074 grad: 0.0989 (0.1036) loss: 0.8082 (0.8121) time: 0.1577 data: 0.0646 max mem: 9377 +Train: [46] [5900/6250] eta: 0:00:57 lr: 0.000074 grad: 0.1022 (0.1037) loss: 0.8103 (0.8121) time: 0.1522 data: 0.0674 max mem: 9377 +Train: [46] [6000/6250] eta: 0:00:40 lr: 0.000074 grad: 0.1005 (0.1037) loss: 0.8130 (0.8121) time: 0.1704 data: 0.0816 max mem: 9377 +Train: [46] [6100/6250] eta: 0:00:24 lr: 0.000074 grad: 0.0990 (0.1036) loss: 0.8139 (0.8122) time: 0.1638 data: 0.0857 max mem: 9377 +Train: [46] [6200/6250] eta: 0:00:08 lr: 0.000074 grad: 0.0990 (0.1037) loss: 0.8102 (0.8121) time: 0.1474 data: 0.0666 max mem: 9377 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.0977 (0.1037) loss: 0.8157 (0.8121) time: 0.1558 data: 0.0722 max mem: 9377 +Train: [46] Total time: 0:17:07 (0.1644 s / it) +Averaged stats: lr: 0.000074 grad: 0.0977 (0.1037) loss: 0.8157 (0.8121) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:06:05 loss: 0.8300 (0.8300) time: 5.8989 data: 5.8665 max mem: 9377 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8224 (0.8226) time: 0.1640 data: 0.1384 max mem: 9377 +Eval (hcp-train-subset): [46] Total time: 0:00:15 (0.2555 s / it) +Averaged stats (hcp-train-subset): loss: 0.8224 (0.8226) +Eval (hcp-val): [46] [ 0/62] eta: 0:05:09 loss: 0.8395 (0.8395) time: 4.9918 data: 4.9177 max mem: 9377 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8406 (0.8421) time: 0.1206 data: 0.0926 max mem: 9377 +Eval (hcp-val): [46] Total time: 0:00:16 (0.2610 s / it) +Averaged stats (hcp-val): loss: 0.8406 (0.8421) +Eval (nsd-val): [46] [ 0/62] eta: 0:04:14 loss: 0.8126 (0.8126) time: 4.0988 data: 4.0320 max mem: 9377 +Eval (nsd-val): [46] [61/62] eta: 0:00:00 loss: 0.8213 (0.8224) time: 0.1656 data: 0.1377 max mem: 9377 +Eval (nsd-val): [46] Total time: 0:00:16 (0.2632 s / it) +Averaged stats (nsd-val): loss: 0.8213 (0.8224) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 12:27:07 lr: 0.000074 grad: nan (nan) loss: 0.8813 (0.8813) time: 7.1724 data: 7.0711 max mem: 9377 +Train: [47] [ 100/6250] eta: 0:24:42 lr: 0.000074 grad: 0.0792 (0.1094) loss: 0.8457 (0.8357) time: 0.1868 data: 0.0745 max mem: 9377 +Train: [47] [ 200/6250] eta: 0:21:11 lr: 0.000074 grad: 0.1086 (0.1078) loss: 0.8282 (0.8323) time: 0.1749 data: 0.0739 max mem: 9377 +Train: [47] [ 300/6250] eta: 0:19:34 lr: 0.000074 grad: 0.0874 (0.1044) loss: 0.8273 (0.8305) time: 0.1436 data: 0.0273 max mem: 9377 +Train: [47] [ 400/6250] eta: 0:18:46 lr: 0.000074 grad: 0.0896 (0.1021) loss: 0.8205 (0.8289) time: 0.2045 data: 0.1085 max mem: 9377 +Train: [47] [ 500/6250] eta: 0:17:42 lr: 0.000074 grad: 0.0914 (0.1003) loss: 0.8202 (0.8276) time: 0.1603 data: 0.0655 max mem: 9377 +Train: [47] [ 600/6250] eta: 0:17:05 lr: 0.000074 grad: 0.0932 (0.0988) loss: 0.8191 (0.8270) time: 0.1442 data: 0.0545 max mem: 9377 +Train: [47] [ 700/6250] eta: 0:16:29 lr: 0.000074 grad: 0.0861 (0.0977) loss: 0.8315 (0.8269) time: 0.1382 data: 0.0416 max mem: 9377 +Train: [47] [ 800/6250] eta: 0:16:03 lr: 0.000074 grad: 0.0978 (0.0971) loss: 0.8171 (0.8265) time: 0.1204 data: 0.0370 max mem: 9377 +Train: [47] [ 900/6250] eta: 0:15:45 lr: 0.000074 grad: 0.0930 (0.0966) loss: 0.8156 (0.8262) time: 0.1704 data: 0.0862 max mem: 9377 +Train: [47] [1000/6250] eta: 0:15:23 lr: 0.000073 grad: 0.0951 (0.0963) loss: 0.8190 (0.8257) time: 0.1731 data: 0.0851 max mem: 9377 +Train: [47] [1100/6250] eta: 0:14:59 lr: 0.000073 grad: 0.0884 (0.0962) loss: 0.8179 (0.8252) time: 0.1833 data: 0.1037 max mem: 9377 +Train: [47] [1200/6250] eta: 0:14:38 lr: 0.000073 grad: 0.0931 (0.0961) loss: 0.8228 (0.8247) time: 0.1964 data: 0.1061 max mem: 9377 +Train: [47] [1300/6250] eta: 0:14:22 lr: 0.000073 grad: 0.0950 (0.0961) loss: 0.8158 (0.8241) time: 0.1569 data: 0.0719 max mem: 9377 +Train: [47] [1400/6250] eta: 0:14:03 lr: 0.000073 grad: 0.0937 (0.0962) loss: 0.8181 (0.8236) time: 0.1754 data: 0.0855 max mem: 9377 +Train: [47] [1500/6250] eta: 0:13:41 lr: 0.000073 grad: 0.0963 (0.0964) loss: 0.8213 (0.8231) time: 0.1783 data: 0.0948 max mem: 9377 +Train: [47] [1600/6250] eta: 0:13:20 lr: 0.000073 grad: 0.0972 (0.0963) loss: 0.8117 (0.8226) time: 0.1495 data: 0.0694 max mem: 9377 +Train: [47] [1700/6250] eta: 0:12:59 lr: 0.000073 grad: 0.1003 (0.0966) loss: 0.8108 (0.8222) time: 0.1636 data: 0.0712 max mem: 9377 +Train: [47] [1800/6250] eta: 0:12:39 lr: 0.000073 grad: 0.0968 (0.0968) loss: 0.8197 (0.8218) time: 0.1642 data: 0.0683 max mem: 9377 +Train: [47] [1900/6250] eta: 0:12:19 lr: 0.000073 grad: 0.0975 (0.0970) loss: 0.8126 (0.8214) time: 0.1634 data: 0.0854 max mem: 9377 +Train: [47] [2000/6250] eta: 0:12:00 lr: 0.000073 grad: 0.0950 (0.0971) loss: 0.8164 (0.8211) time: 0.1692 data: 0.0848 max mem: 9377 +Train: [47] [2100/6250] eta: 0:11:40 lr: 0.000073 grad: 0.0966 (0.0972) loss: 0.8086 (0.8207) time: 0.1570 data: 0.0709 max mem: 9377 +Train: [47] [2200/6250] eta: 0:11:23 lr: 0.000073 grad: 0.1001 (0.0974) loss: 0.8106 (0.8203) time: 0.2070 data: 0.1289 max mem: 9377 +Train: [47] [2300/6250] eta: 0:11:06 lr: 0.000073 grad: 0.0955 (0.0977) loss: 0.8098 (0.8199) time: 0.1421 data: 0.0654 max mem: 9377 +Train: [47] [2400/6250] eta: 0:10:48 lr: 0.000073 grad: 0.0954 (0.0979) loss: 0.8060 (0.8194) time: 0.1471 data: 0.0665 max mem: 9377 +Train: [47] [2500/6250] eta: 0:10:30 lr: 0.000073 grad: 0.0976 (0.0980) loss: 0.8123 (0.8191) time: 0.1536 data: 0.0672 max mem: 9377 +Train: [47] [2600/6250] eta: 0:10:14 lr: 0.000073 grad: 0.1042 (0.0983) loss: 0.8055 (0.8187) time: 0.1809 data: 0.0952 max mem: 9377 +Train: [47] [2700/6250] eta: 0:09:57 lr: 0.000073 grad: 0.0973 (0.0984) loss: 0.8190 (0.8184) time: 0.1695 data: 0.0790 max mem: 9377 +Train: [47] [2800/6250] eta: 0:09:40 lr: 0.000073 grad: 0.0921 (0.0985) loss: 0.8159 (0.8182) time: 0.1617 data: 0.0747 max mem: 9377 +Train: [47] [2900/6250] eta: 0:09:22 lr: 0.000073 grad: 0.1000 (0.0987) loss: 0.8038 (0.8180) time: 0.1563 data: 0.0676 max mem: 9377 +Train: [47] [3000/6250] eta: 0:09:03 lr: 0.000073 grad: 0.0916 (0.0986) loss: 0.8198 (0.8179) time: 0.1370 data: 0.0353 max mem: 9377 +Train: [47] [3100/6250] eta: 0:08:44 lr: 0.000073 grad: 0.0972 (0.0986) loss: 0.8165 (0.8179) time: 0.1509 data: 0.0603 max mem: 9377 +Train: [47] [3200/6250] eta: 0:08:27 lr: 0.000073 grad: 0.1017 (0.0987) loss: 0.8085 (0.8178) time: 0.1528 data: 0.0632 max mem: 9377 +Train: [47] [3300/6250] eta: 0:08:09 lr: 0.000073 grad: 0.1009 (0.0988) loss: 0.8173 (0.8177) time: 0.1285 data: 0.0528 max mem: 9377 +Train: [47] [3400/6250] eta: 0:07:52 lr: 0.000073 grad: 0.1056 (0.0990) loss: 0.8112 (0.8175) time: 0.1536 data: 0.0660 max mem: 9377 +Train: [47] [3500/6250] eta: 0:07:36 lr: 0.000073 grad: 0.1006 (0.0990) loss: 0.8182 (0.8175) time: 0.2251 data: 0.1517 max mem: 9377 +Train: [47] [3600/6250] eta: 0:07:18 lr: 0.000073 grad: 0.1025 (0.0991) loss: 0.8105 (0.8174) time: 0.1774 data: 0.0936 max mem: 9377 +Train: [47] [3700/6250] eta: 0:07:01 lr: 0.000073 grad: 0.0969 (0.0992) loss: 0.8129 (0.8173) time: 0.1484 data: 0.0570 max mem: 9377 +Train: [47] [3800/6250] eta: 0:06:44 lr: 0.000073 grad: 0.1026 (0.0994) loss: 0.8062 (0.8170) time: 0.1479 data: 0.0640 max mem: 9377 +Train: [47] [3900/6250] eta: 0:06:28 lr: 0.000073 grad: 0.1058 (0.0996) loss: 0.8094 (0.8169) time: 0.1466 data: 0.0654 max mem: 9377 +Train: [47] [4000/6250] eta: 0:06:10 lr: 0.000073 grad: 0.1027 (0.0997) loss: 0.8051 (0.8166) time: 0.1465 data: 0.0628 max mem: 9377 +Train: [47] [4100/6250] eta: 0:05:54 lr: 0.000072 grad: 0.1052 (0.0999) loss: 0.8047 (0.8163) time: 0.1401 data: 0.0447 max mem: 9377 +Train: [47] [4200/6250] eta: 0:05:37 lr: 0.000072 grad: 0.1062 (0.1001) loss: 0.7969 (0.8162) time: 0.1453 data: 0.0558 max mem: 9377 +Train: [47] [4300/6250] eta: 0:05:21 lr: 0.000072 grad: 0.0993 (0.1001) loss: 0.8088 (0.8160) time: 0.1423 data: 0.0529 max mem: 9377 +Train: [47] [4400/6250] eta: 0:05:04 lr: 0.000072 grad: 0.1036 (0.1002) loss: 0.8093 (0.8159) time: 0.1688 data: 0.0746 max mem: 9377 +Train: [47] [4500/6250] eta: 0:04:47 lr: 0.000072 grad: 0.1028 (0.1003) loss: 0.8118 (0.8158) time: 0.1529 data: 0.0624 max mem: 9377 +Train: [47] [4600/6250] eta: 0:04:31 lr: 0.000072 grad: 0.1044 (0.1005) loss: 0.8006 (0.8156) time: 0.1573 data: 0.0688 max mem: 9377 +Train: [47] [4700/6250] eta: 0:04:14 lr: 0.000072 grad: 0.1031 (0.1007) loss: 0.8075 (0.8154) time: 0.1565 data: 0.0707 max mem: 9377 +Train: [47] [4800/6250] eta: 0:03:58 lr: 0.000072 grad: 0.1070 (0.1008) loss: 0.8015 (0.8152) time: 0.1868 data: 0.0998 max mem: 9377 +Train: [47] [4900/6250] eta: 0:03:41 lr: 0.000072 grad: 0.1003 (0.1009) loss: 0.8057 (0.8151) time: 0.1567 data: 0.0735 max mem: 9377 +Train: [47] [5000/6250] eta: 0:03:25 lr: 0.000072 grad: 0.0998 (0.1010) loss: 0.8059 (0.8149) time: 0.1816 data: 0.0944 max mem: 9377 +Train: [47] [5100/6250] eta: 0:03:08 lr: 0.000072 grad: 0.1093 (0.1011) loss: 0.8075 (0.8148) time: 0.1697 data: 0.0844 max mem: 9377 +Train: [47] [5200/6250] eta: 0:02:52 lr: 0.000072 grad: 0.0987 (0.1012) loss: 0.8107 (0.8147) time: 0.1388 data: 0.0594 max mem: 9377 +Train: [47] [5300/6250] eta: 0:02:35 lr: 0.000072 grad: 0.0984 (0.1012) loss: 0.8131 (0.8146) time: 0.1190 data: 0.0314 max mem: 9377 +Train: [47] [5400/6250] eta: 0:02:19 lr: 0.000072 grad: 0.0983 (0.1011) loss: 0.8129 (0.8146) time: 0.2242 data: 0.1487 max mem: 9377 +Train: [47] [5500/6250] eta: 0:02:03 lr: 0.000072 grad: 0.0998 (0.1011) loss: 0.8149 (0.8146) time: 0.1367 data: 0.0582 max mem: 9377 +Train: [47] [5600/6250] eta: 0:01:46 lr: 0.000072 grad: 0.0992 (0.1012) loss: 0.8132 (0.8146) time: 0.1558 data: 0.0674 max mem: 9377 +Train: [47] [5700/6250] eta: 0:01:30 lr: 0.000072 grad: 0.0985 (0.1012) loss: 0.8112 (0.8146) time: 0.1426 data: 0.0647 max mem: 9377 +Train: [47] [5800/6250] eta: 0:01:13 lr: 0.000072 grad: 0.0974 (0.1013) loss: 0.8150 (0.8146) time: 0.1835 data: 0.0946 max mem: 9377 +Train: [47] [5900/6250] eta: 0:00:57 lr: 0.000072 grad: 0.0998 (0.1013) loss: 0.8160 (0.8146) time: 0.1816 data: 0.0908 max mem: 9377 +Train: [47] [6000/6250] eta: 0:00:41 lr: 0.000072 grad: 0.0995 (0.1014) loss: 0.8175 (0.8145) time: 0.1724 data: 0.0736 max mem: 9377 +Train: [47] [6100/6250] eta: 0:00:24 lr: 0.000072 grad: 0.1011 (0.1014) loss: 0.8175 (0.8145) time: 0.1537 data: 0.0558 max mem: 9377 +Train: [47] [6200/6250] eta: 0:00:08 lr: 0.000072 grad: 0.1053 (0.1014) loss: 0.8085 (0.8145) time: 0.1530 data: 0.0608 max mem: 9377 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.1012 (0.1015) loss: 0.8100 (0.8144) time: 0.1633 data: 0.0693 max mem: 9377 +Train: [47] Total time: 0:17:13 (0.1654 s / it) +Averaged stats: lr: 0.000072 grad: 0.1012 (0.1015) loss: 0.8100 (0.8144) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:05:46 loss: 0.8275 (0.8275) time: 5.5881 data: 5.5570 max mem: 9377 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8213 (0.8224) time: 0.1291 data: 0.1034 max mem: 9377 +Eval (hcp-train-subset): [47] Total time: 0:00:14 (0.2325 s / it) +Averaged stats (hcp-train-subset): loss: 0.8213 (0.8224) +Eval (hcp-val): [47] [ 0/62] eta: 0:03:52 loss: 0.8405 (0.8405) time: 3.7488 data: 3.6914 max mem: 9377 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8404 (0.8416) time: 0.1478 data: 0.1224 max mem: 9377 +Eval (hcp-val): [47] Total time: 0:00:15 (0.2424 s / it) +Averaged stats (hcp-val): loss: 0.8404 (0.8416) +Eval (nsd-val): [47] [ 0/62] eta: 0:06:06 loss: 0.8108 (0.8108) time: 5.9070 data: 5.8761 max mem: 9377 +Eval (nsd-val): [47] [61/62] eta: 0:00:00 loss: 0.8183 (0.8190) time: 0.1528 data: 0.1250 max mem: 9377 +Eval (nsd-val): [47] Total time: 0:00:14 (0.2314 s / it) +Averaged stats (nsd-val): loss: 0.8183 (0.8190) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [48] [ 0/6250] eta: 8:50:14 lr: 0.000072 grad: 0.0809 (0.0809) loss: 0.8539 (0.8539) time: 5.0904 data: 4.7202 max mem: 9377 +Train: [48] [ 100/6250] eta: 0:23:03 lr: 0.000072 grad: 0.1232 (0.1285) loss: 0.8254 (0.8265) time: 0.1646 data: 0.0514 max mem: 9377 +Train: [48] [ 200/6250] eta: 0:20:00 lr: 0.000072 grad: 0.1075 (0.1243) loss: 0.8161 (0.8219) time: 0.1169 data: 0.0218 max mem: 9377 +Train: [48] [ 300/6250] eta: 0:18:29 lr: 0.000072 grad: 0.1045 (0.1214) loss: 0.8120 (0.8186) time: 0.1548 data: 0.0578 max mem: 9377 +Train: [48] [ 400/6250] eta: 0:17:50 lr: 0.000072 grad: 0.0994 (0.1183) loss: 0.8157 (0.8176) time: 0.1598 data: 0.0724 max mem: 9377 +Train: [48] [ 500/6250] eta: 0:17:14 lr: 0.000072 grad: 0.0952 (0.1164) loss: 0.8136 (0.8169) time: 0.1727 data: 0.0772 max mem: 9377 +Train: [48] [ 600/6250] eta: 0:16:40 lr: 0.000072 grad: 0.1066 (0.1150) loss: 0.7980 (0.8152) time: 0.1214 data: 0.0222 max mem: 9377 +Train: [48] [ 700/6250] eta: 0:16:14 lr: 0.000072 grad: 0.1006 (0.1143) loss: 0.8079 (0.8135) time: 0.1624 data: 0.0742 max mem: 9377 +Train: [48] [ 800/6250] eta: 0:15:43 lr: 0.000072 grad: 0.1140 (0.1138) loss: 0.7908 (0.8124) time: 0.1528 data: 0.0618 max mem: 9377 +Train: [48] [ 900/6250] eta: 0:15:23 lr: 0.000071 grad: 0.1164 (0.1133) loss: 0.8007 (0.8113) time: 0.1879 data: 0.1084 max mem: 9377 +Train: [48] [1000/6250] eta: 0:15:03 lr: 0.000071 grad: 0.1049 (0.1129) loss: 0.7958 (0.8104) time: 0.1464 data: 0.0579 max mem: 9377 +Train: [48] [1100/6250] eta: 0:14:43 lr: 0.000071 grad: 0.1032 (0.1125) loss: 0.7978 (0.8097) time: 0.1537 data: 0.0639 max mem: 9377 +Train: [48] [1200/6250] eta: 0:14:20 lr: 0.000071 grad: 0.1106 (0.1125) loss: 0.8010 (0.8088) time: 0.1588 data: 0.0780 max mem: 9377 +Train: [48] [1300/6250] eta: 0:14:04 lr: 0.000071 grad: 0.1087 (0.1124) loss: 0.7997 (0.8081) time: 0.1646 data: 0.0787 max mem: 9377 +Train: [48] [1400/6250] eta: 0:13:45 lr: 0.000071 grad: 0.1081 (0.1124) loss: 0.7975 (0.8073) time: 0.1709 data: 0.0828 max mem: 9377 +Train: [48] [1500/6250] eta: 0:13:29 lr: 0.000071 grad: 0.1112 (0.1124) loss: 0.7899 (0.8068) time: 0.1827 data: 0.0942 max mem: 9377 +Train: [48] [1600/6250] eta: 0:13:06 lr: 0.000071 grad: 0.1050 (0.1122) loss: 0.8068 (0.8065) time: 0.1542 data: 0.0628 max mem: 9377 +Train: [48] [1700/6250] eta: 0:12:47 lr: 0.000071 grad: 0.1090 (0.1119) loss: 0.8035 (0.8062) time: 0.1705 data: 0.0750 max mem: 9377 +Train: [48] [1800/6250] eta: 0:12:28 lr: 0.000071 grad: 0.1102 (0.1117) loss: 0.7991 (0.8060) time: 0.1457 data: 0.0577 max mem: 9377 +Train: [48] [1900/6250] eta: 0:12:07 lr: 0.000071 grad: 0.1111 (0.1115) loss: 0.8025 (0.8060) time: 0.1258 data: 0.0388 max mem: 9377 +Train: [48] [2000/6250] eta: 0:11:49 lr: 0.000071 grad: 0.0983 (0.1112) loss: 0.8153 (0.8060) time: 0.1611 data: 0.0731 max mem: 9377 +Train: [48] [2100/6250] eta: 0:11:30 lr: 0.000071 grad: 0.0991 (0.1110) loss: 0.8097 (0.8061) time: 0.1474 data: 0.0611 max mem: 9377 +Train: [48] [2200/6250] eta: 0:11:11 lr: 0.000071 grad: 0.1125 (0.1108) loss: 0.8077 (0.8062) time: 0.1578 data: 0.0731 max mem: 9377 +Train: [48] [2300/6250] eta: 0:10:56 lr: 0.000071 grad: 0.1078 (0.1107) loss: 0.8103 (0.8062) time: 0.1955 data: 0.0996 max mem: 9377 +Train: [48] [2400/6250] eta: 0:10:41 lr: 0.000071 grad: 0.1112 (0.1106) loss: 0.7948 (0.8061) time: 0.1693 data: 0.0875 max mem: 9377 +Train: [48] [2500/6250] eta: 0:10:24 lr: 0.000071 grad: 0.1029 (0.1104) loss: 0.8103 (0.8063) time: 0.1700 data: 0.0776 max mem: 9377 +Train: [48] [2600/6250] eta: 0:10:05 lr: 0.000071 grad: 0.1080 (0.1102) loss: 0.8061 (0.8063) time: 0.1460 data: 0.0594 max mem: 9377 +Train: [48] [2700/6250] eta: 0:09:48 lr: 0.000071 grad: 0.1115 (0.1103) loss: 0.8096 (0.8064) time: 0.1549 data: 0.0594 max mem: 9377 +Train: [48] [2800/6250] eta: 0:09:32 lr: 0.000071 grad: 0.1015 (0.1101) loss: 0.8109 (0.8066) time: 0.1724 data: 0.0818 max mem: 9377 +Train: [48] [2900/6250] eta: 0:09:14 lr: 0.000071 grad: 0.1092 (0.1101) loss: 0.8095 (0.8067) time: 0.1328 data: 0.0508 max mem: 9377 +Train: [48] [3000/6250] eta: 0:08:55 lr: 0.000071 grad: 0.1153 (0.1103) loss: 0.7998 (0.8067) time: 0.1404 data: 0.0456 max mem: 9377 +Train: [48] [3100/6250] eta: 0:08:38 lr: 0.000071 grad: 0.1052 (0.1101) loss: 0.8093 (0.8068) time: 0.1623 data: 0.0733 max mem: 9377 +Train: [48] [3200/6250] eta: 0:08:20 lr: 0.000071 grad: 0.1082 (0.1103) loss: 0.8144 (0.8069) time: 0.1386 data: 0.0468 max mem: 9377 +Train: [48] [3300/6250] eta: 0:08:02 lr: 0.000071 grad: 0.1083 (0.1102) loss: 0.8153 (0.8070) time: 0.1560 data: 0.0576 max mem: 9377 +Train: [48] [3400/6250] eta: 0:07:46 lr: 0.000071 grad: 0.0994 (0.1101) loss: 0.8150 (0.8072) time: 0.1563 data: 0.0651 max mem: 9377 +Train: [48] [3500/6250] eta: 0:07:29 lr: 0.000071 grad: 0.1065 (0.1100) loss: 0.8045 (0.8072) time: 0.1766 data: 0.0953 max mem: 9377 +Train: [48] [3600/6250] eta: 0:07:12 lr: 0.000071 grad: 0.1016 (0.1099) loss: 0.8155 (0.8073) time: 0.1502 data: 0.0634 max mem: 9377 +Train: [48] [3700/6250] eta: 0:06:55 lr: 0.000071 grad: 0.1042 (0.1099) loss: 0.8081 (0.8074) time: 0.1512 data: 0.0683 max mem: 9377 +Train: [48] [3800/6250] eta: 0:06:38 lr: 0.000071 grad: 0.1087 (0.1099) loss: 0.8129 (0.8075) time: 0.1458 data: 0.0536 max mem: 9377 +Train: [48] [3900/6250] eta: 0:06:21 lr: 0.000070 grad: 0.1092 (0.1100) loss: 0.8147 (0.8075) time: 0.1650 data: 0.0734 max mem: 9377 +Train: [48] [4000/6250] eta: 0:06:05 lr: 0.000070 grad: 0.1048 (0.1101) loss: 0.8100 (0.8076) time: 0.1525 data: 0.0694 max mem: 9377 +Train: [48] [4100/6250] eta: 0:05:49 lr: 0.000070 grad: 0.1059 (0.1101) loss: 0.8095 (0.8076) time: 0.1649 data: 0.0717 max mem: 9377 +Train: [48] [4200/6250] eta: 0:05:32 lr: 0.000070 grad: 0.1058 (0.1102) loss: 0.8145 (0.8077) time: 0.1455 data: 0.0513 max mem: 9377 +Train: [48] [4300/6250] eta: 0:05:15 lr: 0.000070 grad: 0.1039 (0.1100) loss: 0.8105 (0.8078) time: 0.1598 data: 0.0675 max mem: 9377 +Train: [48] [4400/6250] eta: 0:04:59 lr: 0.000070 grad: 0.1016 (0.1100) loss: 0.8156 (0.8078) time: 0.1785 data: 0.0983 max mem: 9377 +Train: [48] [4500/6250] eta: 0:04:42 lr: 0.000070 grad: 0.1047 (0.1099) loss: 0.8111 (0.8078) time: 0.1440 data: 0.0505 max mem: 9377 +Train: [48] [4600/6250] eta: 0:04:26 lr: 0.000070 grad: 0.1015 (0.1099) loss: 0.8116 (0.8078) time: 0.1447 data: 0.0447 max mem: 9377 +Train: [48] [4700/6250] eta: 0:04:10 lr: 0.000070 grad: 0.1030 (0.1099) loss: 0.8026 (0.8078) time: 0.1503 data: 0.0615 max mem: 9377 +Train: [48] [4800/6250] eta: 0:03:53 lr: 0.000070 grad: 0.1038 (0.1099) loss: 0.8087 (0.8078) time: 0.1432 data: 0.0529 max mem: 9377 +Train: [48] [4900/6250] eta: 0:03:37 lr: 0.000070 grad: 0.1069 (0.1099) loss: 0.8081 (0.8079) time: 0.1572 data: 0.0671 max mem: 9377 +Train: [48] [5000/6250] eta: 0:03:21 lr: 0.000070 grad: 0.1045 (0.1099) loss: 0.8053 (0.8079) time: 0.1497 data: 0.0596 max mem: 9377 +Train: [48] [5100/6250] eta: 0:03:05 lr: 0.000070 grad: 0.1033 (0.1098) loss: 0.8081 (0.8080) time: 0.1597 data: 0.0717 max mem: 9377 +Train: [48] [5200/6250] eta: 0:02:48 lr: 0.000070 grad: 0.0961 (0.1096) loss: 0.8136 (0.8081) time: 0.1712 data: 0.0902 max mem: 9377 +Train: [48] [5300/6250] eta: 0:02:32 lr: 0.000070 grad: 0.1021 (0.1095) loss: 0.8101 (0.8082) time: 0.1628 data: 0.0665 max mem: 9377 +Train: [48] [5400/6250] eta: 0:02:17 lr: 0.000070 grad: 0.1058 (0.1094) loss: 0.8111 (0.8083) time: 0.1858 data: 0.1028 max mem: 9377 +Train: [48] [5500/6250] eta: 0:02:01 lr: 0.000070 grad: 0.1035 (0.1093) loss: 0.8188 (0.8084) time: 0.1611 data: 0.0703 max mem: 9377 +Train: [48] [5600/6250] eta: 0:01:45 lr: 0.000070 grad: 0.1069 (0.1093) loss: 0.8128 (0.8085) time: 0.1687 data: 0.0858 max mem: 9377 +Train: [48] [5700/6250] eta: 0:01:29 lr: 0.000070 grad: 0.1069 (0.1093) loss: 0.8115 (0.8085) time: 0.1484 data: 0.0569 max mem: 9377 +Train: [48] [5800/6250] eta: 0:01:12 lr: 0.000070 grad: 0.1054 (0.1092) loss: 0.8078 (0.8086) time: 0.1456 data: 0.0527 max mem: 9377 +Train: [48] [5900/6250] eta: 0:00:56 lr: 0.000070 grad: 0.0984 (0.1092) loss: 0.8136 (0.8086) time: 0.1995 data: 0.1042 max mem: 9377 +Train: [48] [6000/6250] eta: 0:00:40 lr: 0.000070 grad: 0.1026 (0.1092) loss: 0.8210 (0.8086) time: 0.1658 data: 0.0741 max mem: 9377 +Train: [48] [6100/6250] eta: 0:00:24 lr: 0.000070 grad: 0.1042 (0.1091) loss: 0.8124 (0.8087) time: 0.1575 data: 0.0668 max mem: 9377 +Train: [48] [6200/6250] eta: 0:00:08 lr: 0.000070 grad: 0.1115 (0.1092) loss: 0.8157 (0.8088) time: 0.1750 data: 0.0861 max mem: 9377 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.1036 (0.1092) loss: 0.8122 (0.8088) time: 0.1597 data: 0.0698 max mem: 9377 +Train: [48] Total time: 0:17:00 (0.1632 s / it) +Averaged stats: lr: 0.000070 grad: 0.1036 (0.1092) loss: 0.8122 (0.8088) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:03:51 loss: 0.8268 (0.8268) time: 3.7336 data: 3.6258 max mem: 9377 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8217 (0.8230) time: 0.1434 data: 0.1162 max mem: 9377 +Eval (hcp-train-subset): [48] Total time: 0:00:15 (0.2422 s / it) +Averaged stats (hcp-train-subset): loss: 0.8217 (0.8230) +Eval (hcp-val): [48] [ 0/62] eta: 0:05:57 loss: 0.8376 (0.8376) time: 5.7679 data: 5.7341 max mem: 9377 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8366 (0.8397) time: 0.1404 data: 0.1151 max mem: 9377 +Eval (hcp-val): [48] Total time: 0:00:14 (0.2404 s / it) +Averaged stats (hcp-val): loss: 0.8366 (0.8397) +Eval (nsd-val): [48] [ 0/62] eta: 0:03:44 loss: 0.8040 (0.8040) time: 3.6235 data: 3.5269 max mem: 9377 +Eval (nsd-val): [48] [61/62] eta: 0:00:00 loss: 0.8143 (0.8160) time: 0.1426 data: 0.1172 max mem: 9377 +Eval (nsd-val): [48] Total time: 0:00:14 (0.2352 s / it) +Averaged stats (nsd-val): loss: 0.8143 (0.8160) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [49] [ 0/6250] eta: 12:14:33 lr: 0.000070 grad: 0.1652 (0.1652) loss: 0.7875 (0.7875) time: 7.0517 data: 6.9195 max mem: 9377 +Train: [49] [ 100/6250] eta: 0:23:57 lr: 0.000070 grad: 0.1368 (0.1454) loss: 0.8102 (0.8176) time: 0.1677 data: 0.0464 max mem: 9377 +Train: [49] [ 200/6250] eta: 0:20:03 lr: 0.000070 grad: 0.1126 (0.1364) loss: 0.8135 (0.8096) time: 0.1552 data: 0.0668 max mem: 9377 +Train: [49] [ 300/6250] eta: 0:18:44 lr: 0.000070 grad: 0.1016 (0.1285) loss: 0.8129 (0.8101) time: 0.1727 data: 0.0785 max mem: 9377 +Train: [49] [ 400/6250] eta: 0:17:37 lr: 0.000070 grad: 0.1120 (0.1237) loss: 0.8119 (0.8093) time: 0.1363 data: 0.0392 max mem: 9377 +Train: [49] [ 500/6250] eta: 0:17:11 lr: 0.000070 grad: 0.1013 (0.1206) loss: 0.8102 (0.8083) time: 0.1893 data: 0.1040 max mem: 9377 +Train: [49] [ 600/6250] eta: 0:16:35 lr: 0.000070 grad: 0.1028 (0.1190) loss: 0.8095 (0.8070) time: 0.1710 data: 0.0814 max mem: 9377 +Train: [49] [ 700/6250] eta: 0:16:03 lr: 0.000069 grad: 0.1001 (0.1178) loss: 0.8053 (0.8063) time: 0.1423 data: 0.0470 max mem: 9377 +Train: [49] [ 800/6250] eta: 0:15:42 lr: 0.000069 grad: 0.0968 (0.1164) loss: 0.7993 (0.8060) time: 0.1642 data: 0.0776 max mem: 9377 +Train: [49] [ 900/6250] eta: 0:15:24 lr: 0.000069 grad: 0.1062 (0.1157) loss: 0.8009 (0.8051) time: 0.1629 data: 0.0740 max mem: 9377 +Train: [49] [1000/6250] eta: 0:15:01 lr: 0.000069 grad: 0.1055 (0.1149) loss: 0.7997 (0.8044) time: 0.1671 data: 0.0832 max mem: 9377 +Train: [49] [1100/6250] eta: 0:14:40 lr: 0.000069 grad: 0.1030 (0.1141) loss: 0.8088 (0.8042) time: 0.1548 data: 0.0675 max mem: 9377 +Train: [49] [1200/6250] eta: 0:14:18 lr: 0.000069 grad: 0.1042 (0.1139) loss: 0.7991 (0.8038) time: 0.1781 data: 0.0875 max mem: 9377 +Train: [49] [1300/6250] eta: 0:14:00 lr: 0.000069 grad: 0.1063 (0.1135) loss: 0.8014 (0.8034) time: 0.1694 data: 0.0732 max mem: 9377 +Train: [49] [1400/6250] eta: 0:13:40 lr: 0.000069 grad: 0.1069 (0.1131) loss: 0.8049 (0.8030) time: 0.1461 data: 0.0406 max mem: 9377 +Train: [49] [1500/6250] eta: 0:13:21 lr: 0.000069 grad: 0.1030 (0.1128) loss: 0.8040 (0.8028) time: 0.1642 data: 0.0747 max mem: 9377 +Train: [49] [1600/6250] eta: 0:12:59 lr: 0.000069 grad: 0.1089 (0.1125) loss: 0.8019 (0.8027) time: 0.1534 data: 0.0620 max mem: 9377 +Train: [49] [1700/6250] eta: 0:12:37 lr: 0.000069 grad: 0.1114 (0.1123) loss: 0.7996 (0.8024) time: 0.1347 data: 0.0394 max mem: 9377 +Train: [49] [1800/6250] eta: 0:12:17 lr: 0.000069 grad: 0.1052 (0.1120) loss: 0.7954 (0.8023) time: 0.1705 data: 0.0795 max mem: 9377 +Train: [49] [1900/6250] eta: 0:11:56 lr: 0.000069 grad: 0.1023 (0.1117) loss: 0.8018 (0.8022) time: 0.1410 data: 0.0458 max mem: 9377 +Train: [49] [2000/6250] eta: 0:11:37 lr: 0.000069 grad: 0.1087 (0.1116) loss: 0.7948 (0.8021) time: 0.1312 data: 0.0428 max mem: 9377 +Train: [49] [2100/6250] eta: 0:11:20 lr: 0.000069 grad: 0.1013 (0.1114) loss: 0.8080 (0.8021) time: 0.1448 data: 0.0572 max mem: 9377 +Train: [49] [2200/6250] eta: 0:11:04 lr: 0.000069 grad: 0.1033 (0.1111) loss: 0.8063 (0.8022) time: 0.1906 data: 0.0972 max mem: 9377 +Train: [49] [2300/6250] eta: 0:10:47 lr: 0.000069 grad: 0.1050 (0.1110) loss: 0.8137 (0.8023) time: 0.1913 data: 0.0991 max mem: 9377 +Train: [49] [2400/6250] eta: 0:10:30 lr: 0.000069 grad: 0.1092 (0.1109) loss: 0.8038 (0.8024) time: 0.1766 data: 0.0941 max mem: 9377 +Train: [49] [2500/6250] eta: 0:10:14 lr: 0.000069 grad: 0.1044 (0.1107) loss: 0.8043 (0.8024) time: 0.1609 data: 0.0758 max mem: 9377 +Train: [49] [2600/6250] eta: 0:09:56 lr: 0.000069 grad: 0.1086 (0.1108) loss: 0.7978 (0.8022) time: 0.1891 data: 0.1053 max mem: 9377 +Train: [49] [2700/6250] eta: 0:09:38 lr: 0.000069 grad: 0.1045 (0.1108) loss: 0.8057 (0.8023) time: 0.1507 data: 0.0593 max mem: 9377 +Train: [49] [2800/6250] eta: 0:09:23 lr: 0.000069 grad: 0.1131 (0.1107) loss: 0.8035 (0.8023) time: 0.1631 data: 0.0598 max mem: 9377 +Train: [49] [2900/6250] eta: 0:09:08 lr: 0.000069 grad: 0.1069 (0.1106) loss: 0.7927 (0.8023) time: 0.1744 data: 0.0793 max mem: 9377 +Train: [49] [3000/6250] eta: 0:08:52 lr: 0.000069 grad: 0.1061 (0.1106) loss: 0.8074 (0.8025) time: 0.1538 data: 0.0547 max mem: 9377 +Train: [49] [3100/6250] eta: 0:08:34 lr: 0.000069 grad: 0.1082 (0.1104) loss: 0.7990 (0.8026) time: 0.1328 data: 0.0404 max mem: 9377 +Train: [49] [3200/6250] eta: 0:08:16 lr: 0.000069 grad: 0.1042 (0.1104) loss: 0.8067 (0.8027) time: 0.1537 data: 0.0668 max mem: 9377 +Train: [49] [3300/6250] eta: 0:07:59 lr: 0.000069 grad: 0.1060 (0.1103) loss: 0.8035 (0.8029) time: 0.1661 data: 0.0845 max mem: 9377 +Train: [49] [3400/6250] eta: 0:07:43 lr: 0.000069 grad: 0.1091 (0.1102) loss: 0.7980 (0.8031) time: 0.1491 data: 0.0622 max mem: 9377 +Train: [49] [3500/6250] eta: 0:07:27 lr: 0.000069 grad: 0.1160 (0.1103) loss: 0.7965 (0.8032) time: 0.1674 data: 0.0849 max mem: 9377 +Train: [49] [3600/6250] eta: 0:07:11 lr: 0.000069 grad: 0.1029 (0.1103) loss: 0.8136 (0.8033) time: 0.1130 data: 0.0157 max mem: 9377 +Train: [49] [3700/6250] eta: 0:06:54 lr: 0.000069 grad: 0.1042 (0.1102) loss: 0.8073 (0.8035) time: 0.1107 data: 0.0148 max mem: 9377 +Train: [49] [3800/6250] eta: 0:06:38 lr: 0.000068 grad: 0.1057 (0.1101) loss: 0.8145 (0.8036) time: 0.1245 data: 0.0410 max mem: 9377 +Train: [49] [3900/6250] eta: 0:06:21 lr: 0.000068 grad: 0.1051 (0.1101) loss: 0.8144 (0.8038) time: 0.1354 data: 0.0457 max mem: 9377 +Train: [49] [4000/6250] eta: 0:06:05 lr: 0.000068 grad: 0.1070 (0.1101) loss: 0.8108 (0.8040) time: 0.1681 data: 0.0796 max mem: 9377 +Train: [49] [4100/6250] eta: 0:05:49 lr: 0.000068 grad: 0.1014 (0.1100) loss: 0.8051 (0.8042) time: 0.1204 data: 0.0319 max mem: 9377 +Train: [49] [4200/6250] eta: 0:05:33 lr: 0.000068 grad: 0.1049 (0.1100) loss: 0.8159 (0.8043) time: 0.1728 data: 0.0844 max mem: 9377 +Train: [49] [4300/6250] eta: 0:05:17 lr: 0.000068 grad: 0.1034 (0.1100) loss: 0.8099 (0.8044) time: 0.1797 data: 0.0969 max mem: 9377 +Train: [49] [4400/6250] eta: 0:05:01 lr: 0.000068 grad: 0.1018 (0.1100) loss: 0.8094 (0.8045) time: 0.1821 data: 0.0993 max mem: 9377 +Train: [49] [4500/6250] eta: 0:04:44 lr: 0.000068 grad: 0.1078 (0.1101) loss: 0.8091 (0.8046) time: 0.1588 data: 0.0718 max mem: 9377 +Train: [49] [4600/6250] eta: 0:04:28 lr: 0.000068 grad: 0.1033 (0.1101) loss: 0.8101 (0.8047) time: 0.1726 data: 0.0822 max mem: 9377 +Train: [49] [4700/6250] eta: 0:04:11 lr: 0.000068 grad: 0.1047 (0.1101) loss: 0.8085 (0.8048) time: 0.1609 data: 0.0672 max mem: 9377 +Train: [49] [4800/6250] eta: 0:03:55 lr: 0.000068 grad: 0.1039 (0.1102) loss: 0.8148 (0.8048) time: 0.2092 data: 0.1083 max mem: 9377 +Train: [49] [4900/6250] eta: 0:03:39 lr: 0.000068 grad: 0.1101 (0.1102) loss: 0.7969 (0.8048) time: 0.1824 data: 0.0975 max mem: 9377 +Train: [49] [5000/6250] eta: 0:03:22 lr: 0.000068 grad: 0.1058 (0.1102) loss: 0.8122 (0.8048) time: 0.1674 data: 0.0846 max mem: 9377 +Train: [49] [5100/6250] eta: 0:03:06 lr: 0.000068 grad: 0.1160 (0.1102) loss: 0.8050 (0.8049) time: 0.1749 data: 0.0970 max mem: 9377 +Train: [49] [5200/6250] eta: 0:02:50 lr: 0.000068 grad: 0.1112 (0.1102) loss: 0.8063 (0.8049) time: 0.1626 data: 0.0740 max mem: 9377 +Train: [49] [5300/6250] eta: 0:02:33 lr: 0.000068 grad: 0.1079 (0.1103) loss: 0.8003 (0.8049) time: 0.1767 data: 0.0997 max mem: 9377 +Train: [49] [5400/6250] eta: 0:02:17 lr: 0.000068 grad: 0.1028 (0.1103) loss: 0.8126 (0.8048) time: 0.1561 data: 0.0762 max mem: 9377 +Train: [49] [5500/6250] eta: 0:02:01 lr: 0.000068 grad: 0.1101 (0.1103) loss: 0.8096 (0.8048) time: 0.1615 data: 0.0787 max mem: 9377 +Train: [49] [5600/6250] eta: 0:01:45 lr: 0.000068 grad: 0.1141 (0.1103) loss: 0.8102 (0.8048) time: 0.1693 data: 0.0862 max mem: 9377 +Train: [49] [5700/6250] eta: 0:01:29 lr: 0.000068 grad: 0.1086 (0.1103) loss: 0.8126 (0.8048) time: 0.1828 data: 0.0984 max mem: 9377 +Train: [49] [5800/6250] eta: 0:01:13 lr: 0.000068 grad: 0.1087 (0.1103) loss: 0.8064 (0.8049) time: 0.1613 data: 0.0755 max mem: 9377 +Train: [49] [5900/6250] eta: 0:00:56 lr: 0.000068 grad: 0.1006 (0.1103) loss: 0.8146 (0.8050) time: 0.1707 data: 0.0793 max mem: 9377 +Train: [49] [6000/6250] eta: 0:00:40 lr: 0.000068 grad: 0.1062 (0.1103) loss: 0.8100 (0.8050) time: 0.1748 data: 0.0862 max mem: 9377 +Train: [49] [6100/6250] eta: 0:00:24 lr: 0.000068 grad: 0.1036 (0.1102) loss: 0.8136 (0.8051) time: 0.2141 data: 0.0659 max mem: 9377 +Train: [49] [6200/6250] eta: 0:00:08 lr: 0.000068 grad: 0.1123 (0.1102) loss: 0.8072 (0.8052) time: 0.1609 data: 0.0719 max mem: 9377 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.1027 (0.1102) loss: 0.8110 (0.8052) time: 0.1542 data: 0.0595 max mem: 9377 +Train: [49] Total time: 0:17:03 (0.1638 s / it) +Averaged stats: lr: 0.000068 grad: 0.1027 (0.1102) loss: 0.8110 (0.8052) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:06:37 loss: 0.8263 (0.8263) time: 6.4176 data: 6.3868 max mem: 9377 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8223 (0.8225) time: 0.1633 data: 0.1361 max mem: 9377 +Eval (hcp-train-subset): [49] Total time: 0:00:15 (0.2493 s / it) +Averaged stats (hcp-train-subset): loss: 0.8223 (0.8225) +Making plots (hcp-train-subset): example=50 +Eval (hcp-val): [49] [ 0/62] eta: 0:04:55 loss: 0.8405 (0.8405) time: 4.7638 data: 4.6836 max mem: 9377 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8398 (0.8412) time: 0.1451 data: 0.1182 max mem: 9377 +Eval (hcp-val): [49] Total time: 0:00:14 (0.2385 s / it) +Averaged stats (hcp-val): loss: 0.8398 (0.8412) +Making plots (hcp-val): example=23 +Eval (nsd-val): [49] [ 0/62] eta: 0:06:41 loss: 0.8094 (0.8094) time: 6.4713 data: 6.4395 max mem: 9377 +Eval (nsd-val): [49] [61/62] eta: 0:00:00 loss: 0.8175 (0.8189) time: 0.1342 data: 0.1088 max mem: 9377 +Eval (nsd-val): [49] Total time: 0:00:15 (0.2438 s / it) +Averaged stats (nsd-val): loss: 0.8175 (0.8189) +Making plots (nsd-val): example=14 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00049.pth +Train: [50] [ 0/6250] eta: 10:35:54 lr: 0.000068 grad: 0.2021 (0.2021) loss: 0.8629 (0.8629) time: 6.1048 data: 5.8644 max mem: 9377 +Train: [50] [ 100/6250] eta: 0:23:31 lr: 0.000068 grad: 0.1048 (0.1247) loss: 0.8200 (0.8268) time: 0.1459 data: 0.0264 max mem: 9377 +Train: [50] [ 200/6250] eta: 0:20:03 lr: 0.000068 grad: 0.0927 (0.1166) loss: 0.8281 (0.8253) time: 0.1406 data: 0.0313 max mem: 9377 +Train: [50] [ 300/6250] eta: 0:18:37 lr: 0.000068 grad: 0.1038 (0.1132) loss: 0.8186 (0.8239) time: 0.1694 data: 0.0586 max mem: 9377 +Train: [50] [ 400/6250] eta: 0:17:44 lr: 0.000068 grad: 0.1050 (0.1112) loss: 0.8215 (0.8228) time: 0.1383 data: 0.0364 max mem: 9377 +Train: [50] [ 500/6250] eta: 0:17:12 lr: 0.000067 grad: 0.1022 (0.1097) loss: 0.8189 (0.8221) time: 0.1664 data: 0.0678 max mem: 9377 +Train: [50] [ 600/6250] eta: 0:16:31 lr: 0.000067 grad: 0.1114 (0.1097) loss: 0.8034 (0.8202) time: 0.1492 data: 0.0479 max mem: 9377 +Train: [50] [ 700/6250] eta: 0:16:05 lr: 0.000067 grad: 0.1000 (0.1101) loss: 0.8103 (0.8189) time: 0.1831 data: 0.0836 max mem: 9377 +Train: [50] [ 800/6250] eta: 0:15:51 lr: 0.000067 grad: 0.1074 (0.1099) loss: 0.8182 (0.8176) time: 0.1805 data: 0.0832 max mem: 9377 +Train: [50] [ 900/6250] eta: 0:15:26 lr: 0.000067 grad: 0.1040 (0.1099) loss: 0.8095 (0.8167) time: 0.1536 data: 0.0559 max mem: 9377 +Train: [50] [1000/6250] eta: 0:15:02 lr: 0.000067 grad: 0.1068 (0.1096) loss: 0.8081 (0.8156) time: 0.1537 data: 0.0686 max mem: 9377 +Train: [50] [1100/6250] eta: 0:14:38 lr: 0.000067 grad: 0.1030 (0.1095) loss: 0.8155 (0.8147) time: 0.1529 data: 0.0811 max mem: 9377 +Train: [50] [1200/6250] eta: 0:14:15 lr: 0.000067 grad: 0.1031 (0.1094) loss: 0.8095 (0.8138) time: 0.1850 data: 0.1008 max mem: 9377 +Train: [50] [1300/6250] eta: 0:13:55 lr: 0.000067 grad: 0.0995 (0.1091) loss: 0.8143 (0.8131) time: 0.1712 data: 0.0825 max mem: 9377 +Train: [50] [1400/6250] eta: 0:13:37 lr: 0.000067 grad: 0.1080 (0.1089) loss: 0.8135 (0.8127) time: 0.1621 data: 0.0737 max mem: 9377 +Train: [50] [1500/6250] eta: 0:13:15 lr: 0.000067 grad: 0.1106 (0.1088) loss: 0.8018 (0.8122) time: 0.1497 data: 0.0511 max mem: 9377 +Train: [50] [1600/6250] eta: 0:12:54 lr: 0.000067 grad: 0.1092 (0.1090) loss: 0.8053 (0.8116) time: 0.1540 data: 0.0688 max mem: 9377 +Train: [50] [1700/6250] eta: 0:12:32 lr: 0.000067 grad: 0.1123 (0.1090) loss: 0.7996 (0.8112) time: 0.1587 data: 0.0622 max mem: 9377 +Train: [50] [1800/6250] eta: 0:12:12 lr: 0.000067 grad: 0.1061 (0.1089) loss: 0.8104 (0.8110) time: 0.1416 data: 0.0481 max mem: 9377 +Train: [50] [1900/6250] eta: 0:11:52 lr: 0.000067 grad: 0.1045 (0.1089) loss: 0.8125 (0.8110) time: 0.1133 data: 0.0266 max mem: 9377 +Train: [50] [2000/6250] eta: 0:11:32 lr: 0.000067 grad: 0.0988 (0.1088) loss: 0.8160 (0.8110) time: 0.1508 data: 0.0598 max mem: 9377 +Train: [50] [2100/6250] eta: 0:11:13 lr: 0.000067 grad: 0.1048 (0.1087) loss: 0.8076 (0.8110) time: 0.1451 data: 0.0510 max mem: 9377 +Train: [50] [2200/6250] eta: 0:10:58 lr: 0.000067 grad: 0.1097 (0.1088) loss: 0.8025 (0.8109) time: 0.1951 data: 0.1186 max mem: 9377 +Train: [50] [2300/6250] eta: 0:10:41 lr: 0.000067 grad: 0.1093 (0.1088) loss: 0.8043 (0.8108) time: 0.1553 data: 0.0788 max mem: 9377 +Train: [50] [2400/6250] eta: 0:10:26 lr: 0.000067 grad: 0.1067 (0.1087) loss: 0.8137 (0.8108) time: 0.1429 data: 0.0546 max mem: 9377 +Train: [50] [2500/6250] eta: 0:10:09 lr: 0.000067 grad: 0.1094 (0.1088) loss: 0.8058 (0.8107) time: 0.1448 data: 0.0617 max mem: 9377 +Train: [50] [2600/6250] eta: 0:09:51 lr: 0.000067 grad: 0.1015 (0.1088) loss: 0.8189 (0.8107) time: 0.1672 data: 0.0798 max mem: 9377 +Train: [50] [2700/6250] eta: 0:09:37 lr: 0.000067 grad: 0.1040 (0.1086) loss: 0.8155 (0.8107) time: 0.1554 data: 0.0735 max mem: 9377 +Train: [50] [2800/6250] eta: 0:09:21 lr: 0.000067 grad: 0.1124 (0.1086) loss: 0.7922 (0.8106) time: 0.1714 data: 0.0833 max mem: 9377 +Train: [50] [2900/6250] eta: 0:09:04 lr: 0.000067 grad: 0.1045 (0.1087) loss: 0.8110 (0.8106) time: 0.1397 data: 0.0496 max mem: 9377 +Train: [50] [3000/6250] eta: 0:08:48 lr: 0.000067 grad: 0.1065 (0.1087) loss: 0.8166 (0.8106) time: 0.1671 data: 0.0860 max mem: 9377 +Train: [50] [3100/6250] eta: 0:08:29 lr: 0.000067 grad: 0.1064 (0.1086) loss: 0.8112 (0.8106) time: 0.1572 data: 0.0634 max mem: 9377 +Train: [50] [3200/6250] eta: 0:08:13 lr: 0.000067 grad: 0.1001 (0.1085) loss: 0.8101 (0.8106) time: 0.1547 data: 0.0430 max mem: 9377 +Train: [50] [3300/6250] eta: 0:07:56 lr: 0.000067 grad: 0.1073 (0.1085) loss: 0.8120 (0.8107) time: 0.1741 data: 0.0826 max mem: 9377 +Train: [50] [3400/6250] eta: 0:07:38 lr: 0.000067 grad: 0.1040 (0.1084) loss: 0.8108 (0.8107) time: 0.1434 data: 0.0588 max mem: 9377 +Train: [50] [3500/6250] eta: 0:07:21 lr: 0.000067 grad: 0.1045 (0.1084) loss: 0.8135 (0.8108) time: 0.1337 data: 0.0473 max mem: 9377 +Train: [50] [3600/6250] eta: 0:07:05 lr: 0.000066 grad: 0.1002 (0.1083) loss: 0.8201 (0.8108) time: 0.1773 data: 0.0824 max mem: 9377 +Train: [50] [3700/6250] eta: 0:06:48 lr: 0.000066 grad: 0.1018 (0.1083) loss: 0.8160 (0.8108) time: 0.1480 data: 0.0552 max mem: 9377 +Train: [50] [3800/6250] eta: 0:06:32 lr: 0.000066 grad: 0.1099 (0.1085) loss: 0.8115 (0.8108) time: 0.1491 data: 0.0655 max mem: 9377 +Train: [50] [3900/6250] eta: 0:06:15 lr: 0.000066 grad: 0.1081 (0.1085) loss: 0.8124 (0.8108) time: 0.1592 data: 0.0721 max mem: 9377 +Train: [50] [4000/6250] eta: 0:05:59 lr: 0.000066 grad: 0.1048 (0.1084) loss: 0.8055 (0.8109) time: 0.1177 data: 0.0246 max mem: 9377 +Train: [50] [4100/6250] eta: 0:05:42 lr: 0.000066 grad: 0.1081 (0.1084) loss: 0.8075 (0.8109) time: 0.1634 data: 0.0658 max mem: 9377 +Train: [50] [4200/6250] eta: 0:05:26 lr: 0.000066 grad: 0.1084 (0.1085) loss: 0.8095 (0.8109) time: 0.1685 data: 0.0885 max mem: 9377 +Train: [50] [4300/6250] eta: 0:05:10 lr: 0.000066 grad: 0.1083 (0.1085) loss: 0.8106 (0.8109) time: 0.1682 data: 0.0773 max mem: 9377 +Train: [50] [4400/6250] eta: 0:04:54 lr: 0.000066 grad: 0.1027 (0.1085) loss: 0.8111 (0.8109) time: 0.1427 data: 0.0618 max mem: 9377 +Train: [50] [4500/6250] eta: 0:04:37 lr: 0.000066 grad: 0.1066 (0.1084) loss: 0.8085 (0.8110) time: 0.1547 data: 0.0670 max mem: 9377 +Train: [50] [4600/6250] eta: 0:04:21 lr: 0.000066 grad: 0.1113 (0.1084) loss: 0.8047 (0.8110) time: 0.1788 data: 0.0850 max mem: 9377 +Train: [50] [4700/6250] eta: 0:04:05 lr: 0.000066 grad: 0.1106 (0.1086) loss: 0.8022 (0.8109) time: 0.1451 data: 0.0623 max mem: 9377 +Train: [50] [4800/6250] eta: 0:03:49 lr: 0.000066 grad: 0.1051 (0.1088) loss: 0.8153 (0.8108) time: 0.1621 data: 0.0689 max mem: 9377 +Train: [50] [4900/6250] eta: 0:03:33 lr: 0.000066 grad: 0.1136 (0.1089) loss: 0.8103 (0.8108) time: 0.1510 data: 0.0532 max mem: 9377 +Train: [50] [5000/6250] eta: 0:03:17 lr: 0.000066 grad: 0.1075 (0.1090) loss: 0.8111 (0.8107) time: 0.1693 data: 0.0826 max mem: 9377 +Train: [50] [5100/6250] eta: 0:03:01 lr: 0.000066 grad: 0.1072 (0.1090) loss: 0.8130 (0.8107) time: 0.1615 data: 0.0674 max mem: 9377 +Train: [50] [5200/6250] eta: 0:02:45 lr: 0.000066 grad: 0.1052 (0.1090) loss: 0.8123 (0.8107) time: 0.1446 data: 0.0540 max mem: 9377 +Train: [50] [5300/6250] eta: 0:02:30 lr: 0.000066 grad: 0.1097 (0.1091) loss: 0.8103 (0.8106) time: 0.1662 data: 0.0701 max mem: 9377 +Train: [50] [5400/6250] eta: 0:02:14 lr: 0.000066 grad: 0.1090 (0.1091) loss: 0.8074 (0.8107) time: 0.1573 data: 0.0692 max mem: 9377 +Train: [50] [5500/6250] eta: 0:01:58 lr: 0.000066 grad: 0.0981 (0.1091) loss: 0.8135 (0.8107) time: 0.1980 data: 0.1140 max mem: 9377 +Train: [50] [5600/6250] eta: 0:01:42 lr: 0.000066 grad: 0.1055 (0.1091) loss: 0.8053 (0.8107) time: 0.1568 data: 0.0704 max mem: 9377 +Train: [50] [5700/6250] eta: 0:01:27 lr: 0.000066 grad: 0.1108 (0.1091) loss: 0.8100 (0.8107) time: 0.1682 data: 0.0841 max mem: 9377 +Train: [50] [5800/6250] eta: 0:01:11 lr: 0.000066 grad: 0.1104 (0.1091) loss: 0.8079 (0.8107) time: 0.1542 data: 0.0709 max mem: 9377 +Train: [50] [5900/6250] eta: 0:00:55 lr: 0.000066 grad: 0.1046 (0.1091) loss: 0.8094 (0.8106) time: 0.1394 data: 0.0443 max mem: 9377 +Train: [50] [6000/6250] eta: 0:00:39 lr: 0.000066 grad: 0.1165 (0.1092) loss: 0.8079 (0.8106) time: 0.1608 data: 0.0667 max mem: 9377 +Train: [50] [6100/6250] eta: 0:00:23 lr: 0.000066 grad: 0.1084 (0.1092) loss: 0.8089 (0.8106) time: 0.1470 data: 0.0571 max mem: 9377 +Train: [50] [6200/6250] eta: 0:00:07 lr: 0.000066 grad: 0.1133 (0.1092) loss: 0.8001 (0.8106) time: 0.1581 data: 0.0635 max mem: 9377 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.1044 (0.1093) loss: 0.8071 (0.8106) time: 0.1177 data: 0.0246 max mem: 9377 +Train: [50] Total time: 0:16:36 (0.1594 s / it) +Averaged stats: lr: 0.000066 grad: 0.1044 (0.1093) loss: 0.8071 (0.8106) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:05:31 loss: 0.8300 (0.8300) time: 5.3453 data: 5.3146 max mem: 9377 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8186 (0.8221) time: 0.1507 data: 0.1254 max mem: 9377 +Eval (hcp-train-subset): [50] Total time: 0:00:14 (0.2352 s / it) +Averaged stats (hcp-train-subset): loss: 0.8186 (0.8221) +Eval (hcp-val): [50] [ 0/62] eta: 0:04:24 loss: 0.8355 (0.8355) time: 4.2582 data: 4.1701 max mem: 9377 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8381 (0.8412) time: 0.1423 data: 0.1146 max mem: 9377 +Eval (hcp-val): [50] Total time: 0:00:14 (0.2380 s / it) +Averaged stats (hcp-val): loss: 0.8381 (0.8412) +Eval (nsd-val): [50] [ 0/62] eta: 0:03:39 loss: 0.8027 (0.8027) time: 3.5344 data: 3.4574 max mem: 9377 +Eval (nsd-val): [50] [61/62] eta: 0:00:00 loss: 0.8135 (0.8150) time: 0.1342 data: 0.1068 max mem: 9377 +Eval (nsd-val): [50] Total time: 0:00:14 (0.2357 s / it) +Averaged stats (nsd-val): loss: 0.8135 (0.8150) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 9:32:08 lr: 0.000066 grad: 0.1050 (0.1050) loss: 0.8138 (0.8138) time: 5.4925 data: 5.2238 max mem: 9377 +Train: [51] [ 100/6250] eta: 0:21:57 lr: 0.000066 grad: 0.1253 (0.1392) loss: 0.8107 (0.8171) time: 0.1625 data: 0.0613 max mem: 9377 +Train: [51] [ 200/6250] eta: 0:19:21 lr: 0.000066 grad: 0.1182 (0.1377) loss: 0.7996 (0.8089) time: 0.1485 data: 0.0450 max mem: 9377 +Train: [51] [ 300/6250] eta: 0:17:50 lr: 0.000065 grad: 0.1147 (0.1338) loss: 0.8076 (0.8076) time: 0.1418 data: 0.0433 max mem: 9377 +Train: [51] [ 400/6250] eta: 0:16:52 lr: 0.000065 grad: 0.1137 (0.1306) loss: 0.8071 (0.8068) time: 0.1645 data: 0.0798 max mem: 9377 +Train: [51] [ 500/6250] eta: 0:16:17 lr: 0.000065 grad: 0.1196 (0.1278) loss: 0.8177 (0.8071) time: 0.1530 data: 0.0719 max mem: 9377 +Train: [51] [ 600/6250] eta: 0:15:55 lr: 0.000065 grad: 0.1094 (0.1255) loss: 0.8042 (0.8070) time: 0.1718 data: 0.0886 max mem: 9377 +Train: [51] [ 700/6250] eta: 0:15:35 lr: 0.000065 grad: 0.1115 (0.1237) loss: 0.8097 (0.8069) time: 0.1672 data: 0.0844 max mem: 9377 +Train: [51] [ 800/6250] eta: 0:15:19 lr: 0.000065 grad: 0.1089 (0.1220) loss: 0.8067 (0.8072) time: 0.1764 data: 0.0837 max mem: 9377 +Train: [51] [ 900/6250] eta: 0:15:04 lr: 0.000065 grad: 0.1100 (0.1204) loss: 0.8119 (0.8073) time: 0.1910 data: 0.0957 max mem: 9377 +Train: [51] [1000/6250] eta: 0:14:39 lr: 0.000065 grad: 0.1067 (0.1191) loss: 0.8106 (0.8076) time: 0.1572 data: 0.0636 max mem: 9377 +Train: [51] [1100/6250] eta: 0:14:16 lr: 0.000065 grad: 0.1014 (0.1179) loss: 0.8172 (0.8079) time: 0.1259 data: 0.0339 max mem: 9377 +Train: [51] [1200/6250] eta: 0:14:00 lr: 0.000065 grad: 0.1054 (0.1172) loss: 0.8090 (0.8078) time: 0.1959 data: 0.0988 max mem: 9377 +Train: [51] [1300/6250] eta: 0:13:46 lr: 0.000065 grad: 0.1129 (0.1166) loss: 0.8146 (0.8078) time: 0.1700 data: 0.0796 max mem: 9377 +Train: [51] [1400/6250] eta: 0:13:30 lr: 0.000065 grad: 0.1071 (0.1163) loss: 0.8045 (0.8079) time: 0.1729 data: 0.0823 max mem: 9377 +Train: [51] [1500/6250] eta: 0:13:09 lr: 0.000065 grad: 0.1089 (0.1157) loss: 0.8071 (0.8079) time: 0.1553 data: 0.0645 max mem: 9377 +Train: [51] [1600/6250] eta: 0:12:48 lr: 0.000065 grad: 0.1099 (0.1153) loss: 0.8135 (0.8078) time: 0.1506 data: 0.0684 max mem: 9377 +Train: [51] [1700/6250] eta: 0:12:27 lr: 0.000065 grad: 0.0980 (0.1146) loss: 0.8180 (0.8080) time: 0.1420 data: 0.0565 max mem: 9377 +Train: [51] [1800/6250] eta: 0:12:09 lr: 0.000065 grad: 0.1137 (0.1141) loss: 0.8073 (0.8080) time: 0.1762 data: 0.0925 max mem: 9377 +Train: [51] [1900/6250] eta: 0:11:53 lr: 0.000065 grad: 0.1063 (0.1137) loss: 0.8109 (0.8082) time: 0.2120 data: 0.1264 max mem: 9377 +Train: [51] [2000/6250] eta: 0:11:35 lr: 0.000065 grad: 0.1035 (0.1134) loss: 0.8035 (0.8081) time: 0.1489 data: 0.0615 max mem: 9377 +Train: [51] [2100/6250] eta: 0:11:18 lr: 0.000065 grad: 0.1067 (0.1130) loss: 0.8147 (0.8083) time: 0.1462 data: 0.0545 max mem: 9377 +Train: [51] [2200/6250] eta: 0:11:02 lr: 0.000065 grad: 0.1031 (0.1128) loss: 0.8101 (0.8083) time: 0.1740 data: 0.0862 max mem: 9377 +Train: [51] [2300/6250] eta: 0:10:48 lr: 0.000065 grad: 0.0960 (0.1126) loss: 0.8172 (0.8084) time: 0.2305 data: 0.1481 max mem: 9377 +Train: [51] [2400/6250] eta: 0:10:34 lr: 0.000065 grad: 0.0996 (0.1123) loss: 0.8065 (0.8085) time: 0.1890 data: 0.1004 max mem: 9377 +Train: [51] [2500/6250] eta: 0:10:20 lr: 0.000065 grad: 0.0987 (0.1122) loss: 0.8169 (0.8087) time: 0.1804 data: 0.0878 max mem: 9377 +Train: [51] [2600/6250] eta: 0:10:06 lr: 0.000065 grad: 0.1080 (0.1120) loss: 0.7947 (0.8086) time: 0.1654 data: 0.0770 max mem: 9377 +Train: [51] [2700/6250] eta: 0:09:52 lr: 0.000065 grad: 0.0997 (0.1119) loss: 0.8158 (0.8086) time: 0.1830 data: 0.0864 max mem: 9377 +Train: [51] [2800/6250] eta: 0:09:38 lr: 0.000065 grad: 0.1076 (0.1119) loss: 0.8121 (0.8087) time: 0.1870 data: 0.0979 max mem: 9377 +Train: [51] [2900/6250] eta: 0:09:22 lr: 0.000065 grad: 0.1076 (0.1119) loss: 0.8078 (0.8086) time: 0.1875 data: 0.0868 max mem: 9377 +Train: [51] [3000/6250] eta: 0:09:06 lr: 0.000065 grad: 0.1110 (0.1119) loss: 0.8007 (0.8086) time: 0.1834 data: 0.0789 max mem: 9377 +Train: [51] [3100/6250] eta: 0:08:48 lr: 0.000065 grad: 0.1033 (0.1118) loss: 0.8078 (0.8086) time: 0.1318 data: 0.0464 max mem: 9377 +Train: [51] [3200/6250] eta: 0:08:31 lr: 0.000065 grad: 0.1011 (0.1117) loss: 0.8159 (0.8087) time: 0.1638 data: 0.0715 max mem: 9377 +Train: [51] [3300/6250] eta: 0:08:14 lr: 0.000065 grad: 0.1049 (0.1116) loss: 0.8119 (0.8088) time: 0.1084 data: 0.0111 max mem: 9377 +Train: [51] [3400/6250] eta: 0:07:57 lr: 0.000064 grad: 0.1142 (0.1117) loss: 0.8086 (0.8088) time: 0.1420 data: 0.0491 max mem: 9377 +Train: [51] [3500/6250] eta: 0:07:40 lr: 0.000064 grad: 0.1052 (0.1116) loss: 0.8153 (0.8089) time: 0.1741 data: 0.0818 max mem: 9377 +Train: [51] [3600/6250] eta: 0:07:23 lr: 0.000064 grad: 0.1046 (0.1116) loss: 0.8081 (0.8088) time: 0.1815 data: 0.0919 max mem: 9377 +Train: [51] [3700/6250] eta: 0:07:06 lr: 0.000064 grad: 0.1086 (0.1116) loss: 0.8098 (0.8087) time: 0.1592 data: 0.0662 max mem: 9377 +Train: [51] [3800/6250] eta: 0:06:49 lr: 0.000064 grad: 0.1169 (0.1117) loss: 0.8004 (0.8086) time: 0.1698 data: 0.0768 max mem: 9377 +Train: [51] [3900/6250] eta: 0:06:32 lr: 0.000064 grad: 0.1046 (0.1117) loss: 0.8047 (0.8085) time: 0.1554 data: 0.0584 max mem: 9377 +Train: [51] [4000/6250] eta: 0:06:14 lr: 0.000064 grad: 0.1129 (0.1118) loss: 0.8007 (0.8083) time: 0.1545 data: 0.0697 max mem: 9377 +Train: [51] [4100/6250] eta: 0:05:57 lr: 0.000064 grad: 0.1082 (0.1118) loss: 0.8059 (0.8083) time: 0.1532 data: 0.0696 max mem: 9377 +Train: [51] [4200/6250] eta: 0:05:40 lr: 0.000064 grad: 0.1137 (0.1118) loss: 0.7963 (0.8082) time: 0.1530 data: 0.0558 max mem: 9377 +Train: [51] [4300/6250] eta: 0:05:23 lr: 0.000064 grad: 0.1064 (0.1118) loss: 0.8098 (0.8082) time: 0.1604 data: 0.0700 max mem: 9377 +Train: [51] [4400/6250] eta: 0:05:06 lr: 0.000064 grad: 0.1076 (0.1118) loss: 0.8086 (0.8082) time: 0.1521 data: 0.0717 max mem: 9377 +Train: [51] [4500/6250] eta: 0:04:49 lr: 0.000064 grad: 0.1192 (0.1119) loss: 0.7987 (0.8081) time: 0.1421 data: 0.0564 max mem: 9377 +Train: [51] [4600/6250] eta: 0:04:32 lr: 0.000064 grad: 0.1126 (0.1120) loss: 0.7931 (0.8079) time: 0.1390 data: 0.0415 max mem: 9377 +Train: [51] [4700/6250] eta: 0:04:15 lr: 0.000064 grad: 0.1109 (0.1121) loss: 0.8043 (0.8077) time: 0.1411 data: 0.0599 max mem: 9377 +Train: [51] [4800/6250] eta: 0:03:59 lr: 0.000064 grad: 0.1180 (0.1122) loss: 0.8058 (0.8076) time: 0.1400 data: 0.0619 max mem: 9377 +Train: [51] [4900/6250] eta: 0:03:42 lr: 0.000064 grad: 0.1076 (0.1122) loss: 0.8061 (0.8075) time: 0.1408 data: 0.0518 max mem: 9377 +Train: [51] [5000/6250] eta: 0:03:26 lr: 0.000064 grad: 0.1152 (0.1123) loss: 0.8025 (0.8074) time: 0.1544 data: 0.0716 max mem: 9377 +Train: [51] [5100/6250] eta: 0:03:09 lr: 0.000064 grad: 0.1142 (0.1125) loss: 0.8060 (0.8073) time: 0.1469 data: 0.0499 max mem: 9377 +Train: [51] [5200/6250] eta: 0:02:52 lr: 0.000064 grad: 0.1132 (0.1126) loss: 0.8024 (0.8071) time: 0.1704 data: 0.0875 max mem: 9377 +Train: [51] [5300/6250] eta: 0:02:36 lr: 0.000064 grad: 0.1165 (0.1127) loss: 0.8078 (0.8071) time: 0.1716 data: 0.0830 max mem: 9377 +Train: [51] [5400/6250] eta: 0:02:20 lr: 0.000064 grad: 0.1186 (0.1128) loss: 0.8038 (0.8070) time: 0.1919 data: 0.1102 max mem: 9377 +Train: [51] [5500/6250] eta: 0:02:03 lr: 0.000064 grad: 0.1096 (0.1128) loss: 0.8104 (0.8069) time: 0.1699 data: 0.0814 max mem: 9377 +Train: [51] [5600/6250] eta: 0:01:47 lr: 0.000064 grad: 0.1145 (0.1128) loss: 0.8109 (0.8069) time: 0.1793 data: 0.0821 max mem: 9377 +Train: [51] [5700/6250] eta: 0:01:30 lr: 0.000064 grad: 0.1100 (0.1129) loss: 0.8033 (0.8068) time: 0.1569 data: 0.0756 max mem: 9377 +Train: [51] [5800/6250] eta: 0:01:14 lr: 0.000064 grad: 0.1120 (0.1129) loss: 0.8036 (0.8067) time: 0.1546 data: 0.0734 max mem: 9377 +Train: [51] [5900/6250] eta: 0:00:57 lr: 0.000064 grad: 0.1158 (0.1129) loss: 0.8071 (0.8067) time: 0.1587 data: 0.0752 max mem: 9377 +Train: [51] [6000/6250] eta: 0:00:41 lr: 0.000064 grad: 0.1067 (0.1129) loss: 0.8071 (0.8067) time: 0.1588 data: 0.0690 max mem: 9377 +Train: [51] [6100/6250] eta: 0:00:24 lr: 0.000064 grad: 0.1050 (0.1129) loss: 0.8146 (0.8067) time: 0.1322 data: 0.0443 max mem: 9377 +Train: [51] [6200/6250] eta: 0:00:08 lr: 0.000064 grad: 0.1127 (0.1129) loss: 0.8031 (0.8066) time: 0.1443 data: 0.0449 max mem: 9377 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.1052 (0.1129) loss: 0.8100 (0.8067) time: 0.1483 data: 0.0560 max mem: 9377 +Train: [51] Total time: 0:17:13 (0.1653 s / it) +Averaged stats: lr: 0.000064 grad: 0.1052 (0.1129) loss: 0.8100 (0.8067) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:04:02 loss: 0.8296 (0.8296) time: 3.9190 data: 3.8429 max mem: 9377 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8169 (0.8204) time: 0.1339 data: 0.1085 max mem: 9377 +Eval (hcp-train-subset): [51] Total time: 0:00:14 (0.2364 s / it) +Averaged stats (hcp-train-subset): loss: 0.8169 (0.8204) +Eval (hcp-val): [51] [ 0/62] eta: 0:03:48 loss: 0.8358 (0.8358) time: 3.6784 data: 3.6219 max mem: 9377 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8364 (0.8400) time: 0.1170 data: 0.0903 max mem: 9377 +Eval (hcp-val): [51] Total time: 0:00:14 (0.2330 s / it) +Averaged stats (hcp-val): loss: 0.8364 (0.8400) +Eval (nsd-val): [51] [ 0/62] eta: 0:04:15 loss: 0.8050 (0.8050) time: 4.1221 data: 4.0312 max mem: 9377 +Eval (nsd-val): [51] [61/62] eta: 0:00:00 loss: 0.8177 (0.8171) time: 0.1406 data: 0.1130 max mem: 9377 +Eval (nsd-val): [51] Total time: 0:00:14 (0.2321 s / it) +Averaged stats (nsd-val): loss: 0.8177 (0.8171) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 11:48:51 lr: 0.000064 grad: 0.1113 (0.1113) loss: 0.8582 (0.8582) time: 6.8050 data: 6.7028 max mem: 9377 +Train: [52] [ 100/6250] eta: 0:22:10 lr: 0.000063 grad: 0.1299 (0.1636) loss: 0.8208 (0.8139) time: 0.1638 data: 0.0653 max mem: 9377 +Train: [52] [ 200/6250] eta: 0:18:52 lr: 0.000063 grad: 0.1019 (0.1435) loss: 0.8209 (0.8143) time: 0.1373 data: 0.0325 max mem: 9377 +Train: [52] [ 300/6250] eta: 0:17:45 lr: 0.000063 grad: 0.0972 (0.1329) loss: 0.8170 (0.8140) time: 0.1713 data: 0.0765 max mem: 9377 +Train: [52] [ 400/6250] eta: 0:16:48 lr: 0.000063 grad: 0.0978 (0.1257) loss: 0.8196 (0.8143) time: 0.1720 data: 0.0759 max mem: 9377 +Train: [52] [ 500/6250] eta: 0:16:02 lr: 0.000063 grad: 0.1000 (0.1221) loss: 0.8157 (0.8150) time: 0.1636 data: 0.0664 max mem: 9377 +Train: [52] [ 600/6250] eta: 0:15:28 lr: 0.000063 grad: 0.1074 (0.1208) loss: 0.8021 (0.8146) time: 0.1402 data: 0.0276 max mem: 9377 +Train: [52] [ 700/6250] eta: 0:15:00 lr: 0.000063 grad: 0.1063 (0.1189) loss: 0.8079 (0.8148) time: 0.1655 data: 0.0732 max mem: 9377 +Train: [52] [ 800/6250] eta: 0:14:39 lr: 0.000063 grad: 0.1063 (0.1171) loss: 0.8223 (0.8151) time: 0.1611 data: 0.0772 max mem: 9377 +Train: [52] [ 900/6250] eta: 0:14:25 lr: 0.000063 grad: 0.1077 (0.1163) loss: 0.8072 (0.8148) time: 0.1764 data: 0.0822 max mem: 9377 +Train: [52] [1000/6250] eta: 0:14:15 lr: 0.000063 grad: 0.1075 (0.1157) loss: 0.8121 (0.8143) time: 0.1759 data: 0.0871 max mem: 9377 +Train: [52] [1100/6250] eta: 0:13:55 lr: 0.000063 grad: 0.1050 (0.1151) loss: 0.8029 (0.8134) time: 0.1629 data: 0.0749 max mem: 9377 +Train: [52] [1200/6250] eta: 0:13:36 lr: 0.000063 grad: 0.1086 (0.1147) loss: 0.8035 (0.8127) time: 0.1526 data: 0.0705 max mem: 9377 +Train: [52] [1300/6250] eta: 0:13:19 lr: 0.000063 grad: 0.1098 (0.1146) loss: 0.8041 (0.8119) time: 0.1754 data: 0.0876 max mem: 9377 +Train: [52] [1400/6250] eta: 0:13:05 lr: 0.000063 grad: 0.1144 (0.1144) loss: 0.8047 (0.8115) time: 0.1644 data: 0.0852 max mem: 9377 +Train: [52] [1500/6250] eta: 0:12:53 lr: 0.000063 grad: 0.1104 (0.1142) loss: 0.8015 (0.8110) time: 0.1677 data: 0.0761 max mem: 9377 +Train: [52] [1600/6250] eta: 0:12:38 lr: 0.000063 grad: 0.1131 (0.1141) loss: 0.8068 (0.8106) time: 0.1701 data: 0.0776 max mem: 9377 +Train: [52] [1700/6250] eta: 0:12:18 lr: 0.000063 grad: 0.1063 (0.1140) loss: 0.8009 (0.8103) time: 0.1513 data: 0.0622 max mem: 9377 +Train: [52] [1800/6250] eta: 0:12:02 lr: 0.000063 grad: 0.1102 (0.1138) loss: 0.7987 (0.8100) time: 0.1490 data: 0.0618 max mem: 9377 +Train: [52] [1900/6250] eta: 0:11:46 lr: 0.000063 grad: 0.1156 (0.1136) loss: 0.8055 (0.8098) time: 0.1714 data: 0.0871 max mem: 9377 +Train: [52] [2000/6250] eta: 0:11:27 lr: 0.000063 grad: 0.1051 (0.1134) loss: 0.8077 (0.8096) time: 0.1237 data: 0.0310 max mem: 9377 +Train: [52] [2100/6250] eta: 0:11:10 lr: 0.000063 grad: 0.1119 (0.1133) loss: 0.8125 (0.8096) time: 0.1656 data: 0.0768 max mem: 9377 +Train: [52] [2200/6250] eta: 0:10:51 lr: 0.000063 grad: 0.1096 (0.1132) loss: 0.8029 (0.8095) time: 0.1504 data: 0.0628 max mem: 9377 +Train: [52] [2300/6250] eta: 0:10:35 lr: 0.000063 grad: 0.1125 (0.1131) loss: 0.8132 (0.8094) time: 0.1506 data: 0.0588 max mem: 9377 +Train: [52] [2400/6250] eta: 0:10:22 lr: 0.000063 grad: 0.1020 (0.1129) loss: 0.8120 (0.8095) time: 0.1583 data: 0.0669 max mem: 9377 +Train: [52] [2500/6250] eta: 0:10:06 lr: 0.000063 grad: 0.1065 (0.1128) loss: 0.8098 (0.8096) time: 0.1824 data: 0.0945 max mem: 9377 +Train: [52] [2600/6250] eta: 0:09:50 lr: 0.000063 grad: 0.1057 (0.1126) loss: 0.8108 (0.8096) time: 0.1699 data: 0.0878 max mem: 9377 +Train: [52] [2700/6250] eta: 0:09:34 lr: 0.000063 grad: 0.0984 (0.1125) loss: 0.8072 (0.8096) time: 0.1460 data: 0.0587 max mem: 9377 +Train: [52] [2800/6250] eta: 0:09:19 lr: 0.000063 grad: 0.0954 (0.1124) loss: 0.8169 (0.8096) time: 0.1727 data: 0.0871 max mem: 9377 +Train: [52] [2900/6250] eta: 0:09:04 lr: 0.000063 grad: 0.1061 (0.1123) loss: 0.8134 (0.8096) time: 0.1805 data: 0.0882 max mem: 9377 +Train: [52] [3000/6250] eta: 0:08:47 lr: 0.000063 grad: 0.1115 (0.1123) loss: 0.8079 (0.8095) time: 0.1424 data: 0.0537 max mem: 9377 +Train: [52] [3100/6250] eta: 0:08:31 lr: 0.000063 grad: 0.1139 (0.1124) loss: 0.8070 (0.8095) time: 0.1709 data: 0.0835 max mem: 9377 +Train: [52] [3200/6250] eta: 0:08:14 lr: 0.000062 grad: 0.1120 (0.1125) loss: 0.8094 (0.8093) time: 0.1599 data: 0.0707 max mem: 9377 +Train: [52] [3300/6250] eta: 0:07:57 lr: 0.000062 grad: 0.1126 (0.1124) loss: 0.7978 (0.8093) time: 0.1602 data: 0.0683 max mem: 9377 +Train: [52] [3400/6250] eta: 0:07:41 lr: 0.000062 grad: 0.1122 (0.1125) loss: 0.8060 (0.8091) time: 0.1675 data: 0.0755 max mem: 9377 +Train: [52] [3500/6250] eta: 0:07:23 lr: 0.000062 grad: 0.1086 (0.1125) loss: 0.8124 (0.8090) time: 0.1345 data: 0.0393 max mem: 9377 +Train: [52] [3600/6250] eta: 0:07:07 lr: 0.000062 grad: 0.1127 (0.1125) loss: 0.8104 (0.8089) time: 0.1491 data: 0.0548 max mem: 9377 +Train: [52] [3700/6250] eta: 0:06:50 lr: 0.000062 grad: 0.1108 (0.1125) loss: 0.8015 (0.8088) time: 0.1579 data: 0.0686 max mem: 9377 +Train: [52] [3800/6250] eta: 0:06:34 lr: 0.000062 grad: 0.1153 (0.1125) loss: 0.8043 (0.8088) time: 0.1538 data: 0.0689 max mem: 9377 +Train: [52] [3900/6250] eta: 0:06:17 lr: 0.000062 grad: 0.1136 (0.1126) loss: 0.7981 (0.8086) time: 0.1494 data: 0.0626 max mem: 9377 +Train: [52] [4000/6250] eta: 0:06:01 lr: 0.000062 grad: 0.1202 (0.1128) loss: 0.8055 (0.8085) time: 0.1410 data: 0.0551 max mem: 9377 +Train: [52] [4100/6250] eta: 0:05:44 lr: 0.000062 grad: 0.1121 (0.1129) loss: 0.8004 (0.8084) time: 0.1440 data: 0.0493 max mem: 9377 +Train: [52] [4200/6250] eta: 0:05:28 lr: 0.000062 grad: 0.1119 (0.1130) loss: 0.8110 (0.8082) time: 0.1285 data: 0.0349 max mem: 9377 +Train: [52] [4300/6250] eta: 0:05:11 lr: 0.000062 grad: 0.1140 (0.1132) loss: 0.7966 (0.8081) time: 0.1423 data: 0.0579 max mem: 9377 +Train: [52] [4400/6250] eta: 0:04:55 lr: 0.000062 grad: 0.1133 (0.1133) loss: 0.8001 (0.8079) time: 0.1509 data: 0.0629 max mem: 9377 +Train: [52] [4500/6250] eta: 0:04:39 lr: 0.000062 grad: 0.1107 (0.1134) loss: 0.7983 (0.8078) time: 0.1376 data: 0.0525 max mem: 9377 +Train: [52] [4600/6250] eta: 0:04:23 lr: 0.000062 grad: 0.1127 (0.1134) loss: 0.8062 (0.8077) time: 0.1823 data: 0.0913 max mem: 9377 +Train: [52] [4700/6250] eta: 0:04:07 lr: 0.000062 grad: 0.1125 (0.1134) loss: 0.7993 (0.8076) time: 0.1372 data: 0.0374 max mem: 9377 +Train: [52] [4800/6250] eta: 0:03:51 lr: 0.000062 grad: 0.1061 (0.1134) loss: 0.8084 (0.8075) time: 0.1580 data: 0.0760 max mem: 9377 +Train: [52] [4900/6250] eta: 0:03:35 lr: 0.000062 grad: 0.1095 (0.1134) loss: 0.8051 (0.8074) time: 0.1420 data: 0.0552 max mem: 9377 +Train: [52] [5000/6250] eta: 0:03:19 lr: 0.000062 grad: 0.1096 (0.1135) loss: 0.8133 (0.8074) time: 0.1441 data: 0.0504 max mem: 9377 +Train: [52] [5100/6250] eta: 0:03:03 lr: 0.000062 grad: 0.1168 (0.1135) loss: 0.7914 (0.8072) time: 0.1096 data: 0.0056 max mem: 9377 +Train: [52] [5200/6250] eta: 0:02:47 lr: 0.000062 grad: 0.1196 (0.1136) loss: 0.8048 (0.8071) time: 0.1794 data: 0.0944 max mem: 9377 +Train: [52] [5300/6250] eta: 0:02:31 lr: 0.000062 grad: 0.1046 (0.1136) loss: 0.8031 (0.8070) time: 0.1617 data: 0.0762 max mem: 9377 +Train: [52] [5400/6250] eta: 0:02:15 lr: 0.000062 grad: 0.1017 (0.1136) loss: 0.8057 (0.8070) time: 0.1622 data: 0.0736 max mem: 9377 +Train: [52] [5500/6250] eta: 0:01:59 lr: 0.000062 grad: 0.1142 (0.1137) loss: 0.8099 (0.8070) time: 0.1689 data: 0.0914 max mem: 9377 +Train: [52] [5600/6250] eta: 0:01:43 lr: 0.000062 grad: 0.1082 (0.1138) loss: 0.7981 (0.8069) time: 0.1780 data: 0.0815 max mem: 9377 +Train: [52] [5700/6250] eta: 0:01:27 lr: 0.000062 grad: 0.1173 (0.1139) loss: 0.7955 (0.8067) time: 0.2093 data: 0.1216 max mem: 9377 +Train: [52] [5800/6250] eta: 0:01:11 lr: 0.000062 grad: 0.1184 (0.1140) loss: 0.7913 (0.8066) time: 0.1624 data: 0.0765 max mem: 9377 +Train: [52] [5900/6250] eta: 0:00:55 lr: 0.000062 grad: 0.1197 (0.1141) loss: 0.8036 (0.8065) time: 0.1744 data: 0.0794 max mem: 9377 +Train: [52] [6000/6250] eta: 0:00:39 lr: 0.000062 grad: 0.1118 (0.1141) loss: 0.7938 (0.8064) time: 0.1345 data: 0.0473 max mem: 9377 +Train: [52] [6100/6250] eta: 0:00:23 lr: 0.000062 grad: 0.1108 (0.1142) loss: 0.8080 (0.8064) time: 0.1444 data: 0.0503 max mem: 9377 +Train: [52] [6200/6250] eta: 0:00:07 lr: 0.000061 grad: 0.1113 (0.1142) loss: 0.8040 (0.8064) time: 0.1429 data: 0.0542 max mem: 9377 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.1140 (0.1142) loss: 0.8037 (0.8063) time: 0.1551 data: 0.0655 max mem: 9377 +Train: [52] Total time: 0:16:41 (0.1602 s / it) +Averaged stats: lr: 0.000061 grad: 0.1140 (0.1142) loss: 0.8037 (0.8063) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:05:05 loss: 0.8207 (0.8207) time: 4.9230 data: 4.8906 max mem: 9377 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8156 (0.8186) time: 0.1071 data: 0.0818 max mem: 9377 +Eval (hcp-train-subset): [52] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (hcp-train-subset): loss: 0.8156 (0.8186) +Eval (hcp-val): [52] [ 0/62] eta: 0:03:43 loss: 0.8378 (0.8378) time: 3.6021 data: 3.5394 max mem: 9377 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8378 (0.8399) time: 0.1372 data: 0.1105 max mem: 9377 +Eval (hcp-val): [52] Total time: 0:00:14 (0.2304 s / it) +Averaged stats (hcp-val): loss: 0.8378 (0.8399) +Eval (nsd-val): [52] [ 0/62] eta: 0:04:50 loss: 0.8093 (0.8093) time: 4.6855 data: 4.6353 max mem: 9377 +Eval (nsd-val): [52] [61/62] eta: 0:00:00 loss: 0.8157 (0.8185) time: 0.1116 data: 0.0865 max mem: 9377 +Eval (nsd-val): [52] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (nsd-val): loss: 0.8157 (0.8185) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [53] [ 0/6250] eta: 8:13:13 lr: 0.000061 grad: 0.1740 (0.1740) loss: 0.8432 (0.8432) time: 4.7350 data: 4.4090 max mem: 9377 +Train: [53] [ 100/6250] eta: 0:21:39 lr: 0.000061 grad: 0.0990 (0.1239) loss: 0.8254 (0.8307) time: 0.1443 data: 0.0362 max mem: 9377 +Train: [53] [ 200/6250] eta: 0:18:57 lr: 0.000061 grad: 0.0995 (0.1174) loss: 0.8230 (0.8247) time: 0.1610 data: 0.0636 max mem: 9377 +Train: [53] [ 300/6250] eta: 0:17:36 lr: 0.000061 grad: 0.1094 (0.1151) loss: 0.8143 (0.8215) time: 0.1560 data: 0.0694 max mem: 9377 +Train: [53] [ 400/6250] eta: 0:16:35 lr: 0.000061 grad: 0.1053 (0.1151) loss: 0.8129 (0.8186) time: 0.1365 data: 0.0307 max mem: 9377 +Train: [53] [ 500/6250] eta: 0:15:56 lr: 0.000061 grad: 0.0989 (0.1136) loss: 0.8177 (0.8177) time: 0.1346 data: 0.0298 max mem: 9377 +Train: [53] [ 600/6250] eta: 0:15:27 lr: 0.000061 grad: 0.1050 (0.1127) loss: 0.8116 (0.8171) time: 0.1701 data: 0.0721 max mem: 9377 +Train: [53] [ 700/6250] eta: 0:15:02 lr: 0.000061 grad: 0.0938 (0.1114) loss: 0.8186 (0.8169) time: 0.1440 data: 0.0461 max mem: 9377 +Train: [53] [ 800/6250] eta: 0:14:49 lr: 0.000061 grad: 0.0984 (0.1108) loss: 0.8178 (0.8166) time: 0.1781 data: 0.0895 max mem: 9377 +Train: [53] [ 900/6250] eta: 0:14:33 lr: 0.000061 grad: 0.1058 (0.1104) loss: 0.8089 (0.8161) time: 0.1390 data: 0.0416 max mem: 9377 +Train: [53] [1000/6250] eta: 0:14:27 lr: 0.000061 grad: 0.1031 (0.1100) loss: 0.8045 (0.8155) time: 0.2040 data: 0.1113 max mem: 9377 +Train: [53] [1100/6250] eta: 0:14:16 lr: 0.000061 grad: 0.1046 (0.1098) loss: 0.8154 (0.8147) time: 0.1829 data: 0.0974 max mem: 9377 +Train: [53] [1200/6250] eta: 0:14:04 lr: 0.000061 grad: 0.1109 (0.1097) loss: 0.8134 (0.8143) time: 0.1797 data: 0.0918 max mem: 9377 +Train: [53] [1300/6250] eta: 0:13:45 lr: 0.000061 grad: 0.1056 (0.1096) loss: 0.8149 (0.8140) time: 0.2028 data: 0.1121 max mem: 9377 +Train: [53] [1400/6250] eta: 0:13:32 lr: 0.000061 grad: 0.1079 (0.1094) loss: 0.8005 (0.8135) time: 0.1832 data: 0.0916 max mem: 9377 +Train: [53] [1500/6250] eta: 0:13:17 lr: 0.000061 grad: 0.1096 (0.1093) loss: 0.7997 (0.8129) time: 0.1573 data: 0.0683 max mem: 9377 +Train: [53] [1600/6250] eta: 0:13:03 lr: 0.000061 grad: 0.1118 (0.1093) loss: 0.8026 (0.8125) time: 0.1779 data: 0.0860 max mem: 9377 +Train: [53] [1700/6250] eta: 0:12:45 lr: 0.000061 grad: 0.1053 (0.1095) loss: 0.8060 (0.8120) time: 0.1671 data: 0.0749 max mem: 9377 +Train: [53] [1800/6250] eta: 0:12:28 lr: 0.000061 grad: 0.1140 (0.1098) loss: 0.7932 (0.8115) time: 0.1685 data: 0.0736 max mem: 9377 +Train: [53] [1900/6250] eta: 0:12:09 lr: 0.000061 grad: 0.1100 (0.1099) loss: 0.8026 (0.8111) time: 0.1548 data: 0.0646 max mem: 9377 +Train: [53] [2000/6250] eta: 0:11:51 lr: 0.000061 grad: 0.1089 (0.1100) loss: 0.8070 (0.8110) time: 0.1734 data: 0.0874 max mem: 9377 +Train: [53] [2100/6250] eta: 0:11:30 lr: 0.000061 grad: 0.1115 (0.1101) loss: 0.8036 (0.8106) time: 0.1598 data: 0.0703 max mem: 9377 +Train: [53] [2200/6250] eta: 0:11:13 lr: 0.000061 grad: 0.1172 (0.1105) loss: 0.8026 (0.8102) time: 0.1804 data: 0.0915 max mem: 9377 +Train: [53] [2300/6250] eta: 0:10:54 lr: 0.000061 grad: 0.1128 (0.1107) loss: 0.8011 (0.8099) time: 0.1700 data: 0.0820 max mem: 9377 +Train: [53] [2400/6250] eta: 0:10:37 lr: 0.000061 grad: 0.1138 (0.1111) loss: 0.8027 (0.8095) time: 0.1654 data: 0.0878 max mem: 9377 +Train: [53] [2500/6250] eta: 0:10:20 lr: 0.000061 grad: 0.1224 (0.1115) loss: 0.7987 (0.8090) time: 0.1659 data: 0.0851 max mem: 9377 +Train: [53] [2600/6250] eta: 0:10:04 lr: 0.000061 grad: 0.1198 (0.1119) loss: 0.7874 (0.8084) time: 0.1797 data: 0.0935 max mem: 9377 +Train: [53] [2700/6250] eta: 0:09:47 lr: 0.000061 grad: 0.1191 (0.1121) loss: 0.7883 (0.8080) time: 0.1600 data: 0.0776 max mem: 9377 +Train: [53] [2800/6250] eta: 0:09:30 lr: 0.000061 grad: 0.1138 (0.1123) loss: 0.8077 (0.8076) time: 0.1609 data: 0.0790 max mem: 9377 +Train: [53] [2900/6250] eta: 0:09:15 lr: 0.000061 grad: 0.1093 (0.1124) loss: 0.8083 (0.8074) time: 0.1775 data: 0.0912 max mem: 9377 +Train: [53] [3000/6250] eta: 0:08:58 lr: 0.000060 grad: 0.1093 (0.1124) loss: 0.8027 (0.8072) time: 0.1592 data: 0.0581 max mem: 9377 +Train: [53] [3100/6250] eta: 0:08:42 lr: 0.000060 grad: 0.1106 (0.1125) loss: 0.7854 (0.8069) time: 0.1763 data: 0.0860 max mem: 9377 +Train: [53] [3200/6250] eta: 0:08:25 lr: 0.000060 grad: 0.1115 (0.1126) loss: 0.8025 (0.8067) time: 0.1296 data: 0.0371 max mem: 9377 +Train: [53] [3300/6250] eta: 0:08:08 lr: 0.000060 grad: 0.1186 (0.1129) loss: 0.7958 (0.8064) time: 0.1662 data: 0.0771 max mem: 9377 +Train: [53] [3400/6250] eta: 0:07:51 lr: 0.000060 grad: 0.1176 (0.1131) loss: 0.7947 (0.8061) time: 0.1409 data: 0.0564 max mem: 9377 +Train: [53] [3500/6250] eta: 0:07:34 lr: 0.000060 grad: 0.1214 (0.1133) loss: 0.7930 (0.8059) time: 0.1352 data: 0.0401 max mem: 9377 +Train: [53] [3600/6250] eta: 0:07:17 lr: 0.000060 grad: 0.1176 (0.1134) loss: 0.7895 (0.8057) time: 0.1259 data: 0.0261 max mem: 9377 +Train: [53] [3700/6250] eta: 0:07:00 lr: 0.000060 grad: 0.1200 (0.1135) loss: 0.8063 (0.8055) time: 0.1623 data: 0.0774 max mem: 9377 +Train: [53] [3800/6250] eta: 0:06:42 lr: 0.000060 grad: 0.1228 (0.1136) loss: 0.7966 (0.8054) time: 0.1507 data: 0.0516 max mem: 9377 +Train: [53] [3900/6250] eta: 0:06:25 lr: 0.000060 grad: 0.1155 (0.1138) loss: 0.7958 (0.8051) time: 0.1585 data: 0.0721 max mem: 9377 +Train: [53] [4000/6250] eta: 0:06:08 lr: 0.000060 grad: 0.1176 (0.1139) loss: 0.7941 (0.8050) time: 0.1622 data: 0.0751 max mem: 9377 +Train: [53] [4100/6250] eta: 0:05:52 lr: 0.000060 grad: 0.1214 (0.1140) loss: 0.8102 (0.8049) time: 0.1643 data: 0.0757 max mem: 9377 +Train: [53] [4200/6250] eta: 0:05:35 lr: 0.000060 grad: 0.1172 (0.1142) loss: 0.7999 (0.8048) time: 0.1657 data: 0.0758 max mem: 9377 +Train: [53] [4300/6250] eta: 0:05:18 lr: 0.000060 grad: 0.1058 (0.1142) loss: 0.8169 (0.8048) time: 0.1517 data: 0.0663 max mem: 9377 +Train: [53] [4400/6250] eta: 0:05:02 lr: 0.000060 grad: 0.1251 (0.1143) loss: 0.8067 (0.8047) time: 0.1655 data: 0.0808 max mem: 9377 +Train: [53] [4500/6250] eta: 0:04:45 lr: 0.000060 grad: 0.1173 (0.1144) loss: 0.7991 (0.8046) time: 0.1705 data: 0.0881 max mem: 9377 +Train: [53] [4600/6250] eta: 0:04:28 lr: 0.000060 grad: 0.1104 (0.1145) loss: 0.8090 (0.8046) time: 0.1309 data: 0.0429 max mem: 9377 +Train: [53] [4700/6250] eta: 0:04:12 lr: 0.000060 grad: 0.1174 (0.1146) loss: 0.8003 (0.8046) time: 0.1489 data: 0.0573 max mem: 9377 +Train: [53] [4800/6250] eta: 0:03:56 lr: 0.000060 grad: 0.1184 (0.1147) loss: 0.8061 (0.8045) time: 0.1508 data: 0.0591 max mem: 9377 +Train: [53] [4900/6250] eta: 0:03:39 lr: 0.000060 grad: 0.1161 (0.1149) loss: 0.7990 (0.8044) time: 0.1556 data: 0.0725 max mem: 9377 +Train: [53] [5000/6250] eta: 0:03:23 lr: 0.000060 grad: 0.1130 (0.1149) loss: 0.8021 (0.8044) time: 0.1682 data: 0.0779 max mem: 9377 +Train: [53] [5100/6250] eta: 0:03:06 lr: 0.000060 grad: 0.1079 (0.1148) loss: 0.8047 (0.8044) time: 0.1611 data: 0.0696 max mem: 9377 +Train: [53] [5200/6250] eta: 0:02:50 lr: 0.000060 grad: 0.1181 (0.1148) loss: 0.8012 (0.8044) time: 0.1596 data: 0.0787 max mem: 9377 +Train: [53] [5300/6250] eta: 0:02:33 lr: 0.000060 grad: 0.1177 (0.1149) loss: 0.7975 (0.8044) time: 0.1433 data: 0.0652 max mem: 9377 +Train: [53] [5400/6250] eta: 0:02:17 lr: 0.000060 grad: 0.1110 (0.1149) loss: 0.8045 (0.8044) time: 0.1759 data: 0.0853 max mem: 9377 +Train: [53] [5500/6250] eta: 0:02:01 lr: 0.000060 grad: 0.1153 (0.1149) loss: 0.8047 (0.8044) time: 0.1701 data: 0.0923 max mem: 9377 +Train: [53] [5600/6250] eta: 0:01:45 lr: 0.000060 grad: 0.1094 (0.1150) loss: 0.8106 (0.8044) time: 0.1542 data: 0.0703 max mem: 9377 +Train: [53] [5700/6250] eta: 0:01:29 lr: 0.000060 grad: 0.1173 (0.1150) loss: 0.8002 (0.8044) time: 0.1622 data: 0.0739 max mem: 9377 +Train: [53] [5800/6250] eta: 0:01:13 lr: 0.000060 grad: 0.1129 (0.1150) loss: 0.8105 (0.8044) time: 0.1757 data: 0.0856 max mem: 9377 +Train: [53] [5900/6250] eta: 0:00:56 lr: 0.000060 grad: 0.1180 (0.1151) loss: 0.8066 (0.8044) time: 0.1432 data: 0.0529 max mem: 9377 +Train: [53] [6000/6250] eta: 0:00:40 lr: 0.000059 grad: 0.1159 (0.1151) loss: 0.7959 (0.8044) time: 0.1521 data: 0.0618 max mem: 9377 +Train: [53] [6100/6250] eta: 0:00:24 lr: 0.000059 grad: 0.1102 (0.1153) loss: 0.8033 (0.8043) time: 0.1449 data: 0.0487 max mem: 9377 +Train: [53] [6200/6250] eta: 0:00:08 lr: 0.000059 grad: 0.1089 (0.1153) loss: 0.8097 (0.8043) time: 0.1397 data: 0.0475 max mem: 9377 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.1151 (0.1153) loss: 0.8086 (0.8043) time: 0.1487 data: 0.0646 max mem: 9377 +Train: [53] Total time: 0:16:55 (0.1625 s / it) +Averaged stats: lr: 0.000059 grad: 0.1151 (0.1153) loss: 0.8086 (0.8043) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:04:19 loss: 0.8253 (0.8253) time: 4.1883 data: 4.0765 max mem: 9377 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8174 (0.8177) time: 0.1307 data: 0.1052 max mem: 9377 +Eval (hcp-train-subset): [53] Total time: 0:00:14 (0.2317 s / it) +Averaged stats (hcp-train-subset): loss: 0.8174 (0.8177) +Eval (hcp-val): [53] [ 0/62] eta: 0:05:35 loss: 0.8395 (0.8395) time: 5.4173 data: 5.3855 max mem: 9377 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8405 (0.8404) time: 0.1016 data: 0.0747 max mem: 9377 +Eval (hcp-val): [53] Total time: 0:00:14 (0.2303 s / it) +Averaged stats (hcp-val): loss: 0.8405 (0.8404) +Eval (nsd-val): [53] [ 0/62] eta: 0:03:57 loss: 0.8055 (0.8055) time: 3.8302 data: 3.7520 max mem: 9377 +Eval (nsd-val): [53] [61/62] eta: 0:00:00 loss: 0.8144 (0.8152) time: 0.1182 data: 0.0931 max mem: 9377 +Eval (nsd-val): [53] Total time: 0:00:13 (0.2208 s / it) +Averaged stats (nsd-val): loss: 0.8144 (0.8152) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 11:07:05 lr: 0.000059 grad: nan (nan) loss: 0.6873 (0.6873) time: 6.4041 data: 6.2833 max mem: 9377 +Train: [54] [ 100/6250] eta: 0:23:14 lr: 0.000059 grad: 0.1409 (0.1608) loss: 0.8155 (0.8193) time: 0.1904 data: 0.0789 max mem: 9377 +Train: [54] [ 200/6250] eta: 0:20:15 lr: 0.000059 grad: 0.1158 (0.1455) loss: 0.8109 (0.8167) time: 0.1537 data: 0.0555 max mem: 9377 +Train: [54] [ 300/6250] eta: 0:18:45 lr: 0.000059 grad: 0.1214 (0.1401) loss: 0.8003 (0.8134) time: 0.1477 data: 0.0490 max mem: 9377 +Train: [54] [ 400/6250] eta: 0:17:48 lr: 0.000059 grad: 0.1138 (0.1356) loss: 0.8048 (0.8115) time: 0.1670 data: 0.0767 max mem: 9377 +Train: [54] [ 500/6250] eta: 0:17:09 lr: 0.000059 grad: 0.1193 (0.1323) loss: 0.7986 (0.8101) time: 0.1678 data: 0.0735 max mem: 9377 +Train: [54] [ 600/6250] eta: 0:16:37 lr: 0.000059 grad: 0.1041 (0.1286) loss: 0.8077 (0.8096) time: 0.1735 data: 0.0847 max mem: 9377 +Train: [54] [ 700/6250] eta: 0:16:05 lr: 0.000059 grad: 0.1090 (0.1260) loss: 0.8205 (0.8098) time: 0.1610 data: 0.0662 max mem: 9377 +Train: [54] [ 800/6250] eta: 0:15:38 lr: 0.000059 grad: 0.1083 (0.1242) loss: 0.8060 (0.8097) time: 0.1622 data: 0.0733 max mem: 9377 +Train: [54] [ 900/6250] eta: 0:15:20 lr: 0.000059 grad: 0.1087 (0.1230) loss: 0.8057 (0.8094) time: 0.1733 data: 0.0813 max mem: 9377 +Train: [54] [1000/6250] eta: 0:15:00 lr: 0.000059 grad: 0.1140 (0.1224) loss: 0.8065 (0.8093) time: 0.1555 data: 0.0685 max mem: 9377 +Train: [54] [1100/6250] eta: 0:14:40 lr: 0.000059 grad: 0.1123 (0.1214) loss: 0.8015 (0.8091) time: 0.1852 data: 0.0974 max mem: 9377 +Train: [54] [1200/6250] eta: 0:14:19 lr: 0.000059 grad: 0.1104 (0.1209) loss: 0.8059 (0.8089) time: 0.1708 data: 0.0892 max mem: 9377 +Train: [54] [1300/6250] eta: 0:14:00 lr: 0.000059 grad: 0.1134 (0.1204) loss: 0.8027 (0.8088) time: 0.1821 data: 0.0895 max mem: 9377 +Train: [54] [1400/6250] eta: 0:13:41 lr: 0.000059 grad: 0.1094 (0.1200) loss: 0.8076 (0.8085) time: 0.1496 data: 0.0421 max mem: 9377 +Train: [54] [1500/6250] eta: 0:13:24 lr: 0.000059 grad: 0.1060 (0.1194) loss: 0.8066 (0.8083) time: 0.1660 data: 0.0729 max mem: 9377 +Train: [54] [1600/6250] eta: 0:13:06 lr: 0.000059 grad: 0.1237 (0.1195) loss: 0.7965 (0.8078) time: 0.1592 data: 0.0702 max mem: 9377 +Train: [54] [1700/6250] eta: 0:12:45 lr: 0.000059 grad: 0.1105 (0.1195) loss: 0.8050 (0.8074) time: 0.1426 data: 0.0500 max mem: 9377 +Train: [54] [1800/6250] eta: 0:12:25 lr: 0.000059 grad: 0.1119 (0.1194) loss: 0.8011 (0.8070) time: 0.1359 data: 0.0380 max mem: 9377 +Train: [54] [1900/6250] eta: 0:12:07 lr: 0.000059 grad: 0.1195 (0.1193) loss: 0.7947 (0.8065) time: 0.1643 data: 0.0789 max mem: 9377 +Train: [54] [2000/6250] eta: 0:11:47 lr: 0.000059 grad: 0.1134 (0.1193) loss: 0.8013 (0.8063) time: 0.1466 data: 0.0507 max mem: 9377 +Train: [54] [2100/6250] eta: 0:11:29 lr: 0.000059 grad: 0.1214 (0.1193) loss: 0.7967 (0.8060) time: 0.1798 data: 0.0925 max mem: 9377 +Train: [54] [2200/6250] eta: 0:11:09 lr: 0.000059 grad: 0.1129 (0.1193) loss: 0.8065 (0.8058) time: 0.1470 data: 0.0517 max mem: 9377 +Train: [54] [2300/6250] eta: 0:10:51 lr: 0.000059 grad: 0.1175 (0.1193) loss: 0.7903 (0.8055) time: 0.1753 data: 0.0782 max mem: 9377 +Train: [54] [2400/6250] eta: 0:10:35 lr: 0.000059 grad: 0.1232 (0.1194) loss: 0.7950 (0.8052) time: 0.1556 data: 0.0807 max mem: 9377 +Train: [54] [2500/6250] eta: 0:10:18 lr: 0.000059 grad: 0.1138 (0.1193) loss: 0.7976 (0.8049) time: 0.1772 data: 0.0975 max mem: 9377 +Train: [54] [2600/6250] eta: 0:10:02 lr: 0.000059 grad: 0.1083 (0.1193) loss: 0.8093 (0.8048) time: 0.1497 data: 0.0626 max mem: 9377 +Train: [54] [2700/6250] eta: 0:09:45 lr: 0.000059 grad: 0.1146 (0.1191) loss: 0.8057 (0.8047) time: 0.1061 data: 0.0172 max mem: 9377 +Train: [54] [2800/6250] eta: 0:09:31 lr: 0.000058 grad: 0.1067 (0.1189) loss: 0.8052 (0.8047) time: 0.1685 data: 0.0772 max mem: 9377 +Train: [54] [2900/6250] eta: 0:09:14 lr: 0.000058 grad: 0.1045 (0.1188) loss: 0.8145 (0.8048) time: 0.1507 data: 0.0690 max mem: 9377 +Train: [54] [3000/6250] eta: 0:08:58 lr: 0.000058 grad: 0.1160 (0.1187) loss: 0.8047 (0.8049) time: 0.1986 data: 0.1062 max mem: 9377 +Train: [54] [3100/6250] eta: 0:08:41 lr: 0.000058 grad: 0.1142 (0.1186) loss: 0.8018 (0.8048) time: 0.1524 data: 0.0610 max mem: 9377 +Train: [54] [3200/6250] eta: 0:08:25 lr: 0.000058 grad: 0.1191 (0.1186) loss: 0.8017 (0.8048) time: 0.1506 data: 0.0680 max mem: 9377 +Train: [54] [3300/6250] eta: 0:08:07 lr: 0.000058 grad: 0.1089 (0.1185) loss: 0.8093 (0.8049) time: 0.1561 data: 0.0689 max mem: 9377 +Train: [54] [3400/6250] eta: 0:07:49 lr: 0.000058 grad: 0.1129 (0.1185) loss: 0.8060 (0.8049) time: 0.1400 data: 0.0492 max mem: 9377 +Train: [54] [3500/6250] eta: 0:07:32 lr: 0.000058 grad: 0.1086 (0.1185) loss: 0.8118 (0.8049) time: 0.1656 data: 0.0756 max mem: 9377 +Train: [54] [3600/6250] eta: 0:07:15 lr: 0.000058 grad: 0.1103 (0.1184) loss: 0.8022 (0.8048) time: 0.1455 data: 0.0621 max mem: 9377 +Train: [54] [3700/6250] eta: 0:07:00 lr: 0.000058 grad: 0.1151 (0.1184) loss: 0.8056 (0.8048) time: 0.1793 data: 0.0972 max mem: 9377 +Train: [54] [3800/6250] eta: 0:06:43 lr: 0.000058 grad: 0.1082 (0.1183) loss: 0.8112 (0.8049) time: 0.1513 data: 0.0693 max mem: 9377 +Train: [54] [3900/6250] eta: 0:06:27 lr: 0.000058 grad: 0.1145 (0.1183) loss: 0.7995 (0.8049) time: 0.1325 data: 0.0435 max mem: 9377 +Train: [54] [4000/6250] eta: 0:06:10 lr: 0.000058 grad: 0.1162 (0.1182) loss: 0.8035 (0.8050) time: 0.1601 data: 0.0693 max mem: 9377 +Train: [54] [4100/6250] eta: 0:05:53 lr: 0.000058 grad: 0.1181 (0.1182) loss: 0.8055 (0.8049) time: 0.1692 data: 0.0816 max mem: 9377 +Train: [54] [4200/6250] eta: 0:05:36 lr: 0.000058 grad: 0.1099 (0.1181) loss: 0.8059 (0.8049) time: 0.1473 data: 0.0554 max mem: 9377 +Train: [54] [4300/6250] eta: 0:05:19 lr: 0.000058 grad: 0.1189 (0.1181) loss: 0.8096 (0.8049) time: 0.1682 data: 0.0779 max mem: 9377 +Train: [54] [4400/6250] eta: 0:05:02 lr: 0.000058 grad: 0.1240 (0.1181) loss: 0.8027 (0.8049) time: 0.1702 data: 0.0736 max mem: 9377 +Train: [54] [4500/6250] eta: 0:04:46 lr: 0.000058 grad: 0.1105 (0.1180) loss: 0.8084 (0.8050) time: 0.1650 data: 0.0765 max mem: 9377 +Train: [54] [4600/6250] eta: 0:04:29 lr: 0.000058 grad: 0.1218 (0.1180) loss: 0.7927 (0.8049) time: 0.1629 data: 0.0745 max mem: 9377 +Train: [54] [4700/6250] eta: 0:04:13 lr: 0.000058 grad: 0.1209 (0.1180) loss: 0.7901 (0.8048) time: 0.1538 data: 0.0610 max mem: 9377 +Train: [54] [4800/6250] eta: 0:03:56 lr: 0.000058 grad: 0.1138 (0.1180) loss: 0.8041 (0.8047) time: 0.1579 data: 0.0701 max mem: 9377 +Train: [54] [4900/6250] eta: 0:03:40 lr: 0.000058 grad: 0.1103 (0.1179) loss: 0.8064 (0.8047) time: 0.1436 data: 0.0525 max mem: 9377 +Train: [54] [5000/6250] eta: 0:03:23 lr: 0.000058 grad: 0.1137 (0.1179) loss: 0.7999 (0.8046) time: 0.1351 data: 0.0486 max mem: 9377 +Train: [54] [5100/6250] eta: 0:03:07 lr: 0.000058 grad: 0.1135 (0.1179) loss: 0.8044 (0.8046) time: 0.1798 data: 0.0950 max mem: 9377 +Train: [54] [5200/6250] eta: 0:02:50 lr: 0.000058 grad: 0.1120 (0.1179) loss: 0.8035 (0.8046) time: 0.1583 data: 0.0714 max mem: 9377 +Train: [54] [5300/6250] eta: 0:02:34 lr: 0.000058 grad: 0.1206 (0.1180) loss: 0.7939 (0.8045) time: 0.2563 data: 0.1806 max mem: 9377 +Train: [54] [5400/6250] eta: 0:02:18 lr: 0.000058 grad: 0.1123 (0.1180) loss: 0.8006 (0.8045) time: 0.1396 data: 0.0533 max mem: 9377 +Train: [54] [5500/6250] eta: 0:02:02 lr: 0.000058 grad: 0.1195 (0.1180) loss: 0.7900 (0.8044) time: 0.1620 data: 0.0802 max mem: 9377 +Train: [54] [5600/6250] eta: 0:01:45 lr: 0.000058 grad: 0.1132 (0.1180) loss: 0.8033 (0.8043) time: 0.1474 data: 0.0659 max mem: 9377 +Train: [54] [5700/6250] eta: 0:01:29 lr: 0.000058 grad: 0.1142 (0.1181) loss: 0.7967 (0.8042) time: 0.1519 data: 0.0645 max mem: 9377 +Train: [54] [5800/6250] eta: 0:01:13 lr: 0.000057 grad: 0.1201 (0.1181) loss: 0.8033 (0.8042) time: 0.1754 data: 0.0830 max mem: 9377 +Train: [54] [5900/6250] eta: 0:00:56 lr: 0.000057 grad: 0.1207 (0.1182) loss: 0.8004 (0.8042) time: 0.1503 data: 0.0552 max mem: 9377 +Train: [54] [6000/6250] eta: 0:00:40 lr: 0.000057 grad: 0.1219 (0.1182) loss: 0.8016 (0.8042) time: 0.1565 data: 0.0549 max mem: 9377 +Train: [54] [6100/6250] eta: 0:00:24 lr: 0.000057 grad: 0.1210 (0.1183) loss: 0.8006 (0.8041) time: 0.1711 data: 0.0653 max mem: 9377 +Train: [54] [6200/6250] eta: 0:00:08 lr: 0.000057 grad: 0.1207 (0.1183) loss: 0.8025 (0.8040) time: 0.1849 data: 0.0926 max mem: 9377 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1205 (0.1183) loss: 0.7976 (0.8040) time: 0.1902 data: 0.0972 max mem: 9377 +Train: [54] Total time: 0:17:07 (0.1645 s / it) +Averaged stats: lr: 0.000057 grad: 0.1205 (0.1183) loss: 0.7976 (0.8040) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:03:59 loss: 0.8271 (0.8271) time: 3.8563 data: 3.8047 max mem: 9377 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8169 (0.8180) time: 0.1253 data: 0.0998 max mem: 9377 +Eval (hcp-train-subset): [54] Total time: 0:00:16 (0.2593 s / it) +Averaged stats (hcp-train-subset): loss: 0.8169 (0.8180) +Making plots (hcp-train-subset): example=29 +Eval (hcp-val): [54] [ 0/62] eta: 0:05:25 loss: 0.8398 (0.8398) time: 5.2552 data: 5.1656 max mem: 9377 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8367 (0.8393) time: 0.1221 data: 0.0952 max mem: 9377 +Eval (hcp-val): [54] Total time: 0:00:15 (0.2486 s / it) +Averaged stats (hcp-val): loss: 0.8367 (0.8393) +Making plots (hcp-val): example=49 +Eval (nsd-val): [54] [ 0/62] eta: 0:05:20 loss: 0.8078 (0.8078) time: 5.1773 data: 5.1446 max mem: 9377 +Eval (nsd-val): [54] [61/62] eta: 0:00:00 loss: 0.8177 (0.8187) time: 0.1462 data: 0.1189 max mem: 9377 +Eval (nsd-val): [54] Total time: 0:00:15 (0.2566 s / it) +Averaged stats (nsd-val): loss: 0.8177 (0.8187) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00054.pth +Train: [55] [ 0/6250] eta: 9:04:06 lr: 0.000057 grad: 0.1385 (0.1385) loss: 0.8230 (0.8230) time: 5.2234 data: 4.9891 max mem: 9377 +Train: [55] [ 100/6250] eta: 0:24:36 lr: 0.000057 grad: 0.1398 (0.1567) loss: 0.8032 (0.8252) time: 0.1921 data: 0.0757 max mem: 9377 +Train: [55] [ 200/6250] eta: 0:21:10 lr: 0.000057 grad: 0.1192 (0.1477) loss: 0.8079 (0.8161) time: 0.1815 data: 0.0814 max mem: 9377 +Train: [55] [ 300/6250] eta: 0:19:44 lr: 0.000057 grad: 0.1133 (0.1438) loss: 0.8040 (0.8117) time: 0.1860 data: 0.0951 max mem: 9377 +Train: [55] [ 400/6250] eta: 0:18:26 lr: 0.000057 grad: 0.1219 (0.1396) loss: 0.8049 (0.8109) time: 0.1549 data: 0.0591 max mem: 9377 +Train: [55] [ 500/6250] eta: 0:17:39 lr: 0.000057 grad: 0.1300 (0.1369) loss: 0.8026 (0.8097) time: 0.1515 data: 0.0520 max mem: 9377 +Train: [55] [ 600/6250] eta: 0:16:55 lr: 0.000057 grad: 0.1174 (0.1342) loss: 0.8119 (0.8094) time: 0.1685 data: 0.0743 max mem: 9377 +Train: [55] [ 700/6250] eta: 0:16:26 lr: 0.000057 grad: 0.1136 (0.1317) loss: 0.8083 (0.8090) time: 0.1705 data: 0.0806 max mem: 9377 +Train: [55] [ 800/6250] eta: 0:15:58 lr: 0.000057 grad: 0.1095 (0.1297) loss: 0.7991 (0.8086) time: 0.1358 data: 0.0395 max mem: 9377 +Train: [55] [ 900/6250] eta: 0:15:47 lr: 0.000057 grad: 0.1102 (0.1283) loss: 0.8037 (0.8081) time: 0.1945 data: 0.1065 max mem: 9377 +Train: [55] [1000/6250] eta: 0:15:19 lr: 0.000057 grad: 0.1133 (0.1273) loss: 0.8101 (0.8079) time: 0.1696 data: 0.0803 max mem: 9377 +Train: [55] [1100/6250] eta: 0:14:57 lr: 0.000057 grad: 0.1213 (0.1262) loss: 0.8010 (0.8078) time: 0.1725 data: 0.0776 max mem: 9377 +Train: [55] [1200/6250] eta: 0:14:35 lr: 0.000057 grad: 0.1178 (0.1264) loss: 0.7992 (0.8073) time: 0.1638 data: 0.0843 max mem: 9377 +Train: [55] [1300/6250] eta: 0:14:11 lr: 0.000057 grad: 0.1149 (0.1258) loss: 0.8030 (0.8072) time: 0.1692 data: 0.0854 max mem: 9377 +Train: [55] [1400/6250] eta: 0:13:56 lr: 0.000057 grad: 0.1203 (0.1252) loss: 0.7986 (0.8069) time: 0.1786 data: 0.0905 max mem: 9377 +Train: [55] [1500/6250] eta: 0:13:35 lr: 0.000057 grad: 0.1173 (0.1249) loss: 0.8077 (0.8067) time: 0.1743 data: 0.0876 max mem: 9377 +Train: [55] [1600/6250] eta: 0:13:19 lr: 0.000057 grad: 0.1204 (0.1244) loss: 0.8061 (0.8066) time: 0.1715 data: 0.0853 max mem: 9377 +Train: [55] [1700/6250] eta: 0:12:59 lr: 0.000057 grad: 0.1137 (0.1240) loss: 0.8014 (0.8065) time: 0.1484 data: 0.0505 max mem: 9377 +Train: [55] [1800/6250] eta: 0:12:38 lr: 0.000057 grad: 0.1133 (0.1235) loss: 0.8050 (0.8063) time: 0.1608 data: 0.0641 max mem: 9377 +Train: [55] [1900/6250] eta: 0:12:16 lr: 0.000057 grad: 0.1104 (0.1230) loss: 0.8108 (0.8064) time: 0.1521 data: 0.0595 max mem: 9377 +Train: [55] [2000/6250] eta: 0:11:57 lr: 0.000057 grad: 0.1168 (0.1228) loss: 0.7986 (0.8063) time: 0.1662 data: 0.0754 max mem: 9377 +Train: [55] [2100/6250] eta: 0:11:37 lr: 0.000057 grad: 0.1221 (0.1226) loss: 0.8042 (0.8062) time: 0.1666 data: 0.0785 max mem: 9377 +Train: [55] [2200/6250] eta: 0:11:17 lr: 0.000057 grad: 0.1186 (0.1225) loss: 0.8008 (0.8060) time: 0.1409 data: 0.0548 max mem: 9377 +Train: [55] [2300/6250] eta: 0:11:00 lr: 0.000057 grad: 0.1126 (0.1224) loss: 0.8008 (0.8059) time: 0.1576 data: 0.0646 max mem: 9377 +Train: [55] [2400/6250] eta: 0:10:44 lr: 0.000057 grad: 0.1176 (0.1224) loss: 0.7931 (0.8056) time: 0.1661 data: 0.0753 max mem: 9377 +Train: [55] [2500/6250] eta: 0:10:27 lr: 0.000057 grad: 0.1186 (0.1224) loss: 0.7980 (0.8054) time: 0.1743 data: 0.0922 max mem: 9377 +Train: [55] [2600/6250] eta: 0:10:09 lr: 0.000056 grad: 0.1265 (0.1225) loss: 0.7982 (0.8051) time: 0.1778 data: 0.0967 max mem: 9377 +Train: [55] [2700/6250] eta: 0:09:50 lr: 0.000056 grad: 0.1199 (0.1225) loss: 0.8047 (0.8049) time: 0.1250 data: 0.0473 max mem: 9377 +Train: [55] [2800/6250] eta: 0:09:32 lr: 0.000056 grad: 0.1213 (0.1225) loss: 0.8000 (0.8047) time: 0.1633 data: 0.0800 max mem: 9377 +Train: [55] [2900/6250] eta: 0:09:15 lr: 0.000056 grad: 0.1232 (0.1226) loss: 0.8020 (0.8044) time: 0.1665 data: 0.0771 max mem: 9377 +Train: [55] [3000/6250] eta: 0:08:58 lr: 0.000056 grad: 0.1263 (0.1226) loss: 0.8004 (0.8043) time: 0.1502 data: 0.0561 max mem: 9377 +Train: [55] [3100/6250] eta: 0:08:41 lr: 0.000056 grad: 0.1142 (0.1225) loss: 0.8049 (0.8042) time: 0.1586 data: 0.0753 max mem: 9377 +Train: [55] [3200/6250] eta: 0:08:23 lr: 0.000056 grad: 0.1280 (0.1225) loss: 0.7986 (0.8040) time: 0.1558 data: 0.0618 max mem: 9377 +Train: [55] [3300/6250] eta: 0:08:05 lr: 0.000056 grad: 0.1201 (0.1225) loss: 0.7981 (0.8039) time: 0.1577 data: 0.0667 max mem: 9377 +Train: [55] [3400/6250] eta: 0:07:47 lr: 0.000056 grad: 0.1188 (0.1226) loss: 0.7983 (0.8038) time: 0.1572 data: 0.0569 max mem: 9377 +Train: [55] [3500/6250] eta: 0:07:31 lr: 0.000056 grad: 0.1235 (0.1226) loss: 0.7944 (0.8037) time: 0.1772 data: 0.0859 max mem: 9377 +Train: [55] [3600/6250] eta: 0:07:14 lr: 0.000056 grad: 0.1235 (0.1228) loss: 0.8013 (0.8035) time: 0.1562 data: 0.0598 max mem: 9377 +Train: [55] [3700/6250] eta: 0:06:56 lr: 0.000056 grad: 0.1148 (0.1228) loss: 0.8025 (0.8034) time: 0.1366 data: 0.0433 max mem: 9377 +Train: [55] [3800/6250] eta: 0:06:39 lr: 0.000056 grad: 0.1180 (0.1228) loss: 0.8004 (0.8032) time: 0.1552 data: 0.0708 max mem: 9377 +Train: [55] [3900/6250] eta: 0:06:22 lr: 0.000056 grad: 0.1252 (0.1228) loss: 0.7931 (0.8030) time: 0.1617 data: 0.0688 max mem: 9377 +Train: [55] [4000/6250] eta: 0:06:05 lr: 0.000056 grad: 0.1287 (0.1228) loss: 0.7972 (0.8030) time: 0.1450 data: 0.0603 max mem: 9377 +Train: [55] [4100/6250] eta: 0:05:48 lr: 0.000056 grad: 0.1311 (0.1229) loss: 0.7960 (0.8029) time: 0.1487 data: 0.0640 max mem: 9377 +Train: [55] [4200/6250] eta: 0:05:32 lr: 0.000056 grad: 0.1210 (0.1230) loss: 0.7999 (0.8028) time: 0.1497 data: 0.0548 max mem: 9377 +Train: [55] [4300/6250] eta: 0:05:15 lr: 0.000056 grad: 0.1224 (0.1232) loss: 0.8002 (0.8026) time: 0.1583 data: 0.0683 max mem: 9377 +Train: [55] [4400/6250] eta: 0:04:59 lr: 0.000056 grad: 0.1208 (0.1232) loss: 0.7943 (0.8025) time: 0.1551 data: 0.0635 max mem: 9377 +Train: [55] [4500/6250] eta: 0:04:42 lr: 0.000056 grad: 0.1185 (0.1233) loss: 0.7948 (0.8025) time: 0.1264 data: 0.0307 max mem: 9377 +Train: [55] [4600/6250] eta: 0:04:26 lr: 0.000056 grad: 0.1277 (0.1234) loss: 0.7911 (0.8024) time: 0.1511 data: 0.0604 max mem: 9377 +Train: [55] [4700/6250] eta: 0:04:09 lr: 0.000056 grad: 0.1216 (0.1235) loss: 0.7964 (0.8023) time: 0.2046 data: 0.1196 max mem: 9377 +Train: [55] [4800/6250] eta: 0:03:53 lr: 0.000056 grad: 0.1264 (0.1235) loss: 0.7928 (0.8022) time: 0.1544 data: 0.0609 max mem: 9377 +Train: [55] [4900/6250] eta: 0:03:37 lr: 0.000056 grad: 0.1257 (0.1235) loss: 0.7962 (0.8021) time: 0.1773 data: 0.0846 max mem: 9377 +Train: [55] [5000/6250] eta: 0:03:21 lr: 0.000056 grad: 0.1292 (0.1236) loss: 0.7975 (0.8020) time: 0.1627 data: 0.0716 max mem: 9377 +Train: [55] [5100/6250] eta: 0:03:05 lr: 0.000056 grad: 0.1220 (0.1236) loss: 0.7921 (0.8018) time: 0.1623 data: 0.0625 max mem: 9377 +Train: [55] [5200/6250] eta: 0:02:49 lr: 0.000056 grad: 0.1252 (0.1237) loss: 0.7929 (0.8017) time: 0.2016 data: 0.1116 max mem: 9377 +Train: [55] [5300/6250] eta: 0:02:34 lr: 0.000056 grad: 0.1277 (0.1237) loss: 0.7848 (0.8015) time: 0.2679 data: 0.1929 max mem: 9377 +Train: [55] [5400/6250] eta: 0:02:18 lr: 0.000056 grad: 0.1204 (0.1237) loss: 0.8055 (0.8014) time: 0.1815 data: 0.0874 max mem: 9377 +Train: [55] [5500/6250] eta: 0:02:02 lr: 0.000056 grad: 0.1150 (0.1237) loss: 0.8045 (0.8013) time: 0.2036 data: 0.1154 max mem: 9377 +Train: [55] [5600/6250] eta: 0:01:46 lr: 0.000055 grad: 0.1293 (0.1238) loss: 0.7879 (0.8012) time: 0.2092 data: 0.1276 max mem: 9377 +Train: [55] [5700/6250] eta: 0:01:30 lr: 0.000055 grad: 0.1272 (0.1240) loss: 0.8045 (0.8011) time: 0.1763 data: 0.0847 max mem: 9377 +Train: [55] [5800/6250] eta: 0:01:13 lr: 0.000055 grad: 0.1294 (0.1240) loss: 0.7915 (0.8011) time: 0.1807 data: 0.0974 max mem: 9377 +Train: [55] [5900/6250] eta: 0:00:57 lr: 0.000055 grad: 0.1318 (0.1240) loss: 0.7988 (0.8011) time: 0.1727 data: 0.0743 max mem: 9377 +Train: [55] [6000/6250] eta: 0:00:41 lr: 0.000055 grad: 0.1208 (0.1240) loss: 0.8000 (0.8010) time: 0.1934 data: 0.1051 max mem: 9377 +Train: [55] [6100/6250] eta: 0:00:24 lr: 0.000055 grad: 0.1240 (0.1241) loss: 0.8015 (0.8009) time: 0.1659 data: 0.0780 max mem: 9377 +Train: [55] [6200/6250] eta: 0:00:08 lr: 0.000055 grad: 0.1192 (0.1241) loss: 0.8036 (0.8009) time: 0.1662 data: 0.0748 max mem: 9377 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.1281 (0.1241) loss: 0.8086 (0.8009) time: 0.1379 data: 0.0434 max mem: 9377 +Train: [55] Total time: 0:17:14 (0.1656 s / it) +Averaged stats: lr: 0.000055 grad: 0.1281 (0.1241) loss: 0.8086 (0.8009) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:04:58 loss: 0.8261 (0.8261) time: 4.8114 data: 4.7148 max mem: 9377 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8167 (0.8171) time: 0.1580 data: 0.1325 max mem: 9377 +Eval (hcp-train-subset): [55] Total time: 0:00:15 (0.2542 s / it) +Averaged stats (hcp-train-subset): loss: 0.8167 (0.8171) +Eval (hcp-val): [55] [ 0/62] eta: 0:05:51 loss: 0.8361 (0.8361) time: 5.6643 data: 5.6332 max mem: 9377 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8377 (0.8388) time: 0.1316 data: 0.1063 max mem: 9377 +Eval (hcp-val): [55] Total time: 0:00:15 (0.2547 s / it) +Averaged stats (hcp-val): loss: 0.8377 (0.8388) +Eval (nsd-val): [55] [ 0/62] eta: 0:06:26 loss: 0.8122 (0.8122) time: 6.2366 data: 6.2014 max mem: 9377 +Eval (nsd-val): [55] [61/62] eta: 0:00:00 loss: 0.8199 (0.8192) time: 0.1508 data: 0.1253 max mem: 9377 +Eval (nsd-val): [55] Total time: 0:00:15 (0.2522 s / it) +Averaged stats (nsd-val): loss: 0.8199 (0.8192) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [56] [ 0/6250] eta: 12:26:44 lr: 0.000055 grad: 0.1891 (0.1891) loss: 0.8467 (0.8467) time: 7.1687 data: 7.0650 max mem: 9377 +Train: [56] [ 100/6250] eta: 0:24:16 lr: 0.000055 grad: 0.1164 (0.1452) loss: 0.8311 (0.8325) time: 0.1880 data: 0.0795 max mem: 9377 +Train: [56] [ 200/6250] eta: 0:21:09 lr: 0.000055 grad: 0.1124 (0.1339) loss: 0.8304 (0.8277) time: 0.1862 data: 0.0843 max mem: 9377 +Train: [56] [ 300/6250] eta: 0:19:35 lr: 0.000055 grad: 0.1025 (0.1282) loss: 0.8247 (0.8245) time: 0.1569 data: 0.0496 max mem: 9377 +Train: [56] [ 400/6250] eta: 0:18:42 lr: 0.000055 grad: 0.1128 (0.1258) loss: 0.8133 (0.8226) time: 0.1697 data: 0.0753 max mem: 9377 +Train: [56] [ 500/6250] eta: 0:17:46 lr: 0.000055 grad: 0.1075 (0.1246) loss: 0.8186 (0.8200) time: 0.1411 data: 0.0422 max mem: 9377 +Train: [56] [ 600/6250] eta: 0:17:06 lr: 0.000055 grad: 0.1203 (0.1250) loss: 0.7912 (0.8175) time: 0.1538 data: 0.0589 max mem: 9377 +Train: [56] [ 700/6250] eta: 0:16:37 lr: 0.000055 grad: 0.1165 (0.1244) loss: 0.8018 (0.8153) time: 0.1621 data: 0.0560 max mem: 9377 +Train: [56] [ 800/6250] eta: 0:16:08 lr: 0.000055 grad: 0.1160 (0.1239) loss: 0.8071 (0.8139) time: 0.1753 data: 0.0840 max mem: 9377 +Train: [56] [ 900/6250] eta: 0:15:47 lr: 0.000055 grad: 0.1203 (0.1236) loss: 0.8062 (0.8126) time: 0.2239 data: 0.1278 max mem: 9377 +Train: [56] [1000/6250] eta: 0:15:25 lr: 0.000055 grad: 0.1241 (0.1241) loss: 0.7908 (0.8110) time: 0.1412 data: 0.0617 max mem: 9377 +Train: [56] [1100/6250] eta: 0:14:59 lr: 0.000055 grad: 0.1195 (0.1242) loss: 0.7991 (0.8100) time: 0.1573 data: 0.0742 max mem: 9377 +Train: [56] [1200/6250] eta: 0:14:36 lr: 0.000055 grad: 0.1217 (0.1243) loss: 0.7957 (0.8086) time: 0.1387 data: 0.0534 max mem: 9377 +Train: [56] [1300/6250] eta: 0:14:18 lr: 0.000055 grad: 0.1146 (0.1242) loss: 0.8030 (0.8076) time: 0.1324 data: 0.0432 max mem: 9377 +Train: [56] [1400/6250] eta: 0:13:59 lr: 0.000055 grad: 0.1255 (0.1243) loss: 0.7804 (0.8065) time: 0.1776 data: 0.0870 max mem: 9377 +Train: [56] [1500/6250] eta: 0:13:39 lr: 0.000055 grad: 0.1247 (0.1244) loss: 0.7918 (0.8056) time: 0.1718 data: 0.0876 max mem: 9377 +Train: [56] [1600/6250] eta: 0:13:18 lr: 0.000055 grad: 0.1119 (0.1244) loss: 0.7957 (0.8050) time: 0.1415 data: 0.0545 max mem: 9377 +Train: [56] [1700/6250] eta: 0:12:57 lr: 0.000055 grad: 0.1159 (0.1244) loss: 0.8044 (0.8044) time: 0.1489 data: 0.0672 max mem: 9377 +Train: [56] [1800/6250] eta: 0:12:37 lr: 0.000055 grad: 0.1220 (0.1246) loss: 0.7831 (0.8039) time: 0.1695 data: 0.0818 max mem: 9377 +Train: [56] [1900/6250] eta: 0:12:15 lr: 0.000055 grad: 0.1107 (0.1245) loss: 0.8029 (0.8035) time: 0.1425 data: 0.0560 max mem: 9377 +Train: [56] [2000/6250] eta: 0:11:54 lr: 0.000055 grad: 0.1215 (0.1243) loss: 0.8036 (0.8034) time: 0.1591 data: 0.0708 max mem: 9377 +Train: [56] [2100/6250] eta: 0:11:33 lr: 0.000055 grad: 0.1123 (0.1244) loss: 0.8043 (0.8031) time: 0.1528 data: 0.0580 max mem: 9377 +Train: [56] [2200/6250] eta: 0:11:13 lr: 0.000055 grad: 0.1218 (0.1242) loss: 0.8051 (0.8028) time: 0.1392 data: 0.0466 max mem: 9377 +Train: [56] [2300/6250] eta: 0:10:53 lr: 0.000055 grad: 0.1188 (0.1241) loss: 0.8013 (0.8026) time: 0.1490 data: 0.0632 max mem: 9377 +Train: [56] [2400/6250] eta: 0:10:35 lr: 0.000054 grad: 0.1246 (0.1240) loss: 0.7974 (0.8025) time: 0.1264 data: 0.0399 max mem: 9377 +Train: [56] [2500/6250] eta: 0:10:19 lr: 0.000054 grad: 0.1244 (0.1240) loss: 0.8010 (0.8022) time: 0.1727 data: 0.0947 max mem: 9377 +Train: [56] [2600/6250] eta: 0:10:02 lr: 0.000054 grad: 0.1213 (0.1239) loss: 0.7987 (0.8021) time: 0.1381 data: 0.0525 max mem: 9377 +Train: [56] [2700/6250] eta: 0:09:47 lr: 0.000054 grad: 0.1180 (0.1238) loss: 0.8020 (0.8020) time: 0.1576 data: 0.0770 max mem: 9377 +Train: [56] [2800/6250] eta: 0:09:31 lr: 0.000054 grad: 0.1165 (0.1237) loss: 0.8020 (0.8019) time: 0.1852 data: 0.0920 max mem: 9377 +Train: [56] [2900/6250] eta: 0:09:17 lr: 0.000054 grad: 0.1145 (0.1237) loss: 0.7985 (0.8018) time: 0.1739 data: 0.0798 max mem: 9377 +Train: [56] [3000/6250] eta: 0:09:01 lr: 0.000054 grad: 0.1175 (0.1236) loss: 0.8001 (0.8018) time: 0.2008 data: 0.1099 max mem: 9377 +Train: [56] [3100/6250] eta: 0:08:44 lr: 0.000054 grad: 0.1167 (0.1235) loss: 0.8047 (0.8018) time: 0.1482 data: 0.0627 max mem: 9377 +Train: [56] [3200/6250] eta: 0:08:27 lr: 0.000054 grad: 0.1201 (0.1234) loss: 0.7944 (0.8018) time: 0.1551 data: 0.0687 max mem: 9377 +Train: [56] [3300/6250] eta: 0:08:10 lr: 0.000054 grad: 0.1153 (0.1234) loss: 0.8022 (0.8019) time: 0.1498 data: 0.0654 max mem: 9377 +Train: [56] [3400/6250] eta: 0:07:54 lr: 0.000054 grad: 0.1137 (0.1233) loss: 0.8111 (0.8019) time: 0.1083 data: 0.0268 max mem: 9377 +Train: [56] [3500/6250] eta: 0:07:38 lr: 0.000054 grad: 0.1180 (0.1232) loss: 0.8068 (0.8020) time: 0.0965 data: 0.0002 max mem: 9377 +Train: [56] [3600/6250] eta: 0:07:22 lr: 0.000054 grad: 0.1218 (0.1232) loss: 0.8086 (0.8020) time: 0.1466 data: 0.0656 max mem: 9377 +Train: [56] [3700/6250] eta: 0:07:06 lr: 0.000054 grad: 0.1252 (0.1232) loss: 0.8053 (0.8020) time: 0.2342 data: 0.1573 max mem: 9377 +Train: [56] [3800/6250] eta: 0:06:49 lr: 0.000054 grad: 0.1146 (0.1231) loss: 0.8048 (0.8020) time: 0.1773 data: 0.0952 max mem: 9377 +Train: [56] [3900/6250] eta: 0:06:32 lr: 0.000054 grad: 0.1120 (0.1230) loss: 0.8081 (0.8021) time: 0.1295 data: 0.0472 max mem: 9377 +Train: [56] [4000/6250] eta: 0:06:16 lr: 0.000054 grad: 0.1193 (0.1230) loss: 0.7974 (0.8021) time: 0.1746 data: 0.0846 max mem: 9377 +Train: [56] [4100/6250] eta: 0:05:59 lr: 0.000054 grad: 0.1222 (0.1229) loss: 0.8083 (0.8022) time: 0.1670 data: 0.0854 max mem: 9377 +Train: [56] [4200/6250] eta: 0:05:42 lr: 0.000054 grad: 0.1119 (0.1228) loss: 0.8053 (0.8023) time: 0.1553 data: 0.0700 max mem: 9377 +Train: [56] [4300/6250] eta: 0:05:26 lr: 0.000054 grad: 0.1237 (0.1228) loss: 0.7999 (0.8024) time: 0.1684 data: 0.0798 max mem: 9377 +Train: [56] [4400/6250] eta: 0:05:08 lr: 0.000054 grad: 0.1148 (0.1228) loss: 0.8059 (0.8025) time: 0.1887 data: 0.1051 max mem: 9377 +Train: [56] [4500/6250] eta: 0:04:51 lr: 0.000054 grad: 0.1127 (0.1227) loss: 0.8103 (0.8026) time: 0.1643 data: 0.0713 max mem: 9377 +Train: [56] [4600/6250] eta: 0:04:35 lr: 0.000054 grad: 0.1166 (0.1226) loss: 0.8080 (0.8027) time: 0.1471 data: 0.0480 max mem: 9377 +Train: [56] [4700/6250] eta: 0:04:18 lr: 0.000054 grad: 0.1178 (0.1227) loss: 0.8066 (0.8028) time: 0.1561 data: 0.0625 max mem: 9377 +Train: [56] [4800/6250] eta: 0:04:01 lr: 0.000054 grad: 0.1200 (0.1226) loss: 0.8120 (0.8028) time: 0.1528 data: 0.0644 max mem: 9377 +Train: [56] [4900/6250] eta: 0:03:44 lr: 0.000054 grad: 0.1115 (0.1226) loss: 0.8159 (0.8029) time: 0.1607 data: 0.0786 max mem: 9377 +Train: [56] [5000/6250] eta: 0:03:27 lr: 0.000054 grad: 0.1194 (0.1226) loss: 0.8062 (0.8029) time: 0.1621 data: 0.0754 max mem: 9377 +Train: [56] [5100/6250] eta: 0:03:11 lr: 0.000054 grad: 0.1228 (0.1226) loss: 0.8028 (0.8029) time: 0.1855 data: 0.1016 max mem: 9377 +Train: [56] [5200/6250] eta: 0:02:54 lr: 0.000054 grad: 0.1382 (0.1228) loss: 0.8061 (0.8030) time: 0.1823 data: 0.0825 max mem: 9377 +Train: [56] [5300/6250] eta: 0:02:38 lr: 0.000054 grad: 0.1235 (0.1228) loss: 0.8025 (0.8030) time: 0.1850 data: 0.0988 max mem: 9377 +Train: [56] [5400/6250] eta: 0:02:21 lr: 0.000054 grad: 0.1268 (0.1228) loss: 0.8052 (0.8030) time: 0.1757 data: 0.0856 max mem: 9377 +Train: [56] [5500/6250] eta: 0:02:04 lr: 0.000053 grad: 0.1155 (0.1229) loss: 0.8078 (0.8030) time: 0.1746 data: 0.0939 max mem: 9377 +Train: [56] [5600/6250] eta: 0:01:48 lr: 0.000053 grad: 0.1243 (0.1229) loss: 0.7948 (0.8030) time: 0.1704 data: 0.0901 max mem: 9377 +Train: [56] [5700/6250] eta: 0:01:31 lr: 0.000053 grad: 0.1214 (0.1229) loss: 0.8009 (0.8029) time: 0.1496 data: 0.0602 max mem: 9377 +Train: [56] [5800/6250] eta: 0:01:14 lr: 0.000053 grad: 0.1200 (0.1229) loss: 0.8019 (0.8029) time: 0.1658 data: 0.0788 max mem: 9377 +Train: [56] [5900/6250] eta: 0:00:58 lr: 0.000053 grad: 0.1156 (0.1229) loss: 0.8069 (0.8029) time: 0.1608 data: 0.0755 max mem: 9377 +Train: [56] [6000/6250] eta: 0:00:41 lr: 0.000053 grad: 0.1202 (0.1229) loss: 0.8029 (0.8029) time: 0.1600 data: 0.0758 max mem: 9377 +Train: [56] [6100/6250] eta: 0:00:24 lr: 0.000053 grad: 0.1129 (0.1229) loss: 0.8178 (0.8030) time: 0.1611 data: 0.0699 max mem: 9377 +Train: [56] [6200/6250] eta: 0:00:08 lr: 0.000053 grad: 0.1120 (0.1228) loss: 0.8094 (0.8031) time: 0.1627 data: 0.0701 max mem: 9377 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.1159 (0.1228) loss: 0.8067 (0.8031) time: 0.1531 data: 0.0594 max mem: 9377 +Train: [56] Total time: 0:17:19 (0.1663 s / it) +Averaged stats: lr: 0.000053 grad: 0.1159 (0.1228) loss: 0.8067 (0.8031) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:05:14 loss: 0.8278 (0.8278) time: 5.0726 data: 5.0395 max mem: 9377 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8133 (0.8161) time: 0.1407 data: 0.1136 max mem: 9377 +Eval (hcp-train-subset): [56] Total time: 0:00:14 (0.2360 s / it) +Averaged stats (hcp-train-subset): loss: 0.8133 (0.8161) +Eval (hcp-val): [56] [ 0/62] eta: 0:05:37 loss: 0.8363 (0.8363) time: 5.4402 data: 5.4085 max mem: 9377 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8371 (0.8386) time: 0.1306 data: 0.1051 max mem: 9377 +Eval (hcp-val): [56] Total time: 0:00:14 (0.2390 s / it) +Averaged stats (hcp-val): loss: 0.8371 (0.8386) +Eval (nsd-val): [56] [ 0/62] eta: 0:04:36 loss: 0.8076 (0.8076) time: 4.4640 data: 4.3613 max mem: 9377 +Eval (nsd-val): [56] [61/62] eta: 0:00:00 loss: 0.8176 (0.8182) time: 0.1426 data: 0.1173 max mem: 9377 +Eval (nsd-val): [56] Total time: 0:00:14 (0.2334 s / it) +Averaged stats (nsd-val): loss: 0.8176 (0.8182) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [57] [ 0/6250] eta: 9:37:32 lr: 0.000053 grad: 0.1804 (0.1804) loss: 0.8218 (0.8218) time: 5.5445 data: 5.3382 max mem: 9377 +Train: [57] [ 100/6250] eta: 0:23:50 lr: 0.000053 grad: 0.1297 (0.1544) loss: 0.8131 (0.8202) time: 0.1878 data: 0.0943 max mem: 9377 +Train: [57] [ 200/6250] eta: 0:19:47 lr: 0.000053 grad: 0.1156 (0.1432) loss: 0.8072 (0.8165) time: 0.1655 data: 0.0740 max mem: 9377 +Train: [57] [ 300/6250] eta: 0:18:16 lr: 0.000053 grad: 0.1104 (0.1353) loss: 0.8104 (0.8150) time: 0.1477 data: 0.0410 max mem: 9377 +Train: [57] [ 400/6250] eta: 0:17:23 lr: 0.000053 grad: 0.1137 (0.1325) loss: 0.8089 (0.8133) time: 0.1547 data: 0.0506 max mem: 9377 +Train: [57] [ 500/6250] eta: 0:16:41 lr: 0.000053 grad: 0.1100 (0.1289) loss: 0.8172 (0.8131) time: 0.1497 data: 0.0531 max mem: 9377 +Train: [57] [ 600/6250] eta: 0:16:09 lr: 0.000053 grad: 0.1038 (0.1262) loss: 0.8186 (0.8129) time: 0.1754 data: 0.0823 max mem: 9377 +Train: [57] [ 700/6250] eta: 0:15:54 lr: 0.000053 grad: 0.1170 (0.1245) loss: 0.8118 (0.8132) time: 0.1831 data: 0.0905 max mem: 9377 +Train: [57] [ 800/6250] eta: 0:15:29 lr: 0.000053 grad: 0.1149 (0.1236) loss: 0.8010 (0.8129) time: 0.1568 data: 0.0565 max mem: 9377 +Train: [57] [ 900/6250] eta: 0:15:23 lr: 0.000053 grad: 0.1162 (0.1230) loss: 0.8118 (0.8129) time: 0.2422 data: 0.1513 max mem: 9377 +Train: [57] [1000/6250] eta: 0:14:58 lr: 0.000053 grad: 0.1134 (0.1232) loss: 0.8158 (0.8129) time: 0.1529 data: 0.0661 max mem: 9377 +Train: [57] [1100/6250] eta: 0:14:35 lr: 0.000053 grad: 0.1157 (0.1239) loss: 0.8028 (0.8124) time: 0.1436 data: 0.0474 max mem: 9377 +Train: [57] [1200/6250] eta: 0:14:13 lr: 0.000053 grad: 0.1074 (0.1235) loss: 0.8095 (0.8121) time: 0.1412 data: 0.0565 max mem: 9377 +Train: [57] [1300/6250] eta: 0:13:54 lr: 0.000053 grad: 0.1145 (0.1230) loss: 0.8090 (0.8119) time: 0.1679 data: 0.0770 max mem: 9377 +Train: [57] [1400/6250] eta: 0:13:37 lr: 0.000053 grad: 0.1172 (0.1228) loss: 0.8079 (0.8116) time: 0.1689 data: 0.0767 max mem: 9377 +Train: [57] [1500/6250] eta: 0:13:19 lr: 0.000053 grad: 0.1180 (0.1228) loss: 0.8090 (0.8111) time: 0.1680 data: 0.0682 max mem: 9377 +Train: [57] [1600/6250] eta: 0:12:55 lr: 0.000053 grad: 0.1200 (0.1226) loss: 0.8037 (0.8106) time: 0.1416 data: 0.0458 max mem: 9377 +Train: [57] [1700/6250] eta: 0:12:35 lr: 0.000053 grad: 0.1128 (0.1225) loss: 0.8100 (0.8103) time: 0.1334 data: 0.0302 max mem: 9377 +Train: [57] [1800/6250] eta: 0:12:15 lr: 0.000053 grad: 0.1194 (0.1223) loss: 0.8047 (0.8100) time: 0.1604 data: 0.0762 max mem: 9377 +Train: [57] [1900/6250] eta: 0:11:55 lr: 0.000053 grad: 0.1225 (0.1223) loss: 0.7988 (0.8098) time: 0.1666 data: 0.0709 max mem: 9377 +Train: [57] [2000/6250] eta: 0:11:35 lr: 0.000053 grad: 0.1103 (0.1222) loss: 0.8149 (0.8097) time: 0.1374 data: 0.0470 max mem: 9377 +Train: [57] [2100/6250] eta: 0:11:17 lr: 0.000053 grad: 0.1191 (0.1220) loss: 0.8098 (0.8096) time: 0.1494 data: 0.0482 max mem: 9377 +Train: [57] [2200/6250] eta: 0:10:58 lr: 0.000053 grad: 0.1195 (0.1220) loss: 0.7982 (0.8094) time: 0.1461 data: 0.0519 max mem: 9377 +Train: [57] [2300/6250] eta: 0:10:39 lr: 0.000052 grad: 0.1202 (0.1222) loss: 0.8048 (0.8091) time: 0.1634 data: 0.0801 max mem: 9377 +Train: [57] [2400/6250] eta: 0:10:21 lr: 0.000052 grad: 0.1234 (0.1222) loss: 0.7975 (0.8089) time: 0.1375 data: 0.0518 max mem: 9377 +Train: [57] [2500/6250] eta: 0:10:06 lr: 0.000052 grad: 0.1185 (0.1222) loss: 0.8052 (0.8087) time: 0.1913 data: 0.1087 max mem: 9377 +Train: [57] [2600/6250] eta: 0:09:50 lr: 0.000052 grad: 0.1258 (0.1224) loss: 0.8083 (0.8085) time: 0.1535 data: 0.0541 max mem: 9377 +Train: [57] [2700/6250] eta: 0:09:34 lr: 0.000052 grad: 0.1248 (0.1225) loss: 0.7993 (0.8082) time: 0.1564 data: 0.0642 max mem: 9377 +Train: [57] [2800/6250] eta: 0:09:16 lr: 0.000052 grad: 0.1273 (0.1226) loss: 0.7880 (0.8079) time: 0.1601 data: 0.0719 max mem: 9377 +Train: [57] [2900/6250] eta: 0:08:59 lr: 0.000052 grad: 0.1257 (0.1227) loss: 0.8030 (0.8076) time: 0.1600 data: 0.0689 max mem: 9377 +Train: [57] [3000/6250] eta: 0:08:43 lr: 0.000052 grad: 0.1168 (0.1228) loss: 0.8080 (0.8074) time: 0.1484 data: 0.0564 max mem: 9377 +Train: [57] [3100/6250] eta: 0:08:27 lr: 0.000052 grad: 0.1216 (0.1229) loss: 0.8034 (0.8072) time: 0.1497 data: 0.0615 max mem: 9377 +Train: [57] [3200/6250] eta: 0:08:11 lr: 0.000052 grad: 0.1203 (0.1230) loss: 0.7967 (0.8069) time: 0.1565 data: 0.0674 max mem: 9377 +Train: [57] [3300/6250] eta: 0:07:54 lr: 0.000052 grad: 0.1149 (0.1231) loss: 0.8015 (0.8066) time: 0.1452 data: 0.0491 max mem: 9377 +Train: [57] [3400/6250] eta: 0:07:37 lr: 0.000052 grad: 0.1215 (0.1231) loss: 0.7997 (0.8064) time: 0.1506 data: 0.0652 max mem: 9377 +Train: [57] [3500/6250] eta: 0:07:20 lr: 0.000052 grad: 0.1220 (0.1232) loss: 0.7962 (0.8061) time: 0.1295 data: 0.0350 max mem: 9377 +Train: [57] [3600/6250] eta: 0:07:03 lr: 0.000052 grad: 0.1267 (0.1232) loss: 0.8004 (0.8059) time: 0.1543 data: 0.0699 max mem: 9377 +Train: [57] [3700/6250] eta: 0:06:47 lr: 0.000052 grad: 0.1249 (0.1233) loss: 0.7914 (0.8056) time: 0.1643 data: 0.0815 max mem: 9377 +Train: [57] [3800/6250] eta: 0:06:31 lr: 0.000052 grad: 0.1232 (0.1233) loss: 0.8029 (0.8054) time: 0.1554 data: 0.0662 max mem: 9377 +Train: [57] [3900/6250] eta: 0:06:15 lr: 0.000052 grad: 0.1261 (0.1234) loss: 0.7991 (0.8053) time: 0.1616 data: 0.0726 max mem: 9377 +Train: [57] [4000/6250] eta: 0:05:58 lr: 0.000052 grad: 0.1207 (0.1235) loss: 0.8020 (0.8051) time: 0.1501 data: 0.0617 max mem: 9377 +Train: [57] [4100/6250] eta: 0:05:42 lr: 0.000052 grad: 0.1211 (0.1236) loss: 0.7991 (0.8049) time: 0.1658 data: 0.0808 max mem: 9377 +Train: [57] [4200/6250] eta: 0:05:27 lr: 0.000052 grad: 0.1152 (0.1235) loss: 0.8124 (0.8049) time: 0.1608 data: 0.0640 max mem: 9377 +Train: [57] [4300/6250] eta: 0:05:11 lr: 0.000052 grad: 0.1273 (0.1234) loss: 0.7978 (0.8049) time: 0.1754 data: 0.0863 max mem: 9377 +Train: [57] [4400/6250] eta: 0:04:54 lr: 0.000052 grad: 0.1209 (0.1234) loss: 0.7981 (0.8048) time: 0.1391 data: 0.0529 max mem: 9377 +Train: [57] [4500/6250] eta: 0:04:38 lr: 0.000052 grad: 0.1092 (0.1234) loss: 0.8084 (0.8047) time: 0.1481 data: 0.0532 max mem: 9377 +Train: [57] [4600/6250] eta: 0:04:22 lr: 0.000052 grad: 0.1194 (0.1233) loss: 0.8008 (0.8047) time: 0.1480 data: 0.0614 max mem: 9377 +Train: [57] [4700/6250] eta: 0:04:06 lr: 0.000052 grad: 0.1222 (0.1233) loss: 0.8044 (0.8046) time: 0.1563 data: 0.0710 max mem: 9377 +Train: [57] [4800/6250] eta: 0:03:50 lr: 0.000052 grad: 0.1125 (0.1233) loss: 0.8049 (0.8046) time: 0.1481 data: 0.0579 max mem: 9377 +Train: [57] [4900/6250] eta: 0:03:34 lr: 0.000052 grad: 0.1217 (0.1233) loss: 0.8031 (0.8046) time: 0.1556 data: 0.0697 max mem: 9377 +Train: [57] [5000/6250] eta: 0:03:18 lr: 0.000052 grad: 0.1213 (0.1232) loss: 0.7997 (0.8046) time: 0.1637 data: 0.0745 max mem: 9377 +Train: [57] [5100/6250] eta: 0:03:02 lr: 0.000052 grad: 0.1147 (0.1232) loss: 0.8059 (0.8046) time: 0.1549 data: 0.0586 max mem: 9377 +Train: [57] [5200/6250] eta: 0:02:47 lr: 0.000052 grad: 0.1285 (0.1232) loss: 0.7975 (0.8045) time: 0.1681 data: 0.0813 max mem: 9377 +Train: [57] [5300/6250] eta: 0:02:31 lr: 0.000052 grad: 0.1154 (0.1232) loss: 0.8064 (0.8046) time: 0.1487 data: 0.0639 max mem: 9377 +Train: [57] [5400/6250] eta: 0:02:15 lr: 0.000051 grad: 0.1229 (0.1231) loss: 0.8019 (0.8046) time: 0.1653 data: 0.0700 max mem: 9377 +Train: [57] [5500/6250] eta: 0:01:59 lr: 0.000051 grad: 0.1137 (0.1231) loss: 0.8020 (0.8045) time: 0.1752 data: 0.0799 max mem: 9377 +Train: [57] [5600/6250] eta: 0:01:43 lr: 0.000051 grad: 0.1250 (0.1232) loss: 0.7887 (0.8044) time: 0.1781 data: 0.0854 max mem: 9377 +Train: [57] [5700/6250] eta: 0:01:27 lr: 0.000051 grad: 0.1289 (0.1233) loss: 0.7999 (0.8043) time: 0.1874 data: 0.1046 max mem: 9377 +Train: [57] [5800/6250] eta: 0:01:11 lr: 0.000051 grad: 0.1244 (0.1233) loss: 0.7974 (0.8042) time: 0.1670 data: 0.0702 max mem: 9377 +Train: [57] [5900/6250] eta: 0:00:55 lr: 0.000051 grad: 0.1252 (0.1234) loss: 0.7844 (0.8041) time: 0.1470 data: 0.0473 max mem: 9377 +Train: [57] [6000/6250] eta: 0:00:39 lr: 0.000051 grad: 0.1198 (0.1234) loss: 0.8001 (0.8040) time: 0.1565 data: 0.0633 max mem: 9377 +Train: [57] [6100/6250] eta: 0:00:23 lr: 0.000051 grad: 0.1265 (0.1234) loss: 0.7996 (0.8041) time: 0.1453 data: 0.0434 max mem: 9377 +Train: [57] [6200/6250] eta: 0:00:07 lr: 0.000051 grad: 0.1219 (0.1234) loss: 0.8040 (0.8040) time: 0.1584 data: 0.0677 max mem: 9377 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1208 (0.1234) loss: 0.8012 (0.8040) time: 0.1385 data: 0.0502 max mem: 9377 +Train: [57] Total time: 0:16:41 (0.1602 s / it) +Averaged stats: lr: 0.000051 grad: 0.1208 (0.1234) loss: 0.8012 (0.8040) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:05:52 loss: 0.8245 (0.8245) time: 5.6786 data: 5.6472 max mem: 9377 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8102 (0.8142) time: 0.1312 data: 0.1062 max mem: 9377 +Eval (hcp-train-subset): [57] Total time: 0:00:14 (0.2406 s / it) +Averaged stats (hcp-train-subset): loss: 0.8102 (0.8142) +Eval (hcp-val): [57] [ 0/62] eta: 0:05:38 loss: 0.8355 (0.8355) time: 5.4542 data: 5.4238 max mem: 9377 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8365 (0.8388) time: 0.1297 data: 0.1030 max mem: 9377 +Eval (hcp-val): [57] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-val): loss: 0.8365 (0.8388) +Eval (nsd-val): [57] [ 0/62] eta: 0:03:51 loss: 0.8063 (0.8063) time: 3.7372 data: 3.6652 max mem: 9377 +Eval (nsd-val): [57] [61/62] eta: 0:00:00 loss: 0.8168 (0.8181) time: 0.1249 data: 0.0993 max mem: 9377 +Eval (nsd-val): [57] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (nsd-val): loss: 0.8168 (0.8181) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [58] [ 0/6250] eta: 11:44:10 lr: 0.000051 grad: 0.0835 (0.0835) loss: 0.8797 (0.8797) time: 6.7600 data: 6.6584 max mem: 9377 +Train: [58] [ 100/6250] eta: 0:22:48 lr: 0.000051 grad: 0.1544 (0.1552) loss: 0.8162 (0.8192) time: 0.1731 data: 0.0657 max mem: 9377 +Train: [58] [ 200/6250] eta: 0:19:40 lr: 0.000051 grad: 0.1382 (0.1509) loss: 0.8115 (0.8139) time: 0.1562 data: 0.0537 max mem: 9377 +Train: [58] [ 300/6250] eta: 0:17:58 lr: 0.000051 grad: 0.1289 (0.1447) loss: 0.8119 (0.8130) time: 0.1615 data: 0.0629 max mem: 9377 +Train: [58] [ 400/6250] eta: 0:17:02 lr: 0.000051 grad: 0.1171 (0.1410) loss: 0.8124 (0.8121) time: 0.1631 data: 0.0705 max mem: 9377 +Train: [58] [ 500/6250] eta: 0:16:23 lr: 0.000051 grad: 0.1216 (0.1386) loss: 0.8019 (0.8105) time: 0.1466 data: 0.0458 max mem: 9377 +Train: [58] [ 600/6250] eta: 0:15:57 lr: 0.000051 grad: 0.1270 (0.1369) loss: 0.8051 (0.8090) time: 0.1670 data: 0.0667 max mem: 9377 +Train: [58] [ 700/6250] eta: 0:15:28 lr: 0.000051 grad: 0.1301 (0.1354) loss: 0.8094 (0.8083) time: 0.1338 data: 0.0386 max mem: 9377 +Train: [58] [ 800/6250] eta: 0:15:11 lr: 0.000051 grad: 0.1280 (0.1346) loss: 0.8005 (0.8076) time: 0.1775 data: 0.0758 max mem: 9377 +Train: [58] [ 900/6250] eta: 0:14:56 lr: 0.000051 grad: 0.1179 (0.1344) loss: 0.7984 (0.8068) time: 0.1589 data: 0.0558 max mem: 9377 +Train: [58] [1000/6250] eta: 0:14:41 lr: 0.000051 grad: 0.1191 (0.1334) loss: 0.8034 (0.8062) time: 0.1673 data: 0.0745 max mem: 9377 +Train: [58] [1100/6250] eta: 0:14:26 lr: 0.000051 grad: 0.1147 (0.1323) loss: 0.8014 (0.8058) time: 0.1506 data: 0.0660 max mem: 9377 +Train: [58] [1200/6250] eta: 0:14:10 lr: 0.000051 grad: 0.1219 (0.1317) loss: 0.8055 (0.8054) time: 0.1547 data: 0.0726 max mem: 9377 +Train: [58] [1300/6250] eta: 0:13:56 lr: 0.000051 grad: 0.1236 (0.1315) loss: 0.7973 (0.8047) time: 0.1799 data: 0.0835 max mem: 9377 +Train: [58] [1400/6250] eta: 0:13:43 lr: 0.000051 grad: 0.1197 (0.1310) loss: 0.8014 (0.8043) time: 0.1877 data: 0.0903 max mem: 9377 +Train: [58] [1500/6250] eta: 0:13:29 lr: 0.000051 grad: 0.1155 (0.1308) loss: 0.8030 (0.8040) time: 0.1930 data: 0.0936 max mem: 9377 +Train: [58] [1600/6250] eta: 0:13:11 lr: 0.000051 grad: 0.1307 (0.1306) loss: 0.7914 (0.8037) time: 0.1615 data: 0.0680 max mem: 9377 +Train: [58] [1700/6250] eta: 0:12:54 lr: 0.000051 grad: 0.1262 (0.1303) loss: 0.7998 (0.8035) time: 0.1462 data: 0.0474 max mem: 9377 +Train: [58] [1800/6250] eta: 0:12:35 lr: 0.000051 grad: 0.1273 (0.1301) loss: 0.7940 (0.8031) time: 0.1799 data: 0.0918 max mem: 9377 +Train: [58] [1900/6250] eta: 0:12:15 lr: 0.000051 grad: 0.1239 (0.1301) loss: 0.7982 (0.8028) time: 0.1599 data: 0.0758 max mem: 9377 +Train: [58] [2000/6250] eta: 0:11:54 lr: 0.000051 grad: 0.1279 (0.1301) loss: 0.7864 (0.8025) time: 0.1456 data: 0.0640 max mem: 9377 +Train: [58] [2100/6250] eta: 0:11:36 lr: 0.000051 grad: 0.1324 (0.1301) loss: 0.7908 (0.8021) time: 0.1542 data: 0.0693 max mem: 9377 +Train: [58] [2200/6250] eta: 0:11:22 lr: 0.000050 grad: 0.1298 (0.1299) loss: 0.7931 (0.8019) time: 0.1952 data: 0.1151 max mem: 9377 +Train: [58] [2300/6250] eta: 0:11:03 lr: 0.000050 grad: 0.1179 (0.1299) loss: 0.7959 (0.8016) time: 0.1691 data: 0.0804 max mem: 9377 +Train: [58] [2400/6250] eta: 0:10:46 lr: 0.000050 grad: 0.1323 (0.1299) loss: 0.7832 (0.8013) time: 0.1643 data: 0.0810 max mem: 9377 +Train: [58] [2500/6250] eta: 0:10:29 lr: 0.000050 grad: 0.1327 (0.1299) loss: 0.7898 (0.8009) time: 0.1821 data: 0.0968 max mem: 9377 +Train: [58] [2600/6250] eta: 0:10:13 lr: 0.000050 grad: 0.1264 (0.1299) loss: 0.7942 (0.8007) time: 0.1438 data: 0.0503 max mem: 9377 +Train: [58] [2700/6250] eta: 0:09:55 lr: 0.000050 grad: 0.1264 (0.1300) loss: 0.7955 (0.8004) time: 0.1377 data: 0.0520 max mem: 9377 +Train: [58] [2800/6250] eta: 0:09:39 lr: 0.000050 grad: 0.1227 (0.1300) loss: 0.7960 (0.8002) time: 0.1729 data: 0.0920 max mem: 9377 +Train: [58] [2900/6250] eta: 0:09:21 lr: 0.000050 grad: 0.1273 (0.1301) loss: 0.8020 (0.8000) time: 0.1603 data: 0.0705 max mem: 9377 +Train: [58] [3000/6250] eta: 0:09:04 lr: 0.000050 grad: 0.1268 (0.1300) loss: 0.7988 (0.7999) time: 0.1707 data: 0.0816 max mem: 9377 +Train: [58] [3100/6250] eta: 0:08:48 lr: 0.000050 grad: 0.1229 (0.1298) loss: 0.7975 (0.7998) time: 0.1932 data: 0.0866 max mem: 9377 +Train: [58] [3200/6250] eta: 0:08:30 lr: 0.000050 grad: 0.1200 (0.1297) loss: 0.7931 (0.7996) time: 0.1510 data: 0.0563 max mem: 9377 +Train: [58] [3300/6250] eta: 0:08:12 lr: 0.000050 grad: 0.1281 (0.1297) loss: 0.7918 (0.7995) time: 0.1604 data: 0.0708 max mem: 9377 +Train: [58] [3400/6250] eta: 0:07:55 lr: 0.000050 grad: 0.1175 (0.1295) loss: 0.7963 (0.7994) time: 0.1661 data: 0.0690 max mem: 9377 +Train: [58] [3500/6250] eta: 0:07:36 lr: 0.000050 grad: 0.1231 (0.1295) loss: 0.7952 (0.7992) time: 0.1439 data: 0.0510 max mem: 9377 +Train: [58] [3600/6250] eta: 0:07:19 lr: 0.000050 grad: 0.1263 (0.1294) loss: 0.7947 (0.7991) time: 0.1560 data: 0.0664 max mem: 9377 +Train: [58] [3700/6250] eta: 0:07:01 lr: 0.000050 grad: 0.1210 (0.1294) loss: 0.8067 (0.7991) time: 0.1378 data: 0.0428 max mem: 9377 +Train: [58] [3800/6250] eta: 0:06:45 lr: 0.000050 grad: 0.1224 (0.1293) loss: 0.8004 (0.7991) time: 0.1588 data: 0.0803 max mem: 9377 +Train: [58] [3900/6250] eta: 0:06:28 lr: 0.000050 grad: 0.1251 (0.1292) loss: 0.7820 (0.7990) time: 0.1579 data: 0.0691 max mem: 9377 +Train: [58] [4000/6250] eta: 0:06:11 lr: 0.000050 grad: 0.1200 (0.1291) loss: 0.8025 (0.7991) time: 0.1333 data: 0.0494 max mem: 9377 +Train: [58] [4100/6250] eta: 0:05:54 lr: 0.000050 grad: 0.1165 (0.1292) loss: 0.7991 (0.7991) time: 0.1460 data: 0.0555 max mem: 9377 +Train: [58] [4200/6250] eta: 0:05:37 lr: 0.000050 grad: 0.1267 (0.1290) loss: 0.8042 (0.7992) time: 0.1572 data: 0.0710 max mem: 9377 +Train: [58] [4300/6250] eta: 0:05:20 lr: 0.000050 grad: 0.1134 (0.1290) loss: 0.7973 (0.7992) time: 0.1484 data: 0.0568 max mem: 9377 +Train: [58] [4400/6250] eta: 0:05:03 lr: 0.000050 grad: 0.1226 (0.1289) loss: 0.8055 (0.7992) time: 0.1639 data: 0.0703 max mem: 9377 +Train: [58] [4500/6250] eta: 0:04:46 lr: 0.000050 grad: 0.1193 (0.1288) loss: 0.7918 (0.7992) time: 0.1651 data: 0.0757 max mem: 9377 +Train: [58] [4600/6250] eta: 0:04:30 lr: 0.000050 grad: 0.1253 (0.1287) loss: 0.8060 (0.7993) time: 0.1313 data: 0.0441 max mem: 9377 +Train: [58] [4700/6250] eta: 0:04:13 lr: 0.000050 grad: 0.1264 (0.1286) loss: 0.8017 (0.7993) time: 0.1664 data: 0.0759 max mem: 9377 +Train: [58] [4800/6250] eta: 0:03:56 lr: 0.000050 grad: 0.1237 (0.1286) loss: 0.7993 (0.7994) time: 0.1619 data: 0.0592 max mem: 9377 +Train: [58] [4900/6250] eta: 0:03:39 lr: 0.000050 grad: 0.1204 (0.1285) loss: 0.8077 (0.7995) time: 0.1602 data: 0.0719 max mem: 9377 +Train: [58] [5000/6250] eta: 0:03:23 lr: 0.000050 grad: 0.1248 (0.1285) loss: 0.7939 (0.7995) time: 0.1433 data: 0.0556 max mem: 9377 +Train: [58] [5100/6250] eta: 0:03:06 lr: 0.000050 grad: 0.1250 (0.1284) loss: 0.7985 (0.7995) time: 0.1377 data: 0.0522 max mem: 9377 +Train: [58] [5200/6250] eta: 0:02:50 lr: 0.000050 grad: 0.1244 (0.1284) loss: 0.8017 (0.7996) time: 0.1711 data: 0.0959 max mem: 9377 +Train: [58] [5300/6250] eta: 0:02:34 lr: 0.000049 grad: 0.1296 (0.1284) loss: 0.8019 (0.7996) time: 0.1590 data: 0.0758 max mem: 9377 +Train: [58] [5400/6250] eta: 0:02:18 lr: 0.000049 grad: 0.1263 (0.1283) loss: 0.7958 (0.7995) time: 0.1542 data: 0.0590 max mem: 9377 +Train: [58] [5500/6250] eta: 0:02:01 lr: 0.000049 grad: 0.1235 (0.1283) loss: 0.7997 (0.7995) time: 0.1424 data: 0.0592 max mem: 9377 +Train: [58] [5600/6250] eta: 0:01:45 lr: 0.000049 grad: 0.1297 (0.1283) loss: 0.7920 (0.7995) time: 0.1657 data: 0.0774 max mem: 9377 +Train: [58] [5700/6250] eta: 0:01:29 lr: 0.000049 grad: 0.1183 (0.1282) loss: 0.8061 (0.7995) time: 0.1582 data: 0.0569 max mem: 9377 +Train: [58] [5800/6250] eta: 0:01:13 lr: 0.000049 grad: 0.1262 (0.1281) loss: 0.8039 (0.7995) time: 0.1763 data: 0.0840 max mem: 9377 +Train: [58] [5900/6250] eta: 0:00:57 lr: 0.000049 grad: 0.1230 (0.1281) loss: 0.7999 (0.7996) time: 0.1720 data: 0.0729 max mem: 9377 +Train: [58] [6000/6250] eta: 0:00:40 lr: 0.000049 grad: 0.1316 (0.1281) loss: 0.7974 (0.7995) time: 0.1657 data: 0.0747 max mem: 9377 +Train: [58] [6100/6250] eta: 0:00:24 lr: 0.000049 grad: 0.1319 (0.1280) loss: 0.7955 (0.7995) time: 0.1433 data: 0.0525 max mem: 9377 +Train: [58] [6200/6250] eta: 0:00:08 lr: 0.000049 grad: 0.1232 (0.1281) loss: 0.7982 (0.7995) time: 0.1403 data: 0.0526 max mem: 9377 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.1172 (0.1281) loss: 0.8041 (0.7995) time: 0.1533 data: 0.0620 max mem: 9377 +Train: [58] Total time: 0:17:01 (0.1635 s / it) +Averaged stats: lr: 0.000049 grad: 0.1172 (0.1281) loss: 0.8041 (0.7995) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:05:45 loss: 0.8213 (0.8213) time: 5.5682 data: 5.5378 max mem: 9377 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8134 (0.8148) time: 0.1253 data: 0.0993 max mem: 9377 +Eval (hcp-train-subset): [58] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-train-subset): loss: 0.8134 (0.8148) +Eval (hcp-val): [58] [ 0/62] eta: 0:06:11 loss: 0.8344 (0.8344) time: 5.9966 data: 5.9671 max mem: 9377 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8383 (0.8396) time: 0.1337 data: 0.1084 max mem: 9377 +Eval (hcp-val): [58] Total time: 0:00:14 (0.2325 s / it) +Averaged stats (hcp-val): loss: 0.8383 (0.8396) +Eval (nsd-val): [58] [ 0/62] eta: 0:03:47 loss: 0.8075 (0.8075) time: 3.6727 data: 3.5800 max mem: 9377 +Eval (nsd-val): [58] [61/62] eta: 0:00:00 loss: 0.8188 (0.8211) time: 0.1398 data: 0.1138 max mem: 9377 +Eval (nsd-val): [58] Total time: 0:00:13 (0.2256 s / it) +Averaged stats (nsd-val): loss: 0.8188 (0.8211) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [59] [ 0/6250] eta: 8:37:29 lr: 0.000049 grad: 0.6787 (0.6787) loss: 0.7401 (0.7401) time: 4.9679 data: 4.7727 max mem: 9377 +Train: [59] [ 100/6250] eta: 0:22:16 lr: 0.000049 grad: 0.1325 (0.1728) loss: 0.8121 (0.8174) time: 0.1594 data: 0.0520 max mem: 9377 +Train: [59] [ 200/6250] eta: 0:19:14 lr: 0.000049 grad: 0.1158 (0.1556) loss: 0.8102 (0.8146) time: 0.1589 data: 0.0541 max mem: 9377 +Train: [59] [ 300/6250] eta: 0:17:57 lr: 0.000049 grad: 0.1254 (0.1483) loss: 0.8118 (0.8120) time: 0.1630 data: 0.0676 max mem: 9377 +Train: [59] [ 400/6250] eta: 0:17:05 lr: 0.000049 grad: 0.1267 (0.1443) loss: 0.8026 (0.8112) time: 0.1825 data: 0.0837 max mem: 9377 +Train: [59] [ 500/6250] eta: 0:16:42 lr: 0.000049 grad: 0.1274 (0.1416) loss: 0.8088 (0.8098) time: 0.1646 data: 0.0626 max mem: 9377 +Train: [59] [ 600/6250] eta: 0:16:17 lr: 0.000049 grad: 0.1174 (0.1397) loss: 0.8076 (0.8092) time: 0.1767 data: 0.0827 max mem: 9377 +Train: [59] [ 700/6250] eta: 0:15:50 lr: 0.000049 grad: 0.1144 (0.1373) loss: 0.8155 (0.8094) time: 0.1610 data: 0.0655 max mem: 9377 +Train: [59] [ 800/6250] eta: 0:15:31 lr: 0.000049 grad: 0.1207 (0.1357) loss: 0.8097 (0.8093) time: 0.1816 data: 0.0928 max mem: 9377 +Train: [59] [ 900/6250] eta: 0:15:09 lr: 0.000049 grad: 0.1193 (0.1340) loss: 0.8074 (0.8092) time: 0.1549 data: 0.0622 max mem: 9377 +Train: [59] [1000/6250] eta: 0:14:51 lr: 0.000049 grad: 0.1192 (0.1332) loss: 0.8080 (0.8088) time: 0.1689 data: 0.0940 max mem: 9377 +Train: [59] [1100/6250] eta: 0:14:30 lr: 0.000049 grad: 0.1247 (0.1327) loss: 0.8067 (0.8085) time: 0.1703 data: 0.0889 max mem: 9377 +Train: [59] [1200/6250] eta: 0:14:13 lr: 0.000049 grad: 0.1287 (0.1322) loss: 0.8118 (0.8082) time: 0.1666 data: 0.0712 max mem: 9377 +Train: [59] [1300/6250] eta: 0:13:52 lr: 0.000049 grad: 0.1137 (0.1313) loss: 0.8069 (0.8081) time: 0.1645 data: 0.0734 max mem: 9377 +Train: [59] [1400/6250] eta: 0:13:33 lr: 0.000049 grad: 0.1166 (0.1309) loss: 0.7988 (0.8080) time: 0.1631 data: 0.0793 max mem: 9377 +Train: [59] [1500/6250] eta: 0:13:14 lr: 0.000049 grad: 0.1188 (0.1305) loss: 0.8082 (0.8077) time: 0.1552 data: 0.0715 max mem: 9377 +Train: [59] [1600/6250] eta: 0:12:55 lr: 0.000049 grad: 0.1242 (0.1301) loss: 0.8072 (0.8073) time: 0.1742 data: 0.0900 max mem: 9377 +Train: [59] [1700/6250] eta: 0:12:33 lr: 0.000049 grad: 0.1357 (0.1300) loss: 0.8012 (0.8072) time: 0.1437 data: 0.0489 max mem: 9377 +Train: [59] [1800/6250] eta: 0:12:14 lr: 0.000049 grad: 0.1262 (0.1298) loss: 0.8039 (0.8070) time: 0.1495 data: 0.0606 max mem: 9377 +Train: [59] [1900/6250] eta: 0:11:53 lr: 0.000049 grad: 0.1232 (0.1296) loss: 0.8008 (0.8067) time: 0.1690 data: 0.0702 max mem: 9377 +Train: [59] [2000/6250] eta: 0:11:33 lr: 0.000049 grad: 0.1222 (0.1294) loss: 0.8077 (0.8064) time: 0.1509 data: 0.0530 max mem: 9377 +Train: [59] [2100/6250] eta: 0:11:14 lr: 0.000048 grad: 0.1165 (0.1292) loss: 0.8052 (0.8061) time: 0.1509 data: 0.0665 max mem: 9377 +Train: [59] [2200/6250] eta: 0:10:55 lr: 0.000048 grad: 0.1224 (0.1291) loss: 0.8041 (0.8059) time: 0.1484 data: 0.0617 max mem: 9377 +Train: [59] [2300/6250] eta: 0:10:37 lr: 0.000048 grad: 0.1234 (0.1289) loss: 0.7979 (0.8056) time: 0.1451 data: 0.0607 max mem: 9377 +Train: [59] [2400/6250] eta: 0:10:20 lr: 0.000048 grad: 0.1255 (0.1288) loss: 0.7961 (0.8055) time: 0.1590 data: 0.0662 max mem: 9377 +Train: [59] [2500/6250] eta: 0:10:02 lr: 0.000048 grad: 0.1196 (0.1289) loss: 0.8085 (0.8054) time: 0.1639 data: 0.0749 max mem: 9377 +Train: [59] [2600/6250] eta: 0:09:44 lr: 0.000048 grad: 0.1240 (0.1289) loss: 0.8006 (0.8053) time: 0.1392 data: 0.0558 max mem: 9377 +Train: [59] [2700/6250] eta: 0:09:34 lr: 0.000048 grad: 0.1335 (0.1289) loss: 0.8011 (0.8051) time: 0.1842 data: 0.1021 max mem: 9377 +Train: [59] [2800/6250] eta: 0:09:19 lr: 0.000048 grad: 0.1284 (0.1288) loss: 0.7994 (0.8049) time: 0.1664 data: 0.0838 max mem: 9377 +Train: [59] [2900/6250] eta: 0:09:05 lr: 0.000048 grad: 0.1303 (0.1289) loss: 0.7976 (0.8047) time: 0.1687 data: 0.0872 max mem: 9377 +Train: [59] [3000/6250] eta: 0:08:49 lr: 0.000048 grad: 0.1304 (0.1290) loss: 0.7963 (0.8045) time: 0.1792 data: 0.0887 max mem: 9377 +Train: [59] [3100/6250] eta: 0:08:35 lr: 0.000048 grad: 0.1308 (0.1290) loss: 0.7898 (0.8043) time: 0.1592 data: 0.0628 max mem: 9377 +Train: [59] [3200/6250] eta: 0:08:21 lr: 0.000048 grad: 0.1289 (0.1290) loss: 0.7967 (0.8041) time: 0.1409 data: 0.0534 max mem: 9377 +Train: [59] [3300/6250] eta: 0:08:05 lr: 0.000048 grad: 0.1282 (0.1291) loss: 0.7903 (0.8038) time: 0.2095 data: 0.1135 max mem: 9377 +Train: [59] [3400/6250] eta: 0:07:49 lr: 0.000048 grad: 0.1279 (0.1290) loss: 0.7979 (0.8036) time: 0.1747 data: 0.0916 max mem: 9377 +Train: [59] [3500/6250] eta: 0:07:32 lr: 0.000048 grad: 0.1353 (0.1290) loss: 0.8028 (0.8035) time: 0.1617 data: 0.0646 max mem: 9377 +Train: [59] [3600/6250] eta: 0:07:15 lr: 0.000048 grad: 0.1270 (0.1290) loss: 0.7963 (0.8033) time: 0.1447 data: 0.0480 max mem: 9377 +Train: [59] [3700/6250] eta: 0:06:58 lr: 0.000048 grad: 0.1230 (0.1291) loss: 0.7963 (0.8031) time: 0.1474 data: 0.0587 max mem: 9377 +Train: [59] [3800/6250] eta: 0:06:41 lr: 0.000048 grad: 0.1181 (0.1291) loss: 0.8014 (0.8030) time: 0.1629 data: 0.0773 max mem: 9377 +Train: [59] [3900/6250] eta: 0:06:24 lr: 0.000048 grad: 0.1209 (0.1291) loss: 0.7970 (0.8028) time: 0.1699 data: 0.0776 max mem: 9377 +Train: [59] [4000/6250] eta: 0:06:07 lr: 0.000048 grad: 0.1236 (0.1290) loss: 0.7942 (0.8026) time: 0.1559 data: 0.0656 max mem: 9377 +Train: [59] [4100/6250] eta: 0:05:51 lr: 0.000048 grad: 0.1280 (0.1289) loss: 0.8019 (0.8026) time: 0.1687 data: 0.0819 max mem: 9377 +Train: [59] [4200/6250] eta: 0:05:34 lr: 0.000048 grad: 0.1247 (0.1288) loss: 0.8056 (0.8026) time: 0.1652 data: 0.0673 max mem: 9377 +Train: [59] [4300/6250] eta: 0:05:18 lr: 0.000048 grad: 0.1172 (0.1286) loss: 0.8054 (0.8028) time: 0.1368 data: 0.0378 max mem: 9377 +Train: [59] [4400/6250] eta: 0:05:01 lr: 0.000048 grad: 0.1204 (0.1285) loss: 0.8058 (0.8028) time: 0.1727 data: 0.0852 max mem: 9377 +Train: [59] [4500/6250] eta: 0:04:45 lr: 0.000048 grad: 0.1173 (0.1284) loss: 0.8066 (0.8029) time: 0.2005 data: 0.1108 max mem: 9377 +Train: [59] [4600/6250] eta: 0:04:28 lr: 0.000048 grad: 0.1225 (0.1283) loss: 0.8016 (0.8029) time: 0.1544 data: 0.0637 max mem: 9377 +Train: [59] [4700/6250] eta: 0:04:12 lr: 0.000048 grad: 0.1187 (0.1282) loss: 0.8066 (0.8031) time: 0.1495 data: 0.0573 max mem: 9377 +Train: [59] [4800/6250] eta: 0:03:55 lr: 0.000048 grad: 0.1222 (0.1281) loss: 0.8100 (0.8032) time: 0.1713 data: 0.0840 max mem: 9377 +Train: [59] [4900/6250] eta: 0:03:39 lr: 0.000048 grad: 0.1315 (0.1280) loss: 0.8038 (0.8032) time: 0.1608 data: 0.0574 max mem: 9377 +Train: [59] [5000/6250] eta: 0:03:22 lr: 0.000048 grad: 0.1254 (0.1280) loss: 0.8085 (0.8032) time: 0.1434 data: 0.0560 max mem: 9377 +Train: [59] [5100/6250] eta: 0:03:07 lr: 0.000048 grad: 0.1245 (0.1280) loss: 0.7963 (0.8031) time: 0.2519 data: 0.1631 max mem: 9377 +Train: [59] [5200/6250] eta: 0:02:50 lr: 0.000047 grad: 0.1286 (0.1280) loss: 0.7967 (0.8030) time: 0.2009 data: 0.1276 max mem: 9377 +Train: [59] [5300/6250] eta: 0:02:34 lr: 0.000047 grad: 0.1321 (0.1281) loss: 0.7991 (0.8029) time: 0.1590 data: 0.0717 max mem: 9377 +Train: [59] [5400/6250] eta: 0:02:18 lr: 0.000047 grad: 0.1280 (0.1283) loss: 0.7943 (0.8028) time: 0.1583 data: 0.0735 max mem: 9377 +Train: [59] [5500/6250] eta: 0:02:01 lr: 0.000047 grad: 0.1341 (0.1283) loss: 0.7902 (0.8027) time: 0.1609 data: 0.0677 max mem: 9377 +Train: [59] [5600/6250] eta: 0:01:45 lr: 0.000047 grad: 0.1234 (0.1283) loss: 0.7948 (0.8026) time: 0.1427 data: 0.0641 max mem: 9377 +Train: [59] [5700/6250] eta: 0:01:29 lr: 0.000047 grad: 0.1309 (0.1284) loss: 0.7913 (0.8025) time: 0.1637 data: 0.0787 max mem: 9377 +Train: [59] [5800/6250] eta: 0:01:12 lr: 0.000047 grad: 0.1302 (0.1284) loss: 0.7996 (0.8024) time: 0.1373 data: 0.0493 max mem: 9377 +Train: [59] [5900/6250] eta: 0:00:56 lr: 0.000047 grad: 0.1267 (0.1284) loss: 0.7985 (0.8023) time: 0.1536 data: 0.0661 max mem: 9377 +Train: [59] [6000/6250] eta: 0:00:40 lr: 0.000047 grad: 0.1310 (0.1284) loss: 0.7987 (0.8023) time: 0.1543 data: 0.0619 max mem: 9377 +Train: [59] [6100/6250] eta: 0:00:24 lr: 0.000047 grad: 0.1289 (0.1284) loss: 0.7932 (0.8022) time: 0.1348 data: 0.0468 max mem: 9377 +Train: [59] [6200/6250] eta: 0:00:08 lr: 0.000047 grad: 0.1234 (0.1284) loss: 0.7951 (0.8021) time: 0.1486 data: 0.0479 max mem: 9377 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.1238 (0.1284) loss: 0.7955 (0.8020) time: 0.1435 data: 0.0468 max mem: 9377 +Train: [59] Total time: 0:16:56 (0.1626 s / it) +Averaged stats: lr: 0.000047 grad: 0.1238 (0.1284) loss: 0.7955 (0.8020) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:05:09 loss: 0.8253 (0.8253) time: 4.9957 data: 4.9427 max mem: 9377 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8114 (0.8150) time: 0.1505 data: 0.1247 max mem: 9377 +Eval (hcp-train-subset): [59] Total time: 0:00:15 (0.2559 s / it) +Averaged stats (hcp-train-subset): loss: 0.8114 (0.8150) +Making plots (hcp-train-subset): example=10 +Eval (hcp-val): [59] [ 0/62] eta: 0:05:37 loss: 0.8373 (0.8373) time: 5.4404 data: 5.4028 max mem: 9377 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8385 (0.8397) time: 0.1494 data: 0.1238 max mem: 9377 +Eval (hcp-val): [59] Total time: 0:00:16 (0.2595 s / it) +Averaged stats (hcp-val): loss: 0.8385 (0.8397) +Making plots (hcp-val): example=9 +Eval (nsd-val): [59] [ 0/62] eta: 0:05:14 loss: 0.8040 (0.8040) time: 5.0737 data: 4.9749 max mem: 9377 +Eval (nsd-val): [59] [61/62] eta: 0:00:00 loss: 0.8199 (0.8189) time: 0.1343 data: 0.1081 max mem: 9377 +Eval (nsd-val): [59] Total time: 0:00:16 (0.2587 s / it) +Averaged stats (nsd-val): loss: 0.8199 (0.8189) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 13:26:34 lr: 0.000047 grad: 0.1383 (0.1383) loss: 0.8120 (0.8120) time: 7.7431 data: 7.6426 max mem: 9377 +Train: [60] [ 100/6250] eta: 0:24:33 lr: 0.000047 grad: 0.1832 (0.1869) loss: 0.7882 (0.8130) time: 0.2000 data: 0.0885 max mem: 9377 +Train: [60] [ 200/6250] eta: 0:21:03 lr: 0.000047 grad: 0.1429 (0.1771) loss: 0.8087 (0.8034) time: 0.1465 data: 0.0475 max mem: 9377 +Train: [60] [ 300/6250] eta: 0:19:10 lr: 0.000047 grad: 0.1326 (0.1667) loss: 0.7941 (0.8013) time: 0.1553 data: 0.0454 max mem: 9377 +Train: [60] [ 400/6250] eta: 0:18:12 lr: 0.000047 grad: 0.1309 (0.1586) loss: 0.7968 (0.8005) time: 0.1549 data: 0.0558 max mem: 9377 +Train: [60] [ 500/6250] eta: 0:17:24 lr: 0.000047 grad: 0.1308 (0.1551) loss: 0.7929 (0.7995) time: 0.1641 data: 0.0711 max mem: 9377 +Train: [60] [ 600/6250] eta: 0:16:55 lr: 0.000047 grad: 0.1224 (0.1508) loss: 0.8012 (0.7996) time: 0.1998 data: 0.1184 max mem: 9377 +Train: [60] [ 700/6250] eta: 0:16:30 lr: 0.000047 grad: 0.1282 (0.1469) loss: 0.8009 (0.8002) time: 0.1662 data: 0.0686 max mem: 9377 +Train: [60] [ 800/6250] eta: 0:16:04 lr: 0.000047 grad: 0.1127 (0.1443) loss: 0.8043 (0.8005) time: 0.1422 data: 0.0484 max mem: 9377 +Train: [60] [ 900/6250] eta: 0:15:43 lr: 0.000047 grad: 0.1194 (0.1425) loss: 0.8078 (0.8005) time: 0.1568 data: 0.0592 max mem: 9377 +Train: [60] [1000/6250] eta: 0:15:20 lr: 0.000047 grad: 0.1254 (0.1409) loss: 0.7940 (0.8005) time: 0.1480 data: 0.0634 max mem: 9377 +Train: [60] [1100/6250] eta: 0:14:55 lr: 0.000047 grad: 0.1287 (0.1395) loss: 0.7960 (0.8002) time: 0.1411 data: 0.0515 max mem: 9377 +Train: [60] [1200/6250] eta: 0:14:31 lr: 0.000047 grad: 0.1248 (0.1384) loss: 0.8089 (0.8004) time: 0.1631 data: 0.0864 max mem: 9377 +Train: [60] [1300/6250] eta: 0:14:07 lr: 0.000047 grad: 0.1290 (0.1376) loss: 0.7971 (0.8003) time: 0.1355 data: 0.0540 max mem: 9377 +Train: [60] [1400/6250] eta: 0:13:44 lr: 0.000047 grad: 0.1336 (0.1371) loss: 0.7948 (0.8001) time: 0.1498 data: 0.0617 max mem: 9377 +Train: [60] [1500/6250] eta: 0:13:26 lr: 0.000047 grad: 0.1334 (0.1364) loss: 0.7975 (0.8001) time: 0.1816 data: 0.1003 max mem: 9377 +Train: [60] [1600/6250] eta: 0:13:05 lr: 0.000047 grad: 0.1262 (0.1360) loss: 0.7990 (0.8001) time: 0.1451 data: 0.0619 max mem: 9377 +Train: [60] [1700/6250] eta: 0:12:45 lr: 0.000047 grad: 0.1333 (0.1356) loss: 0.8012 (0.7999) time: 0.1657 data: 0.0759 max mem: 9377 +Train: [60] [1800/6250] eta: 0:12:24 lr: 0.000047 grad: 0.1222 (0.1356) loss: 0.7955 (0.7997) time: 0.1502 data: 0.0548 max mem: 9377 +Train: [60] [1900/6250] eta: 0:12:05 lr: 0.000047 grad: 0.1283 (0.1354) loss: 0.7989 (0.7997) time: 0.1423 data: 0.0599 max mem: 9377 +Train: [60] [2000/6250] eta: 0:11:47 lr: 0.000047 grad: 0.1251 (0.1350) loss: 0.7947 (0.7996) time: 0.1716 data: 0.0805 max mem: 9377 +Train: [60] [2100/6250] eta: 0:11:29 lr: 0.000046 grad: 0.1234 (0.1346) loss: 0.7998 (0.7996) time: 0.1457 data: 0.0556 max mem: 9377 +Train: [60] [2200/6250] eta: 0:11:12 lr: 0.000046 grad: 0.1230 (0.1343) loss: 0.8073 (0.7998) time: 0.1626 data: 0.0724 max mem: 9377 +Train: [60] [2300/6250] eta: 0:10:57 lr: 0.000046 grad: 0.1272 (0.1340) loss: 0.7996 (0.7998) time: 0.1671 data: 0.0775 max mem: 9377 +Train: [60] [2400/6250] eta: 0:10:39 lr: 0.000046 grad: 0.1276 (0.1337) loss: 0.8044 (0.7999) time: 0.1646 data: 0.0794 max mem: 9377 +Train: [60] [2500/6250] eta: 0:10:21 lr: 0.000046 grad: 0.1382 (0.1335) loss: 0.7915 (0.7998) time: 0.1295 data: 0.0403 max mem: 9377 +Train: [60] [2600/6250] eta: 0:10:07 lr: 0.000046 grad: 0.1218 (0.1333) loss: 0.8047 (0.7998) time: 0.1775 data: 0.0804 max mem: 9377 +Train: [60] [2700/6250] eta: 0:09:51 lr: 0.000046 grad: 0.1272 (0.1333) loss: 0.7972 (0.7997) time: 0.2301 data: 0.1413 max mem: 9377 +Train: [60] [2800/6250] eta: 0:09:34 lr: 0.000046 grad: 0.1273 (0.1331) loss: 0.8037 (0.7997) time: 0.1792 data: 0.0821 max mem: 9377 +Train: [60] [2900/6250] eta: 0:09:16 lr: 0.000046 grad: 0.1243 (0.1329) loss: 0.8051 (0.7996) time: 0.1437 data: 0.0627 max mem: 9377 +Train: [60] [3000/6250] eta: 0:08:59 lr: 0.000046 grad: 0.1161 (0.1326) loss: 0.7952 (0.7996) time: 0.1497 data: 0.0658 max mem: 9377 +Train: [60] [3100/6250] eta: 0:08:43 lr: 0.000046 grad: 0.1232 (0.1325) loss: 0.7931 (0.7995) time: 0.1730 data: 0.0796 max mem: 9377 +Train: [60] [3200/6250] eta: 0:08:27 lr: 0.000046 grad: 0.1177 (0.1322) loss: 0.7969 (0.7995) time: 0.1794 data: 0.0899 max mem: 9377 +Train: [60] [3300/6250] eta: 0:08:10 lr: 0.000046 grad: 0.1284 (0.1322) loss: 0.7878 (0.7994) time: 0.1571 data: 0.0584 max mem: 9377 +Train: [60] [3400/6250] eta: 0:07:53 lr: 0.000046 grad: 0.1266 (0.1321) loss: 0.7984 (0.7994) time: 0.1721 data: 0.0791 max mem: 9377 +Train: [60] [3500/6250] eta: 0:07:36 lr: 0.000046 grad: 0.1126 (0.1319) loss: 0.8038 (0.7994) time: 0.1524 data: 0.0573 max mem: 9377 +Train: [60] [3600/6250] eta: 0:07:19 lr: 0.000046 grad: 0.1216 (0.1319) loss: 0.8032 (0.7994) time: 0.1619 data: 0.0673 max mem: 9377 +Train: [60] [3700/6250] eta: 0:07:02 lr: 0.000046 grad: 0.1243 (0.1318) loss: 0.8048 (0.7994) time: 0.1978 data: 0.1048 max mem: 9377 +Train: [60] [3800/6250] eta: 0:06:44 lr: 0.000046 grad: 0.1311 (0.1317) loss: 0.7961 (0.7994) time: 0.1337 data: 0.0457 max mem: 9377 +Train: [60] [3900/6250] eta: 0:06:27 lr: 0.000046 grad: 0.1292 (0.1316) loss: 0.7931 (0.7994) time: 0.1493 data: 0.0507 max mem: 9377 +Train: [60] [4000/6250] eta: 0:06:10 lr: 0.000046 grad: 0.1237 (0.1315) loss: 0.8085 (0.7995) time: 0.1559 data: 0.0711 max mem: 9377 +Train: [60] [4100/6250] eta: 0:05:53 lr: 0.000046 grad: 0.1267 (0.1314) loss: 0.8068 (0.7995) time: 0.1459 data: 0.0495 max mem: 9377 +Train: [60] [4200/6250] eta: 0:05:36 lr: 0.000046 grad: 0.1230 (0.1313) loss: 0.8041 (0.7995) time: 0.1160 data: 0.0260 max mem: 9377 +Train: [60] [4300/6250] eta: 0:05:19 lr: 0.000046 grad: 0.1315 (0.1314) loss: 0.7935 (0.7994) time: 0.1558 data: 0.0731 max mem: 9377 +Train: [60] [4400/6250] eta: 0:05:03 lr: 0.000046 grad: 0.1297 (0.1314) loss: 0.7947 (0.7993) time: 0.1453 data: 0.0491 max mem: 9377 +Train: [60] [4500/6250] eta: 0:04:46 lr: 0.000046 grad: 0.1275 (0.1314) loss: 0.7979 (0.7992) time: 0.1739 data: 0.0890 max mem: 9377 +Train: [60] [4600/6250] eta: 0:04:30 lr: 0.000046 grad: 0.1275 (0.1314) loss: 0.7931 (0.7991) time: 0.1796 data: 0.0920 max mem: 9377 +Train: [60] [4700/6250] eta: 0:04:13 lr: 0.000046 grad: 0.1281 (0.1315) loss: 0.7947 (0.7990) time: 0.1654 data: 0.0801 max mem: 9377 +Train: [60] [4800/6250] eta: 0:03:56 lr: 0.000046 grad: 0.1307 (0.1314) loss: 0.7987 (0.7989) time: 0.1592 data: 0.0683 max mem: 9377 +Train: [60] [4900/6250] eta: 0:03:40 lr: 0.000046 grad: 0.1273 (0.1314) loss: 0.7960 (0.7988) time: 0.1199 data: 0.0299 max mem: 9377 +Train: [60] [5000/6250] eta: 0:03:23 lr: 0.000046 grad: 0.1279 (0.1314) loss: 0.7891 (0.7988) time: 0.1470 data: 0.0609 max mem: 9377 +Train: [60] [5100/6250] eta: 0:03:07 lr: 0.000046 grad: 0.1216 (0.1314) loss: 0.8066 (0.7988) time: 0.2111 data: 0.0609 max mem: 9377 +Train: [60] [5200/6250] eta: 0:02:51 lr: 0.000045 grad: 0.1343 (0.1314) loss: 0.7994 (0.7988) time: 0.2075 data: 0.1291 max mem: 9377 +Train: [60] [5300/6250] eta: 0:02:35 lr: 0.000045 grad: 0.1213 (0.1313) loss: 0.8002 (0.7988) time: 0.1765 data: 0.0933 max mem: 9377 +Train: [60] [5400/6250] eta: 0:02:18 lr: 0.000045 grad: 0.1187 (0.1313) loss: 0.8014 (0.7988) time: 0.1569 data: 0.0678 max mem: 9377 +Train: [60] [5500/6250] eta: 0:02:02 lr: 0.000045 grad: 0.1197 (0.1312) loss: 0.8089 (0.7988) time: 0.1607 data: 0.0739 max mem: 9377 +Train: [60] [5600/6250] eta: 0:01:46 lr: 0.000045 grad: 0.1239 (0.1311) loss: 0.8038 (0.7989) time: 0.1862 data: 0.1038 max mem: 9377 +Train: [60] [5700/6250] eta: 0:01:30 lr: 0.000045 grad: 0.1248 (0.1310) loss: 0.8051 (0.7990) time: 0.1801 data: 0.0824 max mem: 9377 +Train: [60] [5800/6250] eta: 0:01:13 lr: 0.000045 grad: 0.1241 (0.1309) loss: 0.8050 (0.7991) time: 0.1812 data: 0.0927 max mem: 9377 +Train: [60] [5900/6250] eta: 0:00:57 lr: 0.000045 grad: 0.1221 (0.1309) loss: 0.8100 (0.7992) time: 0.1936 data: 0.0923 max mem: 9377 +Train: [60] [6000/6250] eta: 0:00:41 lr: 0.000045 grad: 0.1187 (0.1308) loss: 0.8048 (0.7993) time: 0.1597 data: 0.0601 max mem: 9377 +Train: [60] [6100/6250] eta: 0:00:24 lr: 0.000045 grad: 0.1121 (0.1306) loss: 0.8121 (0.7994) time: 0.1583 data: 0.0727 max mem: 9377 +Train: [60] [6200/6250] eta: 0:00:08 lr: 0.000045 grad: 0.1190 (0.1305) loss: 0.8137 (0.7996) time: 0.1334 data: 0.0406 max mem: 9377 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1173 (0.1305) loss: 0.8017 (0.7996) time: 0.1280 data: 0.0388 max mem: 9377 +Train: [60] Total time: 0:17:10 (0.1648 s / it) +Averaged stats: lr: 0.000045 grad: 0.1173 (0.1305) loss: 0.8017 (0.7996) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:05:22 loss: 0.8241 (0.8241) time: 5.2080 data: 5.1542 max mem: 9377 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8066 (0.8125) time: 0.1408 data: 0.1155 max mem: 9377 +Eval (hcp-train-subset): [60] Total time: 0:00:14 (0.2404 s / it) +Averaged stats (hcp-train-subset): loss: 0.8066 (0.8125) +Eval (hcp-val): [60] [ 0/62] eta: 0:05:49 loss: 0.8331 (0.8331) time: 5.6428 data: 5.6050 max mem: 9377 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.8370 (0.8386) time: 0.1201 data: 0.0948 max mem: 9377 +Eval (hcp-val): [60] Total time: 0:00:15 (0.2423 s / it) +Averaged stats (hcp-val): loss: 0.8370 (0.8386) +Eval (nsd-val): [60] [ 0/62] eta: 0:06:33 loss: 0.8138 (0.8138) time: 6.3462 data: 6.3156 max mem: 9377 +Eval (nsd-val): [60] [61/62] eta: 0:00:00 loss: 0.8220 (0.8230) time: 0.1220 data: 0.0953 max mem: 9377 +Eval (nsd-val): [60] Total time: 0:00:14 (0.2360 s / it) +Averaged stats (nsd-val): loss: 0.8220 (0.8230) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [61] [ 0/6250] eta: 11:43:37 lr: 0.000045 grad: 0.2203 (0.2203) loss: 0.8245 (0.8245) time: 6.7548 data: 6.5911 max mem: 9377 +Train: [61] [ 100/6250] eta: 0:22:59 lr: 0.000045 grad: 0.1471 (0.1952) loss: 0.8039 (0.8098) time: 0.1864 data: 0.0743 max mem: 9377 +Train: [61] [ 200/6250] eta: 0:19:12 lr: 0.000045 grad: 0.1357 (0.1653) loss: 0.8192 (0.8127) time: 0.1542 data: 0.0477 max mem: 9377 +Train: [61] [ 300/6250] eta: 0:17:51 lr: 0.000045 grad: 0.1219 (0.1540) loss: 0.8073 (0.8126) time: 0.1524 data: 0.0484 max mem: 9377 +Train: [61] [ 400/6250] eta: 0:17:00 lr: 0.000045 grad: 0.1151 (0.1471) loss: 0.8006 (0.8113) time: 0.1643 data: 0.0617 max mem: 9377 +Train: [61] [ 500/6250] eta: 0:16:30 lr: 0.000045 grad: 0.1165 (0.1436) loss: 0.8057 (0.8099) time: 0.1528 data: 0.0526 max mem: 9377 +Train: [61] [ 600/6250] eta: 0:16:08 lr: 0.000045 grad: 0.1269 (0.1421) loss: 0.8002 (0.8089) time: 0.1825 data: 0.0920 max mem: 9377 +Train: [61] [ 700/6250] eta: 0:15:35 lr: 0.000045 grad: 0.1195 (0.1399) loss: 0.8112 (0.8079) time: 0.1525 data: 0.0576 max mem: 9377 +Train: [61] [ 800/6250] eta: 0:15:12 lr: 0.000045 grad: 0.1222 (0.1379) loss: 0.8122 (0.8078) time: 0.1551 data: 0.0666 max mem: 9377 +Train: [61] [ 900/6250] eta: 0:14:53 lr: 0.000045 grad: 0.1176 (0.1358) loss: 0.8101 (0.8081) time: 0.1661 data: 0.0765 max mem: 9377 +Train: [61] [1000/6250] eta: 0:14:38 lr: 0.000045 grad: 0.1226 (0.1342) loss: 0.8108 (0.8082) time: 0.1783 data: 0.0935 max mem: 9377 +Train: [61] [1100/6250] eta: 0:14:18 lr: 0.000045 grad: 0.1179 (0.1329) loss: 0.8121 (0.8084) time: 0.1439 data: 0.0527 max mem: 9377 +Train: [61] [1200/6250] eta: 0:13:58 lr: 0.000045 grad: 0.1297 (0.1325) loss: 0.7977 (0.8081) time: 0.1536 data: 0.0657 max mem: 9377 +Train: [61] [1300/6250] eta: 0:13:37 lr: 0.000045 grad: 0.1244 (0.1318) loss: 0.7960 (0.8078) time: 0.1640 data: 0.0795 max mem: 9377 +Train: [61] [1400/6250] eta: 0:13:19 lr: 0.000045 grad: 0.1209 (0.1314) loss: 0.8022 (0.8074) time: 0.1472 data: 0.0561 max mem: 9377 +Train: [61] [1500/6250] eta: 0:13:03 lr: 0.000045 grad: 0.1175 (0.1311) loss: 0.8021 (0.8068) time: 0.1539 data: 0.0653 max mem: 9377 +Train: [61] [1600/6250] eta: 0:12:47 lr: 0.000045 grad: 0.1267 (0.1308) loss: 0.7922 (0.8064) time: 0.1740 data: 0.0803 max mem: 9377 +Train: [61] [1700/6250] eta: 0:12:30 lr: 0.000045 grad: 0.1241 (0.1307) loss: 0.8122 (0.8060) time: 0.1515 data: 0.0641 max mem: 9377 +Train: [61] [1800/6250] eta: 0:12:13 lr: 0.000045 grad: 0.1219 (0.1306) loss: 0.7960 (0.8056) time: 0.1459 data: 0.0591 max mem: 9377 +Train: [61] [1900/6250] eta: 0:11:56 lr: 0.000045 grad: 0.1172 (0.1304) loss: 0.8026 (0.8054) time: 0.1635 data: 0.0814 max mem: 9377 +Train: [61] [2000/6250] eta: 0:11:37 lr: 0.000045 grad: 0.1232 (0.1305) loss: 0.7993 (0.8051) time: 0.1551 data: 0.0541 max mem: 9377 +Train: [61] [2100/6250] eta: 0:11:19 lr: 0.000044 grad: 0.1217 (0.1304) loss: 0.8030 (0.8048) time: 0.1701 data: 0.0907 max mem: 9377 +Train: [61] [2200/6250] eta: 0:11:02 lr: 0.000044 grad: 0.1240 (0.1303) loss: 0.7961 (0.8045) time: 0.1618 data: 0.0669 max mem: 9377 +Train: [61] [2300/6250] eta: 0:10:43 lr: 0.000044 grad: 0.1232 (0.1303) loss: 0.7965 (0.8043) time: 0.1342 data: 0.0384 max mem: 9377 +Train: [61] [2400/6250] eta: 0:10:25 lr: 0.000044 grad: 0.1296 (0.1303) loss: 0.7922 (0.8040) time: 0.1700 data: 0.0814 max mem: 9377 +Train: [61] [2500/6250] eta: 0:10:07 lr: 0.000044 grad: 0.1290 (0.1303) loss: 0.7929 (0.8037) time: 0.1623 data: 0.0771 max mem: 9377 +Train: [61] [2600/6250] eta: 0:09:50 lr: 0.000044 grad: 0.1276 (0.1302) loss: 0.7965 (0.8035) time: 0.1336 data: 0.0376 max mem: 9377 +Train: [61] [2700/6250] eta: 0:09:37 lr: 0.000044 grad: 0.1309 (0.1303) loss: 0.8092 (0.8034) time: 0.1197 data: 0.0290 max mem: 9377 +Train: [61] [2800/6250] eta: 0:09:20 lr: 0.000044 grad: 0.1285 (0.1301) loss: 0.8004 (0.8034) time: 0.1766 data: 0.0930 max mem: 9377 +Train: [61] [2900/6250] eta: 0:09:03 lr: 0.000044 grad: 0.1278 (0.1300) loss: 0.7967 (0.8034) time: 0.1759 data: 0.0898 max mem: 9377 +Train: [61] [3000/6250] eta: 0:08:47 lr: 0.000044 grad: 0.1276 (0.1298) loss: 0.8134 (0.8035) time: 0.1779 data: 0.0925 max mem: 9377 +Train: [61] [3100/6250] eta: 0:08:32 lr: 0.000044 grad: 0.1324 (0.1298) loss: 0.8068 (0.8036) time: 0.1779 data: 0.0869 max mem: 9377 +Train: [61] [3200/6250] eta: 0:08:16 lr: 0.000044 grad: 0.1248 (0.1298) loss: 0.7939 (0.8035) time: 0.1799 data: 0.0818 max mem: 9377 +Train: [61] [3300/6250] eta: 0:07:59 lr: 0.000044 grad: 0.1234 (0.1299) loss: 0.8017 (0.8034) time: 0.1440 data: 0.0473 max mem: 9377 +Train: [61] [3400/6250] eta: 0:07:42 lr: 0.000044 grad: 0.1254 (0.1298) loss: 0.8041 (0.8033) time: 0.1603 data: 0.0627 max mem: 9377 +Train: [61] [3500/6250] eta: 0:07:26 lr: 0.000044 grad: 0.1297 (0.1299) loss: 0.7911 (0.8031) time: 0.1361 data: 0.0457 max mem: 9377 +Train: [61] [3600/6250] eta: 0:07:09 lr: 0.000044 grad: 0.1349 (0.1299) loss: 0.7939 (0.8029) time: 0.1494 data: 0.0639 max mem: 9377 +Train: [61] [3700/6250] eta: 0:06:52 lr: 0.000044 grad: 0.1312 (0.1299) loss: 0.7945 (0.8028) time: 0.1297 data: 0.0400 max mem: 9377 +Train: [61] [3800/6250] eta: 0:06:35 lr: 0.000044 grad: 0.1331 (0.1299) loss: 0.7900 (0.8026) time: 0.1634 data: 0.0603 max mem: 9377 +Train: [61] [3900/6250] eta: 0:06:19 lr: 0.000044 grad: 0.1233 (0.1299) loss: 0.7963 (0.8025) time: 0.1269 data: 0.0460 max mem: 9377 +Train: [61] [4000/6250] eta: 0:06:03 lr: 0.000044 grad: 0.1256 (0.1300) loss: 0.8058 (0.8024) time: 0.1599 data: 0.0738 max mem: 9377 +Train: [61] [4100/6250] eta: 0:05:47 lr: 0.000044 grad: 0.1323 (0.1300) loss: 0.8053 (0.8024) time: 0.2129 data: 0.1254 max mem: 9377 +Train: [61] [4200/6250] eta: 0:05:31 lr: 0.000044 grad: 0.1376 (0.1301) loss: 0.7899 (0.8023) time: 0.1719 data: 0.0846 max mem: 9377 +Train: [61] [4300/6250] eta: 0:05:14 lr: 0.000044 grad: 0.1307 (0.1301) loss: 0.8080 (0.8022) time: 0.1278 data: 0.0353 max mem: 9377 +Train: [61] [4400/6250] eta: 0:04:57 lr: 0.000044 grad: 0.1285 (0.1302) loss: 0.8020 (0.8021) time: 0.1469 data: 0.0557 max mem: 9377 +Train: [61] [4500/6250] eta: 0:04:41 lr: 0.000044 grad: 0.1395 (0.1304) loss: 0.7812 (0.8020) time: 0.1639 data: 0.0757 max mem: 9377 +Train: [61] [4600/6250] eta: 0:04:25 lr: 0.000044 grad: 0.1361 (0.1305) loss: 0.7883 (0.8018) time: 0.1345 data: 0.0359 max mem: 9377 +Train: [61] [4700/6250] eta: 0:04:09 lr: 0.000044 grad: 0.1212 (0.1306) loss: 0.7954 (0.8017) time: 0.1669 data: 0.0744 max mem: 9377 +Train: [61] [4800/6250] eta: 0:03:53 lr: 0.000044 grad: 0.1358 (0.1306) loss: 0.7951 (0.8015) time: 0.1593 data: 0.0709 max mem: 9377 +Train: [61] [4900/6250] eta: 0:03:37 lr: 0.000044 grad: 0.1261 (0.1307) loss: 0.7919 (0.8014) time: 0.1696 data: 0.0874 max mem: 9377 +Train: [61] [5000/6250] eta: 0:03:21 lr: 0.000044 grad: 0.1297 (0.1308) loss: 0.7971 (0.8012) time: 0.1874 data: 0.1008 max mem: 9377 +Train: [61] [5100/6250] eta: 0:03:05 lr: 0.000044 grad: 0.1362 (0.1309) loss: 0.7895 (0.8010) time: 0.1533 data: 0.0582 max mem: 9377 +Train: [61] [5200/6250] eta: 0:02:49 lr: 0.000044 grad: 0.1262 (0.1309) loss: 0.7944 (0.8009) time: 0.1507 data: 0.0649 max mem: 9377 +Train: [61] [5300/6250] eta: 0:02:33 lr: 0.000043 grad: 0.1273 (0.1309) loss: 0.7919 (0.8008) time: 0.1763 data: 0.0834 max mem: 9377 +Train: [61] [5400/6250] eta: 0:02:17 lr: 0.000043 grad: 0.1332 (0.1310) loss: 0.7865 (0.8006) time: 0.1479 data: 0.0620 max mem: 9377 +Train: [61] [5500/6250] eta: 0:02:01 lr: 0.000043 grad: 0.1334 (0.1311) loss: 0.7788 (0.8004) time: 0.1675 data: 0.0803 max mem: 9377 +Train: [61] [5600/6250] eta: 0:01:45 lr: 0.000043 grad: 0.1272 (0.1312) loss: 0.8015 (0.8002) time: 0.1803 data: 0.0963 max mem: 9377 +Train: [61] [5700/6250] eta: 0:01:29 lr: 0.000043 grad: 0.1291 (0.1312) loss: 0.7950 (0.8001) time: 0.1474 data: 0.0616 max mem: 9377 +Train: [61] [5800/6250] eta: 0:01:12 lr: 0.000043 grad: 0.1374 (0.1313) loss: 0.7985 (0.7999) time: 0.1460 data: 0.0509 max mem: 9377 +Train: [61] [5900/6250] eta: 0:00:56 lr: 0.000043 grad: 0.1373 (0.1313) loss: 0.7872 (0.7998) time: 0.1374 data: 0.0424 max mem: 9377 +Train: [61] [6000/6250] eta: 0:00:40 lr: 0.000043 grad: 0.1314 (0.1314) loss: 0.7835 (0.7997) time: 0.1147 data: 0.0094 max mem: 9377 +Train: [61] [6100/6250] eta: 0:00:24 lr: 0.000043 grad: 0.1315 (0.1315) loss: 0.7942 (0.7995) time: 0.1397 data: 0.0512 max mem: 9377 +Train: [61] [6200/6250] eta: 0:00:08 lr: 0.000043 grad: 0.1347 (0.1316) loss: 0.7962 (0.7993) time: 0.1403 data: 0.0425 max mem: 9377 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1303 (0.1316) loss: 0.7942 (0.7993) time: 0.1549 data: 0.0619 max mem: 9377 +Train: [61] Total time: 0:16:52 (0.1621 s / it) +Averaged stats: lr: 0.000043 grad: 0.1303 (0.1316) loss: 0.7942 (0.7993) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:05:07 loss: 0.8242 (0.8242) time: 4.9540 data: 4.8949 max mem: 9377 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8093 (0.8123) time: 0.1269 data: 0.1014 max mem: 9377 +Eval (hcp-train-subset): [61] Total time: 0:00:15 (0.2534 s / it) +Averaged stats (hcp-train-subset): loss: 0.8093 (0.8123) +Eval (hcp-val): [61] [ 0/62] eta: 0:06:19 loss: 0.8367 (0.8367) time: 6.1267 data: 6.0801 max mem: 9377 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8369 (0.8386) time: 0.1551 data: 0.1289 max mem: 9377 +Eval (hcp-val): [61] Total time: 0:00:16 (0.2617 s / it) +Averaged stats (hcp-val): loss: 0.8369 (0.8386) +Eval (nsd-val): [61] [ 0/62] eta: 0:05:49 loss: 0.8145 (0.8145) time: 5.6395 data: 5.5890 max mem: 9377 +Eval (nsd-val): [61] [61/62] eta: 0:00:00 loss: 0.8274 (0.8261) time: 0.1427 data: 0.1162 max mem: 9377 +Eval (nsd-val): [61] Total time: 0:00:15 (0.2566 s / it) +Averaged stats (nsd-val): loss: 0.8274 (0.8261) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [62] [ 0/6250] eta: 11:28:27 lr: 0.000043 grad: 0.2889 (0.2889) loss: 0.7987 (0.7987) time: 6.6092 data: 6.4663 max mem: 9377 +Train: [62] [ 100/6250] eta: 0:24:28 lr: 0.000043 grad: 0.1530 (0.1657) loss: 0.8079 (0.8269) time: 0.1605 data: 0.0374 max mem: 9377 +Train: [62] [ 200/6250] eta: 0:20:21 lr: 0.000043 grad: 0.1173 (0.1532) loss: 0.8112 (0.8186) time: 0.1473 data: 0.0403 max mem: 9377 +Train: [62] [ 300/6250] eta: 0:18:47 lr: 0.000043 grad: 0.1224 (0.1482) loss: 0.8030 (0.8149) time: 0.1768 data: 0.0767 max mem: 9377 +Train: [62] [ 400/6250] eta: 0:17:40 lr: 0.000043 grad: 0.1312 (0.1448) loss: 0.8021 (0.8119) time: 0.1703 data: 0.0743 max mem: 9377 +Train: [62] [ 500/6250] eta: 0:17:01 lr: 0.000043 grad: 0.1243 (0.1425) loss: 0.8140 (0.8105) time: 0.1422 data: 0.0449 max mem: 9377 +Train: [62] [ 600/6250] eta: 0:16:40 lr: 0.000043 grad: 0.1299 (0.1405) loss: 0.8034 (0.8098) time: 0.1854 data: 0.0937 max mem: 9377 +Train: [62] [ 700/6250] eta: 0:16:10 lr: 0.000043 grad: 0.1291 (0.1393) loss: 0.7945 (0.8083) time: 0.1642 data: 0.0751 max mem: 9377 +Train: [62] [ 800/6250] eta: 0:15:45 lr: 0.000043 grad: 0.1203 (0.1384) loss: 0.7989 (0.8069) time: 0.1628 data: 0.0700 max mem: 9377 +Train: [62] [ 900/6250] eta: 0:15:23 lr: 0.000043 grad: 0.1270 (0.1375) loss: 0.7957 (0.8060) time: 0.1745 data: 0.0810 max mem: 9377 +Train: [62] [1000/6250] eta: 0:15:10 lr: 0.000043 grad: 0.1281 (0.1370) loss: 0.8039 (0.8052) time: 0.1381 data: 0.0561 max mem: 9377 +Train: [62] [1100/6250] eta: 0:14:57 lr: 0.000043 grad: 0.1363 (0.1368) loss: 0.7996 (0.8043) time: 0.1769 data: 0.0952 max mem: 9377 +Train: [62] [1200/6250] eta: 0:14:46 lr: 0.000043 grad: 0.1268 (0.1368) loss: 0.7932 (0.8033) time: 0.2227 data: 0.1342 max mem: 9377 +Train: [62] [1300/6250] eta: 0:14:27 lr: 0.000043 grad: 0.1291 (0.1365) loss: 0.7959 (0.8025) time: 0.1741 data: 0.0907 max mem: 9377 +Train: [62] [1400/6250] eta: 0:14:11 lr: 0.000043 grad: 0.1283 (0.1364) loss: 0.7932 (0.8018) time: 0.1595 data: 0.0625 max mem: 9377 +Train: [62] [1500/6250] eta: 0:13:55 lr: 0.000043 grad: 0.1252 (0.1364) loss: 0.7903 (0.8010) time: 0.1690 data: 0.0749 max mem: 9377 +Train: [62] [1600/6250] eta: 0:13:36 lr: 0.000043 grad: 0.1273 (0.1361) loss: 0.7953 (0.8005) time: 0.1553 data: 0.0602 max mem: 9377 +Train: [62] [1700/6250] eta: 0:13:16 lr: 0.000043 grad: 0.1328 (0.1363) loss: 0.7875 (0.7998) time: 0.1937 data: 0.1045 max mem: 9377 +Train: [62] [1800/6250] eta: 0:12:56 lr: 0.000043 grad: 0.1313 (0.1363) loss: 0.7912 (0.7993) time: 0.1428 data: 0.0477 max mem: 9377 +Train: [62] [1900/6250] eta: 0:12:38 lr: 0.000043 grad: 0.1292 (0.1360) loss: 0.7950 (0.7989) time: 0.1814 data: 0.0885 max mem: 9377 +Train: [62] [2000/6250] eta: 0:12:16 lr: 0.000043 grad: 0.1301 (0.1361) loss: 0.7834 (0.7985) time: 0.1522 data: 0.0575 max mem: 9377 +Train: [62] [2100/6250] eta: 0:11:58 lr: 0.000043 grad: 0.1324 (0.1358) loss: 0.7987 (0.7983) time: 0.1465 data: 0.0543 max mem: 9377 +Train: [62] [2200/6250] eta: 0:11:39 lr: 0.000042 grad: 0.1420 (0.1357) loss: 0.7830 (0.7980) time: 0.1483 data: 0.0547 max mem: 9377 +Train: [62] [2300/6250] eta: 0:11:22 lr: 0.000042 grad: 0.1424 (0.1358) loss: 0.7919 (0.7977) time: 0.1796 data: 0.0924 max mem: 9377 +Train: [62] [2400/6250] eta: 0:11:01 lr: 0.000042 grad: 0.1268 (0.1356) loss: 0.7963 (0.7977) time: 0.1427 data: 0.0469 max mem: 9377 +Train: [62] [2500/6250] eta: 0:10:42 lr: 0.000042 grad: 0.1343 (0.1357) loss: 0.7956 (0.7976) time: 0.1493 data: 0.0587 max mem: 9377 +Train: [62] [2600/6250] eta: 0:10:24 lr: 0.000042 grad: 0.1379 (0.1358) loss: 0.7970 (0.7975) time: 0.1789 data: 0.0886 max mem: 9377 +Train: [62] [2700/6250] eta: 0:10:08 lr: 0.000042 grad: 0.1312 (0.1357) loss: 0.8029 (0.7976) time: 0.1637 data: 0.0667 max mem: 9377 +Train: [62] [2800/6250] eta: 0:09:49 lr: 0.000042 grad: 0.1377 (0.1356) loss: 0.7997 (0.7975) time: 0.1459 data: 0.0616 max mem: 9377 +Train: [62] [2900/6250] eta: 0:09:30 lr: 0.000042 grad: 0.1327 (0.1356) loss: 0.7961 (0.7975) time: 0.1591 data: 0.0717 max mem: 9377 +Train: [62] [3000/6250] eta: 0:09:11 lr: 0.000042 grad: 0.1320 (0.1357) loss: 0.7967 (0.7975) time: 0.1568 data: 0.0737 max mem: 9377 +Train: [62] [3100/6250] eta: 0:08:56 lr: 0.000042 grad: 0.1219 (0.1355) loss: 0.7983 (0.7975) time: 0.2345 data: 0.1488 max mem: 9377 +Train: [62] [3200/6250] eta: 0:08:36 lr: 0.000042 grad: 0.1355 (0.1353) loss: 0.7956 (0.7976) time: 0.1522 data: 0.0616 max mem: 9377 +Train: [62] [3300/6250] eta: 0:08:18 lr: 0.000042 grad: 0.1283 (0.1352) loss: 0.7989 (0.7977) time: 0.1548 data: 0.0619 max mem: 9377 +Train: [62] [3400/6250] eta: 0:08:00 lr: 0.000042 grad: 0.1252 (0.1350) loss: 0.8032 (0.7978) time: 0.1574 data: 0.0651 max mem: 9377 +Train: [62] [3500/6250] eta: 0:07:42 lr: 0.000042 grad: 0.1224 (0.1347) loss: 0.8057 (0.7980) time: 0.1536 data: 0.0613 max mem: 9377 +Train: [62] [3600/6250] eta: 0:07:25 lr: 0.000042 grad: 0.1288 (0.1345) loss: 0.8060 (0.7981) time: 0.1742 data: 0.0889 max mem: 9377 +Train: [62] [3700/6250] eta: 0:07:06 lr: 0.000042 grad: 0.1231 (0.1343) loss: 0.8019 (0.7983) time: 0.1462 data: 0.0597 max mem: 9377 +Train: [62] [3800/6250] eta: 0:06:49 lr: 0.000042 grad: 0.1352 (0.1342) loss: 0.7964 (0.7984) time: 0.1775 data: 0.0866 max mem: 9377 +Train: [62] [3900/6250] eta: 0:06:32 lr: 0.000042 grad: 0.1266 (0.1341) loss: 0.7944 (0.7985) time: 0.1723 data: 0.0790 max mem: 9377 +Train: [62] [4000/6250] eta: 0:06:14 lr: 0.000042 grad: 0.1216 (0.1339) loss: 0.7997 (0.7985) time: 0.1596 data: 0.0649 max mem: 9377 +Train: [62] [4100/6250] eta: 0:05:57 lr: 0.000042 grad: 0.1344 (0.1338) loss: 0.7907 (0.7985) time: 0.1439 data: 0.0580 max mem: 9377 +Train: [62] [4200/6250] eta: 0:05:40 lr: 0.000042 grad: 0.1334 (0.1337) loss: 0.7946 (0.7984) time: 0.1555 data: 0.0685 max mem: 9377 +Train: [62] [4300/6250] eta: 0:05:23 lr: 0.000042 grad: 0.1242 (0.1336) loss: 0.8028 (0.7985) time: 0.1564 data: 0.0674 max mem: 9377 +Train: [62] [4400/6250] eta: 0:05:06 lr: 0.000042 grad: 0.1205 (0.1337) loss: 0.8023 (0.7985) time: 0.1333 data: 0.0452 max mem: 9377 +Train: [62] [4500/6250] eta: 0:04:49 lr: 0.000042 grad: 0.1272 (0.1337) loss: 0.7971 (0.7985) time: 0.1381 data: 0.0522 max mem: 9377 +Train: [62] [4600/6250] eta: 0:04:32 lr: 0.000042 grad: 0.1444 (0.1337) loss: 0.7848 (0.7985) time: 0.1595 data: 0.0668 max mem: 9377 +Train: [62] [4700/6250] eta: 0:04:15 lr: 0.000042 grad: 0.1222 (0.1337) loss: 0.7978 (0.7984) time: 0.1554 data: 0.0735 max mem: 9377 +Train: [62] [4800/6250] eta: 0:03:58 lr: 0.000042 grad: 0.1370 (0.1337) loss: 0.7985 (0.7984) time: 0.1553 data: 0.0613 max mem: 9377 +Train: [62] [4900/6250] eta: 0:03:42 lr: 0.000042 grad: 0.1318 (0.1338) loss: 0.7990 (0.7983) time: 0.1129 data: 0.0235 max mem: 9377 +Train: [62] [5000/6250] eta: 0:03:25 lr: 0.000042 grad: 0.1314 (0.1338) loss: 0.8014 (0.7983) time: 0.1867 data: 0.1005 max mem: 9377 +Train: [62] [5100/6250] eta: 0:03:08 lr: 0.000042 grad: 0.1282 (0.1338) loss: 0.8080 (0.7984) time: 0.1553 data: 0.0754 max mem: 9377 +Train: [62] [5200/6250] eta: 0:02:52 lr: 0.000042 grad: 0.1306 (0.1338) loss: 0.7933 (0.7984) time: 0.1685 data: 0.0862 max mem: 9377 +Train: [62] [5300/6250] eta: 0:02:36 lr: 0.000042 grad: 0.1246 (0.1338) loss: 0.7995 (0.7984) time: 0.1616 data: 0.0828 max mem: 9377 +Train: [62] [5400/6250] eta: 0:02:19 lr: 0.000041 grad: 0.1309 (0.1337) loss: 0.8021 (0.7984) time: 0.1807 data: 0.0968 max mem: 9377 +Train: [62] [5500/6250] eta: 0:02:03 lr: 0.000041 grad: 0.1312 (0.1337) loss: 0.8009 (0.7984) time: 0.1604 data: 0.0711 max mem: 9377 +Train: [62] [5600/6250] eta: 0:01:46 lr: 0.000041 grad: 0.1304 (0.1336) loss: 0.8005 (0.7984) time: 0.1653 data: 0.0799 max mem: 9377 +Train: [62] [5700/6250] eta: 0:01:30 lr: 0.000041 grad: 0.1413 (0.1337) loss: 0.7973 (0.7983) time: 0.1639 data: 0.0733 max mem: 9377 +Train: [62] [5800/6250] eta: 0:01:14 lr: 0.000041 grad: 0.1353 (0.1337) loss: 0.7966 (0.7983) time: 0.1913 data: 0.0925 max mem: 9377 +Train: [62] [5900/6250] eta: 0:00:57 lr: 0.000041 grad: 0.1343 (0.1338) loss: 0.7969 (0.7983) time: 0.1902 data: 0.1023 max mem: 9377 +Train: [62] [6000/6250] eta: 0:00:41 lr: 0.000041 grad: 0.1333 (0.1338) loss: 0.7911 (0.7982) time: 0.1494 data: 0.0578 max mem: 9377 +Train: [62] [6100/6250] eta: 0:00:24 lr: 0.000041 grad: 0.1310 (0.1338) loss: 0.7997 (0.7982) time: 0.1593 data: 0.0737 max mem: 9377 +Train: [62] [6200/6250] eta: 0:00:08 lr: 0.000041 grad: 0.1350 (0.1338) loss: 0.8010 (0.7981) time: 0.1322 data: 0.0439 max mem: 9377 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1309 (0.1338) loss: 0.7976 (0.7981) time: 0.1545 data: 0.0651 max mem: 9377 +Train: [62] Total time: 0:17:12 (0.1652 s / it) +Averaged stats: lr: 0.000041 grad: 0.1309 (0.1338) loss: 0.7976 (0.7981) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:05:30 loss: 0.8224 (0.8224) time: 5.3372 data: 5.3060 max mem: 9377 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.8081 (0.8113) time: 0.1160 data: 0.0906 max mem: 9377 +Eval (hcp-train-subset): [62] Total time: 0:00:14 (0.2369 s / it) +Averaged stats (hcp-train-subset): loss: 0.8081 (0.8113) +Eval (hcp-val): [62] [ 0/62] eta: 0:06:10 loss: 0.8384 (0.8384) time: 5.9795 data: 5.9471 max mem: 9377 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8389 (0.8399) time: 0.1237 data: 0.0965 max mem: 9377 +Eval (hcp-val): [62] Total time: 0:00:14 (0.2397 s / it) +Averaged stats (hcp-val): loss: 0.8389 (0.8399) +Eval (nsd-val): [62] [ 0/62] eta: 0:04:04 loss: 0.8063 (0.8063) time: 3.9424 data: 3.8441 max mem: 9377 +Eval (nsd-val): [62] [61/62] eta: 0:00:00 loss: 0.8165 (0.8188) time: 0.1268 data: 0.0999 max mem: 9377 +Eval (nsd-val): [62] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (nsd-val): loss: 0.8165 (0.8188) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 11:52:37 lr: 0.000041 grad: 0.2045 (0.2045) loss: 0.8149 (0.8149) time: 6.8411 data: 6.7358 max mem: 9377 +Train: [63] [ 100/6250] eta: 0:22:26 lr: 0.000041 grad: 0.1636 (0.1834) loss: 0.8047 (0.8168) time: 0.1737 data: 0.0816 max mem: 9377 +Train: [63] [ 200/6250] eta: 0:19:20 lr: 0.000041 grad: 0.1110 (0.1656) loss: 0.8132 (0.8147) time: 0.1858 data: 0.0905 max mem: 9377 +Train: [63] [ 300/6250] eta: 0:18:01 lr: 0.000041 grad: 0.1314 (0.1537) loss: 0.8072 (0.8130) time: 0.1838 data: 0.0813 max mem: 9377 +Train: [63] [ 400/6250] eta: 0:16:53 lr: 0.000041 grad: 0.1287 (0.1497) loss: 0.8125 (0.8103) time: 0.1550 data: 0.0599 max mem: 9377 +Train: [63] [ 500/6250] eta: 0:16:18 lr: 0.000041 grad: 0.1268 (0.1464) loss: 0.8015 (0.8090) time: 0.1467 data: 0.0400 max mem: 9377 +Train: [63] [ 600/6250] eta: 0:15:55 lr: 0.000041 grad: 0.1232 (0.1430) loss: 0.8086 (0.8089) time: 0.1611 data: 0.0614 max mem: 9377 +Train: [63] [ 700/6250] eta: 0:15:34 lr: 0.000041 grad: 0.1149 (0.1405) loss: 0.8144 (0.8088) time: 0.1627 data: 0.0742 max mem: 9377 +Train: [63] [ 800/6250] eta: 0:15:10 lr: 0.000041 grad: 0.1145 (0.1381) loss: 0.8107 (0.8094) time: 0.1213 data: 0.0237 max mem: 9377 +Train: [63] [ 900/6250] eta: 0:14:57 lr: 0.000041 grad: 0.1193 (0.1362) loss: 0.8129 (0.8097) time: 0.1944 data: 0.1025 max mem: 9377 +Train: [63] [1000/6250] eta: 0:14:41 lr: 0.000041 grad: 0.1170 (0.1353) loss: 0.8031 (0.8095) time: 0.1876 data: 0.1041 max mem: 9377 +Train: [63] [1100/6250] eta: 0:14:24 lr: 0.000041 grad: 0.1173 (0.1345) loss: 0.8094 (0.8092) time: 0.1381 data: 0.0570 max mem: 9377 +Train: [63] [1200/6250] eta: 0:14:05 lr: 0.000041 grad: 0.1209 (0.1341) loss: 0.7983 (0.8085) time: 0.1498 data: 0.0697 max mem: 9377 +Train: [63] [1300/6250] eta: 0:13:45 lr: 0.000041 grad: 0.1284 (0.1338) loss: 0.7974 (0.8076) time: 0.1613 data: 0.0703 max mem: 9377 +Train: [63] [1400/6250] eta: 0:13:26 lr: 0.000041 grad: 0.1430 (0.1336) loss: 0.7878 (0.8070) time: 0.1567 data: 0.0631 max mem: 9377 +Train: [63] [1500/6250] eta: 0:13:09 lr: 0.000041 grad: 0.1237 (0.1333) loss: 0.8057 (0.8067) time: 0.1635 data: 0.0737 max mem: 9377 +Train: [63] [1600/6250] eta: 0:12:56 lr: 0.000041 grad: 0.1338 (0.1333) loss: 0.8085 (0.8061) time: 0.1622 data: 0.0682 max mem: 9377 +Train: [63] [1700/6250] eta: 0:12:39 lr: 0.000041 grad: 0.1354 (0.1334) loss: 0.7914 (0.8055) time: 0.1839 data: 0.0901 max mem: 9377 +Train: [63] [1800/6250] eta: 0:12:20 lr: 0.000041 grad: 0.1308 (0.1337) loss: 0.7931 (0.8049) time: 0.1661 data: 0.0723 max mem: 9377 +Train: [63] [1900/6250] eta: 0:12:01 lr: 0.000041 grad: 0.1206 (0.1336) loss: 0.8050 (0.8045) time: 0.1585 data: 0.0565 max mem: 9377 +Train: [63] [2000/6250] eta: 0:11:41 lr: 0.000041 grad: 0.1282 (0.1334) loss: 0.7949 (0.8042) time: 0.1631 data: 0.0682 max mem: 9377 +Train: [63] [2100/6250] eta: 0:11:21 lr: 0.000041 grad: 0.1314 (0.1334) loss: 0.7912 (0.8038) time: 0.1510 data: 0.0601 max mem: 9377 +Train: [63] [2200/6250] eta: 0:11:03 lr: 0.000041 grad: 0.1243 (0.1333) loss: 0.8069 (0.8036) time: 0.1169 data: 0.0282 max mem: 9377 +Train: [63] [2300/6250] eta: 0:10:48 lr: 0.000041 grad: 0.1211 (0.1332) loss: 0.8013 (0.8034) time: 0.1927 data: 0.1134 max mem: 9377 +Train: [63] [2400/6250] eta: 0:10:30 lr: 0.000040 grad: 0.1244 (0.1331) loss: 0.8041 (0.8033) time: 0.1492 data: 0.0663 max mem: 9377 +Train: [63] [2500/6250] eta: 0:10:13 lr: 0.000040 grad: 0.1286 (0.1330) loss: 0.8064 (0.8032) time: 0.1626 data: 0.0758 max mem: 9377 +Train: [63] [2600/6250] eta: 0:09:55 lr: 0.000040 grad: 0.1256 (0.1329) loss: 0.8043 (0.8032) time: 0.1481 data: 0.0612 max mem: 9377 +Train: [63] [2700/6250] eta: 0:09:38 lr: 0.000040 grad: 0.1245 (0.1328) loss: 0.7984 (0.8030) time: 0.1575 data: 0.0745 max mem: 9377 +Train: [63] [2800/6250] eta: 0:09:23 lr: 0.000040 grad: 0.1299 (0.1327) loss: 0.8006 (0.8030) time: 0.1709 data: 0.0883 max mem: 9377 +Train: [63] [2900/6250] eta: 0:09:07 lr: 0.000040 grad: 0.1282 (0.1327) loss: 0.8026 (0.8028) time: 0.1692 data: 0.0830 max mem: 9377 +Train: [63] [3000/6250] eta: 0:08:52 lr: 0.000040 grad: 0.1314 (0.1327) loss: 0.8040 (0.8027) time: 0.1805 data: 0.0988 max mem: 9377 +Train: [63] [3100/6250] eta: 0:08:38 lr: 0.000040 grad: 0.1229 (0.1327) loss: 0.8048 (0.8026) time: 0.1926 data: 0.0860 max mem: 9377 +Train: [63] [3200/6250] eta: 0:08:23 lr: 0.000040 grad: 0.1279 (0.1327) loss: 0.8036 (0.8025) time: 0.1553 data: 0.0593 max mem: 9377 +Train: [63] [3300/6250] eta: 0:08:08 lr: 0.000040 grad: 0.1256 (0.1327) loss: 0.8008 (0.8024) time: 0.1677 data: 0.0688 max mem: 9377 +Train: [63] [3400/6250] eta: 0:07:52 lr: 0.000040 grad: 0.1339 (0.1327) loss: 0.7996 (0.8023) time: 0.1621 data: 0.0817 max mem: 9377 +Train: [63] [3500/6250] eta: 0:07:35 lr: 0.000040 grad: 0.1243 (0.1327) loss: 0.8030 (0.8021) time: 0.1653 data: 0.0596 max mem: 9377 +Train: [63] [3600/6250] eta: 0:07:18 lr: 0.000040 grad: 0.1330 (0.1328) loss: 0.7913 (0.8019) time: 0.1617 data: 0.0734 max mem: 9377 +Train: [63] [3700/6250] eta: 0:07:01 lr: 0.000040 grad: 0.1217 (0.1328) loss: 0.8031 (0.8019) time: 0.1544 data: 0.0597 max mem: 9377 +Train: [63] [3800/6250] eta: 0:06:44 lr: 0.000040 grad: 0.1188 (0.1327) loss: 0.8042 (0.8018) time: 0.1506 data: 0.0551 max mem: 9377 +Train: [63] [3900/6250] eta: 0:06:28 lr: 0.000040 grad: 0.1325 (0.1326) loss: 0.7998 (0.8018) time: 0.1638 data: 0.0672 max mem: 9377 +Train: [63] [4000/6250] eta: 0:06:12 lr: 0.000040 grad: 0.1284 (0.1326) loss: 0.7989 (0.8017) time: 0.1220 data: 0.0358 max mem: 9377 +Train: [63] [4100/6250] eta: 0:05:56 lr: 0.000040 grad: 0.1264 (0.1326) loss: 0.8015 (0.8017) time: 0.2342 data: 0.1337 max mem: 9377 +Train: [63] [4200/6250] eta: 0:05:38 lr: 0.000040 grad: 0.1240 (0.1326) loss: 0.8043 (0.8017) time: 0.1370 data: 0.0394 max mem: 9377 +Train: [63] [4300/6250] eta: 0:05:21 lr: 0.000040 grad: 0.1328 (0.1326) loss: 0.8033 (0.8016) time: 0.1675 data: 0.0799 max mem: 9377 +Train: [63] [4400/6250] eta: 0:05:04 lr: 0.000040 grad: 0.1343 (0.1325) loss: 0.7999 (0.8016) time: 0.1642 data: 0.0760 max mem: 9377 +Train: [63] [4500/6250] eta: 0:04:48 lr: 0.000040 grad: 0.1298 (0.1325) loss: 0.7948 (0.8015) time: 0.1746 data: 0.0878 max mem: 9377 +Train: [63] [4600/6250] eta: 0:04:31 lr: 0.000040 grad: 0.1267 (0.1325) loss: 0.7986 (0.8015) time: 0.1556 data: 0.0593 max mem: 9377 +Train: [63] [4700/6250] eta: 0:04:14 lr: 0.000040 grad: 0.1397 (0.1325) loss: 0.7994 (0.8014) time: 0.1640 data: 0.0680 max mem: 9377 +Train: [63] [4800/6250] eta: 0:03:58 lr: 0.000040 grad: 0.1298 (0.1324) loss: 0.7974 (0.8014) time: 0.1567 data: 0.0635 max mem: 9377 +Train: [63] [4900/6250] eta: 0:03:41 lr: 0.000040 grad: 0.1311 (0.1324) loss: 0.8019 (0.8014) time: 0.1639 data: 0.0740 max mem: 9377 +Train: [63] [5000/6250] eta: 0:03:25 lr: 0.000040 grad: 0.1305 (0.1324) loss: 0.8002 (0.8013) time: 0.1498 data: 0.0515 max mem: 9377 +Train: [63] [5100/6250] eta: 0:03:08 lr: 0.000040 grad: 0.1352 (0.1325) loss: 0.7985 (0.8012) time: 0.1792 data: 0.0946 max mem: 9377 +Train: [63] [5200/6250] eta: 0:02:52 lr: 0.000040 grad: 0.1281 (0.1325) loss: 0.8011 (0.8011) time: 0.1733 data: 0.0829 max mem: 9377 +Train: [63] [5300/6250] eta: 0:02:36 lr: 0.000040 grad: 0.1267 (0.1324) loss: 0.7982 (0.8011) time: 0.1844 data: 0.0989 max mem: 9377 +Train: [63] [5400/6250] eta: 0:02:20 lr: 0.000040 grad: 0.1308 (0.1324) loss: 0.7971 (0.8010) time: 0.1567 data: 0.0654 max mem: 9377 +Train: [63] [5500/6250] eta: 0:02:03 lr: 0.000040 grad: 0.1371 (0.1326) loss: 0.7954 (0.8008) time: 0.1948 data: 0.1019 max mem: 9377 +Train: [63] [5600/6250] eta: 0:01:47 lr: 0.000039 grad: 0.1255 (0.1326) loss: 0.8027 (0.8007) time: 0.1853 data: 0.0866 max mem: 9377 +Train: [63] [5700/6250] eta: 0:01:30 lr: 0.000039 grad: 0.1284 (0.1328) loss: 0.8033 (0.8006) time: 0.1574 data: 0.0588 max mem: 9377 +Train: [63] [5800/6250] eta: 0:01:14 lr: 0.000039 grad: 0.1388 (0.1329) loss: 0.7880 (0.8005) time: 0.1493 data: 0.0611 max mem: 9377 +Train: [63] [5900/6250] eta: 0:00:57 lr: 0.000039 grad: 0.1261 (0.1329) loss: 0.7925 (0.8004) time: 0.1488 data: 0.0598 max mem: 9377 +Train: [63] [6000/6250] eta: 0:00:41 lr: 0.000039 grad: 0.1324 (0.1330) loss: 0.7913 (0.8002) time: 0.1420 data: 0.0499 max mem: 9377 +Train: [63] [6100/6250] eta: 0:00:24 lr: 0.000039 grad: 0.1401 (0.1331) loss: 0.7849 (0.8001) time: 0.1387 data: 0.0511 max mem: 9377 +Train: [63] [6200/6250] eta: 0:00:08 lr: 0.000039 grad: 0.1324 (0.1332) loss: 0.8014 (0.8000) time: 0.1386 data: 0.0401 max mem: 9377 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1346 (0.1333) loss: 0.7949 (0.7999) time: 0.1658 data: 0.0722 max mem: 9377 +Train: [63] Total time: 0:17:11 (0.1650 s / it) +Averaged stats: lr: 0.000039 grad: 0.1346 (0.1333) loss: 0.7949 (0.7999) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:04:23 loss: 0.8192 (0.8192) time: 4.2533 data: 4.1734 max mem: 9377 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.8046 (0.8093) time: 0.1263 data: 0.0994 max mem: 9377 +Eval (hcp-train-subset): [63] Total time: 0:00:14 (0.2343 s / it) +Averaged stats (hcp-train-subset): loss: 0.8046 (0.8093) +Eval (hcp-val): [63] [ 0/62] eta: 0:05:55 loss: 0.8325 (0.8325) time: 5.7390 data: 5.7072 max mem: 9377 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8373 (0.8399) time: 0.1387 data: 0.1135 max mem: 9377 +Eval (hcp-val): [63] Total time: 0:00:15 (0.2421 s / it) +Averaged stats (hcp-val): loss: 0.8373 (0.8399) +Eval (nsd-val): [63] [ 0/62] eta: 0:05:23 loss: 0.8071 (0.8071) time: 5.2122 data: 5.1787 max mem: 9377 +Eval (nsd-val): [63] [61/62] eta: 0:00:00 loss: 0.8168 (0.8186) time: 0.1311 data: 0.1030 max mem: 9377 +Eval (nsd-val): [63] Total time: 0:00:14 (0.2281 s / it) +Averaged stats (nsd-val): loss: 0.8168 (0.8186) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 9:38:44 lr: 0.000039 grad: 0.0866 (0.0866) loss: 0.8183 (0.8183) time: 5.5559 data: 5.2927 max mem: 9377 +Train: [64] [ 100/6250] eta: 0:22:09 lr: 0.000039 grad: 0.1385 (0.1710) loss: 0.8092 (0.8199) time: 0.1642 data: 0.0542 max mem: 9377 +Train: [64] [ 200/6250] eta: 0:20:01 lr: 0.000039 grad: 0.1413 (0.1687) loss: 0.8062 (0.8100) time: 0.2072 data: 0.1166 max mem: 9377 +Train: [64] [ 300/6250] eta: 0:18:41 lr: 0.000039 grad: 0.1451 (0.1652) loss: 0.8077 (0.8075) time: 0.1724 data: 0.0808 max mem: 9377 +Train: [64] [ 400/6250] eta: 0:17:53 lr: 0.000039 grad: 0.1309 (0.1597) loss: 0.8148 (0.8079) time: 0.1759 data: 0.0831 max mem: 9377 +Train: [64] [ 500/6250] eta: 0:17:04 lr: 0.000039 grad: 0.1391 (0.1558) loss: 0.8040 (0.8071) time: 0.1529 data: 0.0588 max mem: 9377 +Train: [64] [ 600/6250] eta: 0:16:32 lr: 0.000039 grad: 0.1403 (0.1525) loss: 0.7989 (0.8069) time: 0.1753 data: 0.0770 max mem: 9377 +Train: [64] [ 700/6250] eta: 0:16:03 lr: 0.000039 grad: 0.1322 (0.1504) loss: 0.8033 (0.8064) time: 0.1689 data: 0.0741 max mem: 9377 +Train: [64] [ 800/6250] eta: 0:15:39 lr: 0.000039 grad: 0.1290 (0.1490) loss: 0.8099 (0.8062) time: 0.1487 data: 0.0541 max mem: 9377 +Train: [64] [ 900/6250] eta: 0:15:19 lr: 0.000039 grad: 0.1385 (0.1480) loss: 0.8003 (0.8059) time: 0.1713 data: 0.0840 max mem: 9377 +Train: [64] [1000/6250] eta: 0:14:56 lr: 0.000039 grad: 0.1365 (0.1471) loss: 0.8021 (0.8058) time: 0.1768 data: 0.0924 max mem: 9377 +Train: [64] [1100/6250] eta: 0:14:40 lr: 0.000039 grad: 0.1374 (0.1461) loss: 0.7953 (0.8054) time: 0.1497 data: 0.0506 max mem: 9377 +Train: [64] [1200/6250] eta: 0:14:22 lr: 0.000039 grad: 0.1343 (0.1452) loss: 0.8057 (0.8052) time: 0.1560 data: 0.0709 max mem: 9377 +Train: [64] [1300/6250] eta: 0:14:03 lr: 0.000039 grad: 0.1317 (0.1446) loss: 0.8061 (0.8046) time: 0.1523 data: 0.0650 max mem: 9377 +Train: [64] [1400/6250] eta: 0:13:42 lr: 0.000039 grad: 0.1283 (0.1437) loss: 0.7954 (0.8041) time: 0.1949 data: 0.1076 max mem: 9377 +Train: [64] [1500/6250] eta: 0:13:23 lr: 0.000039 grad: 0.1364 (0.1432) loss: 0.7952 (0.8039) time: 0.1560 data: 0.0631 max mem: 9377 +Train: [64] [1600/6250] eta: 0:13:04 lr: 0.000039 grad: 0.1352 (0.1429) loss: 0.7973 (0.8036) time: 0.1563 data: 0.0672 max mem: 9377 +Train: [64] [1700/6250] eta: 0:12:45 lr: 0.000039 grad: 0.1393 (0.1425) loss: 0.7988 (0.8032) time: 0.1559 data: 0.0594 max mem: 9377 +Train: [64] [1800/6250] eta: 0:12:24 lr: 0.000039 grad: 0.1381 (0.1420) loss: 0.7944 (0.8029) time: 0.1572 data: 0.0675 max mem: 9377 +Train: [64] [1900/6250] eta: 0:12:04 lr: 0.000039 grad: 0.1348 (0.1418) loss: 0.8005 (0.8027) time: 0.1249 data: 0.0376 max mem: 9377 +Train: [64] [2000/6250] eta: 0:11:43 lr: 0.000039 grad: 0.1310 (0.1414) loss: 0.7982 (0.8024) time: 0.1197 data: 0.0284 max mem: 9377 +Train: [64] [2100/6250] eta: 0:11:23 lr: 0.000039 grad: 0.1270 (0.1410) loss: 0.8036 (0.8023) time: 0.1255 data: 0.0276 max mem: 9377 +Train: [64] [2200/6250] eta: 0:11:02 lr: 0.000039 grad: 0.1378 (0.1408) loss: 0.7980 (0.8021) time: 0.1309 data: 0.0343 max mem: 9377 +Train: [64] [2300/6250] eta: 0:10:44 lr: 0.000039 grad: 0.1243 (0.1405) loss: 0.7939 (0.8019) time: 0.1587 data: 0.0648 max mem: 9377 +Train: [64] [2400/6250] eta: 0:10:27 lr: 0.000039 grad: 0.1228 (0.1402) loss: 0.8051 (0.8018) time: 0.1584 data: 0.0766 max mem: 9377 +Train: [64] [2500/6250] eta: 0:10:09 lr: 0.000039 grad: 0.1360 (0.1401) loss: 0.8048 (0.8017) time: 0.1519 data: 0.0671 max mem: 9377 +Train: [64] [2600/6250] eta: 0:09:53 lr: 0.000039 grad: 0.1324 (0.1399) loss: 0.7925 (0.8017) time: 0.1651 data: 0.0734 max mem: 9377 +Train: [64] [2700/6250] eta: 0:09:36 lr: 0.000038 grad: 0.1290 (0.1397) loss: 0.8047 (0.8016) time: 0.1749 data: 0.0803 max mem: 9377 +Train: [64] [2800/6250] eta: 0:09:21 lr: 0.000038 grad: 0.1376 (0.1395) loss: 0.7916 (0.8015) time: 0.1925 data: 0.0947 max mem: 9377 +Train: [64] [2900/6250] eta: 0:09:05 lr: 0.000038 grad: 0.1402 (0.1395) loss: 0.7918 (0.8013) time: 0.1680 data: 0.0847 max mem: 9377 +Train: [64] [3000/6250] eta: 0:08:49 lr: 0.000038 grad: 0.1412 (0.1395) loss: 0.7979 (0.8012) time: 0.1650 data: 0.0764 max mem: 9377 +Train: [64] [3100/6250] eta: 0:08:32 lr: 0.000038 grad: 0.1348 (0.1393) loss: 0.7990 (0.8012) time: 0.1771 data: 0.0883 max mem: 9377 +Train: [64] [3200/6250] eta: 0:08:16 lr: 0.000038 grad: 0.1274 (0.1392) loss: 0.7929 (0.8011) time: 0.1809 data: 0.0805 max mem: 9377 +Train: [64] [3300/6250] eta: 0:08:00 lr: 0.000038 grad: 0.1377 (0.1391) loss: 0.7982 (0.8011) time: 0.1687 data: 0.0829 max mem: 9377 +Train: [64] [3400/6250] eta: 0:07:43 lr: 0.000038 grad: 0.1302 (0.1390) loss: 0.7999 (0.8009) time: 0.1380 data: 0.0477 max mem: 9377 +Train: [64] [3500/6250] eta: 0:07:26 lr: 0.000038 grad: 0.1318 (0.1389) loss: 0.7990 (0.8008) time: 0.1536 data: 0.0653 max mem: 9377 +Train: [64] [3600/6250] eta: 0:07:09 lr: 0.000038 grad: 0.1382 (0.1389) loss: 0.7983 (0.8007) time: 0.1614 data: 0.0760 max mem: 9377 +Train: [64] [3700/6250] eta: 0:06:52 lr: 0.000038 grad: 0.1308 (0.1388) loss: 0.7971 (0.8005) time: 0.1339 data: 0.0339 max mem: 9377 +Train: [64] [3800/6250] eta: 0:06:35 lr: 0.000038 grad: 0.1372 (0.1389) loss: 0.8022 (0.8003) time: 0.1486 data: 0.0590 max mem: 9377 +Train: [64] [3900/6250] eta: 0:06:17 lr: 0.000038 grad: 0.1361 (0.1389) loss: 0.7957 (0.8002) time: 0.1461 data: 0.0495 max mem: 9377 +Train: [64] [4000/6250] eta: 0:06:01 lr: 0.000038 grad: 0.1408 (0.1389) loss: 0.7953 (0.8000) time: 0.1387 data: 0.0513 max mem: 9377 +Train: [64] [4100/6250] eta: 0:05:45 lr: 0.000038 grad: 0.1321 (0.1389) loss: 0.7987 (0.7999) time: 0.1639 data: 0.0726 max mem: 9377 +Train: [64] [4200/6250] eta: 0:05:29 lr: 0.000038 grad: 0.1326 (0.1388) loss: 0.7944 (0.7998) time: 0.1722 data: 0.0828 max mem: 9377 +Train: [64] [4300/6250] eta: 0:05:13 lr: 0.000038 grad: 0.1284 (0.1387) loss: 0.8032 (0.7999) time: 0.1657 data: 0.0793 max mem: 9377 +Train: [64] [4400/6250] eta: 0:04:57 lr: 0.000038 grad: 0.1387 (0.1387) loss: 0.8033 (0.7998) time: 0.1702 data: 0.0762 max mem: 9377 +Train: [64] [4500/6250] eta: 0:04:41 lr: 0.000038 grad: 0.1362 (0.1386) loss: 0.8018 (0.7999) time: 0.1585 data: 0.0655 max mem: 9377 +Train: [64] [4600/6250] eta: 0:04:25 lr: 0.000038 grad: 0.1283 (0.1386) loss: 0.8061 (0.7999) time: 0.1718 data: 0.0813 max mem: 9377 +Train: [64] [4700/6250] eta: 0:04:09 lr: 0.000038 grad: 0.1351 (0.1387) loss: 0.7995 (0.7999) time: 0.1462 data: 0.0566 max mem: 9377 +Train: [64] [4800/6250] eta: 0:03:53 lr: 0.000038 grad: 0.1360 (0.1386) loss: 0.7941 (0.7999) time: 0.1742 data: 0.0853 max mem: 9377 +Train: [64] [4900/6250] eta: 0:03:37 lr: 0.000038 grad: 0.1342 (0.1386) loss: 0.7988 (0.7999) time: 0.1557 data: 0.0656 max mem: 9377 +Train: [64] [5000/6250] eta: 0:03:20 lr: 0.000038 grad: 0.1369 (0.1386) loss: 0.7911 (0.7999) time: 0.1699 data: 0.0833 max mem: 9377 +Train: [64] [5100/6250] eta: 0:03:04 lr: 0.000038 grad: 0.1380 (0.1387) loss: 0.8016 (0.7999) time: 0.1433 data: 0.0542 max mem: 9377 +Train: [64] [5200/6250] eta: 0:02:48 lr: 0.000038 grad: 0.1402 (0.1387) loss: 0.7939 (0.7998) time: 0.1820 data: 0.1002 max mem: 9377 +Train: [64] [5300/6250] eta: 0:02:32 lr: 0.000038 grad: 0.1378 (0.1387) loss: 0.7966 (0.7998) time: 0.1565 data: 0.0698 max mem: 9377 +Train: [64] [5400/6250] eta: 0:02:16 lr: 0.000038 grad: 0.1307 (0.1386) loss: 0.8042 (0.7998) time: 0.1292 data: 0.0429 max mem: 9377 +Train: [64] [5500/6250] eta: 0:02:00 lr: 0.000038 grad: 0.1344 (0.1386) loss: 0.7923 (0.7997) time: 0.1561 data: 0.0666 max mem: 9377 +Train: [64] [5600/6250] eta: 0:01:44 lr: 0.000038 grad: 0.1385 (0.1385) loss: 0.7949 (0.7997) time: 0.1575 data: 0.0615 max mem: 9377 +Train: [64] [5700/6250] eta: 0:01:28 lr: 0.000038 grad: 0.1345 (0.1385) loss: 0.8004 (0.7996) time: 0.1502 data: 0.0568 max mem: 9377 +Train: [64] [5800/6250] eta: 0:01:12 lr: 0.000038 grad: 0.1293 (0.1386) loss: 0.7963 (0.7995) time: 0.1882 data: 0.1026 max mem: 9377 +Train: [64] [5900/6250] eta: 0:00:56 lr: 0.000037 grad: 0.1347 (0.1386) loss: 0.7953 (0.7994) time: 0.1558 data: 0.0694 max mem: 9377 +Train: [64] [6000/6250] eta: 0:00:40 lr: 0.000037 grad: 0.1407 (0.1387) loss: 0.7796 (0.7993) time: 0.1457 data: 0.0507 max mem: 9377 +Train: [64] [6100/6250] eta: 0:00:24 lr: 0.000037 grad: 0.1393 (0.1386) loss: 0.7911 (0.7992) time: 0.1506 data: 0.0556 max mem: 9377 +Train: [64] [6200/6250] eta: 0:00:08 lr: 0.000037 grad: 0.1397 (0.1387) loss: 0.8021 (0.7991) time: 0.1496 data: 0.0479 max mem: 9377 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1372 (0.1387) loss: 0.7999 (0.7991) time: 0.1877 data: 0.0872 max mem: 9377 +Train: [64] Total time: 0:16:50 (0.1616 s / it) +Averaged stats: lr: 0.000037 grad: 0.1372 (0.1387) loss: 0.7999 (0.7991) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:06:03 loss: 0.8161 (0.8161) time: 5.8579 data: 5.8261 max mem: 9377 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.8087 (0.8093) time: 0.1520 data: 0.1262 max mem: 9377 +Eval (hcp-train-subset): [64] Total time: 0:00:15 (0.2564 s / it) +Averaged stats (hcp-train-subset): loss: 0.8087 (0.8093) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [64] [ 0/62] eta: 0:04:58 loss: 0.8348 (0.8348) time: 4.8147 data: 4.7445 max mem: 9377 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8376 (0.8391) time: 0.1430 data: 0.1175 max mem: 9377 +Eval (hcp-val): [64] Total time: 0:00:16 (0.2590 s / it) +Averaged stats (hcp-val): loss: 0.8376 (0.8391) +Making plots (hcp-val): example=42 +Eval (nsd-val): [64] [ 0/62] eta: 0:07:26 loss: 0.8107 (0.8107) time: 7.2081 data: 7.1750 max mem: 9377 +Eval (nsd-val): [64] [61/62] eta: 0:00:00 loss: 0.8164 (0.8189) time: 0.1406 data: 0.1135 max mem: 9377 +Eval (nsd-val): [64] Total time: 0:00:16 (0.2628 s / it) +Averaged stats (nsd-val): loss: 0.8164 (0.8189) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00064.pth +Train: [65] [ 0/6250] eta: 10:55:48 lr: 0.000037 grad: 0.1061 (0.1061) loss: 0.8603 (0.8603) time: 6.2958 data: 5.9733 max mem: 9377 +Train: [65] [ 100/6250] eta: 0:25:28 lr: 0.000037 grad: 0.1364 (0.1678) loss: 0.8033 (0.8213) time: 0.2034 data: 0.0891 max mem: 9377 +Train: [65] [ 200/6250] eta: 0:21:29 lr: 0.000037 grad: 0.1428 (0.1645) loss: 0.8078 (0.8125) time: 0.1835 data: 0.0700 max mem: 9377 +Train: [65] [ 300/6250] eta: 0:19:56 lr: 0.000037 grad: 0.1448 (0.1593) loss: 0.7978 (0.8082) time: 0.1652 data: 0.0641 max mem: 9377 +Train: [65] [ 400/6250] eta: 0:18:54 lr: 0.000037 grad: 0.1248 (0.1571) loss: 0.7997 (0.8058) time: 0.1576 data: 0.0508 max mem: 9377 +Train: [65] [ 500/6250] eta: 0:17:57 lr: 0.000037 grad: 0.1258 (0.1533) loss: 0.8019 (0.8053) time: 0.1706 data: 0.0707 max mem: 9377 +Train: [65] [ 600/6250] eta: 0:17:06 lr: 0.000037 grad: 0.1273 (0.1511) loss: 0.8041 (0.8044) time: 0.1472 data: 0.0584 max mem: 9377 +Train: [65] [ 700/6250] eta: 0:16:40 lr: 0.000037 grad: 0.1344 (0.1489) loss: 0.7991 (0.8038) time: 0.1540 data: 0.0616 max mem: 9377 +Train: [65] [ 800/6250] eta: 0:16:13 lr: 0.000037 grad: 0.1368 (0.1474) loss: 0.8065 (0.8032) time: 0.1448 data: 0.0457 max mem: 9377 +Train: [65] [ 900/6250] eta: 0:15:51 lr: 0.000037 grad: 0.1350 (0.1461) loss: 0.7902 (0.8027) time: 0.1155 data: 0.0002 max mem: 9377 +Train: [65] [1000/6250] eta: 0:15:26 lr: 0.000037 grad: 0.1257 (0.1448) loss: 0.8007 (0.8023) time: 0.1549 data: 0.0610 max mem: 9377 +Train: [65] [1100/6250] eta: 0:15:02 lr: 0.000037 grad: 0.1354 (0.1439) loss: 0.7943 (0.8019) time: 0.1837 data: 0.1004 max mem: 9377 +Train: [65] [1200/6250] eta: 0:14:39 lr: 0.000037 grad: 0.1314 (0.1433) loss: 0.7905 (0.8015) time: 0.1738 data: 0.0879 max mem: 9377 +Train: [65] [1300/6250] eta: 0:14:17 lr: 0.000037 grad: 0.1332 (0.1429) loss: 0.7928 (0.8009) time: 0.1539 data: 0.0688 max mem: 9377 +Train: [65] [1400/6250] eta: 0:14:02 lr: 0.000037 grad: 0.1294 (0.1421) loss: 0.7914 (0.8007) time: 0.1774 data: 0.0792 max mem: 9377 +Train: [65] [1500/6250] eta: 0:13:50 lr: 0.000037 grad: 0.1347 (0.1419) loss: 0.7976 (0.8003) time: 0.2037 data: 0.1048 max mem: 9377 +Train: [65] [1600/6250] eta: 0:13:32 lr: 0.000037 grad: 0.1339 (0.1416) loss: 0.7966 (0.7999) time: 0.1840 data: 0.0986 max mem: 9377 +Train: [65] [1700/6250] eta: 0:13:11 lr: 0.000037 grad: 0.1337 (0.1420) loss: 0.7956 (0.7997) time: 0.1521 data: 0.0515 max mem: 9377 +Train: [65] [1800/6250] eta: 0:12:51 lr: 0.000037 grad: 0.1310 (0.1416) loss: 0.7980 (0.7996) time: 0.1586 data: 0.0612 max mem: 9377 +Train: [65] [1900/6250] eta: 0:12:32 lr: 0.000037 grad: 0.1279 (0.1415) loss: 0.7952 (0.7995) time: 0.1673 data: 0.0787 max mem: 9377 +Train: [65] [2000/6250] eta: 0:12:13 lr: 0.000037 grad: 0.1340 (0.1414) loss: 0.7910 (0.7993) time: 0.1593 data: 0.0657 max mem: 9377 +Train: [65] [2100/6250] eta: 0:11:52 lr: 0.000037 grad: 0.1264 (0.1409) loss: 0.7946 (0.7993) time: 0.1593 data: 0.0696 max mem: 9377 +Train: [65] [2200/6250] eta: 0:11:32 lr: 0.000037 grad: 0.1304 (0.1406) loss: 0.7980 (0.7993) time: 0.1353 data: 0.0478 max mem: 9377 +Train: [65] [2300/6250] eta: 0:11:15 lr: 0.000037 grad: 0.1336 (0.1403) loss: 0.7907 (0.7994) time: 0.1955 data: 0.1144 max mem: 9377 +Train: [65] [2400/6250] eta: 0:10:56 lr: 0.000037 grad: 0.1265 (0.1402) loss: 0.8027 (0.7994) time: 0.1360 data: 0.0493 max mem: 9377 +Train: [65] [2500/6250] eta: 0:10:38 lr: 0.000037 grad: 0.1392 (0.1401) loss: 0.7955 (0.7992) time: 0.1716 data: 0.0733 max mem: 9377 +Train: [65] [2600/6250] eta: 0:10:19 lr: 0.000037 grad: 0.1334 (0.1399) loss: 0.7904 (0.7991) time: 0.1596 data: 0.0708 max mem: 9377 +Train: [65] [2700/6250] eta: 0:10:02 lr: 0.000037 grad: 0.1316 (0.1396) loss: 0.7939 (0.7991) time: 0.1773 data: 0.1000 max mem: 9377 +Train: [65] [2800/6250] eta: 0:09:44 lr: 0.000037 grad: 0.1363 (0.1394) loss: 0.7982 (0.7991) time: 0.1703 data: 0.0909 max mem: 9377 +Train: [65] [2900/6250] eta: 0:09:26 lr: 0.000037 grad: 0.1256 (0.1393) loss: 0.8068 (0.7991) time: 0.1482 data: 0.0680 max mem: 9377 +Train: [65] [3000/6250] eta: 0:09:08 lr: 0.000036 grad: 0.1398 (0.1392) loss: 0.7856 (0.7990) time: 0.1473 data: 0.0553 max mem: 9377 +Train: [65] [3100/6250] eta: 0:08:50 lr: 0.000036 grad: 0.1353 (0.1392) loss: 0.7814 (0.7989) time: 0.1687 data: 0.0825 max mem: 9377 +Train: [65] [3200/6250] eta: 0:08:33 lr: 0.000036 grad: 0.1378 (0.1392) loss: 0.7911 (0.7987) time: 0.1605 data: 0.0723 max mem: 9377 +Train: [65] [3300/6250] eta: 0:08:16 lr: 0.000036 grad: 0.1372 (0.1391) loss: 0.7923 (0.7986) time: 0.1805 data: 0.0924 max mem: 9377 +Train: [65] [3400/6250] eta: 0:08:00 lr: 0.000036 grad: 0.1486 (0.1391) loss: 0.7916 (0.7985) time: 0.1823 data: 0.0906 max mem: 9377 +Train: [65] [3500/6250] eta: 0:07:43 lr: 0.000036 grad: 0.1305 (0.1391) loss: 0.7980 (0.7985) time: 0.1697 data: 0.0874 max mem: 9377 +Train: [65] [3600/6250] eta: 0:07:26 lr: 0.000036 grad: 0.1390 (0.1390) loss: 0.7961 (0.7984) time: 0.1634 data: 0.0709 max mem: 9377 +Train: [65] [3700/6250] eta: 0:07:08 lr: 0.000036 grad: 0.1352 (0.1389) loss: 0.8073 (0.7985) time: 0.1480 data: 0.0578 max mem: 9377 +Train: [65] [3800/6250] eta: 0:06:50 lr: 0.000036 grad: 0.1309 (0.1388) loss: 0.8048 (0.7985) time: 0.1520 data: 0.0570 max mem: 9377 +Train: [65] [3900/6250] eta: 0:06:33 lr: 0.000036 grad: 0.1386 (0.1388) loss: 0.8002 (0.7985) time: 0.1676 data: 0.0595 max mem: 9377 +Train: [65] [4000/6250] eta: 0:06:16 lr: 0.000036 grad: 0.1352 (0.1388) loss: 0.7857 (0.7985) time: 0.1232 data: 0.0371 max mem: 9377 +Train: [65] [4100/6250] eta: 0:05:59 lr: 0.000036 grad: 0.1367 (0.1388) loss: 0.8000 (0.7985) time: 0.1204 data: 0.0302 max mem: 9377 +Train: [65] [4200/6250] eta: 0:05:42 lr: 0.000036 grad: 0.1451 (0.1388) loss: 0.7996 (0.7985) time: 0.1765 data: 0.0857 max mem: 9377 +Train: [65] [4300/6250] eta: 0:05:25 lr: 0.000036 grad: 0.1356 (0.1388) loss: 0.7984 (0.7984) time: 0.1551 data: 0.0599 max mem: 9377 +Train: [65] [4400/6250] eta: 0:05:08 lr: 0.000036 grad: 0.1368 (0.1389) loss: 0.7878 (0.7983) time: 0.1433 data: 0.0675 max mem: 9377 +Train: [65] [4500/6250] eta: 0:04:50 lr: 0.000036 grad: 0.1358 (0.1389) loss: 0.7977 (0.7983) time: 0.1585 data: 0.0644 max mem: 9377 +Train: [65] [4600/6250] eta: 0:04:33 lr: 0.000036 grad: 0.1363 (0.1389) loss: 0.8004 (0.7982) time: 0.1777 data: 0.0978 max mem: 9377 +Train: [65] [4700/6250] eta: 0:04:17 lr: 0.000036 grad: 0.1383 (0.1390) loss: 0.7910 (0.7981) time: 0.1218 data: 0.0200 max mem: 9377 +Train: [65] [4800/6250] eta: 0:04:00 lr: 0.000036 grad: 0.1521 (0.1391) loss: 0.7883 (0.7979) time: 0.1555 data: 0.0587 max mem: 9377 +Train: [65] [4900/6250] eta: 0:03:43 lr: 0.000036 grad: 0.1358 (0.1392) loss: 0.7999 (0.7977) time: 0.1862 data: 0.0971 max mem: 9377 +Train: [65] [5000/6250] eta: 0:03:26 lr: 0.000036 grad: 0.1430 (0.1393) loss: 0.7863 (0.7975) time: 0.1627 data: 0.0773 max mem: 9377 +Train: [65] [5100/6250] eta: 0:03:09 lr: 0.000036 grad: 0.1417 (0.1394) loss: 0.7781 (0.7973) time: 0.1770 data: 0.0928 max mem: 9377 +Train: [65] [5200/6250] eta: 0:02:53 lr: 0.000036 grad: 0.1452 (0.1396) loss: 0.7801 (0.7971) time: 0.1847 data: 0.1004 max mem: 9377 +Train: [65] [5300/6250] eta: 0:02:37 lr: 0.000036 grad: 0.1531 (0.1397) loss: 0.7746 (0.7968) time: 0.1739 data: 0.0869 max mem: 9377 +Train: [65] [5400/6250] eta: 0:02:20 lr: 0.000036 grad: 0.1419 (0.1397) loss: 0.7835 (0.7967) time: 0.1287 data: 0.0362 max mem: 9377 +Train: [65] [5500/6250] eta: 0:02:03 lr: 0.000036 grad: 0.1456 (0.1398) loss: 0.7775 (0.7965) time: 0.1484 data: 0.0646 max mem: 9377 +Train: [65] [5600/6250] eta: 0:01:47 lr: 0.000036 grad: 0.1387 (0.1400) loss: 0.7900 (0.7963) time: 0.1564 data: 0.0650 max mem: 9377 +Train: [65] [5700/6250] eta: 0:01:30 lr: 0.000036 grad: 0.1458 (0.1401) loss: 0.7837 (0.7962) time: 0.1896 data: 0.1032 max mem: 9377 +Train: [65] [5800/6250] eta: 0:01:14 lr: 0.000036 grad: 0.1345 (0.1402) loss: 0.7899 (0.7960) time: 0.1788 data: 0.0860 max mem: 9377 +Train: [65] [5900/6250] eta: 0:00:57 lr: 0.000036 grad: 0.1445 (0.1403) loss: 0.7920 (0.7958) time: 0.1630 data: 0.0723 max mem: 9377 +Train: [65] [6000/6250] eta: 0:00:41 lr: 0.000036 grad: 0.1405 (0.1404) loss: 0.7850 (0.7957) time: 0.1663 data: 0.0721 max mem: 9377 +Train: [65] [6100/6250] eta: 0:00:24 lr: 0.000036 grad: 0.1392 (0.1405) loss: 0.7973 (0.7955) time: 0.1316 data: 0.0397 max mem: 9377 +Train: [65] [6200/6250] eta: 0:00:08 lr: 0.000036 grad: 0.1436 (0.1405) loss: 0.7891 (0.7955) time: 0.1644 data: 0.0626 max mem: 9377 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1405 (0.1405) loss: 0.7956 (0.7954) time: 0.1607 data: 0.0738 max mem: 9377 +Train: [65] Total time: 0:17:16 (0.1659 s / it) +Averaged stats: lr: 0.000036 grad: 0.1405 (0.1405) loss: 0.7956 (0.7954) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:06:11 loss: 0.8194 (0.8194) time: 5.9872 data: 5.9551 max mem: 9377 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.8065 (0.8086) time: 0.1377 data: 0.1115 max mem: 9377 +Eval (hcp-train-subset): [65] Total time: 0:00:15 (0.2434 s / it) +Averaged stats (hcp-train-subset): loss: 0.8065 (0.8086) +Eval (hcp-val): [65] [ 0/62] eta: 0:04:08 loss: 0.8397 (0.8397) time: 4.0140 data: 3.9499 max mem: 9377 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8383 (0.8399) time: 0.1289 data: 0.1034 max mem: 9377 +Eval (hcp-val): [65] Total time: 0:00:14 (0.2370 s / it) +Averaged stats (hcp-val): loss: 0.8383 (0.8399) +Eval (nsd-val): [65] [ 0/62] eta: 0:05:18 loss: 0.8089 (0.8089) time: 5.1380 data: 5.0856 max mem: 9377 +Eval (nsd-val): [65] [61/62] eta: 0:00:00 loss: 0.8177 (0.8200) time: 0.1398 data: 0.1138 max mem: 9377 +Eval (nsd-val): [65] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (nsd-val): loss: 0.8177 (0.8200) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [66] [ 0/6250] eta: 9:13:27 lr: 0.000036 grad: 0.0974 (0.0974) loss: 0.8476 (0.8476) time: 5.3132 data: 5.0752 max mem: 9377 +Train: [66] [ 100/6250] eta: 0:22:52 lr: 0.000035 grad: 0.1537 (0.1651) loss: 0.8176 (0.8217) time: 0.1776 data: 0.0792 max mem: 9377 +Train: [66] [ 200/6250] eta: 0:19:43 lr: 0.000035 grad: 0.1345 (0.1557) loss: 0.8119 (0.8142) time: 0.1788 data: 0.0806 max mem: 9377 +Train: [66] [ 300/6250] eta: 0:18:18 lr: 0.000035 grad: 0.1462 (0.1565) loss: 0.7961 (0.8097) time: 0.1590 data: 0.0572 max mem: 9377 +Train: [66] [ 400/6250] eta: 0:17:19 lr: 0.000035 grad: 0.1372 (0.1522) loss: 0.8074 (0.8093) time: 0.1499 data: 0.0543 max mem: 9377 +Train: [66] [ 500/6250] eta: 0:16:34 lr: 0.000035 grad: 0.1375 (0.1488) loss: 0.8026 (0.8085) time: 0.1566 data: 0.0730 max mem: 9377 +Train: [66] [ 600/6250] eta: 0:16:02 lr: 0.000035 grad: 0.1283 (0.1465) loss: 0.8061 (0.8081) time: 0.1607 data: 0.0613 max mem: 9377 +Train: [66] [ 700/6250] eta: 0:15:40 lr: 0.000035 grad: 0.1436 (0.1456) loss: 0.8043 (0.8076) time: 0.1573 data: 0.0594 max mem: 9377 +Train: [66] [ 800/6250] eta: 0:15:25 lr: 0.000035 grad: 0.1352 (0.1445) loss: 0.8001 (0.8072) time: 0.1991 data: 0.1059 max mem: 9377 +Train: [66] [ 900/6250] eta: 0:14:58 lr: 0.000035 grad: 0.1340 (0.1438) loss: 0.8090 (0.8071) time: 0.1727 data: 0.0795 max mem: 9377 +Train: [66] [1000/6250] eta: 0:14:38 lr: 0.000035 grad: 0.1293 (0.1431) loss: 0.8037 (0.8068) time: 0.1618 data: 0.0560 max mem: 9377 +Train: [66] [1100/6250] eta: 0:14:27 lr: 0.000035 grad: 0.1396 (0.1427) loss: 0.8002 (0.8063) time: 0.1421 data: 0.0577 max mem: 9377 +Train: [66] [1200/6250] eta: 0:14:11 lr: 0.000035 grad: 0.1345 (0.1422) loss: 0.8054 (0.8059) time: 0.1697 data: 0.0910 max mem: 9377 +Train: [66] [1300/6250] eta: 0:13:52 lr: 0.000035 grad: 0.1418 (0.1420) loss: 0.8058 (0.8054) time: 0.1384 data: 0.0495 max mem: 9377 +Train: [66] [1400/6250] eta: 0:13:31 lr: 0.000035 grad: 0.1257 (0.1416) loss: 0.8001 (0.8049) time: 0.1782 data: 0.0968 max mem: 9377 +Train: [66] [1500/6250] eta: 0:13:13 lr: 0.000035 grad: 0.1359 (0.1415) loss: 0.7991 (0.8045) time: 0.1776 data: 0.0893 max mem: 9377 +Train: [66] [1600/6250] eta: 0:12:54 lr: 0.000035 grad: 0.1393 (0.1415) loss: 0.8039 (0.8041) time: 0.1483 data: 0.0595 max mem: 9377 +Train: [66] [1700/6250] eta: 0:12:34 lr: 0.000035 grad: 0.1347 (0.1414) loss: 0.7957 (0.8037) time: 0.1698 data: 0.0758 max mem: 9377 +Train: [66] [1800/6250] eta: 0:12:14 lr: 0.000035 grad: 0.1360 (0.1412) loss: 0.8013 (0.8034) time: 0.1516 data: 0.0605 max mem: 9377 +Train: [66] [1900/6250] eta: 0:11:53 lr: 0.000035 grad: 0.1451 (0.1411) loss: 0.7884 (0.8030) time: 0.1646 data: 0.0580 max mem: 9377 +Train: [66] [2000/6250] eta: 0:11:33 lr: 0.000035 grad: 0.1288 (0.1410) loss: 0.8043 (0.8027) time: 0.1439 data: 0.0489 max mem: 9377 +Train: [66] [2100/6250] eta: 0:11:13 lr: 0.000035 grad: 0.1339 (0.1410) loss: 0.8030 (0.8024) time: 0.1536 data: 0.0620 max mem: 9377 +Train: [66] [2200/6250] eta: 0:10:53 lr: 0.000035 grad: 0.1354 (0.1409) loss: 0.7973 (0.8021) time: 0.1373 data: 0.0482 max mem: 9377 +Train: [66] [2300/6250] eta: 0:10:35 lr: 0.000035 grad: 0.1348 (0.1409) loss: 0.8005 (0.8018) time: 0.1464 data: 0.0471 max mem: 9377 +Train: [66] [2400/6250] eta: 0:10:18 lr: 0.000035 grad: 0.1385 (0.1410) loss: 0.7926 (0.8016) time: 0.1329 data: 0.0423 max mem: 9377 +Train: [66] [2500/6250] eta: 0:10:01 lr: 0.000035 grad: 0.1377 (0.1409) loss: 0.7988 (0.8015) time: 0.1591 data: 0.0601 max mem: 9377 +Train: [66] [2600/6250] eta: 0:09:44 lr: 0.000035 grad: 0.1360 (0.1408) loss: 0.7979 (0.8013) time: 0.1504 data: 0.0576 max mem: 9377 +Train: [66] [2700/6250] eta: 0:09:28 lr: 0.000035 grad: 0.1329 (0.1406) loss: 0.7950 (0.8012) time: 0.1467 data: 0.0576 max mem: 9377 +Train: [66] [2800/6250] eta: 0:09:11 lr: 0.000035 grad: 0.1402 (0.1406) loss: 0.7863 (0.8010) time: 0.1679 data: 0.0738 max mem: 9377 +Train: [66] [2900/6250] eta: 0:08:55 lr: 0.000035 grad: 0.1360 (0.1405) loss: 0.7969 (0.8008) time: 0.1584 data: 0.0675 max mem: 9377 +Train: [66] [3000/6250] eta: 0:08:40 lr: 0.000035 grad: 0.1370 (0.1405) loss: 0.7917 (0.8007) time: 0.1712 data: 0.0841 max mem: 9377 +Train: [66] [3100/6250] eta: 0:08:24 lr: 0.000035 grad: 0.1365 (0.1405) loss: 0.7978 (0.8005) time: 0.1679 data: 0.0756 max mem: 9377 +Train: [66] [3200/6250] eta: 0:08:07 lr: 0.000035 grad: 0.1353 (0.1406) loss: 0.7938 (0.8004) time: 0.1583 data: 0.0754 max mem: 9377 +Train: [66] [3300/6250] eta: 0:07:52 lr: 0.000035 grad: 0.1253 (0.1405) loss: 0.7979 (0.8002) time: 0.1582 data: 0.0636 max mem: 9377 +Train: [66] [3400/6250] eta: 0:07:36 lr: 0.000035 grad: 0.1333 (0.1405) loss: 0.7981 (0.8001) time: 0.1633 data: 0.0717 max mem: 9377 +Train: [66] [3500/6250] eta: 0:07:21 lr: 0.000034 grad: 0.1372 (0.1405) loss: 0.7948 (0.8000) time: 0.1764 data: 0.0908 max mem: 9377 +Train: [66] [3600/6250] eta: 0:07:04 lr: 0.000034 grad: 0.1410 (0.1404) loss: 0.7947 (0.8000) time: 0.1824 data: 0.0955 max mem: 9377 +Train: [66] [3700/6250] eta: 0:06:48 lr: 0.000034 grad: 0.1349 (0.1405) loss: 0.8033 (0.7998) time: 0.1551 data: 0.0666 max mem: 9377 +Train: [66] [3800/6250] eta: 0:06:32 lr: 0.000034 grad: 0.1371 (0.1404) loss: 0.7973 (0.7997) time: 0.1771 data: 0.0896 max mem: 9377 +Train: [66] [3900/6250] eta: 0:06:15 lr: 0.000034 grad: 0.1391 (0.1405) loss: 0.7970 (0.7996) time: 0.1388 data: 0.0462 max mem: 9377 +Train: [66] [4000/6250] eta: 0:05:59 lr: 0.000034 grad: 0.1483 (0.1406) loss: 0.7857 (0.7994) time: 0.1545 data: 0.0701 max mem: 9377 +Train: [66] [4100/6250] eta: 0:05:43 lr: 0.000034 grad: 0.1420 (0.1407) loss: 0.7981 (0.7993) time: 0.1845 data: 0.0991 max mem: 9377 +Train: [66] [4200/6250] eta: 0:05:27 lr: 0.000034 grad: 0.1408 (0.1408) loss: 0.7916 (0.7992) time: 0.1544 data: 0.0724 max mem: 9377 +Train: [66] [4300/6250] eta: 0:05:12 lr: 0.000034 grad: 0.1388 (0.1407) loss: 0.8001 (0.7991) time: 0.2444 data: 0.1577 max mem: 9377 +Train: [66] [4400/6250] eta: 0:04:56 lr: 0.000034 grad: 0.1362 (0.1408) loss: 0.8030 (0.7991) time: 0.1612 data: 0.0722 max mem: 9377 +Train: [66] [4500/6250] eta: 0:04:40 lr: 0.000034 grad: 0.1409 (0.1409) loss: 0.7936 (0.7991) time: 0.1381 data: 0.0574 max mem: 9377 +Train: [66] [4600/6250] eta: 0:04:24 lr: 0.000034 grad: 0.1491 (0.1410) loss: 0.7878 (0.7989) time: 0.1676 data: 0.0789 max mem: 9377 +Train: [66] [4700/6250] eta: 0:04:07 lr: 0.000034 grad: 0.1338 (0.1410) loss: 0.7972 (0.7988) time: 0.1672 data: 0.0735 max mem: 9377 +Train: [66] [4800/6250] eta: 0:03:51 lr: 0.000034 grad: 0.1455 (0.1411) loss: 0.7916 (0.7987) time: 0.1537 data: 0.0710 max mem: 9377 +Train: [66] [4900/6250] eta: 0:03:35 lr: 0.000034 grad: 0.1388 (0.1412) loss: 0.7956 (0.7986) time: 0.1534 data: 0.0708 max mem: 9377 +Train: [66] [5000/6250] eta: 0:03:19 lr: 0.000034 grad: 0.1356 (0.1414) loss: 0.8067 (0.7986) time: 0.1665 data: 0.0801 max mem: 9377 +Train: [66] [5100/6250] eta: 0:03:03 lr: 0.000034 grad: 0.1424 (0.1414) loss: 0.7960 (0.7985) time: 0.1881 data: 0.0997 max mem: 9377 +Train: [66] [5200/6250] eta: 0:02:48 lr: 0.000034 grad: 0.1368 (0.1415) loss: 0.7983 (0.7985) time: 0.1494 data: 0.0755 max mem: 9377 +Train: [66] [5300/6250] eta: 0:02:32 lr: 0.000034 grad: 0.1450 (0.1415) loss: 0.7965 (0.7985) time: 0.1883 data: 0.1056 max mem: 9377 +Train: [66] [5400/6250] eta: 0:02:16 lr: 0.000034 grad: 0.1409 (0.1415) loss: 0.7987 (0.7986) time: 0.1829 data: 0.0953 max mem: 9377 +Train: [66] [5500/6250] eta: 0:02:00 lr: 0.000034 grad: 0.1472 (0.1416) loss: 0.8001 (0.7985) time: 0.1318 data: 0.0492 max mem: 9377 +Train: [66] [5600/6250] eta: 0:01:44 lr: 0.000034 grad: 0.1370 (0.1416) loss: 0.8022 (0.7986) time: 0.1587 data: 0.0659 max mem: 9377 +Train: [66] [5700/6250] eta: 0:01:28 lr: 0.000034 grad: 0.1437 (0.1415) loss: 0.8012 (0.7986) time: 0.1590 data: 0.0698 max mem: 9377 +Train: [66] [5800/6250] eta: 0:01:12 lr: 0.000034 grad: 0.1378 (0.1415) loss: 0.8038 (0.7987) time: 0.1813 data: 0.0880 max mem: 9377 +Train: [66] [5900/6250] eta: 0:00:56 lr: 0.000034 grad: 0.1420 (0.1415) loss: 0.8034 (0.7988) time: 0.1665 data: 0.0800 max mem: 9377 +Train: [66] [6000/6250] eta: 0:00:40 lr: 0.000034 grad: 0.1419 (0.1415) loss: 0.7968 (0.7988) time: 0.1457 data: 0.0462 max mem: 9377 +Train: [66] [6100/6250] eta: 0:00:24 lr: 0.000034 grad: 0.1350 (0.1415) loss: 0.8016 (0.7988) time: 0.1428 data: 0.0467 max mem: 9377 +Train: [66] [6200/6250] eta: 0:00:08 lr: 0.000034 grad: 0.1347 (0.1414) loss: 0.8000 (0.7989) time: 0.1448 data: 0.0524 max mem: 9377 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1472 (0.1414) loss: 0.7936 (0.7989) time: 0.1551 data: 0.0680 max mem: 9377 +Train: [66] Total time: 0:16:48 (0.1613 s / it) +Averaged stats: lr: 0.000034 grad: 0.1472 (0.1414) loss: 0.7936 (0.7989) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:06:39 loss: 0.8222 (0.8222) time: 6.4478 data: 6.4151 max mem: 9377 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.8061 (0.8066) time: 0.1415 data: 0.1144 max mem: 9377 +Eval (hcp-train-subset): [66] Total time: 0:00:14 (0.2381 s / it) +Averaged stats (hcp-train-subset): loss: 0.8061 (0.8066) +Eval (hcp-val): [66] [ 0/62] eta: 0:04:45 loss: 0.8368 (0.8368) time: 4.6024 data: 4.5368 max mem: 9377 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.8388 (0.8401) time: 0.1314 data: 0.1044 max mem: 9377 +Eval (hcp-val): [66] Total time: 0:00:14 (0.2354 s / it) +Averaged stats (hcp-val): loss: 0.8388 (0.8401) +Eval (nsd-val): [66] [ 0/62] eta: 0:04:55 loss: 0.8084 (0.8084) time: 4.7694 data: 4.6940 max mem: 9377 +Eval (nsd-val): [66] [61/62] eta: 0:00:00 loss: 0.8153 (0.8162) time: 0.1104 data: 0.0836 max mem: 9377 +Eval (nsd-val): [66] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (nsd-val): loss: 0.8153 (0.8162) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [67] [ 0/6250] eta: 9:53:20 lr: 0.000034 grad: 0.2219 (0.2219) loss: 0.8693 (0.8693) time: 5.6961 data: 5.4887 max mem: 9377 +Train: [67] [ 100/6250] eta: 0:23:53 lr: 0.000034 grad: 0.1372 (0.2119) loss: 0.8202 (0.8097) time: 0.1865 data: 0.0669 max mem: 9377 +Train: [67] [ 200/6250] eta: 0:20:50 lr: 0.000034 grad: 0.1368 (0.1851) loss: 0.8192 (0.8097) time: 0.1839 data: 0.0727 max mem: 9377 +Train: [67] [ 300/6250] eta: 0:19:22 lr: 0.000034 grad: 0.1260 (0.1710) loss: 0.8138 (0.8094) time: 0.1604 data: 0.0516 max mem: 9377 +Train: [67] [ 400/6250] eta: 0:18:05 lr: 0.000034 grad: 0.1372 (0.1656) loss: 0.7995 (0.8082) time: 0.1311 data: 0.0366 max mem: 9377 +Train: [67] [ 500/6250] eta: 0:17:22 lr: 0.000034 grad: 0.1425 (0.1614) loss: 0.7991 (0.8070) time: 0.1403 data: 0.0380 max mem: 9377 +Train: [67] [ 600/6250] eta: 0:16:45 lr: 0.000033 grad: 0.1454 (0.1596) loss: 0.7916 (0.8051) time: 0.1502 data: 0.0511 max mem: 9377 +Train: [67] [ 700/6250] eta: 0:16:29 lr: 0.000033 grad: 0.1423 (0.1571) loss: 0.8049 (0.8044) time: 0.2251 data: 0.1372 max mem: 9377 +Train: [67] [ 800/6250] eta: 0:15:58 lr: 0.000033 grad: 0.1343 (0.1548) loss: 0.8028 (0.8041) time: 0.1162 data: 0.0082 max mem: 9377 +Train: [67] [ 900/6250] eta: 0:15:43 lr: 0.000033 grad: 0.1376 (0.1533) loss: 0.7983 (0.8036) time: 0.2135 data: 0.1286 max mem: 9377 +Train: [67] [1000/6250] eta: 0:15:11 lr: 0.000033 grad: 0.1424 (0.1523) loss: 0.7910 (0.8029) time: 0.1528 data: 0.0615 max mem: 9377 +Train: [67] [1100/6250] eta: 0:14:55 lr: 0.000033 grad: 0.1313 (0.1516) loss: 0.8016 (0.8024) time: 0.1582 data: 0.0733 max mem: 9377 +Train: [67] [1200/6250] eta: 0:14:36 lr: 0.000033 grad: 0.1314 (0.1505) loss: 0.7982 (0.8018) time: 0.1784 data: 0.0825 max mem: 9377 +Train: [67] [1300/6250] eta: 0:14:15 lr: 0.000033 grad: 0.1425 (0.1499) loss: 0.7878 (0.8012) time: 0.1815 data: 0.0990 max mem: 9377 +Train: [67] [1400/6250] eta: 0:13:53 lr: 0.000033 grad: 0.1300 (0.1490) loss: 0.8006 (0.8008) time: 0.1687 data: 0.0841 max mem: 9377 +Train: [67] [1500/6250] eta: 0:13:31 lr: 0.000033 grad: 0.1258 (0.1482) loss: 0.8001 (0.8005) time: 0.1623 data: 0.0726 max mem: 9377 +Train: [67] [1600/6250] eta: 0:13:13 lr: 0.000033 grad: 0.1406 (0.1476) loss: 0.7898 (0.8002) time: 0.1615 data: 0.0650 max mem: 9377 +Train: [67] [1700/6250] eta: 0:12:54 lr: 0.000033 grad: 0.1358 (0.1469) loss: 0.7969 (0.8000) time: 0.1645 data: 0.0808 max mem: 9377 +Train: [67] [1800/6250] eta: 0:12:36 lr: 0.000033 grad: 0.1335 (0.1466) loss: 0.7953 (0.7996) time: 0.1762 data: 0.0900 max mem: 9377 +Train: [67] [1900/6250] eta: 0:12:16 lr: 0.000033 grad: 0.1373 (0.1461) loss: 0.8003 (0.7994) time: 0.1550 data: 0.0640 max mem: 9377 +Train: [67] [2000/6250] eta: 0:11:55 lr: 0.000033 grad: 0.1337 (0.1457) loss: 0.7977 (0.7992) time: 0.1410 data: 0.0488 max mem: 9377 +Train: [67] [2100/6250] eta: 0:11:35 lr: 0.000033 grad: 0.1352 (0.1454) loss: 0.7973 (0.7991) time: 0.1623 data: 0.0724 max mem: 9377 +Train: [67] [2200/6250] eta: 0:11:16 lr: 0.000033 grad: 0.1435 (0.1453) loss: 0.7942 (0.7988) time: 0.1533 data: 0.0633 max mem: 9377 +Train: [67] [2300/6250] eta: 0:10:58 lr: 0.000033 grad: 0.1430 (0.1453) loss: 0.7947 (0.7986) time: 0.1614 data: 0.0720 max mem: 9377 +Train: [67] [2400/6250] eta: 0:10:41 lr: 0.000033 grad: 0.1388 (0.1452) loss: 0.7913 (0.7983) time: 0.1700 data: 0.0725 max mem: 9377 +Train: [67] [2500/6250] eta: 0:10:24 lr: 0.000033 grad: 0.1376 (0.1450) loss: 0.7966 (0.7982) time: 0.1645 data: 0.0737 max mem: 9377 +Train: [67] [2600/6250] eta: 0:10:06 lr: 0.000033 grad: 0.1364 (0.1450) loss: 0.8028 (0.7981) time: 0.1669 data: 0.0784 max mem: 9377 +Train: [67] [2700/6250] eta: 0:09:47 lr: 0.000033 grad: 0.1396 (0.1449) loss: 0.7937 (0.7980) time: 0.1382 data: 0.0578 max mem: 9377 +Train: [67] [2800/6250] eta: 0:09:29 lr: 0.000033 grad: 0.1392 (0.1447) loss: 0.7957 (0.7980) time: 0.1428 data: 0.0536 max mem: 9377 +Train: [67] [2900/6250] eta: 0:09:14 lr: 0.000033 grad: 0.1501 (0.1448) loss: 0.7938 (0.7978) time: 0.1958 data: 0.1052 max mem: 9377 +Train: [67] [3000/6250] eta: 0:08:57 lr: 0.000033 grad: 0.1356 (0.1449) loss: 0.8009 (0.7976) time: 0.1718 data: 0.0944 max mem: 9377 +Train: [67] [3100/6250] eta: 0:08:40 lr: 0.000033 grad: 0.1413 (0.1448) loss: 0.8008 (0.7976) time: 0.1664 data: 0.0781 max mem: 9377 +Train: [67] [3200/6250] eta: 0:08:22 lr: 0.000033 grad: 0.1365 (0.1449) loss: 0.8009 (0.7974) time: 0.1533 data: 0.0724 max mem: 9377 +Train: [67] [3300/6250] eta: 0:08:06 lr: 0.000033 grad: 0.1374 (0.1449) loss: 0.7921 (0.7973) time: 0.1630 data: 0.0643 max mem: 9377 +Train: [67] [3400/6250] eta: 0:07:50 lr: 0.000033 grad: 0.1421 (0.1448) loss: 0.7942 (0.7972) time: 0.1504 data: 0.0663 max mem: 9377 +Train: [67] [3500/6250] eta: 0:07:33 lr: 0.000033 grad: 0.1373 (0.1447) loss: 0.7945 (0.7972) time: 0.1505 data: 0.0647 max mem: 9377 +Train: [67] [3600/6250] eta: 0:07:16 lr: 0.000033 grad: 0.1357 (0.1445) loss: 0.7959 (0.7972) time: 0.1548 data: 0.0657 max mem: 9377 +Train: [67] [3700/6250] eta: 0:06:59 lr: 0.000033 grad: 0.1348 (0.1443) loss: 0.8034 (0.7973) time: 0.1643 data: 0.0767 max mem: 9377 +Train: [67] [3800/6250] eta: 0:06:41 lr: 0.000033 grad: 0.1331 (0.1442) loss: 0.8000 (0.7973) time: 0.1537 data: 0.0658 max mem: 9377 +Train: [67] [3900/6250] eta: 0:06:24 lr: 0.000033 grad: 0.1362 (0.1441) loss: 0.7968 (0.7973) time: 0.1285 data: 0.0316 max mem: 9377 +Train: [67] [4000/6250] eta: 0:06:07 lr: 0.000032 grad: 0.1393 (0.1441) loss: 0.7983 (0.7973) time: 0.1609 data: 0.0751 max mem: 9377 +Train: [67] [4100/6250] eta: 0:05:50 lr: 0.000032 grad: 0.1455 (0.1442) loss: 0.7924 (0.7972) time: 0.1164 data: 0.0231 max mem: 9377 +Train: [67] [4200/6250] eta: 0:05:33 lr: 0.000032 grad: 0.1426 (0.1442) loss: 0.7880 (0.7971) time: 0.1474 data: 0.0609 max mem: 9377 +Train: [67] [4300/6250] eta: 0:05:17 lr: 0.000032 grad: 0.1463 (0.1442) loss: 0.7892 (0.7970) time: 0.1570 data: 0.0789 max mem: 9377 +Train: [67] [4400/6250] eta: 0:05:00 lr: 0.000032 grad: 0.1464 (0.1442) loss: 0.7968 (0.7969) time: 0.1670 data: 0.0774 max mem: 9377 +Train: [67] [4500/6250] eta: 0:04:44 lr: 0.000032 grad: 0.1435 (0.1442) loss: 0.7883 (0.7968) time: 0.1559 data: 0.0635 max mem: 9377 +Train: [67] [4600/6250] eta: 0:04:27 lr: 0.000032 grad: 0.1350 (0.1441) loss: 0.7901 (0.7968) time: 0.1515 data: 0.0640 max mem: 9377 +Train: [67] [4700/6250] eta: 0:04:11 lr: 0.000032 grad: 0.1473 (0.1441) loss: 0.7908 (0.7967) time: 0.1707 data: 0.0807 max mem: 9377 +Train: [67] [4800/6250] eta: 0:03:54 lr: 0.000032 grad: 0.1433 (0.1441) loss: 0.7867 (0.7966) time: 0.1425 data: 0.0515 max mem: 9377 +Train: [67] [4900/6250] eta: 0:03:38 lr: 0.000032 grad: 0.1476 (0.1442) loss: 0.7806 (0.7966) time: 0.1684 data: 0.0827 max mem: 9377 +Train: [67] [5000/6250] eta: 0:03:22 lr: 0.000032 grad: 0.1397 (0.1442) loss: 0.8012 (0.7966) time: 0.1802 data: 0.0907 max mem: 9377 +Train: [67] [5100/6250] eta: 0:03:05 lr: 0.000032 grad: 0.1448 (0.1442) loss: 0.7993 (0.7965) time: 0.1499 data: 0.0641 max mem: 9377 +Train: [67] [5200/6250] eta: 0:02:49 lr: 0.000032 grad: 0.1338 (0.1442) loss: 0.7990 (0.7965) time: 0.1840 data: 0.0932 max mem: 9377 +Train: [67] [5300/6250] eta: 0:02:33 lr: 0.000032 grad: 0.1356 (0.1442) loss: 0.7990 (0.7965) time: 0.1963 data: 0.1050 max mem: 9377 +Train: [67] [5400/6250] eta: 0:02:17 lr: 0.000032 grad: 0.1428 (0.1442) loss: 0.7944 (0.7965) time: 0.1585 data: 0.0646 max mem: 9377 +Train: [67] [5500/6250] eta: 0:02:01 lr: 0.000032 grad: 0.1353 (0.1442) loss: 0.7991 (0.7965) time: 0.1840 data: 0.0885 max mem: 9377 +Train: [67] [5600/6250] eta: 0:01:45 lr: 0.000032 grad: 0.1344 (0.1442) loss: 0.7942 (0.7965) time: 0.1576 data: 0.0716 max mem: 9377 +Train: [67] [5700/6250] eta: 0:01:28 lr: 0.000032 grad: 0.1445 (0.1442) loss: 0.7977 (0.7965) time: 0.1489 data: 0.0557 max mem: 9377 +Train: [67] [5800/6250] eta: 0:01:12 lr: 0.000032 grad: 0.1403 (0.1442) loss: 0.7931 (0.7964) time: 0.1732 data: 0.0777 max mem: 9377 +Train: [67] [5900/6250] eta: 0:00:56 lr: 0.000032 grad: 0.1363 (0.1442) loss: 0.7945 (0.7964) time: 0.1728 data: 0.0801 max mem: 9377 +Train: [67] [6000/6250] eta: 0:00:40 lr: 0.000032 grad: 0.1457 (0.1443) loss: 0.7897 (0.7965) time: 0.1677 data: 0.0854 max mem: 9377 +Train: [67] [6100/6250] eta: 0:00:24 lr: 0.000032 grad: 0.1464 (0.1444) loss: 0.7972 (0.7965) time: 0.1531 data: 0.0590 max mem: 9377 +Train: [67] [6200/6250] eta: 0:00:08 lr: 0.000032 grad: 0.1329 (0.1444) loss: 0.7985 (0.7964) time: 0.1621 data: 0.0791 max mem: 9377 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1452 (0.1444) loss: 0.7896 (0.7964) time: 0.1811 data: 0.0918 max mem: 9377 +Train: [67] Total time: 0:16:57 (0.1628 s / it) +Averaged stats: lr: 0.000032 grad: 0.1452 (0.1444) loss: 0.7896 (0.7964) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:05:29 loss: 0.8159 (0.8159) time: 5.3200 data: 5.2881 max mem: 9377 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.8017 (0.8042) time: 0.1248 data: 0.0998 max mem: 9377 +Eval (hcp-train-subset): [67] Total time: 0:00:14 (0.2355 s / it) +Averaged stats (hcp-train-subset): loss: 0.8017 (0.8042) +Eval (hcp-val): [67] [ 0/62] eta: 0:03:42 loss: 0.8385 (0.8385) time: 3.5946 data: 3.4787 max mem: 9377 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.8382 (0.8393) time: 0.1370 data: 0.1118 max mem: 9377 +Eval (hcp-val): [67] Total time: 0:00:14 (0.2406 s / it) +Averaged stats (hcp-val): loss: 0.8382 (0.8393) +Eval (nsd-val): [67] [ 0/62] eta: 0:06:12 loss: 0.8071 (0.8071) time: 6.0087 data: 5.9777 max mem: 9377 +Eval (nsd-val): [67] [61/62] eta: 0:00:00 loss: 0.8196 (0.8203) time: 0.1347 data: 0.1075 max mem: 9377 +Eval (nsd-val): [67] Total time: 0:00:14 (0.2375 s / it) +Averaged stats (nsd-val): loss: 0.8196 (0.8203) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [68] [ 0/6250] eta: 8:42:01 lr: 0.000032 grad: 0.1886 (0.1886) loss: 0.8345 (0.8345) time: 5.0114 data: 4.7465 max mem: 9377 +Train: [68] [ 100/6250] eta: 0:23:24 lr: 0.000032 grad: 0.1581 (0.1858) loss: 0.8128 (0.8162) time: 0.1981 data: 0.0782 max mem: 9377 +Train: [68] [ 200/6250] eta: 0:19:47 lr: 0.000032 grad: 0.1672 (0.1797) loss: 0.7982 (0.8086) time: 0.1480 data: 0.0492 max mem: 9377 +Train: [68] [ 300/6250] eta: 0:18:18 lr: 0.000032 grad: 0.1615 (0.1744) loss: 0.7809 (0.8043) time: 0.1816 data: 0.0754 max mem: 9377 +Train: [68] [ 400/6250] eta: 0:17:10 lr: 0.000032 grad: 0.1622 (0.1699) loss: 0.7868 (0.8027) time: 0.1597 data: 0.0617 max mem: 9377 +Train: [68] [ 500/6250] eta: 0:16:29 lr: 0.000032 grad: 0.1452 (0.1659) loss: 0.8039 (0.8020) time: 0.1672 data: 0.0681 max mem: 9377 +Train: [68] [ 600/6250] eta: 0:15:52 lr: 0.000032 grad: 0.1396 (0.1625) loss: 0.8039 (0.8019) time: 0.1462 data: 0.0432 max mem: 9377 +Train: [68] [ 700/6250] eta: 0:15:28 lr: 0.000032 grad: 0.1364 (0.1594) loss: 0.7976 (0.8017) time: 0.1557 data: 0.0630 max mem: 9377 +Train: [68] [ 800/6250] eta: 0:15:12 lr: 0.000032 grad: 0.1528 (0.1584) loss: 0.7871 (0.8010) time: 0.1527 data: 0.0613 max mem: 9377 +Train: [68] [ 900/6250] eta: 0:14:57 lr: 0.000032 grad: 0.1395 (0.1574) loss: 0.7987 (0.8003) time: 0.1541 data: 0.0621 max mem: 9377 +Train: [68] [1000/6250] eta: 0:14:39 lr: 0.000032 grad: 0.1427 (0.1567) loss: 0.7927 (0.7995) time: 0.1822 data: 0.0958 max mem: 9377 +Train: [68] [1100/6250] eta: 0:14:37 lr: 0.000032 grad: 0.1442 (0.1557) loss: 0.7914 (0.7992) time: 0.1885 data: 0.1075 max mem: 9377 +Train: [68] [1200/6250] eta: 0:14:23 lr: 0.000032 grad: 0.1393 (0.1547) loss: 0.7978 (0.7990) time: 0.1906 data: 0.1030 max mem: 9377 +Train: [68] [1300/6250] eta: 0:14:07 lr: 0.000031 grad: 0.1393 (0.1536) loss: 0.7940 (0.7989) time: 0.1635 data: 0.0783 max mem: 9377 +Train: [68] [1400/6250] eta: 0:13:51 lr: 0.000031 grad: 0.1351 (0.1529) loss: 0.8013 (0.7987) time: 0.1728 data: 0.0799 max mem: 9377 +Train: [68] [1500/6250] eta: 0:13:35 lr: 0.000031 grad: 0.1396 (0.1522) loss: 0.8065 (0.7986) time: 0.1773 data: 0.0822 max mem: 9377 +Train: [68] [1600/6250] eta: 0:13:22 lr: 0.000031 grad: 0.1354 (0.1515) loss: 0.7979 (0.7986) time: 0.1951 data: 0.1088 max mem: 9377 +Train: [68] [1700/6250] eta: 0:13:03 lr: 0.000031 grad: 0.1414 (0.1511) loss: 0.7963 (0.7985) time: 0.1781 data: 0.0731 max mem: 9377 +Train: [68] [1800/6250] eta: 0:12:45 lr: 0.000031 grad: 0.1345 (0.1506) loss: 0.8021 (0.7984) time: 0.1526 data: 0.0697 max mem: 9377 +Train: [68] [1900/6250] eta: 0:12:30 lr: 0.000031 grad: 0.1287 (0.1501) loss: 0.8069 (0.7985) time: 0.2085 data: 0.1213 max mem: 9377 +Train: [68] [2000/6250] eta: 0:12:08 lr: 0.000031 grad: 0.1387 (0.1496) loss: 0.7978 (0.7985) time: 0.1485 data: 0.0585 max mem: 9377 +Train: [68] [2100/6250] eta: 0:11:47 lr: 0.000031 grad: 0.1410 (0.1494) loss: 0.7977 (0.7983) time: 0.1520 data: 0.0529 max mem: 9377 +Train: [68] [2200/6250] eta: 0:11:29 lr: 0.000031 grad: 0.1431 (0.1490) loss: 0.7971 (0.7983) time: 0.1649 data: 0.0704 max mem: 9377 +Train: [68] [2300/6250] eta: 0:11:10 lr: 0.000031 grad: 0.1396 (0.1488) loss: 0.8005 (0.7983) time: 0.1613 data: 0.0699 max mem: 9377 +Train: [68] [2400/6250] eta: 0:10:50 lr: 0.000031 grad: 0.1347 (0.1485) loss: 0.7975 (0.7982) time: 0.1318 data: 0.0448 max mem: 9377 +Train: [68] [2500/6250] eta: 0:10:31 lr: 0.000031 grad: 0.1404 (0.1483) loss: 0.7926 (0.7982) time: 0.1595 data: 0.0724 max mem: 9377 +Train: [68] [2600/6250] eta: 0:10:12 lr: 0.000031 grad: 0.1414 (0.1482) loss: 0.7982 (0.7982) time: 0.1348 data: 0.0323 max mem: 9377 +Train: [68] [2700/6250] eta: 0:09:54 lr: 0.000031 grad: 0.1429 (0.1481) loss: 0.7901 (0.7981) time: 0.1659 data: 0.0717 max mem: 9377 +Train: [68] [2800/6250] eta: 0:09:35 lr: 0.000031 grad: 0.1350 (0.1479) loss: 0.8031 (0.7981) time: 0.1666 data: 0.0737 max mem: 9377 +Train: [68] [2900/6250] eta: 0:09:21 lr: 0.000031 grad: 0.1398 (0.1478) loss: 0.7959 (0.7982) time: 0.1726 data: 0.0898 max mem: 9377 +Train: [68] [3000/6250] eta: 0:09:04 lr: 0.000031 grad: 0.1421 (0.1478) loss: 0.7979 (0.7981) time: 0.1596 data: 0.0723 max mem: 9377 +Train: [68] [3100/6250] eta: 0:08:47 lr: 0.000031 grad: 0.1435 (0.1477) loss: 0.7999 (0.7981) time: 0.1506 data: 0.0571 max mem: 9377 +Train: [68] [3200/6250] eta: 0:08:30 lr: 0.000031 grad: 0.1412 (0.1476) loss: 0.7906 (0.7979) time: 0.1708 data: 0.0782 max mem: 9377 +Train: [68] [3300/6250] eta: 0:08:13 lr: 0.000031 grad: 0.1528 (0.1476) loss: 0.7911 (0.7978) time: 0.2087 data: 0.1152 max mem: 9377 +Train: [68] [3400/6250] eta: 0:07:55 lr: 0.000031 grad: 0.1414 (0.1477) loss: 0.7904 (0.7976) time: 0.1693 data: 0.0820 max mem: 9377 +Train: [68] [3500/6250] eta: 0:07:38 lr: 0.000031 grad: 0.1400 (0.1476) loss: 0.8019 (0.7975) time: 0.1627 data: 0.0721 max mem: 9377 +Train: [68] [3600/6250] eta: 0:07:21 lr: 0.000031 grad: 0.1408 (0.1474) loss: 0.8003 (0.7975) time: 0.1466 data: 0.0626 max mem: 9377 +Train: [68] [3700/6250] eta: 0:07:03 lr: 0.000031 grad: 0.1446 (0.1473) loss: 0.7919 (0.7974) time: 0.1528 data: 0.0584 max mem: 9377 +Train: [68] [3800/6250] eta: 0:06:46 lr: 0.000031 grad: 0.1359 (0.1472) loss: 0.8059 (0.7974) time: 0.1688 data: 0.0849 max mem: 9377 +Train: [68] [3900/6250] eta: 0:06:28 lr: 0.000031 grad: 0.1301 (0.1471) loss: 0.7988 (0.7973) time: 0.1446 data: 0.0513 max mem: 9377 +Train: [68] [4000/6250] eta: 0:06:11 lr: 0.000031 grad: 0.1379 (0.1470) loss: 0.7986 (0.7972) time: 0.1629 data: 0.0748 max mem: 9377 +Train: [68] [4100/6250] eta: 0:05:54 lr: 0.000031 grad: 0.1402 (0.1470) loss: 0.7969 (0.7971) time: 0.1598 data: 0.0742 max mem: 9377 +Train: [68] [4200/6250] eta: 0:05:37 lr: 0.000031 grad: 0.1380 (0.1468) loss: 0.7942 (0.7971) time: 0.1358 data: 0.0467 max mem: 9377 +Train: [68] [4300/6250] eta: 0:05:20 lr: 0.000031 grad: 0.1300 (0.1467) loss: 0.8096 (0.7971) time: 0.1495 data: 0.0561 max mem: 9377 +Train: [68] [4400/6250] eta: 0:05:03 lr: 0.000031 grad: 0.1413 (0.1466) loss: 0.7981 (0.7971) time: 0.1637 data: 0.0829 max mem: 9377 +Train: [68] [4500/6250] eta: 0:04:47 lr: 0.000031 grad: 0.1443 (0.1464) loss: 0.7831 (0.7971) time: 0.1705 data: 0.0839 max mem: 9377 +Train: [68] [4600/6250] eta: 0:04:30 lr: 0.000031 grad: 0.1393 (0.1463) loss: 0.7972 (0.7972) time: 0.1713 data: 0.0874 max mem: 9377 +Train: [68] [4700/6250] eta: 0:04:13 lr: 0.000031 grad: 0.1376 (0.1463) loss: 0.8071 (0.7972) time: 0.1313 data: 0.0352 max mem: 9377 +Train: [68] [4800/6250] eta: 0:03:57 lr: 0.000030 grad: 0.1410 (0.1463) loss: 0.7964 (0.7972) time: 0.1543 data: 0.0656 max mem: 9377 +Train: [68] [4900/6250] eta: 0:03:40 lr: 0.000030 grad: 0.1403 (0.1462) loss: 0.7914 (0.7971) time: 0.1571 data: 0.0580 max mem: 9377 +Train: [68] [5000/6250] eta: 0:03:24 lr: 0.000030 grad: 0.1431 (0.1462) loss: 0.7910 (0.7971) time: 0.1645 data: 0.0782 max mem: 9377 +Train: [68] [5100/6250] eta: 0:03:07 lr: 0.000030 grad: 0.1385 (0.1461) loss: 0.8021 (0.7971) time: 0.1933 data: 0.1093 max mem: 9377 +Train: [68] [5200/6250] eta: 0:02:51 lr: 0.000030 grad: 0.1346 (0.1460) loss: 0.8043 (0.7972) time: 0.1786 data: 0.0915 max mem: 9377 +Train: [68] [5300/6250] eta: 0:02:35 lr: 0.000030 grad: 0.1413 (0.1459) loss: 0.7968 (0.7971) time: 0.1547 data: 0.0777 max mem: 9377 +Train: [68] [5400/6250] eta: 0:02:19 lr: 0.000030 grad: 0.1412 (0.1459) loss: 0.7964 (0.7971) time: 0.1712 data: 0.0869 max mem: 9377 +Train: [68] [5500/6250] eta: 0:02:03 lr: 0.000030 grad: 0.1409 (0.1459) loss: 0.7966 (0.7971) time: 0.1852 data: 0.0930 max mem: 9377 +Train: [68] [5600/6250] eta: 0:01:47 lr: 0.000030 grad: 0.1384 (0.1457) loss: 0.7998 (0.7971) time: 0.1730 data: 0.0820 max mem: 9377 +Train: [68] [5700/6250] eta: 0:01:30 lr: 0.000030 grad: 0.1295 (0.1456) loss: 0.8051 (0.7971) time: 0.1807 data: 0.0771 max mem: 9377 +Train: [68] [5800/6250] eta: 0:01:14 lr: 0.000030 grad: 0.1419 (0.1455) loss: 0.7972 (0.7971) time: 0.1604 data: 0.0620 max mem: 9377 +Train: [68] [5900/6250] eta: 0:00:57 lr: 0.000030 grad: 0.1358 (0.1454) loss: 0.7987 (0.7971) time: 0.1729 data: 0.0889 max mem: 9377 +Train: [68] [6000/6250] eta: 0:00:41 lr: 0.000030 grad: 0.1376 (0.1453) loss: 0.7979 (0.7971) time: 0.1631 data: 0.0709 max mem: 9377 +Train: [68] [6100/6250] eta: 0:00:24 lr: 0.000030 grad: 0.1466 (0.1452) loss: 0.7972 (0.7971) time: 0.1481 data: 0.0573 max mem: 9377 +Train: [68] [6200/6250] eta: 0:00:08 lr: 0.000030 grad: 0.1353 (0.1452) loss: 0.7956 (0.7971) time: 0.1431 data: 0.0516 max mem: 9377 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1454 (0.1452) loss: 0.7918 (0.7971) time: 0.1614 data: 0.0771 max mem: 9377 +Train: [68] Total time: 0:17:12 (0.1652 s / it) +Averaged stats: lr: 0.000030 grad: 0.1454 (0.1452) loss: 0.7918 (0.7971) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:05:12 loss: 0.8190 (0.8190) time: 5.0377 data: 4.9619 max mem: 9377 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.8081 (0.8048) time: 0.1062 data: 0.0790 max mem: 9377 +Eval (hcp-train-subset): [68] Total time: 0:00:15 (0.2442 s / it) +Averaged stats (hcp-train-subset): loss: 0.8081 (0.8048) +Eval (hcp-val): [68] [ 0/62] eta: 0:05:39 loss: 0.8371 (0.8371) time: 5.4804 data: 5.4495 max mem: 9377 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.8377 (0.8393) time: 0.1304 data: 0.1046 max mem: 9377 +Eval (hcp-val): [68] Total time: 0:00:14 (0.2360 s / it) +Averaged stats (hcp-val): loss: 0.8377 (0.8393) +Eval (nsd-val): [68] [ 0/62] eta: 0:06:17 loss: 0.8143 (0.8143) time: 6.0835 data: 6.0524 max mem: 9377 +Eval (nsd-val): [68] [61/62] eta: 0:00:00 loss: 0.8218 (0.8237) time: 0.1097 data: 0.0843 max mem: 9377 +Eval (nsd-val): [68] Total time: 0:00:14 (0.2409 s / it) +Averaged stats (nsd-val): loss: 0.8218 (0.8237) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [69] [ 0/6250] eta: 10:05:27 lr: 0.000030 grad: 0.3250 (0.3250) loss: 0.7468 (0.7468) time: 5.8123 data: 5.5365 max mem: 9377 +Train: [69] [ 100/6250] eta: 0:23:16 lr: 0.000030 grad: 0.1709 (0.2216) loss: 0.8052 (0.8031) time: 0.1762 data: 0.0555 max mem: 9377 +Train: [69] [ 200/6250] eta: 0:20:30 lr: 0.000030 grad: 0.1485 (0.1963) loss: 0.8058 (0.8028) time: 0.1746 data: 0.0683 max mem: 9377 +Train: [69] [ 300/6250] eta: 0:19:24 lr: 0.000030 grad: 0.1422 (0.1818) loss: 0.8146 (0.8040) time: 0.1819 data: 0.0592 max mem: 9377 +Train: [69] [ 400/6250] eta: 0:18:24 lr: 0.000030 grad: 0.1329 (0.1721) loss: 0.8063 (0.8045) time: 0.1720 data: 0.0713 max mem: 9377 +Train: [69] [ 500/6250] eta: 0:17:38 lr: 0.000030 grad: 0.1308 (0.1654) loss: 0.8119 (0.8053) time: 0.1731 data: 0.0753 max mem: 9377 +Train: [69] [ 600/6250] eta: 0:16:58 lr: 0.000030 grad: 0.1291 (0.1602) loss: 0.8086 (0.8060) time: 0.1757 data: 0.0697 max mem: 9377 +Train: [69] [ 700/6250] eta: 0:16:21 lr: 0.000030 grad: 0.1364 (0.1570) loss: 0.8031 (0.8061) time: 0.1550 data: 0.0556 max mem: 9377 +Train: [69] [ 800/6250] eta: 0:15:58 lr: 0.000030 grad: 0.1348 (0.1547) loss: 0.8085 (0.8061) time: 0.1673 data: 0.0733 max mem: 9377 +Train: [69] [ 900/6250] eta: 0:15:33 lr: 0.000030 grad: 0.1356 (0.1530) loss: 0.7961 (0.8057) time: 0.1566 data: 0.0592 max mem: 9377 +Train: [69] [1000/6250] eta: 0:15:08 lr: 0.000030 grad: 0.1496 (0.1523) loss: 0.7887 (0.8048) time: 0.1670 data: 0.0776 max mem: 9377 +Train: [69] [1100/6250] eta: 0:14:55 lr: 0.000030 grad: 0.1512 (0.1518) loss: 0.7949 (0.8038) time: 0.1749 data: 0.0907 max mem: 9377 +Train: [69] [1200/6250] eta: 0:14:32 lr: 0.000030 grad: 0.1356 (0.1513) loss: 0.7914 (0.8031) time: 0.1496 data: 0.0666 max mem: 9377 +Train: [69] [1300/6250] eta: 0:14:11 lr: 0.000030 grad: 0.1438 (0.1506) loss: 0.7961 (0.8024) time: 0.1686 data: 0.0935 max mem: 9377 +Train: [69] [1400/6250] eta: 0:13:50 lr: 0.000030 grad: 0.1315 (0.1498) loss: 0.7942 (0.8021) time: 0.1900 data: 0.0980 max mem: 9377 +Train: [69] [1500/6250] eta: 0:13:34 lr: 0.000030 grad: 0.1343 (0.1492) loss: 0.8017 (0.8019) time: 0.2085 data: 0.1211 max mem: 9377 +Train: [69] [1600/6250] eta: 0:13:12 lr: 0.000030 grad: 0.1350 (0.1488) loss: 0.7962 (0.8016) time: 0.1630 data: 0.0613 max mem: 9377 +Train: [69] [1700/6250] eta: 0:12:53 lr: 0.000030 grad: 0.1384 (0.1487) loss: 0.7962 (0.8012) time: 0.1670 data: 0.0756 max mem: 9377 +Train: [69] [1800/6250] eta: 0:12:36 lr: 0.000030 grad: 0.1384 (0.1484) loss: 0.7961 (0.8009) time: 0.1772 data: 0.0806 max mem: 9377 +Train: [69] [1900/6250] eta: 0:12:16 lr: 0.000030 grad: 0.1384 (0.1479) loss: 0.8004 (0.8007) time: 0.1606 data: 0.0653 max mem: 9377 +Train: [69] [2000/6250] eta: 0:11:58 lr: 0.000030 grad: 0.1496 (0.1477) loss: 0.7944 (0.8005) time: 0.1708 data: 0.0862 max mem: 9377 +Train: [69] [2100/6250] eta: 0:11:38 lr: 0.000029 grad: 0.1417 (0.1475) loss: 0.7883 (0.8003) time: 0.1617 data: 0.0657 max mem: 9377 +Train: [69] [2200/6250] eta: 0:11:18 lr: 0.000029 grad: 0.1361 (0.1473) loss: 0.7977 (0.8000) time: 0.1650 data: 0.0794 max mem: 9377 +Train: [69] [2300/6250] eta: 0:11:00 lr: 0.000029 grad: 0.1381 (0.1472) loss: 0.8028 (0.7998) time: 0.1544 data: 0.0607 max mem: 9377 +Train: [69] [2400/6250] eta: 0:10:42 lr: 0.000029 grad: 0.1449 (0.1470) loss: 0.8026 (0.7997) time: 0.1501 data: 0.0632 max mem: 9377 +Train: [69] [2500/6250] eta: 0:10:25 lr: 0.000029 grad: 0.1494 (0.1467) loss: 0.7873 (0.7996) time: 0.1730 data: 0.0859 max mem: 9377 +Train: [69] [2600/6250] eta: 0:10:07 lr: 0.000029 grad: 0.1473 (0.1466) loss: 0.7926 (0.7995) time: 0.1525 data: 0.0634 max mem: 9377 +Train: [69] [2700/6250] eta: 0:09:50 lr: 0.000029 grad: 0.1434 (0.1464) loss: 0.7977 (0.7992) time: 0.1293 data: 0.0348 max mem: 9377 +Train: [69] [2800/6250] eta: 0:09:32 lr: 0.000029 grad: 0.1355 (0.1463) loss: 0.7963 (0.7991) time: 0.1527 data: 0.0666 max mem: 9377 +Train: [69] [2900/6250] eta: 0:09:17 lr: 0.000029 grad: 0.1402 (0.1463) loss: 0.7973 (0.7990) time: 0.1887 data: 0.1014 max mem: 9377 +Train: [69] [3000/6250] eta: 0:08:59 lr: 0.000029 grad: 0.1393 (0.1460) loss: 0.7975 (0.7990) time: 0.1589 data: 0.0693 max mem: 9377 +Train: [69] [3100/6250] eta: 0:08:42 lr: 0.000029 grad: 0.1381 (0.1458) loss: 0.8000 (0.7990) time: 0.1546 data: 0.0630 max mem: 9377 +Train: [69] [3200/6250] eta: 0:08:23 lr: 0.000029 grad: 0.1338 (0.1458) loss: 0.8038 (0.7989) time: 0.1422 data: 0.0577 max mem: 9377 +Train: [69] [3300/6250] eta: 0:08:06 lr: 0.000029 grad: 0.1368 (0.1457) loss: 0.8020 (0.7990) time: 0.1601 data: 0.0694 max mem: 9377 +Train: [69] [3400/6250] eta: 0:07:49 lr: 0.000029 grad: 0.1441 (0.1457) loss: 0.7924 (0.7989) time: 0.1731 data: 0.0783 max mem: 9377 +Train: [69] [3500/6250] eta: 0:07:32 lr: 0.000029 grad: 0.1379 (0.1456) loss: 0.7967 (0.7989) time: 0.1793 data: 0.0893 max mem: 9377 +Train: [69] [3600/6250] eta: 0:07:15 lr: 0.000029 grad: 0.1318 (0.1455) loss: 0.8011 (0.7989) time: 0.1639 data: 0.0736 max mem: 9377 +Train: [69] [3700/6250] eta: 0:06:58 lr: 0.000029 grad: 0.1386 (0.1453) loss: 0.7946 (0.7990) time: 0.1594 data: 0.0651 max mem: 9377 +Train: [69] [3800/6250] eta: 0:06:40 lr: 0.000029 grad: 0.1388 (0.1451) loss: 0.8037 (0.7991) time: 0.1446 data: 0.0501 max mem: 9377 +Train: [69] [3900/6250] eta: 0:06:23 lr: 0.000029 grad: 0.1377 (0.1450) loss: 0.8022 (0.7992) time: 0.1637 data: 0.0739 max mem: 9377 +Train: [69] [4000/6250] eta: 0:06:06 lr: 0.000029 grad: 0.1342 (0.1448) loss: 0.8067 (0.7993) time: 0.1428 data: 0.0505 max mem: 9377 +Train: [69] [4100/6250] eta: 0:05:50 lr: 0.000029 grad: 0.1379 (0.1447) loss: 0.8018 (0.7993) time: 0.1426 data: 0.0491 max mem: 9377 +Train: [69] [4200/6250] eta: 0:05:33 lr: 0.000029 grad: 0.1460 (0.1447) loss: 0.8007 (0.7993) time: 0.1557 data: 0.0699 max mem: 9377 +Train: [69] [4300/6250] eta: 0:05:16 lr: 0.000029 grad: 0.1370 (0.1447) loss: 0.8005 (0.7993) time: 0.1671 data: 0.0863 max mem: 9377 +Train: [69] [4400/6250] eta: 0:05:00 lr: 0.000029 grad: 0.1431 (0.1448) loss: 0.7918 (0.7992) time: 0.1672 data: 0.0761 max mem: 9377 +Train: [69] [4500/6250] eta: 0:04:44 lr: 0.000029 grad: 0.1475 (0.1449) loss: 0.7944 (0.7992) time: 0.1604 data: 0.0664 max mem: 9377 +Train: [69] [4600/6250] eta: 0:04:27 lr: 0.000029 grad: 0.1424 (0.1449) loss: 0.7918 (0.7991) time: 0.1573 data: 0.0615 max mem: 9377 +Train: [69] [4700/6250] eta: 0:04:11 lr: 0.000029 grad: 0.1392 (0.1450) loss: 0.7965 (0.7990) time: 0.1540 data: 0.0610 max mem: 9377 +Train: [69] [4800/6250] eta: 0:03:55 lr: 0.000029 grad: 0.1422 (0.1450) loss: 0.8007 (0.7989) time: 0.1565 data: 0.0615 max mem: 9377 +Train: [69] [4900/6250] eta: 0:03:38 lr: 0.000029 grad: 0.1430 (0.1451) loss: 0.7931 (0.7987) time: 0.1764 data: 0.0898 max mem: 9377 +Train: [69] [5000/6250] eta: 0:03:22 lr: 0.000029 grad: 0.1529 (0.1452) loss: 0.7948 (0.7986) time: 0.1604 data: 0.0736 max mem: 9377 +Train: [69] [5100/6250] eta: 0:03:06 lr: 0.000029 grad: 0.1445 (0.1453) loss: 0.7993 (0.7984) time: 0.1661 data: 0.0824 max mem: 9377 +Train: [69] [5200/6250] eta: 0:02:50 lr: 0.000029 grad: 0.1389 (0.1455) loss: 0.7958 (0.7983) time: 0.1782 data: 0.0907 max mem: 9377 +Train: [69] [5300/6250] eta: 0:02:34 lr: 0.000029 grad: 0.1457 (0.1456) loss: 0.7994 (0.7981) time: 0.1517 data: 0.0620 max mem: 9377 +Train: [69] [5400/6250] eta: 0:02:18 lr: 0.000029 grad: 0.1547 (0.1456) loss: 0.7932 (0.7980) time: 0.1551 data: 0.0709 max mem: 9377 +Train: [69] [5500/6250] eta: 0:02:01 lr: 0.000029 grad: 0.1527 (0.1458) loss: 0.7862 (0.7978) time: 0.1488 data: 0.0677 max mem: 9377 +Train: [69] [5600/6250] eta: 0:01:45 lr: 0.000028 grad: 0.1437 (0.1458) loss: 0.7917 (0.7977) time: 0.1710 data: 0.0826 max mem: 9377 +Train: [69] [5700/6250] eta: 0:01:29 lr: 0.000028 grad: 0.1487 (0.1460) loss: 0.7904 (0.7975) time: 0.1335 data: 0.0415 max mem: 9377 +Train: [69] [5800/6250] eta: 0:01:13 lr: 0.000028 grad: 0.1434 (0.1461) loss: 0.7851 (0.7974) time: 0.1553 data: 0.0607 max mem: 9377 +Train: [69] [5900/6250] eta: 0:00:56 lr: 0.000028 grad: 0.1398 (0.1461) loss: 0.7958 (0.7972) time: 0.1647 data: 0.0762 max mem: 9377 +Train: [69] [6000/6250] eta: 0:00:40 lr: 0.000028 grad: 0.1452 (0.1461) loss: 0.7881 (0.7971) time: 0.1679 data: 0.0676 max mem: 9377 +Train: [69] [6100/6250] eta: 0:00:24 lr: 0.000028 grad: 0.1449 (0.1462) loss: 0.7984 (0.7970) time: 0.1617 data: 0.0607 max mem: 9377 +Train: [69] [6200/6250] eta: 0:00:08 lr: 0.000028 grad: 0.1459 (0.1462) loss: 0.7878 (0.7969) time: 0.1218 data: 0.0202 max mem: 9377 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.1458 (0.1462) loss: 0.7864 (0.7969) time: 0.1626 data: 0.0738 max mem: 9377 +Train: [69] Total time: 0:17:00 (0.1633 s / it) +Averaged stats: lr: 0.000028 grad: 0.1458 (0.1462) loss: 0.7864 (0.7969) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:05:31 loss: 0.8183 (0.8183) time: 5.3522 data: 5.2998 max mem: 9377 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.8023 (0.8028) time: 0.1384 data: 0.1113 max mem: 9377 +Eval (hcp-train-subset): [69] Total time: 0:00:15 (0.2446 s / it) +Averaged stats (hcp-train-subset): loss: 0.8023 (0.8028) +Making plots (hcp-train-subset): example=48 +Eval (hcp-val): [69] [ 0/62] eta: 0:06:07 loss: 0.8328 (0.8328) time: 5.9311 data: 5.8940 max mem: 9377 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.8382 (0.8386) time: 0.1428 data: 0.1158 max mem: 9377 +Eval (hcp-val): [69] Total time: 0:00:15 (0.2502 s / it) +Averaged stats (hcp-val): loss: 0.8382 (0.8386) +Making plots (hcp-val): example=51 +Eval (nsd-val): [69] [ 0/62] eta: 0:04:47 loss: 0.8093 (0.8093) time: 4.6315 data: 4.5275 max mem: 9377 +Eval (nsd-val): [69] [61/62] eta: 0:00:00 loss: 0.8167 (0.8183) time: 0.1431 data: 0.1176 max mem: 9377 +Eval (nsd-val): [69] Total time: 0:00:15 (0.2460 s / it) +Averaged stats (nsd-val): loss: 0.8167 (0.8183) +Making plots (nsd-val): example=1 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00069.pth +Train: [70] [ 0/6250] eta: 13:03:27 lr: 0.000028 grad: 0.5765 (0.5765) loss: 0.8294 (0.8294) time: 7.5213 data: 7.4167 max mem: 9377 +Train: [70] [ 100/6250] eta: 0:23:44 lr: 0.000028 grad: 0.1652 (0.2085) loss: 0.8062 (0.8074) time: 0.1586 data: 0.0533 max mem: 9377 +Train: [70] [ 200/6250] eta: 0:20:05 lr: 0.000028 grad: 0.1480 (0.1810) loss: 0.8035 (0.8071) time: 0.1477 data: 0.0452 max mem: 9377 +Train: [70] [ 300/6250] eta: 0:18:53 lr: 0.000028 grad: 0.1633 (0.1739) loss: 0.8024 (0.8050) time: 0.1620 data: 0.0582 max mem: 9377 +Train: [70] [ 400/6250] eta: 0:18:01 lr: 0.000028 grad: 0.1522 (0.1705) loss: 0.7916 (0.8022) time: 0.1522 data: 0.0457 max mem: 9377 +Train: [70] [ 500/6250] eta: 0:17:14 lr: 0.000028 grad: 0.1506 (0.1670) loss: 0.7809 (0.7997) time: 0.1738 data: 0.0795 max mem: 9377 +Train: [70] [ 600/6250] eta: 0:16:39 lr: 0.000028 grad: 0.1535 (0.1640) loss: 0.7996 (0.7998) time: 0.1554 data: 0.0665 max mem: 9377 +Train: [70] [ 700/6250] eta: 0:16:11 lr: 0.000028 grad: 0.1407 (0.1611) loss: 0.8052 (0.7998) time: 0.1773 data: 0.0865 max mem: 9377 +Train: [70] [ 800/6250] eta: 0:15:42 lr: 0.000028 grad: 0.1355 (0.1595) loss: 0.7917 (0.7998) time: 0.1510 data: 0.0531 max mem: 9377 +Train: [70] [ 900/6250] eta: 0:15:17 lr: 0.000028 grad: 0.1408 (0.1580) loss: 0.7936 (0.7996) time: 0.1628 data: 0.0707 max mem: 9377 +Train: [70] [1000/6250] eta: 0:14:54 lr: 0.000028 grad: 0.1392 (0.1571) loss: 0.7975 (0.7995) time: 0.1761 data: 0.0807 max mem: 9377 +Train: [70] [1100/6250] eta: 0:14:38 lr: 0.000028 grad: 0.1498 (0.1562) loss: 0.8004 (0.7993) time: 0.1546 data: 0.0751 max mem: 9377 +Train: [70] [1200/6250] eta: 0:14:20 lr: 0.000028 grad: 0.1376 (0.1554) loss: 0.7963 (0.7990) time: 0.1605 data: 0.0785 max mem: 9377 +Train: [70] [1300/6250] eta: 0:14:00 lr: 0.000028 grad: 0.1471 (0.1549) loss: 0.7946 (0.7985) time: 0.1625 data: 0.0735 max mem: 9377 +Train: [70] [1400/6250] eta: 0:13:39 lr: 0.000028 grad: 0.1548 (0.1546) loss: 0.7731 (0.7978) time: 0.1352 data: 0.0505 max mem: 9377 +Train: [70] [1500/6250] eta: 0:13:23 lr: 0.000028 grad: 0.1482 (0.1542) loss: 0.7888 (0.7973) time: 0.1730 data: 0.0884 max mem: 9377 +Train: [70] [1600/6250] eta: 0:13:03 lr: 0.000028 grad: 0.1509 (0.1539) loss: 0.7870 (0.7968) time: 0.1563 data: 0.0694 max mem: 9377 +Train: [70] [1700/6250] eta: 0:12:43 lr: 0.000028 grad: 0.1482 (0.1536) loss: 0.7920 (0.7965) time: 0.1256 data: 0.0317 max mem: 9377 +Train: [70] [1800/6250] eta: 0:12:22 lr: 0.000028 grad: 0.1400 (0.1533) loss: 0.7980 (0.7964) time: 0.1524 data: 0.0568 max mem: 9377 +Train: [70] [1900/6250] eta: 0:12:00 lr: 0.000028 grad: 0.1414 (0.1529) loss: 0.7972 (0.7963) time: 0.1436 data: 0.0438 max mem: 9377 +Train: [70] [2000/6250] eta: 0:11:42 lr: 0.000028 grad: 0.1456 (0.1526) loss: 0.7909 (0.7962) time: 0.1629 data: 0.0717 max mem: 9377 +Train: [70] [2100/6250] eta: 0:11:21 lr: 0.000028 grad: 0.1446 (0.1523) loss: 0.7946 (0.7961) time: 0.1388 data: 0.0488 max mem: 9377 +Train: [70] [2200/6250] eta: 0:11:03 lr: 0.000028 grad: 0.1425 (0.1522) loss: 0.7962 (0.7960) time: 0.1638 data: 0.0794 max mem: 9377 +Train: [70] [2300/6250] eta: 0:10:44 lr: 0.000028 grad: 0.1534 (0.1519) loss: 0.7826 (0.7959) time: 0.1707 data: 0.0791 max mem: 9377 +Train: [70] [2400/6250] eta: 0:10:28 lr: 0.000028 grad: 0.1436 (0.1516) loss: 0.7973 (0.7959) time: 0.1161 data: 0.0259 max mem: 9377 +Train: [70] [2500/6250] eta: 0:10:12 lr: 0.000028 grad: 0.1513 (0.1513) loss: 0.7906 (0.7958) time: 0.1344 data: 0.0519 max mem: 9377 +Train: [70] [2600/6250] eta: 0:09:56 lr: 0.000028 grad: 0.1403 (0.1511) loss: 0.8000 (0.7957) time: 0.1760 data: 0.0984 max mem: 9377 +Train: [70] [2700/6250] eta: 0:09:40 lr: 0.000028 grad: 0.1463 (0.1509) loss: 0.7868 (0.7957) time: 0.1733 data: 0.0899 max mem: 9377 +Train: [70] [2800/6250] eta: 0:09:23 lr: 0.000028 grad: 0.1398 (0.1506) loss: 0.7937 (0.7956) time: 0.1874 data: 0.1108 max mem: 9377 +Train: [70] [2900/6250] eta: 0:09:07 lr: 0.000028 grad: 0.1416 (0.1504) loss: 0.7988 (0.7956) time: 0.1409 data: 0.0533 max mem: 9377 +Train: [70] [3000/6250] eta: 0:08:50 lr: 0.000027 grad: 0.1484 (0.1504) loss: 0.7850 (0.7955) time: 0.1552 data: 0.0740 max mem: 9377 +Train: [70] [3100/6250] eta: 0:08:35 lr: 0.000027 grad: 0.1444 (0.1504) loss: 0.7944 (0.7954) time: 0.2000 data: 0.1218 max mem: 9377 +Train: [70] [3200/6250] eta: 0:08:19 lr: 0.000027 grad: 0.1480 (0.1503) loss: 0.7970 (0.7954) time: 0.1659 data: 0.0844 max mem: 9377 +Train: [70] [3300/6250] eta: 0:08:04 lr: 0.000027 grad: 0.1396 (0.1502) loss: 0.7964 (0.7953) time: 0.1791 data: 0.0756 max mem: 9377 +Train: [70] [3400/6250] eta: 0:07:49 lr: 0.000027 grad: 0.1461 (0.1501) loss: 0.7986 (0.7953) time: 0.1743 data: 0.0763 max mem: 9377 +Train: [70] [3500/6250] eta: 0:07:32 lr: 0.000027 grad: 0.1417 (0.1500) loss: 0.7998 (0.7952) time: 0.1482 data: 0.0531 max mem: 9377 +Train: [70] [3600/6250] eta: 0:07:15 lr: 0.000027 grad: 0.1394 (0.1499) loss: 0.8022 (0.7953) time: 0.1529 data: 0.0648 max mem: 9377 +Train: [70] [3700/6250] eta: 0:06:58 lr: 0.000027 grad: 0.1459 (0.1498) loss: 0.7886 (0.7952) time: 0.1422 data: 0.0535 max mem: 9377 +Train: [70] [3800/6250] eta: 0:06:41 lr: 0.000027 grad: 0.1486 (0.1498) loss: 0.7909 (0.7951) time: 0.1542 data: 0.0659 max mem: 9377 +Train: [70] [3900/6250] eta: 0:06:24 lr: 0.000027 grad: 0.1414 (0.1498) loss: 0.7962 (0.7950) time: 0.1616 data: 0.0758 max mem: 9377 +Train: [70] [4000/6250] eta: 0:06:07 lr: 0.000027 grad: 0.1432 (0.1498) loss: 0.7912 (0.7950) time: 0.1738 data: 0.0865 max mem: 9377 +Train: [70] [4100/6250] eta: 0:05:51 lr: 0.000027 grad: 0.1442 (0.1496) loss: 0.7860 (0.7950) time: 0.1764 data: 0.0922 max mem: 9377 +Train: [70] [4200/6250] eta: 0:05:34 lr: 0.000027 grad: 0.1512 (0.1496) loss: 0.7932 (0.7950) time: 0.1511 data: 0.0669 max mem: 9377 +Train: [70] [4300/6250] eta: 0:05:17 lr: 0.000027 grad: 0.1343 (0.1494) loss: 0.8049 (0.7951) time: 0.1777 data: 0.0966 max mem: 9377 +Train: [70] [4400/6250] eta: 0:05:01 lr: 0.000027 grad: 0.1491 (0.1493) loss: 0.7899 (0.7950) time: 0.1240 data: 0.0219 max mem: 9377 +Train: [70] [4500/6250] eta: 0:04:45 lr: 0.000027 grad: 0.1437 (0.1493) loss: 0.7939 (0.7950) time: 0.2069 data: 0.1229 max mem: 9377 +Train: [70] [4600/6250] eta: 0:04:28 lr: 0.000027 grad: 0.1509 (0.1493) loss: 0.7861 (0.7949) time: 0.1591 data: 0.0651 max mem: 9377 +Train: [70] [4700/6250] eta: 0:04:12 lr: 0.000027 grad: 0.1464 (0.1494) loss: 0.7884 (0.7948) time: 0.1648 data: 0.0768 max mem: 9377 +Train: [70] [4800/6250] eta: 0:03:55 lr: 0.000027 grad: 0.1521 (0.1494) loss: 0.7913 (0.7947) time: 0.1457 data: 0.0596 max mem: 9377 +Train: [70] [4900/6250] eta: 0:03:39 lr: 0.000027 grad: 0.1435 (0.1495) loss: 0.7983 (0.7947) time: 0.1387 data: 0.0495 max mem: 9377 +Train: [70] [5000/6250] eta: 0:03:22 lr: 0.000027 grad: 0.1552 (0.1496) loss: 0.7909 (0.7946) time: 0.1663 data: 0.0776 max mem: 9377 +Train: [70] [5100/6250] eta: 0:03:06 lr: 0.000027 grad: 0.1443 (0.1497) loss: 0.7904 (0.7945) time: 0.1536 data: 0.0604 max mem: 9377 +Train: [70] [5200/6250] eta: 0:02:50 lr: 0.000027 grad: 0.1470 (0.1497) loss: 0.7893 (0.7944) time: 0.1824 data: 0.0935 max mem: 9377 +Train: [70] [5300/6250] eta: 0:02:34 lr: 0.000027 grad: 0.1474 (0.1498) loss: 0.7935 (0.7945) time: 0.1969 data: 0.1118 max mem: 9377 +Train: [70] [5400/6250] eta: 0:02:18 lr: 0.000027 grad: 0.1437 (0.1497) loss: 0.7998 (0.7945) time: 0.1625 data: 0.0782 max mem: 9377 +Train: [70] [5500/6250] eta: 0:02:02 lr: 0.000027 grad: 0.1487 (0.1498) loss: 0.7915 (0.7945) time: 0.1981 data: 0.1177 max mem: 9377 +Train: [70] [5600/6250] eta: 0:01:46 lr: 0.000027 grad: 0.1517 (0.1498) loss: 0.7983 (0.7945) time: 0.2058 data: 0.1080 max mem: 9377 +Train: [70] [5700/6250] eta: 0:01:30 lr: 0.000027 grad: 0.1486 (0.1498) loss: 0.7968 (0.7945) time: 0.1688 data: 0.0688 max mem: 9377 +Train: [70] [5800/6250] eta: 0:01:14 lr: 0.000027 grad: 0.1498 (0.1498) loss: 0.7858 (0.7945) time: 0.1651 data: 0.0751 max mem: 9377 +Train: [70] [5900/6250] eta: 0:00:57 lr: 0.000027 grad: 0.1455 (0.1498) loss: 0.7946 (0.7944) time: 0.1857 data: 0.0994 max mem: 9377 +Train: [70] [6000/6250] eta: 0:00:41 lr: 0.000027 grad: 0.1407 (0.1497) loss: 0.7890 (0.7944) time: 0.1565 data: 0.0693 max mem: 9377 +Train: [70] [6100/6250] eta: 0:00:24 lr: 0.000027 grad: 0.1479 (0.1497) loss: 0.7971 (0.7944) time: 0.1605 data: 0.0673 max mem: 9377 +Train: [70] [6200/6250] eta: 0:00:08 lr: 0.000027 grad: 0.1533 (0.1497) loss: 0.7946 (0.7943) time: 0.1500 data: 0.0516 max mem: 9377 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.1446 (0.1497) loss: 0.7965 (0.7943) time: 0.1686 data: 0.0777 max mem: 9377 +Train: [70] Total time: 0:17:14 (0.1654 s / it) +Averaged stats: lr: 0.000027 grad: 0.1446 (0.1497) loss: 0.7965 (0.7943) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:05:39 loss: 0.8199 (0.8199) time: 5.4837 data: 5.4451 max mem: 9377 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.7960 (0.8017) time: 0.1304 data: 0.1049 max mem: 9377 +Eval (hcp-train-subset): [70] Total time: 0:00:15 (0.2472 s / it) +Averaged stats (hcp-train-subset): loss: 0.7960 (0.8017) +Eval (hcp-val): [70] [ 0/62] eta: 0:06:10 loss: 0.8389 (0.8389) time: 5.9779 data: 5.9467 max mem: 9377 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.8372 (0.8386) time: 0.1362 data: 0.1095 max mem: 9377 +Eval (hcp-val): [70] Total time: 0:00:15 (0.2484 s / it) +Averaged stats (hcp-val): loss: 0.8372 (0.8386) +Eval (nsd-val): [70] [ 0/62] eta: 0:05:44 loss: 0.8139 (0.8139) time: 5.5546 data: 5.5236 max mem: 9377 +Eval (nsd-val): [70] [61/62] eta: 0:00:00 loss: 0.8202 (0.8215) time: 0.1447 data: 0.1193 max mem: 9377 +Eval (nsd-val): [70] Total time: 0:00:15 (0.2441 s / it) +Averaged stats (nsd-val): loss: 0.8202 (0.8215) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 12:01:20 lr: 0.000027 grad: 0.1252 (0.1252) loss: 0.8458 (0.8458) time: 6.9249 data: 6.8214 max mem: 9377 +Train: [71] [ 100/6250] eta: 0:24:02 lr: 0.000027 grad: 0.1748 (0.2316) loss: 0.8179 (0.8013) time: 0.2005 data: 0.0992 max mem: 9377 +Train: [71] [ 200/6250] eta: 0:20:53 lr: 0.000027 grad: 0.1576 (0.2014) loss: 0.8175 (0.8075) time: 0.1679 data: 0.0686 max mem: 9377 +Train: [71] [ 300/6250] eta: 0:19:17 lr: 0.000027 grad: 0.1603 (0.1899) loss: 0.8026 (0.8058) time: 0.1653 data: 0.0713 max mem: 9377 +Train: [71] [ 400/6250] eta: 0:18:26 lr: 0.000026 grad: 0.1467 (0.1824) loss: 0.7965 (0.8040) time: 0.1648 data: 0.0677 max mem: 9377 +Train: [71] [ 500/6250] eta: 0:17:43 lr: 0.000026 grad: 0.1580 (0.1773) loss: 0.7934 (0.8025) time: 0.1816 data: 0.0895 max mem: 9377 +Train: [71] [ 600/6250] eta: 0:17:03 lr: 0.000026 grad: 0.1556 (0.1743) loss: 0.7911 (0.8008) time: 0.1548 data: 0.0593 max mem: 9377 +Train: [71] [ 700/6250] eta: 0:16:33 lr: 0.000026 grad: 0.1640 (0.1722) loss: 0.7854 (0.7994) time: 0.1430 data: 0.0459 max mem: 9377 +Train: [71] [ 800/6250] eta: 0:16:05 lr: 0.000026 grad: 0.1506 (0.1698) loss: 0.8072 (0.7990) time: 0.1519 data: 0.0571 max mem: 9377 +Train: [71] [ 900/6250] eta: 0:15:45 lr: 0.000026 grad: 0.1518 (0.1682) loss: 0.7984 (0.7988) time: 0.1884 data: 0.0929 max mem: 9377 +Train: [71] [1000/6250] eta: 0:15:14 lr: 0.000026 grad: 0.1511 (0.1669) loss: 0.7923 (0.7984) time: 0.1496 data: 0.0624 max mem: 9377 +Train: [71] [1100/6250] eta: 0:14:52 lr: 0.000026 grad: 0.1438 (0.1656) loss: 0.7931 (0.7981) time: 0.1727 data: 0.0859 max mem: 9377 +Train: [71] [1200/6250] eta: 0:14:34 lr: 0.000026 grad: 0.1565 (0.1645) loss: 0.7907 (0.7977) time: 0.1656 data: 0.0846 max mem: 9377 +Train: [71] [1300/6250] eta: 0:14:12 lr: 0.000026 grad: 0.1424 (0.1635) loss: 0.7962 (0.7973) time: 0.1146 data: 0.0340 max mem: 9377 +Train: [71] [1400/6250] eta: 0:13:50 lr: 0.000026 grad: 0.1586 (0.1625) loss: 0.7892 (0.7970) time: 0.1706 data: 0.0900 max mem: 9377 +Train: [71] [1500/6250] eta: 0:13:31 lr: 0.000026 grad: 0.1521 (0.1618) loss: 0.7915 (0.7967) time: 0.1556 data: 0.0650 max mem: 9377 +Train: [71] [1600/6250] eta: 0:13:13 lr: 0.000026 grad: 0.1456 (0.1610) loss: 0.7917 (0.7965) time: 0.1692 data: 0.0714 max mem: 9377 +Train: [71] [1700/6250] eta: 0:12:53 lr: 0.000026 grad: 0.1412 (0.1605) loss: 0.7948 (0.7962) time: 0.1742 data: 0.0794 max mem: 9377 +Train: [71] [1800/6250] eta: 0:12:32 lr: 0.000026 grad: 0.1470 (0.1602) loss: 0.8007 (0.7962) time: 0.1630 data: 0.0772 max mem: 9377 +Train: [71] [1900/6250] eta: 0:12:12 lr: 0.000026 grad: 0.1515 (0.1599) loss: 0.7768 (0.7958) time: 0.1599 data: 0.0717 max mem: 9377 +Train: [71] [2000/6250] eta: 0:11:52 lr: 0.000026 grad: 0.1435 (0.1594) loss: 0.7926 (0.7957) time: 0.1575 data: 0.0739 max mem: 9377 +Train: [71] [2100/6250] eta: 0:11:32 lr: 0.000026 grad: 0.1543 (0.1591) loss: 0.7935 (0.7955) time: 0.1438 data: 0.0481 max mem: 9377 +Train: [71] [2200/6250] eta: 0:11:11 lr: 0.000026 grad: 0.1502 (0.1592) loss: 0.7940 (0.7954) time: 0.1509 data: 0.0579 max mem: 9377 +Train: [71] [2300/6250] eta: 0:10:52 lr: 0.000026 grad: 0.1447 (0.1588) loss: 0.7947 (0.7955) time: 0.1435 data: 0.0540 max mem: 9377 +Train: [71] [2400/6250] eta: 0:10:32 lr: 0.000026 grad: 0.1449 (0.1583) loss: 0.7919 (0.7954) time: 0.1372 data: 0.0369 max mem: 9377 +Train: [71] [2500/6250] eta: 0:10:13 lr: 0.000026 grad: 0.1468 (0.1579) loss: 0.8035 (0.7954) time: 0.1565 data: 0.0652 max mem: 9377 +Train: [71] [2600/6250] eta: 0:09:56 lr: 0.000026 grad: 0.1477 (0.1575) loss: 0.7856 (0.7954) time: 0.1764 data: 0.0916 max mem: 9377 +Train: [71] [2700/6250] eta: 0:09:39 lr: 0.000026 grad: 0.1425 (0.1571) loss: 0.7908 (0.7955) time: 0.1784 data: 0.0947 max mem: 9377 +Train: [71] [2800/6250] eta: 0:09:23 lr: 0.000026 grad: 0.1442 (0.1568) loss: 0.8005 (0.7955) time: 0.1743 data: 0.0796 max mem: 9377 +Train: [71] [2900/6250] eta: 0:09:07 lr: 0.000026 grad: 0.1353 (0.1564) loss: 0.8040 (0.7956) time: 0.1880 data: 0.1061 max mem: 9377 +Train: [71] [3000/6250] eta: 0:08:52 lr: 0.000026 grad: 0.1438 (0.1561) loss: 0.7967 (0.7956) time: 0.1610 data: 0.0782 max mem: 9377 +Train: [71] [3100/6250] eta: 0:08:36 lr: 0.000026 grad: 0.1426 (0.1558) loss: 0.7918 (0.7956) time: 0.1617 data: 0.0753 max mem: 9377 +Train: [71] [3200/6250] eta: 0:08:19 lr: 0.000026 grad: 0.1496 (0.1557) loss: 0.7917 (0.7955) time: 0.1574 data: 0.0728 max mem: 9377 +Train: [71] [3300/6250] eta: 0:08:05 lr: 0.000026 grad: 0.1577 (0.1556) loss: 0.7941 (0.7955) time: 0.1856 data: 0.0946 max mem: 9377 +Train: [71] [3400/6250] eta: 0:07:48 lr: 0.000026 grad: 0.1586 (0.1556) loss: 0.7939 (0.7953) time: 0.1570 data: 0.0661 max mem: 9377 +Train: [71] [3500/6250] eta: 0:07:31 lr: 0.000026 grad: 0.1536 (0.1557) loss: 0.8042 (0.7953) time: 0.1625 data: 0.0707 max mem: 9377 +Train: [71] [3600/6250] eta: 0:07:14 lr: 0.000026 grad: 0.1513 (0.1558) loss: 0.7902 (0.7952) time: 0.1692 data: 0.0857 max mem: 9377 +Train: [71] [3700/6250] eta: 0:06:56 lr: 0.000026 grad: 0.1593 (0.1557) loss: 0.7946 (0.7951) time: 0.1451 data: 0.0540 max mem: 9377 +Train: [71] [3800/6250] eta: 0:06:40 lr: 0.000026 grad: 0.1599 (0.1558) loss: 0.7801 (0.7950) time: 0.1693 data: 0.0772 max mem: 9377 +Train: [71] [3900/6250] eta: 0:06:22 lr: 0.000026 grad: 0.1442 (0.1557) loss: 0.8007 (0.7950) time: 0.1501 data: 0.0578 max mem: 9377 +Train: [71] [4000/6250] eta: 0:06:05 lr: 0.000026 grad: 0.1535 (0.1557) loss: 0.7922 (0.7950) time: 0.1411 data: 0.0489 max mem: 9377 +Train: [71] [4100/6250] eta: 0:05:48 lr: 0.000026 grad: 0.1538 (0.1557) loss: 0.7908 (0.7949) time: 0.1398 data: 0.0516 max mem: 9377 +Train: [71] [4200/6250] eta: 0:05:31 lr: 0.000025 grad: 0.1461 (0.1557) loss: 0.8029 (0.7949) time: 0.1541 data: 0.0660 max mem: 9377 +Train: [71] [4300/6250] eta: 0:05:14 lr: 0.000025 grad: 0.1487 (0.1556) loss: 0.7888 (0.7949) time: 0.1549 data: 0.0562 max mem: 9377 +Train: [71] [4400/6250] eta: 0:04:58 lr: 0.000025 grad: 0.1430 (0.1554) loss: 0.7918 (0.7949) time: 0.1494 data: 0.0671 max mem: 9377 +Train: [71] [4500/6250] eta: 0:04:42 lr: 0.000025 grad: 0.1520 (0.1554) loss: 0.7964 (0.7950) time: 0.1420 data: 0.0544 max mem: 9377 +Train: [71] [4600/6250] eta: 0:04:25 lr: 0.000025 grad: 0.1466 (0.1553) loss: 0.7955 (0.7949) time: 0.1839 data: 0.0921 max mem: 9377 +Train: [71] [4700/6250] eta: 0:04:09 lr: 0.000025 grad: 0.1557 (0.1553) loss: 0.7917 (0.7948) time: 0.1508 data: 0.0631 max mem: 9377 +Train: [71] [4800/6250] eta: 0:03:53 lr: 0.000025 grad: 0.1623 (0.1553) loss: 0.7917 (0.7947) time: 0.1651 data: 0.0749 max mem: 9377 +Train: [71] [4900/6250] eta: 0:03:37 lr: 0.000025 grad: 0.1530 (0.1553) loss: 0.7924 (0.7946) time: 0.1644 data: 0.0763 max mem: 9377 +Train: [71] [5000/6250] eta: 0:03:21 lr: 0.000025 grad: 0.1496 (0.1552) loss: 0.7882 (0.7946) time: 0.1641 data: 0.0851 max mem: 9377 +Train: [71] [5100/6250] eta: 0:03:06 lr: 0.000025 grad: 0.1532 (0.1552) loss: 0.7859 (0.7945) time: 0.1606 data: 0.0742 max mem: 9377 +Train: [71] [5200/6250] eta: 0:02:50 lr: 0.000025 grad: 0.1569 (0.1552) loss: 0.7904 (0.7943) time: 0.1497 data: 0.0652 max mem: 9377 +Train: [71] [5300/6250] eta: 0:02:34 lr: 0.000025 grad: 0.1548 (0.1552) loss: 0.7918 (0.7942) time: 0.1679 data: 0.0895 max mem: 9377 +Train: [71] [5400/6250] eta: 0:02:17 lr: 0.000025 grad: 0.1470 (0.1553) loss: 0.7876 (0.7940) time: 0.1721 data: 0.0974 max mem: 9377 +Train: [71] [5500/6250] eta: 0:02:01 lr: 0.000025 grad: 0.1483 (0.1553) loss: 0.7997 (0.7939) time: 0.1654 data: 0.0781 max mem: 9377 +Train: [71] [5600/6250] eta: 0:01:45 lr: 0.000025 grad: 0.1552 (0.1553) loss: 0.7878 (0.7939) time: 0.1633 data: 0.0711 max mem: 9377 +Train: [71] [5700/6250] eta: 0:01:29 lr: 0.000025 grad: 0.1476 (0.1553) loss: 0.7887 (0.7938) time: 0.1605 data: 0.0716 max mem: 9377 +Train: [71] [5800/6250] eta: 0:01:13 lr: 0.000025 grad: 0.1497 (0.1553) loss: 0.7856 (0.7938) time: 0.1605 data: 0.0504 max mem: 9377 +Train: [71] [5900/6250] eta: 0:00:56 lr: 0.000025 grad: 0.1443 (0.1552) loss: 0.7909 (0.7937) time: 0.1536 data: 0.0500 max mem: 9377 +Train: [71] [6000/6250] eta: 0:00:40 lr: 0.000025 grad: 0.1504 (0.1551) loss: 0.7957 (0.7937) time: 0.1382 data: 0.0367 max mem: 9377 +Train: [71] [6100/6250] eta: 0:00:24 lr: 0.000025 grad: 0.1547 (0.1551) loss: 0.7894 (0.7937) time: 0.1398 data: 0.0556 max mem: 9377 +Train: [71] [6200/6250] eta: 0:00:08 lr: 0.000025 grad: 0.1535 (0.1551) loss: 0.7951 (0.7937) time: 0.1801 data: 0.0983 max mem: 9377 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1514 (0.1550) loss: 0.7892 (0.7937) time: 0.1775 data: 0.0879 max mem: 9377 +Train: [71] Total time: 0:17:00 (0.1633 s / it) +Averaged stats: lr: 0.000025 grad: 0.1514 (0.1550) loss: 0.7892 (0.7937) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:04:13 loss: 0.8135 (0.8135) time: 4.0952 data: 4.0148 max mem: 9377 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.7972 (0.8019) time: 0.1284 data: 0.1016 max mem: 9377 +Eval (hcp-train-subset): [71] Total time: 0:00:14 (0.2383 s / it) +Averaged stats (hcp-train-subset): loss: 0.7972 (0.8019) +Eval (hcp-val): [71] [ 0/62] eta: 0:04:06 loss: 0.8365 (0.8365) time: 3.9786 data: 3.8812 max mem: 9377 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.8370 (0.8390) time: 0.1181 data: 0.0930 max mem: 9377 +Eval (hcp-val): [71] Total time: 0:00:14 (0.2305 s / it) +Averaged stats (hcp-val): loss: 0.8370 (0.8390) +Eval (nsd-val): [71] [ 0/62] eta: 0:06:15 loss: 0.8085 (0.8085) time: 6.0517 data: 6.0200 max mem: 9377 +Eval (nsd-val): [71] [61/62] eta: 0:00:00 loss: 0.8180 (0.8194) time: 0.1368 data: 0.1114 max mem: 9377 +Eval (nsd-val): [71] Total time: 0:00:14 (0.2323 s / it) +Averaged stats (nsd-val): loss: 0.8180 (0.8194) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [72] [ 0/6250] eta: 10:45:09 lr: 0.000025 grad: 0.1244 (0.1244) loss: 0.8531 (0.8531) time: 6.1935 data: 6.0393 max mem: 9377 +Train: [72] [ 100/6250] eta: 0:22:33 lr: 0.000025 grad: 0.1748 (0.2183) loss: 0.7958 (0.8053) time: 0.1690 data: 0.0639 max mem: 9377 +Train: [72] [ 200/6250] eta: 0:19:29 lr: 0.000025 grad: 0.1499 (0.1998) loss: 0.8066 (0.8042) time: 0.1660 data: 0.0561 max mem: 9377 +Train: [72] [ 300/6250] eta: 0:18:12 lr: 0.000025 grad: 0.1632 (0.1886) loss: 0.7915 (0.8027) time: 0.1459 data: 0.0447 max mem: 9377 +Train: [72] [ 400/6250] eta: 0:17:16 lr: 0.000025 grad: 0.1565 (0.1811) loss: 0.7932 (0.8005) time: 0.1338 data: 0.0404 max mem: 9377 +Train: [72] [ 500/6250] eta: 0:16:39 lr: 0.000025 grad: 0.1510 (0.1753) loss: 0.7950 (0.7995) time: 0.1491 data: 0.0524 max mem: 9377 +Train: [72] [ 600/6250] eta: 0:16:07 lr: 0.000025 grad: 0.1567 (0.1713) loss: 0.7942 (0.7990) time: 0.1562 data: 0.0484 max mem: 9377 +Train: [72] [ 700/6250] eta: 0:15:40 lr: 0.000025 grad: 0.1432 (0.1685) loss: 0.7902 (0.7980) time: 0.1736 data: 0.0844 max mem: 9377 +Train: [72] [ 800/6250] eta: 0:15:18 lr: 0.000025 grad: 0.1483 (0.1661) loss: 0.7901 (0.7975) time: 0.0982 data: 0.0003 max mem: 9377 +Train: [72] [ 900/6250] eta: 0:15:03 lr: 0.000025 grad: 0.1616 (0.1654) loss: 0.7834 (0.7965) time: 0.1653 data: 0.0706 max mem: 9377 +Train: [72] [1000/6250] eta: 0:14:56 lr: 0.000025 grad: 0.1506 (0.1643) loss: 0.7913 (0.7957) time: 0.2122 data: 0.1241 max mem: 9377 +Train: [72] [1100/6250] eta: 0:14:30 lr: 0.000025 grad: 0.1538 (0.1633) loss: 0.7892 (0.7953) time: 0.1658 data: 0.0827 max mem: 9377 +Train: [72] [1200/6250] eta: 0:14:16 lr: 0.000025 grad: 0.1493 (0.1624) loss: 0.7910 (0.7949) time: 0.1898 data: 0.1028 max mem: 9377 +Train: [72] [1300/6250] eta: 0:13:56 lr: 0.000025 grad: 0.1444 (0.1614) loss: 0.7904 (0.7945) time: 0.1625 data: 0.0707 max mem: 9377 +Train: [72] [1400/6250] eta: 0:13:35 lr: 0.000025 grad: 0.1503 (0.1608) loss: 0.7765 (0.7939) time: 0.1642 data: 0.0723 max mem: 9377 +Train: [72] [1500/6250] eta: 0:13:14 lr: 0.000025 grad: 0.1475 (0.1601) loss: 0.7896 (0.7936) time: 0.1678 data: 0.0818 max mem: 9377 +Train: [72] [1600/6250] eta: 0:12:55 lr: 0.000025 grad: 0.1481 (0.1596) loss: 0.7895 (0.7934) time: 0.1584 data: 0.0752 max mem: 9377 +Train: [72] [1700/6250] eta: 0:12:36 lr: 0.000024 grad: 0.1472 (0.1590) loss: 0.7955 (0.7933) time: 0.1318 data: 0.0356 max mem: 9377 +Train: [72] [1800/6250] eta: 0:12:19 lr: 0.000024 grad: 0.1479 (0.1586) loss: 0.7932 (0.7931) time: 0.1540 data: 0.0654 max mem: 9377 +Train: [72] [1900/6250] eta: 0:12:02 lr: 0.000024 grad: 0.1523 (0.1583) loss: 0.7870 (0.7929) time: 0.1563 data: 0.0613 max mem: 9377 +Train: [72] [2000/6250] eta: 0:11:44 lr: 0.000024 grad: 0.1456 (0.1580) loss: 0.7875 (0.7926) time: 0.1505 data: 0.0616 max mem: 9377 +Train: [72] [2100/6250] eta: 0:11:27 lr: 0.000024 grad: 0.1456 (0.1578) loss: 0.7892 (0.7925) time: 0.1595 data: 0.0763 max mem: 9377 +Train: [72] [2200/6250] eta: 0:11:10 lr: 0.000024 grad: 0.1417 (0.1574) loss: 0.7917 (0.7925) time: 0.1808 data: 0.0933 max mem: 9377 +Train: [72] [2300/6250] eta: 0:10:52 lr: 0.000024 grad: 0.1476 (0.1571) loss: 0.7871 (0.7925) time: 0.1603 data: 0.0700 max mem: 9377 +Train: [72] [2400/6250] eta: 0:10:34 lr: 0.000024 grad: 0.1566 (0.1569) loss: 0.7897 (0.7925) time: 0.1606 data: 0.0842 max mem: 9377 +Train: [72] [2500/6250] eta: 0:10:17 lr: 0.000024 grad: 0.1452 (0.1567) loss: 0.7913 (0.7924) time: 0.1744 data: 0.0892 max mem: 9377 +Train: [72] [2600/6250] eta: 0:09:57 lr: 0.000024 grad: 0.1535 (0.1564) loss: 0.7938 (0.7924) time: 0.1489 data: 0.0614 max mem: 9377 +Train: [72] [2700/6250] eta: 0:09:41 lr: 0.000024 grad: 0.1352 (0.1559) loss: 0.8000 (0.7925) time: 0.2065 data: 0.1249 max mem: 9377 +Train: [72] [2800/6250] eta: 0:09:22 lr: 0.000024 grad: 0.1504 (0.1557) loss: 0.7993 (0.7927) time: 0.1356 data: 0.0529 max mem: 9377 +Train: [72] [2900/6250] eta: 0:09:05 lr: 0.000024 grad: 0.1435 (0.1555) loss: 0.8008 (0.7928) time: 0.1057 data: 0.0138 max mem: 9377 +Train: [72] [3000/6250] eta: 0:08:50 lr: 0.000024 grad: 0.1388 (0.1553) loss: 0.7952 (0.7929) time: 0.1354 data: 0.0450 max mem: 9377 +Train: [72] [3100/6250] eta: 0:08:33 lr: 0.000024 grad: 0.1439 (0.1550) loss: 0.8023 (0.7930) time: 0.1675 data: 0.0840 max mem: 9377 +Train: [72] [3200/6250] eta: 0:08:16 lr: 0.000024 grad: 0.1359 (0.1547) loss: 0.7958 (0.7931) time: 0.1600 data: 0.0832 max mem: 9377 +Train: [72] [3300/6250] eta: 0:07:59 lr: 0.000024 grad: 0.1421 (0.1545) loss: 0.7937 (0.7932) time: 0.1545 data: 0.0523 max mem: 9377 +Train: [72] [3400/6250] eta: 0:07:43 lr: 0.000024 grad: 0.1529 (0.1542) loss: 0.7933 (0.7934) time: 0.1439 data: 0.0544 max mem: 9377 +Train: [72] [3500/6250] eta: 0:07:27 lr: 0.000024 grad: 0.1435 (0.1540) loss: 0.7993 (0.7935) time: 0.1709 data: 0.0781 max mem: 9377 +Train: [72] [3600/6250] eta: 0:07:11 lr: 0.000024 grad: 0.1469 (0.1538) loss: 0.7944 (0.7936) time: 0.1580 data: 0.0588 max mem: 9377 +Train: [72] [3700/6250] eta: 0:06:54 lr: 0.000024 grad: 0.1390 (0.1536) loss: 0.8048 (0.7937) time: 0.1757 data: 0.0877 max mem: 9377 +Train: [72] [3800/6250] eta: 0:06:38 lr: 0.000024 grad: 0.1467 (0.1535) loss: 0.7972 (0.7937) time: 0.1632 data: 0.0828 max mem: 9377 +Train: [72] [3900/6250] eta: 0:06:21 lr: 0.000024 grad: 0.1518 (0.1535) loss: 0.7939 (0.7938) time: 0.1600 data: 0.0702 max mem: 9377 +Train: [72] [4000/6250] eta: 0:06:06 lr: 0.000024 grad: 0.1413 (0.1534) loss: 0.7957 (0.7939) time: 0.1600 data: 0.0715 max mem: 9377 +Train: [72] [4100/6250] eta: 0:05:49 lr: 0.000024 grad: 0.1445 (0.1533) loss: 0.7878 (0.7939) time: 0.1353 data: 0.0422 max mem: 9377 +Train: [72] [4200/6250] eta: 0:05:33 lr: 0.000024 grad: 0.1514 (0.1532) loss: 0.7953 (0.7940) time: 0.1296 data: 0.0382 max mem: 9377 +Train: [72] [4300/6250] eta: 0:05:16 lr: 0.000024 grad: 0.1541 (0.1532) loss: 0.7953 (0.7940) time: 0.1313 data: 0.0434 max mem: 9377 +Train: [72] [4400/6250] eta: 0:04:59 lr: 0.000024 grad: 0.1367 (0.1530) loss: 0.7982 (0.7941) time: 0.1316 data: 0.0363 max mem: 9377 +Train: [72] [4500/6250] eta: 0:04:43 lr: 0.000024 grad: 0.1440 (0.1530) loss: 0.8008 (0.7941) time: 0.1603 data: 0.0725 max mem: 9377 +Train: [72] [4600/6250] eta: 0:04:27 lr: 0.000024 grad: 0.1536 (0.1530) loss: 0.7980 (0.7942) time: 0.1789 data: 0.0931 max mem: 9377 +Train: [72] [4700/6250] eta: 0:04:10 lr: 0.000024 grad: 0.1454 (0.1529) loss: 0.7969 (0.7942) time: 0.1588 data: 0.0741 max mem: 9377 +Train: [72] [4800/6250] eta: 0:03:54 lr: 0.000024 grad: 0.1450 (0.1529) loss: 0.7977 (0.7943) time: 0.1439 data: 0.0561 max mem: 9377 +Train: [72] [4900/6250] eta: 0:03:38 lr: 0.000024 grad: 0.1326 (0.1529) loss: 0.8016 (0.7943) time: 0.1681 data: 0.0709 max mem: 9377 +Train: [72] [5000/6250] eta: 0:03:21 lr: 0.000024 grad: 0.1556 (0.1528) loss: 0.8034 (0.7944) time: 0.1527 data: 0.0644 max mem: 9377 +Train: [72] [5100/6250] eta: 0:03:05 lr: 0.000024 grad: 0.1499 (0.1528) loss: 0.8022 (0.7944) time: 0.1643 data: 0.0765 max mem: 9377 +Train: [72] [5200/6250] eta: 0:02:49 lr: 0.000024 grad: 0.1364 (0.1527) loss: 0.8030 (0.7945) time: 0.1625 data: 0.0724 max mem: 9377 +Train: [72] [5300/6250] eta: 0:02:33 lr: 0.000024 grad: 0.1447 (0.1526) loss: 0.7969 (0.7946) time: 0.1794 data: 0.0824 max mem: 9377 +Train: [72] [5400/6250] eta: 0:02:17 lr: 0.000024 grad: 0.1361 (0.1526) loss: 0.8052 (0.7947) time: 0.1667 data: 0.0793 max mem: 9377 +Train: [72] [5500/6250] eta: 0:02:01 lr: 0.000023 grad: 0.1411 (0.1525) loss: 0.7989 (0.7948) time: 0.1767 data: 0.0904 max mem: 9377 +Train: [72] [5600/6250] eta: 0:01:45 lr: 0.000023 grad: 0.1468 (0.1523) loss: 0.8050 (0.7949) time: 0.1884 data: 0.1004 max mem: 9377 +Train: [72] [5700/6250] eta: 0:01:29 lr: 0.000023 grad: 0.1471 (0.1523) loss: 0.8001 (0.7950) time: 0.1788 data: 0.0879 max mem: 9377 +Train: [72] [5800/6250] eta: 0:01:13 lr: 0.000023 grad: 0.1387 (0.1522) loss: 0.8064 (0.7950) time: 0.1630 data: 0.0688 max mem: 9377 +Train: [72] [5900/6250] eta: 0:00:56 lr: 0.000023 grad: 0.1470 (0.1521) loss: 0.8015 (0.7951) time: 0.1650 data: 0.0807 max mem: 9377 +Train: [72] [6000/6250] eta: 0:00:40 lr: 0.000023 grad: 0.1515 (0.1520) loss: 0.7988 (0.7952) time: 0.1538 data: 0.0688 max mem: 9377 +Train: [72] [6100/6250] eta: 0:00:24 lr: 0.000023 grad: 0.1518 (0.1520) loss: 0.7912 (0.7953) time: 0.1397 data: 0.0532 max mem: 9377 +Train: [72] [6200/6250] eta: 0:00:08 lr: 0.000023 grad: 0.1513 (0.1519) loss: 0.7981 (0.7953) time: 0.1790 data: 0.0957 max mem: 9377 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1483 (0.1518) loss: 0.7939 (0.7954) time: 0.1745 data: 0.0868 max mem: 9377 +Train: [72] Total time: 0:16:58 (0.1630 s / it) +Averaged stats: lr: 0.000023 grad: 0.1483 (0.1518) loss: 0.7939 (0.7954) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:05:49 loss: 0.8125 (0.8125) time: 5.6318 data: 5.5828 max mem: 9377 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.7983 (0.8010) time: 0.1387 data: 0.1119 max mem: 9377 +Eval (hcp-train-subset): [72] Total time: 0:00:14 (0.2392 s / it) +Averaged stats (hcp-train-subset): loss: 0.7983 (0.8010) +Eval (hcp-val): [72] [ 0/62] eta: 0:04:05 loss: 0.8429 (0.8429) time: 3.9646 data: 3.8839 max mem: 9377 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.8378 (0.8392) time: 0.1181 data: 0.0913 max mem: 9377 +Eval (hcp-val): [72] Total time: 0:00:14 (0.2391 s / it) +Averaged stats (hcp-val): loss: 0.8378 (0.8392) +Eval (nsd-val): [72] [ 0/62] eta: 0:04:41 loss: 0.8107 (0.8107) time: 4.5359 data: 4.4706 max mem: 9377 +Eval (nsd-val): [72] [61/62] eta: 0:00:00 loss: 0.8181 (0.8197) time: 0.1409 data: 0.1156 max mem: 9377 +Eval (nsd-val): [72] Total time: 0:00:14 (0.2393 s / it) +Averaged stats (nsd-val): loss: 0.8181 (0.8197) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 12:08:36 lr: 0.000023 grad: 0.3226 (0.3226) loss: 0.7627 (0.7627) time: 6.9946 data: 6.8912 max mem: 9377 +Train: [73] [ 100/6250] eta: 0:22:44 lr: 0.000023 grad: 0.2042 (0.2360) loss: 0.7991 (0.7989) time: 0.1817 data: 0.0738 max mem: 9377 +Train: [73] [ 200/6250] eta: 0:19:20 lr: 0.000023 grad: 0.1791 (0.2123) loss: 0.7899 (0.7951) time: 0.1638 data: 0.0677 max mem: 9377 +Train: [73] [ 300/6250] eta: 0:17:53 lr: 0.000023 grad: 0.1552 (0.1970) loss: 0.7832 (0.7942) time: 0.1400 data: 0.0456 max mem: 9377 +Train: [73] [ 400/6250] eta: 0:16:48 lr: 0.000023 grad: 0.1640 (0.1898) loss: 0.7897 (0.7933) time: 0.1331 data: 0.0306 max mem: 9377 +Train: [73] [ 500/6250] eta: 0:16:10 lr: 0.000023 grad: 0.1576 (0.1838) loss: 0.7985 (0.7935) time: 0.1838 data: 0.0838 max mem: 9377 +Train: [73] [ 600/6250] eta: 0:15:31 lr: 0.000023 grad: 0.1436 (0.1784) loss: 0.8003 (0.7948) time: 0.1689 data: 0.0666 max mem: 9377 +Train: [73] [ 700/6250] eta: 0:15:03 lr: 0.000023 grad: 0.1431 (0.1743) loss: 0.8046 (0.7964) time: 0.1346 data: 0.0368 max mem: 9377 +Train: [73] [ 800/6250] eta: 0:14:45 lr: 0.000023 grad: 0.1459 (0.1711) loss: 0.8066 (0.7973) time: 0.1790 data: 0.0814 max mem: 9377 +Train: [73] [ 900/6250] eta: 0:14:29 lr: 0.000023 grad: 0.1480 (0.1681) loss: 0.8094 (0.7982) time: 0.1584 data: 0.0530 max mem: 9377 +Train: [73] [1000/6250] eta: 0:14:07 lr: 0.000023 grad: 0.1383 (0.1654) loss: 0.8085 (0.7990) time: 0.1540 data: 0.0670 max mem: 9377 +Train: [73] [1100/6250] eta: 0:13:49 lr: 0.000023 grad: 0.1438 (0.1634) loss: 0.8040 (0.7997) time: 0.1563 data: 0.0641 max mem: 9377 +Train: [73] [1200/6250] eta: 0:13:47 lr: 0.000023 grad: 0.1386 (0.1620) loss: 0.8061 (0.8001) time: 0.1991 data: 0.1184 max mem: 9377 +Train: [73] [1300/6250] eta: 0:13:41 lr: 0.000023 grad: 0.1403 (0.1607) loss: 0.8044 (0.8002) time: 0.1922 data: 0.0958 max mem: 9377 +Train: [73] [1400/6250] eta: 0:13:29 lr: 0.000023 grad: 0.1414 (0.1595) loss: 0.8022 (0.8004) time: 0.1809 data: 0.0892 max mem: 9377 +Train: [73] [1500/6250] eta: 0:13:11 lr: 0.000023 grad: 0.1423 (0.1588) loss: 0.7983 (0.8004) time: 0.1586 data: 0.0725 max mem: 9377 +Train: [73] [1600/6250] eta: 0:12:59 lr: 0.000023 grad: 0.1390 (0.1580) loss: 0.8101 (0.8005) time: 0.1911 data: 0.1019 max mem: 9377 +Train: [73] [1700/6250] eta: 0:12:44 lr: 0.000023 grad: 0.1509 (0.1574) loss: 0.8004 (0.8005) time: 0.1806 data: 0.0885 max mem: 9377 +Train: [73] [1800/6250] eta: 0:12:28 lr: 0.000023 grad: 0.1398 (0.1568) loss: 0.8040 (0.8006) time: 0.1655 data: 0.0640 max mem: 9377 +Train: [73] [1900/6250] eta: 0:12:10 lr: 0.000023 grad: 0.1364 (0.1563) loss: 0.7981 (0.8004) time: 0.1790 data: 0.0811 max mem: 9377 +Train: [73] [2000/6250] eta: 0:11:51 lr: 0.000023 grad: 0.1372 (0.1561) loss: 0.8059 (0.8002) time: 0.1822 data: 0.0889 max mem: 9377 +Train: [73] [2100/6250] eta: 0:11:31 lr: 0.000023 grad: 0.1482 (0.1559) loss: 0.7881 (0.7999) time: 0.1511 data: 0.0537 max mem: 9377 +Train: [73] [2200/6250] eta: 0:11:12 lr: 0.000023 grad: 0.1566 (0.1559) loss: 0.7993 (0.7995) time: 0.1624 data: 0.0712 max mem: 9377 +Train: [73] [2300/6250] eta: 0:10:55 lr: 0.000023 grad: 0.1504 (0.1560) loss: 0.7946 (0.7994) time: 0.1244 data: 0.0277 max mem: 9377 +Train: [73] [2400/6250] eta: 0:10:35 lr: 0.000023 grad: 0.1530 (0.1560) loss: 0.7908 (0.7991) time: 0.1331 data: 0.0452 max mem: 9377 +Train: [73] [2500/6250] eta: 0:10:17 lr: 0.000023 grad: 0.1552 (0.1562) loss: 0.7937 (0.7987) time: 0.1659 data: 0.0816 max mem: 9377 +Train: [73] [2600/6250] eta: 0:09:59 lr: 0.000023 grad: 0.1469 (0.1563) loss: 0.7900 (0.7984) time: 0.1446 data: 0.0513 max mem: 9377 +Train: [73] [2700/6250] eta: 0:09:42 lr: 0.000023 grad: 0.1449 (0.1563) loss: 0.7889 (0.7981) time: 0.1467 data: 0.0641 max mem: 9377 +Train: [73] [2800/6250] eta: 0:09:27 lr: 0.000023 grad: 0.1521 (0.1563) loss: 0.7907 (0.7979) time: 0.1732 data: 0.0866 max mem: 9377 +Train: [73] [2900/6250] eta: 0:09:10 lr: 0.000023 grad: 0.1566 (0.1564) loss: 0.7925 (0.7976) time: 0.1679 data: 0.0836 max mem: 9377 +Train: [73] [3000/6250] eta: 0:08:56 lr: 0.000023 grad: 0.1477 (0.1565) loss: 0.7876 (0.7973) time: 0.2071 data: 0.1238 max mem: 9377 +Train: [73] [3100/6250] eta: 0:08:40 lr: 0.000023 grad: 0.1513 (0.1566) loss: 0.7941 (0.7969) time: 0.1959 data: 0.1020 max mem: 9377 +Train: [73] [3200/6250] eta: 0:08:25 lr: 0.000022 grad: 0.1497 (0.1568) loss: 0.7915 (0.7967) time: 0.2019 data: 0.1079 max mem: 9377 +Train: [73] [3300/6250] eta: 0:08:10 lr: 0.000022 grad: 0.1539 (0.1569) loss: 0.7843 (0.7964) time: 0.2062 data: 0.1129 max mem: 9377 +Train: [73] [3400/6250] eta: 0:07:55 lr: 0.000022 grad: 0.1579 (0.1570) loss: 0.7917 (0.7962) time: 0.1805 data: 0.0913 max mem: 9377 +Train: [73] [3500/6250] eta: 0:07:39 lr: 0.000022 grad: 0.1484 (0.1569) loss: 0.7974 (0.7961) time: 0.1617 data: 0.0763 max mem: 9377 +Train: [73] [3600/6250] eta: 0:07:23 lr: 0.000022 grad: 0.1505 (0.1569) loss: 0.7911 (0.7960) time: 0.1742 data: 0.0876 max mem: 9377 +Train: [73] [3700/6250] eta: 0:07:06 lr: 0.000022 grad: 0.1475 (0.1568) loss: 0.7957 (0.7958) time: 0.1691 data: 0.0770 max mem: 9377 +Train: [73] [3800/6250] eta: 0:06:49 lr: 0.000022 grad: 0.1411 (0.1567) loss: 0.7940 (0.7957) time: 0.1807 data: 0.0885 max mem: 9377 +Train: [73] [3900/6250] eta: 0:06:33 lr: 0.000022 grad: 0.1568 (0.1567) loss: 0.7848 (0.7956) time: 0.1637 data: 0.0708 max mem: 9377 +Train: [73] [4000/6250] eta: 0:06:16 lr: 0.000022 grad: 0.1507 (0.1566) loss: 0.7829 (0.7954) time: 0.1731 data: 0.0847 max mem: 9377 +Train: [73] [4100/6250] eta: 0:05:59 lr: 0.000022 grad: 0.1544 (0.1566) loss: 0.7886 (0.7953) time: 0.1261 data: 0.0334 max mem: 9377 +Train: [73] [4200/6250] eta: 0:05:42 lr: 0.000022 grad: 0.1535 (0.1565) loss: 0.7885 (0.7952) time: 0.1611 data: 0.0783 max mem: 9377 +Train: [73] [4300/6250] eta: 0:05:25 lr: 0.000022 grad: 0.1489 (0.1564) loss: 0.7949 (0.7952) time: 0.1564 data: 0.0651 max mem: 9377 +Train: [73] [4400/6250] eta: 0:05:08 lr: 0.000022 grad: 0.1498 (0.1563) loss: 0.7950 (0.7952) time: 0.1699 data: 0.0837 max mem: 9377 +Train: [73] [4500/6250] eta: 0:04:51 lr: 0.000022 grad: 0.1404 (0.1562) loss: 0.7951 (0.7952) time: 0.1489 data: 0.0543 max mem: 9377 +Train: [73] [4600/6250] eta: 0:04:34 lr: 0.000022 grad: 0.1425 (0.1562) loss: 0.7971 (0.7952) time: 0.1578 data: 0.0752 max mem: 9377 +Train: [73] [4700/6250] eta: 0:04:17 lr: 0.000022 grad: 0.1529 (0.1560) loss: 0.7946 (0.7952) time: 0.1432 data: 0.0540 max mem: 9377 +Train: [73] [4800/6250] eta: 0:04:00 lr: 0.000022 grad: 0.1516 (0.1560) loss: 0.7974 (0.7952) time: 0.1476 data: 0.0498 max mem: 9377 +Train: [73] [4900/6250] eta: 0:03:43 lr: 0.000022 grad: 0.1405 (0.1559) loss: 0.7947 (0.7951) time: 0.1433 data: 0.0590 max mem: 9377 +Train: [73] [5000/6250] eta: 0:03:26 lr: 0.000022 grad: 0.1472 (0.1559) loss: 0.8001 (0.7951) time: 0.1613 data: 0.0647 max mem: 9377 +Train: [73] [5100/6250] eta: 0:03:10 lr: 0.000022 grad: 0.1580 (0.1558) loss: 0.7965 (0.7951) time: 0.1730 data: 0.0949 max mem: 9377 +Train: [73] [5200/6250] eta: 0:02:54 lr: 0.000022 grad: 0.1464 (0.1557) loss: 0.7979 (0.7950) time: 0.1298 data: 0.0413 max mem: 9377 +Train: [73] [5300/6250] eta: 0:02:37 lr: 0.000022 grad: 0.1551 (0.1557) loss: 0.7959 (0.7951) time: 0.1529 data: 0.0639 max mem: 9377 +Train: [73] [5400/6250] eta: 0:02:21 lr: 0.000022 grad: 0.1462 (0.1556) loss: 0.7975 (0.7950) time: 0.1757 data: 0.0888 max mem: 9377 +Train: [73] [5500/6250] eta: 0:02:04 lr: 0.000022 grad: 0.1490 (0.1556) loss: 0.7976 (0.7951) time: 0.1554 data: 0.0623 max mem: 9377 +Train: [73] [5600/6250] eta: 0:01:48 lr: 0.000022 grad: 0.1537 (0.1556) loss: 0.7921 (0.7950) time: 0.1689 data: 0.0781 max mem: 9377 +Train: [73] [5700/6250] eta: 0:01:31 lr: 0.000022 grad: 0.1426 (0.1555) loss: 0.7912 (0.7950) time: 0.1780 data: 0.0826 max mem: 9377 +Train: [73] [5800/6250] eta: 0:01:14 lr: 0.000022 grad: 0.1565 (0.1555) loss: 0.7952 (0.7950) time: 0.1750 data: 0.0857 max mem: 9377 +Train: [73] [5900/6250] eta: 0:00:58 lr: 0.000022 grad: 0.1419 (0.1554) loss: 0.8012 (0.7951) time: 0.1531 data: 0.0650 max mem: 9377 +Train: [73] [6000/6250] eta: 0:00:41 lr: 0.000022 grad: 0.1456 (0.1554) loss: 0.7949 (0.7951) time: 0.1666 data: 0.0692 max mem: 9377 +Train: [73] [6100/6250] eta: 0:00:24 lr: 0.000022 grad: 0.1476 (0.1553) loss: 0.7975 (0.7951) time: 0.1507 data: 0.0680 max mem: 9377 +Train: [73] [6200/6250] eta: 0:00:08 lr: 0.000022 grad: 0.1648 (0.1555) loss: 0.7833 (0.7949) time: 0.1365 data: 0.0411 max mem: 9377 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.1535 (0.1555) loss: 0.7869 (0.7949) time: 0.1749 data: 0.0886 max mem: 9377 +Train: [73] Total time: 0:17:21 (0.1666 s / it) +Averaged stats: lr: 0.000022 grad: 0.1535 (0.1555) loss: 0.7869 (0.7949) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:05:32 loss: 0.8133 (0.8133) time: 5.3597 data: 5.3280 max mem: 9377 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.7982 (0.7986) time: 0.1248 data: 0.0997 max mem: 9377 +Eval (hcp-train-subset): [73] Total time: 0:00:14 (0.2336 s / it) +Averaged stats (hcp-train-subset): loss: 0.7982 (0.7986) +Eval (hcp-val): [73] [ 0/62] eta: 0:05:35 loss: 0.8353 (0.8353) time: 5.4138 data: 5.3732 max mem: 9377 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.8377 (0.8390) time: 0.1472 data: 0.1220 max mem: 9377 +Eval (hcp-val): [73] Total time: 0:00:14 (0.2348 s / it) +Averaged stats (hcp-val): loss: 0.8377 (0.8390) +Eval (nsd-val): [73] [ 0/62] eta: 0:05:45 loss: 0.8069 (0.8069) time: 5.5740 data: 5.5433 max mem: 9377 +Eval (nsd-val): [73] [61/62] eta: 0:00:00 loss: 0.8178 (0.8179) time: 0.1365 data: 0.1113 max mem: 9377 +Eval (nsd-val): [73] Total time: 0:00:14 (0.2321 s / it) +Averaged stats (nsd-val): loss: 0.8178 (0.8179) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [74] [ 0/6250] eta: 9:16:54 lr: 0.000022 grad: 0.1065 (0.1065) loss: 0.8411 (0.8411) time: 5.3462 data: 5.1308 max mem: 9377 +Train: [74] [ 100/6250] eta: 0:22:02 lr: 0.000022 grad: 0.1954 (0.1899) loss: 0.7883 (0.8134) time: 0.1658 data: 0.0485 max mem: 9377 +Train: [74] [ 200/6250] eta: 0:18:53 lr: 0.000022 grad: 0.1882 (0.1907) loss: 0.7869 (0.8034) time: 0.1355 data: 0.0421 max mem: 9377 +Train: [74] [ 300/6250] eta: 0:17:30 lr: 0.000022 grad: 0.1586 (0.1865) loss: 0.7987 (0.8011) time: 0.1635 data: 0.0645 max mem: 9377 +Train: [74] [ 400/6250] eta: 0:17:17 lr: 0.000022 grad: 0.1623 (0.1828) loss: 0.7843 (0.7985) time: 0.1971 data: 0.0909 max mem: 9377 +Train: [74] [ 500/6250] eta: 0:16:42 lr: 0.000022 grad: 0.1558 (0.1808) loss: 0.7859 (0.7956) time: 0.1645 data: 0.0566 max mem: 9377 +Train: [74] [ 600/6250] eta: 0:16:20 lr: 0.000022 grad: 0.1532 (0.1772) loss: 0.8001 (0.7948) time: 0.1530 data: 0.0476 max mem: 9377 +Train: [74] [ 700/6250] eta: 0:15:54 lr: 0.000022 grad: 0.1572 (0.1745) loss: 0.7895 (0.7944) time: 0.1740 data: 0.0890 max mem: 9377 +Train: [74] [ 800/6250] eta: 0:15:27 lr: 0.000022 grad: 0.1610 (0.1728) loss: 0.7956 (0.7941) time: 0.1617 data: 0.0725 max mem: 9377 +Train: [74] [ 900/6250] eta: 0:15:06 lr: 0.000021 grad: 0.1568 (0.1707) loss: 0.8020 (0.7944) time: 0.1626 data: 0.0630 max mem: 9377 +Train: [74] [1000/6250] eta: 0:14:43 lr: 0.000021 grad: 0.1451 (0.1692) loss: 0.8011 (0.7948) time: 0.1629 data: 0.0727 max mem: 9377 +Train: [74] [1100/6250] eta: 0:14:23 lr: 0.000021 grad: 0.1443 (0.1677) loss: 0.8025 (0.7949) time: 0.1745 data: 0.0965 max mem: 9377 +Train: [74] [1200/6250] eta: 0:14:05 lr: 0.000021 grad: 0.1553 (0.1665) loss: 0.7895 (0.7950) time: 0.1729 data: 0.0892 max mem: 9377 +Train: [74] [1300/6250] eta: 0:13:49 lr: 0.000021 grad: 0.1571 (0.1653) loss: 0.7848 (0.7950) time: 0.1581 data: 0.0716 max mem: 9377 +Train: [74] [1400/6250] eta: 0:13:32 lr: 0.000021 grad: 0.1504 (0.1643) loss: 0.7867 (0.7951) time: 0.1709 data: 0.0899 max mem: 9377 +Train: [74] [1500/6250] eta: 0:13:11 lr: 0.000021 grad: 0.1533 (0.1637) loss: 0.7872 (0.7951) time: 0.1541 data: 0.0643 max mem: 9377 +Train: [74] [1600/6250] eta: 0:12:51 lr: 0.000021 grad: 0.1505 (0.1633) loss: 0.7941 (0.7949) time: 0.1517 data: 0.0537 max mem: 9377 +Train: [74] [1700/6250] eta: 0:12:33 lr: 0.000021 grad: 0.1564 (0.1628) loss: 0.7863 (0.7948) time: 0.1662 data: 0.0719 max mem: 9377 +Train: [74] [1800/6250] eta: 0:12:15 lr: 0.000021 grad: 0.1473 (0.1622) loss: 0.7900 (0.7948) time: 0.1502 data: 0.0591 max mem: 9377 +Train: [74] [1900/6250] eta: 0:11:56 lr: 0.000021 grad: 0.1458 (0.1616) loss: 0.7897 (0.7949) time: 0.1473 data: 0.0524 max mem: 9377 +Train: [74] [2000/6250] eta: 0:11:35 lr: 0.000021 grad: 0.1499 (0.1610) loss: 0.7958 (0.7950) time: 0.1492 data: 0.0507 max mem: 9377 +Train: [74] [2100/6250] eta: 0:11:17 lr: 0.000021 grad: 0.1511 (0.1606) loss: 0.7991 (0.7950) time: 0.1590 data: 0.0625 max mem: 9377 +Train: [74] [2200/6250] eta: 0:10:58 lr: 0.000021 grad: 0.1464 (0.1602) loss: 0.7994 (0.7950) time: 0.1466 data: 0.0561 max mem: 9377 +Train: [74] [2300/6250] eta: 0:10:39 lr: 0.000021 grad: 0.1526 (0.1599) loss: 0.7997 (0.7951) time: 0.1580 data: 0.0641 max mem: 9377 +Train: [74] [2400/6250] eta: 0:10:22 lr: 0.000021 grad: 0.1407 (0.1597) loss: 0.8018 (0.7951) time: 0.1534 data: 0.0567 max mem: 9377 +Train: [74] [2500/6250] eta: 0:10:05 lr: 0.000021 grad: 0.1613 (0.1595) loss: 0.7852 (0.7950) time: 0.1471 data: 0.0541 max mem: 9377 +Train: [74] [2600/6250] eta: 0:09:47 lr: 0.000021 grad: 0.1470 (0.1592) loss: 0.8045 (0.7951) time: 0.1307 data: 0.0332 max mem: 9377 +Train: [74] [2700/6250] eta: 0:09:30 lr: 0.000021 grad: 0.1511 (0.1592) loss: 0.7917 (0.7950) time: 0.1175 data: 0.0249 max mem: 9377 +Train: [74] [2800/6250] eta: 0:09:13 lr: 0.000021 grad: 0.1542 (0.1591) loss: 0.7940 (0.7950) time: 0.1783 data: 0.0942 max mem: 9377 +Train: [74] [2900/6250] eta: 0:08:56 lr: 0.000021 grad: 0.1495 (0.1590) loss: 0.7916 (0.7949) time: 0.1517 data: 0.0611 max mem: 9377 +Train: [74] [3000/6250] eta: 0:08:41 lr: 0.000021 grad: 0.1448 (0.1588) loss: 0.7951 (0.7950) time: 0.1755 data: 0.0937 max mem: 9377 +Train: [74] [3100/6250] eta: 0:08:24 lr: 0.000021 grad: 0.1480 (0.1586) loss: 0.8014 (0.7951) time: 0.1522 data: 0.0725 max mem: 9377 +Train: [74] [3200/6250] eta: 0:08:09 lr: 0.000021 grad: 0.1456 (0.1585) loss: 0.7929 (0.7951) time: 0.1584 data: 0.0716 max mem: 9377 +Train: [74] [3300/6250] eta: 0:07:52 lr: 0.000021 grad: 0.1435 (0.1583) loss: 0.7963 (0.7951) time: 0.1630 data: 0.0790 max mem: 9377 +Train: [74] [3400/6250] eta: 0:07:37 lr: 0.000021 grad: 0.1589 (0.1581) loss: 0.7927 (0.7951) time: 0.1313 data: 0.0482 max mem: 9377 +Train: [74] [3500/6250] eta: 0:07:22 lr: 0.000021 grad: 0.1479 (0.1579) loss: 0.7918 (0.7952) time: 0.1751 data: 0.0906 max mem: 9377 +Train: [74] [3600/6250] eta: 0:07:06 lr: 0.000021 grad: 0.1510 (0.1578) loss: 0.7864 (0.7952) time: 0.1886 data: 0.0974 max mem: 9377 +Train: [74] [3700/6250] eta: 0:06:50 lr: 0.000021 grad: 0.1538 (0.1578) loss: 0.7913 (0.7951) time: 0.1524 data: 0.0697 max mem: 9377 +Train: [74] [3800/6250] eta: 0:06:34 lr: 0.000021 grad: 0.1485 (0.1577) loss: 0.7946 (0.7951) time: 0.1609 data: 0.0719 max mem: 9377 +Train: [74] [3900/6250] eta: 0:06:17 lr: 0.000021 grad: 0.1577 (0.1575) loss: 0.7877 (0.7951) time: 0.1459 data: 0.0412 max mem: 9377 +Train: [74] [4000/6250] eta: 0:06:01 lr: 0.000021 grad: 0.1472 (0.1573) loss: 0.7960 (0.7951) time: 0.1407 data: 0.0502 max mem: 9377 +Train: [74] [4100/6250] eta: 0:05:45 lr: 0.000021 grad: 0.1374 (0.1570) loss: 0.7922 (0.7952) time: 0.1348 data: 0.0462 max mem: 9377 +Train: [74] [4200/6250] eta: 0:05:28 lr: 0.000021 grad: 0.1481 (0.1569) loss: 0.7901 (0.7951) time: 0.1612 data: 0.0703 max mem: 9377 +Train: [74] [4300/6250] eta: 0:05:12 lr: 0.000021 grad: 0.1490 (0.1568) loss: 0.7958 (0.7951) time: 0.1485 data: 0.0563 max mem: 9377 +Train: [74] [4400/6250] eta: 0:04:56 lr: 0.000021 grad: 0.1540 (0.1567) loss: 0.7911 (0.7951) time: 0.1394 data: 0.0472 max mem: 9377 +Train: [74] [4500/6250] eta: 0:04:39 lr: 0.000021 grad: 0.1473 (0.1566) loss: 0.7985 (0.7952) time: 0.1672 data: 0.0729 max mem: 9377 +Train: [74] [4600/6250] eta: 0:04:23 lr: 0.000021 grad: 0.1584 (0.1567) loss: 0.7902 (0.7951) time: 0.1641 data: 0.0803 max mem: 9377 +Train: [74] [4700/6250] eta: 0:04:07 lr: 0.000021 grad: 0.1500 (0.1566) loss: 0.7995 (0.7951) time: 0.1486 data: 0.0514 max mem: 9377 +Train: [74] [4800/6250] eta: 0:03:51 lr: 0.000021 grad: 0.1525 (0.1566) loss: 0.7969 (0.7951) time: 0.1602 data: 0.0652 max mem: 9377 +Train: [74] [4900/6250] eta: 0:03:35 lr: 0.000020 grad: 0.1481 (0.1566) loss: 0.7971 (0.7950) time: 0.1749 data: 0.0886 max mem: 9377 +Train: [74] [5000/6250] eta: 0:03:19 lr: 0.000020 grad: 0.1565 (0.1566) loss: 0.7953 (0.7949) time: 0.1674 data: 0.0792 max mem: 9377 +Train: [74] [5100/6250] eta: 0:03:04 lr: 0.000020 grad: 0.1477 (0.1567) loss: 0.7923 (0.7949) time: 0.2147 data: 0.1195 max mem: 9377 +Train: [74] [5200/6250] eta: 0:02:48 lr: 0.000020 grad: 0.1549 (0.1567) loss: 0.7959 (0.7948) time: 0.1564 data: 0.0695 max mem: 9377 +Train: [74] [5300/6250] eta: 0:02:32 lr: 0.000020 grad: 0.1585 (0.1567) loss: 0.7866 (0.7948) time: 0.1650 data: 0.0742 max mem: 9377 +Train: [74] [5400/6250] eta: 0:02:16 lr: 0.000020 grad: 0.1537 (0.1568) loss: 0.7883 (0.7947) time: 0.1688 data: 0.0824 max mem: 9377 +Train: [74] [5500/6250] eta: 0:01:59 lr: 0.000020 grad: 0.1529 (0.1569) loss: 0.7979 (0.7946) time: 0.1511 data: 0.0527 max mem: 9377 +Train: [74] [5600/6250] eta: 0:01:44 lr: 0.000020 grad: 0.1509 (0.1569) loss: 0.7924 (0.7946) time: 0.1805 data: 0.0934 max mem: 9377 +Train: [74] [5700/6250] eta: 0:01:27 lr: 0.000020 grad: 0.1632 (0.1569) loss: 0.7885 (0.7946) time: 0.1529 data: 0.0599 max mem: 9377 +Train: [74] [5800/6250] eta: 0:01:11 lr: 0.000020 grad: 0.1530 (0.1569) loss: 0.7902 (0.7946) time: 0.1584 data: 0.0671 max mem: 9377 +Train: [74] [5900/6250] eta: 0:00:55 lr: 0.000020 grad: 0.1559 (0.1568) loss: 0.7926 (0.7946) time: 0.1724 data: 0.0781 max mem: 9377 +Train: [74] [6000/6250] eta: 0:00:39 lr: 0.000020 grad: 0.1582 (0.1568) loss: 0.7824 (0.7945) time: 0.1642 data: 0.0666 max mem: 9377 +Train: [74] [6100/6250] eta: 0:00:23 lr: 0.000020 grad: 0.1448 (0.1567) loss: 0.7955 (0.7946) time: 0.1326 data: 0.0437 max mem: 9377 +Train: [74] [6200/6250] eta: 0:00:07 lr: 0.000020 grad: 0.1465 (0.1566) loss: 0.7891 (0.7945) time: 0.1586 data: 0.0708 max mem: 9377 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1517 (0.1566) loss: 0.7860 (0.7945) time: 0.1569 data: 0.0585 max mem: 9377 +Train: [74] Total time: 0:16:42 (0.1603 s / it) +Averaged stats: lr: 0.000020 grad: 0.1517 (0.1566) loss: 0.7860 (0.7945) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:04:10 loss: 0.8149 (0.8149) time: 4.0412 data: 3.9561 max mem: 9377 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.7983 (0.7998) time: 0.1377 data: 0.1109 max mem: 9377 +Eval (hcp-train-subset): [74] Total time: 0:00:14 (0.2376 s / it) +Averaged stats (hcp-train-subset): loss: 0.7983 (0.7998) +Making plots (hcp-train-subset): example=56 +Eval (hcp-val): [74] [ 0/62] eta: 0:06:44 loss: 0.8406 (0.8406) time: 6.5200 data: 6.4876 max mem: 9377 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.8355 (0.8391) time: 0.1434 data: 0.1160 max mem: 9377 +Eval (hcp-val): [74] Total time: 0:00:15 (0.2572 s / it) +Averaged stats (hcp-val): loss: 0.8355 (0.8391) +Making plots (hcp-val): example=61 +Eval (nsd-val): [74] [ 0/62] eta: 0:06:40 loss: 0.8132 (0.8132) time: 6.4524 data: 6.4181 max mem: 9377 +Eval (nsd-val): [74] [61/62] eta: 0:00:00 loss: 0.8181 (0.8217) time: 0.1328 data: 0.1060 max mem: 9377 +Eval (nsd-val): [74] Total time: 0:00:15 (0.2576 s / it) +Averaged stats (nsd-val): loss: 0.8181 (0.8217) +Making plots (nsd-val): example=37 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00074.pth +Train: [75] [ 0/6250] eta: 9:17:19 lr: 0.000020 grad: 0.6746 (0.6746) loss: 0.7945 (0.7945) time: 5.3503 data: 5.0651 max mem: 9377 +Train: [75] [ 100/6250] eta: 0:25:39 lr: 0.000020 grad: 0.1706 (0.2061) loss: 0.8047 (0.8039) time: 0.1981 data: 0.0860 max mem: 9377 +Train: [75] [ 200/6250] eta: 0:21:09 lr: 0.000020 grad: 0.1698 (0.1939) loss: 0.7907 (0.7977) time: 0.1606 data: 0.0565 max mem: 9377 +Train: [75] [ 300/6250] eta: 0:19:14 lr: 0.000020 grad: 0.1399 (0.1826) loss: 0.8139 (0.7979) time: 0.1586 data: 0.0576 max mem: 9377 +Train: [75] [ 400/6250] eta: 0:18:03 lr: 0.000020 grad: 0.1485 (0.1761) loss: 0.8051 (0.7982) time: 0.1662 data: 0.0627 max mem: 9377 +Train: [75] [ 500/6250] eta: 0:17:14 lr: 0.000020 grad: 0.1442 (0.1721) loss: 0.7988 (0.7980) time: 0.1511 data: 0.0514 max mem: 9377 +Train: [75] [ 600/6250] eta: 0:16:43 lr: 0.000020 grad: 0.1648 (0.1701) loss: 0.7856 (0.7979) time: 0.1629 data: 0.0724 max mem: 9377 +Train: [75] [ 700/6250] eta: 0:16:14 lr: 0.000020 grad: 0.1610 (0.1696) loss: 0.7896 (0.7966) time: 0.1614 data: 0.0649 max mem: 9377 +Train: [75] [ 800/6250] eta: 0:15:53 lr: 0.000020 grad: 0.1635 (0.1687) loss: 0.7905 (0.7960) time: 0.1625 data: 0.0697 max mem: 9377 +Train: [75] [ 900/6250] eta: 0:15:35 lr: 0.000020 grad: 0.1582 (0.1681) loss: 0.7775 (0.7949) time: 0.2054 data: 0.1107 max mem: 9377 +Train: [75] [1000/6250] eta: 0:15:11 lr: 0.000020 grad: 0.1670 (0.1681) loss: 0.7836 (0.7942) time: 0.1728 data: 0.0776 max mem: 9377 +Train: [75] [1100/6250] eta: 0:14:54 lr: 0.000020 grad: 0.1692 (0.1682) loss: 0.7753 (0.7932) time: 0.1903 data: 0.1023 max mem: 9377 +Train: [75] [1200/6250] eta: 0:14:51 lr: 0.000020 grad: 0.1738 (0.1679) loss: 0.7962 (0.7925) time: 0.1948 data: 0.1094 max mem: 9377 +Train: [75] [1300/6250] eta: 0:14:35 lr: 0.000020 grad: 0.1718 (0.1678) loss: 0.7838 (0.7917) time: 0.1485 data: 0.0592 max mem: 9377 +Train: [75] [1400/6250] eta: 0:14:20 lr: 0.000020 grad: 0.1505 (0.1674) loss: 0.7866 (0.7912) time: 0.1621 data: 0.0649 max mem: 9377 +Train: [75] [1500/6250] eta: 0:14:00 lr: 0.000020 grad: 0.1644 (0.1669) loss: 0.7820 (0.7910) time: 0.1612 data: 0.0762 max mem: 9377 +Train: [75] [1600/6250] eta: 0:13:45 lr: 0.000020 grad: 0.1605 (0.1667) loss: 0.7813 (0.7910) time: 0.2014 data: 0.1099 max mem: 9377 +Train: [75] [1700/6250] eta: 0:13:29 lr: 0.000020 grad: 0.1598 (0.1664) loss: 0.7802 (0.7909) time: 0.1913 data: 0.1019 max mem: 9377 +Train: [75] [1800/6250] eta: 0:13:12 lr: 0.000020 grad: 0.1480 (0.1661) loss: 0.8020 (0.7909) time: 0.1678 data: 0.0732 max mem: 9377 +Train: [75] [1900/6250] eta: 0:12:56 lr: 0.000020 grad: 0.1606 (0.1658) loss: 0.7927 (0.7909) time: 0.2018 data: 0.1147 max mem: 9377 +Train: [75] [2000/6250] eta: 0:12:37 lr: 0.000020 grad: 0.1598 (0.1654) loss: 0.7899 (0.7909) time: 0.1564 data: 0.0628 max mem: 9377 +Train: [75] [2100/6250] eta: 0:12:16 lr: 0.000020 grad: 0.1525 (0.1652) loss: 0.7865 (0.7908) time: 0.1707 data: 0.0762 max mem: 9377 +Train: [75] [2200/6250] eta: 0:11:57 lr: 0.000020 grad: 0.1614 (0.1648) loss: 0.7850 (0.7909) time: 0.1506 data: 0.0601 max mem: 9377 +Train: [75] [2300/6250] eta: 0:11:36 lr: 0.000020 grad: 0.1515 (0.1642) loss: 0.7958 (0.7911) time: 0.1669 data: 0.0767 max mem: 9377 +Train: [75] [2400/6250] eta: 0:11:17 lr: 0.000020 grad: 0.1548 (0.1639) loss: 0.7871 (0.7912) time: 0.1961 data: 0.0997 max mem: 9377 +Train: [75] [2500/6250] eta: 0:10:57 lr: 0.000020 grad: 0.1507 (0.1637) loss: 0.7920 (0.7913) time: 0.1779 data: 0.0881 max mem: 9377 +Train: [75] [2600/6250] eta: 0:10:38 lr: 0.000020 grad: 0.1549 (0.1634) loss: 0.7876 (0.7914) time: 0.1856 data: 0.1040 max mem: 9377 +Train: [75] [2700/6250] eta: 0:10:18 lr: 0.000020 grad: 0.1484 (0.1631) loss: 0.7943 (0.7915) time: 0.1430 data: 0.0467 max mem: 9377 +Train: [75] [2800/6250] eta: 0:09:59 lr: 0.000019 grad: 0.1507 (0.1628) loss: 0.7927 (0.7917) time: 0.1565 data: 0.0700 max mem: 9377 +Train: [75] [2900/6250] eta: 0:09:40 lr: 0.000019 grad: 0.1531 (0.1625) loss: 0.7946 (0.7917) time: 0.1555 data: 0.0614 max mem: 9377 +Train: [75] [3000/6250] eta: 0:09:24 lr: 0.000019 grad: 0.1542 (0.1623) loss: 0.7928 (0.7917) time: 0.1991 data: 0.1107 max mem: 9377 +Train: [75] [3100/6250] eta: 0:09:06 lr: 0.000019 grad: 0.1542 (0.1622) loss: 0.8032 (0.7918) time: 0.2166 data: 0.1257 max mem: 9377 +Train: [75] [3200/6250] eta: 0:08:48 lr: 0.000019 grad: 0.1499 (0.1620) loss: 0.7946 (0.7919) time: 0.1448 data: 0.0665 max mem: 9377 +Train: [75] [3300/6250] eta: 0:08:30 lr: 0.000019 grad: 0.1605 (0.1620) loss: 0.7878 (0.7919) time: 0.1855 data: 0.1025 max mem: 9377 +Train: [75] [3400/6250] eta: 0:08:13 lr: 0.000019 grad: 0.1538 (0.1619) loss: 0.7860 (0.7919) time: 0.1917 data: 0.0952 max mem: 9377 +Train: [75] [3500/6250] eta: 0:07:55 lr: 0.000019 grad: 0.1575 (0.1619) loss: 0.7876 (0.7918) time: 0.1726 data: 0.0712 max mem: 9377 +Train: [75] [3600/6250] eta: 0:07:38 lr: 0.000019 grad: 0.1622 (0.1618) loss: 0.7909 (0.7918) time: 0.1852 data: 0.0959 max mem: 9377 +Train: [75] [3700/6250] eta: 0:07:20 lr: 0.000019 grad: 0.1591 (0.1618) loss: 0.7890 (0.7918) time: 0.1645 data: 0.0659 max mem: 9377 +Train: [75] [3800/6250] eta: 0:07:03 lr: 0.000019 grad: 0.1573 (0.1617) loss: 0.7869 (0.7918) time: 0.1804 data: 0.0853 max mem: 9377 +Train: [75] [3900/6250] eta: 0:06:45 lr: 0.000019 grad: 0.1551 (0.1617) loss: 0.7843 (0.7917) time: 0.1655 data: 0.0730 max mem: 9377 +Train: [75] [4000/6250] eta: 0:06:27 lr: 0.000019 grad: 0.1566 (0.1616) loss: 0.7940 (0.7917) time: 0.1537 data: 0.0688 max mem: 9377 +Train: [75] [4100/6250] eta: 0:06:08 lr: 0.000019 grad: 0.1550 (0.1615) loss: 0.7985 (0.7917) time: 0.1520 data: 0.0658 max mem: 9377 +Train: [75] [4200/6250] eta: 0:05:51 lr: 0.000019 grad: 0.1549 (0.1615) loss: 0.7937 (0.7917) time: 0.1727 data: 0.0878 max mem: 9377 +Train: [75] [4300/6250] eta: 0:05:33 lr: 0.000019 grad: 0.1606 (0.1614) loss: 0.7878 (0.7917) time: 0.1739 data: 0.0912 max mem: 9377 +Train: [75] [4400/6250] eta: 0:05:16 lr: 0.000019 grad: 0.1583 (0.1614) loss: 0.7859 (0.7916) time: 0.1443 data: 0.0536 max mem: 9377 +Train: [75] [4500/6250] eta: 0:04:58 lr: 0.000019 grad: 0.1578 (0.1614) loss: 0.7859 (0.7916) time: 0.1558 data: 0.0692 max mem: 9377 +Train: [75] [4600/6250] eta: 0:04:40 lr: 0.000019 grad: 0.1494 (0.1613) loss: 0.7877 (0.7916) time: 0.1435 data: 0.0604 max mem: 9377 +Train: [75] [4700/6250] eta: 0:04:23 lr: 0.000019 grad: 0.1598 (0.1614) loss: 0.7936 (0.7915) time: 0.1412 data: 0.0446 max mem: 9377 +Train: [75] [4800/6250] eta: 0:04:06 lr: 0.000019 grad: 0.1542 (0.1614) loss: 0.7888 (0.7915) time: 0.1435 data: 0.0617 max mem: 9377 +Train: [75] [4900/6250] eta: 0:03:48 lr: 0.000019 grad: 0.1622 (0.1615) loss: 0.7890 (0.7914) time: 0.1562 data: 0.0734 max mem: 9377 +Train: [75] [5000/6250] eta: 0:03:31 lr: 0.000019 grad: 0.1525 (0.1614) loss: 0.7972 (0.7915) time: 0.1633 data: 0.0755 max mem: 9377 +Train: [75] [5100/6250] eta: 0:03:14 lr: 0.000019 grad: 0.1563 (0.1614) loss: 0.7877 (0.7916) time: 0.1673 data: 0.0795 max mem: 9377 +Train: [75] [5200/6250] eta: 0:02:57 lr: 0.000019 grad: 0.1586 (0.1614) loss: 0.7941 (0.7917) time: 0.1838 data: 0.1014 max mem: 9377 +Train: [75] [5300/6250] eta: 0:02:40 lr: 0.000019 grad: 0.1518 (0.1613) loss: 0.7941 (0.7917) time: 0.1800 data: 0.1020 max mem: 9377 +Train: [75] [5400/6250] eta: 0:02:23 lr: 0.000019 grad: 0.1512 (0.1613) loss: 0.7940 (0.7917) time: 0.1669 data: 0.0768 max mem: 9377 +Train: [75] [5500/6250] eta: 0:02:06 lr: 0.000019 grad: 0.1571 (0.1613) loss: 0.8008 (0.7917) time: 0.1938 data: 0.1065 max mem: 9377 +Train: [75] [5600/6250] eta: 0:01:49 lr: 0.000019 grad: 0.1558 (0.1613) loss: 0.7884 (0.7917) time: 0.1323 data: 0.0324 max mem: 9377 +Train: [75] [5700/6250] eta: 0:01:32 lr: 0.000019 grad: 0.1589 (0.1613) loss: 0.7892 (0.7917) time: 0.1684 data: 0.0718 max mem: 9377 +Train: [75] [5800/6250] eta: 0:01:15 lr: 0.000019 grad: 0.1573 (0.1613) loss: 0.7918 (0.7916) time: 0.1610 data: 0.0742 max mem: 9377 +Train: [75] [5900/6250] eta: 0:00:58 lr: 0.000019 grad: 0.1467 (0.1612) loss: 0.7957 (0.7916) time: 0.1689 data: 0.0828 max mem: 9377 +Train: [75] [6000/6250] eta: 0:00:42 lr: 0.000019 grad: 0.1511 (0.1611) loss: 0.7970 (0.7917) time: 0.1640 data: 0.0694 max mem: 9377 +Train: [75] [6100/6250] eta: 0:00:25 lr: 0.000019 grad: 0.1526 (0.1610) loss: 0.7926 (0.7917) time: 0.1750 data: 0.0956 max mem: 9377 +Train: [75] [6200/6250] eta: 0:00:08 lr: 0.000019 grad: 0.1586 (0.1610) loss: 0.7890 (0.7917) time: 0.1789 data: 0.0919 max mem: 9377 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.1553 (0.1609) loss: 0.7929 (0.7917) time: 0.2347 data: 0.1518 max mem: 9377 +Train: [75] Total time: 0:17:35 (0.1689 s / it) +Averaged stats: lr: 0.000019 grad: 0.1553 (0.1609) loss: 0.7929 (0.7917) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:06:32 loss: 0.8143 (0.8143) time: 6.3316 data: 6.2985 max mem: 9377 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.7983 (0.7982) time: 0.1102 data: 0.0848 max mem: 9377 +Eval (hcp-train-subset): [75] Total time: 0:00:14 (0.2340 s / it) +Averaged stats (hcp-train-subset): loss: 0.7983 (0.7982) +Eval (hcp-val): [75] [ 0/62] eta: 0:05:43 loss: 0.8382 (0.8382) time: 5.5470 data: 5.5172 max mem: 9377 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.8385 (0.8393) time: 0.1397 data: 0.1142 max mem: 9377 +Eval (hcp-val): [75] Total time: 0:00:14 (0.2324 s / it) +Averaged stats (hcp-val): loss: 0.8385 (0.8393) +Eval (nsd-val): [75] [ 0/62] eta: 0:06:21 loss: 0.8101 (0.8101) time: 6.1511 data: 6.1198 max mem: 9377 +Eval (nsd-val): [75] [61/62] eta: 0:00:00 loss: 0.8199 (0.8211) time: 0.1273 data: 0.1023 max mem: 9377 +Eval (nsd-val): [75] Total time: 0:00:14 (0.2294 s / it) +Averaged stats (nsd-val): loss: 0.8199 (0.8211) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [76] [ 0/6250] eta: 12:00:24 lr: 0.000019 grad: 0.2868 (0.2868) loss: 0.7965 (0.7965) time: 6.9160 data: 6.8246 max mem: 9377 +Train: [76] [ 100/6250] eta: 0:22:14 lr: 0.000019 grad: 0.1789 (0.2093) loss: 0.7992 (0.8050) time: 0.1525 data: 0.0410 max mem: 9377 +Train: [76] [ 200/6250] eta: 0:19:06 lr: 0.000019 grad: 0.1786 (0.2012) loss: 0.7922 (0.7973) time: 0.1562 data: 0.0537 max mem: 9377 +Train: [76] [ 300/6250] eta: 0:17:32 lr: 0.000019 grad: 0.1678 (0.1903) loss: 0.7963 (0.7959) time: 0.1206 data: 0.0148 max mem: 9377 +Train: [76] [ 400/6250] eta: 0:16:36 lr: 0.000019 grad: 0.1458 (0.1819) loss: 0.8130 (0.7975) time: 0.1224 data: 0.0090 max mem: 9377 +Train: [76] [ 500/6250] eta: 0:16:01 lr: 0.000019 grad: 0.1579 (0.1765) loss: 0.8033 (0.7985) time: 0.1721 data: 0.0795 max mem: 9377 +Train: [76] [ 600/6250] eta: 0:15:39 lr: 0.000019 grad: 0.1677 (0.1740) loss: 0.7919 (0.7987) time: 0.1632 data: 0.0731 max mem: 9377 +Train: [76] [ 700/6250] eta: 0:15:21 lr: 0.000019 grad: 0.1466 (0.1720) loss: 0.8028 (0.7983) time: 0.1615 data: 0.0736 max mem: 9377 +Train: [76] [ 800/6250] eta: 0:14:58 lr: 0.000018 grad: 0.1564 (0.1700) loss: 0.8012 (0.7984) time: 0.1568 data: 0.0615 max mem: 9377 +Train: [76] [ 900/6250] eta: 0:14:43 lr: 0.000018 grad: 0.1510 (0.1685) loss: 0.8022 (0.7983) time: 0.1802 data: 0.0930 max mem: 9377 +Train: [76] [1000/6250] eta: 0:14:21 lr: 0.000018 grad: 0.1567 (0.1675) loss: 0.8029 (0.7982) time: 0.1758 data: 0.0826 max mem: 9377 +Train: [76] [1100/6250] eta: 0:13:56 lr: 0.000018 grad: 0.1517 (0.1662) loss: 0.7965 (0.7979) time: 0.1437 data: 0.0525 max mem: 9377 +Train: [76] [1200/6250] eta: 0:13:36 lr: 0.000018 grad: 0.1577 (0.1654) loss: 0.7905 (0.7978) time: 0.1636 data: 0.0695 max mem: 9377 +Train: [76] [1300/6250] eta: 0:13:25 lr: 0.000018 grad: 0.1537 (0.1645) loss: 0.7925 (0.7976) time: 0.1553 data: 0.0610 max mem: 9377 +Train: [76] [1400/6250] eta: 0:13:11 lr: 0.000018 grad: 0.1579 (0.1637) loss: 0.7989 (0.7974) time: 0.1777 data: 0.0898 max mem: 9377 +Train: [76] [1500/6250] eta: 0:12:54 lr: 0.000018 grad: 0.1599 (0.1633) loss: 0.7872 (0.7970) time: 0.1609 data: 0.0779 max mem: 9377 +Train: [76] [1600/6250] eta: 0:12:37 lr: 0.000018 grad: 0.1490 (0.1627) loss: 0.7922 (0.7969) time: 0.1591 data: 0.0632 max mem: 9377 +Train: [76] [1700/6250] eta: 0:12:21 lr: 0.000018 grad: 0.1549 (0.1621) loss: 0.7928 (0.7966) time: 0.1544 data: 0.0667 max mem: 9377 +Train: [76] [1800/6250] eta: 0:12:07 lr: 0.000018 grad: 0.1540 (0.1617) loss: 0.7925 (0.7963) time: 0.1320 data: 0.0524 max mem: 9377 +Train: [76] [1900/6250] eta: 0:11:49 lr: 0.000018 grad: 0.1587 (0.1614) loss: 0.7935 (0.7961) time: 0.1369 data: 0.0398 max mem: 9377 +Train: [76] [2000/6250] eta: 0:11:31 lr: 0.000018 grad: 0.1637 (0.1612) loss: 0.7952 (0.7958) time: 0.1340 data: 0.0497 max mem: 9377 +Train: [76] [2100/6250] eta: 0:11:16 lr: 0.000018 grad: 0.1542 (0.1612) loss: 0.7929 (0.7955) time: 0.1880 data: 0.0965 max mem: 9377 +Train: [76] [2200/6250] eta: 0:10:56 lr: 0.000018 grad: 0.1567 (0.1614) loss: 0.7829 (0.7951) time: 0.1558 data: 0.0698 max mem: 9377 +Train: [76] [2300/6250] eta: 0:10:39 lr: 0.000018 grad: 0.1605 (0.1615) loss: 0.7973 (0.7949) time: 0.1795 data: 0.0920 max mem: 9377 +Train: [76] [2400/6250] eta: 0:10:21 lr: 0.000018 grad: 0.1552 (0.1613) loss: 0.7894 (0.7947) time: 0.1423 data: 0.0553 max mem: 9377 +Train: [76] [2500/6250] eta: 0:10:03 lr: 0.000018 grad: 0.1543 (0.1615) loss: 0.7897 (0.7943) time: 0.1416 data: 0.0571 max mem: 9377 +Train: [76] [2600/6250] eta: 0:09:47 lr: 0.000018 grad: 0.1525 (0.1615) loss: 0.7858 (0.7941) time: 0.1831 data: 0.0974 max mem: 9377 +Train: [76] [2700/6250] eta: 0:09:30 lr: 0.000018 grad: 0.1574 (0.1614) loss: 0.7857 (0.7939) time: 0.1475 data: 0.0601 max mem: 9377 +Train: [76] [2800/6250] eta: 0:09:13 lr: 0.000018 grad: 0.1560 (0.1612) loss: 0.7878 (0.7938) time: 0.1553 data: 0.0703 max mem: 9377 +Train: [76] [2900/6250] eta: 0:08:56 lr: 0.000018 grad: 0.1588 (0.1611) loss: 0.7798 (0.7936) time: 0.1462 data: 0.0519 max mem: 9377 +Train: [76] [3000/6250] eta: 0:08:40 lr: 0.000018 grad: 0.1595 (0.1611) loss: 0.7838 (0.7934) time: 0.1618 data: 0.0761 max mem: 9377 +Train: [76] [3100/6250] eta: 0:08:26 lr: 0.000018 grad: 0.1508 (0.1610) loss: 0.7929 (0.7933) time: 0.1825 data: 0.1022 max mem: 9377 +Train: [76] [3200/6250] eta: 0:08:09 lr: 0.000018 grad: 0.1582 (0.1609) loss: 0.7866 (0.7932) time: 0.1486 data: 0.0622 max mem: 9377 +Train: [76] [3300/6250] eta: 0:07:53 lr: 0.000018 grad: 0.1595 (0.1608) loss: 0.7978 (0.7932) time: 0.1709 data: 0.0771 max mem: 9377 +Train: [76] [3400/6250] eta: 0:07:37 lr: 0.000018 grad: 0.1530 (0.1607) loss: 0.7932 (0.7932) time: 0.1247 data: 0.0409 max mem: 9377 +Train: [76] [3500/6250] eta: 0:07:21 lr: 0.000018 grad: 0.1558 (0.1609) loss: 0.7871 (0.7932) time: 0.1560 data: 0.0582 max mem: 9377 +Train: [76] [3600/6250] eta: 0:07:05 lr: 0.000018 grad: 0.1660 (0.1608) loss: 0.7966 (0.7932) time: 0.1613 data: 0.0676 max mem: 9377 +Train: [76] [3700/6250] eta: 0:06:49 lr: 0.000018 grad: 0.1514 (0.1609) loss: 0.7961 (0.7932) time: 0.1491 data: 0.0643 max mem: 9377 +Train: [76] [3800/6250] eta: 0:06:32 lr: 0.000018 grad: 0.1620 (0.1609) loss: 0.7935 (0.7932) time: 0.1464 data: 0.0531 max mem: 9377 +Train: [76] [3900/6250] eta: 0:06:16 lr: 0.000018 grad: 0.1568 (0.1610) loss: 0.8029 (0.7931) time: 0.1285 data: 0.0372 max mem: 9377 +Train: [76] [4000/6250] eta: 0:05:59 lr: 0.000018 grad: 0.1572 (0.1609) loss: 0.7933 (0.7932) time: 0.1619 data: 0.0638 max mem: 9377 +Train: [76] [4100/6250] eta: 0:05:43 lr: 0.000018 grad: 0.1466 (0.1609) loss: 0.8009 (0.7933) time: 0.1500 data: 0.0586 max mem: 9377 +Train: [76] [4200/6250] eta: 0:05:26 lr: 0.000018 grad: 0.1581 (0.1608) loss: 0.7978 (0.7933) time: 0.1505 data: 0.0546 max mem: 9377 +Train: [76] [4300/6250] eta: 0:05:10 lr: 0.000018 grad: 0.1650 (0.1608) loss: 0.7876 (0.7932) time: 0.1757 data: 0.0839 max mem: 9377 +Train: [76] [4400/6250] eta: 0:04:54 lr: 0.000018 grad: 0.1467 (0.1608) loss: 0.7958 (0.7933) time: 0.1637 data: 0.0769 max mem: 9377 +Train: [76] [4500/6250] eta: 0:04:38 lr: 0.000018 grad: 0.1560 (0.1607) loss: 0.7865 (0.7933) time: 0.1621 data: 0.0781 max mem: 9377 +Train: [76] [4600/6250] eta: 0:04:22 lr: 0.000018 grad: 0.1473 (0.1606) loss: 0.7981 (0.7933) time: 0.1690 data: 0.0864 max mem: 9377 +Train: [76] [4700/6250] eta: 0:04:07 lr: 0.000018 grad: 0.1614 (0.1607) loss: 0.7871 (0.7932) time: 0.1361 data: 0.0514 max mem: 9377 +Train: [76] [4800/6250] eta: 0:03:51 lr: 0.000018 grad: 0.1502 (0.1606) loss: 0.7915 (0.7932) time: 0.1499 data: 0.0622 max mem: 9377 +Train: [76] [4900/6250] eta: 0:03:35 lr: 0.000018 grad: 0.1667 (0.1606) loss: 0.7869 (0.7932) time: 0.1374 data: 0.0633 max mem: 9377 +Train: [76] [5000/6250] eta: 0:03:19 lr: 0.000018 grad: 0.1540 (0.1606) loss: 0.7972 (0.7932) time: 0.1747 data: 0.0861 max mem: 9377 +Train: [76] [5100/6250] eta: 0:03:03 lr: 0.000017 grad: 0.1567 (0.1607) loss: 0.7876 (0.7932) time: 0.1434 data: 0.0506 max mem: 9377 +Train: [76] [5200/6250] eta: 0:02:48 lr: 0.000017 grad: 0.1565 (0.1606) loss: 0.7936 (0.7932) time: 0.2144 data: 0.1157 max mem: 9377 +Train: [76] [5300/6250] eta: 0:02:32 lr: 0.000017 grad: 0.1578 (0.1606) loss: 0.7894 (0.7932) time: 0.2160 data: 0.1326 max mem: 9377 +Train: [76] [5400/6250] eta: 0:02:16 lr: 0.000017 grad: 0.1605 (0.1605) loss: 0.7908 (0.7932) time: 0.1806 data: 0.0912 max mem: 9377 +Train: [76] [5500/6250] eta: 0:02:00 lr: 0.000017 grad: 0.1543 (0.1605) loss: 0.7901 (0.7933) time: 0.1658 data: 0.0760 max mem: 9377 +Train: [76] [5600/6250] eta: 0:01:44 lr: 0.000017 grad: 0.1532 (0.1604) loss: 0.7900 (0.7934) time: 0.1480 data: 0.0621 max mem: 9377 +Train: [76] [5700/6250] eta: 0:01:28 lr: 0.000017 grad: 0.1556 (0.1604) loss: 0.7969 (0.7934) time: 0.1529 data: 0.0601 max mem: 9377 +Train: [76] [5800/6250] eta: 0:01:12 lr: 0.000017 grad: 0.1578 (0.1604) loss: 0.7874 (0.7934) time: 0.1470 data: 0.0544 max mem: 9377 +Train: [76] [5900/6250] eta: 0:00:56 lr: 0.000017 grad: 0.1596 (0.1604) loss: 0.7983 (0.7934) time: 0.1393 data: 0.0454 max mem: 9377 +Train: [76] [6000/6250] eta: 0:00:40 lr: 0.000017 grad: 0.1622 (0.1604) loss: 0.7874 (0.7934) time: 0.1482 data: 0.0530 max mem: 9377 +Train: [76] [6100/6250] eta: 0:00:24 lr: 0.000017 grad: 0.1686 (0.1604) loss: 0.7868 (0.7933) time: 0.1634 data: 0.0765 max mem: 9377 +Train: [76] [6200/6250] eta: 0:00:08 lr: 0.000017 grad: 0.1641 (0.1605) loss: 0.7918 (0.7932) time: 0.1389 data: 0.0441 max mem: 9377 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1571 (0.1605) loss: 0.7914 (0.7932) time: 0.1382 data: 0.0378 max mem: 9377 +Train: [76] Total time: 0:16:47 (0.1613 s / it) +Averaged stats: lr: 0.000017 grad: 0.1571 (0.1605) loss: 0.7914 (0.7932) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:06:26 loss: 0.8136 (0.8136) time: 6.2353 data: 6.2036 max mem: 9377 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.7966 (0.7959) time: 0.1357 data: 0.1095 max mem: 9377 +Eval (hcp-train-subset): [76] Total time: 0:00:16 (0.2634 s / it) +Averaged stats (hcp-train-subset): loss: 0.7966 (0.7959) +Eval (hcp-val): [76] [ 0/62] eta: 0:06:15 loss: 0.8357 (0.8357) time: 6.0559 data: 6.0183 max mem: 9377 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.8373 (0.8392) time: 0.1454 data: 0.1200 max mem: 9377 +Eval (hcp-val): [76] Total time: 0:00:16 (0.2686 s / it) +Averaged stats (hcp-val): loss: 0.8373 (0.8392) +Eval (nsd-val): [76] [ 0/62] eta: 0:05:07 loss: 0.8058 (0.8058) time: 4.9621 data: 4.8803 max mem: 9377 +Eval (nsd-val): [76] [61/62] eta: 0:00:00 loss: 0.8176 (0.8190) time: 0.1188 data: 0.0913 max mem: 9377 +Eval (nsd-val): [76] Total time: 0:00:14 (0.2409 s / it) +Averaged stats (nsd-val): loss: 0.8176 (0.8190) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [77] [ 0/6250] eta: 10:34:22 lr: 0.000017 grad: 0.1004 (0.1004) loss: 0.8491 (0.8491) time: 6.0900 data: 5.8247 max mem: 9377 +Train: [77] [ 100/6250] eta: 0:25:51 lr: 0.000017 grad: 0.2231 (0.2484) loss: 0.7800 (0.7921) time: 0.2158 data: 0.0949 max mem: 9377 +Train: [77] [ 200/6250] eta: 0:21:37 lr: 0.000017 grad: 0.1628 (0.2200) loss: 0.7915 (0.7923) time: 0.1895 data: 0.0736 max mem: 9377 +Train: [77] [ 300/6250] eta: 0:20:05 lr: 0.000017 grad: 0.1544 (0.2036) loss: 0.7979 (0.7930) time: 0.1812 data: 0.0802 max mem: 9377 +Train: [77] [ 400/6250] eta: 0:19:00 lr: 0.000017 grad: 0.1615 (0.1951) loss: 0.7920 (0.7925) time: 0.1749 data: 0.0813 max mem: 9377 +Train: [77] [ 500/6250] eta: 0:18:07 lr: 0.000017 grad: 0.1567 (0.1897) loss: 0.7963 (0.7917) time: 0.1550 data: 0.0532 max mem: 9377 +Train: [77] [ 600/6250] eta: 0:17:29 lr: 0.000017 grad: 0.1592 (0.1857) loss: 0.7923 (0.7914) time: 0.1712 data: 0.0757 max mem: 9377 +Train: [77] [ 700/6250] eta: 0:16:54 lr: 0.000017 grad: 0.1562 (0.1842) loss: 0.7892 (0.7915) time: 0.1740 data: 0.0859 max mem: 9377 +Train: [77] [ 800/6250] eta: 0:16:28 lr: 0.000017 grad: 0.1598 (0.1815) loss: 0.7767 (0.7914) time: 0.1893 data: 0.0995 max mem: 9377 +Train: [77] [ 900/6250] eta: 0:16:02 lr: 0.000017 grad: 0.1569 (0.1790) loss: 0.7899 (0.7915) time: 0.1801 data: 0.0921 max mem: 9377 +Train: [77] [1000/6250] eta: 0:15:37 lr: 0.000017 grad: 0.1509 (0.1774) loss: 0.7978 (0.7918) time: 0.1882 data: 0.0908 max mem: 9377 +Train: [77] [1100/6250] eta: 0:15:05 lr: 0.000017 grad: 0.1585 (0.1760) loss: 0.7891 (0.7917) time: 0.1478 data: 0.0567 max mem: 9377 +Train: [77] [1200/6250] eta: 0:14:41 lr: 0.000017 grad: 0.1624 (0.1748) loss: 0.7903 (0.7915) time: 0.1531 data: 0.0665 max mem: 9377 +Train: [77] [1300/6250] eta: 0:14:20 lr: 0.000017 grad: 0.1562 (0.1734) loss: 0.7811 (0.7914) time: 0.2043 data: 0.1196 max mem: 9377 +Train: [77] [1400/6250] eta: 0:14:05 lr: 0.000017 grad: 0.1537 (0.1724) loss: 0.7931 (0.7912) time: 0.1577 data: 0.0709 max mem: 9377 +Train: [77] [1500/6250] eta: 0:13:44 lr: 0.000017 grad: 0.1575 (0.1713) loss: 0.7840 (0.7910) time: 0.1534 data: 0.0780 max mem: 9377 +Train: [77] [1600/6250] eta: 0:13:23 lr: 0.000017 grad: 0.1527 (0.1705) loss: 0.7949 (0.7909) time: 0.1562 data: 0.0744 max mem: 9377 +Train: [77] [1700/6250] eta: 0:13:06 lr: 0.000017 grad: 0.1579 (0.1698) loss: 0.7753 (0.7907) time: 0.1926 data: 0.1010 max mem: 9377 +Train: [77] [1800/6250] eta: 0:12:49 lr: 0.000017 grad: 0.1516 (0.1692) loss: 0.7929 (0.7907) time: 0.1617 data: 0.0703 max mem: 9377 +Train: [77] [1900/6250] eta: 0:12:31 lr: 0.000017 grad: 0.1551 (0.1686) loss: 0.7859 (0.7907) time: 0.1461 data: 0.0483 max mem: 9377 +Train: [77] [2000/6250] eta: 0:12:12 lr: 0.000017 grad: 0.1586 (0.1681) loss: 0.7908 (0.7907) time: 0.1662 data: 0.0547 max mem: 9377 +Train: [77] [2100/6250] eta: 0:11:54 lr: 0.000017 grad: 0.1574 (0.1676) loss: 0.7921 (0.7909) time: 0.1720 data: 0.0749 max mem: 9377 +Train: [77] [2200/6250] eta: 0:11:35 lr: 0.000017 grad: 0.1631 (0.1671) loss: 0.8032 (0.7911) time: 0.1913 data: 0.1046 max mem: 9377 +Train: [77] [2300/6250] eta: 0:11:15 lr: 0.000017 grad: 0.1516 (0.1668) loss: 0.7960 (0.7911) time: 0.1542 data: 0.0580 max mem: 9377 +Train: [77] [2400/6250] eta: 0:10:55 lr: 0.000017 grad: 0.1680 (0.1668) loss: 0.7922 (0.7911) time: 0.1620 data: 0.0730 max mem: 9377 +Train: [77] [2500/6250] eta: 0:10:37 lr: 0.000017 grad: 0.1647 (0.1666) loss: 0.7922 (0.7911) time: 0.1648 data: 0.0705 max mem: 9377 +Train: [77] [2600/6250] eta: 0:10:18 lr: 0.000017 grad: 0.1530 (0.1664) loss: 0.7920 (0.7910) time: 0.1431 data: 0.0522 max mem: 9377 +Train: [77] [2700/6250] eta: 0:09:59 lr: 0.000017 grad: 0.1608 (0.1664) loss: 0.7900 (0.7909) time: 0.1548 data: 0.0680 max mem: 9377 +Train: [77] [2800/6250] eta: 0:09:41 lr: 0.000017 grad: 0.1623 (0.1663) loss: 0.7959 (0.7907) time: 0.1658 data: 0.0761 max mem: 9377 +Train: [77] [2900/6250] eta: 0:09:22 lr: 0.000017 grad: 0.1574 (0.1661) loss: 0.7881 (0.7907) time: 0.1572 data: 0.0745 max mem: 9377 +Train: [77] [3000/6250] eta: 0:09:03 lr: 0.000017 grad: 0.1622 (0.1661) loss: 0.7872 (0.7905) time: 0.1409 data: 0.0532 max mem: 9377 +Train: [77] [3100/6250] eta: 0:08:45 lr: 0.000017 grad: 0.1653 (0.1659) loss: 0.7871 (0.7904) time: 0.1505 data: 0.0680 max mem: 9377 +Train: [77] [3200/6250] eta: 0:08:29 lr: 0.000017 grad: 0.1569 (0.1658) loss: 0.7915 (0.7904) time: 0.1964 data: 0.1094 max mem: 9377 +Train: [77] [3300/6250] eta: 0:08:12 lr: 0.000016 grad: 0.1568 (0.1657) loss: 0.7920 (0.7904) time: 0.1579 data: 0.0716 max mem: 9377 +Train: [77] [3400/6250] eta: 0:07:54 lr: 0.000016 grad: 0.1582 (0.1655) loss: 0.7912 (0.7904) time: 0.1427 data: 0.0554 max mem: 9377 +Train: [77] [3500/6250] eta: 0:07:36 lr: 0.000016 grad: 0.1584 (0.1653) loss: 0.7890 (0.7904) time: 0.1346 data: 0.0507 max mem: 9377 +Train: [77] [3600/6250] eta: 0:07:19 lr: 0.000016 grad: 0.1562 (0.1651) loss: 0.7953 (0.7905) time: 0.1677 data: 0.0756 max mem: 9377 +Train: [77] [3700/6250] eta: 0:07:03 lr: 0.000016 grad: 0.1560 (0.1650) loss: 0.7974 (0.7904) time: 0.1495 data: 0.0625 max mem: 9377 +Train: [77] [3800/6250] eta: 0:06:46 lr: 0.000016 grad: 0.1545 (0.1649) loss: 0.7993 (0.7905) time: 0.1541 data: 0.0664 max mem: 9377 +Train: [77] [3900/6250] eta: 0:06:29 lr: 0.000016 grad: 0.1542 (0.1648) loss: 0.7919 (0.7905) time: 0.1476 data: 0.0493 max mem: 9377 +Train: [77] [4000/6250] eta: 0:06:12 lr: 0.000016 grad: 0.1678 (0.1647) loss: 0.7984 (0.7905) time: 0.1408 data: 0.0384 max mem: 9377 +Train: [77] [4100/6250] eta: 0:05:54 lr: 0.000016 grad: 0.1607 (0.1648) loss: 0.7918 (0.7906) time: 0.1442 data: 0.0535 max mem: 9377 +Train: [77] [4200/6250] eta: 0:05:37 lr: 0.000016 grad: 0.1603 (0.1647) loss: 0.7908 (0.7906) time: 0.1650 data: 0.0703 max mem: 9377 +Train: [77] [4300/6250] eta: 0:05:20 lr: 0.000016 grad: 0.1577 (0.1646) loss: 0.7938 (0.7906) time: 0.1540 data: 0.0565 max mem: 9377 +Train: [77] [4400/6250] eta: 0:05:03 lr: 0.000016 grad: 0.1594 (0.1645) loss: 0.7899 (0.7907) time: 0.1602 data: 0.0709 max mem: 9377 +Train: [77] [4500/6250] eta: 0:04:46 lr: 0.000016 grad: 0.1588 (0.1643) loss: 0.7945 (0.7908) time: 0.1380 data: 0.0522 max mem: 9377 +Train: [77] [4600/6250] eta: 0:04:29 lr: 0.000016 grad: 0.1536 (0.1642) loss: 0.7944 (0.7909) time: 0.1596 data: 0.0626 max mem: 9377 +Train: [77] [4700/6250] eta: 0:04:13 lr: 0.000016 grad: 0.1592 (0.1640) loss: 0.7956 (0.7910) time: 0.1564 data: 0.0667 max mem: 9377 +Train: [77] [4800/6250] eta: 0:03:56 lr: 0.000016 grad: 0.1480 (0.1639) loss: 0.7868 (0.7911) time: 0.1196 data: 0.0205 max mem: 9377 +Train: [77] [4900/6250] eta: 0:03:40 lr: 0.000016 grad: 0.1522 (0.1637) loss: 0.7958 (0.7912) time: 0.1331 data: 0.0283 max mem: 9377 +Train: [77] [5000/6250] eta: 0:03:23 lr: 0.000016 grad: 0.1552 (0.1636) loss: 0.7935 (0.7913) time: 0.1346 data: 0.0458 max mem: 9377 +Train: [77] [5100/6250] eta: 0:03:08 lr: 0.000016 grad: 0.1575 (0.1635) loss: 0.7895 (0.7913) time: 0.3623 data: 0.2769 max mem: 9377 +Train: [77] [5200/6250] eta: 0:02:51 lr: 0.000016 grad: 0.1512 (0.1634) loss: 0.7959 (0.7913) time: 0.1855 data: 0.1027 max mem: 9377 +Train: [77] [5300/6250] eta: 0:02:35 lr: 0.000016 grad: 0.1607 (0.1635) loss: 0.7957 (0.7913) time: 0.1593 data: 0.0700 max mem: 9377 +Train: [77] [5400/6250] eta: 0:02:18 lr: 0.000016 grad: 0.1601 (0.1634) loss: 0.7951 (0.7914) time: 0.1424 data: 0.0571 max mem: 9377 +Train: [77] [5500/6250] eta: 0:02:02 lr: 0.000016 grad: 0.1605 (0.1633) loss: 0.7868 (0.7914) time: 0.1625 data: 0.0710 max mem: 9377 +Train: [77] [5600/6250] eta: 0:01:46 lr: 0.000016 grad: 0.1583 (0.1633) loss: 0.7993 (0.7913) time: 0.1783 data: 0.0864 max mem: 9377 +Train: [77] [5700/6250] eta: 0:01:30 lr: 0.000016 grad: 0.1547 (0.1633) loss: 0.7875 (0.7913) time: 0.1749 data: 0.0631 max mem: 9377 +Train: [77] [5800/6250] eta: 0:01:13 lr: 0.000016 grad: 0.1559 (0.1632) loss: 0.7952 (0.7913) time: 0.1681 data: 0.0729 max mem: 9377 +Train: [77] [5900/6250] eta: 0:00:57 lr: 0.000016 grad: 0.1624 (0.1631) loss: 0.7874 (0.7913) time: 0.1400 data: 0.0372 max mem: 9377 +Train: [77] [6000/6250] eta: 0:00:41 lr: 0.000016 grad: 0.1647 (0.1631) loss: 0.7867 (0.7913) time: 0.1580 data: 0.0682 max mem: 9377 +Train: [77] [6100/6250] eta: 0:00:24 lr: 0.000016 grad: 0.1653 (0.1631) loss: 0.7910 (0.7913) time: 0.1579 data: 0.0667 max mem: 9377 +Train: [77] [6200/6250] eta: 0:00:08 lr: 0.000016 grad: 0.1497 (0.1630) loss: 0.7873 (0.7912) time: 0.1571 data: 0.0678 max mem: 9377 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.1555 (0.1630) loss: 0.7881 (0.7913) time: 0.1755 data: 0.0960 max mem: 9377 +Train: [77] Total time: 0:17:10 (0.1649 s / it) +Averaged stats: lr: 0.000016 grad: 0.1555 (0.1630) loss: 0.7881 (0.7913) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:05:59 loss: 0.8141 (0.8141) time: 5.7934 data: 5.7630 max mem: 9377 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.7941 (0.7953) time: 0.1456 data: 0.1203 max mem: 9377 +Eval (hcp-train-subset): [77] Total time: 0:00:14 (0.2385 s / it) +Averaged stats (hcp-train-subset): loss: 0.7941 (0.7953) +Eval (hcp-val): [77] [ 0/62] eta: 0:06:05 loss: 0.8380 (0.8380) time: 5.8887 data: 5.8586 max mem: 9377 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.8380 (0.8392) time: 0.1281 data: 0.1027 max mem: 9377 +Eval (hcp-val): [77] Total time: 0:00:14 (0.2356 s / it) +Averaged stats (hcp-val): loss: 0.8380 (0.8392) +Eval (nsd-val): [77] [ 0/62] eta: 0:06:01 loss: 0.8092 (0.8092) time: 5.8242 data: 5.7917 max mem: 9377 +Eval (nsd-val): [77] [61/62] eta: 0:00:00 loss: 0.8202 (0.8211) time: 0.1214 data: 0.0939 max mem: 9377 +Eval (nsd-val): [77] Total time: 0:00:14 (0.2391 s / it) +Averaged stats (nsd-val): loss: 0.8202 (0.8211) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 12:10:40 lr: 0.000016 grad: 0.1295 (0.1295) loss: 0.8400 (0.8400) time: 7.0145 data: 6.9178 max mem: 9377 +Train: [78] [ 100/6250] eta: 0:23:07 lr: 0.000016 grad: 0.1494 (0.2076) loss: 0.8072 (0.8087) time: 0.1764 data: 0.0664 max mem: 9377 +Train: [78] [ 200/6250] eta: 0:20:03 lr: 0.000016 grad: 0.1422 (0.1899) loss: 0.8177 (0.8085) time: 0.1634 data: 0.0498 max mem: 9377 +Train: [78] [ 300/6250] eta: 0:18:48 lr: 0.000016 grad: 0.1790 (0.1835) loss: 0.7904 (0.8066) time: 0.1774 data: 0.0781 max mem: 9377 +Train: [78] [ 400/6250] eta: 0:17:57 lr: 0.000016 grad: 0.1671 (0.1820) loss: 0.8023 (0.8045) time: 0.1631 data: 0.0602 max mem: 9377 +Train: [78] [ 500/6250] eta: 0:17:13 lr: 0.000016 grad: 0.1688 (0.1805) loss: 0.7938 (0.8024) time: 0.1768 data: 0.0710 max mem: 9377 +Train: [78] [ 600/6250] eta: 0:16:38 lr: 0.000016 grad: 0.1560 (0.1782) loss: 0.7863 (0.8007) time: 0.1601 data: 0.0568 max mem: 9377 +Train: [78] [ 700/6250] eta: 0:16:08 lr: 0.000016 grad: 0.1653 (0.1766) loss: 0.7894 (0.7990) time: 0.1532 data: 0.0549 max mem: 9377 +Train: [78] [ 800/6250] eta: 0:15:37 lr: 0.000016 grad: 0.1608 (0.1756) loss: 0.7874 (0.7977) time: 0.1283 data: 0.0390 max mem: 9377 +Train: [78] [ 900/6250] eta: 0:15:13 lr: 0.000016 grad: 0.1610 (0.1742) loss: 0.7936 (0.7973) time: 0.1776 data: 0.0859 max mem: 9377 +Train: [78] [1000/6250] eta: 0:14:52 lr: 0.000016 grad: 0.1566 (0.1734) loss: 0.7927 (0.7966) time: 0.1678 data: 0.0809 max mem: 9377 +Train: [78] [1100/6250] eta: 0:14:30 lr: 0.000016 grad: 0.1605 (0.1728) loss: 0.7886 (0.7959) time: 0.1549 data: 0.0592 max mem: 9377 +Train: [78] [1200/6250] eta: 0:14:05 lr: 0.000016 grad: 0.1640 (0.1720) loss: 0.7933 (0.7955) time: 0.1532 data: 0.0666 max mem: 9377 +Train: [78] [1300/6250] eta: 0:13:47 lr: 0.000016 grad: 0.1600 (0.1716) loss: 0.7859 (0.7950) time: 0.1958 data: 0.1088 max mem: 9377 +Train: [78] [1400/6250] eta: 0:13:33 lr: 0.000016 grad: 0.1583 (0.1711) loss: 0.7821 (0.7947) time: 0.1472 data: 0.0497 max mem: 9377 +Train: [78] [1500/6250] eta: 0:13:17 lr: 0.000015 grad: 0.1540 (0.1707) loss: 0.7946 (0.7944) time: 0.1536 data: 0.0723 max mem: 9377 +Train: [78] [1600/6250] eta: 0:12:59 lr: 0.000015 grad: 0.1619 (0.1706) loss: 0.7883 (0.7941) time: 0.1461 data: 0.0607 max mem: 9377 +Train: [78] [1700/6250] eta: 0:12:42 lr: 0.000015 grad: 0.1673 (0.1703) loss: 0.7890 (0.7939) time: 0.1599 data: 0.0725 max mem: 9377 +Train: [78] [1800/6250] eta: 0:12:26 lr: 0.000015 grad: 0.1573 (0.1698) loss: 0.7956 (0.7937) time: 0.1794 data: 0.0910 max mem: 9377 +Train: [78] [1900/6250] eta: 0:12:10 lr: 0.000015 grad: 0.1629 (0.1696) loss: 0.7887 (0.7936) time: 0.1783 data: 0.0809 max mem: 9377 +Train: [78] [2000/6250] eta: 0:11:50 lr: 0.000015 grad: 0.1738 (0.1696) loss: 0.7860 (0.7934) time: 0.1619 data: 0.0583 max mem: 9377 +Train: [78] [2100/6250] eta: 0:11:32 lr: 0.000015 grad: 0.1606 (0.1693) loss: 0.7822 (0.7931) time: 0.1579 data: 0.0656 max mem: 9377 +Train: [78] [2200/6250] eta: 0:11:12 lr: 0.000015 grad: 0.1569 (0.1691) loss: 0.7879 (0.7930) time: 0.1408 data: 0.0508 max mem: 9377 +Train: [78] [2300/6250] eta: 0:10:52 lr: 0.000015 grad: 0.1495 (0.1688) loss: 0.7932 (0.7929) time: 0.1425 data: 0.0579 max mem: 9377 +Train: [78] [2400/6250] eta: 0:10:34 lr: 0.000015 grad: 0.1714 (0.1686) loss: 0.7882 (0.7928) time: 0.1425 data: 0.0548 max mem: 9377 +Train: [78] [2500/6250] eta: 0:10:15 lr: 0.000015 grad: 0.1544 (0.1684) loss: 0.7948 (0.7928) time: 0.1335 data: 0.0386 max mem: 9377 +Train: [78] [2600/6250] eta: 0:09:58 lr: 0.000015 grad: 0.1622 (0.1681) loss: 0.7955 (0.7927) time: 0.2116 data: 0.1243 max mem: 9377 +Train: [78] [2700/6250] eta: 0:09:39 lr: 0.000015 grad: 0.1495 (0.1678) loss: 0.7966 (0.7927) time: 0.1525 data: 0.0626 max mem: 9377 +Train: [78] [2800/6250] eta: 0:09:22 lr: 0.000015 grad: 0.1609 (0.1676) loss: 0.7951 (0.7927) time: 0.1622 data: 0.0749 max mem: 9377 +Train: [78] [2900/6250] eta: 0:09:05 lr: 0.000015 grad: 0.1551 (0.1674) loss: 0.7931 (0.7926) time: 0.1531 data: 0.0664 max mem: 9377 +Train: [78] [3000/6250] eta: 0:08:49 lr: 0.000015 grad: 0.1512 (0.1671) loss: 0.8045 (0.7927) time: 0.1738 data: 0.0925 max mem: 9377 +Train: [78] [3100/6250] eta: 0:08:33 lr: 0.000015 grad: 0.1507 (0.1669) loss: 0.7943 (0.7927) time: 0.1610 data: 0.0742 max mem: 9377 +Train: [78] [3200/6250] eta: 0:08:18 lr: 0.000015 grad: 0.1617 (0.1670) loss: 0.7820 (0.7927) time: 0.1222 data: 0.0474 max mem: 9377 +Train: [78] [3300/6250] eta: 0:08:01 lr: 0.000015 grad: 0.1585 (0.1669) loss: 0.7842 (0.7926) time: 0.1519 data: 0.0622 max mem: 9377 +Train: [78] [3400/6250] eta: 0:07:45 lr: 0.000015 grad: 0.1648 (0.1667) loss: 0.7868 (0.7926) time: 0.1545 data: 0.0804 max mem: 9377 +Train: [78] [3500/6250] eta: 0:07:28 lr: 0.000015 grad: 0.1637 (0.1666) loss: 0.7895 (0.7926) time: 0.1496 data: 0.0725 max mem: 9377 +Train: [78] [3600/6250] eta: 0:07:12 lr: 0.000015 grad: 0.1515 (0.1664) loss: 0.7954 (0.7926) time: 0.1610 data: 0.0666 max mem: 9377 +Train: [78] [3700/6250] eta: 0:06:57 lr: 0.000015 grad: 0.1535 (0.1662) loss: 0.7878 (0.7925) time: 0.1899 data: 0.1067 max mem: 9377 +Train: [78] [3800/6250] eta: 0:06:40 lr: 0.000015 grad: 0.1670 (0.1661) loss: 0.7872 (0.7925) time: 0.1645 data: 0.0703 max mem: 9377 +Train: [78] [3900/6250] eta: 0:06:23 lr: 0.000015 grad: 0.1614 (0.1660) loss: 0.7950 (0.7924) time: 0.1611 data: 0.0733 max mem: 9377 +Train: [78] [4000/6250] eta: 0:06:06 lr: 0.000015 grad: 0.1604 (0.1659) loss: 0.7956 (0.7924) time: 0.1501 data: 0.0505 max mem: 9377 +Train: [78] [4100/6250] eta: 0:05:49 lr: 0.000015 grad: 0.1572 (0.1659) loss: 0.7938 (0.7923) time: 0.1342 data: 0.0411 max mem: 9377 +Train: [78] [4200/6250] eta: 0:05:32 lr: 0.000015 grad: 0.1560 (0.1658) loss: 0.7944 (0.7922) time: 0.1371 data: 0.0486 max mem: 9377 +Train: [78] [4300/6250] eta: 0:05:15 lr: 0.000015 grad: 0.1569 (0.1657) loss: 0.7927 (0.7922) time: 0.1414 data: 0.0478 max mem: 9377 +Train: [78] [4400/6250] eta: 0:04:58 lr: 0.000015 grad: 0.1646 (0.1657) loss: 0.7840 (0.7920) time: 0.1465 data: 0.0565 max mem: 9377 +Train: [78] [4500/6250] eta: 0:04:42 lr: 0.000015 grad: 0.1571 (0.1656) loss: 0.7950 (0.7920) time: 0.1363 data: 0.0435 max mem: 9377 +Train: [78] [4600/6250] eta: 0:04:25 lr: 0.000015 grad: 0.1660 (0.1657) loss: 0.7825 (0.7918) time: 0.1481 data: 0.0634 max mem: 9377 +Train: [78] [4700/6250] eta: 0:04:09 lr: 0.000015 grad: 0.1620 (0.1656) loss: 0.7869 (0.7918) time: 0.1869 data: 0.1017 max mem: 9377 +Train: [78] [4800/6250] eta: 0:03:53 lr: 0.000015 grad: 0.1570 (0.1656) loss: 0.7861 (0.7917) time: 0.1599 data: 0.0743 max mem: 9377 +Train: [78] [4900/6250] eta: 0:03:37 lr: 0.000015 grad: 0.1599 (0.1656) loss: 0.7946 (0.7917) time: 0.1546 data: 0.0572 max mem: 9377 +Train: [78] [5000/6250] eta: 0:03:21 lr: 0.000015 grad: 0.1602 (0.1656) loss: 0.7908 (0.7917) time: 0.1682 data: 0.0809 max mem: 9377 +Train: [78] [5100/6250] eta: 0:03:05 lr: 0.000015 grad: 0.1648 (0.1656) loss: 0.7879 (0.7916) time: 0.1689 data: 0.0859 max mem: 9377 +Train: [78] [5200/6250] eta: 0:02:49 lr: 0.000015 grad: 0.1705 (0.1655) loss: 0.7972 (0.7917) time: 0.1801 data: 0.0912 max mem: 9377 +Train: [78] [5300/6250] eta: 0:02:34 lr: 0.000015 grad: 0.1629 (0.1655) loss: 0.7930 (0.7917) time: 0.1903 data: 0.1015 max mem: 9377 +Train: [78] [5400/6250] eta: 0:02:17 lr: 0.000015 grad: 0.1692 (0.1655) loss: 0.7892 (0.7917) time: 0.1685 data: 0.0809 max mem: 9377 +Train: [78] [5500/6250] eta: 0:02:01 lr: 0.000015 grad: 0.1623 (0.1655) loss: 0.7892 (0.7917) time: 0.1697 data: 0.0845 max mem: 9377 +Train: [78] [5600/6250] eta: 0:01:45 lr: 0.000015 grad: 0.1595 (0.1656) loss: 0.7941 (0.7917) time: 0.1791 data: 0.0907 max mem: 9377 +Train: [78] [5700/6250] eta: 0:01:29 lr: 0.000015 grad: 0.1591 (0.1656) loss: 0.7891 (0.7917) time: 0.1639 data: 0.0759 max mem: 9377 +Train: [78] [5800/6250] eta: 0:01:13 lr: 0.000015 grad: 0.1611 (0.1656) loss: 0.7933 (0.7916) time: 0.1666 data: 0.0724 max mem: 9377 +Train: [78] [5900/6250] eta: 0:00:57 lr: 0.000015 grad: 0.1633 (0.1657) loss: 0.7807 (0.7916) time: 0.1923 data: 0.1008 max mem: 9377 +Train: [78] [6000/6250] eta: 0:00:40 lr: 0.000015 grad: 0.1682 (0.1657) loss: 0.7868 (0.7915) time: 0.1607 data: 0.0671 max mem: 9377 +Train: [78] [6100/6250] eta: 0:00:24 lr: 0.000015 grad: 0.1616 (0.1657) loss: 0.7921 (0.7915) time: 0.1347 data: 0.0427 max mem: 9377 +Train: [78] [6200/6250] eta: 0:00:08 lr: 0.000014 grad: 0.1619 (0.1657) loss: 0.7932 (0.7914) time: 0.1238 data: 0.0279 max mem: 9377 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.1761 (0.1657) loss: 0.7836 (0.7914) time: 0.1636 data: 0.0748 max mem: 9377 +Train: [78] Total time: 0:17:06 (0.1642 s / it) +Averaged stats: lr: 0.000014 grad: 0.1761 (0.1657) loss: 0.7836 (0.7914) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:06:43 loss: 0.8133 (0.8133) time: 6.5156 data: 6.4844 max mem: 9377 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.7922 (0.7943) time: 0.1150 data: 0.0898 max mem: 9377 +Eval (hcp-train-subset): [78] Total time: 0:00:14 (0.2399 s / it) +Averaged stats (hcp-train-subset): loss: 0.7922 (0.7943) +Eval (hcp-val): [78] [ 0/62] eta: 0:05:00 loss: 0.8399 (0.8399) time: 4.8432 data: 4.7607 max mem: 9377 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.8363 (0.8385) time: 0.1364 data: 0.1109 max mem: 9377 +Eval (hcp-val): [78] Total time: 0:00:14 (0.2412 s / it) +Averaged stats (hcp-val): loss: 0.8363 (0.8385) +Eval (nsd-val): [78] [ 0/62] eta: 0:05:06 loss: 0.8105 (0.8105) time: 4.9427 data: 4.8860 max mem: 9377 +Eval (nsd-val): [78] [61/62] eta: 0:00:00 loss: 0.8215 (0.8220) time: 0.1215 data: 0.0959 max mem: 9377 +Eval (nsd-val): [78] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (nsd-val): loss: 0.8215 (0.8220) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [79] [ 0/6250] eta: 11:57:58 lr: 0.000014 grad: 0.0924 (0.0924) loss: 0.8592 (0.8592) time: 6.8925 data: 6.7080 max mem: 9377 +Train: [79] [ 100/6250] eta: 0:23:58 lr: 0.000014 grad: 0.1803 (0.2358) loss: 0.8000 (0.7949) time: 0.1834 data: 0.0756 max mem: 9377 +Train: [79] [ 200/6250] eta: 0:20:48 lr: 0.000014 grad: 0.1900 (0.2144) loss: 0.7997 (0.7946) time: 0.1956 data: 0.0901 max mem: 9377 +Train: [79] [ 300/6250] eta: 0:18:49 lr: 0.000014 grad: 0.1729 (0.2036) loss: 0.7795 (0.7933) time: 0.1671 data: 0.0586 max mem: 9377 +Train: [79] [ 400/6250] eta: 0:17:52 lr: 0.000014 grad: 0.1558 (0.1948) loss: 0.8009 (0.7943) time: 0.1481 data: 0.0453 max mem: 9377 +Train: [79] [ 500/6250] eta: 0:17:14 lr: 0.000014 grad: 0.1507 (0.1872) loss: 0.7972 (0.7956) time: 0.1282 data: 0.0294 max mem: 9377 +Train: [79] [ 600/6250] eta: 0:16:40 lr: 0.000014 grad: 0.1483 (0.1823) loss: 0.7959 (0.7963) time: 0.1683 data: 0.0712 max mem: 9377 +Train: [79] [ 700/6250] eta: 0:16:14 lr: 0.000014 grad: 0.1584 (0.1796) loss: 0.7930 (0.7970) time: 0.1520 data: 0.0499 max mem: 9377 +Train: [79] [ 800/6250] eta: 0:15:46 lr: 0.000014 grad: 0.1596 (0.1777) loss: 0.7971 (0.7972) time: 0.1761 data: 0.0848 max mem: 9377 +Train: [79] [ 900/6250] eta: 0:15:25 lr: 0.000014 grad: 0.1596 (0.1766) loss: 0.7948 (0.7969) time: 0.1625 data: 0.0622 max mem: 9377 +Train: [79] [1000/6250] eta: 0:14:58 lr: 0.000014 grad: 0.1607 (0.1754) loss: 0.8082 (0.7968) time: 0.1394 data: 0.0486 max mem: 9377 +Train: [79] [1100/6250] eta: 0:14:35 lr: 0.000014 grad: 0.1641 (0.1747) loss: 0.7945 (0.7965) time: 0.1561 data: 0.0584 max mem: 9377 +Train: [79] [1200/6250] eta: 0:14:13 lr: 0.000014 grad: 0.1680 (0.1743) loss: 0.7851 (0.7960) time: 0.1495 data: 0.0568 max mem: 9377 +Train: [79] [1300/6250] eta: 0:13:58 lr: 0.000014 grad: 0.1720 (0.1736) loss: 0.7885 (0.7956) time: 0.2458 data: 0.1662 max mem: 9377 +Train: [79] [1400/6250] eta: 0:13:40 lr: 0.000014 grad: 0.1570 (0.1730) loss: 0.7891 (0.7954) time: 0.1672 data: 0.0817 max mem: 9377 +Train: [79] [1500/6250] eta: 0:13:21 lr: 0.000014 grad: 0.1599 (0.1726) loss: 0.8005 (0.7952) time: 0.1628 data: 0.0753 max mem: 9377 +Train: [79] [1600/6250] eta: 0:13:01 lr: 0.000014 grad: 0.1602 (0.1721) loss: 0.7870 (0.7950) time: 0.1363 data: 0.0438 max mem: 9377 +Train: [79] [1700/6250] eta: 0:12:44 lr: 0.000014 grad: 0.1681 (0.1717) loss: 0.7931 (0.7949) time: 0.1592 data: 0.0708 max mem: 9377 +Train: [79] [1800/6250] eta: 0:12:26 lr: 0.000014 grad: 0.1584 (0.1712) loss: 0.8003 (0.7948) time: 0.1707 data: 0.0772 max mem: 9377 +Train: [79] [1900/6250] eta: 0:12:06 lr: 0.000014 grad: 0.1560 (0.1707) loss: 0.7973 (0.7946) time: 0.1572 data: 0.0592 max mem: 9377 +Train: [79] [2000/6250] eta: 0:11:46 lr: 0.000014 grad: 0.1671 (0.1704) loss: 0.7909 (0.7945) time: 0.1591 data: 0.0620 max mem: 9377 +Train: [79] [2100/6250] eta: 0:11:27 lr: 0.000014 grad: 0.1534 (0.1700) loss: 0.7912 (0.7944) time: 0.1584 data: 0.0629 max mem: 9377 +Train: [79] [2200/6250] eta: 0:11:09 lr: 0.000014 grad: 0.1691 (0.1698) loss: 0.7921 (0.7943) time: 0.1569 data: 0.0612 max mem: 9377 +Train: [79] [2300/6250] eta: 0:10:53 lr: 0.000014 grad: 0.1559 (0.1694) loss: 0.7977 (0.7943) time: 0.1513 data: 0.0500 max mem: 9377 +Train: [79] [2400/6250] eta: 0:10:35 lr: 0.000014 grad: 0.1617 (0.1692) loss: 0.7873 (0.7942) time: 0.1567 data: 0.0615 max mem: 9377 +Train: [79] [2500/6250] eta: 0:10:17 lr: 0.000014 grad: 0.1694 (0.1690) loss: 0.7834 (0.7940) time: 0.1593 data: 0.0607 max mem: 9377 +Train: [79] [2600/6250] eta: 0:09:59 lr: 0.000014 grad: 0.1639 (0.1688) loss: 0.7858 (0.7939) time: 0.1627 data: 0.0663 max mem: 9377 +Train: [79] [2700/6250] eta: 0:09:41 lr: 0.000014 grad: 0.1674 (0.1687) loss: 0.7914 (0.7938) time: 0.1547 data: 0.0759 max mem: 9377 +Train: [79] [2800/6250] eta: 0:09:24 lr: 0.000014 grad: 0.1713 (0.1688) loss: 0.7876 (0.7936) time: 0.1465 data: 0.0517 max mem: 9377 +Train: [79] [2900/6250] eta: 0:09:08 lr: 0.000014 grad: 0.1599 (0.1687) loss: 0.7887 (0.7933) time: 0.1610 data: 0.0729 max mem: 9377 +Train: [79] [3000/6250] eta: 0:08:51 lr: 0.000014 grad: 0.1668 (0.1686) loss: 0.7873 (0.7932) time: 0.1567 data: 0.0665 max mem: 9377 +Train: [79] [3100/6250] eta: 0:08:35 lr: 0.000014 grad: 0.1628 (0.1687) loss: 0.7857 (0.7929) time: 0.1398 data: 0.0497 max mem: 9377 +Train: [79] [3200/6250] eta: 0:08:19 lr: 0.000014 grad: 0.1690 (0.1689) loss: 0.7815 (0.7926) time: 0.2015 data: 0.1195 max mem: 9377 +Train: [79] [3300/6250] eta: 0:08:03 lr: 0.000014 grad: 0.1560 (0.1688) loss: 0.7876 (0.7924) time: 0.1861 data: 0.0921 max mem: 9377 +Train: [79] [3400/6250] eta: 0:07:47 lr: 0.000014 grad: 0.1645 (0.1688) loss: 0.7920 (0.7923) time: 0.1372 data: 0.0512 max mem: 9377 +Train: [79] [3500/6250] eta: 0:07:29 lr: 0.000014 grad: 0.1614 (0.1686) loss: 0.7897 (0.7922) time: 0.1569 data: 0.0676 max mem: 9377 +Train: [79] [3600/6250] eta: 0:07:13 lr: 0.000014 grad: 0.1557 (0.1685) loss: 0.7909 (0.7921) time: 0.1659 data: 0.0755 max mem: 9377 +Train: [79] [3700/6250] eta: 0:06:58 lr: 0.000014 grad: 0.1586 (0.1684) loss: 0.7891 (0.7920) time: 0.1664 data: 0.0805 max mem: 9377 +Train: [79] [3800/6250] eta: 0:06:41 lr: 0.000014 grad: 0.1550 (0.1683) loss: 0.7892 (0.7918) time: 0.1539 data: 0.0666 max mem: 9377 +Train: [79] [3900/6250] eta: 0:06:24 lr: 0.000014 grad: 0.1630 (0.1682) loss: 0.7828 (0.7916) time: 0.1544 data: 0.0618 max mem: 9377 +Train: [79] [4000/6250] eta: 0:06:07 lr: 0.000014 grad: 0.1525 (0.1682) loss: 0.7958 (0.7916) time: 0.1374 data: 0.0372 max mem: 9377 +Train: [79] [4100/6250] eta: 0:05:49 lr: 0.000014 grad: 0.1657 (0.1681) loss: 0.7810 (0.7915) time: 0.1336 data: 0.0386 max mem: 9377 +Train: [79] [4200/6250] eta: 0:05:33 lr: 0.000014 grad: 0.1545 (0.1679) loss: 0.7987 (0.7915) time: 0.1491 data: 0.0615 max mem: 9377 +Train: [79] [4300/6250] eta: 0:05:16 lr: 0.000014 grad: 0.1680 (0.1680) loss: 0.7907 (0.7915) time: 0.1460 data: 0.0541 max mem: 9377 +Train: [79] [4400/6250] eta: 0:05:00 lr: 0.000014 grad: 0.1661 (0.1679) loss: 0.7799 (0.7914) time: 0.1917 data: 0.1049 max mem: 9377 +Train: [79] [4500/6250] eta: 0:04:43 lr: 0.000014 grad: 0.1638 (0.1679) loss: 0.7915 (0.7913) time: 0.1822 data: 0.0896 max mem: 9377 +Train: [79] [4600/6250] eta: 0:04:27 lr: 0.000014 grad: 0.1616 (0.1677) loss: 0.7943 (0.7912) time: 0.1488 data: 0.0576 max mem: 9377 +Train: [79] [4700/6250] eta: 0:04:10 lr: 0.000013 grad: 0.1709 (0.1676) loss: 0.7854 (0.7912) time: 0.1723 data: 0.0796 max mem: 9377 +Train: [79] [4800/6250] eta: 0:03:54 lr: 0.000013 grad: 0.1595 (0.1676) loss: 0.7958 (0.7912) time: 0.1461 data: 0.0528 max mem: 9377 +Train: [79] [4900/6250] eta: 0:03:38 lr: 0.000013 grad: 0.1612 (0.1675) loss: 0.7954 (0.7912) time: 0.1796 data: 0.0974 max mem: 9377 +Train: [79] [5000/6250] eta: 0:03:21 lr: 0.000013 grad: 0.1565 (0.1674) loss: 0.7931 (0.7912) time: 0.1397 data: 0.0477 max mem: 9377 +Train: [79] [5100/6250] eta: 0:03:05 lr: 0.000013 grad: 0.1576 (0.1674) loss: 0.7911 (0.7912) time: 0.1648 data: 0.0820 max mem: 9377 +Train: [79] [5200/6250] eta: 0:02:49 lr: 0.000013 grad: 0.1655 (0.1673) loss: 0.7966 (0.7913) time: 0.1804 data: 0.0905 max mem: 9377 +Train: [79] [5300/6250] eta: 0:02:33 lr: 0.000013 grad: 0.1659 (0.1672) loss: 0.7929 (0.7913) time: 0.1759 data: 0.0897 max mem: 9377 +Train: [79] [5400/6250] eta: 0:02:17 lr: 0.000013 grad: 0.1607 (0.1671) loss: 0.7865 (0.7914) time: 0.1652 data: 0.0835 max mem: 9377 +Train: [79] [5500/6250] eta: 0:02:01 lr: 0.000013 grad: 0.1643 (0.1671) loss: 0.7904 (0.7914) time: 0.1671 data: 0.0753 max mem: 9377 +Train: [79] [5600/6250] eta: 0:01:45 lr: 0.000013 grad: 0.1658 (0.1671) loss: 0.7951 (0.7913) time: 0.1557 data: 0.0650 max mem: 9377 +Train: [79] [5700/6250] eta: 0:01:29 lr: 0.000013 grad: 0.1611 (0.1671) loss: 0.7903 (0.7913) time: 0.1739 data: 0.0864 max mem: 9377 +Train: [79] [5800/6250] eta: 0:01:12 lr: 0.000013 grad: 0.1715 (0.1672) loss: 0.7836 (0.7912) time: 0.1819 data: 0.0967 max mem: 9377 +Train: [79] [5900/6250] eta: 0:00:56 lr: 0.000013 grad: 0.1679 (0.1672) loss: 0.7825 (0.7912) time: 0.1601 data: 0.0650 max mem: 9377 +Train: [79] [6000/6250] eta: 0:00:40 lr: 0.000013 grad: 0.1623 (0.1673) loss: 0.7966 (0.7912) time: 0.1394 data: 0.0427 max mem: 9377 +Train: [79] [6100/6250] eta: 0:00:24 lr: 0.000013 grad: 0.1620 (0.1673) loss: 0.7917 (0.7912) time: 0.1544 data: 0.0685 max mem: 9377 +Train: [79] [6200/6250] eta: 0:00:08 lr: 0.000013 grad: 0.1594 (0.1673) loss: 0.7941 (0.7913) time: 0.1369 data: 0.0479 max mem: 9377 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.1497 (0.1672) loss: 0.8046 (0.7913) time: 0.1458 data: 0.0521 max mem: 9377 +Train: [79] Total time: 0:16:57 (0.1629 s / it) +Averaged stats: lr: 0.000013 grad: 0.1497 (0.1672) loss: 0.8046 (0.7913) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:06:20 loss: 0.8103 (0.8103) time: 6.1446 data: 6.1133 max mem: 9377 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.7933 (0.7934) time: 0.1420 data: 0.1168 max mem: 9377 +Eval (hcp-train-subset): [79] Total time: 0:00:15 (0.2513 s / it) +Averaged stats (hcp-train-subset): loss: 0.7933 (0.7934) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [79] [ 0/62] eta: 0:05:12 loss: 0.8403 (0.8403) time: 5.0437 data: 4.9257 max mem: 9377 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.8367 (0.8398) time: 0.1398 data: 0.1144 max mem: 9377 +Eval (hcp-val): [79] Total time: 0:00:15 (0.2436 s / it) +Averaged stats (hcp-val): loss: 0.8367 (0.8398) +Making plots (hcp-val): example=8 +Eval (nsd-val): [79] [ 0/62] eta: 0:04:42 loss: 0.8104 (0.8104) time: 4.5603 data: 4.4760 max mem: 9377 +Eval (nsd-val): [79] [61/62] eta: 0:00:00 loss: 0.8188 (0.8222) time: 0.1529 data: 0.1276 max mem: 9377 +Eval (nsd-val): [79] Total time: 0:00:14 (0.2394 s / it) +Averaged stats (nsd-val): loss: 0.8188 (0.8222) +Making plots (nsd-val): example=10 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 10:04:59 lr: 0.000013 grad: 0.1089 (0.1089) loss: 0.8512 (0.8512) time: 5.8079 data: 5.5335 max mem: 9377 +Train: [80] [ 100/6250] eta: 0:24:45 lr: 0.000013 grad: 0.1753 (0.2594) loss: 0.8009 (0.7750) time: 0.1673 data: 0.0393 max mem: 9377 +Train: [80] [ 200/6250] eta: 0:20:59 lr: 0.000013 grad: 0.1872 (0.2364) loss: 0.7777 (0.7743) time: 0.1734 data: 0.0693 max mem: 9377 +Train: [80] [ 300/6250] eta: 0:19:05 lr: 0.000013 grad: 0.1732 (0.2173) loss: 0.7763 (0.7764) time: 0.1743 data: 0.0798 max mem: 9377 +Train: [80] [ 400/6250] eta: 0:17:48 lr: 0.000013 grad: 0.1692 (0.2064) loss: 0.7896 (0.7785) time: 0.1589 data: 0.0563 max mem: 9377 +Train: [80] [ 500/6250] eta: 0:16:56 lr: 0.000013 grad: 0.1579 (0.1992) loss: 0.7968 (0.7809) time: 0.1665 data: 0.0706 max mem: 9377 +Train: [80] [ 600/6250] eta: 0:16:19 lr: 0.000013 grad: 0.1637 (0.1947) loss: 0.7917 (0.7822) time: 0.1853 data: 0.0982 max mem: 9377 +Train: [80] [ 700/6250] eta: 0:15:55 lr: 0.000013 grad: 0.1556 (0.1910) loss: 0.7965 (0.7835) time: 0.1796 data: 0.0981 max mem: 9377 +Train: [80] [ 800/6250] eta: 0:15:47 lr: 0.000013 grad: 0.1593 (0.1879) loss: 0.7926 (0.7849) time: 0.2465 data: 0.1455 max mem: 9377 +Train: [80] [ 900/6250] eta: 0:15:16 lr: 0.000013 grad: 0.1649 (0.1861) loss: 0.7951 (0.7857) time: 0.1299 data: 0.0252 max mem: 9377 +Train: [80] [1000/6250] eta: 0:14:53 lr: 0.000013 grad: 0.1677 (0.1846) loss: 0.7953 (0.7861) time: 0.1416 data: 0.0458 max mem: 9377 +Train: [80] [1100/6250] eta: 0:14:31 lr: 0.000013 grad: 0.1747 (0.1829) loss: 0.7899 (0.7865) time: 0.0922 data: 0.0002 max mem: 9377 +Train: [80] [1200/6250] eta: 0:14:10 lr: 0.000013 grad: 0.1656 (0.1817) loss: 0.7941 (0.7866) time: 0.1686 data: 0.0769 max mem: 9377 +Train: [80] [1300/6250] eta: 0:14:04 lr: 0.000013 grad: 0.1782 (0.1808) loss: 0.7773 (0.7867) time: 0.2032 data: 0.1087 max mem: 9377 +Train: [80] [1400/6250] eta: 0:13:49 lr: 0.000013 grad: 0.1612 (0.1802) loss: 0.7925 (0.7867) time: 0.1795 data: 0.0981 max mem: 9377 +Train: [80] [1500/6250] eta: 0:13:35 lr: 0.000013 grad: 0.1687 (0.1796) loss: 0.7895 (0.7865) time: 0.1750 data: 0.0837 max mem: 9377 +Train: [80] [1600/6250] eta: 0:13:15 lr: 0.000013 grad: 0.1584 (0.1789) loss: 0.7988 (0.7866) time: 0.1520 data: 0.0582 max mem: 9377 +Train: [80] [1700/6250] eta: 0:13:01 lr: 0.000013 grad: 0.1682 (0.1784) loss: 0.7825 (0.7864) time: 0.1812 data: 0.0968 max mem: 9377 +Train: [80] [1800/6250] eta: 0:12:46 lr: 0.000013 grad: 0.1707 (0.1776) loss: 0.7903 (0.7866) time: 0.1739 data: 0.0737 max mem: 9377 +Train: [80] [1900/6250] eta: 0:12:28 lr: 0.000013 grad: 0.1622 (0.1771) loss: 0.7827 (0.7866) time: 0.1606 data: 0.0697 max mem: 9377 +Train: [80] [2000/6250] eta: 0:12:08 lr: 0.000013 grad: 0.1673 (0.1764) loss: 0.7877 (0.7866) time: 0.1518 data: 0.0525 max mem: 9377 +Train: [80] [2100/6250] eta: 0:11:47 lr: 0.000013 grad: 0.1587 (0.1759) loss: 0.7873 (0.7866) time: 0.1449 data: 0.0560 max mem: 9377 +Train: [80] [2200/6250] eta: 0:11:29 lr: 0.000013 grad: 0.1644 (0.1754) loss: 0.7894 (0.7868) time: 0.1559 data: 0.0651 max mem: 9377 +Train: [80] [2300/6250] eta: 0:11:11 lr: 0.000013 grad: 0.1565 (0.1748) loss: 0.7944 (0.7872) time: 0.1321 data: 0.0470 max mem: 9377 +Train: [80] [2400/6250] eta: 0:10:53 lr: 0.000013 grad: 0.1620 (0.1742) loss: 0.7833 (0.7875) time: 0.1767 data: 0.0883 max mem: 9377 +Train: [80] [2500/6250] eta: 0:10:37 lr: 0.000013 grad: 0.1519 (0.1738) loss: 0.7923 (0.7876) time: 0.2075 data: 0.1164 max mem: 9377 +Train: [80] [2600/6250] eta: 0:10:19 lr: 0.000013 grad: 0.1682 (0.1734) loss: 0.7850 (0.7877) time: 0.1653 data: 0.0787 max mem: 9377 +Train: [80] [2700/6250] eta: 0:10:01 lr: 0.000013 grad: 0.1610 (0.1730) loss: 0.7858 (0.7878) time: 0.1724 data: 0.0892 max mem: 9377 +Train: [80] [2800/6250] eta: 0:09:45 lr: 0.000013 grad: 0.1570 (0.1727) loss: 0.7880 (0.7880) time: 0.1949 data: 0.1138 max mem: 9377 +Train: [80] [2900/6250] eta: 0:09:27 lr: 0.000013 grad: 0.1609 (0.1724) loss: 0.7988 (0.7882) time: 0.1288 data: 0.0328 max mem: 9377 +Train: [80] [3000/6250] eta: 0:09:09 lr: 0.000013 grad: 0.1712 (0.1724) loss: 0.7862 (0.7881) time: 0.1536 data: 0.0643 max mem: 9377 +Train: [80] [3100/6250] eta: 0:08:51 lr: 0.000013 grad: 0.1621 (0.1723) loss: 0.7864 (0.7882) time: 0.1624 data: 0.0689 max mem: 9377 +Train: [80] [3200/6250] eta: 0:08:36 lr: 0.000013 grad: 0.1687 (0.1722) loss: 0.7933 (0.7883) time: 0.2299 data: 0.1460 max mem: 9377 +Train: [80] [3300/6250] eta: 0:08:20 lr: 0.000013 grad: 0.1611 (0.1720) loss: 0.7890 (0.7884) time: 0.1930 data: 0.0931 max mem: 9377 +Train: [80] [3400/6250] eta: 0:08:04 lr: 0.000012 grad: 0.1686 (0.1720) loss: 0.7794 (0.7884) time: 0.1792 data: 0.0915 max mem: 9377 +Train: [80] [3500/6250] eta: 0:07:48 lr: 0.000012 grad: 0.1644 (0.1720) loss: 0.7914 (0.7883) time: 0.1671 data: 0.0797 max mem: 9377 +Train: [80] [3600/6250] eta: 0:07:32 lr: 0.000012 grad: 0.1696 (0.1720) loss: 0.7948 (0.7883) time: 0.2055 data: 0.1156 max mem: 9377 +Train: [80] [3700/6250] eta: 0:07:16 lr: 0.000012 grad: 0.1652 (0.1719) loss: 0.7907 (0.7884) time: 0.1996 data: 0.1072 max mem: 9377 +Train: [80] [3800/6250] eta: 0:06:59 lr: 0.000012 grad: 0.1691 (0.1720) loss: 0.7818 (0.7883) time: 0.1805 data: 0.0931 max mem: 9377 +Train: [80] [3900/6250] eta: 0:06:41 lr: 0.000012 grad: 0.1664 (0.1719) loss: 0.7936 (0.7884) time: 0.1332 data: 0.0370 max mem: 9377 +Train: [80] [4000/6250] eta: 0:06:23 lr: 0.000012 grad: 0.1530 (0.1717) loss: 0.7909 (0.7884) time: 0.1750 data: 0.0857 max mem: 9377 +Train: [80] [4100/6250] eta: 0:06:05 lr: 0.000012 grad: 0.1576 (0.1716) loss: 0.7954 (0.7884) time: 0.1544 data: 0.0649 max mem: 9377 +Train: [80] [4200/6250] eta: 0:05:48 lr: 0.000012 grad: 0.1702 (0.1716) loss: 0.7903 (0.7884) time: 0.1457 data: 0.0570 max mem: 9377 +Train: [80] [4300/6250] eta: 0:05:31 lr: 0.000012 grad: 0.1599 (0.1715) loss: 0.7953 (0.7884) time: 0.1696 data: 0.0862 max mem: 9377 +Train: [80] [4400/6250] eta: 0:05:13 lr: 0.000012 grad: 0.1621 (0.1715) loss: 0.7968 (0.7885) time: 0.1428 data: 0.0447 max mem: 9377 +Train: [80] [4500/6250] eta: 0:04:55 lr: 0.000012 grad: 0.1559 (0.1714) loss: 0.7945 (0.7885) time: 0.1411 data: 0.0490 max mem: 9377 +Train: [80] [4600/6250] eta: 0:04:38 lr: 0.000012 grad: 0.1637 (0.1714) loss: 0.7907 (0.7885) time: 0.1468 data: 0.0554 max mem: 9377 +Train: [80] [4700/6250] eta: 0:04:21 lr: 0.000012 grad: 0.1710 (0.1713) loss: 0.7918 (0.7886) time: 0.1644 data: 0.0772 max mem: 9377 +Train: [80] [4800/6250] eta: 0:04:04 lr: 0.000012 grad: 0.1650 (0.1714) loss: 0.7958 (0.7887) time: 0.1474 data: 0.0583 max mem: 9377 +Train: [80] [4900/6250] eta: 0:03:47 lr: 0.000012 grad: 0.1617 (0.1713) loss: 0.7997 (0.7888) time: 0.1606 data: 0.0680 max mem: 9377 +Train: [80] [5000/6250] eta: 0:03:30 lr: 0.000012 grad: 0.1722 (0.1714) loss: 0.7951 (0.7889) time: 0.1668 data: 0.0790 max mem: 9377 +Train: [80] [5100/6250] eta: 0:03:13 lr: 0.000012 grad: 0.1709 (0.1714) loss: 0.7949 (0.7890) time: 0.1626 data: 0.0602 max mem: 9377 +Train: [80] [5200/6250] eta: 0:02:56 lr: 0.000012 grad: 0.1683 (0.1714) loss: 0.7875 (0.7890) time: 0.1321 data: 0.0418 max mem: 9377 +Train: [80] [5300/6250] eta: 0:02:39 lr: 0.000012 grad: 0.1680 (0.1713) loss: 0.7950 (0.7891) time: 0.1814 data: 0.0937 max mem: 9377 +Train: [80] [5400/6250] eta: 0:02:23 lr: 0.000012 grad: 0.1582 (0.1713) loss: 0.7911 (0.7891) time: 0.1686 data: 0.0768 max mem: 9377 +Train: [80] [5500/6250] eta: 0:02:06 lr: 0.000012 grad: 0.1777 (0.1713) loss: 0.7871 (0.7892) time: 0.1705 data: 0.0830 max mem: 9377 +Train: [80] [5600/6250] eta: 0:01:49 lr: 0.000012 grad: 0.1597 (0.1712) loss: 0.7934 (0.7892) time: 0.1604 data: 0.0710 max mem: 9377 +Train: [80] [5700/6250] eta: 0:01:32 lr: 0.000012 grad: 0.1574 (0.1711) loss: 0.7976 (0.7892) time: 0.1661 data: 0.0678 max mem: 9377 +Train: [80] [5800/6250] eta: 0:01:15 lr: 0.000012 grad: 0.1709 (0.1711) loss: 0.7877 (0.7892) time: 0.1756 data: 0.0761 max mem: 9377 +Train: [80] [5900/6250] eta: 0:00:58 lr: 0.000012 grad: 0.1603 (0.1710) loss: 0.7931 (0.7892) time: 0.1733 data: 0.0830 max mem: 9377 +Train: [80] [6000/6250] eta: 0:00:42 lr: 0.000012 grad: 0.1677 (0.1710) loss: 0.7916 (0.7892) time: 0.1505 data: 0.0545 max mem: 9377 +Train: [80] [6100/6250] eta: 0:00:25 lr: 0.000012 grad: 0.1698 (0.1709) loss: 0.7853 (0.7892) time: 0.1130 data: 0.0319 max mem: 9377 +Train: [80] [6200/6250] eta: 0:00:08 lr: 0.000012 grad: 0.1691 (0.1709) loss: 0.7856 (0.7892) time: 0.1760 data: 0.0882 max mem: 9377 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.1675 (0.1709) loss: 0.7853 (0.7892) time: 0.1762 data: 0.0808 max mem: 9377 +Train: [80] Total time: 0:17:32 (0.1685 s / it) +Averaged stats: lr: 0.000012 grad: 0.1675 (0.1709) loss: 0.7853 (0.7892) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:04:05 loss: 0.8096 (0.8096) time: 3.9525 data: 3.8435 max mem: 9377 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.7898 (0.7917) time: 0.1403 data: 0.1120 max mem: 9377 +Eval (hcp-train-subset): [80] Total time: 0:00:14 (0.2361 s / it) +Averaged stats (hcp-train-subset): loss: 0.7898 (0.7917) +Eval (hcp-val): [80] [ 0/62] eta: 0:04:48 loss: 0.8413 (0.8413) time: 4.6457 data: 4.5500 max mem: 9377 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.8367 (0.8385) time: 0.1313 data: 0.1062 max mem: 9377 +Eval (hcp-val): [80] Total time: 0:00:14 (0.2387 s / it) +Averaged stats (hcp-val): loss: 0.8367 (0.8385) +Eval (nsd-val): [80] [ 0/62] eta: 0:05:01 loss: 0.8115 (0.8115) time: 4.8593 data: 4.7948 max mem: 9377 +Eval (nsd-val): [80] [61/62] eta: 0:00:00 loss: 0.8200 (0.8224) time: 0.1251 data: 0.0999 max mem: 9377 +Eval (nsd-val): [80] Total time: 0:00:14 (0.2365 s / it) +Averaged stats (nsd-val): loss: 0.8200 (0.8224) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 8:35:31 lr: 0.000012 grad: 0.1202 (0.1202) loss: 0.8482 (0.8482) time: 4.9491 data: 4.6350 max mem: 9377 +Train: [81] [ 100/6250] eta: 0:23:00 lr: 0.000012 grad: 0.1827 (0.2248) loss: 0.7848 (0.8076) time: 0.1786 data: 0.0826 max mem: 9377 +Train: [81] [ 200/6250] eta: 0:19:34 lr: 0.000012 grad: 0.1728 (0.2072) loss: 0.8129 (0.8045) time: 0.1505 data: 0.0440 max mem: 9377 +Train: [81] [ 300/6250] eta: 0:18:22 lr: 0.000012 grad: 0.1410 (0.1949) loss: 0.8224 (0.8050) time: 0.1712 data: 0.0710 max mem: 9377 +Train: [81] [ 400/6250] eta: 0:17:14 lr: 0.000012 grad: 0.1674 (0.1885) loss: 0.8060 (0.8050) time: 0.1497 data: 0.0484 max mem: 9377 +Train: [81] [ 500/6250] eta: 0:16:28 lr: 0.000012 grad: 0.1584 (0.1844) loss: 0.8145 (0.8048) time: 0.1569 data: 0.0551 max mem: 9377 +Train: [81] [ 600/6250] eta: 0:15:53 lr: 0.000012 grad: 0.1757 (0.1818) loss: 0.7924 (0.8041) time: 0.1536 data: 0.0569 max mem: 9377 +Train: [81] [ 700/6250] eta: 0:15:24 lr: 0.000012 grad: 0.1549 (0.1801) loss: 0.7982 (0.8035) time: 0.1522 data: 0.0513 max mem: 9377 +Train: [81] [ 800/6250] eta: 0:15:06 lr: 0.000012 grad: 0.1576 (0.1783) loss: 0.7957 (0.8029) time: 0.1758 data: 0.0816 max mem: 9377 +Train: [81] [ 900/6250] eta: 0:14:42 lr: 0.000012 grad: 0.1689 (0.1772) loss: 0.7948 (0.8026) time: 0.1412 data: 0.0454 max mem: 9377 +Train: [81] [1000/6250] eta: 0:14:23 lr: 0.000012 grad: 0.1642 (0.1758) loss: 0.7965 (0.8021) time: 0.1466 data: 0.0532 max mem: 9377 +Train: [81] [1100/6250] eta: 0:14:03 lr: 0.000012 grad: 0.1610 (0.1748) loss: 0.7937 (0.8016) time: 0.1786 data: 0.1016 max mem: 9377 +Train: [81] [1200/6250] eta: 0:13:45 lr: 0.000012 grad: 0.1584 (0.1740) loss: 0.7982 (0.8011) time: 0.1649 data: 0.0693 max mem: 9377 +Train: [81] [1300/6250] eta: 0:13:26 lr: 0.000012 grad: 0.1585 (0.1730) loss: 0.8006 (0.8009) time: 0.1353 data: 0.0448 max mem: 9377 +Train: [81] [1400/6250] eta: 0:13:16 lr: 0.000012 grad: 0.1627 (0.1725) loss: 0.7903 (0.8005) time: 0.2196 data: 0.0781 max mem: 9377 +Train: [81] [1500/6250] eta: 0:12:58 lr: 0.000012 grad: 0.1635 (0.1721) loss: 0.7910 (0.8002) time: 0.1617 data: 0.0693 max mem: 9377 +Train: [81] [1600/6250] eta: 0:12:43 lr: 0.000012 grad: 0.1722 (0.1718) loss: 0.7977 (0.7999) time: 0.1829 data: 0.0942 max mem: 9377 +Train: [81] [1700/6250] eta: 0:12:24 lr: 0.000012 grad: 0.1603 (0.1714) loss: 0.7947 (0.7996) time: 0.1718 data: 0.0853 max mem: 9377 +Train: [81] [1800/6250] eta: 0:12:09 lr: 0.000012 grad: 0.1536 (0.1709) loss: 0.8000 (0.7994) time: 0.1754 data: 0.0864 max mem: 9377 +Train: [81] [1900/6250] eta: 0:11:54 lr: 0.000012 grad: 0.1620 (0.1707) loss: 0.7925 (0.7990) time: 0.1472 data: 0.0543 max mem: 9377 +Train: [81] [2000/6250] eta: 0:11:35 lr: 0.000012 grad: 0.1619 (0.1704) loss: 0.7945 (0.7988) time: 0.1566 data: 0.0550 max mem: 9377 +Train: [81] [2100/6250] eta: 0:11:20 lr: 0.000012 grad: 0.1563 (0.1702) loss: 0.7994 (0.7985) time: 0.1769 data: 0.0958 max mem: 9377 +Train: [81] [2200/6250] eta: 0:11:03 lr: 0.000012 grad: 0.1616 (0.1699) loss: 0.7794 (0.7983) time: 0.1563 data: 0.0645 max mem: 9377 +Train: [81] [2300/6250] eta: 0:10:44 lr: 0.000011 grad: 0.1680 (0.1698) loss: 0.7914 (0.7980) time: 0.1500 data: 0.0608 max mem: 9377 +Train: [81] [2400/6250] eta: 0:10:26 lr: 0.000011 grad: 0.1621 (0.1697) loss: 0.7878 (0.7978) time: 0.1443 data: 0.0523 max mem: 9377 +Train: [81] [2500/6250] eta: 0:10:07 lr: 0.000011 grad: 0.1631 (0.1695) loss: 0.7955 (0.7977) time: 0.1598 data: 0.0658 max mem: 9377 +Train: [81] [2600/6250] eta: 0:09:50 lr: 0.000011 grad: 0.1649 (0.1694) loss: 0.7896 (0.7975) time: 0.1743 data: 0.0821 max mem: 9377 +Train: [81] [2700/6250] eta: 0:09:33 lr: 0.000011 grad: 0.1567 (0.1695) loss: 0.8006 (0.7973) time: 0.1458 data: 0.0592 max mem: 9377 +Train: [81] [2800/6250] eta: 0:09:17 lr: 0.000011 grad: 0.1649 (0.1695) loss: 0.7945 (0.7971) time: 0.1729 data: 0.0836 max mem: 9377 +Train: [81] [2900/6250] eta: 0:09:01 lr: 0.000011 grad: 0.1629 (0.1695) loss: 0.7896 (0.7969) time: 0.1730 data: 0.0885 max mem: 9377 +Train: [81] [3000/6250] eta: 0:08:45 lr: 0.000011 grad: 0.1748 (0.1696) loss: 0.7884 (0.7966) time: 0.1665 data: 0.0804 max mem: 9377 +Train: [81] [3100/6250] eta: 0:08:28 lr: 0.000011 grad: 0.1643 (0.1696) loss: 0.7921 (0.7964) time: 0.1472 data: 0.0599 max mem: 9377 +Train: [81] [3200/6250] eta: 0:08:12 lr: 0.000011 grad: 0.1628 (0.1695) loss: 0.7909 (0.7963) time: 0.1799 data: 0.0943 max mem: 9377 +Train: [81] [3300/6250] eta: 0:07:57 lr: 0.000011 grad: 0.1653 (0.1694) loss: 0.7928 (0.7962) time: 0.1290 data: 0.0449 max mem: 9377 +Train: [81] [3400/6250] eta: 0:07:41 lr: 0.000011 grad: 0.1636 (0.1693) loss: 0.7834 (0.7961) time: 0.1589 data: 0.0668 max mem: 9377 +Train: [81] [3500/6250] eta: 0:07:26 lr: 0.000011 grad: 0.1635 (0.1692) loss: 0.7873 (0.7960) time: 0.1701 data: 0.0838 max mem: 9377 +Train: [81] [3600/6250] eta: 0:07:10 lr: 0.000011 grad: 0.1727 (0.1692) loss: 0.7947 (0.7960) time: 0.1611 data: 0.0763 max mem: 9377 +Train: [81] [3700/6250] eta: 0:06:54 lr: 0.000011 grad: 0.1596 (0.1692) loss: 0.7934 (0.7959) time: 0.1570 data: 0.0658 max mem: 9377 +Train: [81] [3800/6250] eta: 0:06:39 lr: 0.000011 grad: 0.1734 (0.1692) loss: 0.7892 (0.7958) time: 0.1727 data: 0.0761 max mem: 9377 +Train: [81] [3900/6250] eta: 0:06:23 lr: 0.000011 grad: 0.1695 (0.1692) loss: 0.7882 (0.7957) time: 0.1421 data: 0.0460 max mem: 9377 +Train: [81] [4000/6250] eta: 0:06:07 lr: 0.000011 grad: 0.1648 (0.1692) loss: 0.7880 (0.7956) time: 0.1638 data: 0.0753 max mem: 9377 +Train: [81] [4100/6250] eta: 0:05:50 lr: 0.000011 grad: 0.1619 (0.1691) loss: 0.8029 (0.7956) time: 0.1755 data: 0.0821 max mem: 9377 +Train: [81] [4200/6250] eta: 0:05:34 lr: 0.000011 grad: 0.1589 (0.1690) loss: 0.7974 (0.7956) time: 0.2205 data: 0.1346 max mem: 9377 +Train: [81] [4300/6250] eta: 0:05:17 lr: 0.000011 grad: 0.1740 (0.1689) loss: 0.7851 (0.7955) time: 0.1589 data: 0.0679 max mem: 9377 +Train: [81] [4400/6250] eta: 0:05:00 lr: 0.000011 grad: 0.1684 (0.1689) loss: 0.7903 (0.7955) time: 0.1619 data: 0.0727 max mem: 9377 +Train: [81] [4500/6250] eta: 0:04:44 lr: 0.000011 grad: 0.1626 (0.1689) loss: 0.8052 (0.7954) time: 0.1268 data: 0.0342 max mem: 9377 +Train: [81] [4600/6250] eta: 0:04:27 lr: 0.000011 grad: 0.1585 (0.1687) loss: 0.8010 (0.7954) time: 0.1734 data: 0.0850 max mem: 9377 +Train: [81] [4700/6250] eta: 0:04:11 lr: 0.000011 grad: 0.1681 (0.1687) loss: 0.7962 (0.7954) time: 0.1207 data: 0.0249 max mem: 9377 +Train: [81] [4800/6250] eta: 0:03:55 lr: 0.000011 grad: 0.1658 (0.1687) loss: 0.7936 (0.7953) time: 0.1484 data: 0.0629 max mem: 9377 +Train: [81] [4900/6250] eta: 0:03:38 lr: 0.000011 grad: 0.1607 (0.1687) loss: 0.7976 (0.7954) time: 0.1471 data: 0.0537 max mem: 9377 +Train: [81] [5000/6250] eta: 0:03:22 lr: 0.000011 grad: 0.1762 (0.1687) loss: 0.7854 (0.7953) time: 0.1707 data: 0.0840 max mem: 9377 +Train: [81] [5100/6250] eta: 0:03:06 lr: 0.000011 grad: 0.1640 (0.1687) loss: 0.7927 (0.7952) time: 0.1581 data: 0.0628 max mem: 9377 +Train: [81] [5200/6250] eta: 0:02:50 lr: 0.000011 grad: 0.1624 (0.1687) loss: 0.7962 (0.7951) time: 0.1542 data: 0.0743 max mem: 9377 +Train: [81] [5300/6250] eta: 0:02:34 lr: 0.000011 grad: 0.1632 (0.1687) loss: 0.7964 (0.7951) time: 0.1591 data: 0.0703 max mem: 9377 +Train: [81] [5400/6250] eta: 0:02:17 lr: 0.000011 grad: 0.1631 (0.1687) loss: 0.7948 (0.7950) time: 0.1747 data: 0.0773 max mem: 9377 +Train: [81] [5500/6250] eta: 0:02:01 lr: 0.000011 grad: 0.1745 (0.1687) loss: 0.7895 (0.7949) time: 0.1663 data: 0.0762 max mem: 9377 +Train: [81] [5600/6250] eta: 0:01:45 lr: 0.000011 grad: 0.1753 (0.1687) loss: 0.7831 (0.7949) time: 0.1582 data: 0.0657 max mem: 9377 +Train: [81] [5700/6250] eta: 0:01:29 lr: 0.000011 grad: 0.1548 (0.1686) loss: 0.7911 (0.7948) time: 0.1519 data: 0.0453 max mem: 9377 +Train: [81] [5800/6250] eta: 0:01:12 lr: 0.000011 grad: 0.1619 (0.1686) loss: 0.7954 (0.7948) time: 0.1591 data: 0.0624 max mem: 9377 +Train: [81] [5900/6250] eta: 0:00:56 lr: 0.000011 grad: 0.1575 (0.1686) loss: 0.7952 (0.7947) time: 0.1757 data: 0.0854 max mem: 9377 +Train: [81] [6000/6250] eta: 0:00:40 lr: 0.000011 grad: 0.1691 (0.1685) loss: 0.7809 (0.7946) time: 0.1408 data: 0.0502 max mem: 9377 +Train: [81] [6100/6250] eta: 0:00:24 lr: 0.000011 grad: 0.1677 (0.1686) loss: 0.7901 (0.7945) time: 0.1419 data: 0.0436 max mem: 9377 +Train: [81] [6200/6250] eta: 0:00:08 lr: 0.000011 grad: 0.1665 (0.1686) loss: 0.7908 (0.7944) time: 0.1402 data: 0.0549 max mem: 9377 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.1742 (0.1687) loss: 0.7843 (0.7943) time: 0.1306 data: 0.0440 max mem: 9377 +Train: [81] Total time: 0:16:52 (0.1620 s / it) +Averaged stats: lr: 0.000011 grad: 0.1742 (0.1687) loss: 0.7843 (0.7943) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:04:52 loss: 0.8065 (0.8065) time: 4.7222 data: 4.6036 max mem: 9377 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.7887 (0.7907) time: 0.1210 data: 0.0952 max mem: 9377 +Eval (hcp-train-subset): [81] Total time: 0:00:14 (0.2371 s / it) +Averaged stats (hcp-train-subset): loss: 0.7887 (0.7907) +Eval (hcp-val): [81] [ 0/62] eta: 0:06:14 loss: 0.8351 (0.8351) time: 6.0467 data: 6.0165 max mem: 9377 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.8390 (0.8392) time: 0.1227 data: 0.0960 max mem: 9377 +Eval (hcp-val): [81] Total time: 0:00:14 (0.2368 s / it) +Averaged stats (hcp-val): loss: 0.8390 (0.8392) +Eval (nsd-val): [81] [ 0/62] eta: 0:05:10 loss: 0.8123 (0.8123) time: 5.0049 data: 4.9732 max mem: 9377 +Eval (nsd-val): [81] [61/62] eta: 0:00:00 loss: 0.8198 (0.8215) time: 0.1353 data: 0.1080 max mem: 9377 +Eval (nsd-val): [81] Total time: 0:00:14 (0.2374 s / it) +Averaged stats (nsd-val): loss: 0.8198 (0.8215) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [82] [ 0/6250] eta: 12:56:38 lr: 0.000011 grad: 0.1856 (0.1856) loss: 0.8184 (0.8184) time: 7.4558 data: 7.3392 max mem: 9377 +Train: [82] [ 100/6250] eta: 0:24:38 lr: 0.000011 grad: 0.1982 (0.2304) loss: 0.7799 (0.7947) time: 0.1911 data: 0.0838 max mem: 9377 +Train: [82] [ 200/6250] eta: 0:21:09 lr: 0.000011 grad: 0.1472 (0.2097) loss: 0.8118 (0.7927) time: 0.1924 data: 0.0854 max mem: 9377 +Train: [82] [ 300/6250] eta: 0:19:44 lr: 0.000011 grad: 0.1725 (0.1987) loss: 0.8012 (0.7939) time: 0.1720 data: 0.0759 max mem: 9377 +Train: [82] [ 400/6250] eta: 0:18:39 lr: 0.000011 grad: 0.1707 (0.1932) loss: 0.8041 (0.7949) time: 0.1592 data: 0.0558 max mem: 9377 +Train: [82] [ 500/6250] eta: 0:17:50 lr: 0.000011 grad: 0.1650 (0.1885) loss: 0.7996 (0.7958) time: 0.1545 data: 0.0525 max mem: 9377 +Train: [82] [ 600/6250] eta: 0:17:03 lr: 0.000011 grad: 0.1648 (0.1861) loss: 0.7982 (0.7958) time: 0.1566 data: 0.0529 max mem: 9377 +Train: [82] [ 700/6250] eta: 0:16:32 lr: 0.000011 grad: 0.1687 (0.1838) loss: 0.7962 (0.7953) time: 0.1729 data: 0.0777 max mem: 9377 +Train: [82] [ 800/6250] eta: 0:16:11 lr: 0.000011 grad: 0.1506 (0.1816) loss: 0.8011 (0.7952) time: 0.1539 data: 0.0605 max mem: 9377 +Train: [82] [ 900/6250] eta: 0:15:51 lr: 0.000011 grad: 0.1679 (0.1807) loss: 0.7960 (0.7949) time: 0.1167 data: 0.0296 max mem: 9377 +Train: [82] [1000/6250] eta: 0:15:25 lr: 0.000011 grad: 0.1676 (0.1800) loss: 0.7968 (0.7945) time: 0.1767 data: 0.0895 max mem: 9377 +Train: [82] [1100/6250] eta: 0:15:07 lr: 0.000011 grad: 0.1645 (0.1787) loss: 0.7792 (0.7943) time: 0.1054 data: 0.0116 max mem: 9377 +Train: [82] [1200/6250] eta: 0:14:44 lr: 0.000011 grad: 0.1640 (0.1778) loss: 0.7865 (0.7942) time: 0.1713 data: 0.0741 max mem: 9377 +Train: [82] [1300/6250] eta: 0:14:24 lr: 0.000011 grad: 0.1684 (0.1769) loss: 0.7913 (0.7941) time: 0.1667 data: 0.0828 max mem: 9377 +Train: [82] [1400/6250] eta: 0:14:09 lr: 0.000010 grad: 0.1657 (0.1765) loss: 0.7927 (0.7939) time: 0.1633 data: 0.0742 max mem: 9377 +Train: [82] [1500/6250] eta: 0:13:47 lr: 0.000010 grad: 0.1727 (0.1763) loss: 0.7860 (0.7936) time: 0.1476 data: 0.0553 max mem: 9377 +Train: [82] [1600/6250] eta: 0:13:26 lr: 0.000010 grad: 0.1796 (0.1762) loss: 0.7756 (0.7931) time: 0.1572 data: 0.0706 max mem: 9377 +Train: [82] [1700/6250] eta: 0:13:04 lr: 0.000010 grad: 0.1649 (0.1758) loss: 0.7905 (0.7930) time: 0.1641 data: 0.0761 max mem: 9377 +Train: [82] [1800/6250] eta: 0:12:44 lr: 0.000010 grad: 0.1710 (0.1757) loss: 0.7869 (0.7926) time: 0.1634 data: 0.0749 max mem: 9377 +Train: [82] [1900/6250] eta: 0:12:24 lr: 0.000010 grad: 0.1786 (0.1758) loss: 0.7887 (0.7923) time: 0.1687 data: 0.0780 max mem: 9377 +Train: [82] [2000/6250] eta: 0:12:05 lr: 0.000010 grad: 0.1655 (0.1755) loss: 0.7887 (0.7921) time: 0.1606 data: 0.0565 max mem: 9377 +Train: [82] [2100/6250] eta: 0:11:48 lr: 0.000010 grad: 0.1616 (0.1752) loss: 0.7972 (0.7920) time: 0.2004 data: 0.1120 max mem: 9377 +Train: [82] [2200/6250] eta: 0:11:28 lr: 0.000010 grad: 0.1657 (0.1749) loss: 0.7943 (0.7919) time: 0.1572 data: 0.0658 max mem: 9377 +Train: [82] [2300/6250] eta: 0:11:08 lr: 0.000010 grad: 0.1689 (0.1749) loss: 0.7964 (0.7917) time: 0.1564 data: 0.0683 max mem: 9377 +Train: [82] [2400/6250] eta: 0:10:50 lr: 0.000010 grad: 0.1681 (0.1747) loss: 0.7883 (0.7914) time: 0.1461 data: 0.0578 max mem: 9377 +Train: [82] [2500/6250] eta: 0:10:32 lr: 0.000010 grad: 0.1626 (0.1747) loss: 0.7915 (0.7912) time: 0.1552 data: 0.0626 max mem: 9377 +Train: [82] [2600/6250] eta: 0:10:13 lr: 0.000010 grad: 0.1722 (0.1745) loss: 0.7843 (0.7910) time: 0.1599 data: 0.0784 max mem: 9377 +Train: [82] [2700/6250] eta: 0:09:55 lr: 0.000010 grad: 0.1733 (0.1744) loss: 0.7838 (0.7909) time: 0.1509 data: 0.0656 max mem: 9377 +Train: [82] [2800/6250] eta: 0:09:37 lr: 0.000010 grad: 0.1699 (0.1741) loss: 0.7914 (0.7908) time: 0.1511 data: 0.0590 max mem: 9377 +Train: [82] [2900/6250] eta: 0:09:18 lr: 0.000010 grad: 0.1595 (0.1737) loss: 0.7963 (0.7910) time: 0.1403 data: 0.0411 max mem: 9377 +Train: [82] [3000/6250] eta: 0:09:01 lr: 0.000010 grad: 0.1538 (0.1734) loss: 0.7941 (0.7911) time: 0.1375 data: 0.0517 max mem: 9377 +Train: [82] [3100/6250] eta: 0:08:43 lr: 0.000010 grad: 0.1566 (0.1731) loss: 0.7909 (0.7912) time: 0.1417 data: 0.0527 max mem: 9377 +Train: [82] [3200/6250] eta: 0:08:26 lr: 0.000010 grad: 0.1629 (0.1729) loss: 0.7873 (0.7912) time: 0.1764 data: 0.0922 max mem: 9377 +Train: [82] [3300/6250] eta: 0:08:09 lr: 0.000010 grad: 0.1610 (0.1728) loss: 0.7964 (0.7913) time: 0.1893 data: 0.1021 max mem: 9377 +Train: [82] [3400/6250] eta: 0:07:53 lr: 0.000010 grad: 0.1672 (0.1725) loss: 0.7908 (0.7914) time: 0.1718 data: 0.0897 max mem: 9377 +Train: [82] [3500/6250] eta: 0:07:36 lr: 0.000010 grad: 0.1660 (0.1724) loss: 0.7897 (0.7915) time: 0.1480 data: 0.0749 max mem: 9377 +Train: [82] [3600/6250] eta: 0:07:19 lr: 0.000010 grad: 0.1626 (0.1722) loss: 0.7939 (0.7915) time: 0.1524 data: 0.0703 max mem: 9377 +Train: [82] [3700/6250] eta: 0:07:01 lr: 0.000010 grad: 0.1672 (0.1722) loss: 0.7911 (0.7915) time: 0.1580 data: 0.0610 max mem: 9377 +Train: [82] [3800/6250] eta: 0:06:45 lr: 0.000010 grad: 0.1666 (0.1720) loss: 0.7906 (0.7916) time: 0.1788 data: 0.0927 max mem: 9377 +Train: [82] [3900/6250] eta: 0:06:29 lr: 0.000010 grad: 0.1649 (0.1718) loss: 0.7877 (0.7916) time: 0.1660 data: 0.0727 max mem: 9377 +Train: [82] [4000/6250] eta: 0:06:12 lr: 0.000010 grad: 0.1655 (0.1717) loss: 0.7843 (0.7916) time: 0.1774 data: 0.0880 max mem: 9377 +Train: [82] [4100/6250] eta: 0:05:55 lr: 0.000010 grad: 0.1672 (0.1717) loss: 0.7918 (0.7916) time: 0.1619 data: 0.0703 max mem: 9377 +Train: [82] [4200/6250] eta: 0:05:38 lr: 0.000010 grad: 0.1680 (0.1716) loss: 0.7914 (0.7917) time: 0.1416 data: 0.0449 max mem: 9377 +Train: [82] [4300/6250] eta: 0:05:21 lr: 0.000010 grad: 0.1602 (0.1715) loss: 0.7950 (0.7918) time: 0.1397 data: 0.0492 max mem: 9377 +Train: [82] [4400/6250] eta: 0:05:04 lr: 0.000010 grad: 0.1629 (0.1714) loss: 0.7965 (0.7919) time: 0.1478 data: 0.0547 max mem: 9377 +Train: [82] [4500/6250] eta: 0:04:47 lr: 0.000010 grad: 0.1526 (0.1713) loss: 0.8054 (0.7920) time: 0.1767 data: 0.0921 max mem: 9377 +Train: [82] [4600/6250] eta: 0:04:30 lr: 0.000010 grad: 0.1601 (0.1712) loss: 0.7904 (0.7921) time: 0.1485 data: 0.0632 max mem: 9377 +Train: [82] [4700/6250] eta: 0:04:14 lr: 0.000010 grad: 0.1690 (0.1711) loss: 0.7973 (0.7922) time: 0.1316 data: 0.0429 max mem: 9377 +Train: [82] [4800/6250] eta: 0:03:57 lr: 0.000010 grad: 0.1645 (0.1710) loss: 0.7928 (0.7923) time: 0.1438 data: 0.0584 max mem: 9377 +Train: [82] [4900/6250] eta: 0:03:40 lr: 0.000010 grad: 0.1601 (0.1709) loss: 0.7962 (0.7923) time: 0.1503 data: 0.0584 max mem: 9377 +Train: [82] [5000/6250] eta: 0:03:24 lr: 0.000010 grad: 0.1578 (0.1708) loss: 0.7988 (0.7924) time: 0.1560 data: 0.0739 max mem: 9377 +Train: [82] [5100/6250] eta: 0:03:08 lr: 0.000010 grad: 0.1556 (0.1707) loss: 0.8015 (0.7925) time: 0.1440 data: 0.0627 max mem: 9377 +Train: [82] [5200/6250] eta: 0:02:51 lr: 0.000010 grad: 0.1655 (0.1706) loss: 0.7896 (0.7926) time: 0.1611 data: 0.0814 max mem: 9377 +Train: [82] [5300/6250] eta: 0:02:35 lr: 0.000010 grad: 0.1651 (0.1705) loss: 0.8004 (0.7926) time: 0.1523 data: 0.0631 max mem: 9377 +Train: [82] [5400/6250] eta: 0:02:18 lr: 0.000010 grad: 0.1544 (0.1705) loss: 0.7994 (0.7926) time: 0.1405 data: 0.0542 max mem: 9377 +Train: [82] [5500/6250] eta: 0:02:02 lr: 0.000010 grad: 0.1588 (0.1704) loss: 0.7986 (0.7927) time: 0.1831 data: 0.0878 max mem: 9377 +Train: [82] [5600/6250] eta: 0:01:46 lr: 0.000010 grad: 0.1616 (0.1702) loss: 0.7919 (0.7928) time: 0.1601 data: 0.0667 max mem: 9377 +Train: [82] [5700/6250] eta: 0:01:29 lr: 0.000010 grad: 0.1651 (0.1703) loss: 0.7909 (0.7928) time: 0.1399 data: 0.0531 max mem: 9377 +Train: [82] [5800/6250] eta: 0:01:13 lr: 0.000010 grad: 0.1708 (0.1702) loss: 0.7954 (0.7928) time: 0.1669 data: 0.0767 max mem: 9377 +Train: [82] [5900/6250] eta: 0:00:56 lr: 0.000010 grad: 0.1742 (0.1703) loss: 0.7933 (0.7928) time: 0.1517 data: 0.0475 max mem: 9377 +Train: [82] [6000/6250] eta: 0:00:40 lr: 0.000010 grad: 0.1616 (0.1703) loss: 0.8002 (0.7928) time: 0.1681 data: 0.0790 max mem: 9377 +Train: [82] [6100/6250] eta: 0:00:24 lr: 0.000010 grad: 0.1728 (0.1703) loss: 0.7857 (0.7928) time: 0.1581 data: 0.0745 max mem: 9377 +Train: [82] [6200/6250] eta: 0:00:08 lr: 0.000010 grad: 0.1672 (0.1702) loss: 0.7966 (0.7928) time: 0.1774 data: 0.0951 max mem: 9377 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.1616 (0.1702) loss: 0.7899 (0.7928) time: 0.1285 data: 0.0339 max mem: 9377 +Train: [82] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000010 grad: 0.1616 (0.1702) loss: 0.7899 (0.7928) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:06:07 loss: 0.8089 (0.8089) time: 5.9194 data: 5.8891 max mem: 9377 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.7901 (0.7900) time: 0.1346 data: 0.1093 max mem: 9377 +Eval (hcp-train-subset): [82] Total time: 0:00:14 (0.2383 s / it) +Averaged stats (hcp-train-subset): loss: 0.7901 (0.7900) +Eval (hcp-val): [82] [ 0/62] eta: 0:06:21 loss: 0.8361 (0.8361) time: 6.1551 data: 6.1229 max mem: 9377 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.8368 (0.8390) time: 0.1206 data: 0.0938 max mem: 9377 +Eval (hcp-val): [82] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (hcp-val): loss: 0.8368 (0.8390) +Eval (nsd-val): [82] [ 0/62] eta: 0:03:48 loss: 0.8090 (0.8090) time: 3.6826 data: 3.5837 max mem: 9377 +Eval (nsd-val): [82] [61/62] eta: 0:00:00 loss: 0.8184 (0.8214) time: 0.1284 data: 0.1029 max mem: 9377 +Eval (nsd-val): [82] Total time: 0:00:14 (0.2335 s / it) +Averaged stats (nsd-val): loss: 0.8184 (0.8214) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 12:05:54 lr: 0.000010 grad: 0.3940 (0.3940) loss: 0.8220 (0.8220) time: 6.9687 data: 6.8654 max mem: 9377 +Train: [83] [ 100/6250] eta: 0:23:17 lr: 0.000010 grad: 0.1976 (0.2153) loss: 0.8026 (0.8015) time: 0.1922 data: 0.0809 max mem: 9377 +Train: [83] [ 200/6250] eta: 0:19:50 lr: 0.000010 grad: 0.1751 (0.2064) loss: 0.8019 (0.7997) time: 0.1785 data: 0.0720 max mem: 9377 +Train: [83] [ 300/6250] eta: 0:18:22 lr: 0.000010 grad: 0.1720 (0.1977) loss: 0.8003 (0.8002) time: 0.1331 data: 0.0387 max mem: 9377 +Train: [83] [ 400/6250] eta: 0:17:12 lr: 0.000010 grad: 0.1672 (0.1925) loss: 0.8019 (0.7994) time: 0.1367 data: 0.0308 max mem: 9377 +Train: [83] [ 500/6250] eta: 0:16:27 lr: 0.000010 grad: 0.1742 (0.1892) loss: 0.7887 (0.7989) time: 0.1583 data: 0.0654 max mem: 9377 +Train: [83] [ 600/6250] eta: 0:16:06 lr: 0.000010 grad: 0.1725 (0.1874) loss: 0.7901 (0.7985) time: 0.1676 data: 0.0767 max mem: 9377 +Train: [83] [ 700/6250] eta: 0:15:47 lr: 0.000009 grad: 0.1627 (0.1845) loss: 0.8033 (0.7985) time: 0.1851 data: 0.0814 max mem: 9377 +Train: [83] [ 800/6250] eta: 0:15:32 lr: 0.000009 grad: 0.1719 (0.1832) loss: 0.7888 (0.7977) time: 0.1533 data: 0.0537 max mem: 9377 +Train: [83] [ 900/6250] eta: 0:15:13 lr: 0.000009 grad: 0.1661 (0.1824) loss: 0.7908 (0.7973) time: 0.1618 data: 0.0686 max mem: 9377 +Train: [83] [1000/6250] eta: 0:14:59 lr: 0.000009 grad: 0.1707 (0.1812) loss: 0.7869 (0.7968) time: 0.1750 data: 0.0856 max mem: 9377 +Train: [83] [1100/6250] eta: 0:14:36 lr: 0.000009 grad: 0.1668 (0.1802) loss: 0.7908 (0.7963) time: 0.1297 data: 0.0256 max mem: 9377 +Train: [83] [1200/6250] eta: 0:14:14 lr: 0.000009 grad: 0.1682 (0.1797) loss: 0.7825 (0.7955) time: 0.1560 data: 0.0621 max mem: 9377 +Train: [83] [1300/6250] eta: 0:13:51 lr: 0.000009 grad: 0.1674 (0.1790) loss: 0.8008 (0.7951) time: 0.1761 data: 0.0836 max mem: 9377 +Train: [83] [1400/6250] eta: 0:13:37 lr: 0.000009 grad: 0.1685 (0.1785) loss: 0.7879 (0.7946) time: 0.1880 data: 0.1051 max mem: 9377 +Train: [83] [1500/6250] eta: 0:13:22 lr: 0.000009 grad: 0.1717 (0.1781) loss: 0.7851 (0.7940) time: 0.1728 data: 0.0867 max mem: 9377 +Train: [83] [1600/6250] eta: 0:13:08 lr: 0.000009 grad: 0.1663 (0.1777) loss: 0.7874 (0.7936) time: 0.1972 data: 0.1072 max mem: 9377 +Train: [83] [1700/6250] eta: 0:12:51 lr: 0.000009 grad: 0.1801 (0.1775) loss: 0.7872 (0.7934) time: 0.2053 data: 0.1176 max mem: 9377 +Train: [83] [1800/6250] eta: 0:12:39 lr: 0.000009 grad: 0.1734 (0.1772) loss: 0.7881 (0.7930) time: 0.1721 data: 0.0686 max mem: 9377 +Train: [83] [1900/6250] eta: 0:12:21 lr: 0.000009 grad: 0.1632 (0.1767) loss: 0.7861 (0.7928) time: 0.1838 data: 0.0918 max mem: 9377 +Train: [83] [2000/6250] eta: 0:12:04 lr: 0.000009 grad: 0.1685 (0.1765) loss: 0.7762 (0.7925) time: 0.1667 data: 0.0709 max mem: 9377 +Train: [83] [2100/6250] eta: 0:11:49 lr: 0.000009 grad: 0.1756 (0.1764) loss: 0.7801 (0.7920) time: 0.1700 data: 0.0760 max mem: 9377 +Train: [83] [2200/6250] eta: 0:11:31 lr: 0.000009 grad: 0.1641 (0.1762) loss: 0.7854 (0.7917) time: 0.1726 data: 0.0810 max mem: 9377 +Train: [83] [2300/6250] eta: 0:11:13 lr: 0.000009 grad: 0.1775 (0.1762) loss: 0.7784 (0.7913) time: 0.1585 data: 0.0641 max mem: 9377 +Train: [83] [2400/6250] eta: 0:10:54 lr: 0.000009 grad: 0.1642 (0.1760) loss: 0.7922 (0.7910) time: 0.1742 data: 0.0874 max mem: 9377 +Train: [83] [2500/6250] eta: 0:10:35 lr: 0.000009 grad: 0.1665 (0.1760) loss: 0.7854 (0.7907) time: 0.1325 data: 0.0409 max mem: 9377 +Train: [83] [2600/6250] eta: 0:10:16 lr: 0.000009 grad: 0.1676 (0.1758) loss: 0.7882 (0.7905) time: 0.1490 data: 0.0619 max mem: 9377 +Train: [83] [2700/6250] eta: 0:09:58 lr: 0.000009 grad: 0.1656 (0.1758) loss: 0.7886 (0.7903) time: 0.1724 data: 0.0844 max mem: 9377 +Train: [83] [2800/6250] eta: 0:09:40 lr: 0.000009 grad: 0.1734 (0.1757) loss: 0.7870 (0.7902) time: 0.1328 data: 0.0390 max mem: 9377 +Train: [83] [2900/6250] eta: 0:09:21 lr: 0.000009 grad: 0.1630 (0.1759) loss: 0.7955 (0.7900) time: 0.1500 data: 0.0545 max mem: 9377 +Train: [83] [3000/6250] eta: 0:09:03 lr: 0.000009 grad: 0.1700 (0.1758) loss: 0.7828 (0.7899) time: 0.1421 data: 0.0565 max mem: 9377 +Train: [83] [3100/6250] eta: 0:08:46 lr: 0.000009 grad: 0.1637 (0.1758) loss: 0.7881 (0.7898) time: 0.1750 data: 0.0830 max mem: 9377 +Train: [83] [3200/6250] eta: 0:08:28 lr: 0.000009 grad: 0.1724 (0.1758) loss: 0.7874 (0.7897) time: 0.1676 data: 0.0757 max mem: 9377 +Train: [83] [3300/6250] eta: 0:08:10 lr: 0.000009 grad: 0.1799 (0.1760) loss: 0.7862 (0.7895) time: 0.1300 data: 0.0411 max mem: 9377 +Train: [83] [3400/6250] eta: 0:07:54 lr: 0.000009 grad: 0.1735 (0.1760) loss: 0.7842 (0.7895) time: 0.1645 data: 0.0837 max mem: 9377 +Train: [83] [3500/6250] eta: 0:07:37 lr: 0.000009 grad: 0.1749 (0.1759) loss: 0.7930 (0.7895) time: 0.1728 data: 0.0776 max mem: 9377 +Train: [83] [3600/6250] eta: 0:07:21 lr: 0.000009 grad: 0.1785 (0.1759) loss: 0.7870 (0.7895) time: 0.1536 data: 0.0769 max mem: 9377 +Train: [83] [3700/6250] eta: 0:07:03 lr: 0.000009 grad: 0.1774 (0.1759) loss: 0.7847 (0.7895) time: 0.1843 data: 0.0974 max mem: 9377 +Train: [83] [3800/6250] eta: 0:06:46 lr: 0.000009 grad: 0.1681 (0.1758) loss: 0.7902 (0.7895) time: 0.1452 data: 0.0485 max mem: 9377 +Train: [83] [3900/6250] eta: 0:06:30 lr: 0.000009 grad: 0.1692 (0.1758) loss: 0.7942 (0.7894) time: 0.1546 data: 0.0696 max mem: 9377 +Train: [83] [4000/6250] eta: 0:06:13 lr: 0.000009 grad: 0.1728 (0.1757) loss: 0.7813 (0.7893) time: 0.1532 data: 0.0563 max mem: 9377 +Train: [83] [4100/6250] eta: 0:05:56 lr: 0.000009 grad: 0.1753 (0.1757) loss: 0.7904 (0.7893) time: 0.1407 data: 0.0472 max mem: 9377 +Train: [83] [4200/6250] eta: 0:05:38 lr: 0.000009 grad: 0.1734 (0.1757) loss: 0.7809 (0.7891) time: 0.1347 data: 0.0415 max mem: 9377 +Train: [83] [4300/6250] eta: 0:05:21 lr: 0.000009 grad: 0.1687 (0.1757) loss: 0.7895 (0.7890) time: 0.1748 data: 0.0857 max mem: 9377 +Train: [83] [4400/6250] eta: 0:05:04 lr: 0.000009 grad: 0.1722 (0.1756) loss: 0.7853 (0.7891) time: 0.1543 data: 0.0657 max mem: 9377 +Train: [83] [4500/6250] eta: 0:04:47 lr: 0.000009 grad: 0.1674 (0.1755) loss: 0.7846 (0.7890) time: 0.1580 data: 0.0720 max mem: 9377 +Train: [83] [4600/6250] eta: 0:04:30 lr: 0.000009 grad: 0.1807 (0.1754) loss: 0.7810 (0.7890) time: 0.1424 data: 0.0488 max mem: 9377 +Train: [83] [4700/6250] eta: 0:04:13 lr: 0.000009 grad: 0.1781 (0.1754) loss: 0.7855 (0.7889) time: 0.1673 data: 0.0774 max mem: 9377 +Train: [83] [4800/6250] eta: 0:03:56 lr: 0.000009 grad: 0.1752 (0.1755) loss: 0.7849 (0.7888) time: 0.1353 data: 0.0387 max mem: 9377 +Train: [83] [4900/6250] eta: 0:03:40 lr: 0.000009 grad: 0.1755 (0.1756) loss: 0.7890 (0.7888) time: 0.1529 data: 0.0647 max mem: 9377 +Train: [83] [5000/6250] eta: 0:03:23 lr: 0.000009 grad: 0.1893 (0.1757) loss: 0.7819 (0.7887) time: 0.1926 data: 0.1147 max mem: 9377 +Train: [83] [5100/6250] eta: 0:03:07 lr: 0.000009 grad: 0.1736 (0.1757) loss: 0.7870 (0.7887) time: 0.1453 data: 0.0650 max mem: 9377 +Train: [83] [5200/6250] eta: 0:02:51 lr: 0.000009 grad: 0.1873 (0.1757) loss: 0.7811 (0.7887) time: 0.1685 data: 0.0771 max mem: 9377 +Train: [83] [5300/6250] eta: 0:02:34 lr: 0.000009 grad: 0.1852 (0.1758) loss: 0.7849 (0.7887) time: 0.1651 data: 0.0733 max mem: 9377 +Train: [83] [5400/6250] eta: 0:02:19 lr: 0.000009 grad: 0.1698 (0.1759) loss: 0.7905 (0.7885) time: 0.2010 data: 0.1045 max mem: 9377 +Train: [83] [5500/6250] eta: 0:02:03 lr: 0.000009 grad: 0.1744 (0.1761) loss: 0.7763 (0.7884) time: 0.1943 data: 0.1065 max mem: 9377 +Train: [83] [5600/6250] eta: 0:01:46 lr: 0.000009 grad: 0.1812 (0.1762) loss: 0.7826 (0.7883) time: 0.2089 data: 0.1207 max mem: 9377 +Train: [83] [5700/6250] eta: 0:01:30 lr: 0.000009 grad: 0.1776 (0.1762) loss: 0.7820 (0.7882) time: 0.1589 data: 0.0705 max mem: 9377 +Train: [83] [5800/6250] eta: 0:01:14 lr: 0.000009 grad: 0.1784 (0.1762) loss: 0.7907 (0.7882) time: 0.1658 data: 0.0795 max mem: 9377 +Train: [83] [5900/6250] eta: 0:00:57 lr: 0.000009 grad: 0.1725 (0.1763) loss: 0.7830 (0.7882) time: 0.1467 data: 0.0519 max mem: 9377 +Train: [83] [6000/6250] eta: 0:00:41 lr: 0.000009 grad: 0.1846 (0.1764) loss: 0.7759 (0.7881) time: 0.1738 data: 0.0866 max mem: 9377 +Train: [83] [6100/6250] eta: 0:00:24 lr: 0.000009 grad: 0.1869 (0.1765) loss: 0.7891 (0.7880) time: 0.1843 data: 0.1032 max mem: 9377 +Train: [83] [6200/6250] eta: 0:00:08 lr: 0.000009 grad: 0.1649 (0.1765) loss: 0.7911 (0.7880) time: 0.1670 data: 0.0818 max mem: 9377 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.1761 (0.1765) loss: 0.7774 (0.7880) time: 0.1595 data: 0.0746 max mem: 9377 +Train: [83] Total time: 0:17:11 (0.1651 s / it) +Averaged stats: lr: 0.000009 grad: 0.1761 (0.1765) loss: 0.7774 (0.7880) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:04:36 loss: 0.8064 (0.8064) time: 4.4626 data: 4.3728 max mem: 9377 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.7901 (0.7894) time: 0.1355 data: 0.1088 max mem: 9377 +Eval (hcp-train-subset): [83] Total time: 0:00:15 (0.2453 s / it) +Averaged stats (hcp-train-subset): loss: 0.7901 (0.7894) +Eval (hcp-val): [83] [ 0/62] eta: 0:06:22 loss: 0.8371 (0.8371) time: 6.1758 data: 6.1462 max mem: 9377 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.8375 (0.8394) time: 0.1502 data: 0.1248 max mem: 9377 +Eval (hcp-val): [83] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (hcp-val): loss: 0.8375 (0.8394) +Eval (nsd-val): [83] [ 0/62] eta: 0:06:06 loss: 0.8171 (0.8171) time: 5.9096 data: 5.8763 max mem: 9377 +Eval (nsd-val): [83] [61/62] eta: 0:00:00 loss: 0.8209 (0.8237) time: 0.1215 data: 0.0944 max mem: 9377 +Eval (nsd-val): [83] Total time: 0:00:14 (0.2317 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8237) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [84] [ 0/6250] eta: 11:29:17 lr: 0.000009 grad: 0.1673 (0.1673) loss: 0.8232 (0.8232) time: 6.6173 data: 6.4952 max mem: 9377 +Train: [84] [ 100/6250] eta: 0:22:25 lr: 0.000009 grad: 0.1882 (0.2132) loss: 0.8020 (0.8003) time: 0.1666 data: 0.0626 max mem: 9377 +Train: [84] [ 200/6250] eta: 0:19:47 lr: 0.000009 grad: 0.1975 (0.2026) loss: 0.7815 (0.7971) time: 0.1685 data: 0.0567 max mem: 9377 +Train: [84] [ 300/6250] eta: 0:18:15 lr: 0.000008 grad: 0.1756 (0.1953) loss: 0.8059 (0.7965) time: 0.1352 data: 0.0252 max mem: 9377 +Train: [84] [ 400/6250] eta: 0:17:19 lr: 0.000008 grad: 0.1580 (0.1911) loss: 0.8124 (0.7977) time: 0.1558 data: 0.0462 max mem: 9377 +Train: [84] [ 500/6250] eta: 0:16:27 lr: 0.000008 grad: 0.1733 (0.1882) loss: 0.8009 (0.7969) time: 0.1637 data: 0.0557 max mem: 9377 +Train: [84] [ 600/6250] eta: 0:15:58 lr: 0.000008 grad: 0.1842 (0.1873) loss: 0.7913 (0.7963) time: 0.1734 data: 0.0807 max mem: 9377 +Train: [84] [ 700/6250] eta: 0:15:29 lr: 0.000008 grad: 0.1789 (0.1861) loss: 0.7870 (0.7956) time: 0.1577 data: 0.0566 max mem: 9377 +Train: [84] [ 800/6250] eta: 0:15:07 lr: 0.000008 grad: 0.1815 (0.1852) loss: 0.7872 (0.7946) time: 0.1788 data: 0.0843 max mem: 9377 +Train: [84] [ 900/6250] eta: 0:14:48 lr: 0.000008 grad: 0.1626 (0.1845) loss: 0.7919 (0.7937) time: 0.1731 data: 0.0858 max mem: 9377 +Train: [84] [1000/6250] eta: 0:14:25 lr: 0.000008 grad: 0.1768 (0.1838) loss: 0.7942 (0.7933) time: 0.1229 data: 0.0216 max mem: 9377 +Train: [84] [1100/6250] eta: 0:14:06 lr: 0.000008 grad: 0.1784 (0.1829) loss: 0.7963 (0.7933) time: 0.1566 data: 0.0755 max mem: 9377 +Train: [84] [1200/6250] eta: 0:13:44 lr: 0.000008 grad: 0.1728 (0.1821) loss: 0.7969 (0.7933) time: 0.1478 data: 0.0565 max mem: 9377 +Train: [84] [1300/6250] eta: 0:13:26 lr: 0.000008 grad: 0.1642 (0.1811) loss: 0.8001 (0.7935) time: 0.1750 data: 0.0838 max mem: 9377 +Train: [84] [1400/6250] eta: 0:13:10 lr: 0.000008 grad: 0.1675 (0.1806) loss: 0.7871 (0.7934) time: 0.1787 data: 0.1030 max mem: 9377 +Train: [84] [1500/6250] eta: 0:12:56 lr: 0.000008 grad: 0.1750 (0.1802) loss: 0.7825 (0.7932) time: 0.1476 data: 0.0623 max mem: 9377 +Train: [84] [1600/6250] eta: 0:12:37 lr: 0.000008 grad: 0.1664 (0.1797) loss: 0.7997 (0.7932) time: 0.1299 data: 0.0494 max mem: 9377 +Train: [84] [1700/6250] eta: 0:12:21 lr: 0.000008 grad: 0.1645 (0.1792) loss: 0.7886 (0.7932) time: 0.1597 data: 0.0780 max mem: 9377 +Train: [84] [1800/6250] eta: 0:12:07 lr: 0.000008 grad: 0.1656 (0.1790) loss: 0.7889 (0.7931) time: 0.1798 data: 0.0819 max mem: 9377 +Train: [84] [1900/6250] eta: 0:11:52 lr: 0.000008 grad: 0.1703 (0.1788) loss: 0.7860 (0.7929) time: 0.1879 data: 0.0963 max mem: 9377 +Train: [84] [2000/6250] eta: 0:11:36 lr: 0.000008 grad: 0.1717 (0.1784) loss: 0.7870 (0.7929) time: 0.1696 data: 0.0795 max mem: 9377 +Train: [84] [2100/6250] eta: 0:11:21 lr: 0.000008 grad: 0.1731 (0.1783) loss: 0.7938 (0.7928) time: 0.1708 data: 0.0770 max mem: 9377 +Train: [84] [2200/6250] eta: 0:11:02 lr: 0.000008 grad: 0.1469 (0.1778) loss: 0.8076 (0.7928) time: 0.1378 data: 0.0433 max mem: 9377 +Train: [84] [2300/6250] eta: 0:10:46 lr: 0.000008 grad: 0.1627 (0.1775) loss: 0.7984 (0.7927) time: 0.1695 data: 0.0700 max mem: 9377 +Train: [84] [2400/6250] eta: 0:10:28 lr: 0.000008 grad: 0.1625 (0.1771) loss: 0.7904 (0.7927) time: 0.1601 data: 0.0732 max mem: 9377 +Train: [84] [2500/6250] eta: 0:10:12 lr: 0.000008 grad: 0.1637 (0.1767) loss: 0.7920 (0.7926) time: 0.1566 data: 0.0704 max mem: 9377 +Train: [84] [2600/6250] eta: 0:09:53 lr: 0.000008 grad: 0.1617 (0.1764) loss: 0.7878 (0.7926) time: 0.1457 data: 0.0528 max mem: 9377 +Train: [84] [2700/6250] eta: 0:09:36 lr: 0.000008 grad: 0.1596 (0.1764) loss: 0.7902 (0.7925) time: 0.1629 data: 0.0675 max mem: 9377 +Train: [84] [2800/6250] eta: 0:09:19 lr: 0.000008 grad: 0.1631 (0.1760) loss: 0.7949 (0.7925) time: 0.1496 data: 0.0428 max mem: 9377 +Train: [84] [2900/6250] eta: 0:09:02 lr: 0.000008 grad: 0.1642 (0.1759) loss: 0.7973 (0.7925) time: 0.1590 data: 0.0727 max mem: 9377 +Train: [84] [3000/6250] eta: 0:08:45 lr: 0.000008 grad: 0.1719 (0.1756) loss: 0.7906 (0.7925) time: 0.1698 data: 0.0784 max mem: 9377 +Train: [84] [3100/6250] eta: 0:08:28 lr: 0.000008 grad: 0.1692 (0.1755) loss: 0.7965 (0.7925) time: 0.1757 data: 0.0900 max mem: 9377 +Train: [84] [3200/6250] eta: 0:08:12 lr: 0.000008 grad: 0.1678 (0.1753) loss: 0.8019 (0.7926) time: 0.1579 data: 0.0650 max mem: 9377 +Train: [84] [3300/6250] eta: 0:07:55 lr: 0.000008 grad: 0.1541 (0.1751) loss: 0.8044 (0.7926) time: 0.1559 data: 0.0630 max mem: 9377 +Train: [84] [3400/6250] eta: 0:07:39 lr: 0.000008 grad: 0.1674 (0.1749) loss: 0.8025 (0.7927) time: 0.1858 data: 0.1049 max mem: 9377 +Train: [84] [3500/6250] eta: 0:07:23 lr: 0.000008 grad: 0.1747 (0.1749) loss: 0.7874 (0.7927) time: 0.1500 data: 0.0697 max mem: 9377 +Train: [84] [3600/6250] eta: 0:07:07 lr: 0.000008 grad: 0.1654 (0.1749) loss: 0.7952 (0.7927) time: 0.1441 data: 0.0593 max mem: 9377 +Train: [84] [3700/6250] eta: 0:06:51 lr: 0.000008 grad: 0.1714 (0.1748) loss: 0.7931 (0.7927) time: 0.1323 data: 0.0455 max mem: 9377 +Train: [84] [3800/6250] eta: 0:06:34 lr: 0.000008 grad: 0.1618 (0.1746) loss: 0.7928 (0.7927) time: 0.1686 data: 0.0728 max mem: 9377 +Train: [84] [3900/6250] eta: 0:06:18 lr: 0.000008 grad: 0.1687 (0.1745) loss: 0.7874 (0.7927) time: 0.1700 data: 0.0841 max mem: 9377 +Train: [84] [4000/6250] eta: 0:06:02 lr: 0.000008 grad: 0.1654 (0.1744) loss: 0.7914 (0.7927) time: 0.1698 data: 0.0826 max mem: 9377 +Train: [84] [4100/6250] eta: 0:05:46 lr: 0.000008 grad: 0.1705 (0.1743) loss: 0.7884 (0.7928) time: 0.1509 data: 0.0599 max mem: 9377 +Train: [84] [4200/6250] eta: 0:05:29 lr: 0.000008 grad: 0.1669 (0.1742) loss: 0.7941 (0.7928) time: 0.1618 data: 0.0688 max mem: 9377 +Train: [84] [4300/6250] eta: 0:05:13 lr: 0.000008 grad: 0.1731 (0.1742) loss: 0.7893 (0.7928) time: 0.1316 data: 0.0460 max mem: 9377 +Train: [84] [4400/6250] eta: 0:04:56 lr: 0.000008 grad: 0.1784 (0.1742) loss: 0.7854 (0.7927) time: 0.1357 data: 0.0348 max mem: 9377 +Train: [84] [4500/6250] eta: 0:04:40 lr: 0.000008 grad: 0.1757 (0.1742) loss: 0.7854 (0.7926) time: 0.1359 data: 0.0408 max mem: 9377 +Train: [84] [4600/6250] eta: 0:04:23 lr: 0.000008 grad: 0.1665 (0.1741) loss: 0.7995 (0.7926) time: 0.1492 data: 0.0528 max mem: 9377 +Train: [84] [4700/6250] eta: 0:04:07 lr: 0.000008 grad: 0.1716 (0.1740) loss: 0.7848 (0.7925) time: 0.1525 data: 0.0560 max mem: 9377 +Train: [84] [4800/6250] eta: 0:03:51 lr: 0.000008 grad: 0.1673 (0.1740) loss: 0.7989 (0.7925) time: 0.1619 data: 0.0761 max mem: 9377 +Train: [84] [4900/6250] eta: 0:03:35 lr: 0.000008 grad: 0.1712 (0.1739) loss: 0.7826 (0.7925) time: 0.1664 data: 0.0760 max mem: 9377 +Train: [84] [5000/6250] eta: 0:03:19 lr: 0.000008 grad: 0.1719 (0.1738) loss: 0.7858 (0.7924) time: 0.1757 data: 0.0920 max mem: 9377 +Train: [84] [5100/6250] eta: 0:03:03 lr: 0.000008 grad: 0.1703 (0.1738) loss: 0.7961 (0.7924) time: 0.1416 data: 0.0567 max mem: 9377 +Train: [84] [5200/6250] eta: 0:02:47 lr: 0.000008 grad: 0.1728 (0.1738) loss: 0.7968 (0.7924) time: 0.1470 data: 0.0646 max mem: 9377 +Train: [84] [5300/6250] eta: 0:02:31 lr: 0.000008 grad: 0.1629 (0.1737) loss: 0.7931 (0.7924) time: 0.1404 data: 0.0507 max mem: 9377 +Train: [84] [5400/6250] eta: 0:02:15 lr: 0.000008 grad: 0.1745 (0.1736) loss: 0.7869 (0.7924) time: 0.1558 data: 0.0704 max mem: 9377 +Train: [84] [5500/6250] eta: 0:01:59 lr: 0.000008 grad: 0.1737 (0.1737) loss: 0.7905 (0.7924) time: 0.1530 data: 0.0710 max mem: 9377 +Train: [84] [5600/6250] eta: 0:01:43 lr: 0.000008 grad: 0.1735 (0.1738) loss: 0.7891 (0.7924) time: 0.1480 data: 0.0648 max mem: 9377 +Train: [84] [5700/6250] eta: 0:01:27 lr: 0.000008 grad: 0.1654 (0.1738) loss: 0.7999 (0.7924) time: 0.1139 data: 0.0175 max mem: 9377 +Train: [84] [5800/6250] eta: 0:01:11 lr: 0.000008 grad: 0.1711 (0.1737) loss: 0.7848 (0.7924) time: 0.1349 data: 0.0411 max mem: 9377 +Train: [84] [5900/6250] eta: 0:00:55 lr: 0.000008 grad: 0.1687 (0.1737) loss: 0.7937 (0.7924) time: 0.1418 data: 0.0483 max mem: 9377 +Train: [84] [6000/6250] eta: 0:00:39 lr: 0.000008 grad: 0.1699 (0.1737) loss: 0.7900 (0.7923) time: 0.1410 data: 0.0533 max mem: 9377 +Train: [84] [6100/6250] eta: 0:00:23 lr: 0.000008 grad: 0.1756 (0.1737) loss: 0.7894 (0.7923) time: 0.1360 data: 0.0332 max mem: 9377 +Train: [84] [6200/6250] eta: 0:00:07 lr: 0.000008 grad: 0.1707 (0.1736) loss: 0.7904 (0.7923) time: 0.1554 data: 0.0619 max mem: 9377 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.1652 (0.1736) loss: 0.7909 (0.7923) time: 0.1255 data: 0.0329 max mem: 9377 +Train: [84] Total time: 0:16:39 (0.1598 s / it) +Averaged stats: lr: 0.000008 grad: 0.1652 (0.1736) loss: 0.7909 (0.7923) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:04:36 loss: 0.8049 (0.8049) time: 4.4526 data: 4.4007 max mem: 9377 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.7882 (0.7890) time: 0.1298 data: 0.1046 max mem: 9377 +Eval (hcp-train-subset): [84] Total time: 0:00:14 (0.2392 s / it) +Averaged stats (hcp-train-subset): loss: 0.7882 (0.7890) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [84] [ 0/62] eta: 0:04:21 loss: 0.8380 (0.8380) time: 4.2103 data: 4.1245 max mem: 9377 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.8371 (0.8390) time: 0.1151 data: 0.0899 max mem: 9377 +Eval (hcp-val): [84] Total time: 0:00:14 (0.2353 s / it) +Averaged stats (hcp-val): loss: 0.8371 (0.8390) +Making plots (hcp-val): example=23 +Eval (nsd-val): [84] [ 0/62] eta: 0:04:36 loss: 0.8178 (0.8178) time: 4.4581 data: 4.3679 max mem: 9377 +Eval (nsd-val): [84] [61/62] eta: 0:00:00 loss: 0.8232 (0.8238) time: 0.1306 data: 0.1050 max mem: 9377 +Eval (nsd-val): [84] Total time: 0:00:14 (0.2346 s / it) +Averaged stats (nsd-val): loss: 0.8232 (0.8238) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00084.pth +Train: [85] [ 0/6250] eta: 13:12:28 lr: 0.000008 grad: 0.1213 (0.1213) loss: 0.8132 (0.8132) time: 7.6078 data: 7.4996 max mem: 9377 +Train: [85] [ 100/6250] eta: 0:25:28 lr: 0.000008 grad: 0.2109 (0.2236) loss: 0.8047 (0.7998) time: 0.1684 data: 0.0491 max mem: 9377 +Train: [85] [ 200/6250] eta: 0:22:07 lr: 0.000008 grad: 0.1914 (0.2113) loss: 0.7853 (0.7957) time: 0.2003 data: 0.0903 max mem: 9377 +Train: [85] [ 300/6250] eta: 0:20:21 lr: 0.000007 grad: 0.1960 (0.2059) loss: 0.7925 (0.7925) time: 0.1810 data: 0.0773 max mem: 9377 +Train: [85] [ 400/6250] eta: 0:19:05 lr: 0.000007 grad: 0.1747 (0.1998) loss: 0.7854 (0.7918) time: 0.1798 data: 0.0815 max mem: 9377 +Train: [85] [ 500/6250] eta: 0:18:06 lr: 0.000007 grad: 0.1748 (0.1961) loss: 0.7960 (0.7916) time: 0.1611 data: 0.0504 max mem: 9377 +Train: [85] [ 600/6250] eta: 0:17:24 lr: 0.000007 grad: 0.1702 (0.1932) loss: 0.7928 (0.7916) time: 0.1743 data: 0.0780 max mem: 9377 +Train: [85] [ 700/6250] eta: 0:16:54 lr: 0.000007 grad: 0.1686 (0.1908) loss: 0.7921 (0.7915) time: 0.1646 data: 0.0677 max mem: 9377 +Train: [85] [ 800/6250] eta: 0:16:29 lr: 0.000007 grad: 0.1706 (0.1892) loss: 0.7958 (0.7916) time: 0.1866 data: 0.0972 max mem: 9377 +Train: [85] [ 900/6250] eta: 0:16:03 lr: 0.000007 grad: 0.1699 (0.1874) loss: 0.7958 (0.7921) time: 0.1464 data: 0.0483 max mem: 9377 +Train: [85] [1000/6250] eta: 0:15:35 lr: 0.000007 grad: 0.1664 (0.1860) loss: 0.7921 (0.7925) time: 0.1647 data: 0.0775 max mem: 9377 +Train: [85] [1100/6250] eta: 0:15:10 lr: 0.000007 grad: 0.1711 (0.1847) loss: 0.7991 (0.7926) time: 0.1910 data: 0.1016 max mem: 9377 +Train: [85] [1200/6250] eta: 0:14:46 lr: 0.000007 grad: 0.1729 (0.1836) loss: 0.7893 (0.7927) time: 0.1645 data: 0.0793 max mem: 9377 +Train: [85] [1300/6250] eta: 0:14:19 lr: 0.000007 grad: 0.1549 (0.1825) loss: 0.8025 (0.7930) time: 0.1509 data: 0.0627 max mem: 9377 +Train: [85] [1400/6250] eta: 0:14:05 lr: 0.000007 grad: 0.1691 (0.1817) loss: 0.8031 (0.7932) time: 0.1708 data: 0.0818 max mem: 9377 +Train: [85] [1500/6250] eta: 0:13:43 lr: 0.000007 grad: 0.1575 (0.1810) loss: 0.8002 (0.7934) time: 0.1452 data: 0.0572 max mem: 9377 +Train: [85] [1600/6250] eta: 0:13:22 lr: 0.000007 grad: 0.1689 (0.1803) loss: 0.7957 (0.7935) time: 0.1736 data: 0.0907 max mem: 9377 +Train: [85] [1700/6250] eta: 0:13:02 lr: 0.000007 grad: 0.1644 (0.1798) loss: 0.7996 (0.7937) time: 0.1627 data: 0.0735 max mem: 9377 +Train: [85] [1800/6250] eta: 0:12:42 lr: 0.000007 grad: 0.1620 (0.1792) loss: 0.7943 (0.7939) time: 0.1672 data: 0.0870 max mem: 9377 +Train: [85] [1900/6250] eta: 0:12:23 lr: 0.000007 grad: 0.1610 (0.1785) loss: 0.8087 (0.7942) time: 0.1574 data: 0.0692 max mem: 9377 +Train: [85] [2000/6250] eta: 0:12:06 lr: 0.000007 grad: 0.1649 (0.1778) loss: 0.7921 (0.7946) time: 0.1692 data: 0.0670 max mem: 9377 +Train: [85] [2100/6250] eta: 0:11:48 lr: 0.000007 grad: 0.1655 (0.1774) loss: 0.7959 (0.7947) time: 0.1403 data: 0.0417 max mem: 9377 +Train: [85] [2200/6250] eta: 0:11:30 lr: 0.000007 grad: 0.1657 (0.1771) loss: 0.7991 (0.7948) time: 0.1801 data: 0.0918 max mem: 9377 +Train: [85] [2300/6250] eta: 0:11:10 lr: 0.000007 grad: 0.1692 (0.1768) loss: 0.7910 (0.7949) time: 0.1506 data: 0.0646 max mem: 9377 +Train: [85] [2400/6250] eta: 0:10:50 lr: 0.000007 grad: 0.1667 (0.1765) loss: 0.7963 (0.7950) time: 0.1454 data: 0.0532 max mem: 9377 +Train: [85] [2500/6250] eta: 0:10:31 lr: 0.000007 grad: 0.1678 (0.1762) loss: 0.8010 (0.7950) time: 0.1516 data: 0.0654 max mem: 9377 +Train: [85] [2600/6250] eta: 0:10:12 lr: 0.000007 grad: 0.1680 (0.1759) loss: 0.7898 (0.7950) time: 0.1513 data: 0.0617 max mem: 9377 +Train: [85] [2700/6250] eta: 0:09:55 lr: 0.000007 grad: 0.1668 (0.1756) loss: 0.7917 (0.7951) time: 0.1640 data: 0.0853 max mem: 9377 +Train: [85] [2800/6250] eta: 0:09:37 lr: 0.000007 grad: 0.1711 (0.1756) loss: 0.8028 (0.7951) time: 0.1476 data: 0.0586 max mem: 9377 +Train: [85] [2900/6250] eta: 0:09:20 lr: 0.000007 grad: 0.1730 (0.1756) loss: 0.7912 (0.7949) time: 0.1656 data: 0.0875 max mem: 9377 +Train: [85] [3000/6250] eta: 0:09:02 lr: 0.000007 grad: 0.1761 (0.1757) loss: 0.7961 (0.7949) time: 0.1059 data: 0.0183 max mem: 9377 +Train: [85] [3100/6250] eta: 0:08:45 lr: 0.000007 grad: 0.1712 (0.1756) loss: 0.7990 (0.7948) time: 0.1731 data: 0.0874 max mem: 9377 +Train: [85] [3200/6250] eta: 0:08:28 lr: 0.000007 grad: 0.1783 (0.1756) loss: 0.7916 (0.7948) time: 0.2069 data: 0.1161 max mem: 9377 +Train: [85] [3300/6250] eta: 0:08:10 lr: 0.000007 grad: 0.1660 (0.1755) loss: 0.7936 (0.7947) time: 0.1522 data: 0.0654 max mem: 9377 +Train: [85] [3400/6250] eta: 0:07:53 lr: 0.000007 grad: 0.1732 (0.1755) loss: 0.7822 (0.7946) time: 0.1941 data: 0.1112 max mem: 9377 +Train: [85] [3500/6250] eta: 0:07:37 lr: 0.000007 grad: 0.1837 (0.1755) loss: 0.7840 (0.7945) time: 0.1458 data: 0.0663 max mem: 9377 +Train: [85] [3600/6250] eta: 0:07:21 lr: 0.000007 grad: 0.1708 (0.1755) loss: 0.7947 (0.7943) time: 0.1666 data: 0.0810 max mem: 9377 +Train: [85] [3700/6250] eta: 0:07:05 lr: 0.000007 grad: 0.1691 (0.1754) loss: 0.7930 (0.7942) time: 0.1630 data: 0.0836 max mem: 9377 +Train: [85] [3800/6250] eta: 0:06:48 lr: 0.000007 grad: 0.1740 (0.1754) loss: 0.7895 (0.7940) time: 0.1569 data: 0.0744 max mem: 9377 +Train: [85] [3900/6250] eta: 0:06:32 lr: 0.000007 grad: 0.1761 (0.1753) loss: 0.7854 (0.7938) time: 0.1376 data: 0.0403 max mem: 9377 +Train: [85] [4000/6250] eta: 0:06:16 lr: 0.000007 grad: 0.1657 (0.1753) loss: 0.7865 (0.7937) time: 0.1841 data: 0.0833 max mem: 9377 +Train: [85] [4100/6250] eta: 0:06:00 lr: 0.000007 grad: 0.1801 (0.1753) loss: 0.7789 (0.7935) time: 0.1703 data: 0.0710 max mem: 9377 +Train: [85] [4200/6250] eta: 0:05:44 lr: 0.000007 grad: 0.1723 (0.1753) loss: 0.7897 (0.7934) time: 0.1529 data: 0.0570 max mem: 9377 +Train: [85] [4300/6250] eta: 0:05:27 lr: 0.000007 grad: 0.1795 (0.1753) loss: 0.7843 (0.7932) time: 0.1649 data: 0.0609 max mem: 9377 +Train: [85] [4400/6250] eta: 0:05:10 lr: 0.000007 grad: 0.1617 (0.1752) loss: 0.7924 (0.7931) time: 0.1816 data: 0.0927 max mem: 9377 +Train: [85] [4500/6250] eta: 0:04:52 lr: 0.000007 grad: 0.1681 (0.1752) loss: 0.7856 (0.7930) time: 0.1426 data: 0.0471 max mem: 9377 +Train: [85] [4600/6250] eta: 0:04:35 lr: 0.000007 grad: 0.1780 (0.1752) loss: 0.7877 (0.7929) time: 0.1582 data: 0.0780 max mem: 9377 +Train: [85] [4700/6250] eta: 0:04:18 lr: 0.000007 grad: 0.1781 (0.1752) loss: 0.7820 (0.7929) time: 0.1360 data: 0.0505 max mem: 9377 +Train: [85] [4800/6250] eta: 0:04:01 lr: 0.000007 grad: 0.1719 (0.1753) loss: 0.7847 (0.7928) time: 0.1459 data: 0.0570 max mem: 9377 +Train: [85] [4900/6250] eta: 0:03:45 lr: 0.000007 grad: 0.1725 (0.1754) loss: 0.7842 (0.7927) time: 0.1591 data: 0.0683 max mem: 9377 +Train: [85] [5000/6250] eta: 0:03:28 lr: 0.000007 grad: 0.1725 (0.1755) loss: 0.7856 (0.7926) time: 0.1848 data: 0.0868 max mem: 9377 +Train: [85] [5100/6250] eta: 0:03:11 lr: 0.000007 grad: 0.1751 (0.1755) loss: 0.7827 (0.7925) time: 0.1552 data: 0.0684 max mem: 9377 +Train: [85] [5200/6250] eta: 0:02:55 lr: 0.000007 grad: 0.1765 (0.1756) loss: 0.7882 (0.7924) time: 0.1412 data: 0.0534 max mem: 9377 +Train: [85] [5300/6250] eta: 0:02:38 lr: 0.000007 grad: 0.1744 (0.1756) loss: 0.7874 (0.7923) time: 0.1621 data: 0.0719 max mem: 9377 +Train: [85] [5400/6250] eta: 0:02:21 lr: 0.000007 grad: 0.1738 (0.1757) loss: 0.7887 (0.7921) time: 0.1720 data: 0.0878 max mem: 9377 +Train: [85] [5500/6250] eta: 0:02:04 lr: 0.000007 grad: 0.1759 (0.1757) loss: 0.7815 (0.7921) time: 0.1785 data: 0.0855 max mem: 9377 +Train: [85] [5600/6250] eta: 0:01:48 lr: 0.000007 grad: 0.1635 (0.1757) loss: 0.7879 (0.7920) time: 0.1556 data: 0.0618 max mem: 9377 +Train: [85] [5700/6250] eta: 0:01:31 lr: 0.000007 grad: 0.1604 (0.1756) loss: 0.7969 (0.7920) time: 0.1679 data: 0.0793 max mem: 9377 +Train: [85] [5800/6250] eta: 0:01:14 lr: 0.000007 grad: 0.1624 (0.1755) loss: 0.7934 (0.7919) time: 0.1322 data: 0.0355 max mem: 9377 +Train: [85] [5900/6250] eta: 0:00:57 lr: 0.000007 grad: 0.1704 (0.1755) loss: 0.7877 (0.7919) time: 0.1741 data: 0.0794 max mem: 9377 +Train: [85] [6000/6250] eta: 0:00:41 lr: 0.000007 grad: 0.1750 (0.1754) loss: 0.7926 (0.7919) time: 0.1511 data: 0.0503 max mem: 9377 +Train: [85] [6100/6250] eta: 0:00:24 lr: 0.000007 grad: 0.1659 (0.1754) loss: 0.7844 (0.7918) time: 0.1434 data: 0.0572 max mem: 9377 +Train: [85] [6200/6250] eta: 0:00:08 lr: 0.000007 grad: 0.1783 (0.1754) loss: 0.7821 (0.7918) time: 0.1869 data: 0.0934 max mem: 9377 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.1654 (0.1753) loss: 0.7867 (0.7917) time: 0.1509 data: 0.0661 max mem: 9377 +Train: [85] Total time: 0:17:18 (0.1661 s / it) +Averaged stats: lr: 0.000007 grad: 0.1654 (0.1753) loss: 0.7867 (0.7917) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:06:00 loss: 0.8054 (0.8054) time: 5.8113 data: 5.7788 max mem: 9377 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.7866 (0.7879) time: 0.1307 data: 0.1051 max mem: 9377 +Eval (hcp-train-subset): [85] Total time: 0:00:16 (0.2629 s / it) +Averaged stats (hcp-train-subset): loss: 0.7866 (0.7879) +Eval (hcp-val): [85] [ 0/62] eta: 0:04:15 loss: 0.8368 (0.8368) time: 4.1185 data: 4.0570 max mem: 9377 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.8367 (0.8392) time: 0.1353 data: 0.1099 max mem: 9377 +Eval (hcp-val): [85] Total time: 0:00:15 (0.2563 s / it) +Averaged stats (hcp-val): loss: 0.8367 (0.8392) +Eval (nsd-val): [85] [ 0/62] eta: 0:06:08 loss: 0.8131 (0.8131) time: 5.9403 data: 5.9096 max mem: 9377 +Eval (nsd-val): [85] [61/62] eta: 0:00:00 loss: 0.8209 (0.8221) time: 0.1502 data: 0.1246 max mem: 9377 +Eval (nsd-val): [85] Total time: 0:00:15 (0.2509 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8221) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 9:01:16 lr: 0.000007 grad: 0.1421 (0.1421) loss: 0.8687 (0.8687) time: 5.1963 data: 4.8749 max mem: 9377 +Train: [86] [ 100/6250] eta: 0:24:15 lr: 0.000007 grad: 0.1912 (0.2383) loss: 0.7949 (0.7985) time: 0.1822 data: 0.0679 max mem: 9377 +Train: [86] [ 200/6250] eta: 0:21:07 lr: 0.000007 grad: 0.1966 (0.2146) loss: 0.8006 (0.7976) time: 0.1842 data: 0.0853 max mem: 9377 +Train: [86] [ 300/6250] eta: 0:19:50 lr: 0.000007 grad: 0.2022 (0.2107) loss: 0.7782 (0.7944) time: 0.2153 data: 0.1153 max mem: 9377 +Train: [86] [ 400/6250] eta: 0:18:45 lr: 0.000007 grad: 0.1795 (0.2061) loss: 0.7836 (0.7932) time: 0.1488 data: 0.0574 max mem: 9377 +Train: [86] [ 500/6250] eta: 0:18:05 lr: 0.000007 grad: 0.1736 (0.2009) loss: 0.7825 (0.7924) time: 0.1976 data: 0.0996 max mem: 9377 +Train: [86] [ 600/6250] eta: 0:17:25 lr: 0.000006 grad: 0.1811 (0.1976) loss: 0.7895 (0.7917) time: 0.1747 data: 0.0765 max mem: 9377 +Train: [86] [ 700/6250] eta: 0:16:44 lr: 0.000006 grad: 0.1712 (0.1947) loss: 0.7949 (0.7917) time: 0.1727 data: 0.0805 max mem: 9377 +Train: [86] [ 800/6250] eta: 0:16:19 lr: 0.000006 grad: 0.1624 (0.1922) loss: 0.7996 (0.7924) time: 0.1627 data: 0.0575 max mem: 9377 +Train: [86] [ 900/6250] eta: 0:15:49 lr: 0.000006 grad: 0.1609 (0.1902) loss: 0.7956 (0.7926) time: 0.1555 data: 0.0538 max mem: 9377 +Train: [86] [1000/6250] eta: 0:15:19 lr: 0.000006 grad: 0.1789 (0.1890) loss: 0.7893 (0.7924) time: 0.1373 data: 0.0458 max mem: 9377 +Train: [86] [1100/6250] eta: 0:14:51 lr: 0.000006 grad: 0.1758 (0.1880) loss: 0.7873 (0.7922) time: 0.1609 data: 0.0657 max mem: 9377 +Train: [86] [1200/6250] eta: 0:14:27 lr: 0.000006 grad: 0.1767 (0.1870) loss: 0.7911 (0.7919) time: 0.1574 data: 0.0691 max mem: 9377 +Train: [86] [1300/6250] eta: 0:14:09 lr: 0.000006 grad: 0.1679 (0.1862) loss: 0.7955 (0.7915) time: 0.2169 data: 0.1281 max mem: 9377 +Train: [86] [1400/6250] eta: 0:13:50 lr: 0.000006 grad: 0.1794 (0.1852) loss: 0.7910 (0.7913) time: 0.1545 data: 0.0743 max mem: 9377 +Train: [86] [1500/6250] eta: 0:13:31 lr: 0.000006 grad: 0.1685 (0.1844) loss: 0.7860 (0.7912) time: 0.1716 data: 0.0774 max mem: 9377 +Train: [86] [1600/6250] eta: 0:13:15 lr: 0.000006 grad: 0.1686 (0.1837) loss: 0.7921 (0.7910) time: 0.1486 data: 0.0606 max mem: 9377 +Train: [86] [1700/6250] eta: 0:12:54 lr: 0.000006 grad: 0.1731 (0.1831) loss: 0.7837 (0.7907) time: 0.1608 data: 0.0752 max mem: 9377 +Train: [86] [1800/6250] eta: 0:12:35 lr: 0.000006 grad: 0.1724 (0.1827) loss: 0.7900 (0.7904) time: 0.1631 data: 0.0674 max mem: 9377 +Train: [86] [1900/6250] eta: 0:12:16 lr: 0.000006 grad: 0.1777 (0.1823) loss: 0.7857 (0.7901) time: 0.1536 data: 0.0579 max mem: 9377 +Train: [86] [2000/6250] eta: 0:11:57 lr: 0.000006 grad: 0.1625 (0.1819) loss: 0.7863 (0.7900) time: 0.1396 data: 0.0520 max mem: 9377 +Train: [86] [2100/6250] eta: 0:11:37 lr: 0.000006 grad: 0.1673 (0.1817) loss: 0.7853 (0.7899) time: 0.1365 data: 0.0457 max mem: 9377 +Train: [86] [2200/6250] eta: 0:11:17 lr: 0.000006 grad: 0.1770 (0.1815) loss: 0.7878 (0.7899) time: 0.1498 data: 0.0530 max mem: 9377 +Train: [86] [2300/6250] eta: 0:10:57 lr: 0.000006 grad: 0.1861 (0.1814) loss: 0.7889 (0.7898) time: 0.1558 data: 0.0643 max mem: 9377 +Train: [86] [2400/6250] eta: 0:10:37 lr: 0.000006 grad: 0.1751 (0.1812) loss: 0.7843 (0.7898) time: 0.1345 data: 0.0368 max mem: 9377 +Train: [86] [2500/6250] eta: 0:10:18 lr: 0.000006 grad: 0.1718 (0.1808) loss: 0.7970 (0.7899) time: 0.1612 data: 0.0647 max mem: 9377 +Train: [86] [2600/6250] eta: 0:10:00 lr: 0.000006 grad: 0.1698 (0.1806) loss: 0.7892 (0.7899) time: 0.1479 data: 0.0583 max mem: 9377 +Train: [86] [2700/6250] eta: 0:09:41 lr: 0.000006 grad: 0.1676 (0.1805) loss: 0.7906 (0.7899) time: 0.1505 data: 0.0545 max mem: 9377 +Train: [86] [2800/6250] eta: 0:09:23 lr: 0.000006 grad: 0.1798 (0.1803) loss: 0.7840 (0.7899) time: 0.1558 data: 0.0682 max mem: 9377 +Train: [86] [2900/6250] eta: 0:09:05 lr: 0.000006 grad: 0.1785 (0.1802) loss: 0.7920 (0.7899) time: 0.1512 data: 0.0600 max mem: 9377 +Train: [86] [3000/6250] eta: 0:08:47 lr: 0.000006 grad: 0.1688 (0.1801) loss: 0.7890 (0.7899) time: 0.1516 data: 0.0592 max mem: 9377 +Train: [86] [3100/6250] eta: 0:08:30 lr: 0.000006 grad: 0.1652 (0.1799) loss: 0.7911 (0.7899) time: 0.1636 data: 0.0725 max mem: 9377 +Train: [86] [3200/6250] eta: 0:08:13 lr: 0.000006 grad: 0.1669 (0.1796) loss: 0.8008 (0.7900) time: 0.1196 data: 0.0282 max mem: 9377 +Train: [86] [3300/6250] eta: 0:07:57 lr: 0.000006 grad: 0.1677 (0.1795) loss: 0.7905 (0.7900) time: 0.1524 data: 0.0498 max mem: 9377 +Train: [86] [3400/6250] eta: 0:07:40 lr: 0.000006 grad: 0.1705 (0.1791) loss: 0.7900 (0.7901) time: 0.1562 data: 0.0704 max mem: 9377 +Train: [86] [3500/6250] eta: 0:07:23 lr: 0.000006 grad: 0.1711 (0.1789) loss: 0.7877 (0.7902) time: 0.1491 data: 0.0692 max mem: 9377 +Train: [86] [3600/6250] eta: 0:07:08 lr: 0.000006 grad: 0.1733 (0.1787) loss: 0.7906 (0.7903) time: 0.1534 data: 0.0732 max mem: 9377 +Train: [86] [3700/6250] eta: 0:06:52 lr: 0.000006 grad: 0.1684 (0.1784) loss: 0.7915 (0.7903) time: 0.1835 data: 0.0939 max mem: 9377 +Train: [86] [3800/6250] eta: 0:06:36 lr: 0.000006 grad: 0.1747 (0.1783) loss: 0.7931 (0.7904) time: 0.1790 data: 0.0937 max mem: 9377 +Train: [86] [3900/6250] eta: 0:06:20 lr: 0.000006 grad: 0.1708 (0.1781) loss: 0.7972 (0.7904) time: 0.1722 data: 0.0822 max mem: 9377 +Train: [86] [4000/6250] eta: 0:06:04 lr: 0.000006 grad: 0.1702 (0.1780) loss: 0.7862 (0.7903) time: 0.1355 data: 0.0518 max mem: 9377 +Train: [86] [4100/6250] eta: 0:05:47 lr: 0.000006 grad: 0.1685 (0.1779) loss: 0.7862 (0.7903) time: 0.1563 data: 0.0585 max mem: 9377 +Train: [86] [4200/6250] eta: 0:05:31 lr: 0.000006 grad: 0.1775 (0.1778) loss: 0.7885 (0.7903) time: 0.1514 data: 0.0679 max mem: 9377 +Train: [86] [4300/6250] eta: 0:05:14 lr: 0.000006 grad: 0.1688 (0.1778) loss: 0.7935 (0.7903) time: 0.1456 data: 0.0438 max mem: 9377 +Train: [86] [4400/6250] eta: 0:04:58 lr: 0.000006 grad: 0.1711 (0.1776) loss: 0.7939 (0.7903) time: 0.1373 data: 0.0398 max mem: 9377 +Train: [86] [4500/6250] eta: 0:04:41 lr: 0.000006 grad: 0.1702 (0.1775) loss: 0.7879 (0.7904) time: 0.1369 data: 0.0361 max mem: 9377 +Train: [86] [4600/6250] eta: 0:04:25 lr: 0.000006 grad: 0.1724 (0.1775) loss: 0.7893 (0.7903) time: 0.1464 data: 0.0514 max mem: 9377 +Train: [86] [4700/6250] eta: 0:04:08 lr: 0.000006 grad: 0.1696 (0.1775) loss: 0.7967 (0.7903) time: 0.1876 data: 0.1050 max mem: 9377 +Train: [86] [4800/6250] eta: 0:03:52 lr: 0.000006 grad: 0.1728 (0.1775) loss: 0.7885 (0.7902) time: 0.2023 data: 0.1036 max mem: 9377 +Train: [86] [4900/6250] eta: 0:03:36 lr: 0.000006 grad: 0.1694 (0.1775) loss: 0.7867 (0.7901) time: 0.1452 data: 0.0527 max mem: 9377 +Train: [86] [5000/6250] eta: 0:03:20 lr: 0.000006 grad: 0.1839 (0.1776) loss: 0.7811 (0.7900) time: 0.1666 data: 0.0802 max mem: 9377 +Train: [86] [5100/6250] eta: 0:03:04 lr: 0.000006 grad: 0.1804 (0.1776) loss: 0.7821 (0.7899) time: 0.1629 data: 0.0703 max mem: 9377 +Train: [86] [5200/6250] eta: 0:02:48 lr: 0.000006 grad: 0.1730 (0.1775) loss: 0.7907 (0.7898) time: 0.1537 data: 0.0620 max mem: 9377 +Train: [86] [5300/6250] eta: 0:02:32 lr: 0.000006 grad: 0.1614 (0.1775) loss: 0.7894 (0.7897) time: 0.1513 data: 0.0644 max mem: 9377 +Train: [86] [5400/6250] eta: 0:02:16 lr: 0.000006 grad: 0.1660 (0.1775) loss: 0.7883 (0.7897) time: 0.1585 data: 0.0722 max mem: 9377 +Train: [86] [5500/6250] eta: 0:02:00 lr: 0.000006 grad: 0.1789 (0.1777) loss: 0.7782 (0.7895) time: 0.1548 data: 0.0561 max mem: 9377 +Train: [86] [5600/6250] eta: 0:01:44 lr: 0.000006 grad: 0.1803 (0.1778) loss: 0.7799 (0.7894) time: 0.1762 data: 0.0825 max mem: 9377 +Train: [86] [5700/6250] eta: 0:01:28 lr: 0.000006 grad: 0.1825 (0.1779) loss: 0.7756 (0.7892) time: 0.1724 data: 0.0751 max mem: 9377 +Train: [86] [5800/6250] eta: 0:01:12 lr: 0.000006 grad: 0.1745 (0.1779) loss: 0.7842 (0.7892) time: 0.1589 data: 0.0714 max mem: 9377 +Train: [86] [5900/6250] eta: 0:00:56 lr: 0.000006 grad: 0.1788 (0.1780) loss: 0.7828 (0.7891) time: 0.1546 data: 0.0709 max mem: 9377 +Train: [86] [6000/6250] eta: 0:00:40 lr: 0.000006 grad: 0.1755 (0.1781) loss: 0.7915 (0.7890) time: 0.1447 data: 0.0564 max mem: 9377 +Train: [86] [6100/6250] eta: 0:00:24 lr: 0.000006 grad: 0.1843 (0.1782) loss: 0.7880 (0.7890) time: 0.1862 data: 0.0974 max mem: 9377 +Train: [86] [6200/6250] eta: 0:00:08 lr: 0.000006 grad: 0.1769 (0.1783) loss: 0.7833 (0.7889) time: 0.1668 data: 0.0809 max mem: 9377 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.1814 (0.1783) loss: 0.7899 (0.7889) time: 0.1847 data: 0.0959 max mem: 9377 +Train: [86] Total time: 0:16:52 (0.1620 s / it) +Averaged stats: lr: 0.000006 grad: 0.1814 (0.1783) loss: 0.7899 (0.7889) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:05:53 loss: 0.8060 (0.8060) time: 5.6954 data: 5.6632 max mem: 9377 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.7857 (0.7869) time: 0.1531 data: 0.1252 max mem: 9377 +Eval (hcp-train-subset): [86] Total time: 0:00:15 (0.2475 s / it) +Averaged stats (hcp-train-subset): loss: 0.7857 (0.7869) +Eval (hcp-val): [86] [ 0/62] eta: 0:04:57 loss: 0.8384 (0.8384) time: 4.8012 data: 4.7193 max mem: 9377 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.8365 (0.8379) time: 0.1420 data: 0.1167 max mem: 9377 +Eval (hcp-val): [86] Total time: 0:00:14 (0.2418 s / it) +Averaged stats (hcp-val): loss: 0.8365 (0.8379) +Eval (nsd-val): [86] [ 0/62] eta: 0:03:52 loss: 0.8099 (0.8099) time: 3.7568 data: 3.6896 max mem: 9377 +Eval (nsd-val): [86] [61/62] eta: 0:00:00 loss: 0.8202 (0.8222) time: 0.1457 data: 0.1187 max mem: 9377 +Eval (nsd-val): [86] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (nsd-val): loss: 0.8202 (0.8222) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [87] [ 0/6250] eta: 11:42:21 lr: 0.000006 grad: 0.1304 (0.1304) loss: 0.8155 (0.8155) time: 6.7427 data: 6.6387 max mem: 9377 +Train: [87] [ 100/6250] eta: 0:23:25 lr: 0.000006 grad: 0.1845 (0.2013) loss: 0.8251 (0.8164) time: 0.1862 data: 0.0924 max mem: 9377 +Train: [87] [ 200/6250] eta: 0:20:13 lr: 0.000006 grad: 0.1877 (0.2063) loss: 0.7768 (0.8023) time: 0.1866 data: 0.0871 max mem: 9377 +Train: [87] [ 300/6250] eta: 0:18:27 lr: 0.000006 grad: 0.1926 (0.2040) loss: 0.7808 (0.7958) time: 0.1457 data: 0.0552 max mem: 9377 +Train: [87] [ 400/6250] eta: 0:17:37 lr: 0.000006 grad: 0.1844 (0.2021) loss: 0.7856 (0.7934) time: 0.1516 data: 0.0507 max mem: 9377 +Train: [87] [ 500/6250] eta: 0:16:47 lr: 0.000006 grad: 0.1967 (0.1996) loss: 0.7866 (0.7926) time: 0.1363 data: 0.0408 max mem: 9377 +Train: [87] [ 600/6250] eta: 0:16:08 lr: 0.000006 grad: 0.1859 (0.1972) loss: 0.7917 (0.7925) time: 0.1600 data: 0.0554 max mem: 9377 +Train: [87] [ 700/6250] eta: 0:15:35 lr: 0.000006 grad: 0.1586 (0.1945) loss: 0.8043 (0.7925) time: 0.1386 data: 0.0489 max mem: 9377 +Train: [87] [ 800/6250] eta: 0:15:09 lr: 0.000006 grad: 0.1685 (0.1921) loss: 0.7979 (0.7929) time: 0.1478 data: 0.0488 max mem: 9377 +Train: [87] [ 900/6250] eta: 0:14:44 lr: 0.000006 grad: 0.1786 (0.1902) loss: 0.7901 (0.7927) time: 0.1529 data: 0.0574 max mem: 9377 +Train: [87] [1000/6250] eta: 0:14:36 lr: 0.000006 grad: 0.1700 (0.1886) loss: 0.7888 (0.7928) time: 0.2997 data: 0.1990 max mem: 9377 +Train: [87] [1100/6250] eta: 0:14:06 lr: 0.000006 grad: 0.1757 (0.1874) loss: 0.7881 (0.7929) time: 0.1487 data: 0.0549 max mem: 9377 +Train: [87] [1200/6250] eta: 0:13:47 lr: 0.000006 grad: 0.1740 (0.1865) loss: 0.7914 (0.7927) time: 0.1266 data: 0.0277 max mem: 9377 +Train: [87] [1300/6250] eta: 0:13:28 lr: 0.000006 grad: 0.1743 (0.1853) loss: 0.7892 (0.7927) time: 0.1552 data: 0.0634 max mem: 9377 +Train: [87] [1400/6250] eta: 0:13:17 lr: 0.000005 grad: 0.1749 (0.1847) loss: 0.7891 (0.7926) time: 0.2633 data: 0.1865 max mem: 9377 +Train: [87] [1500/6250] eta: 0:12:57 lr: 0.000005 grad: 0.1782 (0.1842) loss: 0.7801 (0.7923) time: 0.1534 data: 0.0693 max mem: 9377 +Train: [87] [1600/6250] eta: 0:12:38 lr: 0.000005 grad: 0.1676 (0.1834) loss: 0.7895 (0.7923) time: 0.1618 data: 0.0702 max mem: 9377 +Train: [87] [1700/6250] eta: 0:12:20 lr: 0.000005 grad: 0.1666 (0.1829) loss: 0.7849 (0.7922) time: 0.1491 data: 0.0632 max mem: 9377 +Train: [87] [1800/6250] eta: 0:12:05 lr: 0.000005 grad: 0.1718 (0.1823) loss: 0.7828 (0.7921) time: 0.1703 data: 0.0809 max mem: 9377 +Train: [87] [1900/6250] eta: 0:11:50 lr: 0.000005 grad: 0.1787 (0.1822) loss: 0.7916 (0.7917) time: 0.1658 data: 0.0780 max mem: 9377 +Train: [87] [2000/6250] eta: 0:11:31 lr: 0.000005 grad: 0.1719 (0.1819) loss: 0.7925 (0.7915) time: 0.1358 data: 0.0425 max mem: 9377 +Train: [87] [2100/6250] eta: 0:11:13 lr: 0.000005 grad: 0.1623 (0.1813) loss: 0.7838 (0.7915) time: 0.1731 data: 0.0829 max mem: 9377 +Train: [87] [2200/6250] eta: 0:10:57 lr: 0.000005 grad: 0.1730 (0.1810) loss: 0.8007 (0.7913) time: 0.1709 data: 0.0862 max mem: 9377 +Train: [87] [2300/6250] eta: 0:10:39 lr: 0.000005 grad: 0.1673 (0.1807) loss: 0.7913 (0.7913) time: 0.1464 data: 0.0523 max mem: 9377 +Train: [87] [2400/6250] eta: 0:10:22 lr: 0.000005 grad: 0.1788 (0.1804) loss: 0.7843 (0.7912) time: 0.1549 data: 0.0556 max mem: 9377 +Train: [87] [2500/6250] eta: 0:10:05 lr: 0.000005 grad: 0.1709 (0.1801) loss: 0.7868 (0.7913) time: 0.1478 data: 0.0544 max mem: 9377 +Train: [87] [2600/6250] eta: 0:09:49 lr: 0.000005 grad: 0.1763 (0.1799) loss: 0.7890 (0.7912) time: 0.1685 data: 0.0731 max mem: 9377 +Train: [87] [2700/6250] eta: 0:09:32 lr: 0.000005 grad: 0.1732 (0.1796) loss: 0.7953 (0.7912) time: 0.1599 data: 0.0721 max mem: 9377 +Train: [87] [2800/6250] eta: 0:09:15 lr: 0.000005 grad: 0.1786 (0.1795) loss: 0.7840 (0.7911) time: 0.1584 data: 0.0693 max mem: 9377 +Train: [87] [2900/6250] eta: 0:08:59 lr: 0.000005 grad: 0.1735 (0.1793) loss: 0.7946 (0.7909) time: 0.1454 data: 0.0495 max mem: 9377 +Train: [87] [3000/6250] eta: 0:08:42 lr: 0.000005 grad: 0.1604 (0.1790) loss: 0.7946 (0.7909) time: 0.1552 data: 0.0620 max mem: 9377 +Train: [87] [3100/6250] eta: 0:08:26 lr: 0.000005 grad: 0.1740 (0.1789) loss: 0.7966 (0.7909) time: 0.1544 data: 0.0642 max mem: 9377 +Train: [87] [3200/6250] eta: 0:08:10 lr: 0.000005 grad: 0.1626 (0.1786) loss: 0.7932 (0.7910) time: 0.1789 data: 0.0945 max mem: 9377 +Train: [87] [3300/6250] eta: 0:07:53 lr: 0.000005 grad: 0.1710 (0.1785) loss: 0.8008 (0.7910) time: 0.1796 data: 0.0934 max mem: 9377 +Train: [87] [3400/6250] eta: 0:07:36 lr: 0.000005 grad: 0.1682 (0.1783) loss: 0.7894 (0.7910) time: 0.1482 data: 0.0564 max mem: 9377 +Train: [87] [3500/6250] eta: 0:07:20 lr: 0.000005 grad: 0.1670 (0.1781) loss: 0.7904 (0.7910) time: 0.1496 data: 0.0524 max mem: 9377 +Train: [87] [3600/6250] eta: 0:07:05 lr: 0.000005 grad: 0.1687 (0.1780) loss: 0.7986 (0.7911) time: 0.1700 data: 0.0855 max mem: 9377 +Train: [87] [3700/6250] eta: 0:06:49 lr: 0.000005 grad: 0.1682 (0.1778) loss: 0.7848 (0.7910) time: 0.1547 data: 0.0656 max mem: 9377 +Train: [87] [3800/6250] eta: 0:06:33 lr: 0.000005 grad: 0.1690 (0.1777) loss: 0.7979 (0.7911) time: 0.1635 data: 0.0794 max mem: 9377 +Train: [87] [3900/6250] eta: 0:06:17 lr: 0.000005 grad: 0.1722 (0.1775) loss: 0.7895 (0.7911) time: 0.1836 data: 0.0869 max mem: 9377 +Train: [87] [4000/6250] eta: 0:06:01 lr: 0.000005 grad: 0.1602 (0.1773) loss: 0.7957 (0.7911) time: 0.2009 data: 0.1132 max mem: 9377 +Train: [87] [4100/6250] eta: 0:05:46 lr: 0.000005 grad: 0.1742 (0.1772) loss: 0.7872 (0.7912) time: 0.1721 data: 0.0667 max mem: 9377 +Train: [87] [4200/6250] eta: 0:05:30 lr: 0.000005 grad: 0.1656 (0.1769) loss: 0.7991 (0.7912) time: 0.1586 data: 0.0628 max mem: 9377 +Train: [87] [4300/6250] eta: 0:05:13 lr: 0.000005 grad: 0.1681 (0.1767) loss: 0.7950 (0.7913) time: 0.1271 data: 0.0306 max mem: 9377 +Train: [87] [4400/6250] eta: 0:04:57 lr: 0.000005 grad: 0.1583 (0.1765) loss: 0.8011 (0.7915) time: 0.1526 data: 0.0573 max mem: 9377 +Train: [87] [4500/6250] eta: 0:04:40 lr: 0.000005 grad: 0.1703 (0.1762) loss: 0.8002 (0.7916) time: 0.1494 data: 0.0599 max mem: 9377 +Train: [87] [4600/6250] eta: 0:04:24 lr: 0.000005 grad: 0.1534 (0.1760) loss: 0.8016 (0.7917) time: 0.1423 data: 0.0591 max mem: 9377 +Train: [87] [4700/6250] eta: 0:04:07 lr: 0.000005 grad: 0.1645 (0.1759) loss: 0.7977 (0.7918) time: 0.1294 data: 0.0351 max mem: 9377 +Train: [87] [4800/6250] eta: 0:03:51 lr: 0.000005 grad: 0.1700 (0.1757) loss: 0.7960 (0.7920) time: 0.1583 data: 0.0651 max mem: 9377 +Train: [87] [4900/6250] eta: 0:03:35 lr: 0.000005 grad: 0.1699 (0.1756) loss: 0.8011 (0.7921) time: 0.1907 data: 0.1140 max mem: 9377 +Train: [87] [5000/6250] eta: 0:03:19 lr: 0.000005 grad: 0.1565 (0.1754) loss: 0.8060 (0.7922) time: 0.1406 data: 0.0589 max mem: 9377 +Train: [87] [5100/6250] eta: 0:03:03 lr: 0.000005 grad: 0.1593 (0.1752) loss: 0.8060 (0.7924) time: 0.1529 data: 0.0710 max mem: 9377 +Train: [87] [5200/6250] eta: 0:02:48 lr: 0.000005 grad: 0.1666 (0.1750) loss: 0.7892 (0.7924) time: 0.1894 data: 0.1004 max mem: 9377 +Train: [87] [5300/6250] eta: 0:02:32 lr: 0.000005 grad: 0.1600 (0.1749) loss: 0.7988 (0.7925) time: 0.2100 data: 0.1254 max mem: 9377 +Train: [87] [5400/6250] eta: 0:02:17 lr: 0.000005 grad: 0.1574 (0.1746) loss: 0.7875 (0.7925) time: 0.2539 data: 0.1622 max mem: 9377 +Train: [87] [5500/6250] eta: 0:02:01 lr: 0.000005 grad: 0.1648 (0.1746) loss: 0.7929 (0.7925) time: 0.1711 data: 0.0703 max mem: 9377 +Train: [87] [5600/6250] eta: 0:01:45 lr: 0.000005 grad: 0.1681 (0.1745) loss: 0.7937 (0.7926) time: 0.1407 data: 0.0515 max mem: 9377 +Train: [87] [5700/6250] eta: 0:01:29 lr: 0.000005 grad: 0.1777 (0.1745) loss: 0.7859 (0.7926) time: 0.1839 data: 0.0863 max mem: 9377 +Train: [87] [5800/6250] eta: 0:01:12 lr: 0.000005 grad: 0.1696 (0.1746) loss: 0.7932 (0.7926) time: 0.1750 data: 0.0788 max mem: 9377 +Train: [87] [5900/6250] eta: 0:00:56 lr: 0.000005 grad: 0.1727 (0.1745) loss: 0.7928 (0.7926) time: 0.1429 data: 0.0540 max mem: 9377 +Train: [87] [6000/6250] eta: 0:00:40 lr: 0.000005 grad: 0.1668 (0.1744) loss: 0.7910 (0.7926) time: 0.1457 data: 0.0439 max mem: 9377 +Train: [87] [6100/6250] eta: 0:00:24 lr: 0.000005 grad: 0.1646 (0.1743) loss: 0.7897 (0.7927) time: 0.1467 data: 0.0539 max mem: 9377 +Train: [87] [6200/6250] eta: 0:00:08 lr: 0.000005 grad: 0.1663 (0.1742) loss: 0.7956 (0.7927) time: 0.1656 data: 0.0772 max mem: 9377 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.1697 (0.1742) loss: 0.7927 (0.7927) time: 0.1615 data: 0.0741 max mem: 9377 +Train: [87] Total time: 0:16:56 (0.1627 s / it) +Averaged stats: lr: 0.000005 grad: 0.1697 (0.1742) loss: 0.7927 (0.7927) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:06:58 loss: 0.8054 (0.8054) time: 6.7519 data: 6.7200 max mem: 9377 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.7844 (0.7869) time: 0.1195 data: 0.0945 max mem: 9377 +Eval (hcp-train-subset): [87] Total time: 0:00:15 (0.2499 s / it) +Averaged stats (hcp-train-subset): loss: 0.7844 (0.7869) +Eval (hcp-val): [87] [ 0/62] eta: 0:04:19 loss: 0.8405 (0.8405) time: 4.1781 data: 4.0927 max mem: 9377 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.8378 (0.8387) time: 0.1311 data: 0.1060 max mem: 9377 +Eval (hcp-val): [87] Total time: 0:00:15 (0.2428 s / it) +Averaged stats (hcp-val): loss: 0.8378 (0.8387) +Eval (nsd-val): [87] [ 0/62] eta: 0:06:09 loss: 0.8118 (0.8118) time: 5.9603 data: 5.9307 max mem: 9377 +Eval (nsd-val): [87] [61/62] eta: 0:00:00 loss: 0.8216 (0.8220) time: 0.1486 data: 0.1217 max mem: 9377 +Eval (nsd-val): [87] Total time: 0:00:14 (0.2404 s / it) +Averaged stats (nsd-val): loss: 0.8216 (0.8220) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [88] [ 0/6250] eta: 11:21:42 lr: 0.000005 grad: 0.1480 (0.1480) loss: 0.8273 (0.8273) time: 6.5445 data: 6.3875 max mem: 9377 +Train: [88] [ 100/6250] eta: 0:23:03 lr: 0.000005 grad: 0.2136 (0.2218) loss: 0.7978 (0.8025) time: 0.1825 data: 0.0681 max mem: 9377 +Train: [88] [ 200/6250] eta: 0:20:15 lr: 0.000005 grad: 0.1937 (0.2133) loss: 0.7935 (0.7962) time: 0.1818 data: 0.0710 max mem: 9377 +Train: [88] [ 300/6250] eta: 0:18:47 lr: 0.000005 grad: 0.1737 (0.2095) loss: 0.7903 (0.7920) time: 0.1551 data: 0.0462 max mem: 9377 +Train: [88] [ 400/6250] eta: 0:17:34 lr: 0.000005 grad: 0.1885 (0.2048) loss: 0.7850 (0.7907) time: 0.1469 data: 0.0320 max mem: 9377 +Train: [88] [ 500/6250] eta: 0:16:50 lr: 0.000005 grad: 0.1814 (0.2010) loss: 0.7847 (0.7901) time: 0.1785 data: 0.0719 max mem: 9377 +Train: [88] [ 600/6250] eta: 0:16:10 lr: 0.000005 grad: 0.1882 (0.1982) loss: 0.7818 (0.7896) time: 0.1577 data: 0.0615 max mem: 9377 +Train: [88] [ 700/6250] eta: 0:15:38 lr: 0.000005 grad: 0.1925 (0.1970) loss: 0.7794 (0.7885) time: 0.1600 data: 0.0577 max mem: 9377 +Train: [88] [ 800/6250] eta: 0:15:12 lr: 0.000005 grad: 0.1873 (0.1968) loss: 0.7794 (0.7878) time: 0.1438 data: 0.0482 max mem: 9377 +Train: [88] [ 900/6250] eta: 0:14:52 lr: 0.000005 grad: 0.1743 (0.1954) loss: 0.7882 (0.7878) time: 0.1543 data: 0.0561 max mem: 9377 +Train: [88] [1000/6250] eta: 0:14:27 lr: 0.000005 grad: 0.1793 (0.1941) loss: 0.7916 (0.7877) time: 0.1353 data: 0.0439 max mem: 9377 +Train: [88] [1100/6250] eta: 0:14:07 lr: 0.000005 grad: 0.1755 (0.1925) loss: 0.7991 (0.7880) time: 0.1673 data: 0.0729 max mem: 9377 +Train: [88] [1200/6250] eta: 0:13:49 lr: 0.000005 grad: 0.1742 (0.1910) loss: 0.7881 (0.7883) time: 0.1791 data: 0.0888 max mem: 9377 +Train: [88] [1300/6250] eta: 0:13:29 lr: 0.000005 grad: 0.1735 (0.1898) loss: 0.7947 (0.7884) time: 0.1335 data: 0.0376 max mem: 9377 +Train: [88] [1400/6250] eta: 0:13:12 lr: 0.000005 grad: 0.1578 (0.1887) loss: 0.7963 (0.7886) time: 0.1730 data: 0.0733 max mem: 9377 +Train: [88] [1500/6250] eta: 0:13:06 lr: 0.000005 grad: 0.1727 (0.1879) loss: 0.7929 (0.7886) time: 0.2076 data: 0.1216 max mem: 9377 +Train: [88] [1600/6250] eta: 0:12:50 lr: 0.000005 grad: 0.1756 (0.1869) loss: 0.7903 (0.7889) time: 0.1488 data: 0.0646 max mem: 9377 +Train: [88] [1700/6250] eta: 0:12:34 lr: 0.000005 grad: 0.1707 (0.1860) loss: 0.7930 (0.7893) time: 0.1962 data: 0.1089 max mem: 9377 +Train: [88] [1800/6250] eta: 0:12:17 lr: 0.000005 grad: 0.1644 (0.1852) loss: 0.7939 (0.7896) time: 0.1778 data: 0.0915 max mem: 9377 +Train: [88] [1900/6250] eta: 0:12:02 lr: 0.000005 grad: 0.1627 (0.1845) loss: 0.7977 (0.7899) time: 0.1716 data: 0.0707 max mem: 9377 +Train: [88] [2000/6250] eta: 0:11:45 lr: 0.000005 grad: 0.1651 (0.1840) loss: 0.7981 (0.7903) time: 0.1370 data: 0.0526 max mem: 9377 +Train: [88] [2100/6250] eta: 0:11:28 lr: 0.000005 grad: 0.1664 (0.1834) loss: 0.8002 (0.7906) time: 0.1544 data: 0.0595 max mem: 9377 +Train: [88] [2200/6250] eta: 0:11:13 lr: 0.000005 grad: 0.1670 (0.1828) loss: 0.7875 (0.7907) time: 0.1791 data: 0.0848 max mem: 9377 +Train: [88] [2300/6250] eta: 0:10:54 lr: 0.000005 grad: 0.1765 (0.1824) loss: 0.7923 (0.7910) time: 0.1368 data: 0.0317 max mem: 9377 +Train: [88] [2400/6250] eta: 0:10:36 lr: 0.000005 grad: 0.1623 (0.1822) loss: 0.8019 (0.7911) time: 0.1568 data: 0.0705 max mem: 9377 +Train: [88] [2500/6250] eta: 0:10:17 lr: 0.000005 grad: 0.1701 (0.1818) loss: 0.7937 (0.7913) time: 0.1345 data: 0.0359 max mem: 9377 +Train: [88] [2600/6250] eta: 0:10:00 lr: 0.000005 grad: 0.1640 (0.1812) loss: 0.7967 (0.7916) time: 0.1935 data: 0.1031 max mem: 9377 +Train: [88] [2700/6250] eta: 0:09:41 lr: 0.000005 grad: 0.1661 (0.1808) loss: 0.7932 (0.7917) time: 0.1500 data: 0.0602 max mem: 9377 +Train: [88] [2800/6250] eta: 0:09:24 lr: 0.000005 grad: 0.1712 (0.1805) loss: 0.7981 (0.7918) time: 0.1519 data: 0.0621 max mem: 9377 +Train: [88] [2900/6250] eta: 0:09:06 lr: 0.000004 grad: 0.1640 (0.1801) loss: 0.7904 (0.7919) time: 0.1640 data: 0.0788 max mem: 9377 +Train: [88] [3000/6250] eta: 0:08:48 lr: 0.000004 grad: 0.1728 (0.1798) loss: 0.7899 (0.7920) time: 0.1364 data: 0.0365 max mem: 9377 +Train: [88] [3100/6250] eta: 0:08:32 lr: 0.000004 grad: 0.1764 (0.1797) loss: 0.7902 (0.7919) time: 0.1703 data: 0.0779 max mem: 9377 +Train: [88] [3200/6250] eta: 0:08:15 lr: 0.000004 grad: 0.1649 (0.1796) loss: 0.7857 (0.7920) time: 0.2064 data: 0.1230 max mem: 9377 +Train: [88] [3300/6250] eta: 0:07:58 lr: 0.000004 grad: 0.1689 (0.1795) loss: 0.7916 (0.7920) time: 0.1607 data: 0.0730 max mem: 9377 +Train: [88] [3400/6250] eta: 0:07:43 lr: 0.000004 grad: 0.1601 (0.1794) loss: 0.7965 (0.7920) time: 0.1160 data: 0.0222 max mem: 9377 +Train: [88] [3500/6250] eta: 0:07:27 lr: 0.000004 grad: 0.1720 (0.1793) loss: 0.7900 (0.7920) time: 0.1746 data: 0.0846 max mem: 9377 +Train: [88] [3600/6250] eta: 0:07:11 lr: 0.000004 grad: 0.1779 (0.1792) loss: 0.7863 (0.7919) time: 0.1758 data: 0.0855 max mem: 9377 +Train: [88] [3700/6250] eta: 0:06:56 lr: 0.000004 grad: 0.1639 (0.1791) loss: 0.7907 (0.7919) time: 0.1303 data: 0.0448 max mem: 9377 +Train: [88] [3800/6250] eta: 0:06:40 lr: 0.000004 grad: 0.1681 (0.1790) loss: 0.7921 (0.7919) time: 0.1376 data: 0.0476 max mem: 9377 +Train: [88] [3900/6250] eta: 0:06:24 lr: 0.000004 grad: 0.1698 (0.1789) loss: 0.7871 (0.7919) time: 0.2153 data: 0.1323 max mem: 9377 +Train: [88] [4000/6250] eta: 0:06:07 lr: 0.000004 grad: 0.1641 (0.1788) loss: 0.7945 (0.7920) time: 0.1701 data: 0.0783 max mem: 9377 +Train: [88] [4100/6250] eta: 0:05:50 lr: 0.000004 grad: 0.1741 (0.1787) loss: 0.7924 (0.7920) time: 0.1761 data: 0.0786 max mem: 9377 +Train: [88] [4200/6250] eta: 0:05:35 lr: 0.000004 grad: 0.1593 (0.1786) loss: 0.8003 (0.7921) time: 0.1968 data: 0.1109 max mem: 9377 +Train: [88] [4300/6250] eta: 0:05:18 lr: 0.000004 grad: 0.1819 (0.1784) loss: 0.7880 (0.7922) time: 0.1727 data: 0.0756 max mem: 9377 +Train: [88] [4400/6250] eta: 0:05:01 lr: 0.000004 grad: 0.1713 (0.1784) loss: 0.7872 (0.7922) time: 0.1580 data: 0.0587 max mem: 9377 +Train: [88] [4500/6250] eta: 0:04:45 lr: 0.000004 grad: 0.1747 (0.1783) loss: 0.7862 (0.7922) time: 0.1574 data: 0.0667 max mem: 9377 +Train: [88] [4600/6250] eta: 0:04:28 lr: 0.000004 grad: 0.1775 (0.1782) loss: 0.7940 (0.7922) time: 0.1762 data: 0.0845 max mem: 9377 +Train: [88] [4700/6250] eta: 0:04:12 lr: 0.000004 grad: 0.1683 (0.1780) loss: 0.7901 (0.7922) time: 0.1722 data: 0.0774 max mem: 9377 +Train: [88] [4800/6250] eta: 0:03:55 lr: 0.000004 grad: 0.1759 (0.1779) loss: 0.7957 (0.7922) time: 0.1328 data: 0.0402 max mem: 9377 +Train: [88] [4900/6250] eta: 0:03:39 lr: 0.000004 grad: 0.1648 (0.1777) loss: 0.7961 (0.7923) time: 0.1652 data: 0.0821 max mem: 9377 +Train: [88] [5000/6250] eta: 0:03:23 lr: 0.000004 grad: 0.1757 (0.1776) loss: 0.7977 (0.7923) time: 0.1708 data: 0.0807 max mem: 9377 +Train: [88] [5100/6250] eta: 0:03:07 lr: 0.000004 grad: 0.1745 (0.1775) loss: 0.7881 (0.7924) time: 0.1569 data: 0.0651 max mem: 9377 +Train: [88] [5200/6250] eta: 0:02:50 lr: 0.000004 grad: 0.1660 (0.1774) loss: 0.7902 (0.7924) time: 0.1490 data: 0.0553 max mem: 9377 +Train: [88] [5300/6250] eta: 0:02:34 lr: 0.000004 grad: 0.1684 (0.1773) loss: 0.7963 (0.7924) time: 0.1779 data: 0.0975 max mem: 9377 +Train: [88] [5400/6250] eta: 0:02:18 lr: 0.000004 grad: 0.1742 (0.1773) loss: 0.7917 (0.7924) time: 0.1804 data: 0.0915 max mem: 9377 +Train: [88] [5500/6250] eta: 0:02:02 lr: 0.000004 grad: 0.1771 (0.1773) loss: 0.7881 (0.7923) time: 0.1614 data: 0.0762 max mem: 9377 +Train: [88] [5600/6250] eta: 0:01:45 lr: 0.000004 grad: 0.1838 (0.1773) loss: 0.7833 (0.7923) time: 0.1659 data: 0.0671 max mem: 9377 +Train: [88] [5700/6250] eta: 0:01:29 lr: 0.000004 grad: 0.1681 (0.1772) loss: 0.7977 (0.7923) time: 0.1588 data: 0.0677 max mem: 9377 +Train: [88] [5800/6250] eta: 0:01:13 lr: 0.000004 grad: 0.1722 (0.1771) loss: 0.7894 (0.7924) time: 0.1598 data: 0.0624 max mem: 9377 +Train: [88] [5900/6250] eta: 0:00:57 lr: 0.000004 grad: 0.1681 (0.1771) loss: 0.7964 (0.7924) time: 0.1508 data: 0.0535 max mem: 9377 +Train: [88] [6000/6250] eta: 0:00:40 lr: 0.000004 grad: 0.1719 (0.1770) loss: 0.7942 (0.7925) time: 0.1643 data: 0.0776 max mem: 9377 +Train: [88] [6100/6250] eta: 0:00:24 lr: 0.000004 grad: 0.1528 (0.1769) loss: 0.8086 (0.7926) time: 0.1439 data: 0.0395 max mem: 9377 +Train: [88] [6200/6250] eta: 0:00:08 lr: 0.000004 grad: 0.1620 (0.1767) loss: 0.7995 (0.7928) time: 0.1608 data: 0.0704 max mem: 9377 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.1606 (0.1766) loss: 0.8098 (0.7928) time: 0.1505 data: 0.0575 max mem: 9377 +Train: [88] Total time: 0:17:01 (0.1634 s / it) +Averaged stats: lr: 0.000004 grad: 0.1606 (0.1766) loss: 0.8098 (0.7928) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:06:11 loss: 0.8029 (0.8029) time: 5.9893 data: 5.9579 max mem: 9377 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.7851 (0.7867) time: 0.1474 data: 0.1217 max mem: 9377 +Eval (hcp-train-subset): [88] Total time: 0:00:15 (0.2442 s / it) +Averaged stats (hcp-train-subset): loss: 0.7851 (0.7867) +Eval (hcp-val): [88] [ 0/62] eta: 0:05:23 loss: 0.8323 (0.8323) time: 5.2255 data: 5.1780 max mem: 9377 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.8367 (0.8387) time: 0.1189 data: 0.0936 max mem: 9377 +Eval (hcp-val): [88] Total time: 0:00:14 (0.2411 s / it) +Averaged stats (hcp-val): loss: 0.8367 (0.8387) +Eval (nsd-val): [88] [ 0/62] eta: 0:06:16 loss: 0.8136 (0.8136) time: 6.0718 data: 6.0383 max mem: 9377 +Eval (nsd-val): [88] [61/62] eta: 0:00:00 loss: 0.8215 (0.8215) time: 0.1194 data: 0.0945 max mem: 9377 +Eval (nsd-val): [88] Total time: 0:00:14 (0.2346 s / it) +Averaged stats (nsd-val): loss: 0.8215 (0.8215) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 11:19:25 lr: 0.000004 grad: 0.1814 (0.1814) loss: 0.8430 (0.8430) time: 6.5225 data: 6.3719 max mem: 9377 +Train: [89] [ 100/6250] eta: 0:23:13 lr: 0.000004 grad: 0.1975 (0.2177) loss: 0.8073 (0.8044) time: 0.1789 data: 0.0696 max mem: 9377 +Train: [89] [ 200/6250] eta: 0:19:59 lr: 0.000004 grad: 0.1944 (0.2052) loss: 0.8077 (0.8047) time: 0.1933 data: 0.0787 max mem: 9377 +Train: [89] [ 300/6250] eta: 0:18:19 lr: 0.000004 grad: 0.1824 (0.2060) loss: 0.7981 (0.8009) time: 0.1665 data: 0.0726 max mem: 9377 +Train: [89] [ 400/6250] eta: 0:17:15 lr: 0.000004 grad: 0.1877 (0.2056) loss: 0.7947 (0.7980) time: 0.1588 data: 0.0658 max mem: 9377 +Train: [89] [ 500/6250] eta: 0:16:33 lr: 0.000004 grad: 0.1784 (0.2040) loss: 0.7974 (0.7954) time: 0.1439 data: 0.0428 max mem: 9377 +Train: [89] [ 600/6250] eta: 0:15:57 lr: 0.000004 grad: 0.1797 (0.2017) loss: 0.7988 (0.7947) time: 0.1520 data: 0.0638 max mem: 9377 +Train: [89] [ 700/6250] eta: 0:15:32 lr: 0.000004 grad: 0.1709 (0.1994) loss: 0.7962 (0.7947) time: 0.1726 data: 0.0801 max mem: 9377 +Train: [89] [ 800/6250] eta: 0:15:05 lr: 0.000004 grad: 0.1848 (0.1972) loss: 0.7943 (0.7947) time: 0.1613 data: 0.0683 max mem: 9377 +Train: [89] [ 900/6250] eta: 0:14:45 lr: 0.000004 grad: 0.1793 (0.1958) loss: 0.7905 (0.7942) time: 0.1704 data: 0.0708 max mem: 9377 +Train: [89] [1000/6250] eta: 0:14:27 lr: 0.000004 grad: 0.1708 (0.1941) loss: 0.7916 (0.7939) time: 0.2023 data: 0.1225 max mem: 9377 +Train: [89] [1100/6250] eta: 0:14:05 lr: 0.000004 grad: 0.1747 (0.1923) loss: 0.7944 (0.7939) time: 0.1445 data: 0.0546 max mem: 9377 +Train: [89] [1200/6250] eta: 0:13:44 lr: 0.000004 grad: 0.1726 (0.1909) loss: 0.7992 (0.7939) time: 0.1735 data: 0.0727 max mem: 9377 +Train: [89] [1300/6250] eta: 0:13:28 lr: 0.000004 grad: 0.1725 (0.1897) loss: 0.7928 (0.7939) time: 0.1802 data: 0.0951 max mem: 9377 +Train: [89] [1400/6250] eta: 0:13:07 lr: 0.000004 grad: 0.1747 (0.1889) loss: 0.7927 (0.7938) time: 0.1471 data: 0.0545 max mem: 9377 +Train: [89] [1500/6250] eta: 0:12:51 lr: 0.000004 grad: 0.1727 (0.1879) loss: 0.7909 (0.7938) time: 0.1781 data: 0.0900 max mem: 9377 +Train: [89] [1600/6250] eta: 0:12:35 lr: 0.000004 grad: 0.1786 (0.1874) loss: 0.7925 (0.7938) time: 0.1853 data: 0.0944 max mem: 9377 +Train: [89] [1700/6250] eta: 0:12:16 lr: 0.000004 grad: 0.1637 (0.1869) loss: 0.7941 (0.7938) time: 0.1582 data: 0.0737 max mem: 9377 +Train: [89] [1800/6250] eta: 0:11:57 lr: 0.000004 grad: 0.1798 (0.1865) loss: 0.7944 (0.7936) time: 0.1421 data: 0.0554 max mem: 9377 +Train: [89] [1900/6250] eta: 0:11:42 lr: 0.000004 grad: 0.1674 (0.1860) loss: 0.7995 (0.7934) time: 0.1478 data: 0.0613 max mem: 9377 +Train: [89] [2000/6250] eta: 0:11:25 lr: 0.000004 grad: 0.1741 (0.1856) loss: 0.7935 (0.7931) time: 0.1497 data: 0.0603 max mem: 9377 +Train: [89] [2100/6250] eta: 0:11:07 lr: 0.000004 grad: 0.1751 (0.1852) loss: 0.7873 (0.7930) time: 0.1235 data: 0.0256 max mem: 9377 +Train: [89] [2200/6250] eta: 0:10:50 lr: 0.000004 grad: 0.1739 (0.1849) loss: 0.7919 (0.7929) time: 0.1696 data: 0.0778 max mem: 9377 +Train: [89] [2300/6250] eta: 0:10:32 lr: 0.000004 grad: 0.1671 (0.1846) loss: 0.7906 (0.7927) time: 0.1558 data: 0.0617 max mem: 9377 +Train: [89] [2400/6250] eta: 0:10:15 lr: 0.000004 grad: 0.1663 (0.1842) loss: 0.7957 (0.7927) time: 0.1784 data: 0.0881 max mem: 9377 +Train: [89] [2500/6250] eta: 0:09:57 lr: 0.000004 grad: 0.1674 (0.1840) loss: 0.7854 (0.7924) time: 0.1558 data: 0.0550 max mem: 9377 +Train: [89] [2600/6250] eta: 0:09:40 lr: 0.000004 grad: 0.1771 (0.1838) loss: 0.7764 (0.7922) time: 0.1509 data: 0.0643 max mem: 9377 +Train: [89] [2700/6250] eta: 0:09:23 lr: 0.000004 grad: 0.1860 (0.1836) loss: 0.7798 (0.7920) time: 0.1558 data: 0.0717 max mem: 9377 +Train: [89] [2800/6250] eta: 0:09:07 lr: 0.000004 grad: 0.1804 (0.1836) loss: 0.7798 (0.7917) time: 0.1576 data: 0.0758 max mem: 9377 +Train: [89] [2900/6250] eta: 0:08:52 lr: 0.000004 grad: 0.1803 (0.1834) loss: 0.7833 (0.7916) time: 0.1532 data: 0.0725 max mem: 9377 +Train: [89] [3000/6250] eta: 0:08:35 lr: 0.000004 grad: 0.1814 (0.1832) loss: 0.7893 (0.7916) time: 0.1549 data: 0.0698 max mem: 9377 +Train: [89] [3100/6250] eta: 0:08:20 lr: 0.000004 grad: 0.1706 (0.1830) loss: 0.7864 (0.7915) time: 0.1888 data: 0.1066 max mem: 9377 +Train: [89] [3200/6250] eta: 0:08:05 lr: 0.000004 grad: 0.1748 (0.1829) loss: 0.7917 (0.7914) time: 0.1685 data: 0.0794 max mem: 9377 +Train: [89] [3300/6250] eta: 0:07:50 lr: 0.000004 grad: 0.1731 (0.1828) loss: 0.7894 (0.7913) time: 0.1754 data: 0.0920 max mem: 9377 +Train: [89] [3400/6250] eta: 0:07:35 lr: 0.000004 grad: 0.1725 (0.1826) loss: 0.7893 (0.7912) time: 0.1644 data: 0.0832 max mem: 9377 +Train: [89] [3500/6250] eta: 0:07:19 lr: 0.000004 grad: 0.1726 (0.1824) loss: 0.7891 (0.7912) time: 0.1627 data: 0.0772 max mem: 9377 +Train: [89] [3600/6250] eta: 0:07:02 lr: 0.000004 grad: 0.1698 (0.1823) loss: 0.7988 (0.7912) time: 0.1494 data: 0.0593 max mem: 9377 +Train: [89] [3700/6250] eta: 0:06:47 lr: 0.000004 grad: 0.1657 (0.1820) loss: 0.7945 (0.7912) time: 0.1390 data: 0.0530 max mem: 9377 +Train: [89] [3800/6250] eta: 0:06:31 lr: 0.000004 grad: 0.1747 (0.1819) loss: 0.7848 (0.7911) time: 0.1638 data: 0.0723 max mem: 9377 +Train: [89] [3900/6250] eta: 0:06:14 lr: 0.000004 grad: 0.1773 (0.1819) loss: 0.7819 (0.7910) time: 0.1412 data: 0.0443 max mem: 9377 +Train: [89] [4000/6250] eta: 0:05:58 lr: 0.000004 grad: 0.1788 (0.1817) loss: 0.7974 (0.7910) time: 0.1744 data: 0.0883 max mem: 9377 +Train: [89] [4100/6250] eta: 0:05:42 lr: 0.000004 grad: 0.1766 (0.1815) loss: 0.7922 (0.7910) time: 0.1530 data: 0.0638 max mem: 9377 +Train: [89] [4200/6250] eta: 0:05:26 lr: 0.000004 grad: 0.1616 (0.1813) loss: 0.7936 (0.7910) time: 0.1439 data: 0.0612 max mem: 9377 +Train: [89] [4300/6250] eta: 0:05:09 lr: 0.000004 grad: 0.1683 (0.1811) loss: 0.7986 (0.7910) time: 0.1441 data: 0.0567 max mem: 9377 +Train: [89] [4400/6250] eta: 0:04:53 lr: 0.000004 grad: 0.1696 (0.1809) loss: 0.7955 (0.7910) time: 0.1369 data: 0.0427 max mem: 9377 +Train: [89] [4500/6250] eta: 0:04:36 lr: 0.000004 grad: 0.1734 (0.1808) loss: 0.7819 (0.7910) time: 0.1480 data: 0.0597 max mem: 9377 +Train: [89] [4600/6250] eta: 0:04:20 lr: 0.000004 grad: 0.1701 (0.1806) loss: 0.7924 (0.7910) time: 0.1681 data: 0.0814 max mem: 9377 +Train: [89] [4700/6250] eta: 0:04:04 lr: 0.000004 grad: 0.1741 (0.1805) loss: 0.7916 (0.7911) time: 0.1479 data: 0.0563 max mem: 9377 +Train: [89] [4800/6250] eta: 0:03:48 lr: 0.000004 grad: 0.1653 (0.1804) loss: 0.8010 (0.7911) time: 0.1322 data: 0.0251 max mem: 9377 +Train: [89] [4900/6250] eta: 0:03:32 lr: 0.000004 grad: 0.1719 (0.1802) loss: 0.7906 (0.7911) time: 0.1642 data: 0.0740 max mem: 9377 +Train: [89] [5000/6250] eta: 0:03:16 lr: 0.000004 grad: 0.1667 (0.1801) loss: 0.7951 (0.7911) time: 0.1506 data: 0.0692 max mem: 9377 +Train: [89] [5100/6250] eta: 0:03:01 lr: 0.000004 grad: 0.1732 (0.1799) loss: 0.7932 (0.7912) time: 0.1840 data: 0.0957 max mem: 9377 +Train: [89] [5200/6250] eta: 0:02:45 lr: 0.000003 grad: 0.1655 (0.1797) loss: 0.7934 (0.7913) time: 0.1451 data: 0.0498 max mem: 9377 +Train: [89] [5300/6250] eta: 0:02:29 lr: 0.000003 grad: 0.1724 (0.1796) loss: 0.7941 (0.7913) time: 0.1468 data: 0.0625 max mem: 9377 +Train: [89] [5400/6250] eta: 0:02:13 lr: 0.000003 grad: 0.1732 (0.1795) loss: 0.7932 (0.7913) time: 0.1738 data: 0.0821 max mem: 9377 +Train: [89] [5500/6250] eta: 0:01:57 lr: 0.000003 grad: 0.1656 (0.1793) loss: 0.7895 (0.7914) time: 0.1555 data: 0.0663 max mem: 9377 +Train: [89] [5600/6250] eta: 0:01:42 lr: 0.000003 grad: 0.1698 (0.1792) loss: 0.7913 (0.7914) time: 0.1621 data: 0.0767 max mem: 9377 +Train: [89] [5700/6250] eta: 0:01:26 lr: 0.000003 grad: 0.1665 (0.1791) loss: 0.7949 (0.7914) time: 0.1064 data: 0.0007 max mem: 9377 +Train: [89] [5800/6250] eta: 0:01:10 lr: 0.000003 grad: 0.1660 (0.1790) loss: 0.7985 (0.7914) time: 0.1325 data: 0.0357 max mem: 9377 +Train: [89] [5900/6250] eta: 0:00:54 lr: 0.000003 grad: 0.1706 (0.1790) loss: 0.7930 (0.7914) time: 0.1641 data: 0.0800 max mem: 9377 +Train: [89] [6000/6250] eta: 0:00:39 lr: 0.000003 grad: 0.1786 (0.1790) loss: 0.7753 (0.7913) time: 0.1954 data: 0.1135 max mem: 9377 +Train: [89] [6100/6250] eta: 0:00:23 lr: 0.000003 grad: 0.1788 (0.1789) loss: 0.7942 (0.7913) time: 0.1433 data: 0.0512 max mem: 9377 +Train: [89] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1678 (0.1789) loss: 0.7935 (0.7913) time: 0.1042 data: 0.0207 max mem: 9377 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1723 (0.1788) loss: 0.7947 (0.7913) time: 0.1408 data: 0.0513 max mem: 9377 +Train: [89] Total time: 0:16:21 (0.1570 s / it) +Averaged stats: lr: 0.000003 grad: 0.1723 (0.1788) loss: 0.7947 (0.7913) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:04:12 loss: 0.8063 (0.8063) time: 4.0728 data: 3.9608 max mem: 9377 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.7846 (0.7856) time: 0.1300 data: 0.1049 max mem: 9377 +Eval (hcp-train-subset): [89] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-train-subset): loss: 0.7846 (0.7856) +Making plots (hcp-train-subset): example=23 +Eval (hcp-val): [89] [ 0/62] eta: 0:05:24 loss: 0.8393 (0.8393) time: 5.2351 data: 5.1458 max mem: 9377 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.8377 (0.8387) time: 0.1287 data: 0.1035 max mem: 9377 +Eval (hcp-val): [89] Total time: 0:00:17 (0.2818 s / it) +Averaged stats (hcp-val): loss: 0.8377 (0.8387) +Making plots (hcp-val): example=5 +Eval (nsd-val): [89] [ 0/62] eta: 0:04:58 loss: 0.8108 (0.8108) time: 4.8135 data: 4.7169 max mem: 9377 +Eval (nsd-val): [89] [61/62] eta: 0:00:00 loss: 0.8202 (0.8229) time: 0.1081 data: 0.0825 max mem: 9377 +Eval (nsd-val): [89] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (nsd-val): loss: 0.8202 (0.8229) +Making plots (nsd-val): example=1 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00089.pth +Train: [90] [ 0/6250] eta: 11:32:05 lr: 0.000003 grad: 0.1071 (0.1071) loss: 0.8289 (0.8289) time: 6.6441 data: 6.5356 max mem: 9377 +Train: [90] [ 100/6250] eta: 0:21:39 lr: 0.000003 grad: 0.2399 (0.2370) loss: 0.8003 (0.8036) time: 0.1534 data: 0.0373 max mem: 9377 +Train: [90] [ 200/6250] eta: 0:18:46 lr: 0.000003 grad: 0.1862 (0.2286) loss: 0.7834 (0.7978) time: 0.1608 data: 0.0585 max mem: 9377 +Train: [90] [ 300/6250] eta: 0:17:22 lr: 0.000003 grad: 0.1908 (0.2191) loss: 0.7931 (0.7960) time: 0.1585 data: 0.0617 max mem: 9377 +Train: [90] [ 400/6250] eta: 0:16:36 lr: 0.000003 grad: 0.1845 (0.2120) loss: 0.7981 (0.7964) time: 0.1685 data: 0.0766 max mem: 9377 +Train: [90] [ 500/6250] eta: 0:15:47 lr: 0.000003 grad: 0.1764 (0.2075) loss: 0.8107 (0.7964) time: 0.1447 data: 0.0568 max mem: 9377 +Train: [90] [ 600/6250] eta: 0:15:17 lr: 0.000003 grad: 0.1789 (0.2029) loss: 0.7986 (0.7966) time: 0.1101 data: 0.0059 max mem: 9377 +Train: [90] [ 700/6250] eta: 0:14:56 lr: 0.000003 grad: 0.1781 (0.1997) loss: 0.7999 (0.7968) time: 0.1631 data: 0.0562 max mem: 9377 +Train: [90] [ 800/6250] eta: 0:14:32 lr: 0.000003 grad: 0.1744 (0.1980) loss: 0.7998 (0.7968) time: 0.1535 data: 0.0592 max mem: 9377 +Train: [90] [ 900/6250] eta: 0:14:12 lr: 0.000003 grad: 0.1758 (0.1957) loss: 0.7926 (0.7966) time: 0.1740 data: 0.0635 max mem: 9377 +Train: [90] [1000/6250] eta: 0:13:47 lr: 0.000003 grad: 0.1681 (0.1937) loss: 0.7911 (0.7965) time: 0.1425 data: 0.0519 max mem: 9377 +Train: [90] [1100/6250] eta: 0:13:26 lr: 0.000003 grad: 0.1783 (0.1921) loss: 0.7862 (0.7960) time: 0.0996 data: 0.0002 max mem: 9377 +Train: [90] [1200/6250] eta: 0:13:07 lr: 0.000003 grad: 0.1789 (0.1910) loss: 0.7913 (0.7957) time: 0.1493 data: 0.0544 max mem: 9377 +Train: [90] [1300/6250] eta: 0:12:49 lr: 0.000003 grad: 0.1764 (0.1901) loss: 0.7886 (0.7951) time: 0.1522 data: 0.0637 max mem: 9377 +Train: [90] [1400/6250] eta: 0:12:30 lr: 0.000003 grad: 0.1753 (0.1893) loss: 0.7909 (0.7948) time: 0.1520 data: 0.0713 max mem: 9377 +Train: [90] [1500/6250] eta: 0:12:15 lr: 0.000003 grad: 0.1824 (0.1890) loss: 0.7835 (0.7943) time: 0.1502 data: 0.0624 max mem: 9377 +Train: [90] [1600/6250] eta: 0:11:59 lr: 0.000003 grad: 0.1776 (0.1882) loss: 0.7844 (0.7939) time: 0.1674 data: 0.0844 max mem: 9377 +Train: [90] [1700/6250] eta: 0:11:45 lr: 0.000003 grad: 0.1689 (0.1877) loss: 0.7951 (0.7936) time: 0.1369 data: 0.0377 max mem: 9377 +Train: [90] [1800/6250] eta: 0:11:27 lr: 0.000003 grad: 0.1769 (0.1872) loss: 0.7957 (0.7935) time: 0.1408 data: 0.0510 max mem: 9377 +Train: [90] [1900/6250] eta: 0:11:10 lr: 0.000003 grad: 0.1754 (0.1868) loss: 0.7892 (0.7933) time: 0.1486 data: 0.0641 max mem: 9377 +Train: [90] [2000/6250] eta: 0:10:54 lr: 0.000003 grad: 0.1734 (0.1866) loss: 0.7851 (0.7930) time: 0.1330 data: 0.0416 max mem: 9377 +Train: [90] [2100/6250] eta: 0:10:38 lr: 0.000003 grad: 0.1749 (0.1862) loss: 0.7862 (0.7927) time: 0.1410 data: 0.0501 max mem: 9377 +Train: [90] [2200/6250] eta: 0:10:21 lr: 0.000003 grad: 0.1698 (0.1859) loss: 0.7815 (0.7925) time: 0.1484 data: 0.0611 max mem: 9377 +Train: [90] [2300/6250] eta: 0:10:04 lr: 0.000003 grad: 0.1752 (0.1855) loss: 0.7862 (0.7924) time: 0.1445 data: 0.0621 max mem: 9377 +Train: [90] [2400/6250] eta: 0:09:47 lr: 0.000003 grad: 0.1677 (0.1850) loss: 0.8002 (0.7923) time: 0.1157 data: 0.0250 max mem: 9377 +Train: [90] [2500/6250] eta: 0:09:32 lr: 0.000003 grad: 0.1746 (0.1846) loss: 0.7930 (0.7923) time: 0.1548 data: 0.0719 max mem: 9377 +Train: [90] [2600/6250] eta: 0:09:16 lr: 0.000003 grad: 0.1781 (0.1844) loss: 0.7875 (0.7922) time: 0.1559 data: 0.0687 max mem: 9377 +Train: [90] [2700/6250] eta: 0:09:00 lr: 0.000003 grad: 0.1721 (0.1840) loss: 0.7866 (0.7922) time: 0.1445 data: 0.0539 max mem: 9377 +Train: [90] [2800/6250] eta: 0:08:44 lr: 0.000003 grad: 0.1741 (0.1837) loss: 0.7927 (0.7922) time: 0.1621 data: 0.0719 max mem: 9377 +Train: [90] [2900/6250] eta: 0:08:28 lr: 0.000003 grad: 0.1754 (0.1834) loss: 0.7887 (0.7923) time: 0.1387 data: 0.0501 max mem: 9377 +Train: [90] [3000/6250] eta: 0:08:13 lr: 0.000003 grad: 0.1677 (0.1831) loss: 0.7941 (0.7923) time: 0.1147 data: 0.0229 max mem: 9377 +Train: [90] [3100/6250] eta: 0:07:57 lr: 0.000003 grad: 0.1722 (0.1831) loss: 0.7929 (0.7922) time: 0.1462 data: 0.0566 max mem: 9377 +Train: [90] [3200/6250] eta: 0:07:42 lr: 0.000003 grad: 0.1832 (0.1830) loss: 0.7886 (0.7923) time: 0.1330 data: 0.0388 max mem: 9377 +Train: [90] [3300/6250] eta: 0:07:26 lr: 0.000003 grad: 0.1778 (0.1828) loss: 0.7920 (0.7923) time: 0.1594 data: 0.0733 max mem: 9377 +Train: [90] [3400/6250] eta: 0:07:11 lr: 0.000003 grad: 0.1732 (0.1826) loss: 0.7921 (0.7923) time: 0.1264 data: 0.0385 max mem: 9377 +Train: [90] [3500/6250] eta: 0:06:56 lr: 0.000003 grad: 0.1728 (0.1825) loss: 0.8006 (0.7923) time: 0.1712 data: 0.0954 max mem: 9377 +Train: [90] [3600/6250] eta: 0:06:42 lr: 0.000003 grad: 0.1602 (0.1823) loss: 0.8020 (0.7924) time: 0.1563 data: 0.0715 max mem: 9377 +Train: [90] [3700/6250] eta: 0:06:27 lr: 0.000003 grad: 0.1825 (0.1822) loss: 0.7925 (0.7924) time: 0.1585 data: 0.0734 max mem: 9377 +Train: [90] [3800/6250] eta: 0:06:12 lr: 0.000003 grad: 0.1779 (0.1821) loss: 0.7948 (0.7925) time: 0.1328 data: 0.0446 max mem: 9377 +Train: [90] [3900/6250] eta: 0:05:57 lr: 0.000003 grad: 0.1796 (0.1821) loss: 0.7950 (0.7926) time: 0.1398 data: 0.0576 max mem: 9377 +Train: [90] [4000/6250] eta: 0:05:42 lr: 0.000003 grad: 0.1645 (0.1819) loss: 0.8069 (0.7927) time: 0.1378 data: 0.0479 max mem: 9377 +Train: [90] [4100/6250] eta: 0:05:27 lr: 0.000003 grad: 0.1729 (0.1818) loss: 0.7951 (0.7927) time: 0.1635 data: 0.0645 max mem: 9377 +Train: [90] [4200/6250] eta: 0:05:12 lr: 0.000003 grad: 0.1790 (0.1818) loss: 0.7833 (0.7926) time: 0.1362 data: 0.0436 max mem: 9377 +Train: [90] [4300/6250] eta: 0:04:56 lr: 0.000003 grad: 0.1717 (0.1817) loss: 0.7921 (0.7926) time: 0.1657 data: 0.0753 max mem: 9377 +Train: [90] [4400/6250] eta: 0:04:41 lr: 0.000003 grad: 0.1782 (0.1817) loss: 0.7879 (0.7926) time: 0.1565 data: 0.0659 max mem: 9377 +Train: [90] [4500/6250] eta: 0:04:25 lr: 0.000003 grad: 0.1739 (0.1816) loss: 0.7942 (0.7925) time: 0.1302 data: 0.0304 max mem: 9377 +Train: [90] [4600/6250] eta: 0:04:10 lr: 0.000003 grad: 0.1756 (0.1816) loss: 0.7883 (0.7924) time: 0.1233 data: 0.0244 max mem: 9377 +Train: [90] [4700/6250] eta: 0:03:54 lr: 0.000003 grad: 0.1799 (0.1816) loss: 0.7891 (0.7924) time: 0.1396 data: 0.0510 max mem: 9377 +Train: [90] [4800/6250] eta: 0:03:39 lr: 0.000003 grad: 0.1768 (0.1816) loss: 0.7880 (0.7923) time: 0.1257 data: 0.0419 max mem: 9377 +Train: [90] [4900/6250] eta: 0:03:24 lr: 0.000003 grad: 0.1765 (0.1815) loss: 0.7931 (0.7922) time: 0.1691 data: 0.0865 max mem: 9377 +Train: [90] [5000/6250] eta: 0:03:09 lr: 0.000003 grad: 0.1649 (0.1814) loss: 0.7862 (0.7922) time: 0.1616 data: 0.0739 max mem: 9377 +Train: [90] [5100/6250] eta: 0:02:54 lr: 0.000003 grad: 0.1793 (0.1813) loss: 0.7891 (0.7922) time: 0.1740 data: 0.0796 max mem: 9377 +Train: [90] [5200/6250] eta: 0:02:39 lr: 0.000003 grad: 0.1776 (0.1813) loss: 0.7905 (0.7922) time: 0.1436 data: 0.0655 max mem: 9377 +Train: [90] [5300/6250] eta: 0:02:24 lr: 0.000003 grad: 0.1800 (0.1813) loss: 0.7837 (0.7921) time: 0.1609 data: 0.0717 max mem: 9377 +Train: [90] [5400/6250] eta: 0:02:08 lr: 0.000003 grad: 0.1736 (0.1812) loss: 0.7948 (0.7921) time: 0.1419 data: 0.0504 max mem: 9377 +Train: [90] [5500/6250] eta: 0:01:53 lr: 0.000003 grad: 0.1698 (0.1811) loss: 0.7931 (0.7921) time: 0.1523 data: 0.0647 max mem: 9377 +Train: [90] [5600/6250] eta: 0:01:38 lr: 0.000003 grad: 0.1885 (0.1812) loss: 0.7826 (0.7920) time: 0.1340 data: 0.0519 max mem: 9377 +Train: [90] [5700/6250] eta: 0:01:23 lr: 0.000003 grad: 0.1717 (0.1812) loss: 0.7917 (0.7920) time: 0.1519 data: 0.0687 max mem: 9377 +Train: [90] [5800/6250] eta: 0:01:08 lr: 0.000003 grad: 0.1731 (0.1812) loss: 0.7899 (0.7919) time: 0.1621 data: 0.0723 max mem: 9377 +Train: [90] [5900/6250] eta: 0:00:52 lr: 0.000003 grad: 0.1798 (0.1812) loss: 0.7848 (0.7917) time: 0.1407 data: 0.0602 max mem: 9377 +Train: [90] [6000/6250] eta: 0:00:37 lr: 0.000003 grad: 0.1848 (0.1813) loss: 0.7799 (0.7916) time: 0.1449 data: 0.0605 max mem: 9377 +Train: [90] [6100/6250] eta: 0:00:22 lr: 0.000003 grad: 0.1713 (0.1813) loss: 0.7833 (0.7915) time: 0.1464 data: 0.0574 max mem: 9377 +Train: [90] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1824 (0.1814) loss: 0.7851 (0.7914) time: 0.1421 data: 0.0520 max mem: 9377 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1750 (0.1814) loss: 0.7905 (0.7914) time: 0.1268 data: 0.0389 max mem: 9377 +Train: [90] Total time: 0:15:47 (0.1516 s / it) +Averaged stats: lr: 0.000003 grad: 0.1750 (0.1814) loss: 0.7905 (0.7914) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:05:13 loss: 0.8046 (0.8046) time: 5.0559 data: 4.9538 max mem: 9377 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.7838 (0.7847) time: 0.1309 data: 0.1057 max mem: 9377 +Eval (hcp-train-subset): [90] Total time: 0:00:14 (0.2381 s / it) +Averaged stats (hcp-train-subset): loss: 0.7838 (0.7847) +Eval (hcp-val): [90] [ 0/62] eta: 0:06:18 loss: 0.8370 (0.8370) time: 6.1011 data: 6.0693 max mem: 9377 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.8380 (0.8391) time: 0.1388 data: 0.1119 max mem: 9377 +Eval (hcp-val): [90] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (hcp-val): loss: 0.8380 (0.8391) +Eval (nsd-val): [90] [ 0/62] eta: 0:05:16 loss: 0.8119 (0.8119) time: 5.1076 data: 5.0625 max mem: 9377 +Eval (nsd-val): [90] [61/62] eta: 0:00:00 loss: 0.8214 (0.8219) time: 0.1354 data: 0.1098 max mem: 9377 +Eval (nsd-val): [90] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (nsd-val): loss: 0.8214 (0.8219) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 11:33:03 lr: 0.000003 grad: 0.1820 (0.1820) loss: 0.8246 (0.8246) time: 6.6534 data: 6.5113 max mem: 9377 +Train: [91] [ 100/6250] eta: 0:22:00 lr: 0.000003 grad: 0.1952 (0.2228) loss: 0.7894 (0.8045) time: 0.1688 data: 0.0570 max mem: 9377 +Train: [91] [ 200/6250] eta: 0:18:34 lr: 0.000003 grad: 0.1722 (0.2084) loss: 0.8031 (0.8014) time: 0.1576 data: 0.0417 max mem: 9377 +Train: [91] [ 300/6250] eta: 0:17:04 lr: 0.000003 grad: 0.1745 (0.1979) loss: 0.8054 (0.8033) time: 0.1570 data: 0.0573 max mem: 9377 +Train: [91] [ 400/6250] eta: 0:16:18 lr: 0.000003 grad: 0.1866 (0.1945) loss: 0.7933 (0.8020) time: 0.1869 data: 0.0965 max mem: 9377 +Train: [91] [ 500/6250] eta: 0:15:45 lr: 0.000003 grad: 0.1903 (0.1933) loss: 0.7893 (0.8002) time: 0.1495 data: 0.0553 max mem: 9377 +Train: [91] [ 600/6250] eta: 0:15:24 lr: 0.000003 grad: 0.1777 (0.1918) loss: 0.7927 (0.7993) time: 0.1377 data: 0.0290 max mem: 9377 +Train: [91] [ 700/6250] eta: 0:15:02 lr: 0.000003 grad: 0.1749 (0.1900) loss: 0.7968 (0.7989) time: 0.1481 data: 0.0478 max mem: 9377 +Train: [91] [ 800/6250] eta: 0:14:47 lr: 0.000003 grad: 0.1808 (0.1890) loss: 0.7838 (0.7979) time: 0.1481 data: 0.0410 max mem: 9377 +Train: [91] [ 900/6250] eta: 0:14:26 lr: 0.000003 grad: 0.1819 (0.1885) loss: 0.7908 (0.7973) time: 0.1721 data: 0.0867 max mem: 9377 +Train: [91] [1000/6250] eta: 0:14:01 lr: 0.000003 grad: 0.1851 (0.1879) loss: 0.7879 (0.7967) time: 0.1394 data: 0.0388 max mem: 9377 +Train: [91] [1100/6250] eta: 0:13:39 lr: 0.000003 grad: 0.1749 (0.1870) loss: 0.8030 (0.7966) time: 0.1246 data: 0.0294 max mem: 9377 +Train: [91] [1200/6250] eta: 0:13:17 lr: 0.000003 grad: 0.1730 (0.1863) loss: 0.8032 (0.7965) time: 0.1506 data: 0.0628 max mem: 9377 +Train: [91] [1300/6250] eta: 0:12:57 lr: 0.000003 grad: 0.1656 (0.1858) loss: 0.7964 (0.7964) time: 0.1692 data: 0.0710 max mem: 9377 +Train: [91] [1400/6250] eta: 0:12:35 lr: 0.000003 grad: 0.1761 (0.1852) loss: 0.7913 (0.7964) time: 0.1451 data: 0.0572 max mem: 9377 +Train: [91] [1500/6250] eta: 0:12:18 lr: 0.000003 grad: 0.1708 (0.1849) loss: 0.8018 (0.7963) time: 0.1650 data: 0.0745 max mem: 9377 +Train: [91] [1600/6250] eta: 0:12:06 lr: 0.000003 grad: 0.1750 (0.1844) loss: 0.7945 (0.7963) time: 0.1685 data: 0.0829 max mem: 9377 +Train: [91] [1700/6250] eta: 0:11:50 lr: 0.000003 grad: 0.1617 (0.1839) loss: 0.7971 (0.7964) time: 0.1425 data: 0.0620 max mem: 9377 +Train: [91] [1800/6250] eta: 0:11:35 lr: 0.000003 grad: 0.1778 (0.1834) loss: 0.7941 (0.7964) time: 0.1479 data: 0.0699 max mem: 9377 +Train: [91] [1900/6250] eta: 0:11:19 lr: 0.000003 grad: 0.1800 (0.1830) loss: 0.7908 (0.7962) time: 0.1757 data: 0.0820 max mem: 9377 +Train: [91] [2000/6250] eta: 0:11:03 lr: 0.000003 grad: 0.1733 (0.1828) loss: 0.7921 (0.7959) time: 0.1501 data: 0.0594 max mem: 9377 +Train: [91] [2100/6250] eta: 0:10:47 lr: 0.000003 grad: 0.1758 (0.1826) loss: 0.7906 (0.7957) time: 0.1488 data: 0.0597 max mem: 9377 +Train: [91] [2200/6250] eta: 0:10:30 lr: 0.000003 grad: 0.1798 (0.1824) loss: 0.7804 (0.7954) time: 0.1326 data: 0.0385 max mem: 9377 +Train: [91] [2300/6250] eta: 0:10:13 lr: 0.000003 grad: 0.1731 (0.1823) loss: 0.7899 (0.7951) time: 0.1464 data: 0.0564 max mem: 9377 +Train: [91] [2400/6250] eta: 0:09:56 lr: 0.000003 grad: 0.1650 (0.1821) loss: 0.7836 (0.7948) time: 0.1488 data: 0.0653 max mem: 9377 +Train: [91] [2500/6250] eta: 0:09:39 lr: 0.000003 grad: 0.1779 (0.1820) loss: 0.7954 (0.7945) time: 0.1372 data: 0.0445 max mem: 9377 +Train: [91] [2600/6250] eta: 0:09:24 lr: 0.000003 grad: 0.1742 (0.1818) loss: 0.7939 (0.7943) time: 0.1953 data: 0.1052 max mem: 9377 +Train: [91] [2700/6250] eta: 0:09:07 lr: 0.000002 grad: 0.1731 (0.1817) loss: 0.7959 (0.7942) time: 0.1561 data: 0.0664 max mem: 9377 +Train: [91] [2800/6250] eta: 0:08:55 lr: 0.000002 grad: 0.1794 (0.1815) loss: 0.7923 (0.7941) time: 0.2251 data: 0.1344 max mem: 9377 +Train: [91] [2900/6250] eta: 0:08:44 lr: 0.000002 grad: 0.1748 (0.1815) loss: 0.7941 (0.7939) time: 0.1352 data: 0.0381 max mem: 9377 +Train: [91] [3000/6250] eta: 0:08:36 lr: 0.000002 grad: 0.1733 (0.1814) loss: 0.7925 (0.7938) time: 0.4124 data: 0.3137 max mem: 9377 +Train: [91] [3100/6250] eta: 0:08:24 lr: 0.000002 grad: 0.1808 (0.1814) loss: 0.7946 (0.7938) time: 0.1760 data: 0.0562 max mem: 9377 +Train: [91] [3200/6250] eta: 0:08:06 lr: 0.000002 grad: 0.1758 (0.1814) loss: 0.7949 (0.7937) time: 0.1063 data: 0.0044 max mem: 9377 +Train: [91] [3300/6250] eta: 0:07:50 lr: 0.000002 grad: 0.1714 (0.1813) loss: 0.7946 (0.7936) time: 0.1435 data: 0.0567 max mem: 9377 +Train: [91] [3400/6250] eta: 0:07:34 lr: 0.000002 grad: 0.1707 (0.1814) loss: 0.7877 (0.7935) time: 0.2055 data: 0.1072 max mem: 9377 +Train: [91] [3500/6250] eta: 0:07:20 lr: 0.000002 grad: 0.1690 (0.1813) loss: 0.7895 (0.7934) time: 0.1351 data: 0.0225 max mem: 9377 +Train: [91] [3600/6250] eta: 0:07:09 lr: 0.000002 grad: 0.1717 (0.1814) loss: 0.7989 (0.7934) time: 0.1867 data: 0.1002 max mem: 9377 +Train: [91] [3700/6250] eta: 0:06:54 lr: 0.000002 grad: 0.1655 (0.1813) loss: 0.7948 (0.7934) time: 0.1852 data: 0.1019 max mem: 9377 +Train: [91] [3800/6250] eta: 0:06:37 lr: 0.000002 grad: 0.1692 (0.1811) loss: 0.7930 (0.7934) time: 0.1334 data: 0.0536 max mem: 9377 +Train: [91] [3900/6250] eta: 0:06:21 lr: 0.000002 grad: 0.1796 (0.1811) loss: 0.7907 (0.7934) time: 0.1541 data: 0.0674 max mem: 9377 +Train: [91] [4000/6250] eta: 0:06:04 lr: 0.000002 grad: 0.1748 (0.1809) loss: 0.7951 (0.7935) time: 0.1664 data: 0.0718 max mem: 9377 +Train: [91] [4100/6250] eta: 0:05:48 lr: 0.000002 grad: 0.1681 (0.1808) loss: 0.7939 (0.7936) time: 0.1848 data: 0.0941 max mem: 9377 +Train: [91] [4200/6250] eta: 0:05:32 lr: 0.000002 grad: 0.1726 (0.1806) loss: 0.7988 (0.7937) time: 0.1425 data: 0.0519 max mem: 9377 +Train: [91] [4300/6250] eta: 0:05:15 lr: 0.000002 grad: 0.1703 (0.1804) loss: 0.7985 (0.7938) time: 0.1672 data: 0.0802 max mem: 9377 +Train: [91] [4400/6250] eta: 0:04:58 lr: 0.000002 grad: 0.1739 (0.1803) loss: 0.7970 (0.7939) time: 0.1376 data: 0.0416 max mem: 9377 +Train: [91] [4500/6250] eta: 0:04:42 lr: 0.000002 grad: 0.1733 (0.1802) loss: 0.8021 (0.7940) time: 0.1350 data: 0.0327 max mem: 9377 +Train: [91] [4600/6250] eta: 0:04:25 lr: 0.000002 grad: 0.1692 (0.1802) loss: 0.7976 (0.7941) time: 0.1417 data: 0.0530 max mem: 9377 +Train: [91] [4700/6250] eta: 0:04:08 lr: 0.000002 grad: 0.1749 (0.1803) loss: 0.8012 (0.7942) time: 0.1035 data: 0.0114 max mem: 9377 +Train: [91] [4800/6250] eta: 0:03:52 lr: 0.000002 grad: 0.1766 (0.1803) loss: 0.7943 (0.7943) time: 0.1456 data: 0.0591 max mem: 9377 +Train: [91] [4900/6250] eta: 0:03:35 lr: 0.000002 grad: 0.1726 (0.1803) loss: 0.7946 (0.7943) time: 0.1586 data: 0.0750 max mem: 9377 +Train: [91] [5000/6250] eta: 0:03:20 lr: 0.000002 grad: 0.1737 (0.1802) loss: 0.8009 (0.7944) time: 0.1919 data: 0.1068 max mem: 9377 +Train: [91] [5100/6250] eta: 0:03:03 lr: 0.000002 grad: 0.1805 (0.1803) loss: 0.7938 (0.7943) time: 0.1643 data: 0.0705 max mem: 9377 +Train: [91] [5200/6250] eta: 0:02:47 lr: 0.000002 grad: 0.1806 (0.1804) loss: 0.7948 (0.7943) time: 0.1154 data: 0.0320 max mem: 9377 +Train: [91] [5300/6250] eta: 0:02:31 lr: 0.000002 grad: 0.1864 (0.1804) loss: 0.7952 (0.7943) time: 0.1407 data: 0.0407 max mem: 9377 +Train: [91] [5400/6250] eta: 0:02:15 lr: 0.000002 grad: 0.1863 (0.1805) loss: 0.7918 (0.7943) time: 0.1835 data: 0.0988 max mem: 9377 +Train: [91] [5500/6250] eta: 0:01:59 lr: 0.000002 grad: 0.1816 (0.1806) loss: 0.7924 (0.7943) time: 0.1604 data: 0.0727 max mem: 9377 +Train: [91] [5600/6250] eta: 0:01:43 lr: 0.000002 grad: 0.1751 (0.1806) loss: 0.8019 (0.7943) time: 0.1524 data: 0.0698 max mem: 9377 +Train: [91] [5700/6250] eta: 0:01:27 lr: 0.000002 grad: 0.1863 (0.1806) loss: 0.7897 (0.7943) time: 0.1752 data: 0.0823 max mem: 9377 +Train: [91] [5800/6250] eta: 0:01:11 lr: 0.000002 grad: 0.1713 (0.1807) loss: 0.7979 (0.7943) time: 0.1561 data: 0.0700 max mem: 9377 +Train: [91] [5900/6250] eta: 0:00:55 lr: 0.000002 grad: 0.1851 (0.1808) loss: 0.7932 (0.7943) time: 0.1474 data: 0.0571 max mem: 9377 +Train: [91] [6000/6250] eta: 0:00:39 lr: 0.000002 grad: 0.1704 (0.1807) loss: 0.7976 (0.7943) time: 0.1759 data: 0.0796 max mem: 9377 +Train: [91] [6100/6250] eta: 0:00:23 lr: 0.000002 grad: 0.1725 (0.1807) loss: 0.7990 (0.7943) time: 0.1540 data: 0.0720 max mem: 9377 +Train: [91] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1812 (0.1808) loss: 0.7908 (0.7942) time: 0.1421 data: 0.0598 max mem: 9377 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1777 (0.1809) loss: 0.7868 (0.7942) time: 0.1598 data: 0.0736 max mem: 9377 +Train: [91] Total time: 0:16:38 (0.1598 s / it) +Averaged stats: lr: 0.000002 grad: 0.1777 (0.1809) loss: 0.7868 (0.7942) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:04:37 loss: 0.8049 (0.8049) time: 4.4722 data: 4.4050 max mem: 9377 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.7840 (0.7841) time: 0.1124 data: 0.0877 max mem: 9377 +Eval (hcp-train-subset): [91] Total time: 0:00:13 (0.2225 s / it) +Averaged stats (hcp-train-subset): loss: 0.7840 (0.7841) +Eval (hcp-val): [91] [ 0/62] eta: 0:03:54 loss: 0.8382 (0.8382) time: 3.7827 data: 3.7016 max mem: 9377 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.8366 (0.8386) time: 0.1239 data: 0.0987 max mem: 9377 +Eval (hcp-val): [91] Total time: 0:00:14 (0.2324 s / it) +Averaged stats (hcp-val): loss: 0.8366 (0.8386) +Eval (nsd-val): [91] [ 0/62] eta: 0:06:19 loss: 0.8090 (0.8090) time: 6.1148 data: 6.0838 max mem: 9377 +Eval (nsd-val): [91] [61/62] eta: 0:00:00 loss: 0.8198 (0.8204) time: 0.1391 data: 0.1137 max mem: 9377 +Eval (nsd-val): [91] Total time: 0:00:14 (0.2315 s / it) +Averaged stats (nsd-val): loss: 0.8198 (0.8204) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [92] [ 0/6250] eta: 9:30:22 lr: 0.000002 grad: 0.3717 (0.3717) loss: 0.7647 (0.7647) time: 5.4757 data: 5.1726 max mem: 9377 +Train: [92] [ 100/6250] eta: 0:22:36 lr: 0.000002 grad: 0.2217 (0.2534) loss: 0.7912 (0.7913) time: 0.1869 data: 0.0910 max mem: 9377 +Train: [92] [ 200/6250] eta: 0:18:52 lr: 0.000002 grad: 0.1842 (0.2303) loss: 0.8033 (0.7964) time: 0.1679 data: 0.0635 max mem: 9377 +Train: [92] [ 300/6250] eta: 0:17:32 lr: 0.000002 grad: 0.1926 (0.2185) loss: 0.8043 (0.7986) time: 0.1727 data: 0.0744 max mem: 9377 +Train: [92] [ 400/6250] eta: 0:16:36 lr: 0.000002 grad: 0.1743 (0.2106) loss: 0.8019 (0.7991) time: 0.1487 data: 0.0460 max mem: 9377 +Train: [92] [ 500/6250] eta: 0:15:57 lr: 0.000002 grad: 0.1790 (0.2059) loss: 0.7971 (0.7995) time: 0.1472 data: 0.0504 max mem: 9377 +Train: [92] [ 600/6250] eta: 0:15:21 lr: 0.000002 grad: 0.1859 (0.2045) loss: 0.8017 (0.7987) time: 0.1347 data: 0.0379 max mem: 9377 +Train: [92] [ 700/6250] eta: 0:14:59 lr: 0.000002 grad: 0.1899 (0.2032) loss: 0.7911 (0.7977) time: 0.1513 data: 0.0528 max mem: 9377 +Train: [92] [ 800/6250] eta: 0:14:36 lr: 0.000002 grad: 0.1917 (0.2016) loss: 0.7990 (0.7975) time: 0.1509 data: 0.0607 max mem: 9377 +Train: [92] [ 900/6250] eta: 0:14:18 lr: 0.000002 grad: 0.1874 (0.2011) loss: 0.7911 (0.7968) time: 0.1520 data: 0.0576 max mem: 9377 +Train: [92] [1000/6250] eta: 0:13:57 lr: 0.000002 grad: 0.1792 (0.2001) loss: 0.7998 (0.7967) time: 0.1655 data: 0.0740 max mem: 9377 +Train: [92] [1100/6250] eta: 0:13:34 lr: 0.000002 grad: 0.1995 (0.1989) loss: 0.7887 (0.7965) time: 0.1710 data: 0.0886 max mem: 9377 +Train: [92] [1200/6250] eta: 0:13:16 lr: 0.000002 grad: 0.1717 (0.1972) loss: 0.8000 (0.7966) time: 0.1627 data: 0.0740 max mem: 9377 +Train: [92] [1300/6250] eta: 0:12:54 lr: 0.000002 grad: 0.1906 (0.1966) loss: 0.7939 (0.7962) time: 0.1236 data: 0.0251 max mem: 9377 +Train: [92] [1400/6250] eta: 0:12:36 lr: 0.000002 grad: 0.1882 (0.1960) loss: 0.7877 (0.7957) time: 0.1537 data: 0.0724 max mem: 9377 +Train: [92] [1500/6250] eta: 0:12:23 lr: 0.000002 grad: 0.1818 (0.1953) loss: 0.7874 (0.7952) time: 0.2051 data: 0.1189 max mem: 9377 +Train: [92] [1600/6250] eta: 0:12:06 lr: 0.000002 grad: 0.1814 (0.1947) loss: 0.7948 (0.7950) time: 0.1520 data: 0.0700 max mem: 9377 +Train: [92] [1700/6250] eta: 0:11:50 lr: 0.000002 grad: 0.1826 (0.1943) loss: 0.7901 (0.7946) time: 0.1483 data: 0.0668 max mem: 9377 +Train: [92] [1800/6250] eta: 0:11:34 lr: 0.000002 grad: 0.1801 (0.1936) loss: 0.7842 (0.7945) time: 0.1663 data: 0.0891 max mem: 9377 +Train: [92] [1900/6250] eta: 0:11:19 lr: 0.000002 grad: 0.1752 (0.1928) loss: 0.7919 (0.7944) time: 0.1636 data: 0.0667 max mem: 9377 +Train: [92] [2000/6250] eta: 0:11:03 lr: 0.000002 grad: 0.1673 (0.1923) loss: 0.7949 (0.7943) time: 0.1360 data: 0.0433 max mem: 9377 +Train: [92] [2100/6250] eta: 0:10:48 lr: 0.000002 grad: 0.1772 (0.1916) loss: 0.7946 (0.7943) time: 0.1735 data: 0.0957 max mem: 9377 +Train: [92] [2200/6250] eta: 0:10:31 lr: 0.000002 grad: 0.1791 (0.1911) loss: 0.7877 (0.7942) time: 0.1613 data: 0.0764 max mem: 9377 +Train: [92] [2300/6250] eta: 0:10:12 lr: 0.000002 grad: 0.1811 (0.1905) loss: 0.7926 (0.7942) time: 0.1205 data: 0.0286 max mem: 9377 +Train: [92] [2400/6250] eta: 0:09:56 lr: 0.000002 grad: 0.1659 (0.1899) loss: 0.7970 (0.7942) time: 0.1441 data: 0.0546 max mem: 9377 +Train: [92] [2500/6250] eta: 0:09:39 lr: 0.000002 grad: 0.1719 (0.1895) loss: 0.7988 (0.7943) time: 0.1401 data: 0.0554 max mem: 9377 +Train: [92] [2600/6250] eta: 0:09:23 lr: 0.000002 grad: 0.1853 (0.1891) loss: 0.7884 (0.7942) time: 0.1631 data: 0.0737 max mem: 9377 +Train: [92] [2700/6250] eta: 0:09:08 lr: 0.000002 grad: 0.1795 (0.1890) loss: 0.7897 (0.7941) time: 0.1320 data: 0.0468 max mem: 9377 +Train: [92] [2800/6250] eta: 0:08:51 lr: 0.000002 grad: 0.1739 (0.1887) loss: 0.7917 (0.7940) time: 0.1506 data: 0.0603 max mem: 9377 +Train: [92] [2900/6250] eta: 0:08:35 lr: 0.000002 grad: 0.1763 (0.1886) loss: 0.7915 (0.7938) time: 0.1483 data: 0.0598 max mem: 9377 +Train: [92] [3000/6250] eta: 0:08:19 lr: 0.000002 grad: 0.1783 (0.1885) loss: 0.7853 (0.7937) time: 0.1641 data: 0.0837 max mem: 9377 +Train: [92] [3100/6250] eta: 0:08:03 lr: 0.000002 grad: 0.1796 (0.1883) loss: 0.7939 (0.7936) time: 0.1406 data: 0.0550 max mem: 9377 +Train: [92] [3200/6250] eta: 0:07:48 lr: 0.000002 grad: 0.1784 (0.1881) loss: 0.7925 (0.7936) time: 0.1593 data: 0.0726 max mem: 9377 +Train: [92] [3300/6250] eta: 0:07:32 lr: 0.000002 grad: 0.1701 (0.1879) loss: 0.7939 (0.7935) time: 0.1620 data: 0.0793 max mem: 9377 +Train: [92] [3400/6250] eta: 0:07:18 lr: 0.000002 grad: 0.1817 (0.1877) loss: 0.7905 (0.7934) time: 0.1718 data: 0.0911 max mem: 9377 +Train: [92] [3500/6250] eta: 0:07:01 lr: 0.000002 grad: 0.1798 (0.1874) loss: 0.7944 (0.7934) time: 0.1403 data: 0.0571 max mem: 9377 +Train: [92] [3600/6250] eta: 0:06:45 lr: 0.000002 grad: 0.1717 (0.1871) loss: 0.7944 (0.7935) time: 0.1513 data: 0.0657 max mem: 9377 +Train: [92] [3700/6250] eta: 0:06:31 lr: 0.000002 grad: 0.1747 (0.1868) loss: 0.7960 (0.7935) time: 0.1566 data: 0.0669 max mem: 9377 +Train: [92] [3800/6250] eta: 0:06:16 lr: 0.000002 grad: 0.1655 (0.1865) loss: 0.7996 (0.7936) time: 0.1667 data: 0.0834 max mem: 9377 +Train: [92] [3900/6250] eta: 0:06:01 lr: 0.000002 grad: 0.1784 (0.1863) loss: 0.7941 (0.7936) time: 0.1490 data: 0.0619 max mem: 9377 +Train: [92] [4000/6250] eta: 0:05:45 lr: 0.000002 grad: 0.1645 (0.1860) loss: 0.7978 (0.7936) time: 0.1540 data: 0.0667 max mem: 9377 +Train: [92] [4100/6250] eta: 0:05:30 lr: 0.000002 grad: 0.1814 (0.1858) loss: 0.7984 (0.7937) time: 0.1685 data: 0.0805 max mem: 9377 +Train: [92] [4200/6250] eta: 0:05:15 lr: 0.000002 grad: 0.1805 (0.1857) loss: 0.8005 (0.7938) time: 0.1441 data: 0.0482 max mem: 9377 +Train: [92] [4300/6250] eta: 0:04:59 lr: 0.000002 grad: 0.1646 (0.1856) loss: 0.8045 (0.7939) time: 0.1489 data: 0.0570 max mem: 9377 +Train: [92] [4400/6250] eta: 0:04:44 lr: 0.000002 grad: 0.1723 (0.1855) loss: 0.7978 (0.7939) time: 0.1474 data: 0.0564 max mem: 9377 +Train: [92] [4500/6250] eta: 0:04:28 lr: 0.000002 grad: 0.1665 (0.1853) loss: 0.7971 (0.7939) time: 0.1559 data: 0.0636 max mem: 9377 +Train: [92] [4600/6250] eta: 0:04:12 lr: 0.000002 grad: 0.1745 (0.1850) loss: 0.8001 (0.7940) time: 0.1305 data: 0.0348 max mem: 9377 +Train: [92] [4700/6250] eta: 0:03:57 lr: 0.000002 grad: 0.1790 (0.1848) loss: 0.7943 (0.7941) time: 0.1405 data: 0.0529 max mem: 9377 +Train: [92] [4800/6250] eta: 0:03:41 lr: 0.000002 grad: 0.1723 (0.1847) loss: 0.7890 (0.7941) time: 0.1613 data: 0.0745 max mem: 9377 +Train: [92] [4900/6250] eta: 0:03:26 lr: 0.000002 grad: 0.1708 (0.1845) loss: 0.7931 (0.7941) time: 0.1630 data: 0.0766 max mem: 9377 +Train: [92] [5000/6250] eta: 0:03:11 lr: 0.000002 grad: 0.1838 (0.1844) loss: 0.7901 (0.7941) time: 0.1780 data: 0.0883 max mem: 9377 +Train: [92] [5100/6250] eta: 0:02:56 lr: 0.000002 grad: 0.1693 (0.1843) loss: 0.7958 (0.7940) time: 0.1513 data: 0.0609 max mem: 9377 +Train: [92] [5200/6250] eta: 0:02:40 lr: 0.000002 grad: 0.1717 (0.1842) loss: 0.7854 (0.7940) time: 0.1486 data: 0.0700 max mem: 9377 +Train: [92] [5300/6250] eta: 0:02:25 lr: 0.000002 grad: 0.1715 (0.1841) loss: 0.7898 (0.7939) time: 0.1455 data: 0.0579 max mem: 9377 +Train: [92] [5400/6250] eta: 0:02:10 lr: 0.000002 grad: 0.1747 (0.1841) loss: 0.7939 (0.7939) time: 0.1675 data: 0.0847 max mem: 9377 +Train: [92] [5500/6250] eta: 0:01:54 lr: 0.000002 grad: 0.1746 (0.1840) loss: 0.7931 (0.7938) time: 0.1517 data: 0.0636 max mem: 9377 +Train: [92] [5600/6250] eta: 0:01:39 lr: 0.000002 grad: 0.1850 (0.1840) loss: 0.7831 (0.7937) time: 0.1389 data: 0.0506 max mem: 9377 +Train: [92] [5700/6250] eta: 0:01:24 lr: 0.000002 grad: 0.1800 (0.1840) loss: 0.7892 (0.7936) time: 0.1396 data: 0.0497 max mem: 9377 +Train: [92] [5800/6250] eta: 0:01:08 lr: 0.000002 grad: 0.1866 (0.1840) loss: 0.7778 (0.7935) time: 0.1495 data: 0.0636 max mem: 9377 +Train: [92] [5900/6250] eta: 0:00:53 lr: 0.000002 grad: 0.1866 (0.1839) loss: 0.7972 (0.7935) time: 0.1424 data: 0.0591 max mem: 9377 +Train: [92] [6000/6250] eta: 0:00:38 lr: 0.000002 grad: 0.1767 (0.1839) loss: 0.7951 (0.7934) time: 0.1307 data: 0.0418 max mem: 9377 +Train: [92] [6100/6250] eta: 0:00:22 lr: 0.000002 grad: 0.1752 (0.1839) loss: 0.7854 (0.7934) time: 0.1452 data: 0.0616 max mem: 9377 +Train: [92] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1779 (0.1838) loss: 0.7894 (0.7934) time: 0.1564 data: 0.0670 max mem: 9377 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1670 (0.1837) loss: 0.7970 (0.7934) time: 0.1297 data: 0.0441 max mem: 9377 +Train: [92] Total time: 0:15:58 (0.1534 s / it) +Averaged stats: lr: 0.000002 grad: 0.1670 (0.1837) loss: 0.7970 (0.7934) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:05:12 loss: 0.8049 (0.8049) time: 5.0347 data: 4.9455 max mem: 9377 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.7829 (0.7838) time: 0.1333 data: 0.1080 max mem: 9377 +Eval (hcp-train-subset): [92] Total time: 0:00:14 (0.2273 s / it) +Averaged stats (hcp-train-subset): loss: 0.7829 (0.7838) +Eval (hcp-val): [92] [ 0/62] eta: 0:04:10 loss: 0.8386 (0.8386) time: 4.0415 data: 3.9569 max mem: 9377 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.8386 (0.8384) time: 0.1178 data: 0.0924 max mem: 9377 +Eval (hcp-val): [92] Total time: 0:00:13 (0.2213 s / it) +Averaged stats (hcp-val): loss: 0.8386 (0.8384) +Eval (nsd-val): [92] [ 0/62] eta: 0:04:15 loss: 0.8134 (0.8134) time: 4.1152 data: 4.0523 max mem: 9377 +Eval (nsd-val): [92] [61/62] eta: 0:00:00 loss: 0.8194 (0.8219) time: 0.1290 data: 0.1021 max mem: 9377 +Eval (nsd-val): [92] Total time: 0:00:13 (0.2196 s / it) +Averaged stats (nsd-val): loss: 0.8194 (0.8219) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 11:08:20 lr: 0.000002 grad: 0.2318 (0.2318) loss: 0.8118 (0.8118) time: 6.4162 data: 6.2876 max mem: 9377 +Train: [93] [ 100/6250] eta: 0:21:41 lr: 0.000002 grad: 0.2038 (0.2261) loss: 0.7828 (0.7891) time: 0.1658 data: 0.0666 max mem: 9377 +Train: [93] [ 200/6250] eta: 0:18:39 lr: 0.000002 grad: 0.1916 (0.2130) loss: 0.7932 (0.7892) time: 0.1590 data: 0.0488 max mem: 9377 +Train: [93] [ 300/6250] eta: 0:17:09 lr: 0.000002 grad: 0.1973 (0.2087) loss: 0.7915 (0.7887) time: 0.1514 data: 0.0512 max mem: 9377 +Train: [93] [ 400/6250] eta: 0:16:16 lr: 0.000002 grad: 0.1950 (0.2051) loss: 0.7995 (0.7884) time: 0.1541 data: 0.0634 max mem: 9377 +Train: [93] [ 500/6250] eta: 0:15:36 lr: 0.000002 grad: 0.1879 (0.2035) loss: 0.7875 (0.7879) time: 0.1496 data: 0.0594 max mem: 9377 +Train: [93] [ 600/6250] eta: 0:15:09 lr: 0.000002 grad: 0.1860 (0.2006) loss: 0.7877 (0.7883) time: 0.1538 data: 0.0694 max mem: 9377 +Train: [93] [ 700/6250] eta: 0:14:45 lr: 0.000002 grad: 0.1681 (0.1977) loss: 0.7968 (0.7892) time: 0.1531 data: 0.0607 max mem: 9377 +Train: [93] [ 800/6250] eta: 0:14:20 lr: 0.000002 grad: 0.1748 (0.1955) loss: 0.7943 (0.7901) time: 0.1518 data: 0.0675 max mem: 9377 +Train: [93] [ 900/6250] eta: 0:14:01 lr: 0.000002 grad: 0.1649 (0.1935) loss: 0.7970 (0.7906) time: 0.1417 data: 0.0471 max mem: 9377 +Train: [93] [1000/6250] eta: 0:13:40 lr: 0.000002 grad: 0.1709 (0.1918) loss: 0.8002 (0.7913) time: 0.1417 data: 0.0496 max mem: 9377 +Train: [93] [1100/6250] eta: 0:13:21 lr: 0.000002 grad: 0.1777 (0.1903) loss: 0.7969 (0.7920) time: 0.1650 data: 0.0826 max mem: 9377 +Train: [93] [1200/6250] eta: 0:13:04 lr: 0.000002 grad: 0.1765 (0.1895) loss: 0.7977 (0.7924) time: 0.1640 data: 0.0796 max mem: 9377 +Train: [93] [1300/6250] eta: 0:12:45 lr: 0.000002 grad: 0.1713 (0.1889) loss: 0.7982 (0.7927) time: 0.1256 data: 0.0164 max mem: 9377 +Train: [93] [1400/6250] eta: 0:12:26 lr: 0.000002 grad: 0.1870 (0.1885) loss: 0.7992 (0.7928) time: 0.1431 data: 0.0539 max mem: 9377 +Train: [93] [1500/6250] eta: 0:12:09 lr: 0.000002 grad: 0.1752 (0.1880) loss: 0.7934 (0.7929) time: 0.1636 data: 0.0767 max mem: 9377 +Train: [93] [1600/6250] eta: 0:11:53 lr: 0.000002 grad: 0.1800 (0.1878) loss: 0.7804 (0.7926) time: 0.1469 data: 0.0603 max mem: 9377 +Train: [93] [1700/6250] eta: 0:11:38 lr: 0.000002 grad: 0.1828 (0.1877) loss: 0.7890 (0.7925) time: 0.1377 data: 0.0480 max mem: 9377 +Train: [93] [1800/6250] eta: 0:11:24 lr: 0.000002 grad: 0.1810 (0.1874) loss: 0.7921 (0.7924) time: 0.1624 data: 0.0810 max mem: 9377 +Train: [93] [1900/6250] eta: 0:11:08 lr: 0.000002 grad: 0.1760 (0.1871) loss: 0.7926 (0.7923) time: 0.1653 data: 0.0675 max mem: 9377 +Train: [93] [2000/6250] eta: 0:10:55 lr: 0.000002 grad: 0.1796 (0.1869) loss: 0.7952 (0.7922) time: 0.1646 data: 0.0774 max mem: 9377 +Train: [93] [2100/6250] eta: 0:10:41 lr: 0.000002 grad: 0.1714 (0.1868) loss: 0.7949 (0.7920) time: 0.1822 data: 0.0909 max mem: 9377 +Train: [93] [2200/6250] eta: 0:10:24 lr: 0.000002 grad: 0.1781 (0.1867) loss: 0.7879 (0.7918) time: 0.1672 data: 0.0798 max mem: 9377 +Train: [93] [2300/6250] eta: 0:10:07 lr: 0.000001 grad: 0.1816 (0.1865) loss: 0.7831 (0.7916) time: 0.1313 data: 0.0363 max mem: 9377 +Train: [93] [2400/6250] eta: 0:09:50 lr: 0.000001 grad: 0.1814 (0.1866) loss: 0.7845 (0.7912) time: 0.1366 data: 0.0419 max mem: 9377 +Train: [93] [2500/6250] eta: 0:09:34 lr: 0.000001 grad: 0.1788 (0.1866) loss: 0.7854 (0.7909) time: 0.1499 data: 0.0651 max mem: 9377 +Train: [93] [2600/6250] eta: 0:09:17 lr: 0.000001 grad: 0.1701 (0.1865) loss: 0.7823 (0.7907) time: 0.1614 data: 0.0787 max mem: 9377 +Train: [93] [2700/6250] eta: 0:09:01 lr: 0.000001 grad: 0.1862 (0.1865) loss: 0.7799 (0.7905) time: 0.1420 data: 0.0605 max mem: 9377 +Train: [93] [2800/6250] eta: 0:08:46 lr: 0.000001 grad: 0.1732 (0.1865) loss: 0.7802 (0.7902) time: 0.1324 data: 0.0459 max mem: 9377 +Train: [93] [2900/6250] eta: 0:08:30 lr: 0.000001 grad: 0.1887 (0.1866) loss: 0.7823 (0.7900) time: 0.1493 data: 0.0660 max mem: 9377 +Train: [93] [3000/6250] eta: 0:08:15 lr: 0.000001 grad: 0.1815 (0.1866) loss: 0.7843 (0.7898) time: 0.1180 data: 0.0314 max mem: 9377 +Train: [93] [3100/6250] eta: 0:07:59 lr: 0.000001 grad: 0.1853 (0.1866) loss: 0.7823 (0.7897) time: 0.1536 data: 0.0596 max mem: 9377 +Train: [93] [3200/6250] eta: 0:07:44 lr: 0.000001 grad: 0.1833 (0.1865) loss: 0.7839 (0.7896) time: 0.1350 data: 0.0443 max mem: 9377 +Train: [93] [3300/6250] eta: 0:07:29 lr: 0.000001 grad: 0.1777 (0.1863) loss: 0.7858 (0.7896) time: 0.1685 data: 0.0891 max mem: 9377 +Train: [93] [3400/6250] eta: 0:07:13 lr: 0.000001 grad: 0.1752 (0.1863) loss: 0.7873 (0.7895) time: 0.1355 data: 0.0409 max mem: 9377 +Train: [93] [3500/6250] eta: 0:06:58 lr: 0.000001 grad: 0.1812 (0.1862) loss: 0.7797 (0.7894) time: 0.1438 data: 0.0558 max mem: 9377 +Train: [93] [3600/6250] eta: 0:06:43 lr: 0.000001 grad: 0.1789 (0.1861) loss: 0.7839 (0.7892) time: 0.1455 data: 0.0608 max mem: 9377 +Train: [93] [3700/6250] eta: 0:06:27 lr: 0.000001 grad: 0.1866 (0.1861) loss: 0.7831 (0.7891) time: 0.1358 data: 0.0513 max mem: 9377 +Train: [93] [3800/6250] eta: 0:06:13 lr: 0.000001 grad: 0.1791 (0.1861) loss: 0.7857 (0.7890) time: 0.1556 data: 0.0638 max mem: 9377 +Train: [93] [3900/6250] eta: 0:05:58 lr: 0.000001 grad: 0.1742 (0.1860) loss: 0.7890 (0.7890) time: 0.1576 data: 0.0652 max mem: 9377 +Train: [93] [4000/6250] eta: 0:05:43 lr: 0.000001 grad: 0.1793 (0.1859) loss: 0.7894 (0.7889) time: 0.1631 data: 0.0837 max mem: 9377 +Train: [93] [4100/6250] eta: 0:05:28 lr: 0.000001 grad: 0.1682 (0.1857) loss: 0.7931 (0.7890) time: 0.1523 data: 0.0608 max mem: 9377 +Train: [93] [4200/6250] eta: 0:05:12 lr: 0.000001 grad: 0.1714 (0.1855) loss: 0.7864 (0.7890) time: 0.1663 data: 0.0831 max mem: 9377 +Train: [93] [4300/6250] eta: 0:04:57 lr: 0.000001 grad: 0.1811 (0.1853) loss: 0.7987 (0.7892) time: 0.1490 data: 0.0620 max mem: 9377 +Train: [93] [4400/6250] eta: 0:04:41 lr: 0.000001 grad: 0.1736 (0.1851) loss: 0.7934 (0.7893) time: 0.1468 data: 0.0606 max mem: 9377 +Train: [93] [4500/6250] eta: 0:04:26 lr: 0.000001 grad: 0.1687 (0.1849) loss: 0.8017 (0.7894) time: 0.1524 data: 0.0572 max mem: 9377 +Train: [93] [4600/6250] eta: 0:04:11 lr: 0.000001 grad: 0.1755 (0.1848) loss: 0.7912 (0.7895) time: 0.1688 data: 0.0887 max mem: 9377 +Train: [93] [4700/6250] eta: 0:03:55 lr: 0.000001 grad: 0.1753 (0.1847) loss: 0.7886 (0.7895) time: 0.1390 data: 0.0502 max mem: 9377 +Train: [93] [4800/6250] eta: 0:03:40 lr: 0.000001 grad: 0.1728 (0.1845) loss: 0.7900 (0.7896) time: 0.1456 data: 0.0649 max mem: 9377 +Train: [93] [4900/6250] eta: 0:03:25 lr: 0.000001 grad: 0.1813 (0.1844) loss: 0.7904 (0.7896) time: 0.1958 data: 0.1168 max mem: 9377 +Train: [93] [5000/6250] eta: 0:03:10 lr: 0.000001 grad: 0.1776 (0.1844) loss: 0.7858 (0.7896) time: 0.1482 data: 0.0659 max mem: 9377 +Train: [93] [5100/6250] eta: 0:02:54 lr: 0.000001 grad: 0.1838 (0.1843) loss: 0.7848 (0.7896) time: 0.1416 data: 0.0563 max mem: 9377 +Train: [93] [5200/6250] eta: 0:02:39 lr: 0.000001 grad: 0.1809 (0.1842) loss: 0.7838 (0.7896) time: 0.1563 data: 0.0761 max mem: 9377 +Train: [93] [5300/6250] eta: 0:02:24 lr: 0.000001 grad: 0.1757 (0.1841) loss: 0.7891 (0.7896) time: 0.1486 data: 0.0553 max mem: 9377 +Train: [93] [5400/6250] eta: 0:02:09 lr: 0.000001 grad: 0.1679 (0.1839) loss: 0.7953 (0.7896) time: 0.1498 data: 0.0625 max mem: 9377 +Train: [93] [5500/6250] eta: 0:01:54 lr: 0.000001 grad: 0.1680 (0.1839) loss: 0.7851 (0.7896) time: 0.1505 data: 0.0540 max mem: 9377 +Train: [93] [5600/6250] eta: 0:01:38 lr: 0.000001 grad: 0.1771 (0.1838) loss: 0.7918 (0.7897) time: 0.1688 data: 0.0863 max mem: 9377 +Train: [93] [5700/6250] eta: 0:01:23 lr: 0.000001 grad: 0.1754 (0.1836) loss: 0.7910 (0.7898) time: 0.1354 data: 0.0400 max mem: 9377 +Train: [93] [5800/6250] eta: 0:01:08 lr: 0.000001 grad: 0.1841 (0.1835) loss: 0.7928 (0.7898) time: 0.1316 data: 0.0411 max mem: 9377 +Train: [93] [5900/6250] eta: 0:00:53 lr: 0.000001 grad: 0.1879 (0.1835) loss: 0.7886 (0.7898) time: 0.1417 data: 0.0518 max mem: 9377 +Train: [93] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1748 (0.1834) loss: 0.7987 (0.7899) time: 0.1254 data: 0.0317 max mem: 9377 +Train: [93] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1779 (0.1834) loss: 0.7860 (0.7899) time: 0.1370 data: 0.0419 max mem: 9377 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1736 (0.1833) loss: 0.7895 (0.7900) time: 0.1553 data: 0.0645 max mem: 9377 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1764 (0.1833) loss: 0.7895 (0.7900) time: 0.1308 data: 0.0417 max mem: 9377 +Train: [93] Total time: 0:15:51 (0.1522 s / it) +Averaged stats: lr: 0.000001 grad: 0.1764 (0.1833) loss: 0.7895 (0.7900) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:04:49 loss: 0.8036 (0.8036) time: 4.6663 data: 4.5973 max mem: 9377 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.7800 (0.7833) time: 0.1330 data: 0.1059 max mem: 9377 +Eval (hcp-train-subset): [93] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (hcp-train-subset): loss: 0.7800 (0.7833) +Eval (hcp-val): [93] [ 0/62] eta: 0:05:43 loss: 0.8368 (0.8368) time: 5.5408 data: 5.5095 max mem: 9377 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.8350 (0.8383) time: 0.1381 data: 0.1130 max mem: 9377 +Eval (hcp-val): [93] Total time: 0:00:14 (0.2327 s / it) +Averaged stats (hcp-val): loss: 0.8350 (0.8383) +Eval (nsd-val): [93] [ 0/62] eta: 0:05:15 loss: 0.8090 (0.8090) time: 5.0896 data: 5.0503 max mem: 9377 +Eval (nsd-val): [93] [61/62] eta: 0:00:00 loss: 0.8189 (0.8213) time: 0.1283 data: 0.1031 max mem: 9377 +Eval (nsd-val): [93] Total time: 0:00:14 (0.2275 s / it) +Averaged stats (nsd-val): loss: 0.8189 (0.8213) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [94] [ 0/6250] eta: 11:05:42 lr: 0.000001 grad: 0.2879 (0.2879) loss: 0.8490 (0.8490) time: 6.3908 data: 6.2503 max mem: 9377 +Train: [94] [ 100/6250] eta: 0:22:00 lr: 0.000001 grad: 0.2017 (0.2480) loss: 0.8008 (0.7972) time: 0.1563 data: 0.0492 max mem: 9377 +Train: [94] [ 200/6250] eta: 0:18:46 lr: 0.000001 grad: 0.1724 (0.2252) loss: 0.8005 (0.7994) time: 0.1508 data: 0.0511 max mem: 9377 +Train: [94] [ 300/6250] eta: 0:17:22 lr: 0.000001 grad: 0.1705 (0.2113) loss: 0.7976 (0.8006) time: 0.1842 data: 0.0943 max mem: 9377 +Train: [94] [ 400/6250] eta: 0:16:24 lr: 0.000001 grad: 0.1694 (0.2044) loss: 0.8012 (0.8005) time: 0.1648 data: 0.0689 max mem: 9377 +Train: [94] [ 500/6250] eta: 0:15:36 lr: 0.000001 grad: 0.1743 (0.1998) loss: 0.7977 (0.7998) time: 0.1419 data: 0.0529 max mem: 9377 +Train: [94] [ 600/6250] eta: 0:15:10 lr: 0.000001 grad: 0.1750 (0.1964) loss: 0.7946 (0.7991) time: 0.1552 data: 0.0576 max mem: 9377 +Train: [94] [ 700/6250] eta: 0:14:50 lr: 0.000001 grad: 0.1841 (0.1950) loss: 0.7891 (0.7983) time: 0.1532 data: 0.0592 max mem: 9377 +Train: [94] [ 800/6250] eta: 0:14:32 lr: 0.000001 grad: 0.1685 (0.1929) loss: 0.7950 (0.7981) time: 0.1704 data: 0.0761 max mem: 9377 +Train: [94] [ 900/6250] eta: 0:14:08 lr: 0.000001 grad: 0.1826 (0.1916) loss: 0.7905 (0.7977) time: 0.1386 data: 0.0521 max mem: 9377 +Train: [94] [1000/6250] eta: 0:13:50 lr: 0.000001 grad: 0.1643 (0.1901) loss: 0.7953 (0.7972) time: 0.1539 data: 0.0674 max mem: 9377 +Train: [94] [1100/6250] eta: 0:13:27 lr: 0.000001 grad: 0.1811 (0.1893) loss: 0.7802 (0.7965) time: 0.1319 data: 0.0323 max mem: 9377 +Train: [94] [1200/6250] eta: 0:13:10 lr: 0.000001 grad: 0.1852 (0.1887) loss: 0.7870 (0.7961) time: 0.1617 data: 0.0762 max mem: 9377 +Train: [94] [1300/6250] eta: 0:12:47 lr: 0.000001 grad: 0.1807 (0.1883) loss: 0.7880 (0.7956) time: 0.1545 data: 0.0598 max mem: 9377 +Train: [94] [1400/6250] eta: 0:12:29 lr: 0.000001 grad: 0.1821 (0.1880) loss: 0.7811 (0.7950) time: 0.1497 data: 0.0633 max mem: 9377 +Train: [94] [1500/6250] eta: 0:12:13 lr: 0.000001 grad: 0.1768 (0.1879) loss: 0.7857 (0.7945) time: 0.1485 data: 0.0671 max mem: 9377 +Train: [94] [1600/6250] eta: 0:12:00 lr: 0.000001 grad: 0.1832 (0.1875) loss: 0.7860 (0.7942) time: 0.1643 data: 0.0787 max mem: 9377 +Train: [94] [1700/6250] eta: 0:11:46 lr: 0.000001 grad: 0.1825 (0.1871) loss: 0.7851 (0.7939) time: 0.1600 data: 0.0741 max mem: 9377 +Train: [94] [1800/6250] eta: 0:11:33 lr: 0.000001 grad: 0.1784 (0.1867) loss: 0.7886 (0.7938) time: 0.2086 data: 0.0759 max mem: 9377 +Train: [94] [1900/6250] eta: 0:11:15 lr: 0.000001 grad: 0.1831 (0.1865) loss: 0.7853 (0.7935) time: 0.1679 data: 0.0763 max mem: 9377 +Train: [94] [2000/6250] eta: 0:11:00 lr: 0.000001 grad: 0.1750 (0.1860) loss: 0.7910 (0.7934) time: 0.1503 data: 0.0537 max mem: 9377 +Train: [94] [2100/6250] eta: 0:10:44 lr: 0.000001 grad: 0.1744 (0.1856) loss: 0.7864 (0.7933) time: 0.1440 data: 0.0476 max mem: 9377 +Train: [94] [2200/6250] eta: 0:10:27 lr: 0.000001 grad: 0.1753 (0.1854) loss: 0.7906 (0.7931) time: 0.1381 data: 0.0455 max mem: 9377 +Train: [94] [2300/6250] eta: 0:10:10 lr: 0.000001 grad: 0.1897 (0.1853) loss: 0.7787 (0.7929) time: 0.1303 data: 0.0399 max mem: 9377 +Train: [94] [2400/6250] eta: 0:09:53 lr: 0.000001 grad: 0.1822 (0.1853) loss: 0.7914 (0.7927) time: 0.1525 data: 0.0657 max mem: 9377 +Train: [94] [2500/6250] eta: 0:09:35 lr: 0.000001 grad: 0.1816 (0.1849) loss: 0.7876 (0.7927) time: 0.1441 data: 0.0578 max mem: 9377 +Train: [94] [2600/6250] eta: 0:09:19 lr: 0.000001 grad: 0.1880 (0.1849) loss: 0.7941 (0.7925) time: 0.1548 data: 0.0680 max mem: 9377 +Train: [94] [2700/6250] eta: 0:09:02 lr: 0.000001 grad: 0.1824 (0.1848) loss: 0.7832 (0.7924) time: 0.1388 data: 0.0526 max mem: 9377 +Train: [94] [2800/6250] eta: 0:08:46 lr: 0.000001 grad: 0.1803 (0.1846) loss: 0.7912 (0.7923) time: 0.1435 data: 0.0525 max mem: 9377 +Train: [94] [2900/6250] eta: 0:08:30 lr: 0.000001 grad: 0.1673 (0.1844) loss: 0.7927 (0.7923) time: 0.1480 data: 0.0615 max mem: 9377 +Train: [94] [3000/6250] eta: 0:08:14 lr: 0.000001 grad: 0.1616 (0.1841) loss: 0.7988 (0.7924) time: 0.1611 data: 0.0783 max mem: 9377 +Train: [94] [3100/6250] eta: 0:07:58 lr: 0.000001 grad: 0.1787 (0.1839) loss: 0.7908 (0.7924) time: 0.1363 data: 0.0515 max mem: 9377 +Train: [94] [3200/6250] eta: 0:07:42 lr: 0.000001 grad: 0.1830 (0.1839) loss: 0.7907 (0.7923) time: 0.1174 data: 0.0149 max mem: 9377 +Train: [94] [3300/6250] eta: 0:07:26 lr: 0.000001 grad: 0.1791 (0.1838) loss: 0.7939 (0.7923) time: 0.1468 data: 0.0469 max mem: 9377 +Train: [94] [3400/6250] eta: 0:07:11 lr: 0.000001 grad: 0.1687 (0.1837) loss: 0.7928 (0.7923) time: 0.1491 data: 0.0544 max mem: 9377 +Train: [94] [3500/6250] eta: 0:06:55 lr: 0.000001 grad: 0.1770 (0.1836) loss: 0.7928 (0.7923) time: 0.1480 data: 0.0577 max mem: 9377 +Train: [94] [3600/6250] eta: 0:06:39 lr: 0.000001 grad: 0.1816 (0.1836) loss: 0.7877 (0.7923) time: 0.1536 data: 0.0686 max mem: 9377 +Train: [94] [3700/6250] eta: 0:06:24 lr: 0.000001 grad: 0.1804 (0.1835) loss: 0.7874 (0.7922) time: 0.1456 data: 0.0543 max mem: 9377 +Train: [94] [3800/6250] eta: 0:06:10 lr: 0.000001 grad: 0.1886 (0.1835) loss: 0.7939 (0.7921) time: 0.1691 data: 0.0867 max mem: 9377 +Train: [94] [3900/6250] eta: 0:05:55 lr: 0.000001 grad: 0.1806 (0.1834) loss: 0.7915 (0.7921) time: 0.1659 data: 0.0782 max mem: 9377 +Train: [94] [4000/6250] eta: 0:05:41 lr: 0.000001 grad: 0.1687 (0.1834) loss: 0.7964 (0.7921) time: 0.1812 data: 0.1015 max mem: 9377 +Train: [94] [4100/6250] eta: 0:05:25 lr: 0.000001 grad: 0.1690 (0.1833) loss: 0.7937 (0.7921) time: 0.1518 data: 0.0704 max mem: 9377 +Train: [94] [4200/6250] eta: 0:05:11 lr: 0.000001 grad: 0.1898 (0.1833) loss: 0.7879 (0.7921) time: 0.1602 data: 0.0671 max mem: 9377 +Train: [94] [4300/6250] eta: 0:04:56 lr: 0.000001 grad: 0.1803 (0.1834) loss: 0.7880 (0.7920) time: 0.1343 data: 0.0421 max mem: 9377 +Train: [94] [4400/6250] eta: 0:04:41 lr: 0.000001 grad: 0.1822 (0.1833) loss: 0.7940 (0.7920) time: 0.1398 data: 0.0522 max mem: 9377 +Train: [94] [4500/6250] eta: 0:04:25 lr: 0.000001 grad: 0.1809 (0.1835) loss: 0.7852 (0.7920) time: 0.1466 data: 0.0476 max mem: 9377 +Train: [94] [4600/6250] eta: 0:04:10 lr: 0.000001 grad: 0.1847 (0.1835) loss: 0.7968 (0.7920) time: 0.1357 data: 0.0442 max mem: 9377 +Train: [94] [4700/6250] eta: 0:03:54 lr: 0.000001 grad: 0.1839 (0.1834) loss: 0.7883 (0.7920) time: 0.1546 data: 0.0631 max mem: 9377 +Train: [94] [4800/6250] eta: 0:03:39 lr: 0.000001 grad: 0.1773 (0.1834) loss: 0.7961 (0.7919) time: 0.1292 data: 0.0430 max mem: 9377 +Train: [94] [4900/6250] eta: 0:03:24 lr: 0.000001 grad: 0.1867 (0.1834) loss: 0.7882 (0.7920) time: 0.1175 data: 0.0286 max mem: 9377 +Train: [94] [5000/6250] eta: 0:03:09 lr: 0.000001 grad: 0.1741 (0.1834) loss: 0.7920 (0.7920) time: 0.1815 data: 0.0933 max mem: 9377 +Train: [94] [5100/6250] eta: 0:02:54 lr: 0.000001 grad: 0.1916 (0.1834) loss: 0.7813 (0.7920) time: 0.1995 data: 0.1135 max mem: 9377 +Train: [94] [5200/6250] eta: 0:02:39 lr: 0.000001 grad: 0.1747 (0.1834) loss: 0.7897 (0.7919) time: 0.1751 data: 0.0877 max mem: 9377 +Train: [94] [5300/6250] eta: 0:02:24 lr: 0.000001 grad: 0.1738 (0.1833) loss: 0.7949 (0.7920) time: 0.1613 data: 0.0807 max mem: 9377 +Train: [94] [5400/6250] eta: 0:02:08 lr: 0.000001 grad: 0.1778 (0.1833) loss: 0.7942 (0.7919) time: 0.1591 data: 0.0617 max mem: 9377 +Train: [94] [5500/6250] eta: 0:01:53 lr: 0.000001 grad: 0.1819 (0.1832) loss: 0.7903 (0.7919) time: 0.1525 data: 0.0644 max mem: 9377 +Train: [94] [5600/6250] eta: 0:01:38 lr: 0.000001 grad: 0.1660 (0.1831) loss: 0.8016 (0.7920) time: 0.1360 data: 0.0440 max mem: 9377 +Train: [94] [5700/6250] eta: 0:01:23 lr: 0.000001 grad: 0.1698 (0.1830) loss: 0.7944 (0.7920) time: 0.1394 data: 0.0445 max mem: 9377 +Train: [94] [5800/6250] eta: 0:01:08 lr: 0.000001 grad: 0.1811 (0.1829) loss: 0.7948 (0.7921) time: 0.1482 data: 0.0526 max mem: 9377 +Train: [94] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.1748 (0.1828) loss: 0.7951 (0.7922) time: 0.1548 data: 0.0648 max mem: 9377 +Train: [94] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1870 (0.1829) loss: 0.7898 (0.7922) time: 0.1580 data: 0.0714 max mem: 9377 +Train: [94] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1828 (0.1829) loss: 0.7840 (0.7921) time: 0.1353 data: 0.0475 max mem: 9377 +Train: [94] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1795 (0.1828) loss: 0.7906 (0.7921) time: 0.1382 data: 0.0550 max mem: 9377 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1772 (0.1828) loss: 0.7939 (0.7922) time: 0.1546 data: 0.0668 max mem: 9377 +Train: [94] Total time: 0:15:48 (0.1518 s / it) +Averaged stats: lr: 0.000001 grad: 0.1772 (0.1828) loss: 0.7939 (0.7922) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:04:29 loss: 0.8023 (0.8023) time: 4.3432 data: 4.2604 max mem: 9377 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.7798 (0.7827) time: 0.1422 data: 0.1162 max mem: 9377 +Eval (hcp-train-subset): [94] Total time: 0:00:14 (0.2326 s / it) +Averaged stats (hcp-train-subset): loss: 0.7798 (0.7827) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [94] [ 0/62] eta: 0:06:12 loss: 0.8345 (0.8345) time: 6.0000 data: 5.9692 max mem: 9377 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.8360 (0.8382) time: 0.1129 data: 0.0860 max mem: 9377 +Eval (hcp-val): [94] Total time: 0:00:14 (0.2304 s / it) +Averaged stats (hcp-val): loss: 0.8360 (0.8382) +Making plots (hcp-val): example=43 +Eval (nsd-val): [94] [ 0/62] eta: 0:06:19 loss: 0.8182 (0.8182) time: 6.1233 data: 6.0915 max mem: 9377 +Eval (nsd-val): [94] [61/62] eta: 0:00:00 loss: 0.8209 (0.8230) time: 0.1297 data: 0.1041 max mem: 9377 +Eval (nsd-val): [94] Total time: 0:00:13 (0.2236 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8230) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00094.pth +Train: [95] [ 0/6250] eta: 9:41:10 lr: 0.000001 grad: 0.2922 (0.2922) loss: 0.7636 (0.7636) time: 5.5792 data: 5.2394 max mem: 9377 +Train: [95] [ 100/6250] eta: 0:22:34 lr: 0.000001 grad: 0.1818 (0.2133) loss: 0.8037 (0.7892) time: 0.1680 data: 0.0647 max mem: 9377 +Train: [95] [ 200/6250] eta: 0:18:45 lr: 0.000001 grad: 0.1806 (0.2059) loss: 0.7990 (0.7912) time: 0.1451 data: 0.0357 max mem: 9377 +Train: [95] [ 300/6250] eta: 0:17:24 lr: 0.000001 grad: 0.1915 (0.2017) loss: 0.7989 (0.7933) time: 0.1692 data: 0.0732 max mem: 9377 +Train: [95] [ 400/6250] eta: 0:16:29 lr: 0.000001 grad: 0.1947 (0.2012) loss: 0.7877 (0.7925) time: 0.1320 data: 0.0326 max mem: 9377 +Train: [95] [ 500/6250] eta: 0:15:51 lr: 0.000001 grad: 0.1937 (0.2004) loss: 0.7910 (0.7921) time: 0.1397 data: 0.0435 max mem: 9377 +Train: [95] [ 600/6250] eta: 0:15:18 lr: 0.000001 grad: 0.1937 (0.2010) loss: 0.7819 (0.7910) time: 0.1334 data: 0.0408 max mem: 9377 +Train: [95] [ 700/6250] eta: 0:14:56 lr: 0.000001 grad: 0.1934 (0.2003) loss: 0.7760 (0.7905) time: 0.1563 data: 0.0606 max mem: 9377 +Train: [95] [ 800/6250] eta: 0:14:32 lr: 0.000001 grad: 0.1879 (0.1996) loss: 0.7951 (0.7905) time: 0.1492 data: 0.0492 max mem: 9377 +Train: [95] [ 900/6250] eta: 0:14:10 lr: 0.000001 grad: 0.1908 (0.1997) loss: 0.7866 (0.7900) time: 0.1346 data: 0.0344 max mem: 9377 +Train: [95] [1000/6250] eta: 0:13:47 lr: 0.000001 grad: 0.1892 (0.1994) loss: 0.7918 (0.7898) time: 0.1356 data: 0.0384 max mem: 9377 +Train: [95] [1100/6250] eta: 0:13:26 lr: 0.000001 grad: 0.1906 (0.1992) loss: 0.7779 (0.7894) time: 0.1685 data: 0.0819 max mem: 9377 +Train: [95] [1200/6250] eta: 0:13:06 lr: 0.000001 grad: 0.1859 (0.1991) loss: 0.7768 (0.7888) time: 0.1416 data: 0.0584 max mem: 9377 +Train: [95] [1300/6250] eta: 0:12:46 lr: 0.000001 grad: 0.1777 (0.1983) loss: 0.7851 (0.7888) time: 0.1314 data: 0.0424 max mem: 9377 +Train: [95] [1400/6250] eta: 0:12:27 lr: 0.000001 grad: 0.1910 (0.1979) loss: 0.7801 (0.7884) time: 0.1529 data: 0.0620 max mem: 9377 +Train: [95] [1500/6250] eta: 0:12:11 lr: 0.000001 grad: 0.1920 (0.1972) loss: 0.7735 (0.7882) time: 0.1760 data: 0.0900 max mem: 9377 +Train: [95] [1600/6250] eta: 0:11:56 lr: 0.000001 grad: 0.1846 (0.1965) loss: 0.7857 (0.7881) time: 0.1480 data: 0.0652 max mem: 9377 +Train: [95] [1700/6250] eta: 0:11:42 lr: 0.000001 grad: 0.1816 (0.1961) loss: 0.7864 (0.7879) time: 0.1443 data: 0.0607 max mem: 9377 +Train: [95] [1800/6250] eta: 0:11:26 lr: 0.000001 grad: 0.1804 (0.1957) loss: 0.7755 (0.7877) time: 0.1552 data: 0.0720 max mem: 9377 +Train: [95] [1900/6250] eta: 0:11:09 lr: 0.000001 grad: 0.1945 (0.1954) loss: 0.7854 (0.7873) time: 0.1366 data: 0.0553 max mem: 9377 +Train: [95] [2000/6250] eta: 0:10:55 lr: 0.000001 grad: 0.1912 (0.1951) loss: 0.7915 (0.7873) time: 0.1540 data: 0.0591 max mem: 9377 +Train: [95] [2100/6250] eta: 0:10:38 lr: 0.000001 grad: 0.1831 (0.1948) loss: 0.7933 (0.7871) time: 0.1438 data: 0.0577 max mem: 9377 +Train: [95] [2200/6250] eta: 0:10:21 lr: 0.000001 grad: 0.1918 (0.1945) loss: 0.7883 (0.7871) time: 0.1412 data: 0.0498 max mem: 9377 +Train: [95] [2300/6250] eta: 0:10:04 lr: 0.000001 grad: 0.1805 (0.1942) loss: 0.7845 (0.7870) time: 0.1504 data: 0.0619 max mem: 9377 +Train: [95] [2400/6250] eta: 0:09:47 lr: 0.000001 grad: 0.1936 (0.1940) loss: 0.7905 (0.7869) time: 0.1389 data: 0.0485 max mem: 9377 +Train: [95] [2500/6250] eta: 0:09:32 lr: 0.000001 grad: 0.1846 (0.1938) loss: 0.7895 (0.7868) time: 0.1393 data: 0.0564 max mem: 9377 +Train: [95] [2600/6250] eta: 0:09:16 lr: 0.000001 grad: 0.1992 (0.1939) loss: 0.7736 (0.7866) time: 0.1377 data: 0.0502 max mem: 9377 +Train: [95] [2700/6250] eta: 0:08:58 lr: 0.000001 grad: 0.1931 (0.1939) loss: 0.7786 (0.7864) time: 0.1330 data: 0.0390 max mem: 9377 +Train: [95] [2800/6250] eta: 0:08:43 lr: 0.000001 grad: 0.1800 (0.1938) loss: 0.7933 (0.7864) time: 0.1623 data: 0.0862 max mem: 9377 +Train: [95] [2900/6250] eta: 0:08:27 lr: 0.000001 grad: 0.1817 (0.1937) loss: 0.7887 (0.7863) time: 0.1352 data: 0.0487 max mem: 9377 +Train: [95] [3000/6250] eta: 0:08:11 lr: 0.000001 grad: 0.1850 (0.1936) loss: 0.7961 (0.7865) time: 0.1315 data: 0.0343 max mem: 9377 +Train: [95] [3100/6250] eta: 0:07:55 lr: 0.000001 grad: 0.1880 (0.1934) loss: 0.7880 (0.7866) time: 0.1536 data: 0.0596 max mem: 9377 +Train: [95] [3200/6250] eta: 0:07:38 lr: 0.000001 grad: 0.1764 (0.1933) loss: 0.7972 (0.7867) time: 0.1312 data: 0.0360 max mem: 9377 +Train: [95] [3300/6250] eta: 0:07:23 lr: 0.000001 grad: 0.1829 (0.1931) loss: 0.7920 (0.7868) time: 0.1589 data: 0.0685 max mem: 9377 +Train: [95] [3400/6250] eta: 0:07:07 lr: 0.000001 grad: 0.1888 (0.1929) loss: 0.7918 (0.7869) time: 0.1533 data: 0.0611 max mem: 9377 +Train: [95] [3500/6250] eta: 0:06:51 lr: 0.000001 grad: 0.1789 (0.1927) loss: 0.7911 (0.7870) time: 0.1524 data: 0.0664 max mem: 9377 +Train: [95] [3600/6250] eta: 0:06:36 lr: 0.000001 grad: 0.1791 (0.1926) loss: 0.7933 (0.7871) time: 0.1609 data: 0.0736 max mem: 9377 +Train: [95] [3700/6250] eta: 0:06:21 lr: 0.000001 grad: 0.1839 (0.1924) loss: 0.7955 (0.7872) time: 0.1511 data: 0.0686 max mem: 9377 +Train: [95] [3800/6250] eta: 0:06:06 lr: 0.000001 grad: 0.1805 (0.1923) loss: 0.7941 (0.7872) time: 0.1448 data: 0.0594 max mem: 9377 +Train: [95] [3900/6250] eta: 0:05:52 lr: 0.000001 grad: 0.1812 (0.1922) loss: 0.7829 (0.7872) time: 0.1536 data: 0.0721 max mem: 9377 +Train: [95] [4000/6250] eta: 0:05:37 lr: 0.000001 grad: 0.1752 (0.1920) loss: 0.7914 (0.7873) time: 0.1413 data: 0.0551 max mem: 9377 +Train: [95] [4100/6250] eta: 0:05:22 lr: 0.000001 grad: 0.1807 (0.1918) loss: 0.7887 (0.7874) time: 0.1488 data: 0.0680 max mem: 9377 +Train: [95] [4200/6250] eta: 0:05:07 lr: 0.000001 grad: 0.1807 (0.1916) loss: 0.7957 (0.7874) time: 0.1415 data: 0.0585 max mem: 9377 +Train: [95] [4300/6250] eta: 0:04:52 lr: 0.000001 grad: 0.1843 (0.1914) loss: 0.7908 (0.7875) time: 0.1393 data: 0.0496 max mem: 9377 +Train: [95] [4400/6250] eta: 0:04:36 lr: 0.000001 grad: 0.1776 (0.1912) loss: 0.7946 (0.7876) time: 0.1501 data: 0.0664 max mem: 9377 +Train: [95] [4500/6250] eta: 0:04:21 lr: 0.000001 grad: 0.1687 (0.1910) loss: 0.7933 (0.7877) time: 0.1483 data: 0.0553 max mem: 9377 +Train: [95] [4600/6250] eta: 0:04:07 lr: 0.000001 grad: 0.1821 (0.1908) loss: 0.7871 (0.7878) time: 0.1413 data: 0.0564 max mem: 9377 +Train: [95] [4700/6250] eta: 0:03:51 lr: 0.000001 grad: 0.1863 (0.1907) loss: 0.7867 (0.7878) time: 0.1542 data: 0.0659 max mem: 9377 +Train: [95] [4800/6250] eta: 0:03:36 lr: 0.000001 grad: 0.1849 (0.1905) loss: 0.7902 (0.7879) time: 0.1522 data: 0.0658 max mem: 9377 +Train: [95] [4900/6250] eta: 0:03:21 lr: 0.000001 grad: 0.1827 (0.1904) loss: 0.7805 (0.7880) time: 0.1038 data: 0.0011 max mem: 9377 +Train: [95] [5000/6250] eta: 0:03:07 lr: 0.000001 grad: 0.1832 (0.1903) loss: 0.7859 (0.7879) time: 0.1853 data: 0.1083 max mem: 9377 +Train: [95] [5100/6250] eta: 0:02:52 lr: 0.000001 grad: 0.1755 (0.1903) loss: 0.7929 (0.7879) time: 0.1789 data: 0.0905 max mem: 9377 +Train: [95] [5200/6250] eta: 0:02:37 lr: 0.000001 grad: 0.1767 (0.1902) loss: 0.7814 (0.7879) time: 0.1529 data: 0.0657 max mem: 9377 +Train: [95] [5300/6250] eta: 0:02:22 lr: 0.000001 grad: 0.1888 (0.1901) loss: 0.7932 (0.7879) time: 0.1665 data: 0.0762 max mem: 9377 +Train: [95] [5400/6250] eta: 0:02:07 lr: 0.000001 grad: 0.1794 (0.1898) loss: 0.7867 (0.7879) time: 0.1584 data: 0.0702 max mem: 9377 +Train: [95] [5500/6250] eta: 0:01:52 lr: 0.000001 grad: 0.1774 (0.1897) loss: 0.7909 (0.7880) time: 0.1463 data: 0.0624 max mem: 9377 +Train: [95] [5600/6250] eta: 0:01:37 lr: 0.000001 grad: 0.1805 (0.1895) loss: 0.7965 (0.7881) time: 0.1557 data: 0.0671 max mem: 9377 +Train: [95] [5700/6250] eta: 0:01:22 lr: 0.000001 grad: 0.1765 (0.1893) loss: 0.7935 (0.7882) time: 0.1822 data: 0.0977 max mem: 9377 +Train: [95] [5800/6250] eta: 0:01:07 lr: 0.000001 grad: 0.1846 (0.1891) loss: 0.7874 (0.7883) time: 0.1612 data: 0.0736 max mem: 9377 +Train: [95] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.1816 (0.1890) loss: 0.7945 (0.7884) time: 0.1244 data: 0.0295 max mem: 9377 +Train: [95] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1746 (0.1888) loss: 0.7960 (0.7886) time: 0.1235 data: 0.0417 max mem: 9377 +Train: [95] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1868 (0.1886) loss: 0.7891 (0.7886) time: 0.1187 data: 0.0276 max mem: 9377 +Train: [95] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1784 (0.1886) loss: 0.7874 (0.7886) time: 0.1639 data: 0.0667 max mem: 9377 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1830 (0.1885) loss: 0.7935 (0.7887) time: 0.1684 data: 0.0853 max mem: 9377 +Train: [95] Total time: 0:15:40 (0.1505 s / it) +Averaged stats: lr: 0.000001 grad: 0.1830 (0.1885) loss: 0.7935 (0.7887) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:04:18 loss: 0.8034 (0.8034) time: 4.1662 data: 4.0689 max mem: 9377 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.7799 (0.7822) time: 0.1208 data: 0.0940 max mem: 9377 +Eval (hcp-train-subset): [95] Total time: 0:00:14 (0.2371 s / it) +Averaged stats (hcp-train-subset): loss: 0.7799 (0.7822) +Eval (hcp-val): [95] [ 0/62] eta: 0:04:41 loss: 0.8373 (0.8373) time: 4.5474 data: 4.4609 max mem: 9377 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.8364 (0.8382) time: 0.1388 data: 0.1115 max mem: 9377 +Eval (hcp-val): [95] Total time: 0:00:13 (0.2224 s / it) +Averaged stats (hcp-val): loss: 0.8364 (0.8382) +Eval (nsd-val): [95] [ 0/62] eta: 0:05:23 loss: 0.8166 (0.8166) time: 5.2145 data: 5.1784 max mem: 9377 +Eval (nsd-val): [95] [61/62] eta: 0:00:00 loss: 0.8209 (0.8224) time: 0.1329 data: 0.1078 max mem: 9377 +Eval (nsd-val): [95] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8224) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +Train: [96] [ 0/6250] eta: 11:24:49 lr: 0.000001 grad: 0.1669 (0.1669) loss: 0.8304 (0.8304) time: 6.5743 data: 6.4741 max mem: 9377 +Train: [96] [ 100/6250] eta: 0:21:42 lr: 0.000001 grad: 0.2198 (0.2353) loss: 0.8012 (0.7929) time: 0.1795 data: 0.0790 max mem: 9377 +Train: [96] [ 200/6250] eta: 0:18:31 lr: 0.000001 grad: 0.1793 (0.2167) loss: 0.7970 (0.7937) time: 0.1766 data: 0.0798 max mem: 9377 +Train: [96] [ 300/6250] eta: 0:17:01 lr: 0.000001 grad: 0.1985 (0.2059) loss: 0.7992 (0.7959) time: 0.1351 data: 0.0466 max mem: 9377 +Train: [96] [ 400/6250] eta: 0:16:04 lr: 0.000001 grad: 0.1709 (0.2013) loss: 0.8130 (0.7963) time: 0.1447 data: 0.0497 max mem: 9377 +Train: [96] [ 500/6250] eta: 0:15:28 lr: 0.000001 grad: 0.1692 (0.1972) loss: 0.7963 (0.7974) time: 0.1309 data: 0.0320 max mem: 9377 +Train: [96] [ 600/6250] eta: 0:15:00 lr: 0.000001 grad: 0.1692 (0.1938) loss: 0.8095 (0.7979) time: 0.1433 data: 0.0443 max mem: 9377 +Train: [96] [ 700/6250] eta: 0:14:35 lr: 0.000001 grad: 0.1724 (0.1919) loss: 0.8002 (0.7979) time: 0.1412 data: 0.0423 max mem: 9377 +Train: [96] [ 800/6250] eta: 0:14:10 lr: 0.000001 grad: 0.1788 (0.1902) loss: 0.7925 (0.7980) time: 0.1425 data: 0.0472 max mem: 9377 +Train: [96] [ 900/6250] eta: 0:13:52 lr: 0.000001 grad: 0.1719 (0.1892) loss: 0.7965 (0.7980) time: 0.1371 data: 0.0297 max mem: 9377 +Train: [96] [1000/6250] eta: 0:13:29 lr: 0.000001 grad: 0.1746 (0.1880) loss: 0.8052 (0.7983) time: 0.1191 data: 0.0270 max mem: 9377 +Train: [96] [1100/6250] eta: 0:13:06 lr: 0.000000 grad: 0.1736 (0.1868) loss: 0.7991 (0.7984) time: 0.1309 data: 0.0343 max mem: 9377 +Train: [96] [1200/6250] eta: 0:12:44 lr: 0.000000 grad: 0.1766 (0.1861) loss: 0.7972 (0.7982) time: 0.1302 data: 0.0368 max mem: 9377 +Train: [96] [1300/6250] eta: 0:12:24 lr: 0.000000 grad: 0.1705 (0.1856) loss: 0.7968 (0.7981) time: 0.1404 data: 0.0489 max mem: 9377 +Train: [96] [1400/6250] eta: 0:12:05 lr: 0.000000 grad: 0.1710 (0.1852) loss: 0.8026 (0.7983) time: 0.1342 data: 0.0423 max mem: 9377 +Train: [96] [1500/6250] eta: 0:11:46 lr: 0.000000 grad: 0.1804 (0.1850) loss: 0.7974 (0.7982) time: 0.1283 data: 0.0348 max mem: 9377 +Train: [96] [1600/6250] eta: 0:11:31 lr: 0.000000 grad: 0.1736 (0.1849) loss: 0.7995 (0.7981) time: 0.1363 data: 0.0543 max mem: 9377 +Train: [96] [1700/6250] eta: 0:11:18 lr: 0.000000 grad: 0.1732 (0.1848) loss: 0.7902 (0.7980) time: 0.1443 data: 0.0612 max mem: 9377 +Train: [96] [1800/6250] eta: 0:11:06 lr: 0.000000 grad: 0.1779 (0.1849) loss: 0.7959 (0.7979) time: 0.1678 data: 0.0879 max mem: 9377 +Train: [96] [1900/6250] eta: 0:10:52 lr: 0.000000 grad: 0.1847 (0.1848) loss: 0.7949 (0.7978) time: 0.1613 data: 0.0780 max mem: 9377 +Train: [96] [2000/6250] eta: 0:10:38 lr: 0.000000 grad: 0.1836 (0.1847) loss: 0.7843 (0.7976) time: 0.1537 data: 0.0622 max mem: 9377 +Train: [96] [2100/6250] eta: 0:10:24 lr: 0.000000 grad: 0.1856 (0.1846) loss: 0.7946 (0.7974) time: 0.1417 data: 0.0412 max mem: 9377 +Train: [96] [2200/6250] eta: 0:10:09 lr: 0.000000 grad: 0.1811 (0.1845) loss: 0.7908 (0.7974) time: 0.1526 data: 0.0660 max mem: 9377 +Train: [96] [2300/6250] eta: 0:09:54 lr: 0.000000 grad: 0.1748 (0.1844) loss: 0.7882 (0.7973) time: 0.1395 data: 0.0550 max mem: 9377 +Train: [96] [2400/6250] eta: 0:09:37 lr: 0.000000 grad: 0.1768 (0.1844) loss: 0.7917 (0.7971) time: 0.1262 data: 0.0381 max mem: 9377 +Train: [96] [2500/6250] eta: 0:09:21 lr: 0.000000 grad: 0.1806 (0.1845) loss: 0.7962 (0.7968) time: 0.1284 data: 0.0427 max mem: 9377 +Train: [96] [2600/6250] eta: 0:09:05 lr: 0.000000 grad: 0.1771 (0.1843) loss: 0.7925 (0.7967) time: 0.1340 data: 0.0375 max mem: 9377 +Train: [96] [2700/6250] eta: 0:08:49 lr: 0.000000 grad: 0.1789 (0.1842) loss: 0.7937 (0.7966) time: 0.1384 data: 0.0443 max mem: 9377 +Train: [96] [2800/6250] eta: 0:08:32 lr: 0.000000 grad: 0.1713 (0.1840) loss: 0.7941 (0.7966) time: 0.1404 data: 0.0513 max mem: 9377 +Train: [96] [2900/6250] eta: 0:08:15 lr: 0.000000 grad: 0.1672 (0.1837) loss: 0.8001 (0.7966) time: 0.1290 data: 0.0383 max mem: 9377 +Train: [96] [3000/6250] eta: 0:08:00 lr: 0.000000 grad: 0.1642 (0.1835) loss: 0.8040 (0.7966) time: 0.1406 data: 0.0400 max mem: 9377 +Train: [96] [3100/6250] eta: 0:07:44 lr: 0.000000 grad: 0.1680 (0.1833) loss: 0.7959 (0.7967) time: 0.1520 data: 0.0655 max mem: 9377 +Train: [96] [3200/6250] eta: 0:07:29 lr: 0.000000 grad: 0.1743 (0.1833) loss: 0.7973 (0.7966) time: 0.1598 data: 0.0754 max mem: 9377 +Train: [96] [3300/6250] eta: 0:07:14 lr: 0.000000 grad: 0.1731 (0.1831) loss: 0.8007 (0.7967) time: 0.1496 data: 0.0595 max mem: 9377 +Train: [96] [3400/6250] eta: 0:06:59 lr: 0.000000 grad: 0.1758 (0.1829) loss: 0.7996 (0.7967) time: 0.1453 data: 0.0544 max mem: 9377 +Train: [96] [3500/6250] eta: 0:06:44 lr: 0.000000 grad: 0.1709 (0.1828) loss: 0.7914 (0.7967) time: 0.1289 data: 0.0481 max mem: 9377 +Train: [96] [3600/6250] eta: 0:06:29 lr: 0.000000 grad: 0.1797 (0.1827) loss: 0.8020 (0.7967) time: 0.1542 data: 0.0603 max mem: 9377 +Train: [96] [3700/6250] eta: 0:06:14 lr: 0.000000 grad: 0.1772 (0.1827) loss: 0.8038 (0.7968) time: 0.1508 data: 0.0610 max mem: 9377 +Train: [96] [3800/6250] eta: 0:05:58 lr: 0.000000 grad: 0.1815 (0.1828) loss: 0.7986 (0.7968) time: 0.1513 data: 0.0662 max mem: 9377 +Train: [96] [3900/6250] eta: 0:05:45 lr: 0.000000 grad: 0.1791 (0.1829) loss: 0.8009 (0.7968) time: 0.1613 data: 0.0728 max mem: 9377 +Train: [96] [4000/6250] eta: 0:05:31 lr: 0.000000 grad: 0.1770 (0.1831) loss: 0.8004 (0.7968) time: 0.1518 data: 0.0704 max mem: 9377 +Train: [96] [4100/6250] eta: 0:05:17 lr: 0.000000 grad: 0.1758 (0.1831) loss: 0.7997 (0.7968) time: 0.1481 data: 0.0652 max mem: 9377 +Train: [96] [4200/6250] eta: 0:05:03 lr: 0.000000 grad: 0.1822 (0.1832) loss: 0.7951 (0.7968) time: 0.1654 data: 0.0807 max mem: 9377 +Train: [96] [4300/6250] eta: 0:04:48 lr: 0.000000 grad: 0.1796 (0.1833) loss: 0.7982 (0.7967) time: 0.1486 data: 0.0614 max mem: 9377 +Train: [96] [4400/6250] eta: 0:04:34 lr: 0.000000 grad: 0.1902 (0.1833) loss: 0.7836 (0.7966) time: 0.1633 data: 0.0752 max mem: 9377 +Train: [96] [4500/6250] eta: 0:04:19 lr: 0.000000 grad: 0.1776 (0.1834) loss: 0.8021 (0.7966) time: 0.1583 data: 0.0672 max mem: 9377 +Train: [96] [4600/6250] eta: 0:04:04 lr: 0.000000 grad: 0.1810 (0.1835) loss: 0.7952 (0.7965) time: 0.1429 data: 0.0519 max mem: 9377 +Train: [96] [4700/6250] eta: 0:03:49 lr: 0.000000 grad: 0.1668 (0.1834) loss: 0.7972 (0.7964) time: 0.1463 data: 0.0601 max mem: 9377 +Train: [96] [4800/6250] eta: 0:03:34 lr: 0.000000 grad: 0.1744 (0.1835) loss: 0.7963 (0.7963) time: 0.1458 data: 0.0582 max mem: 9377 +Train: [96] [4900/6250] eta: 0:03:19 lr: 0.000000 grad: 0.1828 (0.1835) loss: 0.7932 (0.7962) time: 0.1604 data: 0.0815 max mem: 9377 +Train: [96] [5000/6250] eta: 0:03:05 lr: 0.000000 grad: 0.1736 (0.1835) loss: 0.7927 (0.7961) time: 0.1433 data: 0.0593 max mem: 9377 +Train: [96] [5100/6250] eta: 0:02:50 lr: 0.000000 grad: 0.1777 (0.1835) loss: 0.7945 (0.7960) time: 0.1364 data: 0.0478 max mem: 9377 +Train: [96] [5200/6250] eta: 0:02:35 lr: 0.000000 grad: 0.1799 (0.1835) loss: 0.7873 (0.7959) time: 0.1469 data: 0.0566 max mem: 9377 +Train: [96] [5300/6250] eta: 0:02:21 lr: 0.000000 grad: 0.1796 (0.1837) loss: 0.7849 (0.7958) time: 0.1676 data: 0.0748 max mem: 9377 +Train: [96] [5400/6250] eta: 0:02:06 lr: 0.000000 grad: 0.1943 (0.1837) loss: 0.7842 (0.7958) time: 0.1405 data: 0.0438 max mem: 9377 +Train: [96] [5500/6250] eta: 0:01:51 lr: 0.000000 grad: 0.1929 (0.1840) loss: 0.7925 (0.7956) time: 0.1423 data: 0.0483 max mem: 9377 +Train: [96] [5600/6250] eta: 0:01:36 lr: 0.000000 grad: 0.1793 (0.1841) loss: 0.7927 (0.7955) time: 0.1270 data: 0.0343 max mem: 9377 +Train: [96] [5700/6250] eta: 0:01:21 lr: 0.000000 grad: 0.1975 (0.1842) loss: 0.7806 (0.7954) time: 0.1295 data: 0.0382 max mem: 9377 +Train: [96] [5800/6250] eta: 0:01:06 lr: 0.000000 grad: 0.1889 (0.1843) loss: 0.7834 (0.7952) time: 0.1380 data: 0.0434 max mem: 9377 +Train: [96] [5900/6250] eta: 0:00:51 lr: 0.000000 grad: 0.1949 (0.1845) loss: 0.7805 (0.7951) time: 0.1336 data: 0.0455 max mem: 9377 +Train: [96] [6000/6250] eta: 0:00:36 lr: 0.000000 grad: 0.1861 (0.1847) loss: 0.7842 (0.7950) time: 0.1577 data: 0.0759 max mem: 9377 +Train: [96] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1893 (0.1848) loss: 0.7806 (0.7948) time: 0.1398 data: 0.0499 max mem: 9377 +Train: [96] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1825 (0.1849) loss: 0.7889 (0.7947) time: 0.1658 data: 0.0774 max mem: 9377 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1875 (0.1850) loss: 0.7956 (0.7946) time: 0.1333 data: 0.0458 max mem: 9377 +Train: [96] Total time: 0:15:28 (0.1485 s / it) +Averaged stats: lr: 0.000000 grad: 0.1875 (0.1850) loss: 0.7956 (0.7946) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:04:54 loss: 0.8012 (0.8012) time: 4.7450 data: 4.6662 max mem: 9377 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.7794 (0.7820) time: 0.1289 data: 0.1036 max mem: 9377 +Eval (hcp-train-subset): [96] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-train-subset): loss: 0.7794 (0.7820) +Eval (hcp-val): [96] [ 0/62] eta: 0:03:58 loss: 0.8368 (0.8368) time: 3.8462 data: 3.7684 max mem: 9377 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.8372 (0.8379) time: 0.1145 data: 0.0878 max mem: 9377 +Eval (hcp-val): [96] Total time: 0:00:13 (0.2253 s / it) +Averaged stats (hcp-val): loss: 0.8372 (0.8379) +Eval (nsd-val): [96] [ 0/62] eta: 0:05:20 loss: 0.8059 (0.8059) time: 5.1726 data: 5.1426 max mem: 9377 +Eval (nsd-val): [96] [61/62] eta: 0:00:00 loss: 0.8212 (0.8217) time: 0.1123 data: 0.0870 max mem: 9377 +Eval (nsd-val): [96] Total time: 0:00:13 (0.2252 s / it) +Averaged stats (nsd-val): loss: 0.8212 (0.8217) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [97] [ 0/6250] eta: 9:56:46 lr: 0.000000 grad: 0.6015 (0.6015) loss: 0.7569 (0.7569) time: 5.7291 data: 5.5496 max mem: 9377 +Train: [97] [ 100/6250] eta: 0:21:53 lr: 0.000000 grad: 0.1713 (0.2164) loss: 0.8046 (0.8055) time: 0.1633 data: 0.0562 max mem: 9377 +Train: [97] [ 200/6250] eta: 0:18:42 lr: 0.000000 grad: 0.1798 (0.2059) loss: 0.8009 (0.8023) time: 0.1480 data: 0.0442 max mem: 9377 +Train: [97] [ 300/6250] eta: 0:17:33 lr: 0.000000 grad: 0.1979 (0.2067) loss: 0.7791 (0.7988) time: 0.1326 data: 0.0288 max mem: 9377 +Train: [97] [ 400/6250] eta: 0:16:31 lr: 0.000000 grad: 0.2039 (0.2061) loss: 0.7816 (0.7964) time: 0.1494 data: 0.0626 max mem: 9377 +Train: [97] [ 500/6250] eta: 0:15:55 lr: 0.000000 grad: 0.1939 (0.2047) loss: 0.7841 (0.7946) time: 0.1658 data: 0.0701 max mem: 9377 +Train: [97] [ 600/6250] eta: 0:15:20 lr: 0.000000 grad: 0.1824 (0.2027) loss: 0.7773 (0.7934) time: 0.1130 data: 0.0154 max mem: 9377 +Train: [97] [ 700/6250] eta: 0:14:55 lr: 0.000000 grad: 0.1860 (0.2013) loss: 0.7826 (0.7924) time: 0.1475 data: 0.0540 max mem: 9377 +Train: [97] [ 800/6250] eta: 0:14:29 lr: 0.000000 grad: 0.1797 (0.2001) loss: 0.7864 (0.7918) time: 0.1497 data: 0.0524 max mem: 9377 +Train: [97] [ 900/6250] eta: 0:14:09 lr: 0.000000 grad: 0.1882 (0.1990) loss: 0.7891 (0.7912) time: 0.1543 data: 0.0538 max mem: 9377 +Train: [97] [1000/6250] eta: 0:13:48 lr: 0.000000 grad: 0.1915 (0.1991) loss: 0.7822 (0.7906) time: 0.1725 data: 0.0874 max mem: 9377 +Train: [97] [1100/6250] eta: 0:13:23 lr: 0.000000 grad: 0.1842 (0.1979) loss: 0.7839 (0.7904) time: 0.1410 data: 0.0551 max mem: 9377 +Train: [97] [1200/6250] eta: 0:13:00 lr: 0.000000 grad: 0.1813 (0.1970) loss: 0.7919 (0.7905) time: 0.1319 data: 0.0263 max mem: 9377 +Train: [97] [1300/6250] eta: 0:12:38 lr: 0.000000 grad: 0.1828 (0.1962) loss: 0.7868 (0.7904) time: 0.1376 data: 0.0476 max mem: 9377 +Train: [97] [1400/6250] eta: 0:12:21 lr: 0.000000 grad: 0.1925 (0.1956) loss: 0.7838 (0.7902) time: 0.1480 data: 0.0589 max mem: 9377 +Train: [97] [1500/6250] eta: 0:12:03 lr: 0.000000 grad: 0.1833 (0.1948) loss: 0.7867 (0.7901) time: 0.1611 data: 0.0718 max mem: 9377 +Train: [97] [1600/6250] eta: 0:11:48 lr: 0.000000 grad: 0.1836 (0.1949) loss: 0.7871 (0.7900) time: 0.1512 data: 0.0618 max mem: 9377 +Train: [97] [1700/6250] eta: 0:11:32 lr: 0.000000 grad: 0.1911 (0.1945) loss: 0.7845 (0.7897) time: 0.1507 data: 0.0616 max mem: 9377 +Train: [97] [1800/6250] eta: 0:11:18 lr: 0.000000 grad: 0.1884 (0.1945) loss: 0.7839 (0.7895) time: 0.1578 data: 0.0705 max mem: 9377 +Train: [97] [1900/6250] eta: 0:11:03 lr: 0.000000 grad: 0.1828 (0.1942) loss: 0.7927 (0.7894) time: 0.1886 data: 0.1016 max mem: 9377 +Train: [97] [2000/6250] eta: 0:10:49 lr: 0.000000 grad: 0.1817 (0.1939) loss: 0.7840 (0.7894) time: 0.1652 data: 0.0770 max mem: 9377 +Train: [97] [2100/6250] eta: 0:10:32 lr: 0.000000 grad: 0.1875 (0.1935) loss: 0.7956 (0.7895) time: 0.1453 data: 0.0579 max mem: 9377 +Train: [97] [2200/6250] eta: 0:10:17 lr: 0.000000 grad: 0.1858 (0.1934) loss: 0.7886 (0.7895) time: 0.1586 data: 0.0756 max mem: 9377 +Train: [97] [2300/6250] eta: 0:09:59 lr: 0.000000 grad: 0.1784 (0.1932) loss: 0.7893 (0.7895) time: 0.1222 data: 0.0367 max mem: 9377 +Train: [97] [2400/6250] eta: 0:09:43 lr: 0.000000 grad: 0.1857 (0.1930) loss: 0.7887 (0.7895) time: 0.1406 data: 0.0543 max mem: 9377 +Train: [97] [2500/6250] eta: 0:09:26 lr: 0.000000 grad: 0.1798 (0.1927) loss: 0.7889 (0.7896) time: 0.1443 data: 0.0518 max mem: 9377 +Train: [97] [2600/6250] eta: 0:09:09 lr: 0.000000 grad: 0.1896 (0.1925) loss: 0.7858 (0.7896) time: 0.1466 data: 0.0657 max mem: 9377 +Train: [97] [2700/6250] eta: 0:08:53 lr: 0.000000 grad: 0.1889 (0.1923) loss: 0.7845 (0.7896) time: 0.1364 data: 0.0542 max mem: 9377 +Train: [97] [2800/6250] eta: 0:08:37 lr: 0.000000 grad: 0.1921 (0.1922) loss: 0.7952 (0.7898) time: 0.1849 data: 0.0929 max mem: 9377 +Train: [97] [2900/6250] eta: 0:08:20 lr: 0.000000 grad: 0.1743 (0.1919) loss: 0.7983 (0.7900) time: 0.1442 data: 0.0575 max mem: 9377 +Train: [97] [3000/6250] eta: 0:08:06 lr: 0.000000 grad: 0.1817 (0.1916) loss: 0.8000 (0.7902) time: 0.1567 data: 0.0671 max mem: 9377 +Train: [97] [3100/6250] eta: 0:07:51 lr: 0.000000 grad: 0.1849 (0.1913) loss: 0.7904 (0.7903) time: 0.1681 data: 0.0760 max mem: 9377 +Train: [97] [3200/6250] eta: 0:07:36 lr: 0.000000 grad: 0.1795 (0.1911) loss: 0.8001 (0.7905) time: 0.1430 data: 0.0576 max mem: 9377 +Train: [97] [3300/6250] eta: 0:07:21 lr: 0.000000 grad: 0.1832 (0.1910) loss: 0.7965 (0.7907) time: 0.1467 data: 0.0640 max mem: 9377 +Train: [97] [3400/6250] eta: 0:07:07 lr: 0.000000 grad: 0.1741 (0.1907) loss: 0.8007 (0.7908) time: 0.1536 data: 0.0719 max mem: 9377 +Train: [97] [3500/6250] eta: 0:06:52 lr: 0.000000 grad: 0.1815 (0.1905) loss: 0.8007 (0.7910) time: 0.1753 data: 0.0851 max mem: 9377 +Train: [97] [3600/6250] eta: 0:06:36 lr: 0.000000 grad: 0.1795 (0.1903) loss: 0.7983 (0.7912) time: 0.1549 data: 0.0627 max mem: 9377 +Train: [97] [3700/6250] eta: 0:06:22 lr: 0.000000 grad: 0.1880 (0.1902) loss: 0.7988 (0.7912) time: 0.1556 data: 0.0708 max mem: 9377 +Train: [97] [3800/6250] eta: 0:06:07 lr: 0.000000 grad: 0.1770 (0.1901) loss: 0.7921 (0.7912) time: 0.1602 data: 0.0708 max mem: 9377 +Train: [97] [3900/6250] eta: 0:05:54 lr: 0.000000 grad: 0.1782 (0.1900) loss: 0.7934 (0.7913) time: 0.1805 data: 0.0951 max mem: 9377 +Train: [97] [4000/6250] eta: 0:05:40 lr: 0.000000 grad: 0.1804 (0.1898) loss: 0.7961 (0.7913) time: 0.1592 data: 0.0744 max mem: 9377 +Train: [97] [4100/6250] eta: 0:05:25 lr: 0.000000 grad: 0.1752 (0.1897) loss: 0.7942 (0.7914) time: 0.1613 data: 0.0855 max mem: 9377 +Train: [97] [4200/6250] eta: 0:05:10 lr: 0.000000 grad: 0.1806 (0.1896) loss: 0.7922 (0.7914) time: 0.1722 data: 0.0781 max mem: 9377 +Train: [97] [4300/6250] eta: 0:04:55 lr: 0.000000 grad: 0.1913 (0.1895) loss: 0.7954 (0.7914) time: 0.1542 data: 0.0618 max mem: 9377 +Train: [97] [4400/6250] eta: 0:04:40 lr: 0.000000 grad: 0.1797 (0.1894) loss: 0.7894 (0.7914) time: 0.1509 data: 0.0639 max mem: 9377 +Train: [97] [4500/6250] eta: 0:04:25 lr: 0.000000 grad: 0.1750 (0.1893) loss: 0.7953 (0.7914) time: 0.1413 data: 0.0547 max mem: 9377 +Train: [97] [4600/6250] eta: 0:04:10 lr: 0.000000 grad: 0.1739 (0.1892) loss: 0.7897 (0.7914) time: 0.1653 data: 0.0786 max mem: 9377 +Train: [97] [4700/6250] eta: 0:03:55 lr: 0.000000 grad: 0.1721 (0.1891) loss: 0.7901 (0.7914) time: 0.1607 data: 0.0744 max mem: 9377 +Train: [97] [4800/6250] eta: 0:03:39 lr: 0.000000 grad: 0.1765 (0.1890) loss: 0.8006 (0.7915) time: 0.1469 data: 0.0595 max mem: 9377 +Train: [97] [4900/6250] eta: 0:03:24 lr: 0.000000 grad: 0.1698 (0.1887) loss: 0.7928 (0.7917) time: 0.1642 data: 0.0760 max mem: 9377 +Train: [97] [5000/6250] eta: 0:03:09 lr: 0.000000 grad: 0.1756 (0.1886) loss: 0.7969 (0.7917) time: 0.1644 data: 0.0861 max mem: 9377 +Train: [97] [5100/6250] eta: 0:02:54 lr: 0.000000 grad: 0.1827 (0.1884) loss: 0.7911 (0.7918) time: 0.1466 data: 0.0576 max mem: 9377 +Train: [97] [5200/6250] eta: 0:02:39 lr: 0.000000 grad: 0.1763 (0.1883) loss: 0.7980 (0.7919) time: 0.1667 data: 0.0809 max mem: 9377 +Train: [97] [5300/6250] eta: 0:02:24 lr: 0.000000 grad: 0.1913 (0.1882) loss: 0.7927 (0.7920) time: 0.1436 data: 0.0515 max mem: 9377 +Train: [97] [5400/6250] eta: 0:02:09 lr: 0.000000 grad: 0.1753 (0.1881) loss: 0.7914 (0.7920) time: 0.1784 data: 0.0787 max mem: 9377 +Train: [97] [5500/6250] eta: 0:01:54 lr: 0.000000 grad: 0.1779 (0.1880) loss: 0.7916 (0.7920) time: 0.1284 data: 0.0454 max mem: 9377 +Train: [97] [5600/6250] eta: 0:01:38 lr: 0.000000 grad: 0.1770 (0.1879) loss: 0.7896 (0.7920) time: 0.1492 data: 0.0618 max mem: 9377 +Train: [97] [5700/6250] eta: 0:01:23 lr: 0.000000 grad: 0.1705 (0.1878) loss: 0.7957 (0.7921) time: 0.1409 data: 0.0522 max mem: 9377 +Train: [97] [5800/6250] eta: 0:01:08 lr: 0.000000 grad: 0.1777 (0.1877) loss: 0.7840 (0.7921) time: 0.1566 data: 0.0667 max mem: 9377 +Train: [97] [5900/6250] eta: 0:00:53 lr: 0.000000 grad: 0.1732 (0.1876) loss: 0.8037 (0.7922) time: 0.1138 data: 0.0233 max mem: 9377 +Train: [97] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.1823 (0.1875) loss: 0.7878 (0.7922) time: 0.1362 data: 0.0446 max mem: 9377 +Train: [97] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1786 (0.1874) loss: 0.7993 (0.7922) time: 0.1281 data: 0.0445 max mem: 9377 +Train: [97] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1708 (0.1872) loss: 0.8008 (0.7923) time: 0.1231 data: 0.0359 max mem: 9377 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1785 (0.1872) loss: 0.7992 (0.7924) time: 0.1510 data: 0.0662 max mem: 9377 +Train: [97] Total time: 0:15:51 (0.1522 s / it) +Averaged stats: lr: 0.000000 grad: 0.1785 (0.1872) loss: 0.7992 (0.7924) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:03:59 loss: 0.8054 (0.8054) time: 3.8618 data: 3.7870 max mem: 9377 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.7795 (0.7818) time: 0.1120 data: 0.0870 max mem: 9377 +Eval (hcp-train-subset): [97] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-train-subset): loss: 0.7795 (0.7818) +Eval (hcp-val): [97] [ 0/62] eta: 0:05:55 loss: 0.8371 (0.8371) time: 5.7358 data: 5.7050 max mem: 9377 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.8349 (0.8378) time: 0.1227 data: 0.0975 max mem: 9377 +Eval (hcp-val): [97] Total time: 0:00:13 (0.2232 s / it) +Averaged stats (hcp-val): loss: 0.8349 (0.8378) +Eval (nsd-val): [97] [ 0/62] eta: 0:05:10 loss: 0.8089 (0.8089) time: 5.0147 data: 4.9847 max mem: 9377 +Eval (nsd-val): [97] [61/62] eta: 0:00:00 loss: 0.8212 (0.8217) time: 0.1233 data: 0.0964 max mem: 9377 +Eval (nsd-val): [97] Total time: 0:00:13 (0.2198 s / it) +Averaged stats (nsd-val): loss: 0.8212 (0.8217) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [98] [ 0/6250] eta: 8:54:48 lr: 0.000000 grad: 0.2196 (0.2196) loss: 0.8305 (0.8305) time: 5.1342 data: 4.9250 max mem: 9377 +Train: [98] [ 100/6250] eta: 0:21:38 lr: 0.000000 grad: 0.2171 (0.2279) loss: 0.8036 (0.8038) time: 0.1753 data: 0.0774 max mem: 9377 +Train: [98] [ 200/6250] eta: 0:18:46 lr: 0.000000 grad: 0.1845 (0.2129) loss: 0.8055 (0.8014) time: 0.1478 data: 0.0449 max mem: 9377 +Train: [98] [ 300/6250] eta: 0:17:33 lr: 0.000000 grad: 0.2027 (0.2066) loss: 0.7953 (0.8003) time: 0.1612 data: 0.0516 max mem: 9377 +Train: [98] [ 400/6250] eta: 0:16:38 lr: 0.000000 grad: 0.1942 (0.2038) loss: 0.7887 (0.7980) time: 0.1362 data: 0.0388 max mem: 9377 +Train: [98] [ 500/6250] eta: 0:15:56 lr: 0.000000 grad: 0.1937 (0.2035) loss: 0.7845 (0.7960) time: 0.1619 data: 0.0622 max mem: 9377 +Train: [98] [ 600/6250] eta: 0:15:16 lr: 0.000000 grad: 0.1912 (0.2022) loss: 0.7895 (0.7950) time: 0.1430 data: 0.0484 max mem: 9377 +Train: [98] [ 700/6250] eta: 0:14:48 lr: 0.000000 grad: 0.1892 (0.2003) loss: 0.7961 (0.7947) time: 0.1423 data: 0.0525 max mem: 9377 +Train: [98] [ 800/6250] eta: 0:14:27 lr: 0.000000 grad: 0.1899 (0.1994) loss: 0.7882 (0.7943) time: 0.1499 data: 0.0547 max mem: 9377 +Train: [98] [ 900/6250] eta: 0:14:02 lr: 0.000000 grad: 0.1852 (0.1979) loss: 0.7906 (0.7944) time: 0.1237 data: 0.0351 max mem: 9377 +Train: [98] [1000/6250] eta: 0:13:41 lr: 0.000000 grad: 0.1896 (0.1967) loss: 0.7874 (0.7942) time: 0.1348 data: 0.0410 max mem: 9377 +Train: [98] [1100/6250] eta: 0:13:19 lr: 0.000000 grad: 0.1737 (0.1954) loss: 0.7992 (0.7941) time: 0.1490 data: 0.0576 max mem: 9377 +Train: [98] [1200/6250] eta: 0:12:59 lr: 0.000000 grad: 0.1917 (0.1952) loss: 0.7885 (0.7938) time: 0.1399 data: 0.0480 max mem: 9377 +Train: [98] [1300/6250] eta: 0:12:43 lr: 0.000000 grad: 0.1853 (0.1948) loss: 0.7975 (0.7936) time: 0.1638 data: 0.0748 max mem: 9377 +Train: [98] [1400/6250] eta: 0:12:24 lr: 0.000000 grad: 0.1869 (0.1946) loss: 0.7960 (0.7934) time: 0.1555 data: 0.0551 max mem: 9377 +Train: [98] [1500/6250] eta: 0:12:08 lr: 0.000000 grad: 0.1935 (0.1941) loss: 0.7893 (0.7932) time: 0.1492 data: 0.0612 max mem: 9377 +Train: [98] [1600/6250] eta: 0:11:55 lr: 0.000000 grad: 0.1834 (0.1937) loss: 0.7934 (0.7930) time: 0.1638 data: 0.0764 max mem: 9377 +Train: [98] [1700/6250] eta: 0:11:39 lr: 0.000000 grad: 0.1865 (0.1935) loss: 0.7948 (0.7930) time: 0.1209 data: 0.0349 max mem: 9377 +Train: [98] [1800/6250] eta: 0:11:23 lr: 0.000000 grad: 0.1807 (0.1933) loss: 0.7955 (0.7929) time: 0.1325 data: 0.0476 max mem: 9377 +Train: [98] [1900/6250] eta: 0:11:08 lr: 0.000000 grad: 0.1792 (0.1935) loss: 0.7957 (0.7928) time: 0.1490 data: 0.0682 max mem: 9377 +Train: [98] [2000/6250] eta: 0:10:54 lr: 0.000000 grad: 0.1830 (0.1934) loss: 0.7923 (0.7926) time: 0.1495 data: 0.0588 max mem: 9377 +Train: [98] [2100/6250] eta: 0:10:39 lr: 0.000000 grad: 0.1831 (0.1936) loss: 0.7954 (0.7925) time: 0.1659 data: 0.0806 max mem: 9377 +Train: [98] [2200/6250] eta: 0:10:20 lr: 0.000000 grad: 0.1860 (0.1939) loss: 0.7905 (0.7924) time: 0.1466 data: 0.0629 max mem: 9377 +Train: [98] [2300/6250] eta: 0:10:04 lr: 0.000000 grad: 0.1769 (0.1940) loss: 0.7872 (0.7923) time: 0.1448 data: 0.0421 max mem: 9377 +Train: [98] [2400/6250] eta: 0:09:48 lr: 0.000000 grad: 0.1901 (0.1939) loss: 0.7941 (0.7924) time: 0.1447 data: 0.0582 max mem: 9377 +Train: [98] [2500/6250] eta: 0:09:32 lr: 0.000000 grad: 0.1805 (0.1937) loss: 0.7919 (0.7924) time: 0.1598 data: 0.0781 max mem: 9377 +Train: [98] [2600/6250] eta: 0:09:16 lr: 0.000000 grad: 0.1961 (0.1936) loss: 0.7919 (0.7925) time: 0.1338 data: 0.0486 max mem: 9377 +Train: [98] [2700/6250] eta: 0:09:00 lr: 0.000000 grad: 0.1755 (0.1935) loss: 0.7922 (0.7925) time: 0.1460 data: 0.0535 max mem: 9377 +Train: [98] [2800/6250] eta: 0:08:44 lr: 0.000000 grad: 0.1719 (0.1933) loss: 0.7985 (0.7924) time: 0.1403 data: 0.0501 max mem: 9377 +Train: [98] [2900/6250] eta: 0:08:28 lr: 0.000000 grad: 0.1810 (0.1933) loss: 0.7874 (0.7923) time: 0.1216 data: 0.0349 max mem: 9377 +Train: [98] [3000/6250] eta: 0:08:11 lr: 0.000000 grad: 0.1978 (0.1934) loss: 0.7880 (0.7922) time: 0.1311 data: 0.0383 max mem: 9377 +Train: [98] [3100/6250] eta: 0:07:55 lr: 0.000000 grad: 0.1920 (0.1934) loss: 0.7837 (0.7921) time: 0.1381 data: 0.0532 max mem: 9377 +Train: [98] [3200/6250] eta: 0:07:40 lr: 0.000000 grad: 0.1848 (0.1933) loss: 0.7858 (0.7920) time: 0.1475 data: 0.0607 max mem: 9377 +Train: [98] [3300/6250] eta: 0:07:24 lr: 0.000000 grad: 0.1919 (0.1931) loss: 0.7769 (0.7918) time: 0.1482 data: 0.0640 max mem: 9377 +Train: [98] [3400/6250] eta: 0:07:08 lr: 0.000000 grad: 0.1886 (0.1930) loss: 0.7885 (0.7917) time: 0.1428 data: 0.0474 max mem: 9377 +Train: [98] [3500/6250] eta: 0:06:52 lr: 0.000000 grad: 0.1704 (0.1928) loss: 0.7945 (0.7916) time: 0.1201 data: 0.0290 max mem: 9377 +Train: [98] [3600/6250] eta: 0:06:37 lr: 0.000000 grad: 0.1814 (0.1926) loss: 0.7883 (0.7916) time: 0.1396 data: 0.0514 max mem: 9377 +Train: [98] [3700/6250] eta: 0:06:22 lr: 0.000000 grad: 0.1886 (0.1925) loss: 0.7959 (0.7915) time: 0.1568 data: 0.0628 max mem: 9377 +Train: [98] [3800/6250] eta: 0:06:06 lr: 0.000000 grad: 0.1775 (0.1923) loss: 0.7897 (0.7915) time: 0.1405 data: 0.0468 max mem: 9377 +Train: [98] [3900/6250] eta: 0:05:52 lr: 0.000000 grad: 0.1826 (0.1921) loss: 0.7911 (0.7915) time: 0.1472 data: 0.0531 max mem: 9377 +Train: [98] [4000/6250] eta: 0:05:38 lr: 0.000000 grad: 0.1766 (0.1919) loss: 0.7873 (0.7915) time: 0.2305 data: 0.1368 max mem: 9377 +Train: [98] [4100/6250] eta: 0:05:22 lr: 0.000000 grad: 0.1884 (0.1917) loss: 0.7935 (0.7916) time: 0.1544 data: 0.0724 max mem: 9377 +Train: [98] [4200/6250] eta: 0:05:07 lr: 0.000000 grad: 0.1850 (0.1915) loss: 0.7855 (0.7917) time: 0.1499 data: 0.0627 max mem: 9377 +Train: [98] [4300/6250] eta: 0:04:52 lr: 0.000000 grad: 0.1690 (0.1913) loss: 0.7964 (0.7918) time: 0.1436 data: 0.0506 max mem: 9377 +Train: [98] [4400/6250] eta: 0:04:38 lr: 0.000000 grad: 0.1778 (0.1911) loss: 0.8063 (0.7918) time: 0.1622 data: 0.0743 max mem: 9377 +Train: [98] [4500/6250] eta: 0:04:22 lr: 0.000000 grad: 0.1853 (0.1910) loss: 0.7924 (0.7918) time: 0.1568 data: 0.0656 max mem: 9377 +Train: [98] [4600/6250] eta: 0:04:07 lr: 0.000000 grad: 0.1891 (0.1909) loss: 0.7903 (0.7918) time: 0.1659 data: 0.0820 max mem: 9377 +Train: [98] [4700/6250] eta: 0:03:52 lr: 0.000000 grad: 0.1881 (0.1908) loss: 0.7936 (0.7919) time: 0.1429 data: 0.0551 max mem: 9377 +Train: [98] [4800/6250] eta: 0:03:37 lr: 0.000000 grad: 0.1824 (0.1907) loss: 0.7916 (0.7919) time: 0.1711 data: 0.0825 max mem: 9377 +Train: [98] [4900/6250] eta: 0:03:22 lr: 0.000000 grad: 0.1777 (0.1905) loss: 0.7933 (0.7920) time: 0.1544 data: 0.0635 max mem: 9377 +Train: [98] [5000/6250] eta: 0:03:07 lr: 0.000000 grad: 0.1850 (0.1904) loss: 0.8021 (0.7921) time: 0.1400 data: 0.0519 max mem: 9377 +Train: [98] [5100/6250] eta: 0:02:52 lr: 0.000000 grad: 0.1700 (0.1903) loss: 0.7965 (0.7922) time: 0.1598 data: 0.0713 max mem: 9377 +Train: [98] [5200/6250] eta: 0:02:37 lr: 0.000000 grad: 0.1843 (0.1902) loss: 0.7996 (0.7923) time: 0.1442 data: 0.0497 max mem: 9377 +Train: [98] [5300/6250] eta: 0:02:22 lr: 0.000000 grad: 0.1893 (0.1901) loss: 0.7953 (0.7923) time: 0.1270 data: 0.0392 max mem: 9377 +Train: [98] [5400/6250] eta: 0:02:07 lr: 0.000000 grad: 0.1957 (0.1901) loss: 0.7940 (0.7924) time: 0.1573 data: 0.0710 max mem: 9377 +Train: [98] [5500/6250] eta: 0:01:52 lr: 0.000000 grad: 0.1790 (0.1900) loss: 0.7951 (0.7925) time: 0.1465 data: 0.0536 max mem: 9377 +Train: [98] [5600/6250] eta: 0:01:37 lr: 0.000000 grad: 0.1790 (0.1900) loss: 0.7949 (0.7925) time: 0.1398 data: 0.0556 max mem: 9377 +Train: [98] [5700/6250] eta: 0:01:22 lr: 0.000000 grad: 0.1853 (0.1900) loss: 0.7912 (0.7926) time: 0.1596 data: 0.0712 max mem: 9377 +Train: [98] [5800/6250] eta: 0:01:07 lr: 0.000000 grad: 0.1848 (0.1899) loss: 0.7922 (0.7926) time: 0.1399 data: 0.0510 max mem: 9377 +Train: [98] [5900/6250] eta: 0:00:52 lr: 0.000000 grad: 0.1816 (0.1899) loss: 0.7951 (0.7927) time: 0.1502 data: 0.0602 max mem: 9377 +Train: [98] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.1902 (0.1899) loss: 0.7916 (0.7927) time: 0.1302 data: 0.0469 max mem: 9377 +Train: [98] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1815 (0.1899) loss: 0.7920 (0.7927) time: 0.1656 data: 0.0804 max mem: 9377 +Train: [98] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1824 (0.1899) loss: 0.7985 (0.7927) time: 0.1387 data: 0.0583 max mem: 9377 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1885 (0.1898) loss: 0.7914 (0.7927) time: 0.1193 data: 0.0250 max mem: 9377 +Train: [98] Total time: 0:15:41 (0.1507 s / it) +Averaged stats: lr: 0.000000 grad: 0.1885 (0.1898) loss: 0.7914 (0.7927) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:04:31 loss: 0.8025 (0.8025) time: 4.3750 data: 4.3032 max mem: 9377 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.7815 (0.7822) time: 0.1390 data: 0.1141 max mem: 9377 +Eval (hcp-train-subset): [98] Total time: 0:00:14 (0.2294 s / it) +Averaged stats (hcp-train-subset): loss: 0.7815 (0.7822) +Eval (hcp-val): [98] [ 0/62] eta: 0:05:01 loss: 0.8356 (0.8356) time: 4.8561 data: 4.7896 max mem: 9377 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.8352 (0.8375) time: 0.1373 data: 0.1105 max mem: 9377 +Eval (hcp-val): [98] Total time: 0:00:14 (0.2334 s / it) +Averaged stats (hcp-val): loss: 0.8352 (0.8375) +Eval (nsd-val): [98] [ 0/62] eta: 0:04:16 loss: 0.8163 (0.8163) time: 4.1419 data: 4.0603 max mem: 9377 +Eval (nsd-val): [98] [61/62] eta: 0:00:00 loss: 0.8212 (0.8216) time: 0.1393 data: 0.1141 max mem: 9377 +Eval (nsd-val): [98] Total time: 0:00:14 (0.2263 s / it) +Averaged stats (nsd-val): loss: 0.8212 (0.8216) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +Train: [99] [ 0/6250] eta: 11:06:18 lr: 0.000000 grad: 0.2111 (0.2111) loss: 0.8289 (0.8289) time: 6.3966 data: 6.2924 max mem: 9377 +Train: [99] [ 100/6250] eta: 0:21:44 lr: 0.000000 grad: 0.2071 (0.2150) loss: 0.8013 (0.7969) time: 0.1695 data: 0.0674 max mem: 9377 +Train: [99] [ 200/6250] eta: 0:18:41 lr: 0.000000 grad: 0.1995 (0.2130) loss: 0.7966 (0.7961) time: 0.1597 data: 0.0476 max mem: 9377 +Train: [99] [ 300/6250] eta: 0:17:18 lr: 0.000000 grad: 0.1893 (0.2077) loss: 0.7959 (0.7967) time: 0.1684 data: 0.0668 max mem: 9377 +Train: [99] [ 400/6250] eta: 0:16:20 lr: 0.000000 grad: 0.1851 (0.2035) loss: 0.7904 (0.7966) time: 0.1485 data: 0.0507 max mem: 9377 +Train: [99] [ 500/6250] eta: 0:15:46 lr: 0.000000 grad: 0.1891 (0.2015) loss: 0.7891 (0.7961) time: 0.1546 data: 0.0465 max mem: 9377 +Train: [99] [ 600/6250] eta: 0:15:11 lr: 0.000000 grad: 0.1814 (0.2006) loss: 0.7989 (0.7954) time: 0.1586 data: 0.0697 max mem: 9377 +Train: [99] [ 700/6250] eta: 0:14:50 lr: 0.000000 grad: 0.1748 (0.1992) loss: 0.7986 (0.7953) time: 0.1174 data: 0.0246 max mem: 9377 +Train: [99] [ 800/6250] eta: 0:14:26 lr: 0.000000 grad: 0.1854 (0.1986) loss: 0.7927 (0.7945) time: 0.1495 data: 0.0516 max mem: 9377 +Train: [99] [ 900/6250] eta: 0:14:06 lr: 0.000000 grad: 0.1837 (0.1982) loss: 0.7992 (0.7944) time: 0.1579 data: 0.0603 max mem: 9377 +Train: [99] [1000/6250] eta: 0:13:41 lr: 0.000000 grad: 0.2079 (0.1977) loss: 0.7872 (0.7943) time: 0.1535 data: 0.0654 max mem: 9377 +Train: [99] [1100/6250] eta: 0:13:19 lr: 0.000000 grad: 0.1747 (0.1972) loss: 0.7976 (0.7943) time: 0.1528 data: 0.0629 max mem: 9377 +Train: [99] [1200/6250] eta: 0:12:56 lr: 0.000000 grad: 0.1822 (0.1965) loss: 0.7863 (0.7943) time: 0.1379 data: 0.0430 max mem: 9377 +Train: [99] [1300/6250] eta: 0:12:37 lr: 0.000000 grad: 0.1819 (0.1959) loss: 0.8007 (0.7943) time: 0.1445 data: 0.0539 max mem: 9377 +Train: [99] [1400/6250] eta: 0:12:20 lr: 0.000000 grad: 0.1805 (0.1953) loss: 0.7990 (0.7944) time: 0.1480 data: 0.0618 max mem: 9377 +Train: [99] [1500/6250] eta: 0:12:03 lr: 0.000000 grad: 0.1778 (0.1947) loss: 0.7870 (0.7944) time: 0.1535 data: 0.0745 max mem: 9377 +Train: [99] [1600/6250] eta: 0:11:50 lr: 0.000000 grad: 0.1818 (0.1942) loss: 0.7962 (0.7945) time: 0.1710 data: 0.0861 max mem: 9377 +Train: [99] [1700/6250] eta: 0:11:36 lr: 0.000000 grad: 0.1951 (0.1940) loss: 0.7956 (0.7945) time: 0.1401 data: 0.0582 max mem: 9377 +Train: [99] [1800/6250] eta: 0:11:20 lr: 0.000000 grad: 0.1725 (0.1936) loss: 0.7973 (0.7946) time: 0.1437 data: 0.0587 max mem: 9377 +Train: [99] [1900/6250] eta: 0:11:08 lr: 0.000000 grad: 0.1868 (0.1933) loss: 0.7917 (0.7947) time: 0.1766 data: 0.0908 max mem: 9377 +Train: [99] [2000/6250] eta: 0:10:56 lr: 0.000000 grad: 0.1858 (0.1932) loss: 0.7967 (0.7948) time: 0.1527 data: 0.0630 max mem: 9377 +Train: [99] [2100/6250] eta: 0:10:40 lr: 0.000000 grad: 0.1772 (0.1928) loss: 0.7977 (0.7948) time: 0.1526 data: 0.0595 max mem: 9377 +Train: [99] [2200/6250] eta: 0:10:22 lr: 0.000000 grad: 0.1922 (0.1925) loss: 0.7896 (0.7948) time: 0.1451 data: 0.0498 max mem: 9377 +Train: [99] [2300/6250] eta: 0:10:06 lr: 0.000000 grad: 0.1836 (0.1923) loss: 0.8002 (0.7949) time: 0.1224 data: 0.0309 max mem: 9377 +Train: [99] [2400/6250] eta: 0:09:49 lr: 0.000000 grad: 0.1811 (0.1921) loss: 0.8019 (0.7951) time: 0.1462 data: 0.0559 max mem: 9377 +Train: [99] [2500/6250] eta: 0:09:32 lr: 0.000000 grad: 0.1886 (0.1920) loss: 0.7995 (0.7951) time: 0.1432 data: 0.0556 max mem: 9377 +Train: [99] [2600/6250] eta: 0:09:16 lr: 0.000000 grad: 0.1857 (0.1919) loss: 0.7860 (0.7951) time: 0.1479 data: 0.0593 max mem: 9377 +Train: [99] [2700/6250] eta: 0:09:00 lr: 0.000000 grad: 0.1784 (0.1917) loss: 0.7912 (0.7951) time: 0.1361 data: 0.0436 max mem: 9377 +Train: [99] [2800/6250] eta: 0:08:43 lr: 0.000000 grad: 0.1895 (0.1916) loss: 0.7897 (0.7949) time: 0.1313 data: 0.0272 max mem: 9377 +Train: [99] [2900/6250] eta: 0:08:26 lr: 0.000000 grad: 0.1988 (0.1915) loss: 0.7848 (0.7947) time: 0.1130 data: 0.0229 max mem: 9377 +Train: [99] [3000/6250] eta: 0:08:09 lr: 0.000000 grad: 0.1710 (0.1913) loss: 0.7977 (0.7947) time: 0.1356 data: 0.0435 max mem: 9377 +Train: [99] [3100/6250] eta: 0:07:53 lr: 0.000000 grad: 0.1846 (0.1912) loss: 0.7874 (0.7945) time: 0.1522 data: 0.0612 max mem: 9377 +Train: [99] [3200/6250] eta: 0:07:37 lr: 0.000000 grad: 0.1803 (0.1908) loss: 0.7942 (0.7945) time: 0.1605 data: 0.0765 max mem: 9377 +Train: [99] [3300/6250] eta: 0:07:21 lr: 0.000000 grad: 0.1939 (0.1907) loss: 0.7911 (0.7944) time: 0.1463 data: 0.0584 max mem: 9377 +Train: [99] [3400/6250] eta: 0:07:06 lr: 0.000000 grad: 0.1803 (0.1906) loss: 0.7925 (0.7944) time: 0.1437 data: 0.0531 max mem: 9377 +Train: [99] [3500/6250] eta: 0:06:50 lr: 0.000000 grad: 0.1843 (0.1904) loss: 0.7863 (0.7943) time: 0.1563 data: 0.0641 max mem: 9377 +Train: [99] [3600/6250] eta: 0:06:34 lr: 0.000000 grad: 0.1759 (0.1903) loss: 0.7915 (0.7942) time: 0.1335 data: 0.0469 max mem: 9377 +Train: [99] [3700/6250] eta: 0:06:19 lr: 0.000000 grad: 0.1849 (0.1902) loss: 0.7902 (0.7941) time: 0.1422 data: 0.0592 max mem: 9377 +Train: [99] [3800/6250] eta: 0:06:04 lr: 0.000000 grad: 0.1710 (0.1899) loss: 0.8001 (0.7940) time: 0.1383 data: 0.0501 max mem: 9377 +Train: [99] [3900/6250] eta: 0:05:51 lr: 0.000000 grad: 0.1924 (0.1899) loss: 0.7875 (0.7940) time: 0.1549 data: 0.0634 max mem: 9377 +Train: [99] [4000/6250] eta: 0:05:36 lr: 0.000000 grad: 0.1899 (0.1897) loss: 0.7904 (0.7939) time: 0.1464 data: 0.0557 max mem: 9377 +Train: [99] [4100/6250] eta: 0:05:21 lr: 0.000000 grad: 0.1808 (0.1896) loss: 0.7898 (0.7939) time: 0.1551 data: 0.0638 max mem: 9377 +Train: [99] [4200/6250] eta: 0:05:07 lr: 0.000000 grad: 0.1850 (0.1896) loss: 0.7942 (0.7939) time: 0.1597 data: 0.0729 max mem: 9377 +Train: [99] [4300/6250] eta: 0:04:52 lr: 0.000000 grad: 0.1952 (0.1895) loss: 0.7803 (0.7938) time: 0.1491 data: 0.0617 max mem: 9377 +Train: [99] [4400/6250] eta: 0:04:38 lr: 0.000000 grad: 0.1726 (0.1894) loss: 0.8006 (0.7938) time: 0.1769 data: 0.0951 max mem: 9377 +Train: [99] [4500/6250] eta: 0:04:22 lr: 0.000000 grad: 0.1790 (0.1892) loss: 0.7912 (0.7938) time: 0.1467 data: 0.0554 max mem: 9377 +Train: [99] [4600/6250] eta: 0:04:07 lr: 0.000000 grad: 0.1766 (0.1891) loss: 0.7943 (0.7939) time: 0.1576 data: 0.0684 max mem: 9377 +Train: [99] [4700/6250] eta: 0:03:52 lr: 0.000000 grad: 0.1814 (0.1889) loss: 0.7933 (0.7939) time: 0.1477 data: 0.0574 max mem: 9377 +Train: [99] [4800/6250] eta: 0:03:37 lr: 0.000000 grad: 0.1715 (0.1888) loss: 0.7970 (0.7939) time: 0.1643 data: 0.0710 max mem: 9377 +Train: [99] [4900/6250] eta: 0:03:22 lr: 0.000000 grad: 0.1695 (0.1887) loss: 0.8037 (0.7940) time: 0.1438 data: 0.0621 max mem: 9377 +Train: [99] [5000/6250] eta: 0:03:08 lr: 0.000000 grad: 0.1675 (0.1885) loss: 0.7948 (0.7940) time: 0.1520 data: 0.0632 max mem: 9377 +Train: [99] [5100/6250] eta: 0:02:53 lr: 0.000000 grad: 0.1802 (0.1884) loss: 0.7953 (0.7940) time: 0.1441 data: 0.0518 max mem: 9377 +Train: [99] [5200/6250] eta: 0:02:37 lr: 0.000000 grad: 0.1731 (0.1882) loss: 0.8041 (0.7940) time: 0.1339 data: 0.0409 max mem: 9377 +Train: [99] [5300/6250] eta: 0:02:22 lr: 0.000000 grad: 0.1731 (0.1881) loss: 0.7918 (0.7940) time: 0.1482 data: 0.0716 max mem: 9377 +Train: [99] [5400/6250] eta: 0:02:07 lr: 0.000000 grad: 0.1679 (0.1879) loss: 0.8036 (0.7940) time: 0.1185 data: 0.0312 max mem: 9377 +Train: [99] [5500/6250] eta: 0:01:52 lr: 0.000000 grad: 0.1798 (0.1878) loss: 0.7952 (0.7941) time: 0.1379 data: 0.0557 max mem: 9377 +Train: [99] [5600/6250] eta: 0:01:36 lr: 0.000000 grad: 0.1754 (0.1876) loss: 0.8002 (0.7941) time: 0.1101 data: 0.0284 max mem: 9377 +Train: [99] [5700/6250] eta: 0:01:21 lr: 0.000000 grad: 0.1788 (0.1875) loss: 0.8017 (0.7942) time: 0.1426 data: 0.0609 max mem: 9377 +Train: [99] [5800/6250] eta: 0:01:06 lr: 0.000000 grad: 0.1768 (0.1873) loss: 0.7945 (0.7942) time: 0.1401 data: 0.0641 max mem: 9377 +Train: [99] [5900/6250] eta: 0:00:51 lr: 0.000000 grad: 0.1847 (0.1873) loss: 0.7914 (0.7942) time: 0.1394 data: 0.0620 max mem: 9377 +Train: [99] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.1778 (0.1872) loss: 0.8034 (0.7942) time: 0.1496 data: 0.0663 max mem: 9377 +Train: [99] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1669 (0.1872) loss: 0.7994 (0.7942) time: 0.1256 data: 0.0437 max mem: 9377 +Train: [99] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1799 (0.1871) loss: 0.7919 (0.7943) time: 0.1285 data: 0.0511 max mem: 9377 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1934 (0.1871) loss: 0.7896 (0.7943) time: 0.1303 data: 0.0441 max mem: 9377 +Train: [99] Total time: 0:15:27 (0.1484 s / it) +Averaged stats: lr: 0.000000 grad: 0.1934 (0.1871) loss: 0.7896 (0.7943) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:05:41 loss: 0.8017 (0.8017) time: 5.5051 data: 5.4750 max mem: 9377 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.7794 (0.7819) time: 0.1387 data: 0.1136 max mem: 9377 +Eval (hcp-train-subset): [99] Total time: 0:00:13 (0.2167 s / it) +Averaged stats (hcp-train-subset): loss: 0.7794 (0.7819) +Making plots (hcp-train-subset): example=30 +Eval (hcp-val): [99] [ 0/62] eta: 0:06:12 loss: 0.8341 (0.8341) time: 6.0033 data: 5.9722 max mem: 9377 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.8355 (0.8376) time: 0.1209 data: 0.0961 max mem: 9377 +Eval (hcp-val): [99] Total time: 0:00:13 (0.2199 s / it) +Averaged stats (hcp-val): loss: 0.8355 (0.8376) +Making plots (hcp-val): example=3 +Eval (nsd-val): [99] [ 0/62] eta: 0:04:27 loss: 0.8124 (0.8124) time: 4.3154 data: 4.2401 max mem: 9377 +Eval (nsd-val): [99] [61/62] eta: 0:00:00 loss: 0.8223 (0.8219) time: 0.1267 data: 0.1019 max mem: 9377 +Eval (nsd-val): [99] Total time: 0:00:13 (0.2126 s / it) +Averaged stats (nsd-val): loss: 0.8223 (0.8219) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-00099.pth +done! training time: 1 day, 5:36:50 diff --git a/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic/config.yaml b/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..287ca075765f2301ac750a70c4d3a9a18ec2ff8b --- /dev/null +++ b/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic/eval_table.csv b/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..9b3e82e8ac57941285fb79924b3b04265d07884f --- /dev/null +++ b/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,0.046415888336127774,train,0.8484251968503937,0.015995532945611686,0.8485089157747366,0.01607942211160092,0.848708087444587,0.01597936645895217 +flat_mae,patch,logistic,aabc_age,,0.046415888336127774,test,0.3269230769230769,0.059978435473872535,0.3167299177735611,0.05931230733384628,0.3161630036630037,0.05904017184400353 +flat_mae,patch,logistic,aabc_age,1,0.005994842503189409,train,0.6909448818897638,0.020581250816599318,0.6894709330100421,0.020968361414545132,0.6914292879213025,0.020691179127025006 +flat_mae,patch,logistic,aabc_age,1,0.005994842503189409,test,0.5384615384615384,0.05519544349172783,0.48772710418984055,0.05513868145573847,0.530448717948718,0.05410809153290984 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,train,0.5492125984251969,0.020489258688870204,0.5410250087895812,0.021176936361674784,0.5499724054400841,0.020500705903245313 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,test,0.4807692307692308,0.06353841497483859,0.4606775559588626,0.062053678804835456,0.47573260073260076,0.06271876462253284 +flat_mae,patch,logistic,aabc_age,3,0.005994842503189409,train,0.6850393700787402,0.02017275004504067,0.6831771246930798,0.020443754162839382,0.6848957783919492,0.02019846479987212 +flat_mae,patch,logistic,aabc_age,3,0.005994842503189409,test,0.5384615384615384,0.06300389769101872,0.5169103313840157,0.06628251523583914,0.5336538461538461,0.06289580840658307 +flat_mae,patch,logistic,aabc_age,4,9.999999999999999e-05,train,0.4822834645669291,0.0210428010516412,0.45728569377485156,0.02176333926381084,0.4791864855223259,0.02101130568085721 +flat_mae,patch,logistic,aabc_age,4,9.999999999999999e-05,test,0.5769230769230769,0.06034235414816041,0.5429694160272804,0.06663970422037728,0.565018315018315,0.06043744449065555 +flat_mae,patch,logistic,aabc_age,5,0.005994842503189409,train,0.6811023622047244,0.019980870012848106,0.6811512811200513,0.020072155443880194,0.6809135069976544,0.019857144112961033 +flat_mae,patch,logistic,aabc_age,5,0.005994842503189409,test,0.46153846153846156,0.06231731881950692,0.43421356421356416,0.06333922247789144,0.45650183150183155,0.0616474213267528 +flat_mae,patch,logistic,aabc_age,6,0.046415888336127774,train,0.8523622047244095,0.015288281115386363,0.8516332235484114,0.0154567439186399,0.8514245122164066,0.015396060701500755 +flat_mae,patch,logistic,aabc_age,6,0.046415888336127774,test,0.5192307692307693,0.0683808513284411,0.5195726495726496,0.06896968536618439,0.5206043956043955,0.0687060014621711 +flat_mae,patch,logistic,aabc_age,7,0.005994842503189409,train,0.687007874015748,0.019006599827472395,0.68440587361963,0.01924982629246103,0.6882673013815022,0.01898150932378291 +flat_mae,patch,logistic,aabc_age,7,0.005994842503189409,test,0.5384615384615384,0.060782812503683864,0.5070993914807302,0.06302930688534455,0.532051282051282,0.06035866002205527 +flat_mae,patch,logistic,aabc_age,8,0.046415888336127774,train,0.8366141732283464,0.016030507429720908,0.836195844866827,0.016202585894743944,0.8359481836018788,0.016105266602912664 +flat_mae,patch,logistic,aabc_age,8,0.046415888336127774,test,0.5576923076923077,0.06474550771467291,0.5577777777777777,0.06492434504623282,0.5590659340659341,0.06473729888165897 +flat_mae,patch,logistic,aabc_age,9,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,9,21.54434690031882,test,0.4807692307692308,0.06509987774827412,0.45744104093250204,0.06584195999695652,0.4743589743589744,0.06455884673657948 +flat_mae,patch,logistic,aabc_age,10,0.3593813663804626,train,0.9960629921259843,0.0029014733045635006,0.9960165932547393,0.0029509043399357244,0.9958677685950413,0.0030453480138807184 +flat_mae,patch,logistic,aabc_age,10,0.3593813663804626,test,0.5384615384615384,0.06709450854520864,0.5353296703296704,0.06801653013603917,0.5368589743589743,0.06727048866367215 +flat_mae,patch,logistic,aabc_age,11,9.999999999999999e-05,train,0.4862204724409449,0.021506303623407507,0.4553597350354601,0.022125886315060402,0.4832863514573572,0.021390885089217423 +flat_mae,patch,logistic,aabc_age,11,9.999999999999999e-05,test,0.5576923076923077,0.060191644134951476,0.5042962749615975,0.06287764559826164,0.5469322344322345,0.05950815358352338 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,train,0.6811023622047244,0.020106531253431002,0.6776707406367626,0.020391152562291726,0.6810987094089058,0.020044744021015824 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,test,0.4230769230769231,0.0656901909224806,0.4297369297369297,0.06495014599248462,0.42124542124542125,0.06564201717376401 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,train,0.6791338582677166,0.02002420415241274,0.6769865069326397,0.020317784653526322,0.6794001215982692,0.020064150739029854 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,test,0.4807692307692308,0.06327449124729136,0.4617257742257742,0.0656823241352244,0.4787087912087912,0.06317904756654873 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,train,0.844488188976378,0.015618301775887841,0.8441057443169617,0.01573707637839785,0.8436775373089958,0.015714765970822827 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,test,0.5384615384615384,0.06021827898786509,0.5318167823555755,0.06351924109970726,0.5398351648351648,0.06038447202234779 +flat_mae,patch,logistic,aabc_age,15,9.999999999999999e-05,train,0.4940944881889764,0.021539290201444537,0.46950784170630944,0.022019713725794497,0.49111567850491666,0.021389114978481658 +flat_mae,patch,logistic,aabc_age,15,9.999999999999999e-05,test,0.5384615384615384,0.05897812000270316,0.49687986305633364,0.057480713930717436,0.5290750915750916,0.057833422241754404 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,train,0.6791338582677166,0.019982649489308488,0.6743597147477769,0.02063924449342121,0.6792001749173842,0.0199626364830176 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,test,0.36538461538461536,0.06068587272116135,0.36352627257799675,0.06036554721840611,0.3660714285714286,0.060828399194328024 +flat_mae,patch,logistic,aabc_age,17,0.005994842503189409,train,0.6732283464566929,0.020327262343837374,0.6711933232639596,0.020614849064071572,0.6745395472478322,0.02028791985450001 +flat_mae,patch,logistic,aabc_age,17,0.005994842503189409,test,0.5192307692307693,0.0616524305226413,0.4979166666666666,0.061143790784986406,0.5128205128205129,0.06096300513790906 +flat_mae,patch,logistic,aabc_age,18,0.000774263682681127,train,0.5590551181102362,0.021803308493632847,0.5522880125099148,0.022442554770798463,0.5587976299845221,0.021817791239157315 +flat_mae,patch,logistic,aabc_age,18,0.000774263682681127,test,0.5,0.06099442209437859,0.47514041514041516,0.05884670213661691,0.4906135531135531,0.06020963082756595 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,train,0.8562992125984252,0.015654633966820476,0.8564345804200748,0.015721467783461607,0.8575472200219031,0.015529955399877329 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,test,0.40384615384615385,0.06816499084083678,0.4084669356408487,0.0685552353576906,0.4049908424908425,0.06849839391423165 +flat_mae,patch,logistic,aabc_age,20,0.000774263682681127,train,0.5452755905511811,0.021450415899014277,0.5345501650530318,0.0222677807136452,0.5446523612887881,0.021484454236092035 +flat_mae,patch,logistic,aabc_age,20,0.000774263682681127,test,0.5384615384615384,0.056741537439568175,0.5166915030388032,0.0625569348086195,0.5336538461538463,0.05632598273793801 +flat_mae,patch,logistic,aabc_age,21,0.3593813663804626,train,0.9940944881889764,0.0033767595124630317,0.9940853895896431,0.0033854944041010184,0.9940692074439622,0.0033984648797639665 +flat_mae,patch,logistic,aabc_age,21,0.3593813663804626,test,0.3076923076923077,0.06103258417731423,0.3125925925925926,0.05961882012278606,0.3067765567765568,0.06075299381702839 +flat_mae,patch,logistic,aabc_age,22,0.046415888336127774,train,0.8503937007874016,0.015428801836041986,0.8499562679573284,0.015519263384332347,0.8504786013897493,0.015446067749181471 +flat_mae,patch,logistic,aabc_age,22,0.046415888336127774,test,0.34615384615384615,0.06475849536465074,0.3566406711568002,0.06417541144447803,0.34706959706959706,0.06507525936385292 +flat_mae,patch,logistic,aabc_age,23,0.046415888336127774,train,0.8523622047244095,0.01562544016368054,0.8532803624839458,0.015596648167245548,0.8528122716436288,0.01557651718788025 +flat_mae,patch,logistic,aabc_age,23,0.046415888336127774,test,0.4423076923076923,0.059212734517112814,0.42248847926267286,0.05475109674361226,0.4356684981684981,0.05823920190153227 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,train,0.6968503937007874,0.020147487680664294,0.6960686341417721,0.020325505666749222,0.6976776216989617,0.02009033598509028 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,test,0.4423076923076923,0.06683544217973685,0.4401242236024845,0.06701285499402448,0.4375,0.0666050214376075 +flat_mae,patch,logistic,aabc_age,25,0.000774263682681127,train,0.5688976377952756,0.02080945948522469,0.5597815339655863,0.02152296086192698,0.5688282884755911,0.020772691471699585 +flat_mae,patch,logistic,aabc_age,25,0.000774263682681127,test,0.28846153846153844,0.05904671703743916,0.2734646962233169,0.05672688626938461,0.2831959706959707,0.05827554943177018 +flat_mae,patch,logistic,aabc_age,26,0.000774263682681127,train,0.5649606299212598,0.02177988339561627,0.5566479633215817,0.02206984186968756,0.5641433267675382,0.02172069286316667 +flat_mae,patch,logistic,aabc_age,26,0.000774263682681127,test,0.5192307692307693,0.06393788590841794,0.5045977011494253,0.066554409306599,0.5144230769230769,0.06394094227934322 +flat_mae,patch,logistic,aabc_age,27,0.005994842503189409,train,0.6929133858267716,0.020473387319131543,0.6921901222203017,0.020679347771869427,0.6938629315156246,0.02046829065996268 +flat_mae,patch,logistic,aabc_age,27,0.005994842503189409,test,0.4230769230769231,0.06201997912488272,0.403968253968254,0.0644098896823417,0.4168956043956044,0.06175971655585291 +flat_mae,patch,logistic,aabc_age,28,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,28,2.782559402207126,test,0.4423076923076923,0.06580030754770665,0.42652381574283105,0.06562130433115755,0.4358974358974359,0.06540111199277875 +flat_mae,patch,logistic,aabc_age,29,0.000774263682681127,train,0.547244094488189,0.020726082833980454,0.5342814512625833,0.021052259406622514,0.5456482587856667,0.020590637710830823 +flat_mae,patch,logistic,aabc_age,29,0.000774263682681127,test,0.38461538461538464,0.06312487623750244,0.38681626928471247,0.060015679757790055,0.38369963369963367,0.06298079712438281 +flat_mae,patch,logistic,aabc_age,30,0.005994842503189409,train,0.6712598425196851,0.02134349937545226,0.6680528114603126,0.021820796489913582,0.6711856454585732,0.021343584487865174 +flat_mae,patch,logistic,aabc_age,30,0.005994842503189409,test,0.5576923076923077,0.05937589613151213,0.5285714285714286,0.06391745935104283,0.5556318681318682,0.05910997530886905 +flat_mae,patch,logistic,aabc_age,31,0.005994842503189409,train,0.6712598425196851,0.02092868208988488,0.6684713211730369,0.021236736400896607,0.6717707412315947,0.02094115487006628 +flat_mae,patch,logistic,aabc_age,31,0.005994842503189409,test,0.6153846153846154,0.06821341163310349,0.614609250398724,0.06966901128454268,0.6110347985347986,0.06830997790692363 +flat_mae,patch,logistic,aabc_age,32,0.005994842503189409,train,0.6771653543307087,0.019533742988332527,0.6765521454990305,0.01966940258555827,0.6785041974418332,0.019456907207935175 +flat_mae,patch,logistic,aabc_age,32,0.005994842503189409,test,0.46153846153846156,0.06228692673911237,0.4462962962962963,0.06184382623303706,0.4565018315018315,0.06182489227622527 +flat_mae,patch,logistic,aabc_age,33,9.999999999999999e-05,train,0.4862204724409449,0.02013867078861269,0.45117091216408867,0.020889788507338285,0.48209853871102,0.020012150236888862 +flat_mae,patch,logistic,aabc_age,33,9.999999999999999e-05,test,0.5,0.058587920074860356,0.47426478772902336,0.06422098272202606,0.49221611721611724,0.05831191145775572 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,train,0.6968503937007874,0.019256392500084332,0.6954625554434085,0.019506717721369644,0.6975776483585192,0.019243247205871908 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,test,0.3076923076923077,0.0621674585246034,0.3067367415193502,0.064577378178551,0.3042582417582418,0.06188363824494052 +flat_mae,patch,logistic,aabc_age,35,0.046415888336127774,train,0.8366141732283464,0.015004545470382298,0.8357718544180283,0.015286299850720667,0.8356306423802574,0.015160122596603277 +flat_mae,patch,logistic,aabc_age,35,0.046415888336127774,test,0.4230769230769231,0.06712016734116003,0.42630681818181815,0.06656670215100685,0.4210164835164835,0.06704574641209346 +flat_mae,patch,logistic,aabc_age,36,0.046415888336127774,train,0.84251968503937,0.017211811365829614,0.8421314778954562,0.017360612539161265,0.842214138579832,0.017333500256935596 +flat_mae,patch,logistic,aabc_age,36,0.046415888336127774,test,0.4423076923076923,0.062379766007100344,0.43418226934355963,0.06287846010655004,0.44619963369963367,0.06314270056641545 +flat_mae,patch,logistic,aabc_age,37,0.046415888336127774,train,0.8543307086614174,0.016518343249591578,0.8540500601753988,0.016649675558825325,0.8547284273354444,0.01649983792887907 +flat_mae,patch,logistic,aabc_age,37,0.046415888336127774,test,0.4230769230769231,0.06628323661093581,0.4225,0.06560986750290151,0.4196428571428571,0.06608899002915668 +flat_mae,patch,logistic,aabc_age,38,0.000774263682681127,train,0.5590551181102362,0.020166125652818074,0.5514219546064945,0.02078773607753825,0.5589652111954797,0.020228210999688453 +flat_mae,patch,logistic,aabc_age,38,0.000774263682681127,test,0.4807692307692308,0.06423803505656905,0.46704031262854795,0.0650581837070459,0.4757326007326007,0.06364079905334616 +flat_mae,patch,logistic,aabc_age,39,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,39,21.54434690031882,test,0.5192307692307693,0.06423887558242808,0.5179487179487179,0.06456009358655798,0.5146520146520146,0.0646743178666026 +flat_mae,patch,logistic,aabc_age,40,9.999999999999999e-05,train,0.4744094488188976,0.018961009230516766,0.4315714410868505,0.018765090338878496,0.4691491141930497,0.01875985972104341 +flat_mae,patch,logistic,aabc_age,40,9.999999999999999e-05,test,0.5,0.05703471381432238,0.45512820512820507,0.058655275506595356,0.4933608058608059,0.05610604310340992 +flat_mae,patch,logistic,aabc_age,41,0.005994842503189409,train,0.6909448818897638,0.019887638913062625,0.6904646946564885,0.020087062983869426,0.6920643703645455,0.019899138311476522 +flat_mae,patch,logistic,aabc_age,41,0.005994842503189409,test,0.36538461538461536,0.05879238367933576,0.34413079907469807,0.05698827899476846,0.3601190476190476,0.05793493615434646 +flat_mae,patch,logistic,aabc_age,42,9.999999999999999e-05,train,0.4921259842519685,0.020345397756505516,0.45448462881021034,0.02008292679382595,0.4885320748999308,0.020170173460363157 +flat_mae,patch,logistic,aabc_age,42,9.999999999999999e-05,test,0.4230769230769231,0.060696109854580384,0.3842165898617511,0.0627620883606583,0.4178113553113553,0.05981872892727983 +flat_mae,patch,logistic,aabc_age,43,0.046415888336127774,train,0.8248031496062992,0.0166066758282133,0.8243599745320307,0.016754603654138778,0.825156816695404,0.0166397493502236 +flat_mae,patch,logistic,aabc_age,43,0.046415888336127774,test,0.5769230769230769,0.06932552276405043,0.5824871169698755,0.06837608584373515,0.578525641025641,0.06951892011256879 +flat_mae,patch,logistic,aabc_age,44,0.005994842503189409,train,0.6751968503937008,0.019603217352574066,0.6729310870674459,0.019756726252388838,0.675802999296111,0.01957630903837703 +flat_mae,patch,logistic,aabc_age,44,0.005994842503189409,test,0.4807692307692308,0.06426851723583102,0.4604166666666667,0.06836738325562237,0.47596153846153844,0.06421081443809325 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,train,0.65748031496063,0.020722217951286825,0.6549037339210911,0.02100682554699613,0.6579430137574824,0.020664062909156837 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,test,0.5384615384615384,0.06488241526523875,0.532577250335871,0.06702968243397744,0.5396062271062271,0.06500635959061056 +flat_mae,patch,logistic,aabc_age,46,0.000774263682681127,train,0.5570866141732284,0.02142704554381124,0.5507295922325929,0.021902764200799944,0.5568314876224854,0.021413281271926148 +flat_mae,patch,logistic,aabc_age,46,0.000774263682681127,test,0.46153846153846156,0.05956661777088508,0.4228449444293847,0.057285103708569374,0.4562728937728938,0.05877123394021956 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,train,0.6948818897637795,0.020511510267264735,0.6929511768024463,0.020810680013597774,0.696214222969798,0.020441081517142218 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,test,0.4807692307692308,0.06950027670116474,0.475,0.07155749786684638,0.4789377289377289,0.06917068982031893 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,train,0.844488188976378,0.015798087769067333,0.8438827413140308,0.015989785750274405,0.8445478088337115,0.015841853534049325 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,test,0.36538461538461536,0.06403475387742201,0.3621359483428449,0.06150468317267114,0.3644688644688645,0.06406059247995173 +flat_mae,patch,logistic,aabc_age,49,0.005994842503189409,train,0.6850393700787402,0.020064452226873423,0.6816397773039564,0.0206408396048104,0.6846958317110642,0.020058463140633018 +flat_mae,patch,logistic,aabc_age,49,0.005994842503189409,test,0.36538461538461536,0.06407322918524437,0.36473443223443225,0.06223835454812998,0.3630952380952381,0.063809840620376 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,train,0.6948818897637795,0.020160655430914304,0.6936891478982137,0.020329417779787154,0.6953439514450823,0.020078606979998345 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,test,0.40384615384615385,0.07117737656665384,0.4065612648221344,0.07167949397646858,0.4036172161172161,0.0711365339555842 +flat_mae,patch,logistic,aabc_age,51,9.999999999999999e-05,train,0.4822834645669291,0.020535156814237645,0.448587479497075,0.02074230930605468,0.4798068236959353,0.02024887564326592 +flat_mae,patch,logistic,aabc_age,51,9.999999999999999e-05,test,0.5,0.06246242657572479,0.45423904052936304,0.06193097472090697,0.49061355311355315,0.061419757860950434 +flat_mae,patch,logistic,aabc_age,52,0.000774263682681127,train,0.5728346456692913,0.020245904037440768,0.5607472553677508,0.02125738321164806,0.570970043480012,0.020230945737418873 +flat_mae,patch,logistic,aabc_age,52,0.000774263682681127,test,0.34615384615384615,0.06256244218057375,0.34623515310014163,0.06239480575235427,0.34546703296703296,0.062385431244669165 +flat_mae,patch,logistic,aabc_age,53,0.005994842503189409,train,0.6948818897637795,0.020493693919910603,0.6938002644522292,0.02056399262499162,0.6955615193262612,0.020463961531229848 +flat_mae,patch,logistic,aabc_age,53,0.005994842503189409,test,0.4807692307692308,0.061944943583015374,0.47139208173690933,0.06327508635463391,0.4787087912087912,0.061998967003897 +flat_mae,patch,logistic,aabc_age,54,0.046415888336127774,train,0.84251968503937,0.015085715682646144,0.8426627679647185,0.015149465500562816,0.8434519379963904,0.015033950113066798 +flat_mae,patch,logistic,aabc_age,54,0.046415888336127774,test,0.40384615384615385,0.0655346130144871,0.39387959866220734,0.06686603228238441,0.40476190476190477,0.0659363584130358 +flat_mae,patch,logistic,aabc_age,55,0.005994842503189409,train,0.6633858267716536,0.021008013832185337,0.6585571723815218,0.021509299726014373,0.6631387505298347,0.020917729123253168 +flat_mae,patch,logistic,aabc_age,55,0.005994842503189409,test,0.5384615384615384,0.06765278566536653,0.5324175824175824,0.06841526373241891,0.5352564102564102,0.06774613604925014 +flat_mae,patch,logistic,aabc_age,56,0.3593813663804626,train,0.9980314960629921,0.001893549357967288,0.9980665982539895,0.0018619304493154491,0.9979338842975207,0.0019874443674532728 +flat_mae,patch,logistic,aabc_age,56,0.3593813663804626,test,0.46153846153846156,0.0626183612976069,0.436026936026936,0.06309698305086024,0.4652014652014652,0.06330651531832289 +flat_mae,patch,logistic,aabc_age,57,0.000774263682681127,train,0.562992125984252,0.021649051431017835,0.5546996171996172,0.022092204405889184,0.5621771844055015,0.021614109030607013 +flat_mae,patch,logistic,aabc_age,57,0.000774263682681127,test,0.4423076923076923,0.0605604373171738,0.3895173453996983,0.05155163535336095,0.4340659340659341,0.05902527887840181 +flat_mae,patch,logistic,aabc_age,58,0.005994842503189409,train,0.6830708661417323,0.01975617697362161,0.6804726800074486,0.0200680502291589,0.6833647717922702,0.019718457001743572 +flat_mae,patch,logistic,aabc_age,58,0.005994842503189409,test,0.46153846153846156,0.057039006169919124,0.4301693404634581,0.057744932163959525,0.4562728937728938,0.05600877126103236 +flat_mae,patch,logistic,aabc_age,59,9.999999999999999e-05,train,0.47244094488188976,0.020685782322725447,0.4439848915951836,0.020667640359383586,0.46873831246919284,0.02052884460241061 +flat_mae,patch,logistic,aabc_age,59,9.999999999999999e-05,test,0.5384615384615384,0.055125609658639475,0.46616071428571426,0.047481294518821104,0.5274725274725275,0.0533478727967722 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,train,0.687007874015748,0.019733915307278138,0.6832259864598915,0.02017953399018254,0.6867119607433221,0.019704101589955923 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,test,0.4230769230769231,0.057767079751001614,0.3815500338066261,0.05977867943483717,0.4178113553113553,0.05683784835697136 +flat_mae,patch,logistic,aabc_age,61,0.046415888336127774,train,0.8366141732283464,0.01729219014583406,0.8358026427611909,0.017538144293998074,0.8361157648128366,0.017358471222781568 +flat_mae,patch,logistic,aabc_age,61,0.046415888336127774,test,0.38461538461538464,0.06641161764766676,0.3873792270531401,0.06527619325006166,0.38255494505494503,0.06615880481974296 +flat_mae,patch,logistic,aabc_age,62,0.3593813663804626,train,0.9921259842519685,0.003924489941794036,0.9921411452260656,0.003937544744750882,0.9920030917414828,0.003994703929213977 +flat_mae,patch,logistic,aabc_age,62,0.3593813663804626,test,0.46153846153846156,0.05828834416149388,0.44278033794162824,0.05742984985507615,0.46520146520146516,0.05910922706174871 +flat_mae,patch,logistic,aabc_age,63,9.999999999999999e-05,train,0.4940944881889764,0.019932622572528813,0.4682834566165641,0.02067467683088922,0.4908481239535165,0.019802945742115205 +flat_mae,patch,logistic,aabc_age,63,9.999999999999999e-05,test,0.46153846153846156,0.06253519718985433,0.4253787878787879,0.061703455288722536,0.45215201465201466,0.06168244113080652 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,train,0.8484251968503937,0.014810357497149719,0.8484102691782871,0.014860455687843138,0.8489975814602916,0.014799089021473581 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,test,0.2692307692307692,0.05800269071497902,0.28474632407822065,0.058702618670260075,0.26991758241758246,0.05829165226960374 +flat_mae,patch,logistic,aabc_age,65,9.999999999999999e-05,train,0.484251968503937,0.02059148610515035,0.4524323507635756,0.020709279604936726,0.48056753211134107,0.02048326032725668 +flat_mae,patch,logistic,aabc_age,65,9.999999999999999e-05,test,0.5961538461538461,0.060017255901258594,0.5605921855921856,0.067110333708732,0.5856227106227107,0.05978085046912368 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,train,0.6732283464566929,0.020853240328909662,0.6726719929589222,0.02096771226971461,0.6744719393773171,0.02077094562435508 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,test,0.5,0.061129155634926394,0.49498266739646046,0.06272636224596949,0.4981684981684982,0.06127492238557759 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,train,0.8385826771653543,0.015969717219279013,0.8382895919746786,0.016121524884267664,0.8382818538557584,0.0160666419770118 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,test,0.5,0.0660317634701834,0.49031385281385287,0.06743980697803519,0.49954212454212454,0.06594788923795235 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,train,0.8562992125984252,0.01519744756282531,0.8557547725848146,0.015326594567107346,0.8557743115025441,0.01525347912031134 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,test,0.46153846153846156,0.06571860399645296,0.4450892857142857,0.06635174663769425,0.4551282051282052,0.0652580578724308 +flat_mae,patch,logistic,aabc_age,69,0.046415888336127774,train,0.8385826771653543,0.015672859958446608,0.8385471285569212,0.015862404655887057,0.8387845974886313,0.01575238544330602 +flat_mae,patch,logistic,aabc_age,69,0.046415888336127774,test,0.34615384615384615,0.06066050400023174,0.3581349206349206,0.05817978044254251,0.34386446886446886,0.06030859381278872 +flat_mae,patch,logistic,aabc_age,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,70,2.782559402207126,test,0.28846153846153844,0.06077014361534441,0.2915527950310559,0.06018056718814621,0.2875457875457875,0.06092041177985334 +flat_mae,patch,logistic,aabc_age,71,0.005994842503189409,train,0.6909448818897638,0.02100517452759471,0.6876519644151357,0.021524331837359478,0.6915968691322602,0.020935994769250555 +flat_mae,patch,logistic,aabc_age,71,0.005994842503189409,test,0.4423076923076923,0.06719673271502093,0.4446682946682946,0.06744168058937347,0.4432234432234432,0.06744835960430218 +flat_mae,patch,logistic,aabc_age,72,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,72,21.54434690031882,test,0.40384615384615385,0.06727690504670025,0.3977777777777778,0.0646479965651477,0.39743589743589747,0.06646912577352955 +flat_mae,patch,logistic,aabc_age,73,0.000774263682681127,train,0.5511811023622047,0.020922660048746357,0.541067486609957,0.021507745758496646,0.550098031412247,0.02079851589010486 +flat_mae,patch,logistic,aabc_age,73,0.000774263682681127,test,0.4423076923076923,0.057966974376304964,0.4294469897918174,0.05304606874511814,0.43704212454212454,0.05728384876930522 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,train,0.6909448818897638,0.020328794578621176,0.6878755827702788,0.02063348482798123,0.6913293145808599,0.020212188187798578 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,test,0.5192307692307693,0.06799515950401419,0.5213920817369093,0.06916211701146155,0.5203754578754579,0.06828785249034532 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,train,0.6614173228346457,0.01948495916924816,0.6585608892855825,0.019787965949334258,0.6626779621357566,0.019504302542788822 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,test,0.5384615384615384,0.06775124182796038,0.5395859762426478,0.0685711214427652,0.5398351648351648,0.06789861483745838 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,train,0.8484251968503937,0.015127987976331848,0.8484836662714175,0.015216683416352815,0.8491151760010281,0.01512137348150778 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,test,0.4230769230769231,0.062390448366214504,0.40431879948008986,0.062244117145241966,0.4194139194139195,0.06183557496474066 +flat_mae,patch,logistic,aabc_age,77,0.005994842503189409,train,0.6929133858267716,0.019923178068923856,0.6902414149948674,0.020302597226574022,0.6918224684448656,0.020030249410085493 +flat_mae,patch,logistic,aabc_age,77,0.005994842503189409,test,0.4230769230769231,0.0664289784024836,0.41780503978779837,0.06606119480017603,0.42101648351648346,0.06629978341728682 +flat_mae,patch,logistic,aabc_age,78,0.3593813663804626,train,0.9940944881889764,0.0035256867218931513,0.9940853895896431,0.003543268646969582,0.9940692074439622,0.003552204217137125 +flat_mae,patch,logistic,aabc_age,78,0.3593813663804626,test,0.46153846153846156,0.06818223058867565,0.45828676570805504,0.06785089987788108,0.45810439560439564,0.06811349836300244 +flat_mae,patch,logistic,aabc_age,79,0.005994842503189409,train,0.6692913385826772,0.019114785867230574,0.6670995973301341,0.019526294463064094,0.6710600194864104,0.01911043721685484 +flat_mae,patch,logistic,aabc_age,79,0.005994842503189409,test,0.36538461538461536,0.06608013558949942,0.35644904820317114,0.06457436115404287,0.36309523809523814,0.06545582061502234 +flat_mae,patch,logistic,aabc_age,80,0.3593813663804626,train,0.9960629921259843,0.002692968065936539,0.9960165932547393,0.0027374378932833354,0.9958677685950413,0.0028265036725119244 +flat_mae,patch,logistic,aabc_age,80,0.3593813663804626,test,0.4423076923076923,0.06101779738431351,0.41100783747842573,0.06253930896851867,0.44139194139194143,0.06083417129375684 +flat_mae,patch,logistic,aabc_age,81,0.046415888336127774,train,0.8562992125984252,0.01546223892784177,0.8570332922173409,0.015431292462546686,0.8569944897188088,0.015510697400134686 +flat_mae,patch,logistic,aabc_age,81,0.046415888336127774,test,0.5,0.06640394361640467,0.4964387464387464,0.06604964103444848,0.4981684981684982,0.06649117815739108 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,train,0.8562992125984252,0.015226514233187832,0.855506892622507,0.015385136141323216,0.8557743115025441,0.015289810492958312 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,test,0.5,0.06551656369455891,0.4988010074216971,0.06703036269274021,0.5027472527472527,0.06570994008621109 +flat_mae,patch,logistic,aabc_age,83,0.046415888336127774,train,0.8484251968503937,0.015012514154903001,0.847273548690703,0.015262377459161355,0.8472922808114478,0.01517091622239071 +flat_mae,patch,logistic,aabc_age,83,0.046415888336127774,test,0.4230769230769231,0.06652850081069273,0.4336080586080586,0.06556249861230605,0.4226190476190476,0.06671711641431947 +flat_mae,patch,logistic,aabc_age,84,0.005994842503189409,train,0.687007874015748,0.0196444191134888,0.6855714576321537,0.01993050087214469,0.687782178948923,0.01964924730309247 +flat_mae,patch,logistic,aabc_age,84,0.005994842503189409,test,0.46153846153846156,0.0583741629439625,0.41962474645030423,0.05913807976499922,0.4548992673992674,0.05730455458778222 +flat_mae,patch,logistic,aabc_age,85,9.999999999999999e-05,train,0.4940944881889764,0.019538827076657567,0.4588348656847109,0.020288046937431376,0.4899954736290946,0.01935009911907777 +flat_mae,patch,logistic,aabc_age,85,9.999999999999999e-05,test,0.4807692307692308,0.05118817529411288,0.3934587813620072,0.04939893841569461,0.47115384615384615,0.04929028325753728 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,train,0.84251968503937,0.015782287545257845,0.8423752698692449,0.01588950319216953,0.842631653141896,0.01582109716766648 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,test,0.4423076923076923,0.06657997815502992,0.4329789833822092,0.066983926277027,0.4391025641025641,0.066537229515335 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,train,0.6771653543307087,0.02023939893540661,0.6729809628266399,0.020810908186044426,0.6769164913337259,0.020287408199401304 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,test,0.5,0.0665410292968305,0.4883554827000761,0.06928196378557482,0.49954212454212454,0.06665679986980262 +flat_mae,patch,logistic,aabc_age,88,0.046415888336127774,train,0.8366141732283464,0.01600285756303857,0.8362185139962918,0.016129247974201184,0.8373035775591737,0.01591158961686018 +flat_mae,patch,logistic,aabc_age,88,0.046415888336127774,test,0.5576923076923077,0.06683238771237549,0.555632974111235,0.06664332642483219,0.5560897435897436,0.06674406372505183 +flat_mae,patch,logistic,aabc_age,89,9.999999999999999e-05,train,0.5078740157480315,0.020603067513668492,0.48242133553136773,0.02126787971232275,0.5052785684009444,0.020539135103502637 +flat_mae,patch,logistic,aabc_age,89,9.999999999999999e-05,test,0.34615384615384615,0.05947233457568028,0.30000000000000004,0.05075775617710891,0.34065934065934067,0.058327226514864515 +flat_mae,patch,logistic,aabc_age,90,0.005994842503189409,train,0.6751968503937008,0.020771666601990965,0.6733553576092492,0.02119356055879041,0.6754678368741956,0.020776102259514272 +flat_mae,patch,logistic,aabc_age,90,0.005994842503189409,test,0.4807692307692308,0.05973277523997878,0.4620051085568327,0.059991511442298515,0.47710622710622713,0.0592900776538579 +flat_mae,patch,logistic,aabc_age,91,0.000774263682681127,train,0.5570866141732284,0.022111288798217627,0.5479599258197044,0.022783794331393444,0.5573342312553582,0.022071546598525515 +flat_mae,patch,logistic,aabc_age,91,0.000774263682681127,test,0.5961538461538461,0.0652532617244569,0.5887947838946024,0.06823721411111658,0.594551282051282,0.06518000855947043 +flat_mae,patch,logistic,aabc_age,92,0.005994842503189409,train,0.6771653543307087,0.019699293658005636,0.6732816643450446,0.020274828322663942,0.6772840192255687,0.019719361523197955 +flat_mae,patch,logistic,aabc_age,92,0.005994842503189409,test,0.5192307692307693,0.057469364105155074,0.4873967819107887,0.061732641861154756,0.5185439560439561,0.057810530061651724 +flat_mae,patch,logistic,aabc_age,93,0.005994842503189409,train,0.6771653543307087,0.020253310899534537,0.6761844030682828,0.020404894143120793,0.6787217653230122,0.02029446262607285 +flat_mae,patch,logistic,aabc_age,93,0.005994842503189409,test,0.38461538461538464,0.06027887606291514,0.37901470510166163,0.059856996107925574,0.3853021978021978,0.06036677865394071 +flat_mae,patch,logistic,aabc_age,94,0.000774263682681127,train,0.5590551181102362,0.020490263290201496,0.5501963321188819,0.020889177935282305,0.5577773984491425,0.02045240047380237 +flat_mae,patch,logistic,aabc_age,94,0.000774263682681127,test,0.4423076923076923,0.059565425716840155,0.41323022312373225,0.06478174282963017,0.440018315018315,0.05930829010066717 +flat_mae,patch,logistic,aabc_age,95,0.046415888336127774,train,0.8523622047244095,0.015463171673992266,0.8522031472337455,0.015601407626149784,0.8522771625408285,0.01545494430725223 +flat_mae,patch,logistic,aabc_age,95,0.046415888336127774,test,0.46153846153846156,0.06304342450074445,0.4391852770885029,0.059146331578146125,0.45352564102564097,0.06205425386449689 +flat_mae,patch,logistic,aabc_age,96,0.005994842503189409,train,0.6732283464566929,0.020423630486506663,0.6699423977456571,0.020843938496194505,0.6741220326857682,0.020369736278136683 +flat_mae,patch,logistic,aabc_age,96,0.005994842503189409,test,0.5192307692307693,0.06953219636514402,0.5141681235431235,0.07003214628585469,0.5132783882783882,0.06962741530451888 +flat_mae,patch,logistic,aabc_age,97,0.005994842503189409,train,0.6751968503937008,0.02091566549096934,0.6732071602846829,0.0212364295851726,0.6760205671772899,0.0209390853511164 +flat_mae,patch,logistic,aabc_age,97,0.005994842503189409,test,0.46153846153846156,0.06428832060087178,0.4456630824372759,0.0643636060591542,0.45650183150183155,0.0637047911244806 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,train,0.6791338582677166,0.021253316291979157,0.6760046169630644,0.021564922138115356,0.678429876733111,0.02128213909823141 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,test,0.46153846153846156,0.053236590944430465,0.4052579365079365,0.06063440087546672,0.4562728937728938,0.05234100042095505 +flat_mae,patch,logistic,aabc_age,99,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,99,166.81005372000556,test,0.36538461538461536,0.060143123183554324,0.33986117540308186,0.05748897665543638,0.36881868131868134,0.06097992685402485 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,train,0.8503937007874016,0.015196166501685654,0.8501771391843976,0.0153558162746725,0.8503786280493068,0.015283676895349096 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,test,0.5192307692307693,0.06929723305353352,0.5211640211640212,0.06934106588843617,0.5176282051282052,0.06923930923813122 diff --git a/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic/log.txt b/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..92cd28e99ef5bbd73615e1014609606e4372eae4 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:48 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:19:23 time: 5.1043 data: 4.2980 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:42 time: 0.2601 data: 0.0816 max mem: 3393 +extract (train) [ 40/228] eta: 0:01:04 time: 0.1902 data: 0.0509 max mem: 3393 +extract (train) [ 60/228] eta: 0:00:51 time: 0.2212 data: 0.0644 max mem: 3393 +extract (train) [ 80/228] eta: 0:00:41 time: 0.1975 data: 0.0576 max mem: 3393 +extract (train) [100/228] eta: 0:00:33 time: 0.2170 data: 0.0704 max mem: 3393 +extract (train) [120/228] eta: 0:00:27 time: 0.1888 data: 0.0571 max mem: 3393 +extract (train) [140/228] eta: 0:00:21 time: 0.1903 data: 0.0571 max mem: 3393 +extract (train) [160/228] eta: 0:00:16 time: 0.2068 data: 0.0657 max mem: 3393 +extract (train) [180/228] eta: 0:00:11 time: 0.1934 data: 0.0578 max mem: 3393 +extract (train) [200/228] eta: 0:00:06 time: 0.1891 data: 0.0564 max mem: 3393 +extract (train) [220/228] eta: 0:00:01 time: 0.1683 data: 0.0487 max mem: 3393 +extract (train) [227/228] eta: 0:00:00 time: 0.1648 data: 0.0480 max mem: 3393 +extract (train) Total time: 0:00:51 (0.2239 s / it) +extract (validation) [ 0/27] eta: 0:01:47 time: 3.9893 data: 3.8581 max mem: 3393 +extract (validation) [20/27] eta: 0:00:02 time: 0.1740 data: 0.0464 max mem: 3393 +extract (validation) [26/27] eta: 0:00:00 time: 0.1532 data: 0.0388 max mem: 3393 +extract (validation) Total time: 0:00:08 (0.3191 s / it) +extract (test) [ 0/26] eta: 0:01:46 time: 4.0951 data: 3.9539 max mem: 3393 +extract (test) [20/26] eta: 0:00:02 time: 0.1802 data: 0.0478 max mem: 3393 +extract (test) [25/26] eta: 0:00:00 time: 0.1645 data: 0.0401 max mem: 3393 +extract (test) Total time: 0:00:08 (0.3365 s / it) +feature extraction time: 0:01:08 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.046416 | train | 0.84843 | 0.015996 | 0.84851 | 0.016079 | 0.84871 | 0.015979 | +| flat_mae | patch | logistic | aabc_age | | 0.046416 | test | 0.32692 | 0.059978 | 0.31673 | 0.059312 | 0.31616 | 0.05904 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05519544349172783, "f1": 0.48772710418984055, "f1_std": 0.05513868145573847, "bacc": 0.530448717948718, "bacc_std": 0.05410809153290984} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06353841497483859, "f1": 0.4606775559588626, "f1_std": 0.062053678804835456, "bacc": 0.47573260073260076, "bacc_std": 0.06271876462253284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06300389769101872, "f1": 0.5169103313840157, "f1_std": 0.06628251523583914, "bacc": 0.5336538461538461, "bacc_std": 0.06289580840658307} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06034235414816041, "f1": 0.5429694160272804, "f1_std": 0.06663970422037728, "bacc": 0.565018315018315, "bacc_std": 0.06043744449065555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06231731881950692, "f1": 0.43421356421356416, "f1_std": 0.06333922247789144, "bacc": 0.45650183150183155, "bacc_std": 0.0616474213267528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.0683808513284411, "f1": 0.5195726495726496, "f1_std": 0.06896968536618439, "bacc": 0.5206043956043955, "bacc_std": 0.0687060014621711} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.060782812503683864, "f1": 0.5070993914807302, "f1_std": 0.06302930688534455, "bacc": 0.532051282051282, "bacc_std": 0.06035866002205527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06474550771467291, "f1": 0.5577777777777777, "f1_std": 0.06492434504623282, "bacc": 0.5590659340659341, "bacc_std": 0.06473729888165897} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 21.54434690031882, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06509987774827412, "f1": 0.45744104093250204, "f1_std": 0.06584195999695652, "bacc": 0.4743589743589744, "bacc_std": 0.06455884673657948} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06709450854520864, "f1": 0.5353296703296704, "f1_std": 0.06801653013603917, "bacc": 0.5368589743589743, "bacc_std": 0.06727048866367215} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.060191644134951476, "f1": 0.5042962749615975, "f1_std": 0.06287764559826164, "bacc": 0.5469322344322345, "bacc_std": 0.05950815358352338} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.0656901909224806, "f1": 0.4297369297369297, "f1_std": 0.06495014599248462, "bacc": 0.42124542124542125, "bacc_std": 0.06564201717376401} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06327449124729136, "f1": 0.4617257742257742, "f1_std": 0.0656823241352244, "bacc": 0.4787087912087912, "bacc_std": 0.06317904756654873} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06021827898786509, "f1": 0.5318167823555755, "f1_std": 0.06351924109970726, "bacc": 0.5398351648351648, "bacc_std": 0.06038447202234779} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05897812000270316, "f1": 0.49687986305633364, "f1_std": 0.057480713930717436, "bacc": 0.5290750915750916, "bacc_std": 0.057833422241754404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06068587272116135, "f1": 0.36352627257799675, "f1_std": 0.06036554721840611, "bacc": 0.3660714285714286, "bacc_std": 0.060828399194328024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.0616524305226413, "f1": 0.4979166666666666, "f1_std": 0.061143790784986406, "bacc": 0.5128205128205129, "bacc_std": 0.06096300513790906} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.06099442209437859, "f1": 0.47514041514041516, "f1_std": 0.05884670213661691, "bacc": 0.4906135531135531, "bacc_std": 0.06020963082756595} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06816499084083678, "f1": 0.4084669356408487, "f1_std": 0.0685552353576906, "bacc": 0.4049908424908425, "bacc_std": 0.06849839391423165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.056741537439568175, "f1": 0.5166915030388032, "f1_std": 0.0625569348086195, "bacc": 0.5336538461538463, "bacc_std": 0.05632598273793801} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.06103258417731423, "f1": 0.3125925925925926, "f1_std": 0.05961882012278606, "bacc": 0.3067765567765568, "bacc_std": 0.06075299381702839} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06475849536465074, "f1": 0.3566406711568002, "f1_std": 0.06417541144447803, "bacc": 0.34706959706959706, "bacc_std": 0.06507525936385292} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.059212734517112814, "f1": 0.42248847926267286, "f1_std": 0.05475109674361226, "bacc": 0.4356684981684981, "bacc_std": 0.05823920190153227} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06683544217973685, "f1": 0.4401242236024845, "f1_std": 0.06701285499402448, "bacc": 0.4375, "bacc_std": 0.0666050214376075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.000774263682681127, "split": "test", "acc": 0.28846153846153844, "acc_std": 0.05904671703743916, "f1": 0.2734646962233169, "f1_std": 0.05672688626938461, "bacc": 0.2831959706959707, "bacc_std": 0.05827554943177018} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.000774263682681127, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06393788590841794, "f1": 0.5045977011494253, "f1_std": 0.066554409306599, "bacc": 0.5144230769230769, "bacc_std": 0.06394094227934322} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06201997912488272, "f1": 0.403968253968254, "f1_std": 0.0644098896823417, "bacc": 0.4168956043956044, "bacc_std": 0.06175971655585291} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 2.782559402207126, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06580030754770665, "f1": 0.42652381574283105, "f1_std": 0.06562130433115755, "bacc": 0.4358974358974359, "bacc_std": 0.06540111199277875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06312487623750244, "f1": 0.38681626928471247, "f1_std": 0.060015679757790055, "bacc": 0.38369963369963367, "bacc_std": 0.06298079712438281} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.05937589613151213, "f1": 0.5285714285714286, "f1_std": 0.06391745935104283, "bacc": 0.5556318681318682, "bacc_std": 0.05910997530886905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06821341163310349, "f1": 0.614609250398724, "f1_std": 0.06966901128454268, "bacc": 0.6110347985347986, "bacc_std": 0.06830997790692363} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06228692673911237, "f1": 0.4462962962962963, "f1_std": 0.06184382623303706, "bacc": 0.4565018315018315, "bacc_std": 0.06182489227622527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.058587920074860356, "f1": 0.47426478772902336, "f1_std": 0.06422098272202606, "bacc": 0.49221611721611724, "bacc_std": 0.05831191145775572} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.0621674585246034, "f1": 0.3067367415193502, "f1_std": 0.064577378178551, "bacc": 0.3042582417582418, "bacc_std": 0.06188363824494052} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06712016734116003, "f1": 0.42630681818181815, "f1_std": 0.06656670215100685, "bacc": 0.4210164835164835, "bacc_std": 0.06704574641209346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.062379766007100344, "f1": 0.43418226934355963, "f1_std": 0.06287846010655004, "bacc": 0.44619963369963367, "bacc_std": 0.06314270056641545} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06628323661093581, "f1": 0.4225, "f1_std": 0.06560986750290151, "bacc": 0.4196428571428571, "bacc_std": 0.06608899002915668} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06423803505656905, "f1": 0.46704031262854795, "f1_std": 0.0650581837070459, "bacc": 0.4757326007326007, "bacc_std": 0.06364079905334616} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 21.54434690031882, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06423887558242808, "f1": 0.5179487179487179, "f1_std": 0.06456009358655798, "bacc": 0.5146520146520146, "bacc_std": 0.0646743178666026} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.05703471381432238, "f1": 0.45512820512820507, "f1_std": 0.058655275506595356, "bacc": 0.4933608058608059, "bacc_std": 0.05610604310340992} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.05879238367933576, "f1": 0.34413079907469807, "f1_std": 0.05698827899476846, "bacc": 0.3601190476190476, "bacc_std": 0.05793493615434646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.060696109854580384, "f1": 0.3842165898617511, "f1_std": 0.0627620883606583, "bacc": 0.4178113553113553, "bacc_std": 0.05981872892727983} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06932552276405043, "f1": 0.5824871169698755, "f1_std": 0.06837608584373515, "bacc": 0.578525641025641, "bacc_std": 0.06951892011256879} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06426851723583102, "f1": 0.4604166666666667, "f1_std": 0.06836738325562237, "bacc": 0.47596153846153844, "bacc_std": 0.06421081443809325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06488241526523875, "f1": 0.532577250335871, "f1_std": 0.06702968243397744, "bacc": 0.5396062271062271, "bacc_std": 0.06500635959061056} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05956661777088508, "f1": 0.4228449444293847, "f1_std": 0.057285103708569374, "bacc": 0.4562728937728938, "bacc_std": 0.05877123394021956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06950027670116474, "f1": 0.475, "f1_std": 0.07155749786684638, "bacc": 0.4789377289377289, "bacc_std": 0.06917068982031893} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06403475387742201, "f1": 0.3621359483428449, "f1_std": 0.06150468317267114, "bacc": 0.3644688644688645, "bacc_std": 0.06406059247995173} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06407322918524437, "f1": 0.36473443223443225, "f1_std": 0.06223835454812998, "bacc": 0.3630952380952381, "bacc_std": 0.063809840620376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.07117737656665384, "f1": 0.4065612648221344, "f1_std": 0.07167949397646858, "bacc": 0.4036172161172161, "bacc_std": 0.0711365339555842} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.06246242657572479, "f1": 0.45423904052936304, "f1_std": 0.06193097472090697, "bacc": 0.49061355311355315, "bacc_std": 0.061419757860950434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.000774263682681127, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06256244218057375, "f1": 0.34623515310014163, "f1_std": 0.06239480575235427, "bacc": 0.34546703296703296, "bacc_std": 0.062385431244669165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.061944943583015374, "f1": 0.47139208173690933, "f1_std": 0.06327508635463391, "bacc": 0.4787087912087912, "bacc_std": 0.061998967003897} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.0655346130144871, "f1": 0.39387959866220734, "f1_std": 0.06686603228238441, "bacc": 0.40476190476190477, "bacc_std": 0.0659363584130358} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06765278566536653, "f1": 0.5324175824175824, "f1_std": 0.06841526373241891, "bacc": 0.5352564102564102, "bacc_std": 0.06774613604925014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0626183612976069, "f1": 0.436026936026936, "f1_std": 0.06309698305086024, "bacc": 0.4652014652014652, "bacc_std": 0.06330651531832289} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.0605604373171738, "f1": 0.3895173453996983, "f1_std": 0.05155163535336095, "bacc": 0.4340659340659341, "bacc_std": 0.05902527887840181} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.057039006169919124, "f1": 0.4301693404634581, "f1_std": 0.057744932163959525, "bacc": 0.4562728937728938, "bacc_std": 0.05600877126103236} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.055125609658639475, "f1": 0.46616071428571426, "f1_std": 0.047481294518821104, "bacc": 0.5274725274725275, "bacc_std": 0.0533478727967722} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.057767079751001614, "f1": 0.3815500338066261, "f1_std": 0.05977867943483717, "bacc": 0.4178113553113553, "bacc_std": 0.05683784835697136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06641161764766676, "f1": 0.3873792270531401, "f1_std": 0.06527619325006166, "bacc": 0.38255494505494503, "bacc_std": 0.06615880481974296} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05828834416149388, "f1": 0.44278033794162824, "f1_std": 0.05742984985507615, "bacc": 0.46520146520146516, "bacc_std": 0.05910922706174871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06253519718985433, "f1": 0.4253787878787879, "f1_std": 0.061703455288722536, "bacc": 0.45215201465201466, "bacc_std": 0.06168244113080652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.2692307692307692, "acc_std": 0.05800269071497902, "f1": 0.28474632407822065, "f1_std": 0.058702618670260075, "bacc": 0.26991758241758246, "bacc_std": 0.05829165226960374} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.060017255901258594, "f1": 0.5605921855921856, "f1_std": 0.067110333708732, "bacc": 0.5856227106227107, "bacc_std": 0.05978085046912368} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.061129155634926394, "f1": 0.49498266739646046, "f1_std": 0.06272636224596949, "bacc": 0.4981684981684982, "bacc_std": 0.06127492238557759} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.0660317634701834, "f1": 0.49031385281385287, "f1_std": 0.06743980697803519, "bacc": 0.49954212454212454, "bacc_std": 0.06594788923795235} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06571860399645296, "f1": 0.4450892857142857, "f1_std": 0.06635174663769425, "bacc": 0.4551282051282052, "bacc_std": 0.0652580578724308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06066050400023174, "f1": 0.3581349206349206, "f1_std": 0.05817978044254251, "bacc": 0.34386446886446886, "bacc_std": 0.06030859381278872} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.28846153846153844, "acc_std": 0.06077014361534441, "f1": 0.2915527950310559, "f1_std": 0.06018056718814621, "bacc": 0.2875457875457875, "bacc_std": 0.06092041177985334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06719673271502093, "f1": 0.4446682946682946, "f1_std": 0.06744168058937347, "bacc": 0.4432234432234432, "bacc_std": 0.06744835960430218} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 21.54434690031882, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06727690504670025, "f1": 0.3977777777777778, "f1_std": 0.0646479965651477, "bacc": 0.39743589743589747, "bacc_std": 0.06646912577352955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.057966974376304964, "f1": 0.4294469897918174, "f1_std": 0.05304606874511814, "bacc": 0.43704212454212454, "bacc_std": 0.05728384876930522} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06799515950401419, "f1": 0.5213920817369093, "f1_std": 0.06916211701146155, "bacc": 0.5203754578754579, "bacc_std": 0.06828785249034532} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06775124182796038, "f1": 0.5395859762426478, "f1_std": 0.0685711214427652, "bacc": 0.5398351648351648, "bacc_std": 0.06789861483745838} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.062390448366214504, "f1": 0.40431879948008986, "f1_std": 0.062244117145241966, "bacc": 0.4194139194139195, "bacc_std": 0.06183557496474066} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.0664289784024836, "f1": 0.41780503978779837, "f1_std": 0.06606119480017603, "bacc": 0.42101648351648346, "bacc_std": 0.06629978341728682} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06818223058867565, "f1": 0.45828676570805504, "f1_std": 0.06785089987788108, "bacc": 0.45810439560439564, "bacc_std": 0.06811349836300244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06608013558949942, "f1": 0.35644904820317114, "f1_std": 0.06457436115404287, "bacc": 0.36309523809523814, "bacc_std": 0.06545582061502234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06101779738431351, "f1": 0.41100783747842573, "f1_std": 0.06253930896851867, "bacc": 0.44139194139194143, "bacc_std": 0.06083417129375684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06640394361640467, "f1": 0.4964387464387464, "f1_std": 0.06604964103444848, "bacc": 0.4981684981684982, "bacc_std": 0.06649117815739108} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06551656369455891, "f1": 0.4988010074216971, "f1_std": 0.06703036269274021, "bacc": 0.5027472527472527, "bacc_std": 0.06570994008621109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06652850081069273, "f1": 0.4336080586080586, "f1_std": 0.06556249861230605, "bacc": 0.4226190476190476, "bacc_std": 0.06671711641431947} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0583741629439625, "f1": 0.41962474645030423, "f1_std": 0.05913807976499922, "bacc": 0.4548992673992674, "bacc_std": 0.05730455458778222} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05118817529411288, "f1": 0.3934587813620072, "f1_std": 0.04939893841569461, "bacc": 0.47115384615384615, "bacc_std": 0.04929028325753728} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06657997815502992, "f1": 0.4329789833822092, "f1_std": 0.066983926277027, "bacc": 0.4391025641025641, "bacc_std": 0.066537229515335} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.0665410292968305, "f1": 0.4883554827000761, "f1_std": 0.06928196378557482, "bacc": 0.49954212454212454, "bacc_std": 0.06665679986980262} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06683238771237549, "f1": 0.555632974111235, "f1_std": 0.06664332642483219, "bacc": 0.5560897435897436, "bacc_std": 0.06674406372505183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 9.999999999999999e-05, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.05947233457568028, "f1": 0.30000000000000004, "f1_std": 0.05075775617710891, "bacc": 0.34065934065934067, "bacc_std": 0.058327226514864515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05973277523997878, "f1": 0.4620051085568327, "f1_std": 0.059991511442298515, "bacc": 0.47710622710622713, "bacc_std": 0.0592900776538579} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 0.000774263682681127, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.0652532617244569, "f1": 0.5887947838946024, "f1_std": 0.06823721411111658, "bacc": 0.594551282051282, "bacc_std": 0.06518000855947043} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.057469364105155074, "f1": 0.4873967819107887, "f1_std": 0.061732641861154756, "bacc": 0.5185439560439561, "bacc_std": 0.057810530061651724} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06027887606291514, "f1": 0.37901470510166163, "f1_std": 0.059856996107925574, "bacc": 0.3853021978021978, "bacc_std": 0.06036677865394071} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.059565425716840155, "f1": 0.41323022312373225, "f1_std": 0.06478174282963017, "bacc": 0.440018315018315, "bacc_std": 0.05930829010066717} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06304342450074445, "f1": 0.4391852770885029, "f1_std": 0.059146331578146125, "bacc": 0.45352564102564097, "bacc_std": 0.06205425386449689} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06953219636514402, "f1": 0.5141681235431235, "f1_std": 0.07003214628585469, "bacc": 0.5132783882783882, "bacc_std": 0.06962741530451888} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06428832060087178, "f1": 0.4456630824372759, "f1_std": 0.0643636060591542, "bacc": 0.45650183150183155, "bacc_std": 0.0637047911244806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.053236590944430465, "f1": 0.4052579365079365, "f1_std": 0.06063440087546672, "bacc": 0.4562728937728938, "bacc_std": 0.05234100042095505} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 166.81005372000556, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.060143123183554324, "f1": 0.33986117540308186, "f1_std": 0.05748897665543638, "bacc": 0.36881868131868134, "bacc_std": 0.06097992685402485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06929723305353352, "f1": 0.5211640211640212, "f1_std": 0.06934106588843617, "bacc": 0.5176282051282052, "bacc_std": 0.06923930923813122} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 2.4056 | 17.013 | 0.72112 | 0.15487 | 0.71523 | 0.16222 | 0.72081 | 0.15553 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 2.4056 | 17.013 | 0.46135 | 0.073368 | 0.4454 | 0.070617 | 0.45801 | 0.072585 | + + +done! total time: 0:05:18 diff --git a/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic/config.yaml b/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97c85b1fd5783857e8771186a4cd00bd57a4be43 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..0af583e0985312be7d8a3dc042ae949bf353275f --- /dev/null +++ b/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,0.046415888336127774,train,0.9395085066162571,0.01030059191239679,0.9379053320421398,0.010600504362475819,0.9368706088992974,0.010860031382280125 +flat_mae,patch,logistic,aabc_sex,,0.046415888336127774,test,0.8909090909090909,0.04322501571394827,0.8879076086956521,0.04405879086853515,0.8939393939393939,0.04295598575961621 +flat_mae,patch,logistic,aabc_sex,1,0.3593813663804626,train,0.9905482041587902,0.0042157620270605245,0.9902792019022628,0.004353503437656171,0.9887892376681615,0.005000309668867754 +flat_mae,patch,logistic,aabc_sex,1,0.3593813663804626,test,0.8,0.05434234913114179,0.795677136102668,0.05554868055034275,0.7975543478260869,0.05552464842644602 +flat_mae,patch,logistic,aabc_sex,2,0.046415888336127774,train,0.9376181474480151,0.010585268675002096,0.9360045457044925,0.010857230308625215,0.9357396172220757,0.01095265381761098 +flat_mae,patch,logistic,aabc_sex,2,0.046415888336127774,test,0.8727272727272727,0.04457920799121454,0.8699763593380614,0.045416599887653036,0.8722826086956521,0.04503645609879754 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,train,0.9527410207939508,0.00927931162668911,0.9515185952306762,0.009520096834104718,0.9512441747999649,0.009622670808044851 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,test,0.7818181818181819,0.05501541782398009,0.76890756302521,0.05949547243133371,0.7635869565217391,0.05845054871854141 +flat_mae,patch,logistic,aabc_sex,4,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,4,166.81005372000556,test,0.8363636363636363,0.049069840596830394,0.8354935194416749,0.04903514525246431,0.8471467391304348,0.046533311286120024 +flat_mae,patch,logistic,aabc_sex,5,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,5,166.81005372000556,test,0.8909090909090909,0.04118374067356884,0.8863636363636364,0.04365222320679524,0.8817934782608696,0.04472697718917101 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,train,0.9357277882797732,0.010214193981141916,0.9341056302939711,0.010496542008869751,0.9341056302939711,0.010752687166396243 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,test,0.9272727272727272,0.03447648617788006,0.9252717391304348,0.035560205878337525,0.9252717391304348,0.036108038895586135 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,train,0.943289224952741,0.01039441728281396,0.9417862487895061,0.01066908959499767,0.9412497435446525,0.010771509142993674 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,test,0.8181818181818182,0.05073867581185474,0.8131793478260869,0.05273892912661631,0.8131793478260869,0.05317950145150657 +flat_mae,patch,logistic,aabc_sex,8,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,8,2.782559402207126,test,0.8,0.04999305736923843,0.7989365237620472,0.05003390588998069,0.8097826086956521,0.04885595118745879 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,train,0.9395085066162571,0.010592659942941287,0.938056206088993,0.010826019191247456,0.9385899352267065,0.010790871385368817 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,test,0.9090909090909091,0.03778059320789572,0.905982905982906,0.039348400548454975,0.9035326086956521,0.04000123524480207 +flat_mae,patch,logistic,aabc_sex,10,0.005994842503189409,train,0.8941398865784499,0.013226720308674094,0.8907637393433434,0.013742943041884155,0.8884272692634603,0.014060465810216435 +flat_mae,patch,logistic,aabc_sex,10,0.005994842503189409,test,0.8363636363636363,0.05014252743780709,0.8281846581048247,0.054143573497827384,0.8226902173913043,0.05411814749173465 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,train,0.9376181474480151,0.010357216296720938,0.9359246820459175,0.010664702052497583,0.9351314516838125,0.01088853567258338 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,test,0.9090909090909091,0.038799761437453534,0.9045470322804582,0.042004431063851926,0.8974184782608696,0.04358854864001018 +flat_mae,patch,logistic,aabc_sex,12,0.046415888336127774,train,0.945179584120983,0.009782888318698025,0.943691387252473,0.01006918428189069,0.9428837304727571,0.010298502835816346 +flat_mae,patch,logistic,aabc_sex,12,0.046415888336127774,test,0.8363636363636363,0.050030060385219975,0.8281846581048247,0.05404298462811266,0.8226902173913043,0.054060235875286924 +flat_mae,patch,logistic,aabc_sex,13,0.046415888336127774,train,0.941398865784499,0.010201706421583879,0.939956136014968,0.01046494578381314,0.9402239221548111,0.010619420873355568 +flat_mae,patch,logistic,aabc_sex,13,0.046415888336127774,test,0.9090909090909091,0.038198697567689,0.905982905982906,0.03982763123673051,0.9035326086956521,0.04062088305717147 +flat_mae,patch,logistic,aabc_sex,14,0.046415888336127774,train,0.943289224952741,0.009911177456130045,0.9418579090829157,0.010149880836053449,0.9418579090829157,0.010156386663002382 +flat_mae,patch,logistic,aabc_sex,14,0.046415888336127774,test,0.9454545454545454,0.028831823556389517,0.9442755825734549,0.029312447874994662,0.9470108695652174,0.02839133516814527 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,train,0.945179584120983,0.010227856366367137,0.943691387252473,0.01054129392997349,0.9428837304727571,0.010845009485355822 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,test,0.8727272727272727,0.04471491329606241,0.8699763593380614,0.04551501163201456,0.8722826086956521,0.04499036289502761 +flat_mae,patch,logistic,aabc_sex,16,0.3593813663804626,train,0.9867674858223062,0.004891161061981731,0.986408265888528,0.005035463387861549,0.9855212638119524,0.005398369378602549 +flat_mae,patch,logistic,aabc_sex,16,0.3593813663804626,test,0.8181818181818182,0.05153413696499894,0.8106060606060606,0.054708594559211336,0.8070652173913043,0.05469936409250377 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,train,0.9376181474480151,0.010424324915010675,0.9358427325549344,0.010775570511281553,0.9345232861455495,0.011137433687941282 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,test,0.8727272727272727,0.04443622972751731,0.8683760683760684,0.046377533815408364,0.8661684782608696,0.046694496884839995 +flat_mae,patch,logistic,aabc_sex,18,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,18,2.782559402207126,test,0.8363636363636363,0.04869028446880229,0.8343927735028438,0.048981208377916904,0.8410326086956521,0.04832204238716813 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,train,0.945179584120983,0.009833795928770444,0.9437615704675844,0.01009711769791349,0.9434918960110203,0.010228025967645146 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,test,0.8727272727272727,0.042540963243901844,0.8683760683760684,0.04426125663836883,0.8661684782608696,0.04454285334517586 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,train,0.9395085066162571,0.010829811415986632,0.9379817696884434,0.011114859668780315,0.9379817696884434,0.011308583931330602 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,test,0.8727272727272727,0.04335928699361926,0.8663658451926415,0.04682463026194992,0.8600543478260869,0.047666584844238837 +flat_mae,patch,logistic,aabc_sex,21,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,21,166.81005372000556,test,0.7636363636363637,0.05541317085433959,0.7518222839291913,0.05928373442315285,0.7479619565217391,0.058136551004822214 +flat_mae,patch,logistic,aabc_sex,22,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,22,166.81005372000556,test,0.9454545454545454,0.030505254932755393,0.9442755825734549,0.031015368170402017,0.9470108695652174,0.02998541570611496 +flat_mae,patch,logistic,aabc_sex,23,0.3593813663804626,train,0.9886578449905482,0.004801309850904235,0.988342539813128,0.00495004502853667,0.9871552507400569,0.005459370823109495 +flat_mae,patch,logistic,aabc_sex,23,0.3593813663804626,test,0.8727272727272727,0.044259953355989645,0.8683760683760684,0.04583194629319204,0.8661684782608696,0.04594650532850321 +flat_mae,patch,logistic,aabc_sex,24,0.046415888336127774,train,0.9527410207939508,0.009606935482148886,0.9515185952306762,0.009870692239219239,0.9512441747999649,0.01009647240805501 +flat_mae,patch,logistic,aabc_sex,24,0.046415888336127774,test,0.8,0.05364703745873738,0.790003471017008,0.0576868606402007,0.7853260869565217,0.05682768888497544 +flat_mae,patch,logistic,aabc_sex,25,0.046415888336127774,train,0.9319470699432892,0.011072625492404346,0.9301434985474073,0.01137566539560833,0.9296213253612357,0.011532379106569308 +flat_mae,patch,logistic,aabc_sex,25,0.046415888336127774,test,0.8909090909090909,0.0387290823307475,0.8891129032258065,0.03917119645922695,0.8940217391304348,0.038349641489366613 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,train,0.9527410207939508,0.008972230102848682,0.9514580924590283,0.009234443679317911,0.9506360092617017,0.009467027291588081 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,test,0.8363636363636363,0.04773895960808957,0.8307692307692308,0.05013521162540233,0.8288043478260869,0.05053323506829333 +flat_mae,patch,logistic,aabc_sex,27,0.046415888336127774,train,0.9357277882797732,0.009968726940353624,0.9339410589410589,0.010293282438053232,0.9328892992174448,0.010634594376665737 +flat_mae,patch,logistic,aabc_sex,27,0.046415888336127774,test,0.8545454545454545,0.046767425267120494,0.8505434782608696,0.0483064760433193,0.8505434782608696,0.048390911779477006 +flat_mae,patch,logistic,aabc_sex,28,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,28,166.81005372000556,test,0.8181818181818182,0.05065421914663545,0.8106060606060606,0.05367139697995689,0.8070652173913043,0.05381760006426981 +flat_mae,patch,logistic,aabc_sex,29,0.046415888336127774,train,0.9395085066162571,0.010601123428852555,0.9379817696884434,0.010879338863925881,0.9379817696884434,0.011057716132023379 +flat_mae,patch,logistic,aabc_sex,29,0.046415888336127774,test,0.8181818181818182,0.05216602865327725,0.8131793478260869,0.05367809408658117,0.8131793478260869,0.053691673853678935 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,train,0.9489603024574669,0.009263472832408894,0.9475747398557507,0.009544118049720508,0.9467598698672295,0.009840214249400304 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,test,0.8545454545454545,0.04987907360277526,0.8505434782608696,0.0514657448314935,0.8505434782608696,0.051561843454031704 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,train,0.945179584120983,0.010234493223442786,0.9436193710331242,0.010559329485701657,0.942275564934494,0.010830671805556278 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,test,0.8,0.051004209840831774,0.7931623931623932,0.053088832890884584,0.7914402173913043,0.05292184989758564 +flat_mae,patch,logistic,aabc_sex,32,0.3593813663804626,train,0.9924385633270322,0.003814778524321624,0.9922381665052675,0.00392195059068998,0.9916395556727923,0.004245230862567638 +flat_mae,patch,logistic,aabc_sex,32,0.3593813663804626,test,0.8181818181818182,0.046429557413924266,0.8035714285714286,0.0529507123787952,0.7948369565217391,0.05146567597630498 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,train,0.9319470699432892,0.010720344010727861,0.9301434985474073,0.0110205234027685,0.9296213253612357,0.011189378611528212 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,test,0.8545454545454545,0.04745258094157289,0.8505434782608696,0.04900074222536466,0.8505434782608696,0.049198236565956184 +flat_mae,patch,logistic,aabc_sex,34,0.046415888336127774,train,0.9395085066162571,0.010474074676520588,0.9378268790033496,0.010792343759876057,0.9367654386119171,0.011031430940119893 +flat_mae,patch,logistic,aabc_sex,34,0.046415888336127774,test,0.8363636363636363,0.04866833613499395,0.8281846581048247,0.05194976407862822,0.8226902173913043,0.051904100575297527 +flat_mae,patch,logistic,aabc_sex,35,0.005994842503189409,train,0.8960302457466919,0.012964115701627553,0.89293113663378,0.013390567211332246,0.8912775872680911,0.013570102306833011 +flat_mae,patch,logistic,aabc_sex,35,0.005994842503189409,test,0.8545454545454545,0.04836467531351592,0.84593837535014,0.05345277162250995,0.8383152173913043,0.053445274348386196 +flat_mae,patch,logistic,aabc_sex,36,0.046415888336127774,train,0.941398865784499,0.009072951290349162,0.9398080346491953,0.00935497858864453,0.9390075910782849,0.009651856698093934 +flat_mae,patch,logistic,aabc_sex,36,0.046415888336127774,test,0.9272727272727272,0.036208761096802114,0.9260752688172043,0.03653640086263044,0.9313858695652174,0.03459130739938364 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,train,0.9508506616257089,0.009147847056200163,0.9495480822842386,0.009402210730337077,0.9490020223335971,0.009575866051345392 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,test,0.7818181818181819,0.05671854245175239,0.7782258064516129,0.05771710329227032,0.7819293478260869,0.05776866841902089 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,train,0.947069943289225,0.009854263750082812,0.945734048477388,0.010112776020723486,0.945734048477388,0.010267954308219517 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,test,0.8727272727272727,0.03862044163906847,0.8639095086603039,0.04353677921207701,0.8539402173913043,0.044131296843154304 +flat_mae,patch,logistic,aabc_sex,39,0.005994842503189409,train,0.9017013232514177,0.012864696788294216,0.8988378934980876,0.013314885308379466,0.8973958791289312,0.013622645355910297 +flat_mae,patch,logistic,aabc_sex,39,0.005994842503189409,test,0.8181818181818182,0.051213944382107024,0.8131793478260869,0.05262951713057947,0.8131793478260869,0.05227424489830055 +flat_mae,patch,logistic,aabc_sex,40,0.046415888336127774,train,0.9376181474480151,0.009869667139122431,0.9359246820459175,0.010155893793810619,0.9351314516838125,0.010331868966035657 +flat_mae,patch,logistic,aabc_sex,40,0.046415888336127774,test,0.8727272727272727,0.042205306939818554,0.8699763593380614,0.0431309813057661,0.8722826086956521,0.04302909118090275 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,train,0.8960302457466919,0.013841772640466533,0.8932078034098625,0.014256482174172127,0.8924939183446173,0.014497536483091628 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,test,0.8909090909090909,0.04181816600790215,0.8863636363636364,0.04453067927487248,0.8817934782608696,0.04547684743392351 +flat_mae,patch,logistic,aabc_sex,42,0.046415888336127774,train,0.943289224952741,0.01056617842391996,0.9417862487895061,0.010864827984589471,0.9412497435446525,0.011063203878071194 +flat_mae,patch,logistic,aabc_sex,42,0.046415888336127774,test,0.8,0.054053539421271384,0.7997351870241642,0.054020940032274166,0.8158967391304348,0.05087395037827523 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,train,0.9300567107750473,0.011437963963442254,0.9282475209414007,0.011739295970553863,0.9279873384331311,0.011904883027557425 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,test,0.8909090909090909,0.04331341097452305,0.8879076086956521,0.044790485398507905,0.8879076086956521,0.04519039190533377 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,train,0.9527410207939508,0.009035568010604891,0.951577529044329,0.009259831951137151,0.951852340338228,0.009363882187220904 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,test,0.8181818181818182,0.04938958300179205,0.8106060606060606,0.05248764992148359,0.8070652173913043,0.05234408171113471 +flat_mae,patch,logistic,aabc_sex,45,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,45,21.54434690031882,test,0.9090909090909091,0.03793790489865215,0.9086075108009306,0.03776036431390722,0.921875,0.032602887022279176 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,train,0.9395085066162571,0.010762670568976302,0.9378268790033496,0.011086285847213365,0.9367654386119171,0.01132182513026984 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,test,0.8727272727272727,0.04588939631565091,0.8699763593380614,0.04665570536771815,0.8722826086956521,0.046047160059402924 +flat_mae,patch,logistic,aabc_sex,47,0.046415888336127774,train,0.945179584120983,0.009521824509585603,0.9436193710331242,0.009820831932206989,0.942275564934494,0.010110889926445763 +flat_mae,patch,logistic,aabc_sex,47,0.046415888336127774,test,0.8909090909090909,0.04138967811599233,0.8863636363636364,0.04389513148719344,0.8817934782608696,0.04482133314347521 +flat_mae,patch,logistic,aabc_sex,48,0.005994842503189409,train,0.8922495274102079,0.013521753269140586,0.8893244508065848,0.01392182551946684,0.888617778950145,0.014154140327536256 +flat_mae,patch,logistic,aabc_sex,48,0.005994842503189409,test,0.9272727272727272,0.03365259559205865,0.9242424242424243,0.035761048274334954,0.9191576086956521,0.03752685856347527 +flat_mae,patch,logistic,aabc_sex,49,0.3593813663804626,train,0.9829867674858223,0.0059625464243829715,0.9825249132852503,0.0061342107923055555,0.98164512441748,0.006414539924122078 +flat_mae,patch,logistic,aabc_sex,49,0.3593813663804626,test,0.8363636363636363,0.04785631386619222,0.8281846581048247,0.05198137813142108,0.8226902173913043,0.05207341166328228 +flat_mae,patch,logistic,aabc_sex,50,0.3593813663804626,train,0.9867674858223062,0.004631123792151527,0.986408265888528,0.004767283516616543,0.9855212638119524,0.005110025747191115 +flat_mae,patch,logistic,aabc_sex,50,0.3593813663804626,test,0.8545454545454545,0.048904751412759476,0.8521505376344086,0.0494101743149758,0.8566576086956521,0.04853084180759932 +flat_mae,patch,logistic,aabc_sex,51,0.3593813663804626,train,0.9924385633270322,0.003717813162628199,0.9922477212110554,0.003813346207133026,0.9922477212110554,0.003910659398448159 +flat_mae,patch,logistic,aabc_sex,51,0.3593813663804626,test,0.8909090909090909,0.041403652970949834,0.8879076086956521,0.042516319955734766,0.8879076086956521,0.04236311464726782 +flat_mae,patch,logistic,aabc_sex,52,0.046415888336127774,train,0.9395085066162571,0.01031916648528968,0.938056206088993,0.010560080744284835,0.9385899352267065,0.010620078548375454 +flat_mae,patch,logistic,aabc_sex,52,0.046415888336127774,test,0.8545454545454545,0.04650913355602563,0.8505434782608696,0.04807893064557041,0.8505434782608696,0.04802436780243676 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,train,0.9319470699432892,0.011325464089475207,0.9299646954986761,0.011701607881168932,0.9284049942847094,0.011957989001338742 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,test,0.8727272727272727,0.04510350264018551,0.8699763593380614,0.045909240056821884,0.8722826086956521,0.04549921840247693 +flat_mae,patch,logistic,aabc_sex,54,0.3593813663804626,train,0.996219281663516,0.002666612766467009,0.9961190832526338,0.0027422795054660764,0.9955156950672646,0.0031628658149350838 +flat_mae,patch,logistic,aabc_sex,54,0.3593813663804626,test,0.8,0.05501061054675971,0.795677136102668,0.0563944570034366,0.7975543478260869,0.056583661389840514 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,train,0.9905482041587902,0.004004528580965628,0.9902792019022628,0.004132993967239661,0.9887892376681615,0.004749765962625147 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,test,0.8363636363636363,0.04832686230074024,0.8328267477203647,0.04917386885146217,0.8349184782608696,0.049009301627502076 +flat_mae,patch,logistic,aabc_sex,56,0.046415888336127774,train,0.947069943289225,0.010091342788443633,0.9455985191279309,0.010389414958910508,0.9445177174008617,0.010583139843100449 +flat_mae,patch,logistic,aabc_sex,56,0.046415888336127774,test,0.8545454545454545,0.046759097757981746,0.8505434782608696,0.04837743093584775,0.8505434782608696,0.04869409677615898 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,train,0.943289224952741,0.00986734965314525,0.9418579090829157,0.010114571816641136,0.9418579090829157,0.010205839705379862 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,test,0.8909090909090909,0.04187341806559225,0.8879076086956521,0.04324370316732738,0.8879076086956521,0.04343313034334395 +flat_mae,patch,logistic,aabc_sex,58,0.3593813663804626,train,0.9867674858223062,0.005118645846703618,0.9864252066645893,0.005256083080141426,0.9861294293502154,0.005437774747831518 +flat_mae,patch,logistic,aabc_sex,58,0.3593813663804626,test,0.8181818181818182,0.052198464243897874,0.8151881720430108,0.053016274605375835,0.8192934782608696,0.05264639016316048 +flat_mae,patch,logistic,aabc_sex,59,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,59,166.81005372000556,test,0.8363636363636363,0.04990839873671372,0.8354935194416749,0.0498036477897074,0.8471467391304348,0.04756754186141633 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,train,0.8998109640831758,0.012572203976252314,0.8969595401639856,0.012986320653970582,0.8957618922008265,0.013249716179735659 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,test,0.8545454545454545,0.04753631408749306,0.8484848484848485,0.050467918932907176,0.8444293478260869,0.05093246491214439 +flat_mae,patch,logistic,aabc_sex,61,0.005994842503189409,train,0.8903591682419659,0.014202030278472366,0.8873112181935712,0.014652767071485268,0.8863756264837774,0.014856847696811155 +flat_mae,patch,logistic,aabc_sex,61,0.005994842503189409,test,0.8909090909090909,0.041417830631442734,0.8891129032258065,0.041888670070611235,0.8940217391304348,0.04094134989256607 +flat_mae,patch,logistic,aabc_sex,62,0.046415888336127774,train,0.9376181474480151,0.0107368247210592,0.9360823383385143,0.011021917875122801,0.9363477827603388,0.011230977367506051 +flat_mae,patch,logistic,aabc_sex,62,0.046415888336127774,test,0.9272727272727272,0.03374137906424433,0.9260752688172043,0.03405999803421964,0.9313858695652174,0.032381348861966704 +flat_mae,patch,logistic,aabc_sex,63,0.046415888336127774,train,0.9319470699432892,0.011273113115729948,0.9301434985474073,0.011592974541210827,0.9296213253612357,0.011783912726327214 +flat_mae,patch,logistic,aabc_sex,63,0.046415888336127774,test,0.9636363636363636,0.024938244386039924,0.9626358695652174,0.0256809833661737,0.9626358695652174,0.026178175124468732 +flat_mae,patch,logistic,aabc_sex,64,0.3593813663804626,train,0.9867674858223062,0.0049719306738089325,0.9864252066645893,0.00510346440887624,0.9861294293502154,0.0052547281740026874 +flat_mae,patch,logistic,aabc_sex,64,0.3593813663804626,test,0.8181818181818182,0.05179739295906088,0.8166666666666667,0.05198021378744028,0.8254076086956521,0.05114391229835617 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,train,0.9376181474480151,0.01047874632219787,0.9360823383385143,0.010733961488061322,0.9363477827603388,0.010795098897989255 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,test,0.8727272727272727,0.04544529360389039,0.8699763593380614,0.04638976933343186,0.8722826086956521,0.046174079841619226 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,train,0.947069943289225,0.00983174040616608,0.9456671655368724,0.010085122835275719,0.9451258829391249,0.01014831749762653 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,test,0.8727272727272727,0.04092967562915002,0.8639095086603039,0.04614711513528126,0.8539402173913043,0.04695341855420785 +flat_mae,patch,logistic,aabc_sex,67,0.046415888336127774,train,0.9357277882797732,0.010953828070189044,0.9339410589410589,0.011291871939008062,0.9328892992174448,0.011552700991540462 +flat_mae,patch,logistic,aabc_sex,67,0.046415888336127774,test,0.8909090909090909,0.04136673315080909,0.8879076086956521,0.0426581644776873,0.8879076086956521,0.04312749512914843 +flat_mae,patch,logistic,aabc_sex,68,0.046415888336127774,train,0.941398865784499,0.01057146921770809,0.9398080346491953,0.010896188504095013,0.9390075910782849,0.011213453006679027 +flat_mae,patch,logistic,aabc_sex,68,0.046415888336127774,test,0.9454545454545454,0.028957215518390454,0.9427282193682749,0.03145724019252021,0.9347826086956521,0.03462275768503206 +flat_mae,patch,logistic,aabc_sex,69,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,69,166.81005372000556,test,0.8909090909090909,0.04284857057125711,0.8863636363636364,0.04541065942066133,0.8817934782608696,0.04636775985839513 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,train,0.9357277882797732,0.01073997941573217,0.9339410589410589,0.011066206231853686,0.9328892992174448,0.011315086795086833 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,test,0.9454545454545454,0.03110438835930731,0.9442755825734549,0.031618086205212136,0.9470108695652174,0.030596487754536734 +flat_mae,patch,logistic,aabc_sex,71,0.005994842503189409,train,0.8998109640831758,0.012763660308650192,0.8966861598440545,0.013226103900733442,0.8945455611243003,0.013485959249981716 +flat_mae,patch,logistic,aabc_sex,71,0.005994842503189409,test,0.8,0.05217971487683788,0.790003471017008,0.05614983132619298,0.7853260869565217,0.05531699302503915 +flat_mae,patch,logistic,aabc_sex,72,0.046415888336127774,train,0.9395085066162571,0.010176747133780336,0.9379817696884434,0.01043539620984108,0.9379817696884434,0.010540045423081418 +flat_mae,patch,logistic,aabc_sex,72,0.046415888336127774,test,0.9272727272727272,0.03505897746407398,0.9252717391304348,0.03609753080578298,0.9252717391304348,0.036233851426998684 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,train,0.9848771266540642,0.005413126574867791,0.9844567197508374,0.00557788333310837,0.9832791113455845,0.005979072660678224 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,test,0.9454545454545454,0.029434583583665703,0.9427282193682749,0.03210466456289536,0.9347826086956521,0.03519352385003507 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,train,0.9376181474480151,0.009850618638172862,0.9360045457044925,0.010118631997269612,0.9357396172220757,0.010358079319488421 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,test,0.9090909090909091,0.03945050670572994,0.905982905982906,0.04122527157904843,0.9035326086956521,0.04222734334658748 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,train,0.947069943289225,0.009539389204667834,0.9456671655368724,0.009803364252982893,0.9451258829391249,0.009967436821789643 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,test,0.8909090909090909,0.03948647221015453,0.8879076086956521,0.040822957745517655,0.8879076086956521,0.04133006732408728 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,train,0.947069943289225,0.009712549879703016,0.9456671655368724,0.010000748530493693,0.9451258829391249,0.010295570296412287 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,test,0.8363636363636363,0.04975920862488875,0.8250265111346766,0.05510415419185682,0.8165760869565217,0.054248246067336824 +flat_mae,patch,logistic,aabc_sex,77,0.3593813663804626,train,0.9905482041587902,0.004194682715197683,0.9902916184918056,0.004320324023497329,0.9893974032064246,0.004746760089807288 +flat_mae,patch,logistic,aabc_sex,77,0.3593813663804626,test,0.8545454545454545,0.0447475933144791,0.8428571428571429,0.05198070232959144,0.8322010869565217,0.05145448308125144 +flat_mae,patch,logistic,aabc_sex,78,0.046415888336127774,train,0.9338374291115312,0.010594980672276558,0.9321260333229466,0.010893472934763738,0.9318634778276034,0.011132710969984682 +flat_mae,patch,logistic,aabc_sex,78,0.046415888336127774,test,0.9636363636363636,0.025641160092887597,0.9626358695652174,0.026409460272590453,0.9626358695652174,0.026747259832623514 +flat_mae,patch,logistic,aabc_sex,79,0.046415888336127774,train,0.9319470699432892,0.010826187134861727,0.9302294908994988,0.011101292434261762,0.9302294908994988,0.01121162982732118 +flat_mae,patch,logistic,aabc_sex,79,0.046415888336127774,test,0.9272727272727272,0.03520741845193407,0.9242424242424243,0.037425083286635,0.9191576086956521,0.03924440405893466 +flat_mae,patch,logistic,aabc_sex,80,0.046415888336127774,train,0.945179584120983,0.0099127486009156,0.9437615704675844,0.010188710205759498,0.9434918960110203,0.010371655362798615 +flat_mae,patch,logistic,aabc_sex,80,0.046415888336127774,test,0.8545454545454545,0.044865639634743965,0.84593837535014,0.0494339975726891,0.8383152173913043,0.04986685476727319 +flat_mae,patch,logistic,aabc_sex,81,0.046415888336127774,train,0.943289224952741,0.009209479104696454,0.9417862487895061,0.009461770128097463,0.9412497435446525,0.009610993683041158 +flat_mae,patch,logistic,aabc_sex,81,0.046415888336127774,test,0.8727272727272727,0.043506230198854294,0.8663658451926415,0.04693324401556137,0.8600543478260869,0.04787362339277557 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,train,0.9905482041587902,0.0041333886562374845,0.9903037190461352,0.004244584204022702,0.9900055687446877,0.004445780327308059 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,test,0.8727272727272727,0.04417155136978123,0.8663658451926415,0.04742938208571879,0.8600543478260869,0.04831342736450297 +flat_mae,patch,logistic,aabc_sex,83,0.046415888336127774,train,0.9527410207939508,0.00944908543368649,0.951577529044329,0.009674156832025839,0.951852340338228,0.009704012020266528 +flat_mae,patch,logistic,aabc_sex,83,0.046415888336127774,test,0.8363636363636363,0.043502582805435806,0.8212351029252438,0.051787427580501734,0.8104619565217391,0.04978391760964377 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,train,0.9357277882797732,0.011408157673941148,0.9339410589410589,0.011745594352392138,0.9328892992174448,0.011923656605364496 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,test,0.8181818181818182,0.05492564470519202,0.8151881720430108,0.05556209001603648,0.8192934782608696,0.05550978440387449 +flat_mae,patch,logistic,aabc_sex,85,0.005994842503189409,train,0.8960302457466919,0.013502625805968432,0.8930712209248908,0.01397619598052258,0.8918857528063542,0.014302501883636068 +flat_mae,patch,logistic,aabc_sex,85,0.005994842503189409,test,0.8727272727272727,0.04529683184513692,0.8683760683760684,0.04725914959285144,0.8661684782608696,0.04764497333191647 +flat_mae,patch,logistic,aabc_sex,86,0.3593813663804626,train,0.9848771266540642,0.005371037023489482,0.9844567197508374,0.005535672283192924,0.9832791113455845,0.005951295362136416 +flat_mae,patch,logistic,aabc_sex,86,0.3593813663804626,test,0.8727272727272727,0.0472147756673937,0.8711943793911007,0.04747329880588248,0.8783967391304348,0.04616173169207714 +flat_mae,patch,logistic,aabc_sex,87,0.046415888336127774,train,0.9489603024574669,0.009794803682581914,0.9476400828491303,0.010053238818162986,0.9473680354054925,0.01015907316523235 +flat_mae,patch,logistic,aabc_sex,87,0.046415888336127774,test,0.8545454545454545,0.04479604468962309,0.84593837535014,0.049190137008675264,0.8383152173913043,0.04918908678320517 +flat_mae,patch,logistic,aabc_sex,88,0.046415888336127774,train,0.9338374291115312,0.010938978494165589,0.9322085406620606,0.01119461602672022,0.9324716433658665,0.01122598314449496 +flat_mae,patch,logistic,aabc_sex,88,0.046415888336127774,test,0.9272727272727272,0.03677900946450412,0.9242424242424243,0.03888035666721627,0.9191576086956521,0.04060824639548038 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,train,0.9319470699432892,0.011422607545399147,0.9301434985474073,0.011729688232830045,0.9296213253612357,0.01185186888162806 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,test,0.8545454545454545,0.046682525443876535,0.8484848484848485,0.04960032033046009,0.8444293478260869,0.05002872624117813 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,train,0.9508506616257089,0.009568414029835796,0.9493518927677125,0.009900564917243573,0.9471775257188078,0.010288754794229252 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,test,0.8727272727272727,0.04530347258322669,0.8683760683760684,0.04762602213503136,0.8661684782608696,0.04816742483098075 +flat_mae,patch,logistic,aabc_sex,91,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,91,2.782559402207126,test,0.8727272727272727,0.04537593201920561,0.8699763593380614,0.04620252683025869,0.8722826086956521,0.045848957574472306 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,train,0.9357277882797732,0.010849722992143953,0.9341056302939711,0.011137253542211924,0.9341056302939711,0.011318026278731864 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,test,0.9090909090909091,0.036636503496236546,0.9027925061859314,0.04172062939029353,0.8913043478260869,0.04380451504984804 +flat_mae,patch,logistic,aabc_sex,93,0.005994842503189409,train,0.8979206049149339,0.013564614654700735,0.8952152478211111,0.01393208658746175,0.894736070810985,0.01404640027943546 +flat_mae,patch,logistic,aabc_sex,93,0.005994842503189409,test,0.8909090909090909,0.04106707273242052,0.884453781512605,0.04538219647153068,0.8756793478260869,0.04676350863545627 +flat_mae,patch,logistic,aabc_sex,94,0.3593813663804626,train,0.9867674858223062,0.005131341573377586,0.986408265888528,0.00528175688205094,0.9855212638119524,0.00565891581195954 +flat_mae,patch,logistic,aabc_sex,94,0.3593813663804626,test,0.9454545454545454,0.02927349519037926,0.9442755825734549,0.029793003230557103,0.9470108695652174,0.02892038844062629 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,train,0.9357277882797732,0.010580524502982986,0.9340244152947736,0.010882922591237208,0.9334974647557079,0.011119475107254995 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,test,0.8727272727272727,0.04240908993471692,0.8683760683760684,0.044193000083357616,0.8661684782608696,0.04453122460865525 +flat_mae,patch,logistic,aabc_sex,96,0.005994842503189409,train,0.8941398865784499,0.013358019452307936,0.8911970382558618,0.013738319010105719,0.8902517658782496,0.01386387451134892 +flat_mae,patch,logistic,aabc_sex,96,0.005994842503189409,test,0.9090909090909091,0.03716247996777627,0.905982905982906,0.0386582921612847,0.9035326086956521,0.039344638089985624 +flat_mae,patch,logistic,aabc_sex,97,0.046415888336127774,train,0.941398865784499,0.01060374156578298,0.9396520951935851,0.010974349069836135,0.9377912600017586,0.011363368027157318 +flat_mae,patch,logistic,aabc_sex,97,0.046415888336127774,test,0.9272727272727272,0.0330669243747178,0.9252717391304348,0.03403091335779827,0.9252717391304348,0.03423037262908611 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,train,0.9395085066162571,0.010074745402435897,0.9377463959988231,0.010416482516086309,0.936157273073654,0.010759514589755752 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,test,0.8545454545454545,0.0478067736132212,0.8505434782608696,0.04936212869106104,0.8505434782608696,0.049741480854435326 +flat_mae,patch,logistic,aabc_sex,99,0.046415888336127774,train,0.9376181474480151,0.01062889706329474,0.9360045457044925,0.01091212041028964,0.9357396172220757,0.01104918966212122 +flat_mae,patch,logistic,aabc_sex,99,0.046415888336127774,test,0.9272727272727272,0.0361824394647436,0.9242424242424243,0.038474873171692445,0.9191576086956521,0.04034329172989457 +flat_mae,patch,logistic,aabc_sex,100,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,100,166.81005372000556,test,0.9090909090909091,0.03855499024416,0.9079959852793577,0.03866014824471662,0.9157608695652174,0.03610969674075427 diff --git a/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic/log.txt b/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0d54795be3bcfc9c5d484b70a385022a96e9bd2e --- /dev/null +++ b/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:50 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:19:59 time: 5.0838 data: 4.1989 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:38 time: 0.2259 data: 0.0746 max mem: 3393 +extract (train) [ 40/236] eta: 0:01:05 time: 0.2042 data: 0.0589 max mem: 3393 +extract (train) [ 60/236] eta: 0:00:51 time: 0.2163 data: 0.0673 max mem: 3393 +extract (train) [ 80/236] eta: 0:00:41 time: 0.1880 data: 0.0512 max mem: 3393 +extract (train) [100/236] eta: 0:00:34 time: 0.2054 data: 0.0640 max mem: 3393 +extract (train) [120/236] eta: 0:00:29 time: 0.2336 data: 0.0748 max mem: 3393 +extract (train) [140/236] eta: 0:00:23 time: 0.2144 data: 0.0670 max mem: 3393 +extract (train) [160/236] eta: 0:00:18 time: 0.2023 data: 0.0649 max mem: 3393 +extract (train) [180/236] eta: 0:00:13 time: 0.2169 data: 0.0766 max mem: 3393 +extract (train) [200/236] eta: 0:00:08 time: 0.2142 data: 0.0739 max mem: 3393 +extract (train) [220/236] eta: 0:00:03 time: 0.1885 data: 0.0585 max mem: 3393 +extract (train) [235/236] eta: 0:00:00 time: 0.1770 data: 0.0546 max mem: 3393 +extract (train) Total time: 0:00:54 (0.2303 s / it) +extract (validation) [ 0/29] eta: 0:02:04 time: 4.3050 data: 4.1605 max mem: 3393 +extract (validation) [20/29] eta: 0:00:03 time: 0.1983 data: 0.0646 max mem: 3393 +extract (validation) [28/29] eta: 0:00:00 time: 0.1724 data: 0.0512 max mem: 3393 +extract (validation) Total time: 0:00:09 (0.3429 s / it) +extract (test) [ 0/28] eta: 0:01:54 time: 4.1000 data: 3.9571 max mem: 3393 +extract (test) [20/28] eta: 0:00:02 time: 0.1882 data: 0.0577 max mem: 3393 +extract (test) [27/28] eta: 0:00:00 time: 0.1639 data: 0.0476 max mem: 3393 +extract (test) Total time: 0:00:09 (0.3309 s / it) +feature extraction time: 0:01:13 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 0.046416 | train | 0.93951 | 0.010301 | 0.93791 | 0.010601 | 0.93687 | 0.01086 | +| flat_mae | patch | logistic | aabc_sex | | 0.046416 | test | 0.89091 | 0.043225 | 0.88791 | 0.044059 | 0.89394 | 0.042956 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05434234913114179, "f1": 0.795677136102668, "f1_std": 0.05554868055034275, "bacc": 0.7975543478260869, "bacc_std": 0.05552464842644602} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04457920799121454, "f1": 0.8699763593380614, "f1_std": 0.045416599887653036, "bacc": 0.8722826086956521, "bacc_std": 0.04503645609879754} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05501541782398009, "f1": 0.76890756302521, "f1_std": 0.05949547243133371, "bacc": 0.7635869565217391, "bacc_std": 0.05845054871854141} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 166.81005372000556, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049069840596830394, "f1": 0.8354935194416749, "f1_std": 0.04903514525246431, "bacc": 0.8471467391304348, "bacc_std": 0.046533311286120024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 166.81005372000556, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04118374067356884, "f1": 0.8863636363636364, "f1_std": 0.04365222320679524, "bacc": 0.8817934782608696, "bacc_std": 0.04472697718917101} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03447648617788006, "f1": 0.9252717391304348, "f1_std": 0.035560205878337525, "bacc": 0.9252717391304348, "bacc_std": 0.036108038895586135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05073867581185474, "f1": 0.8131793478260869, "f1_std": 0.05273892912661631, "bacc": 0.8131793478260869, "bacc_std": 0.05317950145150657} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 2.782559402207126, "split": "test", "acc": 0.8, "acc_std": 0.04999305736923843, "f1": 0.7989365237620472, "f1_std": 0.05003390588998069, "bacc": 0.8097826086956521, "bacc_std": 0.04885595118745879} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03778059320789572, "f1": 0.905982905982906, "f1_std": 0.039348400548454975, "bacc": 0.9035326086956521, "bacc_std": 0.04000123524480207} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05014252743780709, "f1": 0.8281846581048247, "f1_std": 0.054143573497827384, "bacc": 0.8226902173913043, "bacc_std": 0.05411814749173465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038799761437453534, "f1": 0.9045470322804582, "f1_std": 0.042004431063851926, "bacc": 0.8974184782608696, "bacc_std": 0.04358854864001018} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.050030060385219975, "f1": 0.8281846581048247, "f1_std": 0.05404298462811266, "bacc": 0.8226902173913043, "bacc_std": 0.054060235875286924} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038198697567689, "f1": 0.905982905982906, "f1_std": 0.03982763123673051, "bacc": 0.9035326086956521, "bacc_std": 0.04062088305717147} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.028831823556389517, "f1": 0.9442755825734549, "f1_std": 0.029312447874994662, "bacc": 0.9470108695652174, "bacc_std": 0.02839133516814527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04471491329606241, "f1": 0.8699763593380614, "f1_std": 0.04551501163201456, "bacc": 0.8722826086956521, "bacc_std": 0.04499036289502761} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05153413696499894, "f1": 0.8106060606060606, "f1_std": 0.054708594559211336, "bacc": 0.8070652173913043, "bacc_std": 0.05469936409250377} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04443622972751731, "f1": 0.8683760683760684, "f1_std": 0.046377533815408364, "bacc": 0.8661684782608696, "bacc_std": 0.046694496884839995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04869028446880229, "f1": 0.8343927735028438, "f1_std": 0.048981208377916904, "bacc": 0.8410326086956521, "bacc_std": 0.04832204238716813} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042540963243901844, "f1": 0.8683760683760684, "f1_std": 0.04426125663836883, "bacc": 0.8661684782608696, "bacc_std": 0.04454285334517586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04335928699361926, "f1": 0.8663658451926415, "f1_std": 0.04682463026194992, "bacc": 0.8600543478260869, "bacc_std": 0.047666584844238837} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 166.81005372000556, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05541317085433959, "f1": 0.7518222839291913, "f1_std": 0.05928373442315285, "bacc": 0.7479619565217391, "bacc_std": 0.058136551004822214} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 166.81005372000556, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030505254932755393, "f1": 0.9442755825734549, "f1_std": 0.031015368170402017, "bacc": 0.9470108695652174, "bacc_std": 0.02998541570611496} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044259953355989645, "f1": 0.8683760683760684, "f1_std": 0.04583194629319204, "bacc": 0.8661684782608696, "bacc_std": 0.04594650532850321} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05364703745873738, "f1": 0.790003471017008, "f1_std": 0.0576868606402007, "bacc": 0.7853260869565217, "bacc_std": 0.05682768888497544} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.0387290823307475, "f1": 0.8891129032258065, "f1_std": 0.03917119645922695, "bacc": 0.8940217391304348, "bacc_std": 0.038349641489366613} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04773895960808957, "f1": 0.8307692307692308, "f1_std": 0.05013521162540233, "bacc": 0.8288043478260869, "bacc_std": 0.05053323506829333} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046767425267120494, "f1": 0.8505434782608696, "f1_std": 0.0483064760433193, "bacc": 0.8505434782608696, "bacc_std": 0.048390911779477006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 166.81005372000556, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05065421914663545, "f1": 0.8106060606060606, "f1_std": 0.05367139697995689, "bacc": 0.8070652173913043, "bacc_std": 0.05381760006426981} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05216602865327725, "f1": 0.8131793478260869, "f1_std": 0.05367809408658117, "bacc": 0.8131793478260869, "bacc_std": 0.053691673853678935} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04987907360277526, "f1": 0.8505434782608696, "f1_std": 0.0514657448314935, "bacc": 0.8505434782608696, "bacc_std": 0.051561843454031704} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.051004209840831774, "f1": 0.7931623931623932, "f1_std": 0.053088832890884584, "bacc": 0.7914402173913043, "bacc_std": 0.05292184989758564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.046429557413924266, "f1": 0.8035714285714286, "f1_std": 0.0529507123787952, "bacc": 0.7948369565217391, "bacc_std": 0.05146567597630498} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04745258094157289, "f1": 0.8505434782608696, "f1_std": 0.04900074222536466, "bacc": 0.8505434782608696, "bacc_std": 0.049198236565956184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04866833613499395, "f1": 0.8281846581048247, "f1_std": 0.05194976407862822, "bacc": 0.8226902173913043, "bacc_std": 0.051904100575297527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04836467531351592, "f1": 0.84593837535014, "f1_std": 0.05345277162250995, "bacc": 0.8383152173913043, "bacc_std": 0.053445274348386196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.036208761096802114, "f1": 0.9260752688172043, "f1_std": 0.03653640086263044, "bacc": 0.9313858695652174, "bacc_std": 0.03459130739938364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05671854245175239, "f1": 0.7782258064516129, "f1_std": 0.05771710329227032, "bacc": 0.7819293478260869, "bacc_std": 0.05776866841902089} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.03862044163906847, "f1": 0.8639095086603039, "f1_std": 0.04353677921207701, "bacc": 0.8539402173913043, "bacc_std": 0.044131296843154304} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.051213944382107024, "f1": 0.8131793478260869, "f1_std": 0.05262951713057947, "bacc": 0.8131793478260869, "bacc_std": 0.05227424489830055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042205306939818554, "f1": 0.8699763593380614, "f1_std": 0.0431309813057661, "bacc": 0.8722826086956521, "bacc_std": 0.04302909118090275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04181816600790215, "f1": 0.8863636363636364, "f1_std": 0.04453067927487248, "bacc": 0.8817934782608696, "bacc_std": 0.04547684743392351} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.054053539421271384, "f1": 0.7997351870241642, "f1_std": 0.054020940032274166, "bacc": 0.8158967391304348, "bacc_std": 0.05087395037827523} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04331341097452305, "f1": 0.8879076086956521, "f1_std": 0.044790485398507905, "bacc": 0.8879076086956521, "bacc_std": 0.04519039190533377} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04938958300179205, "f1": 0.8106060606060606, "f1_std": 0.05248764992148359, "bacc": 0.8070652173913043, "bacc_std": 0.05234408171113471} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 21.54434690031882, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03793790489865215, "f1": 0.9086075108009306, "f1_std": 0.03776036431390722, "bacc": 0.921875, "bacc_std": 0.032602887022279176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04588939631565091, "f1": 0.8699763593380614, "f1_std": 0.04665570536771815, "bacc": 0.8722826086956521, "bacc_std": 0.046047160059402924} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04138967811599233, "f1": 0.8863636363636364, "f1_std": 0.04389513148719344, "bacc": 0.8817934782608696, "bacc_std": 0.04482133314347521} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03365259559205865, "f1": 0.9242424242424243, "f1_std": 0.035761048274334954, "bacc": 0.9191576086956521, "bacc_std": 0.03752685856347527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04785631386619222, "f1": 0.8281846581048247, "f1_std": 0.05198137813142108, "bacc": 0.8226902173913043, "bacc_std": 0.05207341166328228} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.048904751412759476, "f1": 0.8521505376344086, "f1_std": 0.0494101743149758, "bacc": 0.8566576086956521, "bacc_std": 0.04853084180759932} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041403652970949834, "f1": 0.8879076086956521, "f1_std": 0.042516319955734766, "bacc": 0.8879076086956521, "bacc_std": 0.04236311464726782} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04650913355602563, "f1": 0.8505434782608696, "f1_std": 0.04807893064557041, "bacc": 0.8505434782608696, "bacc_std": 0.04802436780243676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04510350264018551, "f1": 0.8699763593380614, "f1_std": 0.045909240056821884, "bacc": 0.8722826086956521, "bacc_std": 0.04549921840247693} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05501061054675971, "f1": 0.795677136102668, "f1_std": 0.0563944570034366, "bacc": 0.7975543478260869, "bacc_std": 0.056583661389840514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04832686230074024, "f1": 0.8328267477203647, "f1_std": 0.04917386885146217, "bacc": 0.8349184782608696, "bacc_std": 0.049009301627502076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046759097757981746, "f1": 0.8505434782608696, "f1_std": 0.04837743093584775, "bacc": 0.8505434782608696, "bacc_std": 0.04869409677615898} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04187341806559225, "f1": 0.8879076086956521, "f1_std": 0.04324370316732738, "bacc": 0.8879076086956521, "bacc_std": 0.04343313034334395} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.052198464243897874, "f1": 0.8151881720430108, "f1_std": 0.053016274605375835, "bacc": 0.8192934782608696, "bacc_std": 0.05264639016316048} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 166.81005372000556, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04990839873671372, "f1": 0.8354935194416749, "f1_std": 0.0498036477897074, "bacc": 0.8471467391304348, "bacc_std": 0.04756754186141633} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04753631408749306, "f1": 0.8484848484848485, "f1_std": 0.050467918932907176, "bacc": 0.8444293478260869, "bacc_std": 0.05093246491214439} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041417830631442734, "f1": 0.8891129032258065, "f1_std": 0.041888670070611235, "bacc": 0.8940217391304348, "bacc_std": 0.04094134989256607} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03374137906424433, "f1": 0.9260752688172043, "f1_std": 0.03405999803421964, "bacc": 0.9313858695652174, "bacc_std": 0.032381348861966704} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.024938244386039924, "f1": 0.9626358695652174, "f1_std": 0.0256809833661737, "bacc": 0.9626358695652174, "bacc_std": 0.026178175124468732} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05179739295906088, "f1": 0.8166666666666667, "f1_std": 0.05198021378744028, "bacc": 0.8254076086956521, "bacc_std": 0.05114391229835617} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04544529360389039, "f1": 0.8699763593380614, "f1_std": 0.04638976933343186, "bacc": 0.8722826086956521, "bacc_std": 0.046174079841619226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04092967562915002, "f1": 0.8639095086603039, "f1_std": 0.04614711513528126, "bacc": 0.8539402173913043, "bacc_std": 0.04695341855420785} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04136673315080909, "f1": 0.8879076086956521, "f1_std": 0.0426581644776873, "bacc": 0.8879076086956521, "bacc_std": 0.04312749512914843} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.028957215518390454, "f1": 0.9427282193682749, "f1_std": 0.03145724019252021, "bacc": 0.9347826086956521, "bacc_std": 0.03462275768503206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 166.81005372000556, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04284857057125711, "f1": 0.8863636363636364, "f1_std": 0.04541065942066133, "bacc": 0.8817934782608696, "bacc_std": 0.04636775985839513} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.03110438835930731, "f1": 0.9442755825734549, "f1_std": 0.031618086205212136, "bacc": 0.9470108695652174, "bacc_std": 0.030596487754536734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.05217971487683788, "f1": 0.790003471017008, "f1_std": 0.05614983132619298, "bacc": 0.7853260869565217, "bacc_std": 0.05531699302503915} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03505897746407398, "f1": 0.9252717391304348, "f1_std": 0.03609753080578298, "bacc": 0.9252717391304348, "bacc_std": 0.036233851426998684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029434583583665703, "f1": 0.9427282193682749, "f1_std": 0.03210466456289536, "bacc": 0.9347826086956521, "bacc_std": 0.03519352385003507} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03945050670572994, "f1": 0.905982905982906, "f1_std": 0.04122527157904843, "bacc": 0.9035326086956521, "bacc_std": 0.04222734334658748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03948647221015453, "f1": 0.8879076086956521, "f1_std": 0.040822957745517655, "bacc": 0.8879076086956521, "bacc_std": 0.04133006732408728} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04975920862488875, "f1": 0.8250265111346766, "f1_std": 0.05510415419185682, "bacc": 0.8165760869565217, "bacc_std": 0.054248246067336824} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0447475933144791, "f1": 0.8428571428571429, "f1_std": 0.05198070232959144, "bacc": 0.8322010869565217, "bacc_std": 0.05145448308125144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.025641160092887597, "f1": 0.9626358695652174, "f1_std": 0.026409460272590453, "bacc": 0.9626358695652174, "bacc_std": 0.026747259832623514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03520741845193407, "f1": 0.9242424242424243, "f1_std": 0.037425083286635, "bacc": 0.9191576086956521, "bacc_std": 0.03924440405893466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.044865639634743965, "f1": 0.84593837535014, "f1_std": 0.0494339975726891, "bacc": 0.8383152173913043, "bacc_std": 0.04986685476727319} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043506230198854294, "f1": 0.8663658451926415, "f1_std": 0.04693324401556137, "bacc": 0.8600543478260869, "bacc_std": 0.04787362339277557} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04417155136978123, "f1": 0.8663658451926415, "f1_std": 0.04742938208571879, "bacc": 0.8600543478260869, "bacc_std": 0.04831342736450297} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.043502582805435806, "f1": 0.8212351029252438, "f1_std": 0.051787427580501734, "bacc": 0.8104619565217391, "bacc_std": 0.04978391760964377} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05492564470519202, "f1": 0.8151881720430108, "f1_std": 0.05556209001603648, "bacc": 0.8192934782608696, "bacc_std": 0.05550978440387449} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04529683184513692, "f1": 0.8683760683760684, "f1_std": 0.04725914959285144, "bacc": 0.8661684782608696, "bacc_std": 0.04764497333191647} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0472147756673937, "f1": 0.8711943793911007, "f1_std": 0.04747329880588248, "bacc": 0.8783967391304348, "bacc_std": 0.04616173169207714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04479604468962309, "f1": 0.84593837535014, "f1_std": 0.049190137008675264, "bacc": 0.8383152173913043, "bacc_std": 0.04918908678320517} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03677900946450412, "f1": 0.9242424242424243, "f1_std": 0.03888035666721627, "bacc": 0.9191576086956521, "bacc_std": 0.04060824639548038} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046682525443876535, "f1": 0.8484848484848485, "f1_std": 0.04960032033046009, "bacc": 0.8444293478260869, "bacc_std": 0.05002872624117813} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04530347258322669, "f1": 0.8683760683760684, "f1_std": 0.04762602213503136, "bacc": 0.8661684782608696, "bacc_std": 0.04816742483098075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04537593201920561, "f1": 0.8699763593380614, "f1_std": 0.04620252683025869, "bacc": 0.8722826086956521, "bacc_std": 0.045848957574472306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.036636503496236546, "f1": 0.9027925061859314, "f1_std": 0.04172062939029353, "bacc": 0.8913043478260869, "bacc_std": 0.04380451504984804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04106707273242052, "f1": 0.884453781512605, "f1_std": 0.04538219647153068, "bacc": 0.8756793478260869, "bacc_std": 0.04676350863545627} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.02927349519037926, "f1": 0.9442755825734549, "f1_std": 0.029793003230557103, "bacc": 0.9470108695652174, "bacc_std": 0.02892038844062629} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04240908993471692, "f1": 0.8683760683760684, "f1_std": 0.044193000083357616, "bacc": 0.8661684782608696, "bacc_std": 0.04453122460865525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03716247996777627, "f1": 0.905982905982906, "f1_std": 0.0386582921612847, "bacc": 0.9035326086956521, "bacc_std": 0.039344638089985624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.0330669243747178, "f1": 0.9252717391304348, "f1_std": 0.03403091335779827, "bacc": 0.9252717391304348, "bacc_std": 0.03423037262908611} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0478067736132212, "f1": 0.8505434782608696, "f1_std": 0.04936212869106104, "bacc": 0.8505434782608696, "bacc_std": 0.049741480854435326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.0361824394647436, "f1": 0.9242424242424243, "f1_std": 0.038474873171692445, "bacc": 0.9191576086956521, "bacc_std": 0.04034329172989457} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 166.81005372000556, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03855499024416, "f1": 0.9079959852793577, "f1_std": 0.03866014824471662, "bacc": 0.9157608695652174, "bacc_std": 0.03610969674075427} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 13.73 | 45.421 | 0.95085 | 0.03067 | 0.94952 | 0.031525 | 0.94894 | 0.031715 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 13.73 | 45.421 | 0.86909 | 0.045206 | 0.86452 | 0.046924 | 0.86347 | 0.047191 | + + +done! total time: 0:05:05 diff --git a/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic/config.yaml b/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4bf2f0d895866667d3ce9a1358ac9da526afe54c --- /dev/null +++ b/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic/eval_table.csv b/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..1bfcfabc8b5d8287da71ff0a22dd8025ce4ebb76 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,train,0.8361823361823362,0.013732222990594314,0.8331262828688306,0.014048469797661946,0.8311527349136516,0.014070568687815317 +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,test,0.6693548387096774,0.04159720571640734,0.6644445911160979,0.042657590710188216,0.6639172558261325,0.04219831885696091 +flat_mae,patch,logistic,abide_dx,1,0.046415888336127774,train,0.8176638176638177,0.014739601701295678,0.8144774332080769,0.01514202876274195,0.8127722406792174,0.015198734790086231 +flat_mae,patch,logistic,abide_dx,1,0.046415888336127774,test,0.6532258064516129,0.04224420789808378,0.6526610644257702,0.04241796388327861,0.6554621848739496,0.04263994921489466 +flat_mae,patch,logistic,abide_dx,2,0.3593813663804626,train,0.9131054131054132,0.01033810256477935,0.9119329224276751,0.010523554055651004,0.9108527131782945,0.010702520867795618 +flat_mae,patch,logistic,abide_dx,2,0.3593813663804626,test,0.6532258064516129,0.04228070065979321,0.6526610644257702,0.04233409712087747,0.6554621848739496,0.04250035624827325 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,train,0.9173789173789174,0.010074609053616804,0.916450522030337,0.010204104022674956,0.9162052417866371,0.010312353074316908 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,test,0.5645161290322581,0.04462592817119303,0.5571428571428572,0.04563753745283136,0.5572478991596639,0.0451540051762055 +flat_mae,patch,logistic,abide_dx,4,0.3593813663804626,train,0.9045584045584045,0.01113247416667033,0.9033961489088576,0.01129073638494645,0.9028054632705795,0.01139263612070365 +flat_mae,patch,logistic,abide_dx,4,0.3593813663804626,test,0.7258064516129032,0.041917921260348316,0.7246603970741902,0.04208211149434743,0.7263655462184874,0.04213745979234176 +flat_mae,patch,logistic,abide_dx,5,2.782559402207126,train,0.9971509971509972,0.0020514620845338946,0.9971207087486158,0.0020733164547973785,0.9971207087486158,0.0020867501577141595 +flat_mae,patch,logistic,abide_dx,5,2.782559402207126,test,0.6209677419354839,0.04253923634142914,0.6197559861681998,0.04267041715095853,0.6213235294117647,0.04266593296126044 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,train,0.8262108262108262,0.013741334393241874,0.8228988312365071,0.01410278570753716,0.8208194905869325,0.014122665297930414 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,test,0.6048387096774194,0.04468527938313004,0.6004471624909581,0.04536728193670636,0.6003151260504203,0.04519225133048166 +flat_mae,patch,logistic,abide_dx,7,0.3593813663804626,train,0.9131054131054132,0.010891443523092082,0.9121554951535976,0.011046150756541072,0.9120339608711702,0.01121924730169462 +flat_mae,patch,logistic,abide_dx,7,0.3593813663804626,test,0.6048387096774194,0.04317689818572759,0.6017043592264831,0.04351500287343292,0.601890756302521,0.04337567729987715 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,train,0.9074074074074074,0.011178851145467718,0.9062197495493903,0.01133574545004193,0.905389442598745,0.011393849243699156 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,test,0.6129032258064516,0.04259773606174214,0.607905138339921,0.04314091863082814,0.6076680672268908,0.04297101493106213 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,train,0.9145299145299145,0.010201474830474875,0.9133475971033574,0.010381317495696261,0.9121447028423773,0.010538132301563266 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,test,0.6532258064516129,0.04225823688711459,0.6480760345851759,0.04301197855494882,0.6475840336134454,0.04263818701186095 +flat_mae,patch,logistic,abide_dx,10,0.3593813663804626,train,0.9202279202279202,0.009817022233667328,0.9191244239631337,0.009980227126710327,0.9179032853451459,0.010107467202540694 +flat_mae,patch,logistic,abide_dx,10,0.3593813663804626,test,0.6612903225806451,0.044038165738633,0.6609375,0.04408681219332392,0.664390756302521,0.043928329042569825 +flat_mae,patch,logistic,abide_dx,11,2.782559402207126,train,0.9957264957264957,0.0025468539823207295,0.9956823293279728,0.002572351445595851,0.995828719084533,0.00250517359678404 +flat_mae,patch,logistic,abide_dx,11,2.782559402207126,test,0.6290322580645161,0.04322527683835397,0.6191239316239316,0.04546779294351958,0.6192226890756303,0.04433124626380628 +flat_mae,patch,logistic,abide_dx,12,0.046415888336127774,train,0.8076923076923077,0.014971838240266037,0.8044055700142201,0.015282728032900062,0.8028423772609818,0.01526194424208718 +flat_mae,patch,logistic,abide_dx,12,0.046415888336127774,test,0.5806451612903226,0.042677199911658775,0.5735449735449736,0.0437004170643649,0.5735294117647058,0.043094968058688475 +flat_mae,patch,logistic,abide_dx,13,0.046415888336127774,train,0.8333333333333334,0.014125744568164738,0.8303559810391301,0.014464426638245662,0.8284606866002215,0.014508685765764293 +flat_mae,patch,logistic,abide_dx,13,0.046415888336127774,test,0.6532258064516129,0.04210951481902213,0.650475254015077,0.04266138987651826,0.6507352941176471,0.04275522126152742 +flat_mae,patch,logistic,abide_dx,14,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,14,1291.5496650148827,test,0.6209677419354839,0.043903199141989,0.6167554415729598,0.044449866851817336,0.6165966386554622,0.04430741992550199 +flat_mae,patch,logistic,abide_dx,15,0.3593813663804626,train,0.9045584045584045,0.011209737962001776,0.9033961489088576,0.01136170198385481,0.9028054632705795,0.01145297929659692 +flat_mae,patch,logistic,abide_dx,15,0.3593813663804626,test,0.6693548387096774,0.040012029928783326,0.6630211440312852,0.04114627108201514,0.6622899159663866,0.04057696371830671 +flat_mae,patch,logistic,abide_dx,16,0.046415888336127774,train,0.8219373219373219,0.014432758243500678,0.8186155248574245,0.014912730281080127,0.8166482096714655,0.015019039915724978 +flat_mae,patch,logistic,abide_dx,16,0.046415888336127774,test,0.6370967741935484,0.041082753849903716,0.6330637206549615,0.041503581077858155,0.6328781512605042,0.04136811062594128 +flat_mae,patch,logistic,abide_dx,17,2.782559402207126,train,0.9928774928774928,0.003126553152561216,0.9928038822132881,0.003157503201411677,0.9929494278331488,0.003102189642939484 +flat_mae,patch,logistic,abide_dx,17,2.782559402207126,test,0.6370967741935484,0.04093512481095228,0.6368842324461508,0.04095659568412819,0.6407563025210083,0.041006078917970444 +flat_mae,patch,logistic,abide_dx,18,0.3593813663804626,train,0.9088319088319088,0.010615087553362615,0.9076923076923077,0.01075375096960997,0.9069767441860466,0.0108041614355196 +flat_mae,patch,logistic,abide_dx,18,0.3593813663804626,test,0.6532258064516129,0.04074648149745541,0.6465831510572015,0.04167842055499143,0.6460084033613445,0.04108178311952098 +flat_mae,patch,logistic,abide_dx,19,0.046415888336127774,train,0.8176638176638177,0.014142693487900464,0.8140397350993378,0.014516848775927774,0.8118863049095607,0.014513467552402137 +flat_mae,patch,logistic,abide_dx,19,0.046415888336127774,test,0.6290322580645161,0.044582497860760135,0.6242424242424243,0.04523300286106727,0.6239495798319328,0.045023224806432435 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,train,0.8290598290598291,0.013976232336800265,0.8259389050515737,0.014346899388704192,0.8239940937615357,0.014380371721010657 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,test,0.6693548387096774,0.04024227810313777,0.6667322189446083,0.04078713760836048,0.6670168067226891,0.04086965788927231 +flat_mae,patch,logistic,abide_dx,21,2.782559402207126,train,0.9985754985754985,0.0014910386448010742,0.998559926150059,0.0015082557357732915,0.9984126984126984,0.001661443061349775 +flat_mae,patch,logistic,abide_dx,21,2.782559402207126,test,0.6209677419354839,0.043503115656240154,0.6167554415729598,0.04415345905748401,0.6165966386554622,0.04412264221171354 +flat_mae,patch,logistic,abide_dx,22,0.046415888336127774,train,0.8304843304843305,0.01391924245456064,0.8271848074556005,0.014322324764236987,0.8249907715023994,0.014352192338562391 +flat_mae,patch,logistic,abide_dx,22,0.046415888336127774,test,0.6370967741935484,0.03882410506676452,0.6094351508364246,0.04474164758305395,0.6171218487394958,0.040287276246000016 +flat_mae,patch,logistic,abide_dx,23,0.046415888336127774,train,0.8304843304843305,0.013718510519156136,0.8264524103831892,0.014241468088135784,0.8235142118863049,0.014250724664969225 +flat_mae,patch,logistic,abide_dx,23,0.046415888336127774,test,0.6048387096774194,0.04438916853285834,0.5953379953379954,0.046220804685687615,0.5955882352941176,0.04516838893524283 +flat_mae,patch,logistic,abide_dx,24,0.3593813663804626,train,0.9202279202279202,0.010421691363663442,0.9193798449612403,0.010536917769712469,0.9193798449612403,0.010588900254888489 +flat_mae,patch,logistic,abide_dx,24,0.3593813663804626,test,0.6370967741935484,0.044438721268403564,0.6351748937561295,0.04471465496243104,0.6360294117647058,0.04494583722288871 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,train,0.9145299145299145,0.010482016962630495,0.9134615384615385,0.010631815053334812,0.9127353266888151,0.010725335782042213 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,test,0.6370967741935484,0.04151141349764327,0.6330637206549615,0.042236331611118755,0.6328781512605042,0.042041432436894316 +flat_mae,patch,logistic,abide_dx,26,0.3593813663804626,train,0.9145299145299145,0.009846412693126005,0.9135695055486244,0.009959172406901669,0.9133259505352529,0.0100065005270732 +flat_mae,patch,logistic,abide_dx,26,0.3593813663804626,test,0.6209677419354839,0.04117677178773878,0.607462787095036,0.04341431624415916,0.608718487394958,0.041807631569762346 +flat_mae,patch,logistic,abide_dx,27,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,27,1291.5496650148827,test,0.5967741935483871,0.042052476384051594,0.5958279009126467,0.04218691150717608,0.5976890756302521,0.042327638017954805 +flat_mae,patch,logistic,abide_dx,28,0.3593813663804626,train,0.9145299145299145,0.010435758212607319,0.9134615384615385,0.010589245554817618,0.9127353266888151,0.010694115381840818 +flat_mae,patch,logistic,abide_dx,28,0.3593813663804626,test,0.6370967741935484,0.04175149152719045,0.6330637206549615,0.04234159285920153,0.6328781512605042,0.042147732923265686 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,train,0.8048433048433048,0.014713442346051143,0.8010446942976241,0.015155931490621877,0.799077150239941,0.015175135485338467 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,test,0.717741935483871,0.03999693991936923,0.7094074322062269,0.042178966374794506,0.7079831932773109,0.04119321168639709 +flat_mae,patch,logistic,abide_dx,30,0.3593813663804626,train,0.9273504273504274,0.009475836583925885,0.9263701482592037,0.009638363466604844,0.925249169435216,0.009817567494863915 +flat_mae,patch,logistic,abide_dx,30,0.3593813663804626,test,0.6693548387096774,0.04239847088028616,0.6614052614052615,0.04370932280331933,0.6607142857142857,0.0428960666771149 +flat_mae,patch,logistic,abide_dx,31,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,31,10000.0,test,0.5645161290322581,0.04392305671911358,0.5634941329856584,0.04388324705931793,0.5651260504201681,0.043892617010609425 +flat_mae,patch,logistic,abide_dx,32,0.046415888336127774,train,0.8233618233618234,0.014042587649848217,0.8197032336103263,0.014495893346004167,0.8173495754891104,0.01452397973454008 +flat_mae,patch,logistic,abide_dx,32,0.046415888336127774,test,0.6774193548387096,0.04196206043712621,0.6743697478991597,0.04235026071545418,0.6743697478991597,0.042186223885294145 +flat_mae,patch,logistic,abide_dx,33,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,33,10000.0,test,0.5564516129032258,0.04412948912704457,0.543354536324071,0.04580919218608076,0.5451680672268907,0.044583151077540774 +flat_mae,patch,logistic,abide_dx,34,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,34,21.54434690031882,test,0.6129032258064516,0.04339414440901715,0.6063492063492064,0.044367187277124145,0.60609243697479,0.043837154292372664 +flat_mae,patch,logistic,abide_dx,35,0.046415888336127774,train,0.8148148148148148,0.014710893456188586,0.8114338138058714,0.015117280841143858,0.8095976375046142,0.01514378287518257 +flat_mae,patch,logistic,abide_dx,35,0.046415888336127774,test,0.6532258064516129,0.039697251141344536,0.6493719997369632,0.040377521613365636,0.6491596638655461,0.0403276035244077 +flat_mae,patch,logistic,abide_dx,36,0.046415888336127774,train,0.8091168091168092,0.014143398247798441,0.8043120090533884,0.014732309651517717,0.8014765596160944,0.014716749240996231 +flat_mae,patch,logistic,abide_dx,36,0.046415888336127774,test,0.6693548387096774,0.040348053244541424,0.6595915634415801,0.04199384639262717,0.6591386554621849,0.04096939995111847 +flat_mae,patch,logistic,abide_dx,37,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,37,2.782559402207126,test,0.6290322580645161,0.044910417828746024,0.6266038229903116,0.04527356914323872,0.6271008403361344,0.045285622673545356 +flat_mae,patch,logistic,abide_dx,38,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,38,10000.0,test,0.6693548387096774,0.043026971068552196,0.665680278818965,0.04349491860461719,0.6654411764705883,0.04328892390285515 +flat_mae,patch,logistic,abide_dx,39,0.3593813663804626,train,0.9188034188034188,0.01042885888860151,0.9177078127602866,0.01061628078501772,0.9166112956810631,0.010811097994714984 +flat_mae,patch,logistic,abide_dx,39,0.3593813663804626,test,0.6532258064516129,0.04301325631154159,0.6429862738533645,0.044196523987448194,0.6428571428571428,0.04335183651853802 +flat_mae,patch,logistic,abide_dx,40,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,40,21.54434690031882,test,0.5887096774193549,0.04161193734813034,0.5841388834089565,0.042455994835944165,0.5840336134453781,0.04225883762677999 +flat_mae,patch,logistic,abide_dx,41,0.3593813663804626,train,0.9045584045584045,0.011328673919265762,0.9032705869287578,0.01150713988575204,0.9022148394241418,0.011595086504448052 +flat_mae,patch,logistic,abide_dx,41,0.3593813663804626,test,0.6129032258064516,0.039431690562332436,0.6003223207091055,0.04121761124905071,0.6013655462184874,0.03996148470274818 +flat_mae,patch,logistic,abide_dx,42,2.782559402207126,train,0.9943019943019943,0.0027482040289131112,0.9942344177336826,0.002785136835367313,0.9936507936507937,0.0030622844893603046 +flat_mae,patch,logistic,abide_dx,42,2.782559402207126,test,0.6774193548387096,0.04172084961267863,0.6704756842944459,0.043293293816540866,0.6696428571428572,0.04261000330576604 +flat_mae,patch,logistic,abide_dx,43,0.3593813663804626,train,0.9131054131054132,0.010154819356637531,0.9119329224276751,0.010314986745158294,0.9108527131782945,0.01041271916109372 +flat_mae,patch,logistic,abide_dx,43,0.3593813663804626,test,0.6451612903225806,0.04204148822972659,0.6436781609195402,0.04232236660664587,0.6449579831932774,0.042543151388024406 +flat_mae,patch,logistic,abide_dx,44,0.3593813663804626,train,0.9173789173789174,0.010195008909775434,0.9161787593567035,0.010384483750420006,0.9147286821705426,0.010531839098697754 +flat_mae,patch,logistic,abide_dx,44,0.3593813663804626,test,0.6048387096774194,0.042165930094924156,0.5972691721349506,0.04324290162240511,0.5971638655462186,0.04253495376579031 +flat_mae,patch,logistic,abide_dx,45,0.3593813663804626,train,0.9173789173789174,0.010246305167615748,0.916450522030337,0.010357177139065903,0.9162052417866371,0.010374721724827857 +flat_mae,patch,logistic,abide_dx,45,0.3593813663804626,test,0.7338709677419355,0.03999665373620214,0.7274725274725276,0.041405708079099636,0.7258403361344539,0.04088606238623065 +flat_mae,patch,logistic,abide_dx,46,0.046415888336127774,train,0.8347578347578347,0.013783352118059861,0.8318701738448198,0.014092684708063767,0.8300479881875231,0.014117194910708243 +flat_mae,patch,logistic,abide_dx,46,0.046415888336127774,test,0.5564516129032258,0.044002562951559215,0.5529334644378892,0.044279279157840266,0.553046218487395,0.04429408171384627 +flat_mae,patch,logistic,abide_dx,47,2.782559402207126,train,0.9943019943019943,0.002890473106981836,0.9942414174972314,0.0029215090479896046,0.9942414174972314,0.002952255536303985 +flat_mae,patch,logistic,abide_dx,47,2.782559402207126,test,0.5806451612903226,0.0457333440905709,0.5778999738151349,0.046122552762727614,0.5782563025210083,0.04630939411376973 +flat_mae,patch,logistic,abide_dx,48,0.3593813663804626,train,0.9202279202279202,0.010885927297279125,0.9193798449612403,0.010985922321510038,0.9193798449612403,0.01094901399811559 +flat_mae,patch,logistic,abide_dx,48,0.3593813663804626,test,0.6048387096774194,0.04420419874021367,0.6004471624909581,0.04488844648362244,0.6003151260504203,0.04463122956583296 +flat_mae,patch,logistic,abide_dx,49,0.046415888336127774,train,0.8262108262108262,0.013850168540875032,0.8227566225165563,0.014213560166173178,0.8205241786637135,0.014190787838425707 +flat_mae,patch,logistic,abide_dx,49,0.046415888336127774,test,0.5967741935483871,0.04372143336730839,0.5915678524374176,0.04447282120639658,0.5913865546218487,0.044234320450347676 +flat_mae,patch,logistic,abide_dx,50,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,50,1291.5496650148827,test,0.5645161290322581,0.04519324906073886,0.5640625,0.04525149486024833,0.5667016806722689,0.0455868266443871 +flat_mae,patch,logistic,abide_dx,51,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,51,1291.5496650148827,test,0.5725806451612904,0.042877663376743425,0.5643931861867832,0.04354361173333113,0.5646008403361344,0.043053202779155836 +flat_mae,patch,logistic,abide_dx,52,0.3593813663804626,train,0.9102564102564102,0.010799846440751167,0.9091635430038512,0.010947735213283797,0.9085640457733482,0.011018588472233722 +flat_mae,patch,logistic,abide_dx,52,0.3593813663804626,test,0.6854838709677419,0.038616743280635385,0.6794591370053689,0.03954545885547814,0.6785714285714286,0.039105118016161323 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,train,0.8176638176638177,0.014325827464901247,0.8147528140848554,0.01463893946289815,0.8133628645256552,0.014675950226240839 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,test,0.5967741935483871,0.04323609350389588,0.5860042735042735,0.04446341889197636,0.5866596638655462,0.0435464486082643 +flat_mae,patch,logistic,abide_dx,54,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,54,10000.0,test,0.6048387096774194,0.045183358663384666,0.6035753898349319,0.04532982267760377,0.6050420168067226,0.04549952812570808 +flat_mae,patch,logistic,abide_dx,55,0.3593813663804626,train,0.9102564102564102,0.010697238981481776,0.9093289158874289,0.010820473629170794,0.9094499815430048,0.01091263155531017 +flat_mae,patch,logistic,abide_dx,55,0.3593813663804626,test,0.6209677419354839,0.04036441700619171,0.607462787095036,0.04221346696264637,0.608718487394958,0.04086882774524789 +flat_mae,patch,logistic,abide_dx,56,0.046415888336127774,train,0.8105413105413105,0.014902103139540743,0.8070069184482997,0.015314307584186634,0.8051310446659283,0.015296226727619964 +flat_mae,patch,logistic,abide_dx,56,0.046415888336127774,test,0.6935483870967742,0.040065191412936685,0.6869519000797236,0.041294181765220504,0.6859243697478992,0.040749066365223435 +flat_mae,patch,logistic,abide_dx,57,0.046415888336127774,train,0.8148148148148148,0.014205553320732443,0.8118583268049313,0.014518099723434376,0.810483573274271,0.014548521307502726 +flat_mae,patch,logistic,abide_dx,57,0.046415888336127774,test,0.6612903225806451,0.043990988576781714,0.6539994685091681,0.04535045203700507,0.6533613445378151,0.04468445342149215 +flat_mae,patch,logistic,abide_dx,58,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,58,10000.0,test,0.5806451612903226,0.04486080622864319,0.5735449735449736,0.04573917432105179,0.5735294117647058,0.04526755093684735 +flat_mae,patch,logistic,abide_dx,59,0.046415888336127774,train,0.8205128205128205,0.014504723987808821,0.8169453642384106,0.014890581194104736,0.814765596160945,0.014900379126406658 +flat_mae,patch,logistic,abide_dx,59,0.046415888336127774,test,0.6532258064516129,0.04225553735363857,0.6521171788347361,0.0424897164357544,0.6538865546218487,0.0427007544129227 +flat_mae,patch,logistic,abide_dx,60,2.782559402207126,train,0.9928774928774928,0.0031896352362229,0.9927952559531508,0.0032300177832324493,0.9923588039867111,0.003432556055411046 +flat_mae,patch,logistic,abide_dx,60,2.782559402207126,test,0.6209677419354839,0.04216623857232452,0.6118548118548119,0.043366675866586955,0.6118697478991597,0.04259716597338024 +flat_mae,patch,logistic,abide_dx,61,0.3593813663804626,train,0.9188034188034188,0.010484167517004206,0.9178659178659179,0.010617661758644408,0.9174972314507198,0.010695990625742311 +flat_mae,patch,logistic,abide_dx,61,0.3593813663804626,test,0.6290322580645161,0.041811662732767124,0.6227513227513227,0.04281508823395428,0.6223739495798319,0.04238711318752693 +flat_mae,patch,logistic,abide_dx,62,0.3593813663804626,train,0.9259259259259259,0.010026746076672345,0.924951275071751,0.010175273465103836,0.9239571797711332,0.010274282881372403 +flat_mae,patch,logistic,abide_dx,62,0.3593813663804626,test,0.6370967741935484,0.043428901193755545,0.6342182890855457,0.04370758647469968,0.634453781512605,0.04366081226416784 +flat_mae,patch,logistic,abide_dx,63,2.782559402207126,train,0.99002849002849,0.003529144134341237,0.989913358334411,0.0035737190532663574,0.9894795127353266,0.0037454410254108774 +flat_mae,patch,logistic,abide_dx,63,2.782559402207126,test,0.6693548387096774,0.044395219155568634,0.6667322189446083,0.04482159608833423,0.6670168067226891,0.044812278421158064 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,train,0.9116809116809117,0.010843544378470897,0.9106884890669118,0.010989858525003419,0.9104466592838686,0.011109757873171741 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,test,0.7338709677419355,0.04076478811489186,0.7287731159276198,0.04187265808620514,0.7274159663865546,0.041486249136840944 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,train,0.905982905982906,0.011224580954483478,0.9046172089231453,0.011439014461950978,0.9032115171650055,0.011613318248174362 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,test,0.6451612903225806,0.04250428147620581,0.6356837606837606,0.044102336117294666,0.6355042016806722,0.04310882777618087 +flat_mae,patch,logistic,abide_dx,66,0.046415888336127774,train,0.8333333333333334,0.01367368699930276,0.8308534759534494,0.01393703266766837,0.8296419342930971,0.014003233099134385 +flat_mae,patch,logistic,abide_dx,66,0.046415888336127774,test,0.6532258064516129,0.04143415912195637,0.6465831510572015,0.04265193687152699,0.6460084033613445,0.04215181125918449 +flat_mae,patch,logistic,abide_dx,67,2.782559402207126,train,0.9928774928774928,0.0032973426524438238,0.9927996307502949,0.0033346182313481526,0.9926541159099298,0.003410819681388979 +flat_mae,patch,logistic,abide_dx,67,2.782559402207126,test,0.6129032258064516,0.044710919663420064,0.6025641025641025,0.04617930004089794,0.6029411764705883,0.045171939190597483 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,train,0.9202279202279202,0.010192747576022544,0.9191782962311166,0.010366511877399533,0.9181985972683647,0.01051848260488645 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,test,0.6612903225806451,0.03954191390480414,0.6580882352941176,0.03980207753565382,0.6580882352941176,0.03979232892020779 +flat_mae,patch,logistic,abide_dx,69,0.3593813663804626,train,0.9131054131054132,0.01102512421766315,0.9121554951535976,0.01116898655797386,0.9120339608711702,0.011289081166005726 +flat_mae,patch,logistic,abide_dx,69,0.3593813663804626,test,0.7016129032258065,0.04021786079272462,0.6928021426180114,0.042111459798796644,0.6917016806722689,0.04110312531500887 +flat_mae,patch,logistic,abide_dx,70,2.782559402207126,train,0.9943019943019943,0.0026368225528191013,0.994237967036575,0.002668971312931279,0.9939461055740126,0.002838053985388493 +flat_mae,patch,logistic,abide_dx,70,2.782559402207126,test,0.6370967741935484,0.04006168500723785,0.6283716283716283,0.041477373318290586,0.6281512605042017,0.04068783085409267 +flat_mae,patch,logistic,abide_dx,71,0.3593813663804626,train,0.9273504273504274,0.00941683321258643,0.9265995985755378,0.009521804799569013,0.9267257290513105,0.009594906146258226 +flat_mae,patch,logistic,abide_dx,71,0.3593813663804626,test,0.6290322580645161,0.04283650827515094,0.6255252100840336,0.04325241038452638,0.6255252100840336,0.04311766204179228 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,train,0.8233618233618234,0.013930155365125043,0.8197032336103263,0.014398452130888958,0.8173495754891104,0.014449418812518532 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,test,0.6129032258064516,0.044649691523836715,0.6112852664576802,0.044889653195191795,0.6123949579831933,0.04492010669605996 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,train,0.8290598290598291,0.014346361982251612,0.8262032085561497,0.014640045581249482,0.8245847176079735,0.01465622754229318 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,test,0.6048387096774194,0.043948730299078545,0.5953379953379954,0.04583258547658649,0.5955882352941176,0.04488359244345984 +flat_mae,patch,logistic,abide_dx,74,2.782559402207126,train,0.9957264957264957,0.002475970342335943,0.995679778450177,0.002503563711441709,0.9955334071613142,0.0025739629003520034 +flat_mae,patch,logistic,abide_dx,74,2.782559402207126,test,0.6774193548387096,0.0446307080837653,0.6753076721654884,0.04500980392817246,0.6759453781512605,0.045013263939792034 +flat_mae,patch,logistic,abide_dx,75,0.3593813663804626,train,0.9145299145299145,0.010137491585366646,0.9136715419426773,0.010239309055232695,0.9139165743816906,0.010275323142876047 +flat_mae,patch,logistic,abide_dx,75,0.3593813663804626,test,0.7096774193548387,0.04091198580766188,0.7077769049489395,0.04103048682026622,0.7085084033613445,0.040965768364221715 +flat_mae,patch,logistic,abide_dx,76,0.046415888336127774,train,0.8233618233618234,0.01373176105523508,0.8202750134203245,0.014097741248480426,0.818530823181986,0.014180252042143583 +flat_mae,patch,logistic,abide_dx,76,0.046415888336127774,test,0.7016129032258065,0.039947217386184286,0.6909813430322624,0.04237293606728756,0.6901260504201681,0.041049845752477165 +flat_mae,patch,logistic,abide_dx,77,0.3593813663804626,train,0.9188034188034188,0.010220139649442345,0.9179642572314833,0.01032618402968656,0.9180878552971576,0.010336628760780635 +flat_mae,patch,logistic,abide_dx,77,0.3593813663804626,test,0.6370967741935484,0.043367230731887384,0.635936582501468,0.04356322684526382,0.6376050420168067,0.0436609388877889 +flat_mae,patch,logistic,abide_dx,78,2.782559402207126,train,0.9971509971509972,0.0020013935778450697,0.9971207087486158,0.0020225837137120505,0.9971207087486158,0.0020246147757083627 +flat_mae,patch,logistic,abide_dx,78,2.782559402207126,test,0.5967741935483871,0.04473437327073084,0.5929621848739496,0.045228337442327196,0.5929621848739496,0.045128301774348324 +flat_mae,patch,logistic,abide_dx,79,0.3593813663804626,train,0.9145299145299145,0.01128343936610374,0.9134615384615385,0.011465542679056003,0.9127353266888151,0.011634342263494545 +flat_mae,patch,logistic,abide_dx,79,0.3593813663804626,test,0.6290322580645161,0.04436468805599709,0.6227513227513227,0.045475535880137606,0.6223739495798319,0.044852501756747806 +flat_mae,patch,logistic,abide_dx,80,0.046415888336127774,train,0.811965811965812,0.01448093414647114,0.808532795556731,0.014863188692844009,0.8067183462532299,0.014870613045673858 +flat_mae,patch,logistic,abide_dx,80,0.046415888336127774,test,0.6532258064516129,0.04201216314637051,0.650475254015077,0.04244577969521459,0.6507352941176471,0.0424212355760643 +flat_mae,patch,logistic,abide_dx,81,0.046415888336127774,train,0.8176638176638177,0.014287263841230345,0.8140397350993378,0.014682528977364488,0.8118863049095607,0.014694115648932159 +flat_mae,patch,logistic,abide_dx,81,0.046415888336127774,test,0.6612903225806451,0.041819330453556604,0.6522435897435898,0.043021538846998705,0.6517857142857143,0.042128640559751664 +flat_mae,patch,logistic,abide_dx,82,0.3593813663804626,train,0.8945868945868946,0.011264798745726915,0.8933367280731292,0.011412505213885727,0.8928755998523441,0.011487759425428795 +flat_mae,patch,logistic,abide_dx,82,0.3593813663804626,test,0.6774193548387096,0.04175028584771948,0.6688034188034189,0.04356643274812478,0.6680672268907563,0.0426099866164883 +flat_mae,patch,logistic,abide_dx,83,0.3593813663804626,train,0.9116809116809117,0.011263980174564227,0.9105769230769231,0.011432933915639938,0.9098560354374308,0.01153708554560834 +flat_mae,patch,logistic,abide_dx,83,0.3593813663804626,test,0.6774193548387096,0.04473764428103986,0.6743697478991597,0.045243578335739144,0.6743697478991597,0.045128574817796384 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,train,0.9173789173789174,0.009878414014869006,0.9162918068107992,0.010039889918726601,0.9153193060169804,0.010174850611443898 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,test,0.6048387096774194,0.04564369084602895,0.6017043592264831,0.04609442440024641,0.601890756302521,0.04604308690144586 +flat_mae,patch,logistic,abide_dx,85,0.046415888336127774,train,0.8390313390313391,0.013332138585768444,0.8361557765591598,0.013683074816789967,0.83421926910299,0.013727963686044946 +flat_mae,patch,logistic,abide_dx,85,0.046415888336127774,test,0.6532258064516129,0.04271385565762564,0.650475254015077,0.04321020705420851,0.6507352941176471,0.043220535134742624 +flat_mae,patch,logistic,abide_dx,86,2.782559402207126,train,0.99002849002849,0.003757665196220296,0.9899194830504128,0.0038004668530987476,0.9897748246585456,0.0038760810432220657 +flat_mae,patch,logistic,abide_dx,86,2.782559402207126,test,0.6774193548387096,0.043262502478562156,0.6760710553814002,0.043660736420618115,0.6775210084033614,0.043909701709000874 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,train,0.9245014245014245,0.009842375472018125,0.923482703092898,0.009985356046419766,0.9223698781838316,0.010042272020658558 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,test,0.6451612903225806,0.0441091022718192,0.6443285528031291,0.04415636811687999,0.6465336134453781,0.04416177395033841 +flat_mae,patch,logistic,abide_dx,88,0.3593813663804626,train,0.9216524216524217,0.010055244678892203,0.9207478154846576,0.010165984187427451,0.9203765227021041,0.010172984760279783 +flat_mae,patch,logistic,abide_dx,88,0.3593813663804626,test,0.6048387096774194,0.04380479770263744,0.6035753898349319,0.043932054119556314,0.6050420168067226,0.04396473645852784 +flat_mae,patch,logistic,abide_dx,89,0.3593813663804626,train,0.905982905982906,0.011179895209655984,0.9047458491295302,0.01135604503447857,0.9038021410114434,0.01148916198967954 +flat_mae,patch,logistic,abide_dx,89,0.3593813663804626,test,0.6290322580645161,0.04144694024121685,0.6242424242424243,0.04209252779303939,0.6239495798319328,0.04187317491941256 +flat_mae,patch,logistic,abide_dx,90,0.046415888336127774,train,0.8190883190883191,0.014106061898957542,0.8159963510504146,0.014412279168191891,0.8143595422665191,0.014432823083625233 +flat_mae,patch,logistic,abide_dx,90,0.046415888336127774,test,0.6370967741935484,0.0414132840239994,0.626380984265149,0.04260436716010786,0.6265756302521008,0.04171575519743921 +flat_mae,patch,logistic,abide_dx,91,2.782559402207126,train,0.9928774928774928,0.003272010003634499,0.992790757381258,0.0033186652872346945,0.9920634920634921,0.0036459540040498554 +flat_mae,patch,logistic,abide_dx,91,2.782559402207126,test,0.6209677419354839,0.04384783545738154,0.6179613241560145,0.04441657529668909,0.618172268907563,0.044338546362727965 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,train,0.9971509971509972,0.0018809871119495943,0.9971207087486158,0.001900930262644332,0.9971207087486158,0.0019032010611078208 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,test,0.6129032258064516,0.04591938316506598,0.6003223207091055,0.048119706947185134,0.6013655462184874,0.046608089800002514 +flat_mae,patch,logistic,abide_dx,93,0.046415888336127774,train,0.8276353276353277,0.014229699745067787,0.8242803504380476,0.014574269132420376,0.8221114802510152,0.014571656872486984 +flat_mae,patch,logistic,abide_dx,93,0.046415888336127774,test,0.6693548387096774,0.041915330144764944,0.665680278818965,0.04257158379976692,0.6654411764705883,0.04250980546017989 +flat_mae,patch,logistic,abide_dx,94,0.046415888336127774,train,0.8219373219373219,0.014473074679088474,0.8191597885560604,0.01483417142527356,0.8178294573643411,0.014977589512586972 +flat_mae,patch,logistic,abide_dx,94,0.046415888336127774,test,0.6532258064516129,0.040188797420242964,0.650475254015077,0.04067923950523327,0.6507352941176471,0.04059038142193952 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,train,0.8233618233618234,0.01398796570456453,0.8198509933774834,0.014373040938740903,0.8176448874123292,0.014366967903733648 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,test,0.6532258064516129,0.04483283786106316,0.6465831510572015,0.04596791946163409,0.6460084033613445,0.045374386904243134 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,train,0.9202279202279202,0.010204818007473774,0.9194267724798321,0.010314624742351331,0.9196751568844592,0.010376776451891006 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,test,0.6290322580645161,0.043294012149872385,0.6266038229903116,0.04367329662157315,0.6271008403361344,0.043553319735386564 +flat_mae,patch,logistic,abide_dx,97,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,97,10000.0,test,0.6532258064516129,0.043082767667910554,0.6521171788347361,0.04328758247879169,0.6538865546218487,0.04342410975923602 +flat_mae,patch,logistic,abide_dx,98,2.782559402207126,train,0.9886039886039886,0.004126138149736874,0.988482834994463,0.004169616992152129,0.988482834994463,0.004166220204865611 +flat_mae,patch,logistic,abide_dx,98,2.782559402207126,test,0.6048387096774194,0.044664115289446506,0.602745995423341,0.044951425963249234,0.6034663865546219,0.04503046358154634 +flat_mae,patch,logistic,abide_dx,99,0.3593813663804626,train,0.9102564102564102,0.010349286274827108,0.9093809356962399,0.01043051930950495,0.9097452934662237,0.010399701009622559 +flat_mae,patch,logistic,abide_dx,99,0.3593813663804626,test,0.6532258064516129,0.042699004611314845,0.6465831510572015,0.04381897067081977,0.6460084033613445,0.04325480480457568 +flat_mae,patch,logistic,abide_dx,100,2.782559402207126,train,0.9928774928774928,0.0033492582315881817,0.9928038822132881,0.003382801092239378,0.9929494278331488,0.003340480668961541 +flat_mae,patch,logistic,abide_dx,100,2.782559402207126,test,0.6129032258064516,0.04320872315289635,0.6003223207091055,0.04577449782110545,0.6013655462184874,0.04424317770250198 diff --git a/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic/log.txt b/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..669003ac939ea5ee897e33d1ded8789ab35cd6a7 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:23 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:21:01 time: 4.3656 data: 3.4930 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:45 time: 0.1939 data: 0.0570 max mem: 2851 +extract (train) [ 40/289] eta: 0:01:10 time: 0.1644 data: 0.0428 max mem: 2851 +extract (train) [ 60/289] eta: 0:00:55 time: 0.1578 data: 0.0440 max mem: 2851 +extract (train) [ 80/289] eta: 0:00:45 time: 0.1543 data: 0.0410 max mem: 2851 +extract (train) [100/289] eta: 0:00:39 time: 0.1579 data: 0.0419 max mem: 2851 +extract (train) [120/289] eta: 0:00:34 time: 0.1710 data: 0.0484 max mem: 2851 +extract (train) [140/289] eta: 0:00:29 time: 0.1614 data: 0.0457 max mem: 2851 +extract (train) [160/289] eta: 0:00:24 time: 0.1563 data: 0.0429 max mem: 2851 +extract (train) [180/289] eta: 0:00:20 time: 0.1568 data: 0.0424 max mem: 2851 +extract (train) [200/289] eta: 0:00:16 time: 0.1570 data: 0.0441 max mem: 2851 +extract (train) [220/289] eta: 0:00:12 time: 0.1558 data: 0.0416 max mem: 2851 +extract (train) [240/289] eta: 0:00:08 time: 0.1549 data: 0.0415 max mem: 2851 +extract (train) [260/289] eta: 0:00:05 time: 0.1762 data: 0.0480 max mem: 2851 +extract (train) [280/289] eta: 0:00:01 time: 0.1465 data: 0.0387 max mem: 2851 +extract (train) [288/289] eta: 0:00:00 time: 0.1556 data: 0.0446 max mem: 2851 +extract (train) Total time: 0:00:51 (0.1780 s / it) +extract (validation) [ 0/62] eta: 0:04:25 time: 4.2758 data: 4.0888 max mem: 2851 +extract (validation) [20/62] eta: 0:00:16 time: 0.1878 data: 0.0532 max mem: 2851 +extract (validation) [40/62] eta: 0:00:05 time: 0.1412 data: 0.0364 max mem: 2851 +extract (validation) [60/62] eta: 0:00:00 time: 0.1569 data: 0.0468 max mem: 2851 +extract (validation) [61/62] eta: 0:00:00 time: 0.1578 data: 0.0472 max mem: 2851 +extract (validation) Total time: 0:00:14 (0.2335 s / it) +extract (test) [ 0/62] eta: 0:04:18 time: 4.1618 data: 4.0203 max mem: 2851 +extract (test) [20/62] eta: 0:00:16 time: 0.1971 data: 0.0587 max mem: 2851 +extract (test) [40/62] eta: 0:00:05 time: 0.1385 data: 0.0354 max mem: 2851 +extract (test) [60/62] eta: 0:00:00 time: 0.1374 data: 0.0364 max mem: 2851 +extract (test) [61/62] eta: 0:00:00 time: 0.1380 data: 0.0368 max mem: 2851 +extract (test) Total time: 0:00:14 (0.2266 s / it) +feature extraction time: 0:01:20 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.046416 | train | 0.83618 | 0.013732 | 0.83313 | 0.014048 | 0.83115 | 0.014071 | +| flat_mae | patch | logistic | abide_dx | | 0.046416 | test | 0.66935 | 0.041597 | 0.66444 | 0.042658 | 0.66392 | 0.042198 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04224420789808378, "f1": 0.6526610644257702, "f1_std": 0.04241796388327861, "bacc": 0.6554621848739496, "bacc_std": 0.04263994921489466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04228070065979321, "f1": 0.6526610644257702, "f1_std": 0.04233409712087747, "bacc": 0.6554621848739496, "bacc_std": 0.04250035624827325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04462592817119303, "f1": 0.5571428571428572, "f1_std": 0.04563753745283136, "bacc": 0.5572478991596639, "bacc_std": 0.0451540051762055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.7258064516129032, "acc_std": 0.041917921260348316, "f1": 0.7246603970741902, "f1_std": 0.04208211149434743, "bacc": 0.7263655462184874, "bacc_std": 0.04213745979234176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 2.782559402207126, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04253923634142914, "f1": 0.6197559861681998, "f1_std": 0.04267041715095853, "bacc": 0.6213235294117647, "bacc_std": 0.04266593296126044} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04468527938313004, "f1": 0.6004471624909581, "f1_std": 0.04536728193670636, "bacc": 0.6003151260504203, "bacc_std": 0.04519225133048166} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04317689818572759, "f1": 0.6017043592264831, "f1_std": 0.04351500287343292, "bacc": 0.601890756302521, "bacc_std": 0.04337567729987715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04259773606174214, "f1": 0.607905138339921, "f1_std": 0.04314091863082814, "bacc": 0.6076680672268908, "bacc_std": 0.04297101493106213} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04225823688711459, "f1": 0.6480760345851759, "f1_std": 0.04301197855494882, "bacc": 0.6475840336134454, "bacc_std": 0.04263818701186095} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.044038165738633, "f1": 0.6609375, "f1_std": 0.04408681219332392, "bacc": 0.664390756302521, "bacc_std": 0.043928329042569825} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 2.782559402207126, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04322527683835397, "f1": 0.6191239316239316, "f1_std": 0.04546779294351958, "bacc": 0.6192226890756303, "bacc_std": 0.04433124626380628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.042677199911658775, "f1": 0.5735449735449736, "f1_std": 0.0437004170643649, "bacc": 0.5735294117647058, "bacc_std": 0.043094968058688475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04210951481902213, "f1": 0.650475254015077, "f1_std": 0.04266138987651826, "bacc": 0.6507352941176471, "bacc_std": 0.04275522126152742} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 1291.5496650148827, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.043903199141989, "f1": 0.6167554415729598, "f1_std": 0.044449866851817336, "bacc": 0.6165966386554622, "bacc_std": 0.04430741992550199} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.040012029928783326, "f1": 0.6630211440312852, "f1_std": 0.04114627108201514, "bacc": 0.6622899159663866, "bacc_std": 0.04057696371830671} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.041082753849903716, "f1": 0.6330637206549615, "f1_std": 0.041503581077858155, "bacc": 0.6328781512605042, "bacc_std": 0.04136811062594128} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 2.782559402207126, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04093512481095228, "f1": 0.6368842324461508, "f1_std": 0.04095659568412819, "bacc": 0.6407563025210083, "bacc_std": 0.041006078917970444} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04074648149745541, "f1": 0.6465831510572015, "f1_std": 0.04167842055499143, "bacc": 0.6460084033613445, "bacc_std": 0.04108178311952098} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.044582497860760135, "f1": 0.6242424242424243, "f1_std": 0.04523300286106727, "bacc": 0.6239495798319328, "bacc_std": 0.045023224806432435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04024227810313777, "f1": 0.6667322189446083, "f1_std": 0.04078713760836048, "bacc": 0.6670168067226891, "bacc_std": 0.04086965788927231} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 2.782559402207126, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.043503115656240154, "f1": 0.6167554415729598, "f1_std": 0.04415345905748401, "bacc": 0.6165966386554622, "bacc_std": 0.04412264221171354} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.03882410506676452, "f1": 0.6094351508364246, "f1_std": 0.04474164758305395, "bacc": 0.6171218487394958, "bacc_std": 0.040287276246000016} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04438916853285834, "f1": 0.5953379953379954, "f1_std": 0.046220804685687615, "bacc": 0.5955882352941176, "bacc_std": 0.04516838893524283} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.044438721268403564, "f1": 0.6351748937561295, "f1_std": 0.04471465496243104, "bacc": 0.6360294117647058, "bacc_std": 0.04494583722288871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04151141349764327, "f1": 0.6330637206549615, "f1_std": 0.042236331611118755, "bacc": 0.6328781512605042, "bacc_std": 0.042041432436894316} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04117677178773878, "f1": 0.607462787095036, "f1_std": 0.04341431624415916, "bacc": 0.608718487394958, "bacc_std": 0.041807631569762346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 1291.5496650148827, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.042052476384051594, "f1": 0.5958279009126467, "f1_std": 0.04218691150717608, "bacc": 0.5976890756302521, "bacc_std": 0.042327638017954805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04175149152719045, "f1": 0.6330637206549615, "f1_std": 0.04234159285920153, "bacc": 0.6328781512605042, "bacc_std": 0.042147732923265686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.717741935483871, "acc_std": 0.03999693991936923, "f1": 0.7094074322062269, "f1_std": 0.042178966374794506, "bacc": 0.7079831932773109, "bacc_std": 0.04119321168639709} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04239847088028616, "f1": 0.6614052614052615, "f1_std": 0.04370932280331933, "bacc": 0.6607142857142857, "bacc_std": 0.0428960666771149} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 10000.0, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04392305671911358, "f1": 0.5634941329856584, "f1_std": 0.04388324705931793, "bacc": 0.5651260504201681, "bacc_std": 0.043892617010609425} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04196206043712621, "f1": 0.6743697478991597, "f1_std": 0.04235026071545418, "bacc": 0.6743697478991597, "bacc_std": 0.042186223885294145} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 10000.0, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04412948912704457, "f1": 0.543354536324071, "f1_std": 0.04580919218608076, "bacc": 0.5451680672268907, "bacc_std": 0.044583151077540774} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 21.54434690031882, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04339414440901715, "f1": 0.6063492063492064, "f1_std": 0.044367187277124145, "bacc": 0.60609243697479, "bacc_std": 0.043837154292372664} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.039697251141344536, "f1": 0.6493719997369632, "f1_std": 0.040377521613365636, "bacc": 0.6491596638655461, "bacc_std": 0.0403276035244077} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.040348053244541424, "f1": 0.6595915634415801, "f1_std": 0.04199384639262717, "bacc": 0.6591386554621849, "bacc_std": 0.04096939995111847} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 2.782559402207126, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.044910417828746024, "f1": 0.6266038229903116, "f1_std": 0.04527356914323872, "bacc": 0.6271008403361344, "bacc_std": 0.045285622673545356} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 10000.0, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.043026971068552196, "f1": 0.665680278818965, "f1_std": 0.04349491860461719, "bacc": 0.6654411764705883, "bacc_std": 0.04328892390285515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04301325631154159, "f1": 0.6429862738533645, "f1_std": 0.044196523987448194, "bacc": 0.6428571428571428, "bacc_std": 0.04335183651853802} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 21.54434690031882, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04161193734813034, "f1": 0.5841388834089565, "f1_std": 0.042455994835944165, "bacc": 0.5840336134453781, "bacc_std": 0.04225883762677999} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.039431690562332436, "f1": 0.6003223207091055, "f1_std": 0.04121761124905071, "bacc": 0.6013655462184874, "bacc_std": 0.03996148470274818} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 2.782559402207126, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04172084961267863, "f1": 0.6704756842944459, "f1_std": 0.043293293816540866, "bacc": 0.6696428571428572, "bacc_std": 0.04261000330576604} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04204148822972659, "f1": 0.6436781609195402, "f1_std": 0.04232236660664587, "bacc": 0.6449579831932774, "bacc_std": 0.042543151388024406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.042165930094924156, "f1": 0.5972691721349506, "f1_std": 0.04324290162240511, "bacc": 0.5971638655462186, "bacc_std": 0.04253495376579031} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 0.3593813663804626, "split": "test", "acc": 0.7338709677419355, "acc_std": 0.03999665373620214, "f1": 0.7274725274725276, "f1_std": 0.041405708079099636, "bacc": 0.7258403361344539, "bacc_std": 0.04088606238623065} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.044002562951559215, "f1": 0.5529334644378892, "f1_std": 0.044279279157840266, "bacc": 0.553046218487395, "bacc_std": 0.04429408171384627} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 2.782559402207126, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.0457333440905709, "f1": 0.5778999738151349, "f1_std": 0.046122552762727614, "bacc": 0.5782563025210083, "bacc_std": 0.04630939411376973} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04420419874021367, "f1": 0.6004471624909581, "f1_std": 0.04488844648362244, "bacc": 0.6003151260504203, "bacc_std": 0.04463122956583296} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04372143336730839, "f1": 0.5915678524374176, "f1_std": 0.04447282120639658, "bacc": 0.5913865546218487, "bacc_std": 0.044234320450347676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 1291.5496650148827, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04519324906073886, "f1": 0.5640625, "f1_std": 0.04525149486024833, "bacc": 0.5667016806722689, "bacc_std": 0.0455868266443871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 1291.5496650148827, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.042877663376743425, "f1": 0.5643931861867832, "f1_std": 0.04354361173333113, "bacc": 0.5646008403361344, "bacc_std": 0.043053202779155836} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.038616743280635385, "f1": 0.6794591370053689, "f1_std": 0.03954545885547814, "bacc": 0.6785714285714286, "bacc_std": 0.039105118016161323} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04323609350389588, "f1": 0.5860042735042735, "f1_std": 0.04446341889197636, "bacc": 0.5866596638655462, "bacc_std": 0.0435464486082643} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 10000.0, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.045183358663384666, "f1": 0.6035753898349319, "f1_std": 0.04532982267760377, "bacc": 0.6050420168067226, "bacc_std": 0.04549952812570808} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04036441700619171, "f1": 0.607462787095036, "f1_std": 0.04221346696264637, "bacc": 0.608718487394958, "bacc_std": 0.04086882774524789} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.040065191412936685, "f1": 0.6869519000797236, "f1_std": 0.041294181765220504, "bacc": 0.6859243697478992, "bacc_std": 0.040749066365223435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.043990988576781714, "f1": 0.6539994685091681, "f1_std": 0.04535045203700507, "bacc": 0.6533613445378151, "bacc_std": 0.04468445342149215} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 10000.0, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04486080622864319, "f1": 0.5735449735449736, "f1_std": 0.04573917432105179, "bacc": 0.5735294117647058, "bacc_std": 0.04526755093684735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04225553735363857, "f1": 0.6521171788347361, "f1_std": 0.0424897164357544, "bacc": 0.6538865546218487, "bacc_std": 0.0427007544129227} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 2.782559402207126, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04216623857232452, "f1": 0.6118548118548119, "f1_std": 0.043366675866586955, "bacc": 0.6118697478991597, "bacc_std": 0.04259716597338024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.041811662732767124, "f1": 0.6227513227513227, "f1_std": 0.04281508823395428, "bacc": 0.6223739495798319, "bacc_std": 0.04238711318752693} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.043428901193755545, "f1": 0.6342182890855457, "f1_std": 0.04370758647469968, "bacc": 0.634453781512605, "bacc_std": 0.04366081226416784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 2.782559402207126, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.044395219155568634, "f1": 0.6667322189446083, "f1_std": 0.04482159608833423, "bacc": 0.6670168067226891, "bacc_std": 0.044812278421158064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.7338709677419355, "acc_std": 0.04076478811489186, "f1": 0.7287731159276198, "f1_std": 0.04187265808620514, "bacc": 0.7274159663865546, "bacc_std": 0.041486249136840944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04250428147620581, "f1": 0.6356837606837606, "f1_std": 0.044102336117294666, "bacc": 0.6355042016806722, "bacc_std": 0.04310882777618087} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04143415912195637, "f1": 0.6465831510572015, "f1_std": 0.04265193687152699, "bacc": 0.6460084033613445, "bacc_std": 0.04215181125918449} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 2.782559402207126, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.044710919663420064, "f1": 0.6025641025641025, "f1_std": 0.04617930004089794, "bacc": 0.6029411764705883, "bacc_std": 0.045171939190597483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.03954191390480414, "f1": 0.6580882352941176, "f1_std": 0.03980207753565382, "bacc": 0.6580882352941176, "bacc_std": 0.03979232892020779} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.7016129032258065, "acc_std": 0.04021786079272462, "f1": 0.6928021426180114, "f1_std": 0.042111459798796644, "bacc": 0.6917016806722689, "bacc_std": 0.04110312531500887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04006168500723785, "f1": 0.6283716283716283, "f1_std": 0.041477373318290586, "bacc": 0.6281512605042017, "bacc_std": 0.04068783085409267} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04283650827515094, "f1": 0.6255252100840336, "f1_std": 0.04325241038452638, "bacc": 0.6255252100840336, "bacc_std": 0.04311766204179228} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.044649691523836715, "f1": 0.6112852664576802, "f1_std": 0.044889653195191795, "bacc": 0.6123949579831933, "bacc_std": 0.04492010669605996} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.043948730299078545, "f1": 0.5953379953379954, "f1_std": 0.04583258547658649, "bacc": 0.5955882352941176, "bacc_std": 0.04488359244345984} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 2.782559402207126, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.0446307080837653, "f1": 0.6753076721654884, "f1_std": 0.04500980392817246, "bacc": 0.6759453781512605, "bacc_std": 0.045013263939792034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.7096774193548387, "acc_std": 0.04091198580766188, "f1": 0.7077769049489395, "f1_std": 0.04103048682026622, "bacc": 0.7085084033613445, "bacc_std": 0.040965768364221715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.7016129032258065, "acc_std": 0.039947217386184286, "f1": 0.6909813430322624, "f1_std": 0.04237293606728756, "bacc": 0.6901260504201681, "bacc_std": 0.041049845752477165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.043367230731887384, "f1": 0.635936582501468, "f1_std": 0.04356322684526382, "bacc": 0.6376050420168067, "bacc_std": 0.0436609388877889} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 2.782559402207126, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04473437327073084, "f1": 0.5929621848739496, "f1_std": 0.045228337442327196, "bacc": 0.5929621848739496, "bacc_std": 0.045128301774348324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04436468805599709, "f1": 0.6227513227513227, "f1_std": 0.045475535880137606, "bacc": 0.6223739495798319, "bacc_std": 0.044852501756747806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04201216314637051, "f1": 0.650475254015077, "f1_std": 0.04244577969521459, "bacc": 0.6507352941176471, "bacc_std": 0.0424212355760643} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.041819330453556604, "f1": 0.6522435897435898, "f1_std": 0.043021538846998705, "bacc": 0.6517857142857143, "bacc_std": 0.042128640559751664} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04175028584771948, "f1": 0.6688034188034189, "f1_std": 0.04356643274812478, "bacc": 0.6680672268907563, "bacc_std": 0.0426099866164883} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.3593813663804626, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04473764428103986, "f1": 0.6743697478991597, "f1_std": 0.045243578335739144, "bacc": 0.6743697478991597, "bacc_std": 0.045128574817796384} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04564369084602895, "f1": 0.6017043592264831, "f1_std": 0.04609442440024641, "bacc": 0.601890756302521, "bacc_std": 0.04604308690144586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04271385565762564, "f1": 0.650475254015077, "f1_std": 0.04321020705420851, "bacc": 0.6507352941176471, "bacc_std": 0.043220535134742624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 2.782559402207126, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.043262502478562156, "f1": 0.6760710553814002, "f1_std": 0.043660736420618115, "bacc": 0.6775210084033614, "bacc_std": 0.043909701709000874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.0441091022718192, "f1": 0.6443285528031291, "f1_std": 0.04415636811687999, "bacc": 0.6465336134453781, "bacc_std": 0.04416177395033841} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04380479770263744, "f1": 0.6035753898349319, "f1_std": 0.043932054119556314, "bacc": 0.6050420168067226, "bacc_std": 0.04396473645852784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04144694024121685, "f1": 0.6242424242424243, "f1_std": 0.04209252779303939, "bacc": 0.6239495798319328, "bacc_std": 0.04187317491941256} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.0414132840239994, "f1": 0.626380984265149, "f1_std": 0.04260436716010786, "bacc": 0.6265756302521008, "bacc_std": 0.04171575519743921} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 2.782559402207126, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04384783545738154, "f1": 0.6179613241560145, "f1_std": 0.04441657529668909, "bacc": 0.618172268907563, "bacc_std": 0.044338546362727965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04591938316506598, "f1": 0.6003223207091055, "f1_std": 0.048119706947185134, "bacc": 0.6013655462184874, "bacc_std": 0.046608089800002514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.041915330144764944, "f1": 0.665680278818965, "f1_std": 0.04257158379976692, "bacc": 0.6654411764705883, "bacc_std": 0.04250980546017989} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.040188797420242964, "f1": 0.650475254015077, "f1_std": 0.04067923950523327, "bacc": 0.6507352941176471, "bacc_std": 0.04059038142193952} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04483283786106316, "f1": 0.6465831510572015, "f1_std": 0.04596791946163409, "bacc": 0.6460084033613445, "bacc_std": 0.045374386904243134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.043294012149872385, "f1": 0.6266038229903116, "f1_std": 0.04367329662157315, "bacc": 0.6271008403361344, "bacc_std": 0.043553319735386564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 10000.0, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.043082767667910554, "f1": 0.6521171788347361, "f1_std": 0.04328758247879169, "bacc": 0.6538865546218487, "bacc_std": 0.04342410975923602} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 2.782559402207126, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.044664115289446506, "f1": 0.602745995423341, "f1_std": 0.044951425963249234, "bacc": 0.6034663865546219, "bacc_std": 0.04503046358154634} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.042699004611314845, "f1": 0.6465831510572015, "f1_std": 0.04381897067081977, "bacc": 0.6460084033613445, "bacc_std": 0.04325480480457568} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 2.782559402207126, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04320872315289635, "f1": 0.6003223207091055, "f1_std": 0.04577449782110545, "bacc": 0.6013655462184874, "bacc_std": 0.04424317770250198} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 652.75 | 2387 | 0.91142 | 0.068317 | 0.91 | 0.069616 | 0.90916 | 0.07035 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 652.75 | 2387 | 0.63718 | 0.038129 | 0.63171 | 0.03861 | 0.63212 | 0.038321 | + + +done! total time: 0:05:32 diff --git a/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic/config.yaml b/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2a05aea17fefb94a4994af64796dc1f5757f73d3 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..245333743e6a944cedc3d4fbc346bed0db0c1c96 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,train,0.7753424657534247,0.021426601880731724,0.7670164404035372,0.022627127007595393,0.7636624534408011,0.022385867380850456 +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,test,0.6461538461538462,0.05808248776193549,0.6289401836684041,0.06275252309683452,0.6283783783783784,0.06025818161615298 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,train,0.7863013698630137,0.020486564761088295,0.7778973974911065,0.02168272533307503,0.7740886609269096,0.021459137317555357 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,test,0.5538461538461539,0.06144838792595152,0.5469838981014179,0.0619751739730121,0.5472972972972974,0.06211726030634472 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,train,0.7726027397260274,0.02156280723045406,0.7658466080800117,0.022345963184884972,0.7633876778408744,0.022159282174377373 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,test,0.676923076923077,0.052703758441368806,0.656084656084656,0.05742278985641698,0.6554054054054055,0.05452749586119458 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,train,0.7835616438356164,0.022163333703567597,0.7762456448020858,0.0232818177614098,0.773096415704952,0.023188678014285372 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,test,0.5692307692307692,0.05966295868504389,0.5565302144249512,0.06202075336121701,0.5564671814671815,0.06113709230650295 +flat_mae,patch,logistic,adhd200_dx,4,0.005994842503189409,train,0.7698630136986301,0.022135556036801063,0.7636884942656308,0.022987730536497306,0.7616779629968858,0.02296523508814606 +flat_mae,patch,logistic,adhd200_dx,4,0.005994842503189409,test,0.6615384615384615,0.056447669644406256,0.6474358974358974,0.0604896125671534,0.6462355212355213,0.05878811615071605 +flat_mae,patch,logistic,adhd200_dx,5,0.046415888336127774,train,0.8657534246575342,0.01837836237033308,0.8622758179900047,0.019071718892056586,0.8595438725041217,0.019338984193801173 +flat_mae,patch,logistic,adhd200_dx,5,0.046415888336127774,test,0.5076923076923077,0.06240414945997168,0.4980694980694981,0.06404919032277155,0.4980694980694981,0.06387131046086399 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,train,0.7616438356164383,0.021623933280593297,0.7519935020813646,0.022925461890076482,0.7486566526225804,0.022554533091312078 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,test,0.676923076923077,0.059494390367555675,0.6719538572458543,0.06068906250913259,0.6727799227799228,0.06085753647029573 +flat_mae,patch,logistic,adhd200_dx,7,0.005994842503189409,train,0.7616438356164383,0.021515136698370265,0.7550241080038573,0.022389536515746423,0.7529614703547658,0.022384037567075608 +flat_mae,patch,logistic,adhd200_dx,7,0.005994842503189409,test,0.6615384615384615,0.054873496259268705,0.6474358974358974,0.057987684640399176,0.6462355212355213,0.05665735618357195 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,train,0.7698630136986301,0.022294685141323704,0.7623255813953489,0.023610818396606774,0.7595255541307931,0.02355648231663574 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,test,0.6461538461538462,0.06032775176596667,0.6375757575757576,0.06248242828405936,0.6370656370656371,0.06176586245437029 +flat_mae,patch,logistic,adhd200_dx,9,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,9,2.782559402207126,test,0.5230769230769231,0.06223452494134229,0.5157414083153088,0.06326009116777041,0.515926640926641,0.06356634225308534 +flat_mae,patch,logistic,adhd200_dx,10,0.3593813663804626,train,0.9561643835616438,0.010478023360395895,0.9552915237628614,0.010709041574550535,0.9539903523233804,0.010934142786512708 +flat_mae,patch,logistic,adhd200_dx,10,0.3593813663804626,test,0.5384615384615384,0.06256906243475681,0.5294401544401545,0.06326176099658457,0.5294401544401545,0.06295730815583547 +flat_mae,patch,logistic,adhd200_dx,11,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,11,2.782559402207126,test,0.5846153846153846,0.058392988150023264,0.5810455956075435,0.05879363425814908,0.583011583011583,0.05907973932158574 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,train,0.7643835616438356,0.021276181028366224,0.7576136644427971,0.022134371250423095,0.7553886548207852,0.022032275865191995 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,test,0.5846153846153846,0.05556526947291476,0.5578231292517006,0.061284629605962496,0.5612934362934363,0.05729953979515908 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,train,0.7589041095890411,0.022991208188541327,0.7515008974438324,0.024020996227985238,0.7490993466446846,0.023937944924906005 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,test,0.6923076923076923,0.05556360820008918,0.6862934362934363,0.05635237303895853,0.6862934362934363,0.056091573955748486 +flat_mae,patch,logistic,adhd200_dx,14,0.005994842503189409,train,0.7643835616438356,0.02090174747422029,0.7551176433876303,0.022072508664281877,0.7518013067106307,0.021763142699525166 +flat_mae,patch,logistic,adhd200_dx,14,0.005994842503189409,test,0.7076923076923077,0.05258693787912507,0.6834145091002307,0.06139776678575,0.6824324324324325,0.05632472688573688 +flat_mae,patch,logistic,adhd200_dx,15,0.046415888336127774,train,0.8328767123287671,0.019352766863889165,0.8285474468855161,0.020101262256583513,0.8261128411797033,0.020327938952204565 +flat_mae,patch,logistic,adhd200_dx,15,0.046415888336127774,test,0.6615384615384615,0.058262617040248876,0.6575670498084292,0.05913400580411037,0.6592664092664093,0.059298611689254446 +flat_mae,patch,logistic,adhd200_dx,16,0.000774263682681127,train,0.7095890410958904,0.02196576670895028,0.7042591573411606,0.02235734278751768,0.7039750870122733,0.022366706302365303 +flat_mae,patch,logistic,adhd200_dx,16,0.000774263682681127,test,0.5846153846153846,0.06112918515820826,0.5810455956075435,0.061742541721635305,0.583011583011583,0.06200704448769667 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,train,0.7835616438356164,0.02038884597875222,0.7779548902287831,0.021047545397844723,0.7759662941930756,0.021020298594422948 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,test,0.5846153846153846,0.05583762596469142,0.5411764705882354,0.06452260688678585,0.5526061776061776,0.057573672084012234 +flat_mae,patch,logistic,adhd200_dx,18,0.046415888336127774,train,0.852054794520548,0.018854554066454317,0.8472093023255813,0.019810151217966462,0.8431031324418392,0.019979178503045782 +flat_mae,patch,logistic,adhd200_dx,18,0.046415888336127774,test,0.6923076923076923,0.05475342036275102,0.6862934362934363,0.05580396395250179,0.6862934362934363,0.055405560745009796 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,train,0.8383561643835616,0.019708710816328367,0.8335536129725385,0.02054254280255867,0.8302497404897112,0.020649497123407638 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,test,0.6461538461538462,0.060149328966730205,0.6375757575757576,0.061900842763545995,0.6370656370656371,0.0614362600419495 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,train,0.7561643835616438,0.021246906544674826,0.7493924783027965,0.022209383817706038,0.7473896318006961,0.02221535146410071 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,test,0.6153846153846154,0.055750067795219745,0.5966741126830479,0.059535022278046336,0.597007722007722,0.05736636019664363 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,train,0.8246575342465754,0.01937504803594551,0.8196194712132444,0.02020786761556039,0.8166788789155524,0.020331762558014756 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,test,0.6307692307692307,0.05916387031200594,0.6285714285714286,0.05956659080461617,0.6322393822393823,0.05979899094207656 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,train,0.7616438356164383,0.0214817834910265,0.7545621072645906,0.022434329675493313,0.7522440007327349,0.022320257991881226 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,test,0.6615384615384615,0.06029120653792572,0.6575670498084292,0.06086514191238794,0.6592664092664093,0.06089226219894604 +flat_mae,patch,logistic,adhd200_dx,23,0.3593813663804626,train,0.9671232876712329,0.008749296945188854,0.9665199046046598,0.00891807980770436,0.9658514990535507,0.00907495098394615 +flat_mae,patch,logistic,adhd200_dx,23,0.3593813663804626,test,0.5384615384615384,0.05814960505495576,0.5125,0.06114970314872786,0.5164092664092664,0.05858747537263065 +flat_mae,patch,logistic,adhd200_dx,24,0.046415888336127774,train,0.8438356164383561,0.018802729147160992,0.8394985535197685,0.019472147426444472,0.8365390486658119,0.019527908024763296 +flat_mae,patch,logistic,adhd200_dx,24,0.046415888336127774,test,0.6923076923076923,0.05561894871014106,0.6794871794871795,0.05851261887289578,0.6776061776061776,0.057060359125781494 +flat_mae,patch,logistic,adhd200_dx,25,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,25,2.782559402207126,test,0.5384615384615384,0.05939219561026755,0.5374762808349146,0.059789638639948885,0.5424710424710424,0.06052828186644284 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,train,0.7671232876712328,0.0208516435255868,0.7582310539645432,0.0220273402624882,0.754945960798681,0.02177495723666072 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,test,0.6307692307692307,0.056604304935915485,0.6153846153846154,0.06002809840924439,0.6148648648648649,0.05814834709706886 +flat_mae,patch,logistic,adhd200_dx,27,0.000774263682681127,train,0.6876712328767123,0.023822622816416658,0.6798639748876716,0.02454291058466579,0.6788178543078708,0.02438023487010111 +flat_mae,patch,logistic,adhd200_dx,27,0.000774263682681127,test,0.6,0.06171819905566393,0.5953065134099617,0.06242979123411166,0.5965250965250966,0.06244070005495106 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,train,0.7616438356164383,0.02057991740904121,0.7535869759212843,0.021581987884693125,0.7508090614886731,0.021414604231498127 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,test,0.5692307692307692,0.05931844859059672,0.5512820512820513,0.06300062527682129,0.5521235521235521,0.0608066369223949 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,train,0.7643835616438356,0.02302214065644916,0.757148604320109,0.02395942960035701,0.7546711851987543,0.023765583507315415 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,test,0.6153846153846154,0.056394519016903844,0.5905769715293525,0.0622670037475929,0.5926640926640927,0.05833358909677081 +flat_mae,patch,logistic,adhd200_dx,30,0.046415888336127774,train,0.8547945205479452,0.017897266093137153,0.8517863025873231,0.018360487486272507,0.8505526042620749,0.018525887424018433 +flat_mae,patch,logistic,adhd200_dx,30,0.046415888336127774,test,0.6,0.061589793975085606,0.599146110056926,0.061590228278598114,0.6052123552123552,0.06172480754898591 +flat_mae,patch,logistic,adhd200_dx,31,0.046415888336127774,train,0.8547945205479452,0.017794549864465922,0.8510330276218419,0.01838303257683579,0.8484001953959822,0.018516487070857502 +flat_mae,patch,logistic,adhd200_dx,31,0.046415888336127774,test,0.5692307692307692,0.053213656653512076,0.5190274841437632,0.06208119513034616,0.5347490347490347,0.05463714710627362 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,train,0.7917808219178082,0.020900000673394016,0.7853871387014917,0.021770908068624698,0.782530377969103,0.021669575851933822 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,test,0.6153846153846154,0.061710367584836355,0.606060606060606,0.06315471375299682,0.6056949806949807,0.06268813885726467 +flat_mae,patch,logistic,adhd200_dx,33,0.005994842503189409,train,0.7698630136986301,0.022780280061760652,0.7632505559673832,0.023627142850247904,0.7609604933748549,0.02349745898789278 +flat_mae,patch,logistic,adhd200_dx,33,0.005994842503189409,test,0.6461538461538462,0.05702921497747226,0.6458185264155414,0.05722033695243646,0.6544401544401545,0.05740898132552582 +flat_mae,patch,logistic,adhd200_dx,34,0.005994842503189409,train,0.7753424657534247,0.021216078198007273,0.7697267187788515,0.02197557074759865,0.7679672711729865,0.022051756813035183 +flat_mae,patch,logistic,adhd200_dx,34,0.005994842503189409,test,0.6307692307692307,0.05906461410229694,0.6306818181818181,0.05927868147399855,0.640926640926641,0.058783845431430094 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,train,0.7671232876712328,0.022143231860045434,0.7565692943844204,0.02357379219253745,0.7527935519325883,0.02304448804066619 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,test,0.6307692307692307,0.058611273831220764,0.6198830409356726,0.06080419396297386,0.6192084942084942,0.059810726715029405 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,train,0.7616438356164383,0.022769018927725707,0.7567058174546625,0.023388699402777886,0.7558313488428894,0.023505296129434285 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,test,0.676923076923077,0.05403942457395364,0.6655231560891939,0.05724773363530985,0.6640926640926641,0.05609950895576042 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,train,0.7753424657534247,0.02110527882178397,0.7675086999751429,0.02212192427922826,0.764379923062832,0.021898447898352304 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,test,0.6153846153846154,0.060406794554937006,0.606060606060606,0.0620299302292987,0.6056949806949807,0.06137132196208981 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,train,0.7616438356164383,0.021819513149075435,0.7550241080038573,0.022766767256336758,0.7529614703547658,0.022752548003097343 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,test,0.6307692307692307,0.060883118208911055,0.6235521235521235,0.06202129146571236,0.6235521235521235,0.0615002610901815 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,train,0.7780821917808219,0.021475942542478112,0.7714888584877223,0.02241340092855584,0.7689595163949441,0.022353800376568648 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,test,0.6153846153846154,0.05748966256445717,0.5905769715293525,0.06223607664020985,0.5926640926640927,0.05892938548288434 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,train,0.7698630136986301,0.021999750250207287,0.761333914559721,0.023129720715957158,0.7580906148867314,0.02286345680007723 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,test,0.6307692307692307,0.05777851200666336,0.6198830409356726,0.05995510400601928,0.6192084942084942,0.05914888718855722 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,train,0.7863013698630137,0.020101088868295768,0.7797394318252151,0.02103651327357477,0.7769585394150332,0.02100659248530823 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,test,0.6615384615384615,0.05779417468239282,0.6549227799227799,0.05855164964930633,0.6549227799227799,0.05846758714622786 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,train,0.7726027397260274,0.02199453078608194,0.7649163103616852,0.02294827696942031,0.7619527385968126,0.022777923022533633 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,test,0.5692307692307692,0.06302710227593979,0.5666666666666667,0.06308229107595638,0.5694980694980695,0.06346328942019117 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,train,0.7808219178082192,0.020022205724812028,0.7731792194879443,0.021076685295562326,0.7699517616169017,0.020912558796430433 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,test,0.6923076923076923,0.06154519193964362,0.6862934362934363,0.06273842952168064,0.6862934362934363,0.06241712111944975 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,train,0.8328767123287671,0.019130630925159545,0.8279113625648279,0.0199530702090356,0.8246779019356415,0.02003432375281342 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,test,0.6615384615384615,0.05653435673220542,0.6515594541910331,0.058928417010029994,0.6505791505791505,0.05816277155213548 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,train,0.7863013698630137,0.021819439531125223,0.7801612305411416,0.022818432297451993,0.7776760090370641,0.02285443693538583 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,test,0.47692307692307695,0.06091922287731789,0.4475,0.06308361871832754,0.45366795366795365,0.06086969631628947 +flat_mae,patch,logistic,adhd200_dx,46,0.3593813663804626,train,0.9506849315068493,0.010766720338237126,0.9497798569069895,0.010985352169817874,0.9491359833913415,0.011206136697418276 +flat_mae,patch,logistic,adhd200_dx,46,0.3593813663804626,test,0.6153846153846154,0.0594115124420418,0.6094688776736361,0.060006437900991985,0.61003861003861,0.05998702841841971 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,train,0.7698630136986301,0.022560375408281264,0.7623255813953489,0.02366575952217736,0.7595255541307931,0.023501868909244467 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,test,0.6,0.056547752245546146,0.5775,0.06073388249379751,0.5791505791505791,0.05785323386127255 +flat_mae,patch,logistic,adhd200_dx,48,0.3593813663804626,train,0.9506849315068493,0.010711244032947142,0.9497029642332191,0.01095344873445036,0.9484185137693106,0.011226626887521693 +flat_mae,patch,logistic,adhd200_dx,48,0.3593813663804626,test,0.5230769230769231,0.05987186317461649,0.5226249703861644,0.060066148516639405,0.528957528957529,0.06083545724573932 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,train,0.7835616438356164,0.0201700289893548,0.7757793485276164,0.0213026479820828,0.7723789460829211,0.0211658399842959 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,test,0.5846153846153846,0.061165608931623754,0.578226387887527,0.06194752285100933,0.5786679536679536,0.06205189591758259 +flat_mae,patch,logistic,adhd200_dx,50,0.046415888336127774,train,0.8465753424657534,0.018736081516952004,0.8432707643233959,0.019245107818934192,0.8418361116199549,0.01942613723720718 +flat_mae,patch,logistic,adhd200_dx,50,0.046415888336127774,test,0.6615384615384615,0.05686509312898421,0.6549227799227799,0.05803001641891944,0.6549227799227799,0.05783813968739375 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,train,0.7671232876712328,0.021328316480225366,0.7622987871683484,0.021879079020645554,0.7614031873969591,0.021917166313488178 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,test,0.6307692307692307,0.06073319281957085,0.6235521235521235,0.06205181788525067,0.6235521235521235,0.06188145885801166 +flat_mae,patch,logistic,adhd200_dx,52,0.3593813663804626,train,0.9616438356164384,0.010073508768441248,0.9609398887054363,0.010281107066372615,0.960279660499481,0.010521542259495791 +flat_mae,patch,logistic,adhd200_dx,52,0.3593813663804626,test,0.6461538461538462,0.05591082502547772,0.6289401836684041,0.06057020102812904,0.6283783783783784,0.05827827116028905 +flat_mae,patch,logistic,adhd200_dx,53,0.3593813663804626,train,0.9561643835616438,0.010278393204220894,0.9554252915674422,0.010462630211000228,0.9554252915674422,0.010567870890715673 +flat_mae,patch,logistic,adhd200_dx,53,0.3593813663804626,test,0.6,0.056804191861320866,0.5775,0.06146996493861908,0.5791505791505791,0.058486052438955216 +flat_mae,patch,logistic,adhd200_dx,54,0.005994842503189409,train,0.7808219178082192,0.019733161571408232,0.7740917249489385,0.020710990493923005,0.7713867008609635,0.020682880613608873 +flat_mae,patch,logistic,adhd200_dx,54,0.005994842503189409,test,0.5692307692307692,0.061336931546969585,0.564176245210728,0.06182144651438368,0.5651544401544402,0.0620880263918537 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,train,0.7643835616438356,0.02197484086575483,0.7589093701996927,0.022482049343598872,0.7575410636868779,0.02241049288645186 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,test,0.5538461538461539,0.06347580445712594,0.5521501544309813,0.06388051918932083,0.555984555984556,0.06429060901600607 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,train,0.7506849315068493,0.02219897465000023,0.7411650107149814,0.02330641613234879,0.7382304451364718,0.022909916535897364 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,test,0.676923076923077,0.06037691474661095,0.6719538572458543,0.06169320857638493,0.6727799227799228,0.06188946634270963 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,train,0.8493150684931506,0.01831683293925976,0.8448381137879596,0.019029650296039865,0.8413934175978507,0.01910188610091723 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,test,0.6153846153846154,0.05868624023482343,0.5966741126830479,0.06252875614530863,0.597007722007722,0.06005479055451763 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,train,0.7780821917808219,0.020461263008870776,0.772732513894334,0.0210502957076569,0.7711119252610368,0.021036965819924657 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,test,0.6461538461538462,0.058454250557885216,0.6407113674597452,0.05985844228537383,0.6414092664092663,0.059612241941113385 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,train,0.7561643835616438,0.0222839897271787,0.7484298647089345,0.023245064038733502,0.7459546925566343,0.023069720590000214 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,test,0.6461538461538462,0.05806182381406379,0.6233308138070043,0.06363907793779376,0.6240347490347491,0.06030457152523246 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,train,0.7698630136986301,0.022213185010044958,0.7632505559673832,0.022977212925267023,0.7609604933748549,0.022810573031705757 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,test,0.6461538461538462,0.05786690298900108,0.6336682185738789,0.06097201953198227,0.6327220077220077,0.05976684311176707 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,train,0.7726027397260274,0.022172207840569846,0.7658466080800117,0.02298554935082609,0.7633876778408744,0.02287728858958465 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,test,0.6,0.059941588924887844,0.5833333333333333,0.06298246012486837,0.5834942084942085,0.06146597406551436 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,train,0.8465753424657534,0.018641687797130643,0.8421670373115888,0.01940924093409728,0.8389662331318313,0.019541222747691822 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,test,0.6307692307692307,0.05445837452782645,0.6198830409356726,0.05714149566478462,0.6192084942084942,0.05622284339881475 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,train,0.7726027397260274,0.021926416802568596,0.7649163103616852,0.02302729380907466,0.7619527385968126,0.022846810532178265 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,test,0.6153846153846154,0.057267969712829755,0.5966741126830479,0.060381835362405316,0.597007722007722,0.05848493761508999 +flat_mae,patch,logistic,adhd200_dx,64,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,64,2.782559402207126,test,0.5846153846153846,0.05955157875778256,0.578226387887527,0.06056740928784022,0.5786679536679536,0.0604796022582975 +flat_mae,patch,logistic,adhd200_dx,65,0.046415888336127774,train,0.8410958904109589,0.0190140586598893,0.8365301457870027,0.01970878802466244,0.8333943945777615,0.019794711249637765 +flat_mae,patch,logistic,adhd200_dx,65,0.046415888336127774,test,0.6461538461538462,0.059574340213168285,0.6407113674597452,0.06092487942318154,0.6414092664092663,0.060926292344954155 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,train,0.7671232876712328,0.02230223080698232,0.7597363876433645,0.02330986750324697,0.7570983696647737,0.02314792571665072 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,test,0.5692307692307692,0.06215758556915625,0.5666666666666667,0.061988224329717866,0.5694980694980695,0.061861046574940834 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,train,0.7808219178082192,0.02115011426584819,0.7740917249489385,0.02215030281440895,0.7713867008609635,0.02211700783492282 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,test,0.6307692307692307,0.05546015361548554,0.6036585365853658,0.06252262529361582,0.6061776061776062,0.057635823585645424 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,train,0.7780821917808219,0.02215988507780811,0.7696084161309176,0.023491529616394714,0.7660896379068205,0.02324731638084602 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,test,0.6615384615384615,0.05710676391818312,0.6425000000000001,0.06139516688389159,0.6418918918918919,0.05883373291566716 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,train,0.7835616438356164,0.020462544722881743,0.7775506268081003,0.021343527761409013,0.7752488245710447,0.021383568004235926 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,test,0.5230769230769231,0.06217098773524778,0.5062484685126194,0.0638167229773955,0.5072393822393823,0.06262693819155023 +flat_mae,patch,logistic,adhd200_dx,70,0.005994842503189409,train,0.7808219178082192,0.02164377943464184,0.771689497716895,0.023203879433083715,0.767799352750809,0.022916413924816217 +flat_mae,patch,logistic,adhd200_dx,70,0.005994842503189409,test,0.5384615384615384,0.06414554456526161,0.5294401544401545,0.06531173895780198,0.5294401544401545,0.06544629613000721 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,train,0.7726027397260274,0.02234959008894636,0.7653896491105794,0.023266622293491915,0.7626702082188435,0.023073413446157286 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,test,0.676923076923077,0.058055749578931434,0.6719538572458543,0.05917749341939609,0.6727799227799228,0.05905585322257612 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,train,0.7671232876712328,0.022027803444765855,0.7576948008840918,0.023404818589079123,0.7542284911766501,0.023031027751714183 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,test,0.7076923076923077,0.054491352117756284,0.7006060606060607,0.055930149404584344,0.6998069498069499,0.05540308535763771 +flat_mae,patch,logistic,adhd200_dx,73,0.005994842503189409,train,0.7698630136986301,0.022014878198060424,0.7623255813953489,0.022911263487455806,0.7595255541307931,0.022709343038389413 +flat_mae,patch,logistic,adhd200_dx,73,0.005994842503189409,test,0.6153846153846154,0.05844905939686881,0.5966741126830479,0.06210467280896653,0.597007722007722,0.059973276807517704 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,train,0.7616438356164383,0.021744302286554686,0.7535869759212843,0.022855058945364833,0.7508090614886731,0.022689971420168622 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,test,0.676923076923077,0.05924495334660515,0.6690909090909091,0.060723659782969994,0.6684362934362934,0.06000939315514726 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,train,0.7506849315068493,0.022441151915748123,0.7432776064491695,0.02326412594689309,0.7411003236245954,0.02309908951207374 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,test,0.6153846153846154,0.061640500019090945,0.6018132810585641,0.06418466604836708,0.6013513513513513,0.06291398646699677 +flat_mae,patch,logistic,adhd200_dx,76,0.046415888336127774,train,0.8465753424657534,0.018947716233355102,0.8421670373115888,0.019735624738458662,0.8389662331318313,0.019866861299461167 +flat_mae,patch,logistic,adhd200_dx,76,0.046415888336127774,test,0.6,0.049466847424136914,0.5427489177489178,0.06064190090706154,0.5617760617760618,0.05120698604988148 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,train,0.8410958904109589,0.01945987571889742,0.8365301457870027,0.020321121518805767,0.8333943945777615,0.020524389677231424 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,test,0.6923076923076923,0.057959063292564346,0.6832358674463938,0.06020524480961262,0.6819498069498069,0.05923024751047185 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,train,0.8383561643835616,0.01925423237732051,0.8344594854292062,0.01981806400885171,0.8324021493558039,0.019920042125583095 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,test,0.5692307692307692,0.05697939002458664,0.5376016260162602,0.06230416561344967,0.5434362934362934,0.058070070029221335 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,train,0.7698630136986301,0.02111045645502575,0.7623255813953489,0.02228513487718042,0.7595255541307931,0.02215454930097178 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,test,0.6307692307692307,0.06313356556216128,0.6198830409356726,0.06538733938634304,0.6192084942084942,0.0643657599616476 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,train,0.7780821917808219,0.020546802525305156,0.7710429105777943,0.021552113751849157,0.7682420467729132,0.021456011362455247 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,test,0.6307692307692307,0.0604733908915083,0.61,0.06571793292489095,0.6105212355212355,0.06273191120532054 +flat_mae,patch,logistic,adhd200_dx,81,0.046415888336127774,train,0.8575342465753425,0.01841231567081647,0.8539730411768327,0.019015465439397816,0.8515448494840325,0.019185635971397004 +flat_mae,patch,logistic,adhd200_dx,81,0.046415888336127774,test,0.5692307692307692,0.05937811044484322,0.5565302144249512,0.06118526090577671,0.5564671814671815,0.06023746726659708 +flat_mae,patch,logistic,adhd200_dx,82,0.046415888336127774,train,0.8438356164383561,0.01859504430578471,0.8400710282960127,0.019185120735888496,0.8379739879098737,0.019375776461908654 +flat_mae,patch,logistic,adhd200_dx,82,0.046415888336127774,test,0.7692307692307693,0.04972957878282461,0.7656813266041816,0.05077660315446027,0.7668918918918919,0.050883192126963704 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,train,0.7616438356164383,0.02101106875722617,0.7540831261761494,0.021959595164795776,0.751526531110704,0.021781596591712315 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,test,0.6923076923076923,0.057022574165825256,0.6904761904761905,0.05741029187541291,0.6949806949806949,0.05761744943134138 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,train,0.7643835616438356,0.020294076666790065,0.7534093765711413,0.021714631317653388,0.749648897844538,0.021195834254502666 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,test,0.6461538461538462,0.057618208824690834,0.644808743169399,0.05804709329573783,0.6500965250965252,0.05866758785448414 +flat_mae,patch,logistic,adhd200_dx,85,0.005994842503189409,train,0.7671232876712328,0.02147237512715002,0.7582310539645432,0.022847898002730883,0.754945960798681,0.022622057621497233 +flat_mae,patch,logistic,adhd200_dx,85,0.005994842503189409,test,0.7230769230769231,0.053760203930297915,0.7115384615384616,0.05688644442605713,0.708976833976834,0.0558089812696526 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,train,0.7808219178082192,0.020942333856204223,0.7726989662473533,0.022191714818820835,0.7692342919948708,0.02200035991093387 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,test,0.6153846153846154,0.06159943901996157,0.6018132810585641,0.06414322361442466,0.6013513513513513,0.06286155222592037 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,train,0.7863013698630137,0.02037272579859228,0.7778973974911065,0.021533775600724628,0.7740886609269096,0.021253627216642836 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,test,0.6,0.06127172963178863,0.588206627680312,0.06353784643919844,0.5878378378378378,0.06269512333059202 +flat_mae,patch,logistic,adhd200_dx,88,0.046415888336127774,train,0.8465753424657534,0.019341185878723837,0.843010752688172,0.019965844118232306,0.841118641997924,0.020168776208211166 +flat_mae,patch,logistic,adhd200_dx,88,0.046415888336127774,test,0.6461538461538462,0.056530672409559425,0.6375757575757576,0.058683707386595875,0.6370656370656371,0.058214998276720845 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,train,0.7643835616438356,0.022156643923591856,0.7576136644427971,0.023079652545021374,0.7553886548207852,0.02296921831313526 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,test,0.6307692307692307,0.05889696951245862,0.6153846153846154,0.061739555432902916,0.6148648648648649,0.06023509942784888 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,train,0.7397260273972602,0.022908264026450652,0.730928307040483,0.02400819721947619,0.7285217072723942,0.023710130334884418 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,test,0.6153846153846154,0.0573980681282225,0.5966741126830479,0.061139793513809146,0.597007722007722,0.05876924635275366 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,train,0.852054794520548,0.01801640085987499,0.8483566196836339,0.018649264548169703,0.8459730109299628,0.01882398704554842 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,test,0.5692307692307692,0.05324922756891775,0.5289855072463768,0.05916404875025031,0.5390926640926641,0.05401802759551665 +flat_mae,patch,logistic,adhd200_dx,92,0.046415888336127774,train,0.8493150684931506,0.01900815125739631,0.8454116324377604,0.019713497772629453,0.8428283568419125,0.01991976664475763 +flat_mae,patch,logistic,adhd200_dx,92,0.046415888336127774,test,0.6307692307692307,0.05853320303885792,0.61,0.06364383081718492,0.6105212355212355,0.060336868649215544 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,train,0.7890410958904109,0.020161021160994833,0.780985778297292,0.021287331892530514,0.77723331501496,0.02110422936031576 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,test,0.7076923076923077,0.05124231794063374,0.6834145091002307,0.058995490237569566,0.6824324324324325,0.05439679051265934 +flat_mae,patch,logistic,adhd200_dx,94,0.3593813663804626,train,0.9671232876712329,0.009016847515930359,0.9665199046046598,0.00920199657567831,0.9658514990535507,0.009501102613137573 +flat_mae,patch,logistic,adhd200_dx,94,0.3593813663804626,test,0.5384615384615384,0.0594730310318748,0.5125,0.0639629294612766,0.5164092664092664,0.06077912930313141 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,train,0.7835616438356164,0.022491027547337842,0.7775506268081003,0.023338356229707754,0.7752488245710447,0.023288060602506273 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,test,0.5230769230769231,0.0581957602311005,0.49987589972697943,0.061941229216866824,0.502895752895753,0.05925808029460242 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,train,0.7534246575342466,0.022143682020068212,0.7463398813936248,0.02308608900204428,0.7442449777126457,0.023003468544168487 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,test,0.6923076923076923,0.05830972190500529,0.6862934362934363,0.05956389813696815,0.6862934362934363,0.05940370489161519 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,train,0.7671232876712328,0.023113595969339994,0.7592516431414847,0.024276267515159565,0.7563809000427428,0.0240167906502308 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,test,0.6461538461538462,0.053640389269157485,0.6167649320687003,0.060600182335279945,0.6196911196911197,0.05594178277987686 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,train,0.7506849315068493,0.022899287688015593,0.7422576414808837,0.023822068516289603,0.7396653843805336,0.02355026576054057 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,test,0.6615384615384615,0.059296666639501866,0.6474358974358974,0.06254306988999375,0.6462355212355213,0.06065076247947131 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7726027397260274,0.022772958057533623,0.7667120998606203,0.023630240022653917,0.7648226170849362,0.023614988680008282 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.5692307692307692,0.0573920886039141,0.545,0.06086605745586423,0.5477799227799228,0.05818045268706809 +flat_mae,patch,logistic,adhd200_dx,100,0.046415888336127774,train,0.8438356164383561,0.019061043262065133,0.8394985535197685,0.019793976258839878,0.8365390486658119,0.019916675032071233 +flat_mae,patch,logistic,adhd200_dx,100,0.046415888336127774,test,0.676923076923077,0.05481761043862806,0.6690909090909091,0.056780702344057236,0.6684362934362934,0.0563222617391827 diff --git a/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic/log.txt b/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f5a1bde19947aa5e4f7cd29ff56708fa7efdf66c --- /dev/null +++ b/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:31 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:09:52 time: 3.9244 data: 3.0990 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:44 time: 0.1643 data: 0.0471 max mem: 2851 +extract (train) [ 40/151] eta: 0:00:27 time: 0.1531 data: 0.0405 max mem: 2851 +extract (train) [ 60/151] eta: 0:00:20 time: 0.1636 data: 0.0455 max mem: 2851 +extract (train) [ 80/151] eta: 0:00:14 time: 0.1522 data: 0.0396 max mem: 2851 +extract (train) [100/151] eta: 0:00:09 time: 0.1584 data: 0.0433 max mem: 2851 +extract (train) [120/151] eta: 0:00:05 time: 0.1639 data: 0.0446 max mem: 2851 +extract (train) [140/151] eta: 0:00:02 time: 0.1344 data: 0.0326 max mem: 2851 +extract (train) [150/151] eta: 0:00:00 time: 0.1342 data: 0.0330 max mem: 2851 +extract (train) Total time: 0:00:27 (0.1817 s / it) +extract (validation) [ 0/32] eta: 0:02:04 time: 3.8931 data: 3.7581 max mem: 2851 +extract (validation) [20/32] eta: 0:00:04 time: 0.1709 data: 0.0430 max mem: 2851 +extract (validation) [31/32] eta: 0:00:00 time: 0.1358 data: 0.0319 max mem: 2851 +extract (validation) Total time: 0:00:09 (0.2846 s / it) +extract (test) [ 0/33] eta: 0:02:17 time: 4.1641 data: 3.9471 max mem: 2851 +extract (test) [20/33] eta: 0:00:04 time: 0.1701 data: 0.0476 max mem: 2851 +extract (test) [32/33] eta: 0:00:00 time: 0.1325 data: 0.0349 max mem: 2851 +extract (test) Total time: 0:00:09 (0.2857 s / it) +feature extraction time: 0:00:46 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | train | 0.77534 | 0.021427 | 0.76702 | 0.022627 | 0.76366 | 0.022386 | +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | test | 0.64615 | 0.058082 | 0.62894 | 0.062753 | 0.62838 | 0.060258 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06144838792595152, "f1": 0.5469838981014179, "f1_std": 0.0619751739730121, "bacc": 0.5472972972972974, "bacc_std": 0.06211726030634472} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.052703758441368806, "f1": 0.656084656084656, "f1_std": 0.05742278985641698, "bacc": 0.6554054054054055, "bacc_std": 0.05452749586119458} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05966295868504389, "f1": 0.5565302144249512, "f1_std": 0.06202075336121701, "bacc": 0.5564671814671815, "bacc_std": 0.06113709230650295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.056447669644406256, "f1": 0.6474358974358974, "f1_std": 0.0604896125671534, "bacc": 0.6462355212355213, "bacc_std": 0.05878811615071605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06240414945997168, "f1": 0.4980694980694981, "f1_std": 0.06404919032277155, "bacc": 0.4980694980694981, "bacc_std": 0.06387131046086399} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.059494390367555675, "f1": 0.6719538572458543, "f1_std": 0.06068906250913259, "bacc": 0.6727799227799228, "bacc_std": 0.06085753647029573} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.054873496259268705, "f1": 0.6474358974358974, "f1_std": 0.057987684640399176, "bacc": 0.6462355212355213, "bacc_std": 0.05665735618357195} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.06032775176596667, "f1": 0.6375757575757576, "f1_std": 0.06248242828405936, "bacc": 0.6370656370656371, "bacc_std": 0.06176586245437029} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 2.782559402207126, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06223452494134229, "f1": 0.5157414083153088, "f1_std": 0.06326009116777041, "bacc": 0.515926640926641, "bacc_std": 0.06356634225308534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06256906243475681, "f1": 0.5294401544401545, "f1_std": 0.06326176099658457, "bacc": 0.5294401544401545, "bacc_std": 0.06295730815583547} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 2.782559402207126, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.058392988150023264, "f1": 0.5810455956075435, "f1_std": 0.05879363425814908, "bacc": 0.583011583011583, "bacc_std": 0.05907973932158574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05556526947291476, "f1": 0.5578231292517006, "f1_std": 0.061284629605962496, "bacc": 0.5612934362934363, "bacc_std": 0.05729953979515908} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05556360820008918, "f1": 0.6862934362934363, "f1_std": 0.05635237303895853, "bacc": 0.6862934362934363, "bacc_std": 0.056091573955748486} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05258693787912507, "f1": 0.6834145091002307, "f1_std": 0.06139776678575, "bacc": 0.6824324324324325, "bacc_std": 0.05632472688573688} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.058262617040248876, "f1": 0.6575670498084292, "f1_std": 0.05913400580411037, "bacc": 0.6592664092664093, "bacc_std": 0.059298611689254446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06112918515820826, "f1": 0.5810455956075435, "f1_std": 0.061742541721635305, "bacc": 0.583011583011583, "bacc_std": 0.06200704448769667} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05583762596469142, "f1": 0.5411764705882354, "f1_std": 0.06452260688678585, "bacc": 0.5526061776061776, "bacc_std": 0.057573672084012234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05475342036275102, "f1": 0.6862934362934363, "f1_std": 0.05580396395250179, "bacc": 0.6862934362934363, "bacc_std": 0.055405560745009796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.060149328966730205, "f1": 0.6375757575757576, "f1_std": 0.061900842763545995, "bacc": 0.6370656370656371, "bacc_std": 0.0614362600419495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.055750067795219745, "f1": 0.5966741126830479, "f1_std": 0.059535022278046336, "bacc": 0.597007722007722, "bacc_std": 0.05736636019664363} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05916387031200594, "f1": 0.6285714285714286, "f1_std": 0.05956659080461617, "bacc": 0.6322393822393823, "bacc_std": 0.05979899094207656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.06029120653792572, "f1": 0.6575670498084292, "f1_std": 0.06086514191238794, "bacc": 0.6592664092664093, "bacc_std": 0.06089226219894604} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05814960505495576, "f1": 0.5125, "f1_std": 0.06114970314872786, "bacc": 0.5164092664092664, "bacc_std": 0.05858747537263065} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05561894871014106, "f1": 0.6794871794871795, "f1_std": 0.05851261887289578, "bacc": 0.6776061776061776, "bacc_std": 0.057060359125781494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 2.782559402207126, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05939219561026755, "f1": 0.5374762808349146, "f1_std": 0.059789638639948885, "bacc": 0.5424710424710424, "bacc_std": 0.06052828186644284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.056604304935915485, "f1": 0.6153846153846154, "f1_std": 0.06002809840924439, "bacc": 0.6148648648648649, "bacc_std": 0.05814834709706886} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.06171819905566393, "f1": 0.5953065134099617, "f1_std": 0.06242979123411166, "bacc": 0.5965250965250966, "bacc_std": 0.06244070005495106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05931844859059672, "f1": 0.5512820512820513, "f1_std": 0.06300062527682129, "bacc": 0.5521235521235521, "bacc_std": 0.0608066369223949} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.056394519016903844, "f1": 0.5905769715293525, "f1_std": 0.0622670037475929, "bacc": 0.5926640926640927, "bacc_std": 0.05833358909677081} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.061589793975085606, "f1": 0.599146110056926, "f1_std": 0.061590228278598114, "bacc": 0.6052123552123552, "bacc_std": 0.06172480754898591} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.053213656653512076, "f1": 0.5190274841437632, "f1_std": 0.06208119513034616, "bacc": 0.5347490347490347, "bacc_std": 0.05463714710627362} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.061710367584836355, "f1": 0.606060606060606, "f1_std": 0.06315471375299682, "bacc": 0.6056949806949807, "bacc_std": 0.06268813885726467} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05702921497747226, "f1": 0.6458185264155414, "f1_std": 0.05722033695243646, "bacc": 0.6544401544401545, "bacc_std": 0.05740898132552582} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05906461410229694, "f1": 0.6306818181818181, "f1_std": 0.05927868147399855, "bacc": 0.640926640926641, "bacc_std": 0.058783845431430094} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.058611273831220764, "f1": 0.6198830409356726, "f1_std": 0.06080419396297386, "bacc": 0.6192084942084942, "bacc_std": 0.059810726715029405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05403942457395364, "f1": 0.6655231560891939, "f1_std": 0.05724773363530985, "bacc": 0.6640926640926641, "bacc_std": 0.05609950895576042} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.060406794554937006, "f1": 0.606060606060606, "f1_std": 0.0620299302292987, "bacc": 0.6056949806949807, "bacc_std": 0.06137132196208981} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.060883118208911055, "f1": 0.6235521235521235, "f1_std": 0.06202129146571236, "bacc": 0.6235521235521235, "bacc_std": 0.0615002610901815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05748966256445717, "f1": 0.5905769715293525, "f1_std": 0.06223607664020985, "bacc": 0.5926640926640927, "bacc_std": 0.05892938548288434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05777851200666336, "f1": 0.6198830409356726, "f1_std": 0.05995510400601928, "bacc": 0.6192084942084942, "bacc_std": 0.05914888718855722} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05779417468239282, "f1": 0.6549227799227799, "f1_std": 0.05855164964930633, "bacc": 0.6549227799227799, "bacc_std": 0.05846758714622786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06302710227593979, "f1": 0.5666666666666667, "f1_std": 0.06308229107595638, "bacc": 0.5694980694980695, "bacc_std": 0.06346328942019117} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.06154519193964362, "f1": 0.6862934362934363, "f1_std": 0.06273842952168064, "bacc": 0.6862934362934363, "bacc_std": 0.06241712111944975} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05653435673220542, "f1": 0.6515594541910331, "f1_std": 0.058928417010029994, "bacc": 0.6505791505791505, "bacc_std": 0.05816277155213548} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.47692307692307695, "acc_std": 0.06091922287731789, "f1": 0.4475, "f1_std": 0.06308361871832754, "bacc": 0.45366795366795365, "bacc_std": 0.06086969631628947} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.3593813663804626, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0594115124420418, "f1": 0.6094688776736361, "f1_std": 0.060006437900991985, "bacc": 0.61003861003861, "bacc_std": 0.05998702841841971} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.056547752245546146, "f1": 0.5775, "f1_std": 0.06073388249379751, "bacc": 0.5791505791505791, "bacc_std": 0.05785323386127255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.3593813663804626, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05987186317461649, "f1": 0.5226249703861644, "f1_std": 0.060066148516639405, "bacc": 0.528957528957529, "bacc_std": 0.06083545724573932} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.061165608931623754, "f1": 0.578226387887527, "f1_std": 0.06194752285100933, "bacc": 0.5786679536679536, "bacc_std": 0.06205189591758259} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05686509312898421, "f1": 0.6549227799227799, "f1_std": 0.05803001641891944, "bacc": 0.6549227799227799, "bacc_std": 0.05783813968739375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06073319281957085, "f1": 0.6235521235521235, "f1_std": 0.06205181788525067, "bacc": 0.6235521235521235, "bacc_std": 0.06188145885801166} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05591082502547772, "f1": 0.6289401836684041, "f1_std": 0.06057020102812904, "bacc": 0.6283783783783784, "bacc_std": 0.05827827116028905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.056804191861320866, "f1": 0.5775, "f1_std": 0.06146996493861908, "bacc": 0.5791505791505791, "bacc_std": 0.058486052438955216} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.061336931546969585, "f1": 0.564176245210728, "f1_std": 0.06182144651438368, "bacc": 0.5651544401544402, "bacc_std": 0.0620880263918537} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06347580445712594, "f1": 0.5521501544309813, "f1_std": 0.06388051918932083, "bacc": 0.555984555984556, "bacc_std": 0.06429060901600607} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.06037691474661095, "f1": 0.6719538572458543, "f1_std": 0.06169320857638493, "bacc": 0.6727799227799228, "bacc_std": 0.06188946634270963} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05868624023482343, "f1": 0.5966741126830479, "f1_std": 0.06252875614530863, "bacc": 0.597007722007722, "bacc_std": 0.06005479055451763} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.058454250557885216, "f1": 0.6407113674597452, "f1_std": 0.05985844228537383, "bacc": 0.6414092664092663, "bacc_std": 0.059612241941113385} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05806182381406379, "f1": 0.6233308138070043, "f1_std": 0.06363907793779376, "bacc": 0.6240347490347491, "bacc_std": 0.06030457152523246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05786690298900108, "f1": 0.6336682185738789, "f1_std": 0.06097201953198227, "bacc": 0.6327220077220077, "bacc_std": 0.05976684311176707} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.059941588924887844, "f1": 0.5833333333333333, "f1_std": 0.06298246012486837, "bacc": 0.5834942084942085, "bacc_std": 0.06146597406551436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05445837452782645, "f1": 0.6198830409356726, "f1_std": 0.05714149566478462, "bacc": 0.6192084942084942, "bacc_std": 0.05622284339881475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.057267969712829755, "f1": 0.5966741126830479, "f1_std": 0.060381835362405316, "bacc": 0.597007722007722, "bacc_std": 0.05848493761508999} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 2.782559402207126, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05955157875778256, "f1": 0.578226387887527, "f1_std": 0.06056740928784022, "bacc": 0.5786679536679536, "bacc_std": 0.0604796022582975} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.059574340213168285, "f1": 0.6407113674597452, "f1_std": 0.06092487942318154, "bacc": 0.6414092664092663, "bacc_std": 0.060926292344954155} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06215758556915625, "f1": 0.5666666666666667, "f1_std": 0.061988224329717866, "bacc": 0.5694980694980695, "bacc_std": 0.061861046574940834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05546015361548554, "f1": 0.6036585365853658, "f1_std": 0.06252262529361582, "bacc": 0.6061776061776062, "bacc_std": 0.057635823585645424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05710676391818312, "f1": 0.6425000000000001, "f1_std": 0.06139516688389159, "bacc": 0.6418918918918919, "bacc_std": 0.05883373291566716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06217098773524778, "f1": 0.5062484685126194, "f1_std": 0.0638167229773955, "bacc": 0.5072393822393823, "bacc_std": 0.06262693819155023} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06414554456526161, "f1": 0.5294401544401545, "f1_std": 0.06531173895780198, "bacc": 0.5294401544401545, "bacc_std": 0.06544629613000721} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.058055749578931434, "f1": 0.6719538572458543, "f1_std": 0.05917749341939609, "bacc": 0.6727799227799228, "bacc_std": 0.05905585322257612} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.054491352117756284, "f1": 0.7006060606060607, "f1_std": 0.055930149404584344, "bacc": 0.6998069498069499, "bacc_std": 0.05540308535763771} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05844905939686881, "f1": 0.5966741126830479, "f1_std": 0.06210467280896653, "bacc": 0.597007722007722, "bacc_std": 0.059973276807517704} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05924495334660515, "f1": 0.6690909090909091, "f1_std": 0.060723659782969994, "bacc": 0.6684362934362934, "bacc_std": 0.06000939315514726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.061640500019090945, "f1": 0.6018132810585641, "f1_std": 0.06418466604836708, "bacc": 0.6013513513513513, "bacc_std": 0.06291398646699677} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.049466847424136914, "f1": 0.5427489177489178, "f1_std": 0.06064190090706154, "bacc": 0.5617760617760618, "bacc_std": 0.05120698604988148} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.057959063292564346, "f1": 0.6832358674463938, "f1_std": 0.06020524480961262, "bacc": 0.6819498069498069, "bacc_std": 0.05923024751047185} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05697939002458664, "f1": 0.5376016260162602, "f1_std": 0.06230416561344967, "bacc": 0.5434362934362934, "bacc_std": 0.058070070029221335} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06313356556216128, "f1": 0.6198830409356726, "f1_std": 0.06538733938634304, "bacc": 0.6192084942084942, "bacc_std": 0.0643657599616476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.0604733908915083, "f1": 0.61, "f1_std": 0.06571793292489095, "bacc": 0.6105212355212355, "bacc_std": 0.06273191120532054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05937811044484322, "f1": 0.5565302144249512, "f1_std": 0.06118526090577671, "bacc": 0.5564671814671815, "bacc_std": 0.06023746726659708} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.7692307692307693, "acc_std": 0.04972957878282461, "f1": 0.7656813266041816, "f1_std": 0.05077660315446027, "bacc": 0.7668918918918919, "bacc_std": 0.050883192126963704} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.057022574165825256, "f1": 0.6904761904761905, "f1_std": 0.05741029187541291, "bacc": 0.6949806949806949, "bacc_std": 0.05761744943134138} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.057618208824690834, "f1": 0.644808743169399, "f1_std": 0.05804709329573783, "bacc": 0.6500965250965252, "bacc_std": 0.05866758785448414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.053760203930297915, "f1": 0.7115384615384616, "f1_std": 0.05688644442605713, "bacc": 0.708976833976834, "bacc_std": 0.0558089812696526} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06159943901996157, "f1": 0.6018132810585641, "f1_std": 0.06414322361442466, "bacc": 0.6013513513513513, "bacc_std": 0.06286155222592037} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06127172963178863, "f1": 0.588206627680312, "f1_std": 0.06353784643919844, "bacc": 0.5878378378378378, "bacc_std": 0.06269512333059202} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.056530672409559425, "f1": 0.6375757575757576, "f1_std": 0.058683707386595875, "bacc": 0.6370656370656371, "bacc_std": 0.058214998276720845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05889696951245862, "f1": 0.6153846153846154, "f1_std": 0.061739555432902916, "bacc": 0.6148648648648649, "bacc_std": 0.06023509942784888} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0573980681282225, "f1": 0.5966741126830479, "f1_std": 0.061139793513809146, "bacc": 0.597007722007722, "bacc_std": 0.05876924635275366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05324922756891775, "f1": 0.5289855072463768, "f1_std": 0.05916404875025031, "bacc": 0.5390926640926641, "bacc_std": 0.05401802759551665} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05853320303885792, "f1": 0.61, "f1_std": 0.06364383081718492, "bacc": 0.6105212355212355, "bacc_std": 0.060336868649215544} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05124231794063374, "f1": 0.6834145091002307, "f1_std": 0.058995490237569566, "bacc": 0.6824324324324325, "bacc_std": 0.05439679051265934} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.0594730310318748, "f1": 0.5125, "f1_std": 0.0639629294612766, "bacc": 0.5164092664092664, "bacc_std": 0.06077912930313141} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.0581957602311005, "f1": 0.49987589972697943, "f1_std": 0.061941229216866824, "bacc": 0.502895752895753, "bacc_std": 0.05925808029460242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05830972190500529, "f1": 0.6862934362934363, "f1_std": 0.05956389813696815, "bacc": 0.6862934362934363, "bacc_std": 0.05940370489161519} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.053640389269157485, "f1": 0.6167649320687003, "f1_std": 0.060600182335279945, "bacc": 0.6196911196911197, "bacc_std": 0.05594178277987686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.059296666639501866, "f1": 0.6474358974358974, "f1_std": 0.06254306988999375, "bacc": 0.6462355212355213, "bacc_std": 0.06065076247947131} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0573920886039141, "f1": 0.545, "f1_std": 0.06086605745586423, "bacc": 0.5477799227799228, "bacc_std": 0.05818045268706809} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05481761043862806, "f1": 0.6690909090909091, "f1_std": 0.056780702344057236, "bacc": 0.6684362934362934, "bacc_std": 0.0563222617391827} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|--------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 0.15058 | 0.54732 | 0.80781 | 0.068382 | 0.80189 | 0.07069 | 0.79949 | 0.071224 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 0.15058 | 0.54732 | 0.62123 | 0.053997 | 0.60782 | 0.057301 | 0.60935 | 0.05591 | + + +done! total time: 0:04:30 diff --git a/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be076035150ad7f425b04db2607a615726aa80b2 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..9927813c23986837982e13743cdb32cf3dad40fe --- /dev/null +++ b/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,,1291.5496650148827,test,0.7317073170731707,0.06992985352508084,0.6479313036690086,0.08484840004413485,0.6684027777777778,0.09164850008065151 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,train,0.9132791327913279,0.013805702629965718,0.8661224489795918,0.023457123427345166,0.8341893335524694,0.026497564541970355 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,test,0.7804878048780488,0.05546184865970421,0.6660633484162897,0.09146553041124618,0.6516129032258065,0.0841684461404771 +flat_mae,patch,logistic,adni_ad_vs_cn,2,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,2,166.81005372000556,test,0.6341463414634146,0.07338652731553436,0.5467943994104643,0.08235896971145601,0.5548387096774194,0.08940409839360475 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,test,0.7073170731707317,0.07046280255371605,0.6272727272727273,0.08359472574380761,0.6370967741935484,0.08758613842452871 +flat_mae,patch,logistic,adni_ad_vs_cn,4,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,4,166.81005372000556,test,0.8048780487804879,0.054184912651696504,0.7152777777777778,0.08299050688026824,0.7016129032258065,0.08053589268398174 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.3593813663804626,train,0.981029810298103,0.006665203532130145,0.9726796763445977,0.009908834993311242,0.9593023255813953,0.0142991866474187 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.3593813663804626,test,0.7073170731707317,0.058697664552470365,0.5729166666666666,0.0827886234129304,0.5693548387096774,0.07646222575037528 +flat_mae,patch,logistic,adni_ad_vs_cn,6,0.3593813663804626,train,0.986449864498645,0.005764703501864259,0.9806516564069758,0.008427675931208288,0.9709302325581395,0.012367299954580902 +flat_mae,patch,logistic,adni_ad_vs_cn,6,0.3593813663804626,test,0.7804878048780488,0.06033929447765094,0.6917293233082706,0.08600759238818942,0.685483870967742,0.08590399282542502 +flat_mae,patch,logistic,adni_ad_vs_cn,7,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,7,166.81005372000556,test,0.6829268292682927,0.06682497760838899,0.5839188134270101,0.08314669447078835,0.5870967741935484,0.08681058572527098 +flat_mae,patch,logistic,adni_ad_vs_cn,8,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,8,1291.5496650148827,test,0.8536585365853658,0.05532137463380842,0.8136363636363637,0.06771415536027024,0.8354838709677419,0.07156604824346714 +flat_mae,patch,logistic,adni_ad_vs_cn,9,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,9,2.782559402207126,test,0.6829268292682927,0.07359493296579013,0.6072218128224024,0.08669925062546759,0.6209677419354839,0.0945497813040358 +flat_mae,patch,logistic,adni_ad_vs_cn,10,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,10,21.54434690031882,test,0.8048780487804879,0.05079665167641269,0.6893939393939394,0.09229976622357053,0.667741935483871,0.08312581990864132 +flat_mae,patch,logistic,adni_ad_vs_cn,11,0.3593813663804626,train,0.986449864498645,0.00575393321529179,0.9806516564069758,0.008421790922884904,0.9709302325581395,0.012344193932806238 +flat_mae,patch,logistic,adni_ad_vs_cn,11,0.3593813663804626,test,0.7073170731707317,0.045958649909317494,0.4831932773109243,0.07022891444758722,0.5016129032258064,0.053300188105007064 +flat_mae,patch,logistic,adni_ad_vs_cn,12,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,12,166.81005372000556,test,0.5853658536585366,0.07527477328533666,0.5306397306397306,0.0774398279528836,0.5564516129032258,0.09083291862039256 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.046415888336127774,train,0.907859078590786,0.01387251834478729,0.8577551020408163,0.023350221223544545,0.8266085956118004,0.02587338498356205 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.046415888336127774,test,0.8048780487804879,0.048375366517095196,0.6893939393939394,0.08712755338745673,0.667741935483871,0.07950568170588294 +flat_mae,patch,logistic,adni_ad_vs_cn,14,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,14,166.81005372000556,test,0.6829268292682927,0.07499355514645153,0.6259649122807017,0.08175675826005904,0.6548387096774193,0.09155697264749753 +flat_mae,patch,logistic,adni_ad_vs_cn,15,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,15,21.54434690031882,test,0.7317073170731707,0.0670509231396011,0.6835087719298245,0.07334822206231775,0.7209677419354839,0.08139611497988672 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.046415888336127774,train,0.8997289972899729,0.014622373062006658,0.8428467833834041,0.025462462574174945,0.8091667351466842,0.02750319444247394 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.046415888336127774,test,0.8292682926829268,0.035884539546512204,0.6800445930880714,0.09951340553485565,0.65,0.07356330607035003 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.3593813663804626,train,0.983739837398374,0.006925896175771048,0.9768796992481203,0.010047567837861043,0.9691634481058427,0.01338891610829961 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.3593813663804626,test,0.8048780487804879,0.05225159925995645,0.6893939393939394,0.09534652576597244,0.667741935483871,0.0839746131301848 +flat_mae,patch,logistic,adni_ad_vs_cn,18,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,18,21.54434690031882,test,0.8048780487804879,0.05429728466462406,0.7152777777777778,0.082713441960249,0.7016129032258065,0.08138299959001553 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,test,0.6097560975609756,0.06578316009656501,0.47096774193548385,0.07482664469266766,0.47096774193548385,0.0745666090012121 +flat_mae,patch,logistic,adni_ad_vs_cn,20,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,20,166.81005372000556,test,0.7317073170731707,0.06805692756545081,0.6676492262343405,0.07852157053411407,0.6870967741935483,0.08441351878701801 +flat_mae,patch,logistic,adni_ad_vs_cn,21,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,21,2.782559402207126,test,0.8048780487804879,0.057011348630731616,0.7354838709677419,0.08043753431664928,0.7354838709677419,0.08428496557177918 +flat_mae,patch,logistic,adni_ad_vs_cn,22,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,22,21.54434690031882,test,0.7560975609756098,0.053564685108561015,0.6117424242424243,0.09142435244521777,0.6016129032258064,0.07809083394018446 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,test,0.7804878048780488,0.05575665236691298,0.6917293233082706,0.08037557294801019,0.685483870967742,0.0822169306387854 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.046415888336127774,train,0.9024390243902439,0.013822157356548022,0.8478699038021071,0.02413514550824222,0.8149806886350563,0.02680921045845689 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.046415888336127774,test,0.6829268292682927,0.05944290248265276,0.5176470588235295,0.08248686536367149,0.5193548387096775,0.07322404687384279 +flat_mae,patch,logistic,adni_ad_vs_cn,25,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,25,166.81005372000556,test,0.6829268292682927,0.06414830749096953,0.5839188134270101,0.08103259423175596,0.5870967741935484,0.0835639168528611 +flat_mae,patch,logistic,adni_ad_vs_cn,26,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,26,21.54434690031882,test,0.7560975609756098,0.05561170300400226,0.6117424242424243,0.0907795133232513,0.6016129032258064,0.07806327005137194 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.3593813663804626,train,0.983739837398374,0.006210944615853769,0.9766829555986183,0.009150686140343584,0.9651162790697674,0.013324642809593246 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.3593813663804626,test,0.7804878048780488,0.04719118279394965,0.6328358208955224,0.08971266620064929,0.6177419354838709,0.07373793458511196 +flat_mae,patch,logistic,adni_ad_vs_cn,28,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,28,2.782559402207126,test,0.6829268292682927,0.05770735830771881,0.5176470588235295,0.07630379235173403,0.5193548387096775,0.06805854858414051 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,test,0.8048780487804879,0.0603889041972838,0.764367816091954,0.06761931770734746,0.8032258064516129,0.07107471959021931 +flat_mae,patch,logistic,adni_ad_vs_cn,30,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,30,166.81005372000556,test,0.6585365853658537,0.06578089928156572,0.5370967741935484,0.08086726436412947,0.5370967741935484,0.07961482947165188 +flat_mae,patch,logistic,adni_ad_vs_cn,31,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,31,21.54434690031882,test,0.6585365853658537,0.06828102687985006,0.5651515151515152,0.08135893499037991,0.5709677419354839,0.0860779452210957 +flat_mae,patch,logistic,adni_ad_vs_cn,32,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,32,2.782559402207126,test,0.6097560975609756,0.07734284405471742,0.5494505494505495,0.07998609886060822,0.5725806451612903,0.09120088374939211 +flat_mae,patch,logistic,adni_ad_vs_cn,33,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,33,1291.5496650148827,test,0.6585365853658537,0.06968235911564329,0.6057692307692308,0.07455674678716934,0.6387096774193548,0.08557845497726356 +flat_mae,patch,logistic,adni_ad_vs_cn,34,0.3593813663804626,train,0.991869918699187,0.004530452090494968,0.9884880564885973,0.006517767079363977,0.9825581395348837,0.009719400124375888 +flat_mae,patch,logistic,adni_ad_vs_cn,34,0.3593813663804626,test,0.7317073170731707,0.06843476509180646,0.6676492262343405,0.08027441828237081,0.6870967741935483,0.08759811172587938 +flat_mae,patch,logistic,adni_ad_vs_cn,35,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,35,166.81005372000556,test,0.8048780487804879,0.0556999482546803,0.7152777777777778,0.08772271008547591,0.7016129032258065,0.08570695535153229 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,test,0.7317073170731707,0.050509739550022074,0.5512437810945273,0.08668193385462111,0.5516129032258065,0.07047995670831998 +flat_mae,patch,logistic,adni_ad_vs_cn,37,0.3593813663804626,train,0.975609756097561,0.007917057419274896,0.9648738695859115,0.011854518375774255,0.9517215876407263,0.016340655104352536 +flat_mae,patch,logistic,adni_ad_vs_cn,37,0.3593813663804626,test,0.7073170731707317,0.06202321531933412,0.5729166666666666,0.08738097309853335,0.5693548387096774,0.08080985576500992 +flat_mae,patch,logistic,adni_ad_vs_cn,38,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,38,166.81005372000556,test,0.7073170731707317,0.06760806670891127,0.6272727272727273,0.08496490849956405,0.6370967741935484,0.09103630772181627 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.046415888336127774,train,0.924119241192412,0.012216835832908219,0.8828571428571428,0.02090351152516702,0.8493508094338073,0.02463738954567022 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.046415888336127774,test,0.7073170731707317,0.060707135787439144,0.5729166666666666,0.08589802069058475,0.5693548387096774,0.08107815187680702 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.046415888336127774,train,0.907859078590786,0.01362192365554608,0.8591476558289923,0.02301209277965941,0.8306557646478758,0.026404320269406884 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.046415888336127774,test,0.7073170731707317,0.06479282868052265,0.5729166666666666,0.0895829431740989,0.5693548387096774,0.08497141614021794 +flat_mae,patch,logistic,adni_ad_vs_cn,41,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,41,166.81005372000556,test,0.7804878048780488,0.05766280781505529,0.6917293233082706,0.08291206173944157,0.685483870967742,0.08257140037718713 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,train,0.8997289972899729,0.013490621607280644,0.8428467833834041,0.02371954954049131,0.8091667351466842,0.02574997699659361 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,test,0.8048780487804879,0.05181881673673416,0.6893939393939394,0.09457907561069223,0.667741935483871,0.08354904340257853 +flat_mae,patch,logistic,adni_ad_vs_cn,43,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,43,21.54434690031882,test,0.7804878048780488,0.06262920731229424,0.7119437939110069,0.0795075066713662,0.7193548387096774,0.08357449953128206 +flat_mae,patch,logistic,adni_ad_vs_cn,44,0.3593813663804626,train,0.983739837398374,0.006847525816490754,0.9766829555986183,0.010165848219757042,0.9651162790697674,0.014690331548169098 +flat_mae,patch,logistic,adni_ad_vs_cn,44,0.3593813663804626,test,0.8536585365853658,0.04493366119500908,0.7670454545454546,0.0835545238845509,0.7338709677419355,0.08039217738824919 +flat_mae,patch,logistic,adni_ad_vs_cn,45,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,45,21.54434690031882,test,0.8536585365853658,0.049395609464650755,0.7864583333333333,0.08136828540039126,0.7677419354838709,0.08331649091679788 +flat_mae,patch,logistic,adni_ad_vs_cn,46,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,46,2.782559402207126,test,0.7317073170731707,0.064556769146184,0.6232247284878863,0.0898590013612739,0.6193548387096774,0.08904377783117386 +flat_mae,patch,logistic,adni_ad_vs_cn,47,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,47,10000.0,test,0.7073170731707317,0.06814707473809793,0.5729166666666666,0.0934813972699264,0.5693548387096774,0.08752876762122452 +flat_mae,patch,logistic,adni_ad_vs_cn,48,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,48,21.54434690031882,test,0.6585365853658537,0.06127673935872114,0.5370967741935484,0.07881410711690445,0.5370967741935484,0.07858679427977512 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,test,0.6341463414634146,0.06594436390980933,0.48621553884711777,0.08154185314934626,0.48709677419354835,0.07815159599327148 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.3593813663804626,train,0.981029810298103,0.006933933614788916,0.9729123189697663,0.010159825600791459,0.9633494946174705,0.013869672241915867 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.3593813663804626,test,0.7560975609756098,0.06776794923476746,0.6893939393939394,0.08321432109663705,0.7032258064516128,0.08677698010875352 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.3593813663804626,train,0.981029810298103,0.007061895924076901,0.9729123189697663,0.010336438910442345,0.9633494946174705,0.014056059944137216 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.3593813663804626,test,0.7317073170731707,0.06855864587238321,0.6676492262343405,0.07820674511016,0.6870967741935483,0.08424040835361475 +flat_mae,patch,logistic,adni_ad_vs_cn,52,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,52,166.81005372000556,test,0.6585365853658537,0.06757284389900756,0.5370967741935484,0.08502408558952379,0.5370967741935484,0.08371219462931676 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,test,0.6585365853658537,0.06816252404886046,0.5370967741935484,0.0848524940841089,0.5370967741935484,0.08447781180158848 +flat_mae,patch,logistic,adni_ad_vs_cn,54,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,54,21.54434690031882,test,0.7317073170731707,0.06920814977472615,0.6676492262343405,0.08185649255310246,0.6870967741935483,0.08932670431656074 +flat_mae,patch,logistic,adni_ad_vs_cn,55,0.3593813663804626,train,0.989159891598916,0.005262300449779655,0.9845864661654136,0.007647121661165884,0.9767441860465116,0.011289470150980812 +flat_mae,patch,logistic,adni_ad_vs_cn,55,0.3593813663804626,test,0.8292682926829268,0.05587365708647088,0.7602339181286549,0.08319389199344399,0.7516129032258064,0.0845426884517849 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.3593813663804626,train,0.986449864498645,0.005701781094579302,0.9806516564069758,0.008334875153800589,0.9709302325581395,0.012232309441277704 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.3593813663804626,test,0.7317073170731707,0.06153095398766191,0.5918552036199095,0.09143919971187767,0.5854838709677419,0.08199250866772813 +flat_mae,patch,logistic,adni_ad_vs_cn,57,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,57,2.782559402207126,test,0.6585365853658537,0.06898678627496206,0.5370967741935484,0.08467589973038896,0.5370967741935484,0.08431800244253954 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,test,0.7073170731707317,0.06459052370532581,0.603225806451613,0.0850373581631687,0.603225806451613,0.08591351863765931 +flat_mae,patch,logistic,adni_ad_vs_cn,59,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,59,21.54434690031882,test,0.7073170731707317,0.06360233010671051,0.603225806451613,0.08345334130487347,0.603225806451613,0.08407180148165286 +flat_mae,patch,logistic,adni_ad_vs_cn,60,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,60,166.81005372000556,test,0.8048780487804879,0.06227071446381061,0.7515151515151515,0.07685587632281662,0.7693548387096774,0.08173587573332691 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.046415888336127774,train,0.9105691056910569,0.012689873714178337,0.8612481626234888,0.021837584621319284,0.8283753800640973,0.024520087447523452 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.046415888336127774,test,0.6585365853658537,0.055579174303868724,0.4564393939393939,0.06849629072840561,0.4693548387096774,0.059368593339661276 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,train,0.9132791327913279,0.012354509162126569,0.8647732478240953,0.02136546524492615,0.8301421645163941,0.024234880937907512 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,test,0.7317073170731707,0.06494973116820522,0.6232247284878863,0.0869585767071662,0.6193548387096774,0.08472041765438729 +flat_mae,patch,logistic,adni_ad_vs_cn,63,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,63,166.81005372000556,test,0.7804878048780488,0.0586351000287686,0.6917293233082706,0.0856869735155503,0.685483870967742,0.08610743205042416 +flat_mae,patch,logistic,adni_ad_vs_cn,64,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,64,166.81005372000556,test,0.6829268292682927,0.06857648348270028,0.5839188134270101,0.08381270961349649,0.5870967741935484,0.08781043080742063 +flat_mae,patch,logistic,adni_ad_vs_cn,65,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,65,2.782559402207126,test,0.7560975609756098,0.0658339807998844,0.6693548387096775,0.08696292066937136,0.6693548387096775,0.0886972858794029 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.3593813663804626,train,0.981029810298103,0.007186910119013327,0.9729123189697663,0.010497375709823412,0.9633494946174705,0.014065934384889796 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.3593813663804626,test,0.6829268292682927,0.06884522122431841,0.5839188134270101,0.08506363301912728,0.5870967741935484,0.08633843211178513 +flat_mae,patch,logistic,adni_ad_vs_cn,67,0.3593813663804626,train,0.986449864498645,0.005912827030327675,0.9806516564069758,0.008653484857582468,0.9709302325581395,0.012685076594133229 +flat_mae,patch,logistic,adni_ad_vs_cn,67,0.3593813663804626,test,0.8536585365853658,0.04579175894621716,0.7670454545454546,0.08559285950937079,0.7338709677419355,0.08110000121893167 +flat_mae,patch,logistic,adni_ad_vs_cn,68,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,68,166.81005372000556,test,0.5609756097560976,0.06422037830694376,0.4409090909090909,0.06587165977114515,0.43870967741935485,0.07065695077646711 +flat_mae,patch,logistic,adni_ad_vs_cn,69,0.046415888336127774,train,0.907859078590786,0.012533406275556087,0.8563215758131013,0.022048544340039014,0.8225614265757252,0.024644325599253832 +flat_mae,patch,logistic,adni_ad_vs_cn,69,0.046415888336127774,test,0.8292682926829268,0.056036424798729584,0.7602339181286549,0.08258252796534983,0.7516129032258064,0.08358899318377765 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.046415888336127774,train,0.907859078590786,0.013425047558188224,0.8563215758131013,0.023201986901709323,0.8225614265757252,0.025695044211955783 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.046415888336127774,test,0.8536585365853658,0.04691808143198926,0.7670454545454546,0.08962379159599836,0.7338709677419355,0.08323888838170446 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.046415888336127774,train,0.907859078590786,0.013432660413070518,0.8577551020408163,0.023036372003322134,0.8266085956118004,0.025932032659389644 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.046415888336127774,test,0.6829268292682927,0.039154508683773535,0.4057971014492754,0.014080943845812776,0.45161290322580644,0.025892497677979277 +flat_mae,patch,logistic,adni_ad_vs_cn,72,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,72,2.782559402207126,test,0.7073170731707317,0.04555927818428874,0.4831932773109243,0.06858982333449908,0.5016129032258064,0.05274607609276905 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.3593813663804626,train,0.978319783197832,0.0073130070002755945,0.9686411149825784,0.010982680163533595,0.9534883720930232,0.01568895106454469 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.3593813663804626,test,0.7317073170731707,0.06063130107328542,0.6232247284878863,0.08451075835520476,0.6193548387096774,0.08191051816752488 +flat_mae,patch,logistic,adni_ad_vs_cn,74,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,74,2.782559402207126,test,0.7317073170731707,0.05118233261508775,0.5512437810945273,0.09022340942480672,0.5516129032258065,0.07263402739407937 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.3593813663804626,train,0.983739837398374,0.0063906645289342505,0.9766829555986183,0.00944533682786063,0.9651162790697674,0.013710204716143815 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.3593813663804626,test,0.7560975609756098,0.057755170559469056,0.6440972222222222,0.08856428218869233,0.635483870967742,0.0847500198140434 +flat_mae,patch,logistic,adni_ad_vs_cn,76,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,76,166.81005372000556,test,0.6341463414634146,0.07497386417505555,0.5467943994104643,0.08491946584465494,0.5548387096774194,0.09287450692984044 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.046415888336127774,train,0.8915989159891599,0.014796505750620817,0.8326530612244898,0.025190759075332147,0.8038663817897937,0.027351205191516313 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.046415888336127774,test,0.8048780487804879,0.042145748640817174,0.6554621848739496,0.09626948392304728,0.6338709677419355,0.07569076216836154 +flat_mae,patch,logistic,adni_ad_vs_cn,78,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,78,21.54434690031882,test,0.7560975609756098,0.06430215704280014,0.6693548387096775,0.08593415136788689,0.6693548387096775,0.08780044917977728 +flat_mae,patch,logistic,adni_ad_vs_cn,79,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,79,2.782559402207126,test,0.8048780487804879,0.06400808619351041,0.764367816091954,0.07116682076497517,0.8032258064516129,0.0735083815929318 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,test,0.7073170731707317,0.06511118939121088,0.603225806451613,0.08493754378586207,0.603225806451613,0.08552077693894868 +flat_mae,patch,logistic,adni_ad_vs_cn,81,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,81,21.54434690031882,test,0.8292682926829268,0.054431768027620876,0.7602339181286549,0.07806768043265946,0.7516129032258064,0.08031511942286101 +flat_mae,patch,logistic,adni_ad_vs_cn,82,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,82,21.54434690031882,test,0.7073170731707317,0.07142427963650938,0.646551724137931,0.0820809704163993,0.6709677419354838,0.09187481478670621 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,test,0.7804878048780488,0.06174877620070178,0.7119437939110069,0.08066776679938707,0.7193548387096774,0.08629084570236367 +flat_mae,patch,logistic,adni_ad_vs_cn,84,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,84,21.54434690031882,test,0.6585365853658537,0.06886284638421239,0.5651515151515152,0.08063260286312862,0.5709677419354839,0.08666669747724844 +flat_mae,patch,logistic,adni_ad_vs_cn,85,0.046415888336127774,train,0.9024390243902439,0.012331027123439193,0.8446969696969697,0.022161326449065908,0.8068863505629058,0.023925704129486098 +flat_mae,patch,logistic,adni_ad_vs_cn,85,0.046415888336127774,test,0.926829268292683,0.03612391918355827,0.8886877828054298,0.06468359519041748,0.85,0.07405403432629447 +flat_mae,patch,logistic,adni_ad_vs_cn,86,0.3593813663804626,train,0.997289972899729,0.00252773918992236,0.9961941891766453,0.003576140689774139,0.9941860465116279,0.0054228823318683075 +flat_mae,patch,logistic,adni_ad_vs_cn,86,0.3593813663804626,test,0.7560975609756098,0.06025405266520914,0.6693548387096775,0.08190508731057872,0.6693548387096775,0.08479596193976645 +flat_mae,patch,logistic,adni_ad_vs_cn,87,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,87,166.81005372000556,test,0.7804878048780488,0.05975368835362372,0.6660633484162897,0.09805452727161945,0.6516129032258065,0.08856631371508042 +flat_mae,patch,logistic,adni_ad_vs_cn,88,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,88,21.54434690031882,test,0.6585365853658537,0.07337809640975036,0.5651515151515152,0.08636621385430764,0.5709677419354839,0.09151782071730594 +flat_mae,patch,logistic,adni_ad_vs_cn,89,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,89,2.782559402207126,test,0.6341463414634146,0.06765246952592768,0.5467943994104643,0.07928852145662012,0.5548387096774194,0.087873988567752 +flat_mae,patch,logistic,adni_ad_vs_cn,90,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,90,2.782559402207126,test,0.7317073170731707,0.06426260445971102,0.6232247284878863,0.08799392198780476,0.6193548387096774,0.086370015114773 +flat_mae,patch,logistic,adni_ad_vs_cn,91,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,91,166.81005372000556,test,0.6341463414634146,0.0617583901242868,0.48621553884711777,0.07471714587074917,0.48709677419354835,0.07320341244896127 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.3593813663804626,train,0.986449864498645,0.005871244786113042,0.9806516564069758,0.008600990789762801,0.9709302325581395,0.012595868174858806 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.3593813663804626,test,0.6829268292682927,0.07499468154848843,0.5839188134270101,0.09033063503754353,0.5870967741935484,0.09310764757774186 +flat_mae,patch,logistic,adni_ad_vs_cn,93,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,93,21.54434690031882,test,0.5365853658536586,0.07572471198201465,0.4754208754208754,0.07627469049333566,0.4903225806451613,0.09052685908856667 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.3593813663804626,train,0.989159891598916,0.005495761236918046,0.9847141673570836,0.007837927689742829,0.9807913550825869,0.010446756682537741 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.3593813663804626,test,0.7073170731707317,0.05636785360194816,0.5340909090909092,0.08507059186107206,0.535483870967742,0.07252972227951661 +flat_mae,patch,logistic,adni_ad_vs_cn,95,0.046415888336127774,train,0.9159891598915989,0.01332714510372612,0.8721970839617899,0.021860086673047185,0.8440504560769168,0.02486898877110755 +flat_mae,patch,logistic,adni_ad_vs_cn,95,0.046415888336127774,test,0.7560975609756098,0.05235816795246458,0.6117424242424243,0.08792369426934744,0.6016129032258064,0.07618411477751043 +flat_mae,patch,logistic,adni_ad_vs_cn,96,0.3593813663804626,train,0.991869918699187,0.004882864229617418,0.9884880564885973,0.007038136374014403,0.9825581395348837,0.010475447097260676 +flat_mae,patch,logistic,adni_ad_vs_cn,96,0.3593813663804626,test,0.6585365853658537,0.05329365964389332,0.4564393939393939,0.06856219764949614,0.4693548387096774,0.059355766034130494 +flat_mae,patch,logistic,adni_ad_vs_cn,97,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,97,21.54434690031882,test,0.7073170731707317,0.06656724492099103,0.603225806451613,0.08834348177611429,0.603225806451613,0.08843760477062365 +flat_mae,patch,logistic,adni_ad_vs_cn,98,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,98,2.782559402207126,test,0.7317073170731707,0.05464642719116294,0.5918552036199095,0.08829757125882741,0.5854838709677419,0.07876548761698961 +flat_mae,patch,logistic,adni_ad_vs_cn,99,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,99,21.54434690031882,test,0.7073170731707317,0.06611179758479512,0.6272727272727273,0.0803053858477698,0.6370967741935484,0.08605698635871838 +flat_mae,patch,logistic,adni_ad_vs_cn,100,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,100,166.81005372000556,test,0.5853658536585366,0.07102908052055494,0.4863669859985261,0.07639928389171803,0.4887096774193548,0.08296019499865197 diff --git a/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2905aee0e0ee4f425f569d19419fd682223c700c --- /dev/null +++ b/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:32 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:11:49 time: 4.3260 data: 3.5741 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:56 time: 0.1953 data: 0.0656 max mem: 2851 +extract (train) [ 40/164] eta: 0:00:33 time: 0.1498 data: 0.0431 max mem: 2851 +extract (train) [ 60/164] eta: 0:00:24 time: 0.1643 data: 0.0508 max mem: 2851 +extract (train) [ 80/164] eta: 0:00:18 time: 0.1545 data: 0.0460 max mem: 2851 +extract (train) [100/164] eta: 0:00:13 time: 0.1597 data: 0.0493 max mem: 2851 +extract (train) [120/164] eta: 0:00:08 time: 0.1699 data: 0.0515 max mem: 2851 +extract (train) [140/164] eta: 0:00:04 time: 0.1465 data: 0.0423 max mem: 2851 +extract (train) [160/164] eta: 0:00:00 time: 0.1545 data: 0.0477 max mem: 2851 +extract (train) [163/164] eta: 0:00:00 time: 0.1563 data: 0.0487 max mem: 2851 +extract (train) Total time: 0:00:31 (0.1891 s / it) +extract (validation) [ 0/21] eta: 0:01:27 time: 4.1905 data: 4.0652 max mem: 2851 +extract (validation) [20/21] eta: 0:00:00 time: 0.1463 data: 0.0407 max mem: 2851 +extract (validation) Total time: 0:00:07 (0.3523 s / it) +extract (test) [ 0/21] eta: 0:01:23 time: 3.9883 data: 3.8470 max mem: 2851 +extract (test) [20/21] eta: 0:00:00 time: 0.1420 data: 0.0376 max mem: 2851 +extract (test) Total time: 0:00:07 (0.3402 s / it) +feature extraction time: 0:00:45 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|-------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 1291.5 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 1291.5 | test | 0.73171 | 0.06993 | 0.64793 | 0.084848 | 0.6684 | 0.091649 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05546184865970421, "f1": 0.6660633484162897, "f1_std": 0.09146553041124618, "bacc": 0.6516129032258065, "bacc_std": 0.0841684461404771} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07338652731553436, "f1": 0.5467943994104643, "f1_std": 0.08235896971145601, "bacc": 0.5548387096774194, "bacc_std": 0.08940409839360475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07046280255371605, "f1": 0.6272727272727273, "f1_std": 0.08359472574380761, "bacc": 0.6370967741935484, "bacc_std": 0.08758613842452871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.054184912651696504, "f1": 0.7152777777777778, "f1_std": 0.08299050688026824, "bacc": 0.7016129032258065, "bacc_std": 0.08053589268398174} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.058697664552470365, "f1": 0.5729166666666666, "f1_std": 0.0827886234129304, "bacc": 0.5693548387096774, "bacc_std": 0.07646222575037528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06033929447765094, "f1": 0.6917293233082706, "f1_std": 0.08600759238818942, "bacc": 0.685483870967742, "bacc_std": 0.08590399282542502} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06682497760838899, "f1": 0.5839188134270101, "f1_std": 0.08314669447078835, "bacc": 0.5870967741935484, "bacc_std": 0.08681058572527098} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 1291.5496650148827, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.05532137463380842, "f1": 0.8136363636363637, "f1_std": 0.06771415536027024, "bacc": 0.8354838709677419, "bacc_std": 0.07156604824346714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07359493296579013, "f1": 0.6072218128224024, "f1_std": 0.08669925062546759, "bacc": 0.6209677419354839, "bacc_std": 0.0945497813040358} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05079665167641269, "f1": 0.6893939393939394, "f1_std": 0.09229976622357053, "bacc": 0.667741935483871, "bacc_std": 0.08312581990864132} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.045958649909317494, "f1": 0.4831932773109243, "f1_std": 0.07022891444758722, "bacc": 0.5016129032258064, "bacc_std": 0.053300188105007064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 166.81005372000556, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.07527477328533666, "f1": 0.5306397306397306, "f1_std": 0.0774398279528836, "bacc": 0.5564516129032258, "bacc_std": 0.09083291862039256} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.048375366517095196, "f1": 0.6893939393939394, "f1_std": 0.08712755338745673, "bacc": 0.667741935483871, "bacc_std": 0.07950568170588294} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07499355514645153, "f1": 0.6259649122807017, "f1_std": 0.08175675826005904, "bacc": 0.6548387096774193, "bacc_std": 0.09155697264749753} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.0670509231396011, "f1": 0.6835087719298245, "f1_std": 0.07334822206231775, "bacc": 0.7209677419354839, "bacc_std": 0.08139611497988672} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.035884539546512204, "f1": 0.6800445930880714, "f1_std": 0.09951340553485565, "bacc": 0.65, "bacc_std": 0.07356330607035003} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05225159925995645, "f1": 0.6893939393939394, "f1_std": 0.09534652576597244, "bacc": 0.667741935483871, "bacc_std": 0.0839746131301848} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05429728466462406, "f1": 0.7152777777777778, "f1_std": 0.082713441960249, "bacc": 0.7016129032258065, "bacc_std": 0.08138299959001553} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 2.782559402207126, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06578316009656501, "f1": 0.47096774193548385, "f1_std": 0.07482664469266766, "bacc": 0.47096774193548385, "bacc_std": 0.0745666090012121} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06805692756545081, "f1": 0.6676492262343405, "f1_std": 0.07852157053411407, "bacc": 0.6870967741935483, "bacc_std": 0.08441351878701801} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.057011348630731616, "f1": 0.7354838709677419, "f1_std": 0.08043753431664928, "bacc": 0.7354838709677419, "bacc_std": 0.08428496557177918} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.053564685108561015, "f1": 0.6117424242424243, "f1_std": 0.09142435244521777, "bacc": 0.6016129032258064, "bacc_std": 0.07809083394018446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05575665236691298, "f1": 0.6917293233082706, "f1_std": 0.08037557294801019, "bacc": 0.685483870967742, "bacc_std": 0.0822169306387854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05944290248265276, "f1": 0.5176470588235295, "f1_std": 0.08248686536367149, "bacc": 0.5193548387096775, "bacc_std": 0.07322404687384279} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06414830749096953, "f1": 0.5839188134270101, "f1_std": 0.08103259423175596, "bacc": 0.5870967741935484, "bacc_std": 0.0835639168528611} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05561170300400226, "f1": 0.6117424242424243, "f1_std": 0.0907795133232513, "bacc": 0.6016129032258064, "bacc_std": 0.07806327005137194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04719118279394965, "f1": 0.6328358208955224, "f1_std": 0.08971266620064929, "bacc": 0.6177419354838709, "bacc_std": 0.07373793458511196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05770735830771881, "f1": 0.5176470588235295, "f1_std": 0.07630379235173403, "bacc": 0.5193548387096775, "bacc_std": 0.06805854858414051} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.0603889041972838, "f1": 0.764367816091954, "f1_std": 0.06761931770734746, "bacc": 0.8032258064516129, "bacc_std": 0.07107471959021931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06578089928156572, "f1": 0.5370967741935484, "f1_std": 0.08086726436412947, "bacc": 0.5370967741935484, "bacc_std": 0.07961482947165188} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06828102687985006, "f1": 0.5651515151515152, "f1_std": 0.08135893499037991, "bacc": 0.5709677419354839, "bacc_std": 0.0860779452210957} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 2.782559402207126, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07734284405471742, "f1": 0.5494505494505495, "f1_std": 0.07998609886060822, "bacc": 0.5725806451612903, "bacc_std": 0.09120088374939211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 1291.5496650148827, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06968235911564329, "f1": 0.6057692307692308, "f1_std": 0.07455674678716934, "bacc": 0.6387096774193548, "bacc_std": 0.08557845497726356} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06843476509180646, "f1": 0.6676492262343405, "f1_std": 0.08027441828237081, "bacc": 0.6870967741935483, "bacc_std": 0.08759811172587938} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.0556999482546803, "f1": 0.7152777777777778, "f1_std": 0.08772271008547591, "bacc": 0.7016129032258065, "bacc_std": 0.08570695535153229} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.050509739550022074, "f1": 0.5512437810945273, "f1_std": 0.08668193385462111, "bacc": 0.5516129032258065, "bacc_std": 0.07047995670831998} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06202321531933412, "f1": 0.5729166666666666, "f1_std": 0.08738097309853335, "bacc": 0.5693548387096774, "bacc_std": 0.08080985576500992} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06760806670891127, "f1": 0.6272727272727273, "f1_std": 0.08496490849956405, "bacc": 0.6370967741935484, "bacc_std": 0.09103630772181627} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.060707135787439144, "f1": 0.5729166666666666, "f1_std": 0.08589802069058475, "bacc": 0.5693548387096774, "bacc_std": 0.08107815187680702} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06479282868052265, "f1": 0.5729166666666666, "f1_std": 0.0895829431740989, "bacc": 0.5693548387096774, "bacc_std": 0.08497141614021794} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05766280781505529, "f1": 0.6917293233082706, "f1_std": 0.08291206173944157, "bacc": 0.685483870967742, "bacc_std": 0.08257140037718713} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05181881673673416, "f1": 0.6893939393939394, "f1_std": 0.09457907561069223, "bacc": 0.667741935483871, "bacc_std": 0.08354904340257853} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06262920731229424, "f1": 0.7119437939110069, "f1_std": 0.0795075066713662, "bacc": 0.7193548387096774, "bacc_std": 0.08357449953128206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 0.3593813663804626, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04493366119500908, "f1": 0.7670454545454546, "f1_std": 0.0835545238845509, "bacc": 0.7338709677419355, "bacc_std": 0.08039217738824919} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 21.54434690031882, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.049395609464650755, "f1": 0.7864583333333333, "f1_std": 0.08136828540039126, "bacc": 0.7677419354838709, "bacc_std": 0.08331649091679788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.064556769146184, "f1": 0.6232247284878863, "f1_std": 0.0898590013612739, "bacc": 0.6193548387096774, "bacc_std": 0.08904377783117386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 10000.0, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06814707473809793, "f1": 0.5729166666666666, "f1_std": 0.0934813972699264, "bacc": 0.5693548387096774, "bacc_std": 0.08752876762122452} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06127673935872114, "f1": 0.5370967741935484, "f1_std": 0.07881410711690445, "bacc": 0.5370967741935484, "bacc_std": 0.07858679427977512} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06594436390980933, "f1": 0.48621553884711777, "f1_std": 0.08154185314934626, "bacc": 0.48709677419354835, "bacc_std": 0.07815159599327148} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06776794923476746, "f1": 0.6893939393939394, "f1_std": 0.08321432109663705, "bacc": 0.7032258064516128, "bacc_std": 0.08677698010875352} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06855864587238321, "f1": 0.6676492262343405, "f1_std": 0.07820674511016, "bacc": 0.6870967741935483, "bacc_std": 0.08424040835361475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06757284389900756, "f1": 0.5370967741935484, "f1_std": 0.08502408558952379, "bacc": 0.5370967741935484, "bacc_std": 0.08371219462931676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06816252404886046, "f1": 0.5370967741935484, "f1_std": 0.0848524940841089, "bacc": 0.5370967741935484, "bacc_std": 0.08447781180158848} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06920814977472615, "f1": 0.6676492262343405, "f1_std": 0.08185649255310246, "bacc": 0.6870967741935483, "bacc_std": 0.08932670431656074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05587365708647088, "f1": 0.7602339181286549, "f1_std": 0.08319389199344399, "bacc": 0.7516129032258064, "bacc_std": 0.0845426884517849} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06153095398766191, "f1": 0.5918552036199095, "f1_std": 0.09143919971187767, "bacc": 0.5854838709677419, "bacc_std": 0.08199250866772813} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06898678627496206, "f1": 0.5370967741935484, "f1_std": 0.08467589973038896, "bacc": 0.5370967741935484, "bacc_std": 0.08431800244253954} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06459052370532581, "f1": 0.603225806451613, "f1_std": 0.0850373581631687, "bacc": 0.603225806451613, "bacc_std": 0.08591351863765931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06360233010671051, "f1": 0.603225806451613, "f1_std": 0.08345334130487347, "bacc": 0.603225806451613, "bacc_std": 0.08407180148165286} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06227071446381061, "f1": 0.7515151515151515, "f1_std": 0.07685587632281662, "bacc": 0.7693548387096774, "bacc_std": 0.08173587573332691} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.055579174303868724, "f1": 0.4564393939393939, "f1_std": 0.06849629072840561, "bacc": 0.4693548387096774, "bacc_std": 0.059368593339661276} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06494973116820522, "f1": 0.6232247284878863, "f1_std": 0.0869585767071662, "bacc": 0.6193548387096774, "bacc_std": 0.08472041765438729} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0586351000287686, "f1": 0.6917293233082706, "f1_std": 0.0856869735155503, "bacc": 0.685483870967742, "bacc_std": 0.08610743205042416} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06857648348270028, "f1": 0.5839188134270101, "f1_std": 0.08381270961349649, "bacc": 0.5870967741935484, "bacc_std": 0.08781043080742063} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0658339807998844, "f1": 0.6693548387096775, "f1_std": 0.08696292066937136, "bacc": 0.6693548387096775, "bacc_std": 0.0886972858794029} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06884522122431841, "f1": 0.5839188134270101, "f1_std": 0.08506363301912728, "bacc": 0.5870967741935484, "bacc_std": 0.08633843211178513} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04579175894621716, "f1": 0.7670454545454546, "f1_std": 0.08559285950937079, "bacc": 0.7338709677419355, "bacc_std": 0.08110000121893167} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 166.81005372000556, "split": "test", "acc": 0.5609756097560976, "acc_std": 0.06422037830694376, "f1": 0.4409090909090909, "f1_std": 0.06587165977114515, "bacc": 0.43870967741935485, "bacc_std": 0.07065695077646711} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.056036424798729584, "f1": 0.7602339181286549, "f1_std": 0.08258252796534983, "bacc": 0.7516129032258064, "bacc_std": 0.08358899318377765} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04691808143198926, "f1": 0.7670454545454546, "f1_std": 0.08962379159599836, "bacc": 0.7338709677419355, "bacc_std": 0.08323888838170446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.039154508683773535, "f1": 0.4057971014492754, "f1_std": 0.014080943845812776, "bacc": 0.45161290322580644, "bacc_std": 0.025892497677979277} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.04555927818428874, "f1": 0.4831932773109243, "f1_std": 0.06858982333449908, "bacc": 0.5016129032258064, "bacc_std": 0.05274607609276905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06063130107328542, "f1": 0.6232247284878863, "f1_std": 0.08451075835520476, "bacc": 0.6193548387096774, "bacc_std": 0.08191051816752488} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05118233261508775, "f1": 0.5512437810945273, "f1_std": 0.09022340942480672, "bacc": 0.5516129032258065, "bacc_std": 0.07263402739407937} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.057755170559469056, "f1": 0.6440972222222222, "f1_std": 0.08856428218869233, "bacc": 0.635483870967742, "bacc_std": 0.0847500198140434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07497386417505555, "f1": 0.5467943994104643, "f1_std": 0.08491946584465494, "bacc": 0.5548387096774194, "bacc_std": 0.09287450692984044} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.042145748640817174, "f1": 0.6554621848739496, "f1_std": 0.09626948392304728, "bacc": 0.6338709677419355, "bacc_std": 0.07569076216836154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06430215704280014, "f1": 0.6693548387096775, "f1_std": 0.08593415136788689, "bacc": 0.6693548387096775, "bacc_std": 0.08780044917977728} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06400808619351041, "f1": 0.764367816091954, "f1_std": 0.07116682076497517, "bacc": 0.8032258064516129, "bacc_std": 0.0735083815929318} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06511118939121088, "f1": 0.603225806451613, "f1_std": 0.08493754378586207, "bacc": 0.603225806451613, "bacc_std": 0.08552077693894868} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.054431768027620876, "f1": 0.7602339181286549, "f1_std": 0.07806768043265946, "bacc": 0.7516129032258064, "bacc_std": 0.08031511942286101} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07142427963650938, "f1": 0.646551724137931, "f1_std": 0.0820809704163993, "bacc": 0.6709677419354838, "bacc_std": 0.09187481478670621} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06174877620070178, "f1": 0.7119437939110069, "f1_std": 0.08066776679938707, "bacc": 0.7193548387096774, "bacc_std": 0.08629084570236367} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06886284638421239, "f1": 0.5651515151515152, "f1_std": 0.08063260286312862, "bacc": 0.5709677419354839, "bacc_std": 0.08666669747724844} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.926829268292683, "acc_std": 0.03612391918355827, "f1": 0.8886877828054298, "f1_std": 0.06468359519041748, "bacc": 0.85, "bacc_std": 0.07405403432629447} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06025405266520914, "f1": 0.6693548387096775, "f1_std": 0.08190508731057872, "bacc": 0.6693548387096775, "bacc_std": 0.08479596193976645} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05975368835362372, "f1": 0.6660633484162897, "f1_std": 0.09805452727161945, "bacc": 0.6516129032258065, "bacc_std": 0.08856631371508042} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07337809640975036, "f1": 0.5651515151515152, "f1_std": 0.08636621385430764, "bacc": 0.5709677419354839, "bacc_std": 0.09151782071730594} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 2.782559402207126, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06765246952592768, "f1": 0.5467943994104643, "f1_std": 0.07928852145662012, "bacc": 0.5548387096774194, "bacc_std": 0.087873988567752} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06426260445971102, "f1": 0.6232247284878863, "f1_std": 0.08799392198780476, "bacc": 0.6193548387096774, "bacc_std": 0.086370015114773} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.0617583901242868, "f1": 0.48621553884711777, "f1_std": 0.07471714587074917, "bacc": 0.48709677419354835, "bacc_std": 0.07320341244896127} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07499468154848843, "f1": 0.5839188134270101, "f1_std": 0.09033063503754353, "bacc": 0.5870967741935484, "bacc_std": 0.09310764757774186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 21.54434690031882, "split": "test", "acc": 0.5365853658536586, "acc_std": 0.07572471198201465, "f1": 0.4754208754208754, "f1_std": 0.07627469049333566, "bacc": 0.4903225806451613, "bacc_std": 0.09052685908856667} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05636785360194816, "f1": 0.5340909090909092, "f1_std": 0.08507059186107206, "bacc": 0.535483870967742, "bacc_std": 0.07252972227951661} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05235816795246458, "f1": 0.6117424242424243, "f1_std": 0.08792369426934744, "bacc": 0.6016129032258064, "bacc_std": 0.07618411477751043} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05329365964389332, "f1": 0.4564393939393939, "f1_std": 0.06856219764949614, "bacc": 0.4693548387096774, "bacc_std": 0.059355766034130494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06656724492099103, "f1": 0.603225806451613, "f1_std": 0.08834348177611429, "bacc": 0.603225806451613, "bacc_std": 0.08843760477062365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05464642719116294, "f1": 0.5918552036199095, "f1_std": 0.08829757125882741, "bacc": 0.5854838709677419, "bacc_std": 0.07876548761698961} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06611179758479512, "f1": 0.6272727272727273, "f1_std": 0.0803053858477698, "bacc": 0.6370967741935484, "bacc_std": 0.08605698635871838} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 166.81005372000556, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.07102908052055494, "f1": 0.4863669859985261, "f1_std": 0.07639928389171803, "bacc": 0.4887096774193548, "bacc_std": 0.08296019499865197} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 173.85 | 1010.3 | 0.98317 | 0.03268 | 0.97423 | 0.050783 | 0.9675 | 0.062145 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 173.85 | 1010.3 | 0.72805 | 0.072417 | 0.62157 | 0.092665 | 0.62269 | 0.088606 | + + +done! total time: 0:04:33 diff --git a/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/config.yaml b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..794ed4410ac537e08e69024d7ef210005c65ecc7 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n800_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_log.json b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..1865568e52a8920963406430602cea272a0ef544 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 9, "eval/id_best": 28, "eval/lr_best": 0.00057, "eval/wd_best": 0.05, "eval/train/loss": 0.0016463210340589285, "eval/train/acc": 1.0, "eval/train/acc_std": 0.0, "eval/train/f1": 1.0, "eval/train/f1_std": 0.0, "eval/validation/loss": 0.04136589542031288, "eval/validation/acc": 0.9885912698412699, "eval/validation/acc_std": 0.0017393379167976947, "eval/validation/f1": 0.9867324740687838, "eval/validation/f1_std": 0.0022074422759830632, "eval/test/loss": 0.05726751312613487, "eval/test/acc": 0.9839285714285714, "eval/test/acc_std": 0.0016794757217431084, "eval/test/f1": 0.9811112233233088, "eval/test/f1_std": 0.002181461327531815} diff --git a/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..69d173e1a5587856706a442c7ab9bd7c22a7a8c2 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 9, "eval/best/id_best": 28, "eval/best/lr_best": 0.00057, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.0016463210340589285, "eval/best/train/acc": 1.0, "eval/best/train/acc_std": 0.0, "eval/best/train/f1": 1.0, "eval/best/train/f1_std": 0.0, "eval/best/validation/loss": 0.04136589542031288, "eval/best/validation/acc": 0.9885912698412699, "eval/best/validation/acc_std": 0.0017393379167976947, "eval/best/validation/f1": 0.9867324740687838, "eval/best/validation/f1_std": 0.0022074422759830632, "eval/best/test/loss": 0.05726751312613487, "eval/best/test/acc": 0.9839285714285714, "eval/best/test/acc_std": 0.0016794757217431084, "eval/best/test/f1": 0.9811112233233088, "eval/best/test/f1_std": 0.002181461327531815} diff --git a/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..12fef1776fa419d1f8aeeff42d8be5967fd4b81e --- /dev/null +++ b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 32, "eval/last/lr_best": 0.0011099999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 0.00022208498558029532, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.04859401285648346, "eval/last/validation/acc": 0.9880952380952381, "eval/last/validation/acc_std": 0.00176602098318897, "eval/last/validation/f1": 0.986139862261585, "eval/last/validation/f1_std": 0.0022508004413239855, "eval/last/test/loss": 0.06103328987956047, "eval/last/test/acc": 0.9837301587301587, "eval/last/test/acc_std": 0.0016927998412674871, "eval/last/test/f1": 0.9813939671088777, "eval/last/test/f1_std": 0.002146773665593086} diff --git a/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..d3fae47e282c01a7fb784e810aaa872849de6066 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,9,0.00057,0.05,28,"[1.9, 1.0]",train,0.0016463210340589285,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,9,0.00057,0.05,28,"[1.9, 1.0]",validation,0.04136589542031288,0.9885912698412699,0.0017393379167976947,0.9867324740687838,0.0022074422759830632 +flat_mae,patch,attn,hcpya_task21,best,9,0.00057,0.05,28,"[1.9, 1.0]",test,0.05726751312613487,0.9839285714285714,0.0016794757217431084,0.9811112233233088,0.002181461327531815 diff --git a/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..d3fae47e282c01a7fb784e810aaa872849de6066 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,9,0.00057,0.05,28,"[1.9, 1.0]",train,0.0016463210340589285,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,9,0.00057,0.05,28,"[1.9, 1.0]",validation,0.04136589542031288,0.9885912698412699,0.0017393379167976947,0.9867324740687838,0.0022074422759830632 +flat_mae,patch,attn,hcpya_task21,best,9,0.00057,0.05,28,"[1.9, 1.0]",test,0.05726751312613487,0.9839285714285714,0.0016794757217431084,0.9811112233233088,0.002181461327531815 diff --git a/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..492984c38117570d3c206ca6ca8104d2a7e073ce --- /dev/null +++ b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.0011099999999999999,0.05,32,"[3.7, 1.0]",train,0.00022208498558029532,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.0011099999999999999,0.05,32,"[3.7, 1.0]",validation,0.04859401285648346,0.9880952380952381,0.00176602098318897,0.986139862261585,0.0022508004413239855 +flat_mae,patch,attn,hcpya_task21,last,19,0.0011099999999999999,0.05,32,"[3.7, 1.0]",test,0.06103328987956047,0.9837301587301587,0.0016927998412674871,0.9813939671088777,0.002146773665593086 diff --git a/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/log.txt b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7f9207dcca8edda675b164e4d46b0f2c004f0778 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,887 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 20:22:58 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n800_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:21:45 lr: nan time: 3.2630 data: 2.7036 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:39 lr: 0.000003 loss: 3.1174 (3.1148) grad: 0.2940 (0.3064) time: 0.4434 data: 0.0037 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:02 lr: 0.000006 loss: 3.0625 (3.0642) grad: 0.2940 (0.3029) time: 0.4353 data: 0.0041 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:44 lr: 0.000009 loss: 2.9402 (3.0042) grad: 0.2935 (0.2973) time: 0.4380 data: 0.0042 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:31 lr: 0.000012 loss: 2.8005 (2.9419) grad: 0.2797 (0.2890) time: 0.4370 data: 0.0042 max mem: 22446 +train: [0] [100/400] eta: 0:02:21 lr: 0.000015 loss: 2.6645 (2.8757) grad: 0.2573 (0.2841) time: 0.4598 data: 0.0043 max mem: 22446 +train: [0] [120/400] eta: 0:02:11 lr: 0.000018 loss: 2.5665 (2.8093) grad: 0.2576 (0.2786) time: 0.4545 data: 0.0043 max mem: 22446 +train: [0] [140/400] eta: 0:02:00 lr: 0.000021 loss: 2.4237 (2.7466) grad: 0.2599 (0.2766) time: 0.4374 data: 0.0041 max mem: 22446 +train: [0] [160/400] eta: 0:01:50 lr: 0.000024 loss: 2.3236 (2.6907) grad: 0.2463 (0.2714) time: 0.4440 data: 0.0039 max mem: 22446 +train: [0] [180/400] eta: 0:01:41 lr: 0.000027 loss: 2.2246 (2.6330) grad: 0.2294 (0.2667) time: 0.4619 data: 0.0044 max mem: 22446 +train: [0] [200/400] eta: 0:01:31 lr: 0.000030 loss: 2.1581 (2.5789) grad: 0.2334 (0.2633) time: 0.4460 data: 0.0041 max mem: 22446 +train: [0] [220/400] eta: 0:01:22 lr: 0.000033 loss: 2.0308 (2.5275) grad: 0.2229 (0.2594) time: 0.4349 data: 0.0042 max mem: 22446 +train: [0] [240/400] eta: 0:01:13 lr: 0.000036 loss: 1.9486 (2.4753) grad: 0.2276 (0.2571) time: 0.4436 data: 0.0043 max mem: 22446 +train: [0] [260/400] eta: 0:01:03 lr: 0.000039 loss: 1.8625 (2.4279) grad: 0.2231 (0.2544) time: 0.4432 data: 0.0042 max mem: 22446 +train: [0] [280/400] eta: 0:00:54 lr: 0.000042 loss: 1.8423 (2.3860) grad: 0.2106 (0.2509) time: 0.4521 data: 0.0042 max mem: 22446 +train: [0] [300/400] eta: 0:00:46 lr: 0.000045 loss: 1.7793 (2.3442) grad: 0.1945 (0.2473) time: 0.5914 data: 0.1569 max mem: 22446 +train: [0] [320/400] eta: 0:00:37 lr: 0.000048 loss: 1.7455 (2.3038) grad: 0.1927 (0.2441) time: 0.4467 data: 0.0029 max mem: 22446 +train: [0] [340/400] eta: 0:00:27 lr: 0.000051 loss: 1.6610 (2.2654) grad: 0.2006 (0.2419) time: 0.4330 data: 0.0040 max mem: 22446 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 1.6500 (2.2301) grad: 0.2003 (0.2394) time: 0.4339 data: 0.0041 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.5961 (2.1960) grad: 0.1924 (0.2368) time: 0.4421 data: 0.0043 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.5547 (2.1619) grad: 0.1927 (0.2348) time: 0.4350 data: 0.0043 max mem: 22446 +train: [0] Total time: 0:03:03 (0.4583 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.5547 (2.1619) grad: 0.1927 (0.2348) +eval (validation): [0] [ 0/63] eta: 0:03:11 time: 3.0328 data: 2.7539 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:20 time: 0.3591 data: 0.0044 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3286 data: 0.0032 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3105 data: 0.0030 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3084 data: 0.0030 max mem: 22446 +eval (validation): [0] Total time: 0:00:23 (0.3796 s / it) +cv: [0] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.079 acc: 0.977 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:20:56 lr: nan time: 3.1403 data: 2.8005 max mem: 22446 +train: [1] [ 20/400] eta: 0:03:34 lr: 0.000063 loss: 1.4893 (1.4973) grad: 0.1864 (0.1905) time: 0.4352 data: 0.0029 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:02 lr: 0.000066 loss: 1.4743 (1.4760) grad: 0.1894 (0.1906) time: 0.4453 data: 0.0042 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:44 lr: 0.000069 loss: 1.4315 (1.4540) grad: 0.1858 (0.1872) time: 0.4352 data: 0.0043 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:30 lr: 0.000072 loss: 1.3976 (1.4376) grad: 0.1738 (0.1852) time: 0.4287 data: 0.0042 max mem: 22446 +train: [1] [100/400] eta: 0:02:19 lr: 0.000075 loss: 1.3836 (1.4271) grad: 0.1766 (0.1845) time: 0.4528 data: 0.0044 max mem: 22446 +train: [1] [120/400] eta: 0:02:10 lr: 0.000078 loss: 1.3519 (1.4092) grad: 0.1726 (0.1830) time: 0.4550 data: 0.0044 max mem: 22446 +train: [1] [140/400] eta: 0:02:00 lr: 0.000081 loss: 1.3178 (1.3951) grad: 0.1690 (0.1813) time: 0.4559 data: 0.0043 max mem: 22446 +train: [1] [160/400] eta: 0:01:50 lr: 0.000084 loss: 1.2869 (1.3784) grad: 0.1647 (0.1796) time: 0.4350 data: 0.0040 max mem: 22446 +train: [1] [180/400] eta: 0:01:40 lr: 0.000087 loss: 1.2486 (1.3635) grad: 0.1666 (0.1785) time: 0.4468 data: 0.0041 max mem: 22446 +train: [1] [200/400] eta: 0:01:31 lr: 0.000090 loss: 1.2205 (1.3487) grad: 0.1640 (0.1770) time: 0.4571 data: 0.0043 max mem: 22446 +train: [1] [220/400] eta: 0:01:22 lr: 0.000093 loss: 1.1850 (1.3328) grad: 0.1641 (0.1770) time: 0.4384 data: 0.0043 max mem: 22446 +train: [1] [240/400] eta: 0:01:12 lr: 0.000096 loss: 1.1710 (1.3188) grad: 0.1678 (0.1760) time: 0.4385 data: 0.0041 max mem: 22446 +train: [1] [260/400] eta: 0:01:03 lr: 0.000099 loss: 1.1642 (1.3063) grad: 0.1617 (0.1749) time: 0.4428 data: 0.0043 max mem: 22446 +train: [1] [280/400] eta: 0:00:54 lr: 0.000102 loss: 1.1213 (1.2930) grad: 0.1611 (0.1744) time: 0.4410 data: 0.0041 max mem: 22446 +train: [1] [300/400] eta: 0:00:46 lr: 0.000105 loss: 1.1045 (1.2805) grad: 0.1535 (0.1729) time: 0.5942 data: 0.1592 max mem: 22446 +train: [1] [320/400] eta: 0:00:36 lr: 0.000108 loss: 1.0980 (1.2682) grad: 0.1533 (0.1717) time: 0.4432 data: 0.0033 max mem: 22446 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 1.0644 (1.2558) grad: 0.1515 (0.1705) time: 0.4318 data: 0.0041 max mem: 22446 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 1.0654 (1.2454) grad: 0.1461 (0.1693) time: 0.4259 data: 0.0041 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 1.0603 (1.2344) grad: 0.1534 (0.1688) time: 0.4375 data: 0.0042 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.0260 (1.2240) grad: 0.1532 (0.1678) time: 0.4398 data: 0.0042 max mem: 22446 +train: [1] Total time: 0:03:02 (0.4563 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.0260 (1.2240) grad: 0.1532 (0.1678) +eval (validation): [1] [ 0/63] eta: 0:03:07 time: 2.9739 data: 2.7019 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:18 time: 0.3131 data: 0.0034 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:08 time: 0.3238 data: 0.0034 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3042 data: 0.0025 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3036 data: 0.0028 max mem: 22446 +eval (validation): [1] Total time: 0:00:22 (0.3602 s / it) +cv: [1] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.061 acc: 0.981 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:20:04 lr: nan time: 3.0120 data: 2.6837 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:30 lr: 0.000123 loss: 0.9591 (0.9800) grad: 0.1596 (0.1600) time: 0.4304 data: 0.0031 max mem: 22446 +train: [2] [ 40/400] eta: 0:02:58 lr: 0.000126 loss: 0.9777 (0.9863) grad: 0.1614 (0.1616) time: 0.4343 data: 0.0041 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:41 lr: 0.000129 loss: 0.9600 (0.9757) grad: 0.1624 (0.1629) time: 0.4327 data: 0.0041 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:30 lr: 0.000132 loss: 0.9567 (0.9756) grad: 0.1745 (0.1696) time: 0.4518 data: 0.0043 max mem: 22446 +train: [2] [100/400] eta: 0:02:18 lr: 0.000135 loss: 0.9648 (0.9693) grad: 0.1830 (0.1700) time: 0.4343 data: 0.0043 max mem: 22446 +train: [2] [120/400] eta: 0:02:08 lr: 0.000138 loss: 0.9459 (0.9699) grad: 0.1802 (0.1745) time: 0.4509 data: 0.0041 max mem: 22446 +train: [2] [140/400] eta: 0:01:59 lr: 0.000141 loss: 0.9095 (0.9623) grad: 0.1791 (0.1765) time: 0.4580 data: 0.0043 max mem: 22446 +train: [2] [160/400] eta: 0:01:50 lr: 0.000144 loss: 0.9095 (0.9585) grad: 0.1815 (0.1794) time: 0.4652 data: 0.0046 max mem: 22446 +train: [2] [180/400] eta: 0:01:40 lr: 0.000147 loss: 0.9204 (0.9537) grad: 0.1859 (0.1804) time: 0.4267 data: 0.0040 max mem: 22446 +train: [2] [200/400] eta: 0:01:31 lr: 0.000150 loss: 0.8856 (0.9451) grad: 0.1697 (0.1814) time: 0.4611 data: 0.0042 max mem: 22446 +train: [2] [220/400] eta: 0:01:22 lr: 0.000153 loss: 0.8821 (0.9454) grad: 0.1870 (0.1826) time: 0.4545 data: 0.0041 max mem: 22446 +train: [2] [240/400] eta: 0:01:12 lr: 0.000156 loss: 0.8801 (0.9379) grad: 0.1849 (0.1828) time: 0.4404 data: 0.0041 max mem: 22446 +train: [2] [260/400] eta: 0:01:03 lr: 0.000159 loss: 0.8454 (0.9349) grad: 0.1849 (0.1848) time: 0.4583 data: 0.0043 max mem: 22446 +train: [2] [280/400] eta: 0:00:54 lr: 0.000162 loss: 0.8604 (0.9311) grad: 0.2010 (0.1858) time: 0.4529 data: 0.0040 max mem: 22446 +train: [2] [300/400] eta: 0:00:46 lr: 0.000165 loss: 0.8210 (0.9254) grad: 0.1907 (0.1871) time: 0.5888 data: 0.1614 max mem: 22446 +train: [2] [320/400] eta: 0:00:37 lr: 0.000168 loss: 0.8210 (0.9215) grad: 0.2080 (0.1908) time: 0.4405 data: 0.0034 max mem: 22446 +train: [2] [340/400] eta: 0:00:27 lr: 0.000171 loss: 0.8395 (0.9166) grad: 0.2157 (0.1917) time: 0.4336 data: 0.0039 max mem: 22446 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 0.8316 (0.9130) grad: 0.1925 (0.1920) time: 0.4439 data: 0.0042 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 0.7982 (0.9073) grad: 0.1974 (0.1931) time: 0.4398 data: 0.0041 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.7745 (0.8994) grad: 0.1996 (0.1937) time: 0.4384 data: 0.0036 max mem: 22446 +train: [2] Total time: 0:03:03 (0.4588 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.7745 (0.8994) grad: 0.1996 (0.1937) +eval (validation): [2] [ 0/63] eta: 0:03:09 time: 3.0078 data: 2.7246 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:20 time: 0.3525 data: 0.0039 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3214 data: 0.0031 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3164 data: 0.0034 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3145 data: 0.0034 max mem: 22446 +eval (validation): [2] Total time: 0:00:23 (0.3766 s / it) +cv: [2] best hparam: (7.1, 1.0) (036) ('036_lr7.1e+00_wd1.0e+00') loss: 0.052 acc: 0.983 f1: 0.981 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:20:25 lr: nan time: 3.0641 data: 2.7246 max mem: 22446 +train: [3] [ 20/400] eta: 0:03:33 lr: 0.000183 loss: 0.7363 (0.7453) grad: 0.2310 (0.2491) time: 0.4364 data: 0.0039 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:01 lr: 0.000186 loss: 0.7818 (0.7846) grad: 0.2373 (0.2514) time: 0.4457 data: 0.0035 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:44 lr: 0.000189 loss: 0.8180 (0.8055) grad: 0.2443 (0.2540) time: 0.4386 data: 0.0039 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:31 lr: 0.000192 loss: 0.8180 (0.8201) grad: 0.2515 (0.2552) time: 0.4372 data: 0.0043 max mem: 22446 +train: [3] [100/400] eta: 0:02:19 lr: 0.000195 loss: 0.8024 (0.8129) grad: 0.2362 (0.2516) time: 0.4404 data: 0.0042 max mem: 22446 +train: [3] [120/400] eta: 0:02:09 lr: 0.000198 loss: 0.7626 (0.8156) grad: 0.2438 (0.2565) time: 0.4462 data: 0.0043 max mem: 22446 +train: [3] [140/400] eta: 0:02:00 lr: 0.000201 loss: 0.7894 (0.8191) grad: 0.2547 (0.2564) time: 0.4730 data: 0.0045 max mem: 22446 +train: [3] [160/400] eta: 0:01:51 lr: 0.000204 loss: 0.8298 (0.8190) grad: 0.2555 (0.2613) time: 0.4608 data: 0.0044 max mem: 22446 +train: [3] [180/400] eta: 0:01:41 lr: 0.000207 loss: 0.8216 (0.8233) grad: 0.3043 (0.2665) time: 0.4352 data: 0.0040 max mem: 22446 +train: [3] [200/400] eta: 0:01:31 lr: 0.000210 loss: 0.8216 (0.8270) grad: 0.2997 (0.2734) time: 0.4489 data: 0.0041 max mem: 22446 +train: [3] [220/400] eta: 0:01:22 lr: 0.000213 loss: 0.8249 (0.8269) grad: 0.2997 (0.2776) time: 0.4395 data: 0.0041 max mem: 22446 +train: [3] [240/400] eta: 0:01:12 lr: 0.000216 loss: 0.8132 (0.8417) grad: 0.2929 (0.2838) time: 0.4395 data: 0.0041 max mem: 22446 +train: [3] [260/400] eta: 0:01:03 lr: 0.000219 loss: 0.8132 (0.8427) grad: 0.3803 (0.2974) time: 0.4525 data: 0.0043 max mem: 22446 +train: [3] [280/400] eta: 0:00:54 lr: 0.000222 loss: 0.8203 (0.8455) grad: 0.4150 (0.3049) time: 0.4383 data: 0.0043 max mem: 22446 +train: [3] [300/400] eta: 0:00:46 lr: 0.000225 loss: 0.8255 (0.8441) grad: 0.3404 (0.3066) time: 0.6116 data: 0.1718 max mem: 22446 +train: [3] [320/400] eta: 0:00:37 lr: 0.000228 loss: 0.8069 (0.8426) grad: 0.3117 (0.3124) time: 0.4280 data: 0.0033 max mem: 22446 +train: [3] [340/400] eta: 0:00:27 lr: 0.000231 loss: 0.7070 (0.8341) grad: 0.3431 (0.3156) time: 0.4401 data: 0.0040 max mem: 22446 +train: [3] [360/400] eta: 0:00:18 lr: 0.000234 loss: 0.6556 (0.8292) grad: 0.3821 (0.3191) time: 0.4376 data: 0.0043 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 0.7073 (0.8309) grad: 0.3867 (0.3238) time: 0.4364 data: 0.0040 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 0.8691 (0.8349) grad: 0.4108 (0.3312) time: 0.4293 data: 0.0038 max mem: 22446 +train: [3] Total time: 0:03:03 (0.4580 s / it) +train: [3] Summary: lr: 0.000240 loss: 0.8691 (0.8349) grad: 0.4108 (0.3312) +eval (validation): [3] [ 0/63] eta: 0:03:10 time: 3.0169 data: 2.7370 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:19 time: 0.3296 data: 0.0034 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:08 time: 0.3198 data: 0.0027 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3095 data: 0.0034 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3058 data: 0.0033 max mem: 22446 +eval (validation): [3] Total time: 0:00:23 (0.3661 s / it) +cv: [3] best hparam: (7.1, 1.0) (036) ('036_lr7.1e+00_wd1.0e+00') loss: 0.064 acc: 0.983 f1: 0.981 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:20:45 lr: nan time: 3.1146 data: 2.7310 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:33 lr: 0.000243 loss: 0.9943 (1.1083) grad: 0.5011 (0.5242) time: 0.4345 data: 0.0037 max mem: 22446 +train: [4] [ 40/400] eta: 0:02:59 lr: 0.000246 loss: 0.8575 (0.9852) grad: 0.4266 (0.4632) time: 0.4346 data: 0.0042 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:43 lr: 0.000249 loss: 0.8099 (0.9263) grad: 0.4054 (0.4629) time: 0.4413 data: 0.0042 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:31 lr: 0.000252 loss: 0.7963 (0.9047) grad: 0.4535 (0.4707) time: 0.4450 data: 0.0043 max mem: 22446 +train: [4] [100/400] eta: 0:02:19 lr: 0.000255 loss: 0.9045 (0.9100) grad: 0.4629 (0.4946) time: 0.4424 data: 0.0045 max mem: 22446 +train: [4] [120/400] eta: 0:02:09 lr: 0.000258 loss: 0.9694 (0.9353) grad: 0.4879 (0.4925) time: 0.4421 data: 0.0043 max mem: 22446 +train: [4] [140/400] eta: 0:02:00 lr: 0.000261 loss: 0.9988 (0.9366) grad: 0.4551 (0.4899) time: 0.4755 data: 0.0046 max mem: 22446 +train: [4] [160/400] eta: 0:01:51 lr: 0.000264 loss: 0.9216 (0.9492) grad: 0.4693 (0.4952) time: 0.4597 data: 0.0046 max mem: 22446 +train: [4] [180/400] eta: 0:01:41 lr: 0.000267 loss: 0.9836 (0.9547) grad: 0.4776 (0.5062) time: 0.4332 data: 0.0042 max mem: 22446 +train: [4] [200/400] eta: 0:01:31 lr: 0.000270 loss: 0.8024 (0.9410) grad: 0.4726 (0.5070) time: 0.4448 data: 0.0042 max mem: 22446 +train: [4] [220/400] eta: 0:01:22 lr: 0.000273 loss: 0.8943 (0.9603) grad: 0.4726 (0.5032) time: 0.4507 data: 0.0043 max mem: 22446 +train: [4] [240/400] eta: 0:01:13 lr: 0.000276 loss: 0.9069 (0.9507) grad: 0.4561 (0.5008) time: 0.4466 data: 0.0043 max mem: 22446 +train: [4] [260/400] eta: 0:01:03 lr: 0.000279 loss: 0.9069 (0.9750) grad: 0.5318 (0.5078) time: 0.4522 data: 0.0043 max mem: 22446 +train: [4] [280/400] eta: 0:00:54 lr: 0.000282 loss: 1.0259 (0.9843) grad: 0.5483 (0.5129) time: 0.4403 data: 0.0043 max mem: 22446 +train: [4] [300/400] eta: 0:00:46 lr: 0.000285 loss: 0.7877 (0.9875) grad: 0.5449 (0.5157) time: 0.6161 data: 0.1680 max mem: 22446 +train: [4] [320/400] eta: 0:00:37 lr: 0.000288 loss: 0.7609 (0.9736) grad: 0.4982 (0.5137) time: 0.4487 data: 0.0030 max mem: 22446 +train: [4] [340/400] eta: 0:00:27 lr: 0.000291 loss: 0.7579 (0.9649) grad: 0.4298 (0.5104) time: 0.4269 data: 0.0042 max mem: 22446 +train: [4] [360/400] eta: 0:00:18 lr: 0.000294 loss: 0.8598 (0.9609) grad: 0.4925 (0.5109) time: 0.4372 data: 0.0044 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.9122 (0.9700) grad: 0.5419 (0.5175) time: 0.4351 data: 0.0040 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 1.0147 (0.9741) grad: 0.6030 (0.5260) time: 0.4427 data: 0.0044 max mem: 22446 +train: [4] Total time: 0:03:03 (0.4597 s / it) +train: [4] Summary: lr: 0.000300 loss: 1.0147 (0.9741) grad: 0.6030 (0.5260) +eval (validation): [4] [ 0/63] eta: 0:03:08 time: 2.9865 data: 2.7159 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:19 time: 0.3315 data: 0.0042 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3230 data: 0.0032 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3095 data: 0.0032 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3090 data: 0.0031 max mem: 22446 +eval (validation): [4] Total time: 0:00:23 (0.3678 s / it) +cv: [4] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.048 acc: 0.985 f1: 0.982 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:20:58 lr: nan time: 3.1457 data: 2.7619 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:37 lr: 0.000300 loss: 0.8453 (0.9253) grad: 0.5993 (0.6515) time: 0.4446 data: 0.0031 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:03 lr: 0.000300 loss: 0.8681 (0.8945) grad: 0.5151 (0.5797) time: 0.4403 data: 0.0042 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:45 lr: 0.000300 loss: 0.8921 (0.9568) grad: 0.4986 (0.5636) time: 0.4456 data: 0.0043 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:32 lr: 0.000300 loss: 0.7123 (0.9286) grad: 0.5319 (0.5555) time: 0.4420 data: 0.0044 max mem: 22446 +train: [5] [100/400] eta: 0:02:20 lr: 0.000300 loss: 0.7510 (0.9439) grad: 0.5336 (0.5579) time: 0.4359 data: 0.0041 max mem: 22446 +train: [5] [120/400] eta: 0:02:10 lr: 0.000300 loss: 1.0709 (0.9932) grad: 0.5364 (0.5645) time: 0.4553 data: 0.0043 max mem: 22446 +train: [5] [140/400] eta: 0:02:01 lr: 0.000300 loss: 1.1493 (1.0169) grad: 0.5637 (0.5679) time: 0.4673 data: 0.0045 max mem: 22446 +train: [5] [160/400] eta: 0:01:51 lr: 0.000299 loss: 1.0447 (1.0245) grad: 0.5250 (0.5646) time: 0.4492 data: 0.0043 max mem: 22446 +train: [5] [180/400] eta: 0:01:41 lr: 0.000299 loss: 1.0450 (1.0371) grad: 0.5536 (0.5778) time: 0.4427 data: 0.0042 max mem: 22446 +train: [5] [200/400] eta: 0:01:32 lr: 0.000299 loss: 1.0450 (1.0449) grad: 0.6086 (0.5816) time: 0.4497 data: 0.0044 max mem: 22446 +train: [5] [220/400] eta: 0:01:22 lr: 0.000299 loss: 0.9419 (1.0537) grad: 0.5360 (0.5793) time: 0.4547 data: 0.0042 max mem: 22446 +train: [5] [240/400] eta: 0:01:13 lr: 0.000299 loss: 0.9135 (1.0379) grad: 0.4891 (0.5703) time: 0.4530 data: 0.0043 max mem: 22446 +train: [5] [260/400] eta: 0:01:04 lr: 0.000299 loss: 0.9525 (1.0540) grad: 0.5418 (0.5777) time: 0.4472 data: 0.0039 max mem: 22446 +train: [5] [280/400] eta: 0:00:54 lr: 0.000298 loss: 1.0063 (1.0521) grad: 0.6122 (0.5817) time: 0.4393 data: 0.0041 max mem: 22446 +train: [5] [300/400] eta: 0:00:46 lr: 0.000298 loss: 0.8329 (1.0473) grad: 0.6135 (0.5851) time: 0.6154 data: 0.1776 max mem: 22446 +train: [5] [320/400] eta: 0:00:37 lr: 0.000298 loss: 0.7933 (1.0351) grad: 0.5913 (0.5823) time: 0.4450 data: 0.0050 max mem: 22446 +train: [5] [340/400] eta: 0:00:27 lr: 0.000298 loss: 0.7978 (1.0295) grad: 0.4764 (0.5776) time: 0.4406 data: 0.0043 max mem: 22446 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 0.8447 (1.0199) grad: 0.5174 (0.5766) time: 0.4395 data: 0.0042 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.8494 (1.0202) grad: 0.5259 (0.5723) time: 0.4400 data: 0.0040 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 0.7693 (0.9992) grad: 0.4303 (0.5645) time: 0.4384 data: 0.0041 max mem: 22446 +train: [5] Total time: 0:03:04 (0.4616 s / it) +train: [5] Summary: lr: 0.000297 loss: 0.7693 (0.9992) grad: 0.4303 (0.5645) +eval (validation): [5] [ 0/63] eta: 0:03:15 time: 3.1104 data: 2.8285 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:21 time: 0.3641 data: 0.0036 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3298 data: 0.0033 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3088 data: 0.0033 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3079 data: 0.0033 max mem: 22446 +eval (validation): [5] Total time: 0:00:24 (0.3824 s / it) +cv: [5] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.047 acc: 0.985 f1: 0.982 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:20:43 lr: nan time: 3.1099 data: 2.7254 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:41 lr: 0.000296 loss: 0.7402 (0.7388) grad: 0.4660 (0.5161) time: 0.4553 data: 0.0040 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:03 lr: 0.000296 loss: 0.7752 (0.8032) grad: 0.4562 (0.4827) time: 0.4323 data: 0.0042 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:44 lr: 0.000296 loss: 0.6575 (0.7808) grad: 0.4448 (0.4905) time: 0.4339 data: 0.0041 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:31 lr: 0.000295 loss: 0.7300 (0.8071) grad: 0.4751 (0.5033) time: 0.4402 data: 0.0042 max mem: 22446 +train: [6] [100/400] eta: 0:02:19 lr: 0.000295 loss: 0.7300 (0.7897) grad: 0.4751 (0.4978) time: 0.4300 data: 0.0042 max mem: 22446 +train: [6] [120/400] eta: 0:02:10 lr: 0.000295 loss: 0.5680 (0.7516) grad: 0.4414 (0.4910) time: 0.4666 data: 0.0044 max mem: 22446 +train: [6] [140/400] eta: 0:02:00 lr: 0.000294 loss: 0.5680 (0.7344) grad: 0.4206 (0.4836) time: 0.4586 data: 0.0044 max mem: 22446 +train: [6] [160/400] eta: 0:01:51 lr: 0.000294 loss: 0.5646 (0.7290) grad: 0.4206 (0.4747) time: 0.4570 data: 0.0042 max mem: 22446 +train: [6] [180/400] eta: 0:01:41 lr: 0.000293 loss: 0.6563 (0.7355) grad: 0.4570 (0.4809) time: 0.4422 data: 0.0041 max mem: 22446 +train: [6] [200/400] eta: 0:01:31 lr: 0.000293 loss: 0.6563 (0.7471) grad: 0.4619 (0.4804) time: 0.4493 data: 0.0043 max mem: 22446 +train: [6] [220/400] eta: 0:01:22 lr: 0.000292 loss: 0.5785 (0.7367) grad: 0.4139 (0.4763) time: 0.4530 data: 0.0042 max mem: 22446 +train: [6] [240/400] eta: 0:01:13 lr: 0.000292 loss: 0.6039 (0.7359) grad: 0.4307 (0.4802) time: 0.4450 data: 0.0042 max mem: 22446 +train: [6] [260/400] eta: 0:01:04 lr: 0.000291 loss: 0.5084 (0.7262) grad: 0.4179 (0.4736) time: 0.4512 data: 0.0043 max mem: 22446 +train: [6] [280/400] eta: 0:00:54 lr: 0.000291 loss: 0.5629 (0.7235) grad: 0.4032 (0.4732) time: 0.4337 data: 0.0041 max mem: 22446 +train: [6] [300/400] eta: 0:00:46 lr: 0.000290 loss: 0.6487 (0.7199) grad: 0.4071 (0.4702) time: 0.6155 data: 0.1682 max mem: 22446 +train: [6] [320/400] eta: 0:00:37 lr: 0.000290 loss: 0.5636 (0.7104) grad: 0.4007 (0.4670) time: 0.4356 data: 0.0035 max mem: 22446 +train: [6] [340/400] eta: 0:00:27 lr: 0.000289 loss: 0.4516 (0.7052) grad: 0.3574 (0.4620) time: 0.4401 data: 0.0043 max mem: 22446 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 0.4516 (0.6962) grad: 0.3845 (0.4581) time: 0.4350 data: 0.0042 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.4800 (0.6847) grad: 0.3820 (0.4530) time: 0.4402 data: 0.0041 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.4424 (0.6723) grad: 0.3696 (0.4489) time: 0.4430 data: 0.0043 max mem: 22446 +train: [6] Total time: 0:03:04 (0.4601 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.4424 (0.6723) grad: 0.3696 (0.4489) +eval (validation): [6] [ 0/63] eta: 0:03:10 time: 3.0247 data: 2.7318 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:20 time: 0.3457 data: 0.0033 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3246 data: 0.0031 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3097 data: 0.0032 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3090 data: 0.0032 max mem: 22446 +eval (validation): [6] Total time: 0:00:23 (0.3742 s / it) +cv: [6] best hparam: (1.4, 1.0) (026) ('026_lr1.4e+00_wd1.0e+00') loss: 0.043 acc: 0.987 f1: 0.984 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:20:29 lr: nan time: 3.0747 data: 2.7361 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:32 lr: 0.000286 loss: 0.4440 (0.5466) grad: 0.3482 (0.4140) time: 0.4343 data: 0.0029 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:00 lr: 0.000286 loss: 0.4437 (0.5184) grad: 0.3544 (0.3857) time: 0.4371 data: 0.0038 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:43 lr: 0.000285 loss: 0.4848 (0.5424) grad: 0.4106 (0.4081) time: 0.4385 data: 0.0042 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:30 lr: 0.000284 loss: 0.4963 (0.5354) grad: 0.4394 (0.4073) time: 0.4424 data: 0.0042 max mem: 22446 +train: [7] [100/400] eta: 0:02:18 lr: 0.000284 loss: 0.4390 (0.5337) grad: 0.3841 (0.4031) time: 0.4297 data: 0.0041 max mem: 22446 +train: [7] [120/400] eta: 0:02:09 lr: 0.000283 loss: 0.3729 (0.5077) grad: 0.3417 (0.3919) time: 0.4671 data: 0.0043 max mem: 22446 +train: [7] [140/400] eta: 0:02:00 lr: 0.000282 loss: 0.4208 (0.5235) grad: 0.3379 (0.3887) time: 0.4643 data: 0.0046 max mem: 22446 +train: [7] [160/400] eta: 0:01:51 lr: 0.000282 loss: 0.3500 (0.4987) grad: 0.3302 (0.3814) time: 0.4565 data: 0.0045 max mem: 22446 +train: [7] [180/400] eta: 0:01:41 lr: 0.000281 loss: 0.3561 (0.5034) grad: 0.3240 (0.3823) time: 0.4345 data: 0.0039 max mem: 22446 +train: [7] [200/400] eta: 0:01:31 lr: 0.000280 loss: 0.4053 (0.4981) grad: 0.3647 (0.3816) time: 0.4624 data: 0.0041 max mem: 22446 +train: [7] [220/400] eta: 0:01:22 lr: 0.000279 loss: 0.4124 (0.5002) grad: 0.3408 (0.3757) time: 0.4555 data: 0.0045 max mem: 22446 +train: [7] [240/400] eta: 0:01:13 lr: 0.000278 loss: 0.4124 (0.5003) grad: 0.3393 (0.3748) time: 0.4516 data: 0.0042 max mem: 22446 +train: [7] [260/400] eta: 0:01:04 lr: 0.000278 loss: 0.4206 (0.4988) grad: 0.3679 (0.3757) time: 0.4491 data: 0.0042 max mem: 22446 +train: [7] [280/400] eta: 0:00:54 lr: 0.000277 loss: 0.4206 (0.5084) grad: 0.3564 (0.3752) time: 0.4518 data: 0.0042 max mem: 22446 +train: [7] [300/400] eta: 0:00:46 lr: 0.000276 loss: 0.5384 (0.5147) grad: 0.3743 (0.3768) time: 0.6146 data: 0.1733 max mem: 22446 +train: [7] [320/400] eta: 0:00:37 lr: 0.000275 loss: 0.4939 (0.5135) grad: 0.3457 (0.3751) time: 0.4501 data: 0.0035 max mem: 22446 +train: [7] [340/400] eta: 0:00:27 lr: 0.000274 loss: 0.4173 (0.5065) grad: 0.3180 (0.3714) time: 0.4516 data: 0.0043 max mem: 22446 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 0.3591 (0.5001) grad: 0.3035 (0.3693) time: 0.4440 data: 0.0040 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.3904 (0.4933) grad: 0.3217 (0.3660) time: 0.4569 data: 0.0045 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.3431 (0.4885) grad: 0.3057 (0.3625) time: 0.4483 data: 0.0044 max mem: 22446 +train: [7] Total time: 0:03:05 (0.4639 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.3431 (0.4885) grad: 0.3057 (0.3625) +eval (validation): [7] [ 0/63] eta: 0:03:14 time: 3.0952 data: 2.8599 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:19 time: 0.3326 data: 0.0028 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3372 data: 0.0033 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3196 data: 0.0036 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3199 data: 0.0036 max mem: 22446 +eval (validation): [7] Total time: 0:00:23 (0.3786 s / it) +cv: [7] best hparam: (1.9, 1.0) (028) ('028_lr1.9e+00_wd1.0e+00') loss: 0.041 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:21:09 lr: nan time: 3.1744 data: 2.8297 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:33 lr: 0.000270 loss: 0.2727 (0.3087) grad: 0.2616 (0.2567) time: 0.4305 data: 0.0022 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:00 lr: 0.000270 loss: 0.3123 (0.3226) grad: 0.2500 (0.2654) time: 0.4402 data: 0.0039 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:44 lr: 0.000269 loss: 0.3139 (0.3231) grad: 0.2434 (0.2577) time: 0.4441 data: 0.0040 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:30 lr: 0.000268 loss: 0.3139 (0.3189) grad: 0.2382 (0.2529) time: 0.4337 data: 0.0041 max mem: 22446 +train: [8] [100/400] eta: 0:02:20 lr: 0.000267 loss: 0.3039 (0.3175) grad: 0.2650 (0.2695) time: 0.4568 data: 0.0041 max mem: 22446 +train: [8] [120/400] eta: 0:02:10 lr: 0.000266 loss: 0.2816 (0.3270) grad: 0.2687 (0.2711) time: 0.4652 data: 0.0045 max mem: 22446 +train: [8] [140/400] eta: 0:02:01 lr: 0.000265 loss: 0.2816 (0.3286) grad: 0.2684 (0.2714) time: 0.4603 data: 0.0044 max mem: 22446 +train: [8] [160/400] eta: 0:01:51 lr: 0.000264 loss: 0.3156 (0.3280) grad: 0.2592 (0.2706) time: 0.4413 data: 0.0040 max mem: 22446 +train: [8] [180/400] eta: 0:01:41 lr: 0.000263 loss: 0.2992 (0.3279) grad: 0.2331 (0.2687) time: 0.4542 data: 0.0040 max mem: 22446 +train: [8] [200/400] eta: 0:01:32 lr: 0.000262 loss: 0.2848 (0.3262) grad: 0.2892 (0.2715) time: 0.4645 data: 0.0041 max mem: 22446 +train: [8] [220/400] eta: 0:01:23 lr: 0.000260 loss: 0.2758 (0.3259) grad: 0.2582 (0.2692) time: 0.4499 data: 0.0043 max mem: 22446 +train: [8] [240/400] eta: 0:01:13 lr: 0.000259 loss: 0.2861 (0.3260) grad: 0.2035 (0.2646) time: 0.4528 data: 0.0044 max mem: 22446 +train: [8] [260/400] eta: 0:01:04 lr: 0.000258 loss: 0.2836 (0.3252) grad: 0.2105 (0.2653) time: 0.4464 data: 0.0044 max mem: 22446 +train: [8] [280/400] eta: 0:00:55 lr: 0.000257 loss: 0.2508 (0.3277) grad: 0.2479 (0.2648) time: 0.4507 data: 0.0043 max mem: 22446 +train: [8] [300/400] eta: 0:00:46 lr: 0.000256 loss: 0.2754 (0.3295) grad: 0.2524 (0.2656) time: 0.6014 data: 0.1652 max mem: 22446 +train: [8] [320/400] eta: 0:00:37 lr: 0.000255 loss: 0.2890 (0.3262) grad: 0.2339 (0.2611) time: 0.4468 data: 0.0035 max mem: 22446 +train: [8] [340/400] eta: 0:00:27 lr: 0.000254 loss: 0.2477 (0.3230) grad: 0.1890 (0.2577) time: 0.4502 data: 0.0034 max mem: 22446 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 0.2226 (0.3183) grad: 0.1917 (0.2547) time: 0.4538 data: 0.0044 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.2054 (0.3134) grad: 0.1743 (0.2519) time: 0.4388 data: 0.0041 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.2204 (0.3097) grad: 0.1724 (0.2484) time: 0.4511 data: 0.0040 max mem: 22446 +train: [8] Total time: 0:03:05 (0.4640 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.2204 (0.3097) grad: 0.1724 (0.2484) +eval (validation): [8] [ 0/63] eta: 0:03:15 time: 3.1093 data: 2.8745 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:20 time: 0.3483 data: 0.0038 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3131 data: 0.0029 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3198 data: 0.0033 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3207 data: 0.0033 max mem: 22446 +eval (validation): [8] Total time: 0:00:23 (0.3761 s / it) +cv: [8] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 0.042 acc: 0.988 f1: 0.985 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:22:02 lr: nan time: 3.3059 data: 2.9089 max mem: 22446 +train: [9] [ 20/400] eta: 0:03:43 lr: 0.000249 loss: 0.2270 (0.2425) grad: 0.1880 (0.1854) time: 0.4528 data: 0.0024 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:06 lr: 0.000248 loss: 0.2363 (0.2570) grad: 0.1933 (0.2124) time: 0.4426 data: 0.0039 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:47 lr: 0.000247 loss: 0.2471 (0.2622) grad: 0.2281 (0.2178) time: 0.4441 data: 0.0041 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:33 lr: 0.000246 loss: 0.2471 (0.2669) grad: 0.2435 (0.2184) time: 0.4403 data: 0.0041 max mem: 22446 +train: [9] [100/400] eta: 0:02:21 lr: 0.000244 loss: 0.2338 (0.2615) grad: 0.2154 (0.2176) time: 0.4403 data: 0.0041 max mem: 22446 +train: [9] [120/400] eta: 0:02:11 lr: 0.000243 loss: 0.2298 (0.2590) grad: 0.1673 (0.2104) time: 0.4533 data: 0.0042 max mem: 22446 +train: [9] [140/400] eta: 0:02:01 lr: 0.000242 loss: 0.2104 (0.2523) grad: 0.1727 (0.2079) time: 0.4456 data: 0.0042 max mem: 22446 +train: [9] [160/400] eta: 0:01:51 lr: 0.000241 loss: 0.2170 (0.2501) grad: 0.1883 (0.2074) time: 0.4479 data: 0.0043 max mem: 22446 +train: [9] [180/400] eta: 0:01:41 lr: 0.000240 loss: 0.2204 (0.2463) grad: 0.1844 (0.2056) time: 0.4347 data: 0.0039 max mem: 22446 +train: [9] [200/400] eta: 0:01:32 lr: 0.000238 loss: 0.2320 (0.2472) grad: 0.1898 (0.2047) time: 0.4588 data: 0.0042 max mem: 22446 +train: [9] [220/400] eta: 0:01:22 lr: 0.000237 loss: 0.2324 (0.2457) grad: 0.1831 (0.2000) time: 0.4357 data: 0.0042 max mem: 22446 +train: [9] [240/400] eta: 0:01:12 lr: 0.000236 loss: 0.2114 (0.2420) grad: 0.1658 (0.1986) time: 0.4357 data: 0.0040 max mem: 22446 +train: [9] [260/400] eta: 0:01:03 lr: 0.000234 loss: 0.2041 (0.2424) grad: 0.1800 (0.1989) time: 0.4366 data: 0.0041 max mem: 22446 +train: [9] [280/400] eta: 0:00:54 lr: 0.000233 loss: 0.2437 (0.2440) grad: 0.1901 (0.1995) time: 0.4407 data: 0.0043 max mem: 22446 +train: [9] [300/400] eta: 0:00:46 lr: 0.000232 loss: 0.2405 (0.2432) grad: 0.2019 (0.1991) time: 0.6141 data: 0.1776 max mem: 22446 +train: [9] [320/400] eta: 0:00:37 lr: 0.000230 loss: 0.1955 (0.2397) grad: 0.1698 (0.1969) time: 0.4479 data: 0.0038 max mem: 22446 +train: [9] [340/400] eta: 0:00:27 lr: 0.000229 loss: 0.1988 (0.2402) grad: 0.1644 (0.1964) time: 0.4486 data: 0.0032 max mem: 22446 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 0.2160 (0.2387) grad: 0.1751 (0.1957) time: 0.4439 data: 0.0043 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.1947 (0.2363) grad: 0.1583 (0.1935) time: 0.4494 data: 0.0044 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.1802 (0.2342) grad: 0.1421 (0.1910) time: 0.4379 data: 0.0043 max mem: 22446 +train: [9] Total time: 0:03:04 (0.4603 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.1802 (0.2342) grad: 0.1421 (0.1910) +eval (validation): [9] [ 0/63] eta: 0:03:15 time: 3.0963 data: 2.8558 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:21 time: 0.3593 data: 0.0041 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3273 data: 0.0029 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3146 data: 0.0035 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3144 data: 0.0034 max mem: 22446 +eval (validation): [9] Total time: 0:00:24 (0.3822 s / it) +cv: [9] best hparam: (1.9, 1.0) (028) ('028_lr1.9e+00_wd1.0e+00') loss: 0.041 acc: 0.989 f1: 0.987 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [10] [ 0/400] eta: 0:21:04 lr: nan time: 3.1613 data: 2.7616 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:32 lr: 0.000224 loss: 0.2426 (0.2344) grad: 0.1273 (0.1498) time: 0.4299 data: 0.0036 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:02 lr: 0.000222 loss: 0.2071 (0.2118) grad: 0.1497 (0.1483) time: 0.4514 data: 0.0045 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:45 lr: 0.000221 loss: 0.1944 (0.2099) grad: 0.1589 (0.1482) time: 0.4456 data: 0.0041 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:32 lr: 0.000220 loss: 0.1948 (0.2096) grad: 0.1698 (0.1633) time: 0.4453 data: 0.0043 max mem: 22446 +train: [10] [100/400] eta: 0:02:21 lr: 0.000218 loss: 0.1748 (0.2047) grad: 0.1444 (0.1603) time: 0.4463 data: 0.0043 max mem: 22446 +train: [10] [120/400] eta: 0:02:13 lr: 0.000217 loss: 0.1748 (0.2014) grad: 0.1479 (0.1586) time: 0.5015 data: 0.0046 max mem: 22446 +train: [10] [140/400] eta: 0:02:02 lr: 0.000215 loss: 0.1827 (0.2006) grad: 0.1404 (0.1548) time: 0.4570 data: 0.0044 max mem: 22446 +train: [10] [160/400] eta: 0:01:52 lr: 0.000214 loss: 0.1725 (0.1962) grad: 0.1321 (0.1533) time: 0.4376 data: 0.0041 max mem: 22446 +train: [10] [180/400] eta: 0:01:42 lr: 0.000213 loss: 0.1629 (0.1969) grad: 0.1484 (0.1534) time: 0.4561 data: 0.0042 max mem: 22446 +train: [10] [200/400] eta: 0:01:33 lr: 0.000211 loss: 0.1743 (0.1949) grad: 0.1591 (0.1525) time: 0.4526 data: 0.0042 max mem: 22446 +train: [10] [220/400] eta: 0:01:23 lr: 0.000210 loss: 0.1702 (0.1938) grad: 0.1337 (0.1503) time: 0.4403 data: 0.0036 max mem: 22446 +train: [10] [240/400] eta: 0:01:13 lr: 0.000208 loss: 0.1730 (0.1946) grad: 0.1258 (0.1483) time: 0.4423 data: 0.0039 max mem: 22446 +train: [10] [260/400] eta: 0:01:04 lr: 0.000207 loss: 0.1772 (0.1943) grad: 0.1406 (0.1494) time: 0.4540 data: 0.0042 max mem: 22446 +train: [10] [280/400] eta: 0:00:55 lr: 0.000205 loss: 0.1706 (0.1923) grad: 0.1339 (0.1477) time: 0.4508 data: 0.0041 max mem: 22446 +train: [10] [300/400] eta: 0:00:47 lr: 0.000204 loss: 0.1527 (0.1920) grad: 0.1477 (0.1482) time: 0.6246 data: 0.1707 max mem: 22446 +train: [10] [320/400] eta: 0:00:37 lr: 0.000202 loss: 0.1552 (0.1899) grad: 0.1463 (0.1466) time: 0.4445 data: 0.0037 max mem: 22446 +train: [10] [340/400] eta: 0:00:28 lr: 0.000201 loss: 0.1552 (0.1881) grad: 0.1169 (0.1454) time: 0.4513 data: 0.0043 max mem: 22446 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 0.1563 (0.1868) grad: 0.1178 (0.1435) time: 0.4378 data: 0.0040 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.1523 (0.1864) grad: 0.1178 (0.1424) time: 0.4664 data: 0.0045 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.1572 (0.1858) grad: 0.1296 (0.1416) time: 0.4508 data: 0.0043 max mem: 22446 +train: [10] Total time: 0:03:06 (0.4667 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.1572 (0.1858) grad: 0.1296 (0.1416) +eval (validation): [10] [ 0/63] eta: 0:03:14 time: 3.0913 data: 2.8084 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:20 time: 0.3547 data: 0.0043 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3345 data: 0.0031 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3271 data: 0.0034 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3279 data: 0.0034 max mem: 22446 +eval (validation): [10] Total time: 0:00:24 (0.3869 s / it) +cv: [10] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.046 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:21:10 lr: nan time: 3.1765 data: 2.8339 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:35 lr: 0.000195 loss: 0.1736 (0.1710) grad: 0.1245 (0.1339) time: 0.4366 data: 0.0041 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:03 lr: 0.000193 loss: 0.1677 (0.1698) grad: 0.1234 (0.1249) time: 0.4480 data: 0.0036 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:46 lr: 0.000192 loss: 0.1594 (0.1691) grad: 0.1234 (0.1235) time: 0.4508 data: 0.0045 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:34 lr: 0.000190 loss: 0.1721 (0.1684) grad: 0.1336 (0.1270) time: 0.4563 data: 0.0043 max mem: 22446 +train: [11] [100/400] eta: 0:02:22 lr: 0.000189 loss: 0.1653 (0.1647) grad: 0.1124 (0.1243) time: 0.4503 data: 0.0044 max mem: 22446 +train: [11] [120/400] eta: 0:02:13 lr: 0.000187 loss: 0.1478 (0.1651) grad: 0.1064 (0.1246) time: 0.4737 data: 0.0045 max mem: 22446 +train: [11] [140/400] eta: 0:02:03 lr: 0.000186 loss: 0.1557 (0.1651) grad: 0.1020 (0.1214) time: 0.4663 data: 0.0045 max mem: 22446 +train: [11] [160/400] eta: 0:01:53 lr: 0.000184 loss: 0.1532 (0.1655) grad: 0.0996 (0.1224) time: 0.4549 data: 0.0043 max mem: 22446 +train: [11] [180/400] eta: 0:01:43 lr: 0.000183 loss: 0.1549 (0.1649) grad: 0.0988 (0.1202) time: 0.4531 data: 0.0038 max mem: 22446 +train: [11] [200/400] eta: 0:01:33 lr: 0.000181 loss: 0.1445 (0.1629) grad: 0.1039 (0.1193) time: 0.4688 data: 0.0041 max mem: 22446 +train: [11] [220/400] eta: 0:01:24 lr: 0.000180 loss: 0.1360 (0.1609) grad: 0.1071 (0.1181) time: 0.4492 data: 0.0041 max mem: 22446 +train: [11] [240/400] eta: 0:01:14 lr: 0.000178 loss: 0.1360 (0.1593) grad: 0.0948 (0.1154) time: 0.4426 data: 0.0043 max mem: 22446 +train: [11] [260/400] eta: 0:01:04 lr: 0.000177 loss: 0.1430 (0.1584) grad: 0.0858 (0.1138) time: 0.4491 data: 0.0044 max mem: 22446 +train: [11] [280/400] eta: 0:00:55 lr: 0.000175 loss: 0.1467 (0.1590) grad: 0.1110 (0.1150) time: 0.4469 data: 0.0044 max mem: 22446 +train: [11] [300/400] eta: 0:00:47 lr: 0.000174 loss: 0.1648 (0.1594) grad: 0.1206 (0.1150) time: 0.6029 data: 0.1676 max mem: 22446 +train: [11] [320/400] eta: 0:00:37 lr: 0.000172 loss: 0.1520 (0.1583) grad: 0.1024 (0.1136) time: 0.4423 data: 0.0032 max mem: 22446 +train: [11] [340/400] eta: 0:00:28 lr: 0.000170 loss: 0.1391 (0.1573) grad: 0.0808 (0.1116) time: 0.4648 data: 0.0045 max mem: 22446 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 0.1419 (0.1563) grad: 0.0801 (0.1102) time: 0.4544 data: 0.0043 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.1419 (0.1555) grad: 0.0711 (0.1084) time: 0.4467 data: 0.0041 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.1407 (0.1550) grad: 0.0813 (0.1075) time: 0.4541 data: 0.0040 max mem: 22446 +train: [11] Total time: 0:03:07 (0.4679 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.1407 (0.1550) grad: 0.0813 (0.1075) +eval (validation): [11] [ 0/63] eta: 0:03:19 time: 3.1740 data: 2.8903 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:20 time: 0.3418 data: 0.0031 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3473 data: 0.0030 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3232 data: 0.0037 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3222 data: 0.0037 max mem: 22446 +eval (validation): [11] Total time: 0:00:24 (0.3864 s / it) +cv: [11] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.047 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:21:13 lr: nan time: 3.1846 data: 2.8402 max mem: 22446 +train: [12] [ 20/400] eta: 0:03:42 lr: 0.000164 loss: 0.1280 (0.1292) grad: 0.0814 (0.0853) time: 0.4555 data: 0.0037 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:06 lr: 0.000163 loss: 0.1280 (0.1321) grad: 0.0812 (0.0817) time: 0.4492 data: 0.0037 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:48 lr: 0.000161 loss: 0.1265 (0.1302) grad: 0.0759 (0.0786) time: 0.4516 data: 0.0041 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:34 lr: 0.000160 loss: 0.1309 (0.1314) grad: 0.0713 (0.0800) time: 0.4438 data: 0.0043 max mem: 22446 +train: [12] [100/400] eta: 0:02:23 lr: 0.000158 loss: 0.1373 (0.1350) grad: 0.0751 (0.0807) time: 0.4503 data: 0.0042 max mem: 22446 +train: [12] [120/400] eta: 0:02:13 lr: 0.000156 loss: 0.1354 (0.1360) grad: 0.0826 (0.0813) time: 0.4724 data: 0.0042 max mem: 22446 +train: [12] [140/400] eta: 0:02:03 lr: 0.000155 loss: 0.1218 (0.1363) grad: 0.0840 (0.0810) time: 0.4721 data: 0.0042 max mem: 22446 +train: [12] [160/400] eta: 0:01:53 lr: 0.000153 loss: 0.1310 (0.1362) grad: 0.0865 (0.0829) time: 0.4578 data: 0.0042 max mem: 22446 +train: [12] [180/400] eta: 0:01:43 lr: 0.000152 loss: 0.1353 (0.1369) grad: 0.0859 (0.0826) time: 0.4398 data: 0.0041 max mem: 22446 +train: [12] [200/400] eta: 0:01:33 lr: 0.000150 loss: 0.1341 (0.1366) grad: 0.0837 (0.0840) time: 0.4560 data: 0.0042 max mem: 22446 +train: [12] [220/400] eta: 0:01:24 lr: 0.000149 loss: 0.1314 (0.1371) grad: 0.0837 (0.0840) time: 0.4561 data: 0.0043 max mem: 22446 +train: [12] [240/400] eta: 0:01:14 lr: 0.000147 loss: 0.1379 (0.1382) grad: 0.0773 (0.0838) time: 0.4505 data: 0.0041 max mem: 22446 +train: [12] [260/400] eta: 0:01:04 lr: 0.000145 loss: 0.1384 (0.1378) grad: 0.0757 (0.0827) time: 0.4437 data: 0.0040 max mem: 22446 +train: [12] [280/400] eta: 0:00:55 lr: 0.000144 loss: 0.1277 (0.1370) grad: 0.0671 (0.0821) time: 0.4544 data: 0.0043 max mem: 22446 +train: [12] [300/400] eta: 0:00:47 lr: 0.000142 loss: 0.1298 (0.1376) grad: 0.0825 (0.0822) time: 0.6138 data: 0.1719 max mem: 22446 +train: [12] [320/400] eta: 0:00:37 lr: 0.000141 loss: 0.1308 (0.1370) grad: 0.0730 (0.0814) time: 0.4477 data: 0.0036 max mem: 22446 +train: [12] [340/400] eta: 0:00:28 lr: 0.000139 loss: 0.1322 (0.1374) grad: 0.0738 (0.0818) time: 0.4542 data: 0.0044 max mem: 22446 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 0.1371 (0.1372) grad: 0.0852 (0.0818) time: 0.4436 data: 0.0044 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.1349 (0.1373) grad: 0.0719 (0.0815) time: 0.4512 data: 0.0044 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.1349 (0.1369) grad: 0.0673 (0.0811) time: 0.4542 data: 0.0044 max mem: 22446 +train: [12] Total time: 0:03:07 (0.4683 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.1349 (0.1369) grad: 0.0673 (0.0811) +eval (validation): [12] [ 0/63] eta: 0:03:13 time: 3.0679 data: 2.8319 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:19 time: 0.3341 data: 0.0064 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3631 data: 0.0040 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3119 data: 0.0022 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3082 data: 0.0023 max mem: 22446 +eval (validation): [12] Total time: 0:00:24 (0.3831 s / it) +cv: [12] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.048 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:22:00 lr: nan time: 3.3022 data: 2.8932 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:38 lr: 0.000133 loss: 0.1418 (0.1389) grad: 0.0507 (0.0632) time: 0.4397 data: 0.0038 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:03 lr: 0.000131 loss: 0.1343 (0.1303) grad: 0.0554 (0.0641) time: 0.4420 data: 0.0038 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:47 lr: 0.000130 loss: 0.1222 (0.1281) grad: 0.0676 (0.0679) time: 0.4520 data: 0.0044 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:33 lr: 0.000128 loss: 0.1243 (0.1283) grad: 0.0607 (0.0657) time: 0.4473 data: 0.0043 max mem: 22446 +train: [13] [100/400] eta: 0:02:22 lr: 0.000127 loss: 0.1243 (0.1276) grad: 0.0537 (0.0650) time: 0.4535 data: 0.0043 max mem: 22446 +train: [13] [120/400] eta: 0:02:12 lr: 0.000125 loss: 0.1229 (0.1276) grad: 0.0565 (0.0647) time: 0.4628 data: 0.0043 max mem: 22446 +train: [13] [140/400] eta: 0:02:03 lr: 0.000124 loss: 0.1181 (0.1275) grad: 0.0592 (0.0648) time: 0.4802 data: 0.0044 max mem: 22446 +train: [13] [160/400] eta: 0:01:53 lr: 0.000122 loss: 0.1202 (0.1269) grad: 0.0561 (0.0641) time: 0.4705 data: 0.0044 max mem: 22446 +train: [13] [180/400] eta: 0:01:43 lr: 0.000120 loss: 0.1262 (0.1286) grad: 0.0618 (0.0651) time: 0.4461 data: 0.0042 max mem: 22446 +train: [13] [200/400] eta: 0:01:34 lr: 0.000119 loss: 0.1282 (0.1289) grad: 0.0683 (0.0652) time: 0.4695 data: 0.0042 max mem: 22446 +train: [13] [220/400] eta: 0:01:24 lr: 0.000117 loss: 0.1212 (0.1281) grad: 0.0672 (0.0651) time: 0.4589 data: 0.0043 max mem: 22446 +train: [13] [240/400] eta: 0:01:14 lr: 0.000116 loss: 0.1249 (0.1283) grad: 0.0621 (0.0653) time: 0.4605 data: 0.0044 max mem: 22446 +train: [13] [260/400] eta: 0:01:05 lr: 0.000114 loss: 0.1249 (0.1282) grad: 0.0621 (0.0653) time: 0.4497 data: 0.0044 max mem: 22446 +train: [13] [280/400] eta: 0:00:55 lr: 0.000113 loss: 0.1154 (0.1275) grad: 0.0648 (0.0653) time: 0.4558 data: 0.0044 max mem: 22446 +train: [13] [300/400] eta: 0:00:47 lr: 0.000111 loss: 0.1154 (0.1274) grad: 0.0639 (0.0652) time: 0.6014 data: 0.1698 max mem: 22446 +train: [13] [320/400] eta: 0:00:37 lr: 0.000110 loss: 0.1266 (0.1272) grad: 0.0569 (0.0647) time: 0.4488 data: 0.0034 max mem: 22446 +train: [13] [340/400] eta: 0:00:28 lr: 0.000108 loss: 0.1266 (0.1266) grad: 0.0531 (0.0642) time: 0.4445 data: 0.0042 max mem: 22446 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 0.1058 (0.1257) grad: 0.0514 (0.0637) time: 0.4651 data: 0.0043 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.1093 (0.1257) grad: 0.0542 (0.0634) time: 0.4550 data: 0.0040 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.1253 (0.1256) grad: 0.0551 (0.0633) time: 0.4489 data: 0.0042 max mem: 22446 +train: [13] Total time: 0:03:08 (0.4702 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.1253 (0.1256) grad: 0.0551 (0.0633) +eval (validation): [13] [ 0/63] eta: 0:03:18 time: 3.1432 data: 2.8531 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:21 time: 0.3661 data: 0.0043 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3522 data: 0.0035 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3278 data: 0.0037 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3242 data: 0.0037 max mem: 22446 +eval (validation): [13] Total time: 0:00:24 (0.3968 s / it) +cv: [13] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.048 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:20:04 lr: nan time: 3.0104 data: 2.6481 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:41 lr: 0.000102 loss: 0.1137 (0.1229) grad: 0.0515 (0.0531) time: 0.4604 data: 0.0032 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:08 lr: 0.000101 loss: 0.1196 (0.1213) grad: 0.0527 (0.0596) time: 0.4609 data: 0.0041 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:49 lr: 0.000099 loss: 0.1174 (0.1197) grad: 0.0567 (0.0581) time: 0.4528 data: 0.0042 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:35 lr: 0.000098 loss: 0.1085 (0.1185) grad: 0.0552 (0.0586) time: 0.4419 data: 0.0041 max mem: 22446 +train: [14] [100/400] eta: 0:02:23 lr: 0.000096 loss: 0.1067 (0.1181) grad: 0.0555 (0.0581) time: 0.4515 data: 0.0043 max mem: 22446 +train: [14] [120/400] eta: 0:02:13 lr: 0.000095 loss: 0.1206 (0.1208) grad: 0.0524 (0.0583) time: 0.4612 data: 0.0042 max mem: 22446 +train: [14] [140/400] eta: 0:02:03 lr: 0.000093 loss: 0.1272 (0.1221) grad: 0.0538 (0.0583) time: 0.4781 data: 0.0044 max mem: 22446 +train: [14] [160/400] eta: 0:01:54 lr: 0.000092 loss: 0.1082 (0.1211) grad: 0.0501 (0.0575) time: 0.4664 data: 0.0042 max mem: 22446 +train: [14] [180/400] eta: 0:01:43 lr: 0.000090 loss: 0.1060 (0.1201) grad: 0.0467 (0.0566) time: 0.4450 data: 0.0044 max mem: 22446 +train: [14] [200/400] eta: 0:01:34 lr: 0.000089 loss: 0.1150 (0.1201) grad: 0.0494 (0.0563) time: 0.4690 data: 0.0044 max mem: 22446 +train: [14] [220/400] eta: 0:01:24 lr: 0.000088 loss: 0.1097 (0.1195) grad: 0.0515 (0.0562) time: 0.4496 data: 0.0041 max mem: 22446 +train: [14] [240/400] eta: 0:01:15 lr: 0.000086 loss: 0.1087 (0.1189) grad: 0.0532 (0.0559) time: 0.4664 data: 0.0045 max mem: 22446 +train: [14] [260/400] eta: 0:01:05 lr: 0.000085 loss: 0.1126 (0.1185) grad: 0.0553 (0.0560) time: 0.4576 data: 0.0043 max mem: 22446 +train: [14] [280/400] eta: 0:00:56 lr: 0.000083 loss: 0.1164 (0.1196) grad: 0.0524 (0.0561) time: 0.4541 data: 0.0043 max mem: 22446 +train: [14] [300/400] eta: 0:00:47 lr: 0.000082 loss: 0.1160 (0.1196) grad: 0.0524 (0.0561) time: 0.6198 data: 0.1658 max mem: 22446 +train: [14] [320/400] eta: 0:00:38 lr: 0.000081 loss: 0.1136 (0.1193) grad: 0.0520 (0.0557) time: 0.4731 data: 0.0041 max mem: 22446 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 0.1213 (0.1199) grad: 0.0516 (0.0556) time: 0.4444 data: 0.0043 max mem: 22446 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 0.1214 (0.1199) grad: 0.0530 (0.0557) time: 0.4502 data: 0.0041 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.1214 (0.1202) grad: 0.0530 (0.0556) time: 0.4537 data: 0.0042 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.1223 (0.1202) grad: 0.0502 (0.0554) time: 0.4585 data: 0.0041 max mem: 22446 +train: [14] Total time: 0:03:09 (0.4726 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.1223 (0.1202) grad: 0.0502 (0.0554) +eval (validation): [14] [ 0/63] eta: 0:03:20 time: 3.1855 data: 2.8911 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:21 time: 0.3613 data: 0.0032 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3349 data: 0.0033 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3263 data: 0.0035 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3254 data: 0.0035 max mem: 22446 +eval (validation): [14] Total time: 0:00:24 (0.3904 s / it) +cv: [14] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.049 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:21:36 lr: nan time: 3.2405 data: 2.8488 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:41 lr: 0.000074 loss: 0.1184 (0.1218) grad: 0.0454 (0.0504) time: 0.4495 data: 0.0033 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:06 lr: 0.000072 loss: 0.1200 (0.1207) grad: 0.0492 (0.0537) time: 0.4495 data: 0.0040 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:49 lr: 0.000071 loss: 0.1061 (0.1168) grad: 0.0529 (0.0522) time: 0.4594 data: 0.0044 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:35 lr: 0.000070 loss: 0.1061 (0.1157) grad: 0.0491 (0.0517) time: 0.4530 data: 0.0041 max mem: 22446 +train: [15] [100/400] eta: 0:02:24 lr: 0.000068 loss: 0.1079 (0.1138) grad: 0.0494 (0.0519) time: 0.4507 data: 0.0044 max mem: 22446 +train: [15] [120/400] eta: 0:02:13 lr: 0.000067 loss: 0.1064 (0.1133) grad: 0.0515 (0.0518) time: 0.4668 data: 0.0042 max mem: 22446 +train: [15] [140/400] eta: 0:02:04 lr: 0.000066 loss: 0.1115 (0.1137) grad: 0.0515 (0.0516) time: 0.4749 data: 0.0046 max mem: 22446 +train: [15] [160/400] eta: 0:01:54 lr: 0.000064 loss: 0.1073 (0.1132) grad: 0.0521 (0.0522) time: 0.4756 data: 0.0043 max mem: 22446 +train: [15] [180/400] eta: 0:01:44 lr: 0.000063 loss: 0.1060 (0.1134) grad: 0.0529 (0.0519) time: 0.4464 data: 0.0038 max mem: 22446 +train: [15] [200/400] eta: 0:01:34 lr: 0.000062 loss: 0.1166 (0.1141) grad: 0.0475 (0.0516) time: 0.4603 data: 0.0043 max mem: 22446 +train: [15] [220/400] eta: 0:01:24 lr: 0.000061 loss: 0.1163 (0.1137) grad: 0.0497 (0.0517) time: 0.4526 data: 0.0039 max mem: 22446 +train: [15] [240/400] eta: 0:01:15 lr: 0.000059 loss: 0.1091 (0.1133) grad: 0.0499 (0.0518) time: 0.4564 data: 0.0041 max mem: 22446 +train: [15] [260/400] eta: 0:01:05 lr: 0.000058 loss: 0.1091 (0.1135) grad: 0.0498 (0.0513) time: 0.4524 data: 0.0040 max mem: 22446 +train: [15] [280/400] eta: 0:00:56 lr: 0.000057 loss: 0.1134 (0.1137) grad: 0.0468 (0.0513) time: 0.4596 data: 0.0042 max mem: 22446 +train: [15] [300/400] eta: 0:00:47 lr: 0.000056 loss: 0.1117 (0.1136) grad: 0.0475 (0.0513) time: 0.6052 data: 0.1750 max mem: 22446 +train: [15] [320/400] eta: 0:00:38 lr: 0.000054 loss: 0.1051 (0.1135) grad: 0.0516 (0.0515) time: 0.4546 data: 0.0036 max mem: 22446 +train: [15] [340/400] eta: 0:00:28 lr: 0.000053 loss: 0.1045 (0.1137) grad: 0.0538 (0.0517) time: 0.4555 data: 0.0043 max mem: 22446 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 0.1050 (0.1138) grad: 0.0526 (0.0518) time: 0.4543 data: 0.0042 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.1020 (0.1133) grad: 0.0504 (0.0516) time: 0.4540 data: 0.0044 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.1109 (0.1139) grad: 0.0462 (0.0514) time: 0.4528 data: 0.0043 max mem: 22446 +train: [15] Total time: 0:03:08 (0.4717 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.1109 (0.1139) grad: 0.0462 (0.0514) +eval (validation): [15] [ 0/63] eta: 0:03:12 time: 3.0480 data: 2.8056 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:20 time: 0.3450 data: 0.0045 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:09 time: 0.3366 data: 0.0028 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3365 data: 0.0038 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3363 data: 0.0038 max mem: 22446 +eval (validation): [15] Total time: 0:00:24 (0.3869 s / it) +cv: [15] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.048 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:21:21 lr: nan time: 3.2032 data: 2.8541 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:42 lr: 0.000048 loss: 0.1105 (0.1155) grad: 0.0474 (0.0506) time: 0.4546 data: 0.0030 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:07 lr: 0.000047 loss: 0.1165 (0.1163) grad: 0.0479 (0.0516) time: 0.4502 data: 0.0036 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:48 lr: 0.000046 loss: 0.1150 (0.1152) grad: 0.0474 (0.0503) time: 0.4470 data: 0.0042 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:35 lr: 0.000045 loss: 0.1156 (0.1165) grad: 0.0477 (0.0502) time: 0.4577 data: 0.0043 max mem: 22446 +train: [16] [100/400] eta: 0:02:23 lr: 0.000044 loss: 0.1159 (0.1171) grad: 0.0496 (0.0501) time: 0.4380 data: 0.0042 max mem: 22446 +train: [16] [120/400] eta: 0:02:12 lr: 0.000043 loss: 0.1055 (0.1154) grad: 0.0496 (0.0502) time: 0.4621 data: 0.0042 max mem: 22446 +train: [16] [140/400] eta: 0:02:03 lr: 0.000042 loss: 0.1067 (0.1151) grad: 0.0503 (0.0506) time: 0.4822 data: 0.0043 max mem: 22446 +train: [16] [160/400] eta: 0:01:54 lr: 0.000041 loss: 0.1093 (0.1144) grad: 0.0508 (0.0505) time: 0.4762 data: 0.0043 max mem: 22446 +train: [16] [180/400] eta: 0:01:44 lr: 0.000040 loss: 0.1139 (0.1146) grad: 0.0469 (0.0502) time: 0.4582 data: 0.0043 max mem: 22446 +train: [16] [200/400] eta: 0:01:34 lr: 0.000039 loss: 0.1143 (0.1148) grad: 0.0478 (0.0507) time: 0.4456 data: 0.0041 max mem: 22446 +train: [16] [220/400] eta: 0:01:24 lr: 0.000038 loss: 0.1086 (0.1147) grad: 0.0515 (0.0506) time: 0.4769 data: 0.0043 max mem: 22446 +train: [16] [240/400] eta: 0:01:15 lr: 0.000036 loss: 0.1086 (0.1144) grad: 0.0482 (0.0505) time: 0.4599 data: 0.0043 max mem: 22446 +train: [16] [260/400] eta: 0:01:05 lr: 0.000035 loss: 0.1169 (0.1150) grad: 0.0483 (0.0504) time: 0.4430 data: 0.0041 max mem: 22446 +train: [16] [280/400] eta: 0:00:56 lr: 0.000034 loss: 0.1172 (0.1148) grad: 0.0500 (0.0508) time: 0.4566 data: 0.0046 max mem: 22446 +train: [16] [300/400] eta: 0:00:47 lr: 0.000033 loss: 0.1075 (0.1144) grad: 0.0511 (0.0509) time: 0.5978 data: 0.1652 max mem: 22446 +train: [16] [320/400] eta: 0:00:37 lr: 0.000032 loss: 0.1095 (0.1145) grad: 0.0511 (0.0509) time: 0.4464 data: 0.0030 max mem: 22446 +train: [16] [340/400] eta: 0:00:28 lr: 0.000031 loss: 0.1150 (0.1151) grad: 0.0516 (0.0511) time: 0.4734 data: 0.0039 max mem: 22446 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 0.1116 (0.1148) grad: 0.0516 (0.0511) time: 0.4633 data: 0.0043 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.1048 (0.1142) grad: 0.0477 (0.0509) time: 0.4542 data: 0.0042 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.1044 (0.1141) grad: 0.0508 (0.0511) time: 0.4444 data: 0.0043 max mem: 22446 +train: [16] Total time: 0:03:08 (0.4718 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.1044 (0.1141) grad: 0.0508 (0.0511) +eval (validation): [16] [ 0/63] eta: 0:03:24 time: 3.2435 data: 2.9502 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:21 time: 0.3632 data: 0.0040 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:09 time: 0.3281 data: 0.0031 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3347 data: 0.0037 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3350 data: 0.0038 max mem: 22446 +eval (validation): [16] Total time: 0:00:24 (0.3933 s / it) +cv: [16] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 0.041 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:12 lr: nan time: 3.3303 data: 2.9349 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:50 lr: 0.000028 loss: 0.0963 (0.1053) grad: 0.0497 (0.0497) time: 0.4705 data: 0.0034 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:11 lr: 0.000027 loss: 0.1037 (0.1106) grad: 0.0502 (0.0515) time: 0.4520 data: 0.0042 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:51 lr: 0.000026 loss: 0.1180 (0.1137) grad: 0.0482 (0.0504) time: 0.4536 data: 0.0041 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:38 lr: 0.000025 loss: 0.1206 (0.1155) grad: 0.0485 (0.0503) time: 0.4593 data: 0.0042 max mem: 22446 +train: [17] [100/400] eta: 0:02:25 lr: 0.000024 loss: 0.1152 (0.1145) grad: 0.0483 (0.0499) time: 0.4498 data: 0.0041 max mem: 22446 +train: [17] [120/400] eta: 0:02:14 lr: 0.000023 loss: 0.1087 (0.1132) grad: 0.0443 (0.0488) time: 0.4471 data: 0.0037 max mem: 22446 +train: [17] [140/400] eta: 0:02:04 lr: 0.000023 loss: 0.1113 (0.1130) grad: 0.0449 (0.0490) time: 0.4823 data: 0.0042 max mem: 22446 +train: [17] [160/400] eta: 0:01:54 lr: 0.000022 loss: 0.1084 (0.1128) grad: 0.0495 (0.0493) time: 0.4743 data: 0.0042 max mem: 22446 +train: [17] [180/400] eta: 0:01:44 lr: 0.000021 loss: 0.1048 (0.1126) grad: 0.0491 (0.0492) time: 0.4612 data: 0.0043 max mem: 22446 +train: [17] [200/400] eta: 0:01:34 lr: 0.000020 loss: 0.1036 (0.1125) grad: 0.0473 (0.0494) time: 0.4468 data: 0.0039 max mem: 22446 +train: [17] [220/400] eta: 0:01:25 lr: 0.000019 loss: 0.1059 (0.1124) grad: 0.0482 (0.0494) time: 0.4719 data: 0.0044 max mem: 22446 +train: [17] [240/400] eta: 0:01:15 lr: 0.000019 loss: 0.1063 (0.1125) grad: 0.0458 (0.0494) time: 0.4555 data: 0.0043 max mem: 22446 +train: [17] [260/400] eta: 0:01:05 lr: 0.000018 loss: 0.1075 (0.1126) grad: 0.0488 (0.0494) time: 0.4495 data: 0.0043 max mem: 22446 +train: [17] [280/400] eta: 0:00:56 lr: 0.000017 loss: 0.1065 (0.1122) grad: 0.0461 (0.0492) time: 0.4532 data: 0.0043 max mem: 22446 +train: [17] [300/400] eta: 0:00:47 lr: 0.000016 loss: 0.1065 (0.1120) grad: 0.0483 (0.0495) time: 0.6256 data: 0.1694 max mem: 22446 +train: [17] [320/400] eta: 0:00:38 lr: 0.000016 loss: 0.1105 (0.1120) grad: 0.0494 (0.0496) time: 0.4272 data: 0.0035 max mem: 22446 +train: [17] [340/400] eta: 0:00:28 lr: 0.000015 loss: 0.1104 (0.1120) grad: 0.0466 (0.0494) time: 0.4534 data: 0.0043 max mem: 22446 +train: [17] [360/400] eta: 0:00:18 lr: 0.000014 loss: 0.1063 (0.1117) grad: 0.0466 (0.0494) time: 0.4536 data: 0.0043 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.1092 (0.1119) grad: 0.0472 (0.0493) time: 0.4403 data: 0.0040 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.1151 (0.1119) grad: 0.0478 (0.0493) time: 0.4453 data: 0.0043 max mem: 22446 +train: [17] Total time: 0:03:08 (0.4715 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.1151 (0.1119) grad: 0.0478 (0.0493) +eval (validation): [17] [ 0/63] eta: 0:03:20 time: 3.1754 data: 2.8879 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3742 data: 0.0033 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3341 data: 0.0035 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3265 data: 0.0036 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3230 data: 0.0036 max mem: 22446 +eval (validation): [17] Total time: 0:00:24 (0.3931 s / it) +cv: [17] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.049 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:21:40 lr: nan time: 3.2517 data: 2.8560 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:48 lr: 0.000012 loss: 0.1020 (0.1058) grad: 0.0442 (0.0471) time: 0.4685 data: 0.0034 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:09 lr: 0.000012 loss: 0.1043 (0.1110) grad: 0.0470 (0.0480) time: 0.4492 data: 0.0043 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:49 lr: 0.000011 loss: 0.1049 (0.1101) grad: 0.0498 (0.0483) time: 0.4383 data: 0.0042 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:36 lr: 0.000011 loss: 0.1066 (0.1105) grad: 0.0480 (0.0485) time: 0.4648 data: 0.0042 max mem: 22446 +train: [18] [100/400] eta: 0:02:24 lr: 0.000010 loss: 0.1128 (0.1119) grad: 0.0480 (0.0491) time: 0.4492 data: 0.0040 max mem: 22446 +train: [18] [120/400] eta: 0:02:13 lr: 0.000009 loss: 0.1055 (0.1106) grad: 0.0502 (0.0494) time: 0.4548 data: 0.0041 max mem: 22446 +train: [18] [140/400] eta: 0:02:03 lr: 0.000009 loss: 0.1094 (0.1104) grad: 0.0488 (0.0494) time: 0.4699 data: 0.0043 max mem: 22446 +train: [18] [160/400] eta: 0:01:54 lr: 0.000008 loss: 0.1071 (0.1102) grad: 0.0479 (0.0492) time: 0.4787 data: 0.0043 max mem: 22446 +train: [18] [180/400] eta: 0:01:44 lr: 0.000008 loss: 0.1071 (0.1101) grad: 0.0504 (0.0496) time: 0.4561 data: 0.0044 max mem: 22446 +train: [18] [200/400] eta: 0:01:34 lr: 0.000007 loss: 0.1063 (0.1098) grad: 0.0514 (0.0498) time: 0.4488 data: 0.0040 max mem: 22446 +train: [18] [220/400] eta: 0:01:24 lr: 0.000007 loss: 0.1013 (0.1095) grad: 0.0467 (0.0496) time: 0.4770 data: 0.0044 max mem: 22446 +train: [18] [240/400] eta: 0:01:15 lr: 0.000006 loss: 0.1088 (0.1095) grad: 0.0497 (0.0497) time: 0.4676 data: 0.0044 max mem: 22446 +train: [18] [260/400] eta: 0:01:05 lr: 0.000006 loss: 0.1116 (0.1100) grad: 0.0497 (0.0495) time: 0.4636 data: 0.0044 max mem: 22446 +train: [18] [280/400] eta: 0:00:56 lr: 0.000006 loss: 0.1095 (0.1101) grad: 0.0502 (0.0499) time: 0.4691 data: 0.0044 max mem: 22446 +train: [18] [300/400] eta: 0:00:48 lr: 0.000005 loss: 0.1048 (0.1098) grad: 0.0514 (0.0500) time: 0.6123 data: 0.1742 max mem: 22446 +train: [18] [320/400] eta: 0:00:38 lr: 0.000005 loss: 0.1090 (0.1099) grad: 0.0493 (0.0500) time: 0.4546 data: 0.0035 max mem: 22446 +train: [18] [340/400] eta: 0:00:28 lr: 0.000004 loss: 0.1130 (0.1102) grad: 0.0488 (0.0500) time: 0.4646 data: 0.0035 max mem: 22446 +train: [18] [360/400] eta: 0:00:19 lr: 0.000004 loss: 0.1091 (0.1101) grad: 0.0487 (0.0502) time: 0.4637 data: 0.0041 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.1091 (0.1096) grad: 0.0478 (0.0500) time: 0.4648 data: 0.0045 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.1045 (0.1093) grad: 0.0472 (0.0500) time: 0.4551 data: 0.0045 max mem: 22446 +train: [18] Total time: 0:03:10 (0.4761 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.1045 (0.1093) grad: 0.0472 (0.0500) +eval (validation): [18] [ 0/63] eta: 0:03:25 time: 3.2564 data: 2.9536 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:22 time: 0.3798 data: 0.0043 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:10 time: 0.3570 data: 0.0033 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3331 data: 0.0035 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3249 data: 0.0034 max mem: 22446 +eval (validation): [18] Total time: 0:00:25 (0.4075 s / it) +cv: [18] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.049 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:24:12 lr: nan time: 3.6300 data: 3.2218 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:53 lr: 0.000003 loss: 0.1099 (0.1068) grad: 0.0462 (0.0446) time: 0.4639 data: 0.0023 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:15 lr: 0.000003 loss: 0.1099 (0.1096) grad: 0.0480 (0.0487) time: 0.4651 data: 0.0043 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:54 lr: 0.000002 loss: 0.1061 (0.1091) grad: 0.0496 (0.0483) time: 0.4507 data: 0.0044 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:39 lr: 0.000002 loss: 0.1068 (0.1097) grad: 0.0507 (0.0486) time: 0.4595 data: 0.0041 max mem: 22446 +train: [19] [100/400] eta: 0:02:27 lr: 0.000002 loss: 0.1068 (0.1110) grad: 0.0471 (0.0481) time: 0.4598 data: 0.0042 max mem: 22446 +train: [19] [120/400] eta: 0:02:15 lr: 0.000002 loss: 0.1073 (0.1102) grad: 0.0463 (0.0479) time: 0.4482 data: 0.0041 max mem: 22446 +train: [19] [140/400] eta: 0:02:05 lr: 0.000001 loss: 0.1052 (0.1093) grad: 0.0468 (0.0478) time: 0.4726 data: 0.0045 max mem: 22446 +train: [19] [160/400] eta: 0:01:55 lr: 0.000001 loss: 0.1110 (0.1094) grad: 0.0469 (0.0480) time: 0.4778 data: 0.0044 max mem: 22446 +train: [19] [180/400] eta: 0:01:45 lr: 0.000001 loss: 0.1121 (0.1096) grad: 0.0478 (0.0482) time: 0.4754 data: 0.0043 max mem: 22446 +train: [19] [200/400] eta: 0:01:35 lr: 0.000001 loss: 0.1049 (0.1095) grad: 0.0479 (0.0483) time: 0.4451 data: 0.0041 max mem: 22446 +train: [19] [220/400] eta: 0:01:25 lr: 0.000001 loss: 0.1073 (0.1102) grad: 0.0490 (0.0486) time: 0.4708 data: 0.0042 max mem: 22446 +train: [19] [240/400] eta: 0:01:16 lr: 0.000001 loss: 0.1168 (0.1100) grad: 0.0488 (0.0485) time: 0.4774 data: 0.0043 max mem: 22446 +train: [19] [260/400] eta: 0:01:06 lr: 0.000000 loss: 0.1050 (0.1098) grad: 0.0488 (0.0488) time: 0.4680 data: 0.0041 max mem: 22446 +train: [19] [280/400] eta: 0:00:56 lr: 0.000000 loss: 0.1069 (0.1102) grad: 0.0494 (0.0487) time: 0.4483 data: 0.0044 max mem: 22446 +train: [19] [300/400] eta: 0:00:48 lr: 0.000000 loss: 0.1114 (0.1105) grad: 0.0478 (0.0489) time: 0.6595 data: 0.1808 max mem: 22446 +train: [19] [320/400] eta: 0:00:38 lr: 0.000000 loss: 0.1082 (0.1102) grad: 0.0504 (0.0490) time: 0.4707 data: 0.0039 max mem: 22446 +train: [19] [340/400] eta: 0:00:29 lr: 0.000000 loss: 0.1022 (0.1099) grad: 0.0499 (0.0489) time: 0.4593 data: 0.0045 max mem: 22446 +train: [19] [360/400] eta: 0:00:19 lr: 0.000000 loss: 0.1057 (0.1098) grad: 0.0483 (0.0490) time: 0.4627 data: 0.0045 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.1116 (0.1099) grad: 0.0471 (0.0489) time: 0.4835 data: 0.0042 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.1088 (0.1097) grad: 0.0457 (0.0490) time: 0.4785 data: 0.0045 max mem: 22446 +train: [19] Total time: 0:03:13 (0.4833 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.1088 (0.1097) grad: 0.0457 (0.0490) +eval (validation): [19] [ 0/63] eta: 0:03:05 time: 2.9469 data: 2.7089 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:20 time: 0.3610 data: 0.0046 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3716 data: 0.0034 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3415 data: 0.0032 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3416 data: 0.0023 max mem: 22446 +eval (validation): [19] Total time: 0:00:25 (0.4038 s / it) +cv: [19] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.049 acc: 0.988 f1: 0.986 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9880952380952381, "hparam": [3.7, 1.0], "hparam_id": 32, "epoch": 19, "is_best": false, "best_score": 0.9885912698412699} +eval (train): [20] [ 0/297] eta: 0:14:54 time: 3.0108 data: 2.7208 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:29 time: 0.4159 data: 0.0049 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:58 time: 0.3818 data: 0.0033 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:42 time: 0.3718 data: 0.0037 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:29 time: 0.3596 data: 0.0035 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:19 time: 0.3686 data: 0.0036 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:10 time: 0.3725 data: 0.0037 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:01 time: 0.3544 data: 0.0036 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:53 time: 0.3565 data: 0.0033 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:45 time: 0.3639 data: 0.0037 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:37 time: 0.3537 data: 0.0038 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:29 time: 0.3572 data: 0.0031 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3525 data: 0.0036 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3458 data: 0.0040 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3751 data: 0.0035 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3641 data: 0.0037 max mem: 22446 +eval (train): [20] Total time: 0:01:51 (0.3764 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:06 time: 2.9641 data: 2.7170 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3861 data: 0.0035 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3476 data: 0.0028 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3380 data: 0.0030 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3381 data: 0.0022 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4024 s / it) +eval (test): [20] [ 0/79] eta: 0:03:49 time: 2.9108 data: 2.6816 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:29 time: 0.3728 data: 0.0163 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3747 data: 0.0129 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3469 data: 0.0025 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3114 data: 0.0021 max mem: 22446 +eval (test): [20] Total time: 0:00:30 (0.3890 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9885912698412699, "hparam": [1.9, 1.0], "hparam_id": 28, "epoch": 9, "is_best": true, "best_score": 0.9885912698412699} +eval (train): [20] [ 0/297] eta: 0:16:00 time: 3.2328 data: 2.9201 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:28 time: 0.4001 data: 0.0036 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:54 time: 0.3508 data: 0.0035 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:40 time: 0.3786 data: 0.0042 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:27 time: 0.3490 data: 0.0031 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:18 time: 0.3666 data: 0.0035 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:09 time: 0.3611 data: 0.0035 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:00 time: 0.3489 data: 0.0037 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:52 time: 0.3619 data: 0.0036 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:44 time: 0.3439 data: 0.0034 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:36 time: 0.3348 data: 0.0032 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3644 data: 0.0037 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3420 data: 0.0033 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3346 data: 0.0034 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3362 data: 0.0034 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3216 data: 0.0035 max mem: 22446 +eval (train): [20] Total time: 0:01:48 (0.3648 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:01 time: 2.8779 data: 2.6354 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3493 data: 0.0156 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3364 data: 0.0036 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3282 data: 0.0026 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3189 data: 0.0020 max mem: 22446 +eval (validation): [20] Total time: 0:00:24 (0.3813 s / it) +eval (test): [20] [ 0/79] eta: 0:03:40 time: 2.7883 data: 2.5552 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:27 time: 0.3413 data: 0.0029 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3692 data: 0.0029 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3446 data: 0.0035 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3249 data: 0.0032 max mem: 22446 +eval (test): [20] Total time: 0:00:30 (0.3801 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|----------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 9 | 0.00057 | 0.05 | 28 | [1.9, 1.0] | train | 0.0016463 | 1 | 0 | 1 | 0 | +| flat_mae | patch | attn | hcpya_task21 | best | 9 | 0.00057 | 0.05 | 28 | [1.9, 1.0] | validation | 0.041366 | 0.98859 | 0.0017393 | 0.98673 | 0.0022074 | +| flat_mae | patch | attn | hcpya_task21 | best | 9 | 0.00057 | 0.05 | 28 | [1.9, 1.0] | test | 0.057268 | 0.98393 | 0.0016795 | 0.98111 | 0.0021815 | + + +done! total time: 1:18:41 diff --git a/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/train_log.json b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..056064e39a682aa071d21e79f0ecf75eb67388c8 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.1619314897060393, "train/grad": 0.2347842913120985, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.10288330078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.09958740234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.09398193359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.08847412109375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.08284912109375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0752587890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.06655517578125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.056949462890625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.044306640625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.030762939453125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.017528076171875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9975579833984374, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9782171630859375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.94995849609375, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.9228216552734376, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.8966534423828123, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.863428649902344, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.824208984375, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.779810791015625, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.7382537841796877, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.6861927795410154, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.6316558837890627, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.569835205078125, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.502704772949219, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.4319564056396485, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.3474322509765626, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2722861099243166, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2050829315185547, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.1161295127868653, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.014490957260132, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.926974515914917, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8495586204528809, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.7475042819976807, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.6585865432024003, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5559950749576092, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4578166005015374, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.3597886260226368, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2746973140537738, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.190440217666328, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0959255427867174, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.028224991634488, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9733847388997674, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.9081070537492633, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.8571331559307873, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.8028151181619614, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.7515077861770988, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.7109960458055139, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.6664045352302491, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.6309695241972804, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04439699601382017, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04431765815243125, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04418536456301808, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.044057680796831845, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04392695102840662, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04374867331236601, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04354660983197391, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04332250061444938, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04302709811367095, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04271127389743924, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04240021047182381, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04193025056272745, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04147069065831602, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04080402566120028, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04016236809082329, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03955121939070523, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03879123218357563, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.037932342197746036, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03703422314487398, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.036262755505740644, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03538724333047867, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.034556590365245936, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03370136672630906, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03284309119917452, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.031992114558815954, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.031026656925678252, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03020701895467937, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02950197786092758, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.028612773921340703, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02765291156247258, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.026880793333984913, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.026240033307112755, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.025470544430427254, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024872279376722872, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.024254099079407752, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.023588669854216277, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.022791968539822845, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02211197883123532, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021542811170220375, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.020930956944357603, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02012323712464422, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.01963612517109141, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01894975150935352, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.01860760438721627, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.018409622577019037, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.01851206427672878, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01842730476288125, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.018306447404902428, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.018140778238885106, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.0598483085632324, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.0498218536376953, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0333361625671387, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.0169713497161865, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0007176399230957, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.978555202484131, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.9534852504730225, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.92586350440979, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.8900794982910156, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.852088212966919, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.8154585361480713, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.760997772216797, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.709094762802124, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.635519027709961, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.567080497741699, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.502908945083618, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4235599040985107, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.332149028778076, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.2315924167633057, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.138749837875366, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.0248241424560547, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.9080888032913208, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.779981017112732, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.6474405527114868, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.5168659687042236, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.3737075328826904, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.2580974102020264, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.1621558666229248, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.042479157447815, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.9084590673446655, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.7880856394767761, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.6748023629188538, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.5234295129776001, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.398535817861557, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.27505195140838623, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1942528635263443, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14843015372753143, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12658895552158356, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.11380945891141891, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.10465340316295624, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09740222990512848, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.093332439661026, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08730556070804596, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08239761739969254, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.07939658313989639, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.10101698338985443, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.12195812165737152, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.12405942380428314, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.10364090651273727, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.020833333333333332, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.021081349206349208, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.02132936507936508, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.02628968253968254, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.03373015873015873, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08878968253968254, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.17063492063492064, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.18700396825396826, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.18129960317460317, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.17435515873015872, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1753472222222222, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.1810515873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.18998015873015872, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.19990079365079366, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2088293650793651, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2214781746031746, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2537202380952381, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.3246527777777778, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.39533730158730157, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.43799603174603174, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.47668650793650796, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.5104166666666666, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.5424107142857143, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.564484126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.5791170634920635, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.5880456349206349, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.6096230158730159, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.6515376984126984, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.7160218253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.7735615079365079, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8038194444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8206845238095238, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.855406746031746, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8940972222222222, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9129464285714286, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9454365079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9568452380952381, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9620535714285714, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9709821428571429, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9665178571428571, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9598214285714286, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9580853174603174, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9657738095238095, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0025030286970017072, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0029503224065285014, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0031050482949936623, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.005337459067949024, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.007864049936804281, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.021648191673520768, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03074330734226069, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02765406283527148, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.023300672691191376, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.020300252907635194, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02191545279425594, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.02879782706971628, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.03581803623865577, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.04073454440592948, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.04345547110994265, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.052754546884127146, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.0835473782416113, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1360238301258376, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1743072918470414, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19972670496240838, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.23321304843583035, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2606605696446814, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.28450351463657814, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.3019170694965294, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.31808802209645876, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.331108483259651, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.3741054690381641, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.45654182573804103, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.5727442839682144, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.6775001368549621, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.7301565751323122, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.7608419945965407, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8185831537217219, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8752441389535432, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8983665189562114, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9346889114822587, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9451801823902308, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9530479144001331, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9592264677499754, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9636648623282091, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9637350412196485, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9648168612931756, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9675043269153318, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9677852208703476, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9716948921435703, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9630391052387148, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9576734454074766, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9544116396184142, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9606718674817601, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 0.8028151181619614, "validation/loss_best": 0.07939658313989639, "validation/acc_best": 0.9771825396825397, "validation/f1_best": 0.9716948921435703} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.224033698141575, "train/grad": 0.1677784312516451, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.972984619140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.95169677734375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.9170526123046874, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.8833551025390625, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.8507891845703126, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.8065655517578123, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.758291015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.7065252685546874, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.6413778686523437, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.575403137207031, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.513272399902344, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.4244589233398437, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.34230712890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.2294857788085936, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.1273101806640624, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.0339925384521482, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.9217158508300782, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.7981900405883788, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.6701581573486328, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.5600054550170899, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.4340968036651611, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.3133661651611328, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.1863130617141724, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.0563543009757996, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.9274663245677948, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.7824183279275894, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.6643849365413189, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.5703577452898025, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.4626890940219164, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.3598966496065259, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.2881787501461804, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.23786343436688184, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.18714565868489444, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.15592581330798566, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.1324530576169491, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.11785637147724629, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.10768298610113561, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.10078721780329943, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09518150245770812, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08963346819393336, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.08770560489967466, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.08665117665193975, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.08685119966045023, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.08962318715639413, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.09487050686962903, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.10200133506208658, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.11155747253447772, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.1415761289279908, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.22182656936347483, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04105496120639145, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04056463244371116, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03977109735831618, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03900275007821619, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03826068012043834, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03726052765734494, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03618369538336992, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03505917540751398, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03371163308620453, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.032447299463674424, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.031360687706619504, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02998314074240625, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02887332724407315, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02755951086990535, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02652754698880017, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02567813215777278, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.024732927354052664, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.023754142075777054, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022785244369879366, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02198071091901511, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02110274077858776, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02031236314214766, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01954947072546929, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.018845108756795525, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.018205667356960477, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.017418022067286074, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.016493054104503243, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.015764755236450583, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.014831200209446251, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.014043811829760671, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.013351033153012394, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.012876158605795354, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.012224695493932814, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01177825734601356, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.011287133257137611, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.010787196421297267, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.010533314478816464, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010362293563084678, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010124902064562775, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.009968548862379975, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010136899806093424, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010315676254685968, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.010660631789360196, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.011091285664588214, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.011692989218281581, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.01211778505705297, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01381200913223438, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.017694248739862816, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0241835229483695, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.873377561569214, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.8390419483184814, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.7837862968444824, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.7310991287231445, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.6807501316070557, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.6138665676116943, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.542515993118286, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.4676215648651123, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.3757169246673584, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.284050226211548, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.199131488800049, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.0787575244903564, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.9689974784851074, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.8214590549468994, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.6923012733459473, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.5798596143722534, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.4518476724624634, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.3206301927566528, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.1930828094482422, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.0870448350906372, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.9642863869667053, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.8382291197776794, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.692328155040741, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.5393303632736206, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.389034628868103, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.24403245747089386, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1834520399570465, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.15475305914878845, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1294928640127182, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11075736582279205, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.09913448244333267, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.09144122153520584, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.08312001079320908, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07613195478916168, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.06938822567462921, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06462274491786957, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.061919644474983215, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.061487652361392975, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.06328130513429642, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06725212931632996, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.07618250697851181, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.07686823606491089, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.07818172872066498, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08477024734020233, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08156009018421173, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08513455092906952, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.11457806825637817, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.28587672114372253, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.673559308052063, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.17881944444444445, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.17559523809523808, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.18129960317460317, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.19171626984126985, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.19866071428571427, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.20610119047619047, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.21924603174603174, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.23809523809523808, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2924107142857143, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.3663194444444444, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.42956349206349204, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.48214285714285715, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5143849206349206, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5548115079365079, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.5744047619047619, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.5838293650793651, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.5952380952380952, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.6168154761904762, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.6671626984126984, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7170138888888888, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7720734126984127, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8095238095238095, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8348214285714286, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8665674603174603, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9107142857142857, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9471726190476191, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9578373015873016, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.972718253968254, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9476686507936508, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9399801587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.022415110271687792, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02330231132061527, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.02933759453446312, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.03786427892631833, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.04091145483285508, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.04126581499912403, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.049336191274950265, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.06737902110934196, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.11715325369993093, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.1657389518032533, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.20167120006613098, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.2444150636233457, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.27076975723651586, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.3005809198034933, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.31438622676577604, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.32138666218836837, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.33523255884349323, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.37686352009950014, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.48412164454036377, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.5736624509715015, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.6679106758549922, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7355035850827218, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7737607036337066, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8254839150999858, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8936139539517283, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9383624825004487, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9460956133203688, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9482429664613986, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9571751509095039, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9602049718096216, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9622362479878882, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9639197334910851, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9662793855576672, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9681864225164848, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9714303055067107, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9736879844880898, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9760898349260836, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9772196731928727, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9744401476047202, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9739496088603207, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9673353294563557, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9670088462462975, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9649033551718279, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9615544576900698, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9681754527339621, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9723019062309617, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9602692272939182, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9293395050507918, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9270126928313797, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.10078721780329943, "validation/loss_best": 0.061487652361392975, "validation/acc_best": 0.9809027777777778, "validation/f1_best": 0.9772196731928727} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 0.8994081121683121, "train/grad": 0.19370902068912982, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.7449072265625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6972308349609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.6222344970703126, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.5523806762695314, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.487016296386719, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.401927185058594, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.312169189453125, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.2189877319335936, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.1055203247070313, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.9937325286865235, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.8925397491455078, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.7544863510131836, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.635114631652832, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.4845932388305665, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.3606220436096192, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.255624713897705, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.1355314445495606, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.004986298084259, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8665773916244507, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.7427487605810166, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.5970595084130764, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.46588775552809236, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.35093221452087164, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.2626746978238225, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.20084699453786015, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.15926379373297095, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.13916743801906706, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.12651979443617165, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.11370709611102939, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.10204027556814253, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.09380965302698314, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.08781642476096749, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.08138517116196453, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07640133703127504, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.07082839130423962, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06605508531443775, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06210433754138649, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06364386910572647, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.06351026099175215, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05876794643700123, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06980430879630148, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06760464735329151, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.11528280014172196, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.13445265574380755, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.2540086099412292, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.3053213317040354, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.4328711605723947, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.8123789388872683, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.3679208431951702, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0363499148003757, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03527246161364019, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03363007985055447, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0321914841234684, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.030957424743101, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.029541528960689904, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02828573397360742, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02720299002714455, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026105887107551098, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.025172665771096944, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024382979972288013, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023324979739263653, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022400075690820812, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0212232525087893, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020283124945126473, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.019539527585729955, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.018778741657733918, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.018087000823579728, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01747765054926276, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01696809106040746, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01629476005677134, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01546437369659543, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.014348566506523639, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01337208692682907, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.012450806640554219, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.011585366670042277, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011080877298954874, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.010700369438854978, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.010279570964630693, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.009841514695435762, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.009600890681613237, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.009477669892366975, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.009377788167912514, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.009150630618678405, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.008818523085210472, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008586287793004885, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008532739145448432, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00919455511495471, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009352273167314707, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.009047094829729757, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010727273551747202, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010893774179276079, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.016286443502467593, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.019060911745764315, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.027196937379194423, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03396528539364226, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04217568369931542, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06578822598094121, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.09188659615814686, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.611849546432495, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.551356554031372, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.4577014446258545, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3719677925109863, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2924444675445557, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.189408540725708, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.0815136432647705, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.9701716899871826, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.836477279663086, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7088474035263062, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.5971102714538574, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.4521679878234863, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.3329799175262451, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.1885106563568115, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.0699983835220337, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9653724431991577, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.8359729647636414, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.6826156973838806, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.5209062099456787, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.38138091564178467, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2491266131401062, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.1867486834526062, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.15239094197750092, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.1294148564338684, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11305318772792816, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.09989414364099503, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.09117022156715393, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.08377046138048172, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07460729032754898, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.06748882681131363, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.06374823302030563, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.062007829546928406, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.06341470032930374, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.06122279539704323, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05371095612645149, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.050575681030750275, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05214614048600197, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09371793270111084, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.11413411051034927, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.19837124645709991, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.19740095734596252, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.17668330669403076, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3746981620788574, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2821321189403534, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6137141585350037, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6381607055664062, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.0126919746398926, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.1816091537475586, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.3903234004974365, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2021329365079365, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.21006944444444445, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.23561507936507936, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.28943452380952384, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.35639880952380953, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.42509920634920634, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.47197420634920634, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5111607142857143, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5453869047619048, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5687003968253969, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5803571428571429, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5962301587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6267361111111112, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6825396825396826, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7338789682539683, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7673611111111112, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8070436507936508, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8340773809523809, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8787202380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9221230158730159, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9459325396825397, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9528769841269841, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9605654761904762, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9645337301587301, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.972718253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9828869047619048, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9422123015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9521329365079365, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9469246031746031, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9588293650793651, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9347718253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9546130952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9476686507936508, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.949156746031746, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9206349206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.043083708522131677, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.045836473682884446, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.06706642783383378, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1153166576599135, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.15886435537789087, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.20175621862943505, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.23842169284020356, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.27291831453600596, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.29787458044521803, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3129962473458161, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.3221082861651934, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.3445822168283298, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.39471575437242673, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.4934641827270423, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5934744276079792, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.65486094683606, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.7304086104954673, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7807202939482096, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8557010383025972, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9088416657405938, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9367664506013489, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9452069516204861, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9530406185994055, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9582249054299579, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9619303411061734, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9658037790433396, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9665733489689282, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.966827579291637, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9710813530768732, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9751479708208257, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9759596009369786, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.976929362855685, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9753116276601977, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9762318580715518, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9787722324450779, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9808801563987567, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9808261067201852, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9730905931760471, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9615008838263202, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9341292016766694, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9446585810986661, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9400983724326298, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9061592566486025, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9547809821587512, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9075931879203852, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.942047959054504, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9411965626723716, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9326244744157799, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8744251562589097, "id_best": 36, "lr_best": 0.0021299999999999995, "wd_best": 0.05, "train/loss_best": 0.06210433754138649, "validation/loss_best": 0.05214614048600197, "validation/acc_best": 0.9833829365079365, "validation/f1_best": 0.9808261067201852} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 0.8348791965842247, "train/grad": 0.3311557278037071, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.4784991455078127, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.4075555419921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.298550872802734, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.198760681152344, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.1063412475585936, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.987805633544922, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.8659664916992187, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.74418212890625, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.6041912460327148, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.476596565246582, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.3690201187133788, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.231767406463623, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.1171148014068604, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.9683195686340332, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.8348262214660644, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.7132128238677978, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.5678738430142403, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.4193777483701706, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2983119075745344, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.22484533734619616, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.17385263912379742, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.1465860466659069, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.12764699490740894, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.11397577103227377, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.1033606537245214, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.09302426395006477, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.08530646938830615, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.07904392163269222, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.07205275076441467, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0657986767590046, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.061362614966928956, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0580029699485749, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.05445867314003408, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.051218249667435885, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.045541133135557174, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.04234634017571807, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04048245127312839, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.046098291939124465, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.051387114338576796, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08141000768169761, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.10303956605494022, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.16551612277515232, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.2832105667702854, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.581199849974364, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6366529500763863, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.1113556373491884, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.5895028260257096, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.485272939307615, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.447255711490288, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03025278258137405, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.029107922287657857, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.027659331187605857, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.026612358829006554, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.025800140276551246, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.024878565398976208, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023979130163788796, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0230655437707901, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021968725640326738, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020941234594210982, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020083480784669518, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01906335259322077, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.018318876172415913, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.017526624696329235, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.016952663706615568, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01647016474045813, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01578448143322021, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.014557959004305303, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.013118527156766504, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.011971748240757734, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.010957481197547168, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.010343024129979313, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.009874265677062795, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.009476562583586201, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00914732198463753, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.008778689295286312, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.008492123094620184, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.008217314238427207, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0079501688410528, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.007821571524254977, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.007795806445647031, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.007781500917626544, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.007731076901545748, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.007665799588430673, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.007293581292615272, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0073622062060167085, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007371737769281026, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008303139345080125, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009417786424164661, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01345035089441808, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.016460165508906357, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.022748683728168545, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03209182919687009, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05198146097594872, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.056814326881431045, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08046846785582602, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.10369491300545633, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1391486268583685, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.204666827917099, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.330211877822876, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.248230457305908, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.1226561069488525, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.0086421966552734, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.9044629335403442, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.7732802629470825, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.642591118812561, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.5170741081237793, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.378359079360962, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.256101131439209, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.1540722846984863, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.0200133323669434, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.900650143623352, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.7319201231002808, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5793372988700867, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.44820135831832886, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.30223244428634644, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.20712362229824066, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.16318529844284058, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.14023634791374207, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.12157482653856277, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.1082216277718544, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09728211164474487, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.08837218582630157, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.079652339220047, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.07122883945703506, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06746384501457214, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06578866392374039, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.06498342752456665, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07099174708127975, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.08029455691576004, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.08610168099403381, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.09314022213220596, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.0990140363574028, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08311709016561508, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06799964606761932, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06374064087867737, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13714425265789032, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.08625591546297073, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2545914351940155, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.384248822927475, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.711920976638794, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7211912870407104, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.4654812812805176, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.6174854040145874, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8664434552192688, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.28110933303833, "validation/loss_047_lr4.3e+01_wd1.0e+00": 7.39836311340332, "validation/loss_048_lr5.0e+01_wd1.0e+00": 14.709033012390137, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.3350694444444444, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.3950892857142857, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.46081349206349204, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.4992559523809524, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5262896825396826, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5600198412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5756448412698413, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5860615079365079, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6029265873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.644593253968254, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6971726190476191, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7614087301587301, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7998511904761905, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8271329365079365, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8566468253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.890625, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9402281746031746, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9513888888888888, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9625496031746031, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9662698412698413, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9694940476190477, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9719742063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9704861111111112, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9558531746031746, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9481646825396826, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9404761904761905, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9404761904761905, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9379960317460317, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9486607142857143, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9635416666666666, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9600694444444444, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9206349206349206, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9072420634920635, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.14641747102996083, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.1821553703976265, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.22178250042136138, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.2557164349237006, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.27410617560149764, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.30235046552159917, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.31620809289556345, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.32705752081796297, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.35337204453317367, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.4325602481375488, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5311198398856855, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.6529964296558189, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7229821594008262, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7652408579813444, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8167782310775942, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8728924786635931, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.931280060398184, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9417961417858219, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9498321293296327, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9549503027886528, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9588879886563934, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9591668369416869, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9621263911600131, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.964082906866846, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9669778291794815, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.970314634686486, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9721202096769362, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9741346048053007, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9747353466022994, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9731761462838568, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9710874426381579, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9712746638892301, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9695619254582662, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.969969734573811, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9742097491399173, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9789506983144525, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9809601245406427, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9704765852296184, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9728939643174248, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9440594480680595, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9447398525664455, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9345773923615, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9357329607223583, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.923595388500679, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9459201106133358, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.955468610233684, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9512364616721986, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.91734564545919, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8607102386644573, "id_best": 36, "lr_best": 0.0021299999999999995, "wd_best": 0.05, "train/loss_best": 0.04048245127312839, "validation/loss_best": 0.06374064087867737, "validation/acc_best": 0.9826388888888888, "validation/f1_best": 0.9809601245406427} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 0.9740851171314716, "train/grad": 0.5260121841728688, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.194744873046875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.103251953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9646142578125, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.841129608154297, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.7311392974853517, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.5969408416748048, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.4681226348876952, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.3473772430419921, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.2149025535583495, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.0950349807739257, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.9890924835205078, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.8397691679000855, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.7032705116271972, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5247098308801651, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.38305796653032304, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.285568572729826, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.21061706162989138, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.16717764046043157, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.14117299117147922, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.12539354730397462, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.11161699523217976, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.1010590133536607, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.09174534490332008, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.08315754476934671, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0753884746041149, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.06796143910847605, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.062114143762737516, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.05697013688273728, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.050856468444690106, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.044692554445937276, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.039815641585737464, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.036013513058424, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.03210971164517105, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.030345537969842552, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.028666056916117667, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.029211373571306466, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04093824881128967, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04676201161928475, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08555204008705913, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.15611042898148297, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.30031716987490653, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.5486384403705596, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.6553005481138825, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.6947318132221699, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.5871018765121698, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.9104185602534562, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.123921436825767, "train/loss_047_lr4.3e+01_wd1.0e+00": 6.8875505454745145, "train/loss_048_lr5.0e+01_wd1.0e+00": 8.82401643158868, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.026660031592473387, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0258668009750545, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02481435257010162, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02391963457688689, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023106340169906616, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02207183010876179, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021055486034601927, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020130167030729354, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019200286148115992, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01848130077123642, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.017936366549693047, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.017271800548769534, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.016688046469353138, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.015720903677865862, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.014327988401055336, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012855288265272975, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.011321748902555556, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010342669368255883, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009790886582341046, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009462319996673614, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.009155219763051718, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008898359826998785, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00862640789942816, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008306644216645509, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.008005510875955224, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007780763532500714, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00759360671218019, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.007391636010434013, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.007210496150073595, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006963202842161991, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006631330976961181, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0063337157809291965, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006086714481498347, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006193608909670729, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00633776361020864, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006417690899252193, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008950936190958601, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010460761814429134, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014647052856744266, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.021153233864461073, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03323928554542363, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04849917729648137, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05827999970060773, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06118715111931579, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.09948718982515857, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.11169027423216697, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.21583920860663056, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.2402709213271737, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.31988932482898236, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.046462059020996, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9453266859054565, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.7944554090499878, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.6636476516723633, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.550266981124878, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.416846513748169, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.2917944192886353, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.1760197877883911, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.0465750694274902, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.923513650894165, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.8076211810112, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6397671699523926, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.49540895223617554, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3148081302642822, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.21552003920078278, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.17346785962581635, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.14369933307170868, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.12217933684587479, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.10684900730848312, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.0968179926276207, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0877826064825058, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.08057349920272827, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.07329193502664566, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.067137710750103, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.062426675111055374, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.05710263177752495, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.05321353301405907, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.05062171071767807, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.048804882913827896, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04847409576177597, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.047734539955854416, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.046683453023433685, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04952434450387955, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05784066766500473, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.077541783452034, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.056164249777793884, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.11610939353704453, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.19638800621032715, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.19580711424350739, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3637884259223938, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.82792067527771, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5496324896812439, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7740551233291626, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.99532151222229, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.831730604171753, "validation/loss_045_lr3.1e+01_wd1.0e+00": 5.217947006225586, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.923677682876587, "validation/loss_047_lr4.3e+01_wd1.0e+00": 9.364212036132812, "validation/loss_048_lr5.0e+01_wd1.0e+00": 9.630857467651367, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.49107142857142855, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5243055555555556, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.560515873015873, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5778769841269841, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5853174603174603, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5982142857142857, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6319444444444444, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6822916666666666, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7457837301587301, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7901785714285714, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8169642857142857, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8549107142857143, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8893849206349206, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9384920634920635, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9528769841269841, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9625496031746031, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9675099206349206, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9709821428571429, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9722222222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9747023809523809, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9818948412698413, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.984375, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.984375, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9828869047619048, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.96875, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9590773809523809, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9441964285714286, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9665178571428571, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9680059523809523, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.933531746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9578373015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9312996031746031, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9595734126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9484126984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9474206349206349, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.24937197261192226, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2751884241483507, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.30261317966446066, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.31571998773293264, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.32212428123341186, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.34031086841133756, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.4019570625940693, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.49877960854190595, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.6222381278733375, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7026740977073367, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.747124177992209, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8188301125783795, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8731741379827556, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.930180510120297, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9433024442022285, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9478051269005645, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.953800603605164, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9597415404482112, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9617909886965392, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9637835628715374, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.964847483773782, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.967758132762522, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9733120173520401, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9744936608877253, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9767201741669802, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9779679227754281, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9804645855915957, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9806926338802223, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9807530221403624, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9814316460326546, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9816403100606108, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9812969508294491, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9813376890951443, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9782943822968609, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9767693348795942, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9804201103663801, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9674261650983412, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.944772023362705, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9640666191271288, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9473703873003914, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9342575048488568, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9552410388027629, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9591338121227246, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9221140754514501, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9480718972757235, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9059447141333334, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9535006632214641, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9418628763724799, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9365082297505564, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.039815641585737464, "validation/loss_best": 0.047734539955854416, "validation/acc_best": 0.9848710317460317, "validation/f1_best": 0.9816403100606108} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 0.999202902764082, "train/grad": 0.5645403532683849, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9232826232910156, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.8162805938720703, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.6608385467529296, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.530262451171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.4201464462280273, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.292207851409912, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.1722433853149414, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.0577360820770263, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9226114511489868, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.7868665266036987, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.661415901184082, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4915931037068367, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.3552665756642818, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.23460159584879875, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.18332569435238838, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.15733817156404256, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.13662277679890394, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.1207716847397387, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.10832076691091061, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.09948273940943181, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.09022063013166189, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.08181390740908683, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.07384921297430992, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.06673374081961811, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.060106700556352734, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.05282772747799754, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.04704967562109232, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.04229558604769409, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.03651461693458259, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.030503928242251278, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.025762299494817852, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.02241529437713325, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.018645807625725864, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.02060238684527576, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.024253852525725962, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.031108972504734992, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05563678898848593, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.08812286053784192, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.11976451240479946, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.2690633951500058, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.6141980174556374, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.44550201354548336, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.8484315081872046, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.0696985012479125, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.2562087603192778, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.66594161644578, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.270437762094661, "train/loss_047_lr4.3e+01_wd1.0e+00": 6.837771102078259, "train/loss_048_lr5.0e+01_wd1.0e+00": 9.564247258156538, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024383647162467242, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023619267586618663, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02245724373497069, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02142896531149745, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020553191644139587, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0195738382730633, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01874971414450556, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018071067333221436, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017397213438525797, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01681346376892179, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.016279817493632436, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.015348582123406231, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013930451096966863, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01178573831450194, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0106452285987325, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010062096288893372, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009578609124291688, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009231096666771918, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008953752615489065, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008736509840236977, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008458175731357187, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008197318906895816, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007988481014035643, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00782646326930262, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007564196601742878, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007213171865441836, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006944681758759543, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006739339867490344, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006428119779448025, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005998012135387398, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00550533021974843, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005156727341091028, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004762436985402019, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0051793258416000755, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006610569314943859, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007583467464428395, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011824207133322488, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.015589392295805738, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.017904834962791938, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03146121682263811, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04912422011402751, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04307294602269327, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0650328491762275, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07801613113394437, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.1368791540578545, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.21954687912017107, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.20867700476199388, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.24055093101347666, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.3097158457711339, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.7950170040130615, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.6838724613189697, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.5260967016220093, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.397071361541748, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.2898236513137817, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.1652579307556152, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.0462031364440918, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.9277508854866028, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7803872227668762, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6315789818763733, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.5017021298408508, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.330488920211792, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.22622893750667572, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1664428412914276, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.13875646889209747, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.12217197567224503, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.10779924690723419, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.09660356491804123, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.08778561651706696, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.08148262649774551, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0748584195971489, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06946767866611481, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0652843788266182, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06168331950902939, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.058030299842357635, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.05402594432234764, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.05072755739092827, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.047784171998500824, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04554826766252518, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.045315515249967575, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04741673171520233, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.05157354474067688, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.06035592034459114, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.06211777776479721, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07335083931684494, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09550617635250092, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.09407038241624832, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.24664506316184998, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.32850053906440735, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.6020978093147278, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.7535010576248169, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.8366351127624512, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.328497290611267, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.897418737411499, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1819374561309814, "validation/loss_045_lr3.1e+01_wd1.0e+00": 6.758997440338135, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.289327621459961, "validation/loss_047_lr4.3e+01_wd1.0e+00": 9.35009479522705, "validation/loss_048_lr5.0e+01_wd1.0e+00": 11.669633865356445, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5647321428571429, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5793650793650794, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5925099206349206, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6044146825396826, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6312003968253969, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6865079365079365, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7497519841269841, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7973710317460317, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8315972222222222, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8588789682539683, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8782242063492064, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9362599206349206, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9528769841269841, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9595734126984127, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9632936507936508, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9672619047619048, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9704861111111112, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9722222222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9734623015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9734623015873016, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9749503968253969, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9779265873015873, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.980406746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9816468253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9665178571428571, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9407242063492064, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9565972222222222, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9476686507936508, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.939484126984127, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3099651544192122, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.32113124853268393, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.3335188764498208, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.34983551536041363, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.40035917403229915, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5160080861664206, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.642833127675348, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7226283335029774, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7814507884218729, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.827861039693755, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8574292309253635, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9279315068638028, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9456712486372402, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9523249728853118, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9568108488559804, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9604558934320829, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9640242426384631, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9657340620627146, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9678408421142963, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9675240609415452, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9689618970222814, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9715761181794074, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9736729362949781, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9734439998830391, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9746174049773705, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9758326193437485, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9785459887194095, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9801564861858625, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9817489431980184, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9818290988130429, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9821586125365827, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9812237882628203, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9793289320573848, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9807701935000007, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9790550319217708, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9748932531188862, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9755600389471439, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9576974774682858, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9540065825760625, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9574922052309337, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.958413714101084, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9589064609006938, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9579529044584036, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9523946299088346, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9550038052935722, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9255878097074169, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9523955822764973, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.939708694164506, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9371498606641928, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.025762299494817852, "validation/loss_best": 0.04741673171520233, "validation/acc_best": 0.9853670634920635, "validation/f1_best": 0.9821586125365827} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.6722502553462982, "train/grad": 0.4489382708072662, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.7009735870361329, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.5893357849121095, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4342232894897462, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.3088797378540038, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.2048680686950684, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.0818892002105713, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.9596394062042236, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.832932996749878, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6754392290115356, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5265730792284011, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.39907848060131074, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.2621778519451618, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.198876438960433, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.15757348395884038, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.13603286469355225, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.1223130031209439, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.1097724578436464, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09888433802872897, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.08891517718322575, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.08055699252523482, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.07177601422183216, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.06420837249606848, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.05676628059707582, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.04940639365464449, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.042337717730551956, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0348003157787025, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.028965883953496814, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.024146446594968438, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.018650141237303616, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.014511464806273579, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.012090306505560875, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.008661640034988522, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.008756652334704995, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00637485271319747, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.02133500882424414, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.028595636170357465, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.043958876486867665, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09102440055459737, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.12598166695795954, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.17414062050171197, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.2131258341576904, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.27068172871135177, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.44654771994799375, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.8374360801372677, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.430050544720143, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.2036933163553476, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.307142550582066, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.355247048698366, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.980914186239243, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022705799667164683, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021813667062669993, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020542175406590105, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019547417401336132, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018794250963255762, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01802320496644825, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01738757088780403, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.016835982673801483, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016204877621494234, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015451752790249884, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01437510889954865, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01216483801137656, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.010815468141809106, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00997908751713112, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.009515723334625364, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009207883188501, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008906597485765815, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008620551900239661, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008323518555844202, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008033611760474742, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007696360953850671, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0073950759798754005, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007054257343406789, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006675193922710605, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006272928829421289, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005782313720555976, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005323173726501409, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0048585101010394285, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00419483470643172, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0036451956263044848, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0033389010495739058, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0028512056769250194, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00321890535800776, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0026584398704289926, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006375318857490129, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008124021755775175, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.010332256470683206, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.015914488287927853, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021678088418510127, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.025957170452795067, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03066690568082389, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.033972182093297214, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.045503605435353915, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06938168148990594, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.1049493560166429, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.17143465246382683, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.17686809192881253, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.19161876272862904, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.2310314588383734, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5981111526489258, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4866055250167847, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3343125581741333, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.212302565574646, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.1099337339401245, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9864800572395325, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8590338230133057, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7231824398040771, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.5596507787704468, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.4092453122138977, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.285932332277298, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.19425152242183685, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.1571350395679474, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.12844917178153992, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.11216632276773453, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.10147546976804733, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.09194454550743103, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0836092010140419, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.0756363719701767, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.06954149156808853, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06385305523872375, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.05931137502193451, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.05484510213136673, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.05064857378602028, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04706256464123726, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04431137070059776, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.042961087077856064, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04206589236855507, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04255693778395653, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04359946772456169, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04607848450541496, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0455658920109272, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04879677668213844, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05381644517183304, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12604597210884094, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08866885304450989, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13376794755458832, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1265932321548462, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2259630262851715, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.589937150478363, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.49062836170196533, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.6414421796798706, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.8951746225357056, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.0299476385116577, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.1771905422210693, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.0491116046905518, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.990390300750732, "validation/loss_047_lr4.3e+01_wd1.0e+00": 5.975862979888916, "validation/loss_048_lr5.0e+01_wd1.0e+00": 8.010701179504395, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5840773809523809, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5910218253968254, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6068948412698413, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6527777777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7170138888888888, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7782738095238095, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8211805555555556, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8425099206349206, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.875, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9161706349206349, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9419642857142857, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9553571428571429, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9595734126984127, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9652777777777778, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9719742063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9742063492063492, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9759424603174603, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9806547619047619, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9811507936507936, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9828869047619048, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.986359126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.986359126984127, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.980406746031746, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9680059523809523, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9709821428571429, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9747023809523809, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.972718253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9563492063492064, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.32832563937893766, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.335289404310783, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.36479333089254834, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.46566493357733146, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.593686564934575, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7008986149341946, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7706538954294556, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8037780234682721, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8545459231589708, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.903215583631152, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9350196895137415, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.946928749386373, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9514223814699884, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9589440997812113, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9637491961970468, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9662298341588487, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9684186734784442, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9705420538330874, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9728827966359251, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.974219041940396, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9768470026093102, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9780889652645073, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9799906470937942, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9813194509657824, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9836014357264864, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9836414144418839, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9840538935314326, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9844861898322876, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9841863467586339, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.983191970340777, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9845267002784828, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9845992047082675, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9823553702346053, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.981938225405189, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9727013828536635, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9778841133339349, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9758029365159524, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9803929563957109, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9756585745635807, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.958501362710093, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9698633210539799, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9668266600366231, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9652008287047984, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9700530667721022, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9680185160054448, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9667385830438403, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.947597155190744, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9525713739995377, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9484647318547902, "id_best": 26, "lr_best": 0.00041999999999999996, "wd_best": 0.05, "train/loss_best": 0.028965883953496814, "validation/loss_best": 0.042961087077856064, "validation/acc_best": 0.9868551587301587, "validation/f1_best": 0.9840538935314326} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.48849301122128963, "train/grad": 0.3624710017442703, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5225870895385742, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4135459899902343, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.265694122314453, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1465892791748047, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0451143550872803, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9191156482696533, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7855111646652222, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6458812808990478, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.48517437040805816, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.34052228897809983, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.2471147707849741, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.18475881427526475, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1558084526658058, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.1316859093680978, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.11691644691862166, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.10653124196454883, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09643788265995681, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.0863670155685395, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.07662143959663809, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0690419097058475, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.061476197252050044, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.054770949156954886, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.04804657908156514, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.04112574298866093, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.03440243068151176, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.02688284088857472, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.020844839764758944, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.015912584215402602, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.010672952979803085, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0070225148275494574, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.005043201306834817, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00332669080235064, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0028769434429705143, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0027315464057028295, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.007940649148076772, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.02516997962258756, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0287225254625082, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04994088725186884, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.11728323115967214, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1453284002561122, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.11856138215400279, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.18195983982644975, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.37596430752426385, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.49531248888932167, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6640539591107517, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.4766455120965838, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.3059405062068254, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.544947225470096, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.222231661975384, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021542191160842775, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020651553296484052, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019509567720815538, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01870007741264999, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018108733021654187, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01746927136555314, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01687639289535582, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01626566783990711, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.015347836608998477, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01379950747359544, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.011869265898130834, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.010415900032967329, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.009784821851644665, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.009261097917333245, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00894334662007168, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008714034274453298, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008461796217598022, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008153161295922473, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007801819477463141, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007533538083080203, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007269948753528297, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006999230615328997, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006677177193341777, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006297053259331733, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005864388004993088, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005236807543842588, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004575903272634605, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.003846225898596458, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0028929315990535542, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0020046904633636586, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0016050888351674076, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0013412240811157973, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0012452516446683148, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.001337116021913971, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.003455396667986861, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008412489899492358, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008983832766123214, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010516151358485501, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.019212340454322324, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.022262660542748173, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.020283793498182608, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.025944366273456008, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0380500326226318, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04838745124701715, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06780261943805882, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.12350062482680381, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1142580190165949, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.18121241656763754, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1912441735716214, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.4467517137527466, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.3392457962036133, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1938072443008423, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0757328271865845, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9732646346092224, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8421043753623962, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7009475827217102, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5574378371238708, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.39372557401657104, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.258161336183548, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.19386026263237, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.15090395510196686, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.12865063548088074, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.10914719104766846, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09756048023700714, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.0898415744304657, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.08201229572296143, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.07388396561145782, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.06648195534944534, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.06108768656849861, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.05591148883104324, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.051595136523246765, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04778151214122772, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04515756666660309, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.044154901057481766, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0443136990070343, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04447070136666298, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04417734593153, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04087892919778824, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04047916829586029, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.044350650161504745, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.043962568044662476, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04783939942717552, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.050141341984272, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07298227399587631, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.10439315438270569, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1136213168501854, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11762768775224686, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2545418441295624, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.43240663409233093, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.6336941123008728, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7255142331123352, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7824774384498596, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.0789506435394287, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.2977792024612427, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.3726725578308105, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.9833909273147583, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.289422035217285, "validation/loss_048_lr5.0e+01_wd1.0e+00": 5.026953220367432, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5987103174603174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6163194444444444, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6649305555555556, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7281746031746031, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7782738095238095, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8172123015873016, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8444940476190477, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8754960317460317, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.921875, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9459325396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9541170634920635, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9694940476190477, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9729662698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.982390873015873, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.984375, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.986359126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9836309523809523, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.984375, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.970734126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9583333333333334, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9598214285714286, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.344411743916893, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3744379327142205, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.47883286498207683, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6056559305793436, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.694523974171952, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7648858674093462, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.807389719085936, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8546493751780991, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9111719320225967, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9391353853716918, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9461914428150086, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9549542272980486, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9590215267749067, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9635978619780721, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9640614572295854, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9670755496342577, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.970205258472227, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9730539498870779, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9753505096741745, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9786020459975391, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9797621212415445, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9809219365234076, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9833583891163474, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9825126216281209, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9829494119513694, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9830201300609105, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9843414932572068, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.984420410177661, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9861497593325238, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9853524973070633, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9833156348444761, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9848052764886023, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9839650427023888, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.985266203461947, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9783155179581682, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9769484714478238, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9821921110806168, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.979614434747116, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9752420865927486, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9718119168812114, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9684408995615511, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9686173728885592, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9687524086086212, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9697539092816556, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9714613195320555, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9724981748977946, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9717928321424745, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9451728510871443, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9568900402474751, "id_best": 28, "lr_best": 0.00057, "wd_best": 0.05, "train/loss_best": 0.010672952979803085, "validation/loss_best": 0.04087892919778824, "validation/acc_best": 0.9878472222222222, "validation/f1_best": 0.9861497593325238} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.3096948451548815, "train/grad": 0.24837696447968483, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.375703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.271059169769287, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.129213924407959, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.0120641803741455, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9079965019226074, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7725741815567017, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.6294958972930909, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.4902536082267761, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3331967747211456, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.22725781187415123, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.18098478466272355, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.14661670472472907, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.12778149712830783, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.11065424372442066, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.09976870336569846, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.09159236820414662, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.08277131484821439, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07373047650791704, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06546404513530434, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0590355312731117, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.0518485152348876, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.04497083084657788, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.03759445426054299, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.02999814840964973, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.02283695072866976, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0156652370095253, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.010637437999248505, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.00710833746008575, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.003970052087679505, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0023516422044485806, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0015941384620964526, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.001029363451525569, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0008507712744176387, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0006181962881237268, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.008807406686246395, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.01336600722745061, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.03384056133218109, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.02556271289475262, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07564255910925567, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.12091382754035294, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05731028448790312, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.11924945608712732, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.13512344975955784, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.16530099982395768, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.26202126204036175, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.5970226480159908, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6361784403212368, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.4624469899386168, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.043972287802026, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020261469148099422, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019437590255402027, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018424166147597135, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.017692100098356603, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.017107593463733792, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.016407341156154873, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.015683077797293662, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.014860133659094572, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.013320303275249898, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01123225617222488, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.010247696228325366, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009573424747213722, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.009187999467831104, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008794319046428427, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008510058826068416, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008256187123479322, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.00791599408374168, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0075078904395923015, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00714513472514227, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006873049827991053, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006553852140204981, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0061731548357056455, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005677289795130491, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005046871947124601, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004332507104845717, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.003491979724203702, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0027572992545901796, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0021240055980888428, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.001350240401807241, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0007809322082903236, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0005188087637179705, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00043421373531600695, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0004004078732214111, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0003348975049721048, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0029665307479763213, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004386100457497833, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007790933660162782, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008128416657313408, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014562431524260493, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01957413297008822, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.013349568290876029, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.020388629272997782, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.024279109466778845, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0277197778190696, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.038191035078543864, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0659750693036711, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07539459401396022, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.10507376710545403, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.13419120176458876, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3307719230651855, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2274128198623657, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0861940383911133, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.967763364315033, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8605121970176697, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.718898594379425, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5718515515327454, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.42932695150375366, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.27368658781051636, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.19291281700134277, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.15764404833316803, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.12921550869941711, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.1127592921257019, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.097637839615345, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.08805634081363678, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.08097849786281586, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.07307113707065582, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.06538622081279755, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.05940375104546547, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.05526454374194145, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.05127435922622681, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04767872393131256, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04455246031284332, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.042144741863012314, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04110965132713318, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04096859320998192, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.040801212191581726, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.040518131107091904, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04086529463529587, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04173664748668671, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.044060129672288895, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.045141153037548065, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.046671491116285324, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.050278447568416595, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.06611258536577225, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09623828530311584, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2148696631193161, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17396366596221924, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.31772753596305847, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5402379631996155, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.5053392052650452, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.6242814660072327, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.583290159702301, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.9200493693351746, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9561751484870911, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.337925672531128, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.0676355361938477, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.438542366027832, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.014288902282715, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6205357142857143, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.658234126984127, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7286706349206349, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.777281746031746, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8107638888888888, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8387896825396826, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8673115079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.908234126984127, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9451884920634921, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9536210317460317, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9613095238095238, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9670138888888888, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9692460317460317, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9724702380952381, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9747023809523809, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9789186507936508, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.980406746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9818948412698413, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.984375, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.970734126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9665178571428571, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3811596708473798, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4556064540477534, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5946676822985208, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6849217860243318, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.746475100811646, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7924194318144069, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8401178062169805, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8952730790151038, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.936868424776329, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9459365299031597, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9541496200168483, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9614138552489405, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9637245191465653, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9667022830137734, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9694659584356361, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9718582356974026, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9738523375208887, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9757801471645704, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9788073681049136, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9797221420504255, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9814047184302082, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9831548174884235, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9851004766512491, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9852675987815338, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9852477232881004, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9847668510535412, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9845124383529109, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9842619158316575, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9847351608898643, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9856233446248798, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9843701403759353, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9846621337095657, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9859374724140892, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9840110719467395, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9815248398061261, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9791744350304628, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9713646058935472, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9736019239022898, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9728744642476082, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9715644455662552, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9708891820665628, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9712765643756386, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.974039123786181, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9728072457641204, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9781795331734457, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.967039263233408, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9671425090137246, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9643538556436123, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9620755401151855, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 0.02999814840964973, "validation/loss_best": 0.042144741863012314, "validation/acc_best": 0.9875992063492064, "validation/f1_best": 0.9852675987815338} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.2341659703850746, "train/grad": 0.19095200538635254, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2892355728149414, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.187794418334961, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0481222343444825, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9288423728942871, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8190632104873657, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6747325992584229, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5290692889690399, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.3871711453795433, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.24844266496598721, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.18531065065413713, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.15571344919502736, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.13050962839275598, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.11537068255245686, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.1006353781092912, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.09044851147569716, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.08224595729261637, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07305899129249155, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06417697982862591, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05623046485707164, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.049791591400280596, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04259177826344967, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.035878057898953555, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.028886356689035893, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.022077097734436394, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.016015031719580294, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.010243904758244753, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.00656922584399581, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.004364903243258595, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0026687306445091962, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0015856878459453584, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0009173073805868626, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0007176034804433584, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0004518260061740875, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0006479722727090121, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0030714574921876193, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.005195299368351698, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.009276171699166297, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.011655580513179302, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.029711194364354016, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05070851591415703, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.03470736463554203, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07636473356746137, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.10065395168028772, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.07677375326864422, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.09513869238086045, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.34625007153488696, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.4302471004705876, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.7514747486636043, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.0633228260371834, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019605686524882912, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018888109223917128, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01803621185012162, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.017412701570428908, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01688963532913476, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.016232845191843807, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.015436810213141144, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.014256328083574773, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.011884099340531975, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.010507123642601073, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009874401013366878, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.00930992560228333, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00895925096469, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008599676698213442, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008327249258290976, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008067614691099153, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007741604836191982, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00742910640547052, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0071312849107198414, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006861383852083236, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006483333773794584, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006009367107180879, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0053730822651414205, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004584379136795178, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.003728868771868292, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0027218841461581177, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0019844960403861476, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.001485831823811168, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0010251548072847073, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0006383680674116476, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0003400857007727609, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00027067326291216884, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00017800364113099932, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00044901087457219544, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0011282021480934646, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0028676517410517023, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004520676874575153, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004233050205981064, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007977561530744939, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.011394076724705476, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.009268446909557663, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.01434689993947146, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01781757382286864, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.021891263798723457, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.021722377034200834, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05089993972705144, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.06551168071340825, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07938160511417411, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.08761817894707719, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2414509057998657, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1407490968704224, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0010476112365723, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.879684567451477, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7664999961853027, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6184854507446289, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.4722279906272888, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.3293772339820862, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.21125566959381104, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.16240450739860535, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.13752944767475128, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.11552184075117111, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.10214634984731674, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.08951061218976974, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.08108432590961456, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.07403787970542908, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.0664304569363594, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0597248412668705, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.05451160669326782, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.050871606916189194, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.047426868230104446, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.044597871601581573, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0423312708735466, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04077153652906418, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04033231735229492, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04019670933485031, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04028996452689171, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.040543682873249054, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04136589542031288, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04278167709708214, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04389427974820137, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04500053822994232, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04647563770413399, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05077916011214256, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.06799117475748062, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.10654319822788239, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.15563295781612396, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12931673228740692, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.24974046647548676, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.41153886914253235, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3524300754070282, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.40423232316970825, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.4839004576206207, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.7726781964302063, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.8952696919441223, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.4785267114639282, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.8868396282196045, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.7227163314819336, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0906810760498047, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6498015873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6981646825396826, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7673611111111112, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8090277777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8325892857142857, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8628472222222222, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8950892857142857, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9370039682539683, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9506448412698413, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9595734126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.964781746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9685019841269841, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9712301587301587, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9724702380952381, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9751984126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9774305555555556, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9813988095238095, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9818948412698413, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.988343253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9818948412698413, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.970734126984127, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9675099206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.43757855418185326, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5411768582659455, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6679993918918002, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7458740744922966, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7860678300124768, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8353613934598637, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8798053582345257, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9268373248497583, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9427209873294576, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9513260471064359, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9576963998985736, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9625664177819466, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9649760730143226, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.966509146938405, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9692384506177434, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9720462149531244, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.975971053655572, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9779241697004762, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9780948033577255, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9802986475902342, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9823412986027916, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9841415455434421, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9844081671050768, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9850454808635474, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9856310628484446, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9853699938801693, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9852698653504174, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9859693288296667, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9867324740687838, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9857923093661857, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9852824527817687, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9850556860773788, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9852492453956145, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9846326822808971, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9814642454397383, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9770619399551392, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9764356401120886, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.981371459816159, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9775540544485329, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9751139322355746, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9763183974480155, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9813237840917892, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.977347689065843, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9740511290856084, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9773004150188148, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.972427293230307, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9736178940963036, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9659813571704806, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9637578098263088, "id_best": 28, "lr_best": 0.00057, "wd_best": 0.05, "train/loss_best": 0.0026687306445091962, "validation/loss_best": 0.04136589542031288, "validation/acc_best": 0.9885912698412699, "validation/f1_best": 0.9867324740687838} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.18576667048037052, "train/grad": 0.14161519141867757, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.212883415222168, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1139225196838378, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.9745458698272705, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8515395450592042, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7362614059448243, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5884234547615051, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.44440483033657074, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.30684496358036994, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.20823078714311122, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.16624154299497604, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.14375566923990846, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.12304997831583023, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.11010704764164984, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.09699808686971664, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.08744627838954329, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.07927708870731294, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07036166766658425, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06192588948644698, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.053948435056954624, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04710739159025252, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.039327966710552574, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.0318861317448318, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.024410769501701, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.017463878160342575, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.011639796933159231, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0067223984841257335, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.004074149364605546, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0026218661107122896, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0016316673345863819, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0010259073879569768, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007205904740840197, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005385624710470438, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003663512133061886, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00031682322733104227, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00031852977350354194, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0014632000122219325, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.006550189089030028, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0017637575045228004, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.016503302874043585, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.026370313204824924, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.027141655711457133, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.02792391468770802, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04576888104900718, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.04252206591889262, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.03184569738805294, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1578369400370866, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.18112866748124362, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.4081226064264774, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.5072845536191016, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01890383054036647, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018262499864213168, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.017504543350078165, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01694047170691192, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.016470220061019062, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01582642686087638, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.014912388268858195, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01317169778747484, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010953006618656218, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.010064762602560222, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009554977599764243, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009043769068084658, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008697727075777949, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008312187127303331, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007976643647998572, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00764551472151652, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007280362656456418, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006958632669993676, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006643615085049532, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006337968824664131, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0059080965217435735, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005355685639369767, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004656263604410924, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003849530933512142, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002991845273791114, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0020233843973983313, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00136879946818226, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0009366669259543415, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005850796593222185, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00035812354486552066, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00024603295465567497, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00018926753105006356, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00013892152102016554, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00014457039671924576, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00022417366788943127, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0007736109706351613, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.002742931174040262, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0006989388865149039, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005188788109368922, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007661580646434809, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006617463799591162, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0067925220753229975, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.011240156649136626, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.012264201982031417, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.010494958453300354, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.028536644871367523, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03208851104948482, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.054616703054562635, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.06516325485094378, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.1718018054962158, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.0728455781936646, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.9324902892112732, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8070096373558044, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.6891083121299744, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5397763252258301, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.3937529921531677, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.2606929838657379, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.1799808144569397, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.14453816413879395, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.12495805323123932, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.10681366175413132, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.09570229798555374, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.08483659476041794, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.07684148102998734, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.07039798051118851, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.06391514092683792, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.05858475714921951, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.05436734855175018, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.05131714418530464, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04833144694566727, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.045957718044519424, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04410983994603157, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04277454689145088, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04181499406695366, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04180273413658142, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04176279902458191, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.042259205132722855, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04295744746923447, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.044367801398038864, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04539407417178154, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04644889384508133, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.047746721655130386, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.04973787069320679, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.057419028133153915, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09898170083761215, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12777380645275116, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13621293008327484, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2148001790046692, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.33909672498703003, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2890622019767761, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4647904932498932, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5045390725135803, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6062604784965515, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.7487884759902954, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.5561894178390503, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.499096155166626, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.245168685913086, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.918423652648926, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6803075396825397, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7328869047619048, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7921626984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8278769841269841, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8464781746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8745039682539683, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9184027777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9456845238095238, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9583333333333334, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9635416666666666, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.966765873015873, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9702380952380952, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9724702380952381, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9742063492063492, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9813988095238095, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.986359126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.986359126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.986359126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9818948412698413, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9704861111111112, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9675099206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5035838684138254, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.608245257488613, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7162420081202685, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7783908338206516, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8090682696766821, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8533333867267887, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9054728366292356, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9375193900242671, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9507297913878867, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9566562810612323, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9612958238661085, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9647791270219372, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9666473024447244, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9688930385135499, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9718739112497687, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9721168700645993, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9758505294321941, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.977925155124298, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9789722454466433, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9797940114657432, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9813185368231115, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9816006953872218, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.982607065610024, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9835194813989716, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9844428884911659, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9846146547030814, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9840419631456979, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9841358506670055, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9854627690757743, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9844031565531226, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9854393036606872, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9861212073067229, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9854105903598447, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9845016610314403, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9842512149814385, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9799007209752788, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9806416069466488, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9801796357017959, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9792313860889982, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9763525691469427, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9795964473588908, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9749618232724997, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9789761146925232, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9750193894205443, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9799816585956643, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9734003871310366, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9746612865446396, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9675408981115848, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9649336230632051, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.0005385624710470438, "validation/loss_best": 0.04644889384508133, "validation/acc_best": 0.9878472222222222, "validation/f1_best": 0.9861212073067229} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.15503073498606681, "train/grad": 0.10753268389031291, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.14398042678833, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.0466956043243407, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.9071919250488282, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.7815891885757447, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.6638474941253663, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5184109497070313, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.3751318615674972, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.2519162079691887, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.18229524303227662, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.15005155036225915, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.13158522251993418, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.11382238104008138, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.10232194205746055, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.09014491496607661, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.08057070322334767, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.07265397936105727, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.06424466935917734, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.055915785878896715, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04752644198015332, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0403162261005491, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03229994930326938, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02502277409657836, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01792885161936283, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.011823361180722714, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.00735803940333426, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004196949508041143, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002665205113589764, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0018518121913075447, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0012278983183205128, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.000831233412027359, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0006064589507877826, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00045668762177228925, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003359369374811649, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0002503778599202633, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.344156831502914e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0006890316028147936, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.001988387741148472, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.004236707193776965, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.006690920209512115, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0071285608969628815, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.019217266971245407, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.010666729779914022, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.011732838051393628, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.012635216191411018, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.05449205620214343, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05956078718416393, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.09266056540422142, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.18974924852140249, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.19789617285132408, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018491223086602986, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017886668015271424, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.017120608938857914, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.016480866251513362, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015895407679490745, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.015030282186344265, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.013773828381672501, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.011745807160623371, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010173779046162963, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009429671687539666, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009013262358494104, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.00861270357389003, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008362879876513034, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008072139850119129, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007784418436931446, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007528495820006356, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007251072641229257, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006946086011594161, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006563083985820412, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006147327006910928, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005573003815370612, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004925657694111578, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0041520017612492665, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0032644144614459945, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0023628397726861293, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0014928292483091354, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0009701393290015403, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006718164622725453, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.000444045535659825, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00029806392099999355, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00021506762353965314, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00016531665511138272, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00012454371249987162, "train/grad_033_lr4.3e+00_wd1.0e+00": 9.650924245761416e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.875202661513868e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0003476112932065689, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0013429744233158037, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0015726766611030014, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0027217872176220114, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0027801868772705145, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006753386250510513, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.005239610761003034, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006417439852391588, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0058651829875818115, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.011660962682100637, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.019002876780542098, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.020059591184398748, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03648796206910008, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03762038279919901, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.117689609527588, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.0195987224578857, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.8776350617408752, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.7482867240905762, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.6274770498275757, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.4790973365306854, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.3329174816608429, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.22001072764396667, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.16136488318443298, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.13267818093299866, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.11612005531787872, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.10014978796243668, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.09018940478563309, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.0800439715385437, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.07209830731153488, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.06580165773630142, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.059789154678583145, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.05454682931303978, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.05024559423327446, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04699360951781273, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04400648921728134, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04185345023870468, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04050295799970627, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03990486264228821, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03992206230759621, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04039769992232323, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04109171777963638, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04174461588263512, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04256802424788475, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04392199218273163, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04492374137043953, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.045855481177568436, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.047233399003744125, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.04972286522388458, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05491429194808006, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.0743756964802742, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1261310577392578, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12515436112880707, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.22735010087490082, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2885477840900421, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.28567177057266235, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.42683297395706177, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.44610995054244995, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5603272914886475, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5983830094337463, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.5158610343933105, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.0420421361923218, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.6888597011566162, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.7893524169921875, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7068452380952381, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7524801587301587, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8058035714285714, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8340773809523809, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8559027777777778, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8874007936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9352678571428571, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9516369047619048, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9608134920634921, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9657738095238095, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9682539682539683, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9737103174603174, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9779265873015873, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9821428571428571, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.982390873015873, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.982390873015873, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.980406746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9749503968253969, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5617016025704159, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6466031911633927, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7412482098717114, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7910506445888318, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8263184878644552, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8720913402986845, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9263788313729261, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9442792597228504, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9539717739196564, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9590152171674023, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9629493619741135, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9664359872557171, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9683559683314007, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9722564817378048, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9728383848979346, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9752271790243325, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9784793082490268, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9798934013761313, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9827783405593464, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9830186448979551, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9835637260548866, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9846472659455314, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9851246551237927, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9851231281883085, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9849448157250917, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9854808266270444, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9854464089212378, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9855582245845953, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9847783530526483, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9841984465487952, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9850148303084405, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9851701905974911, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9857513282962304, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.985319150422143, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9844789192603214, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9837848569531913, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9818472790176235, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9811620141415632, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9794095608752849, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9797317494238256, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9805601613621242, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9776438276396169, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9781639077120642, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9769725996973966, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9807237052091167, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9696406246043979, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9777972213943998, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9713150719802273, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9704148634895473, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.0003359369374811649, "validation/loss_best": 0.047233399003744125, "validation/acc_best": 0.9878472222222222, "validation/f1_best": 0.9857513282962304} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.1369016097858548, "train/grad": 0.08112680869176984, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.1063213348388672, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.0088532161712647, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.8666723823547363, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.7363810992240906, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.6156905746459961, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.4689409673213959, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.3234520661830902, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.219468906596303, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.16593508377671243, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.138688326459378, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.12245178317651152, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10649972361512482, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.09587554328143597, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.08417204185388982, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.07467121301218867, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.0669695569947362, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.058881978383287785, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.050712897945195436, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.042783918557688594, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03609669729135931, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.028643173426389695, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.021747649470344185, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.015122656784951687, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.009663271121680736, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0059023331478238105, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0033467101491987707, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002168191084638238, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.001531139025464654, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0010419310349971055, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.000727523760870099, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005419094022363425, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00042385399341583253, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003152927663177252, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00023452522233128548, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.357312694191933e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.5569697394967076e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00011398988775908948, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.00020286845974624157, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0029608146473765373, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.002526867799460888, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.011515565644949675, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0016635042522102595, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.007743951864540577, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0047593664098531, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00876324982382357, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.019875103514641522, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.023422381337732077, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.07009826083667577, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.07346450770273805, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01814968860242516, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01760613113641739, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.016915054325945676, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.016339820506982506, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015785330100916328, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014910626155324281, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.013392132427543401, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01124253884423524, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010107982067856937, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009540218482725323, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009174241771688685, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.00876566483057104, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008461186054628343, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008039788934402168, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00762366914190352, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007274829358793795, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006923713901778683, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0065401028230553495, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0061135449900757526, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005661473527434282, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0050235671809059565, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00430427670275094, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0034486982150701805, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0025740561966085805, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001789289553562412, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0011391575072775594, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0007583184924442321, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005301771485392237, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003457143556079245, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002382127640521503, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001787227265049296, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00014051417952941846, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001071682908150251, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.395942083552654e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.0511587984374275e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.440338203982264e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00011842028025766726, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0001843886607539097, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0005536941610493903, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0016207467725652938, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0036278086067898503, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0010806973296074301, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.003135101528093871, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.002636977682537965, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0036357082944611856, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.007507925583009239, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.008338203462778802, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.022959769883826503, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02231726826962098, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.0758898258209229, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9779286980628967, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.8340734839439392, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.7020314335823059, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.580120861530304, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.4330037236213684, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.2890850603580475, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.19657595455646515, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.14939844608306885, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.12469135224819183, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.10970621556043625, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.09540565311908722, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.08631838858127594, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07661198824644089, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06880173832178116, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.06296518445014954, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.057633399963378906, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.05303499847650528, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.049144141376018524, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.046230919659137726, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04336737096309662, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04148700460791588, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04027976468205452, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03991666063666344, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04011467471718788, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04073147848248482, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.0414736233651638, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04217622056603432, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04334702715277672, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04453825205564499, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.045692555606365204, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04659412428736687, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04775562509894371, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.04970552772283554, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05426795780658722, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07305753976106644, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.11476822197437286, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11156197637319565, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.15807664394378662, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.251038521528244, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2737712264060974, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.36530083417892456, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.36451455950737, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.43723759055137634, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5660256147384644, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.0775351524353027, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.8822868466377258, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.4478638172149658, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.4069560766220093, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7286706349206349, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7750496031746031, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8194444444444444, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8447420634920635, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8680555555555556, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9067460317460317, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9422123015873016, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9536210317460317, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9625496031746031, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9665178571428571, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9722222222222222, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9734623015873016, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9828869047619048, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.984375, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.982390873015873, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9764384920634921, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6042850088974641, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6858400505751403, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7660456004395677, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.806570039805561, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8448950783753547, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.893940327964634, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9336368945722638, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.946156860202102, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.956311066270438, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9608966424013097, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9634870330734151, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9666104453479754, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9677756850388454, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9717633969412518, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9728581196171097, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9751308558329392, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9792996514017428, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9812796115377949, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9823341074334574, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9827086378903411, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9838365772775932, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9837745386468877, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9849478016946156, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9850771570936929, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9854040232118719, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9853604019940024, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9849193430757284, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9849048179926192, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9844202990955603, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9849978637209285, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9862586662635219, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9849491845142583, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9859597844228991, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.985319150422143, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.985176440799807, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9830956284697502, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9841850639600641, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9819015111840431, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9816464249886653, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9810919916348521, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9817597361756246, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9798596241264642, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9803509852483927, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9791687898441342, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9805821271112082, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9760314497981637, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9782011095846963, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9725849729853796, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9724736109352989, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.0003152927663177252, "validation/loss_best": 0.04775562509894371, "validation/acc_best": 0.9878472222222222, "validation/f1_best": 0.9859597844228991} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.12558027796447277, "train/grad": 0.06333486536517739, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.0613479042053222, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9633420944213867, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.81848153591156, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6853114438056945, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5643748664855956, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.4194599139690399, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.2799296872317791, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.1957936092466116, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.15242857495322823, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.12961871029809116, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1155968554969877, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10142978113144636, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0916492402087897, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.0803216301649809, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.07102656939066947, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06368653129786253, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05580678683705628, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.0477888143248856, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03966912754811346, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.032842408372089266, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.025329594127833845, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.01861807650886476, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.012507743863388897, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.007878129035234452, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004769067652523518, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0026622953824698924, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.001714405668899417, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.001229909108951688, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0008546843379735946, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006053852476179599, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0004590354487299919, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003619938064366579, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0002731491252779961, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00020267595537006854, "train/loss_034_lr5.1e+00_wd1.0e+00": 8.583900518715381e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.757762722671032e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 4.999992437660694e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.672038808465004e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0012038801796734333, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0027965216338634493, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.006092581301927567, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.002185690002515912, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0036967058945447206, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00678110227920115, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00561877453699708, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0017063956521451472, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.004512574672698975, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.04084329953417182, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.030433794958516957, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01784309192094952, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017315738745965065, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.016612709471955894, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.016006134767085314, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015389370932243764, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014372483724728227, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012563687299843878, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010673084193840623, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009807444945909082, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009342383374460042, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009023557296022772, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008662060745991766, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008384592729853466, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00796602331334725, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007535492408787831, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0071839360636658965, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0068234026920981706, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006418432063073851, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005919529615784996, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00538047677371651, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0046755542664323, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0039188695378834385, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0030683889737701973, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00221481846034294, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0015139672564691863, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0009266501350793987, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0006122099552885629, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00043610679771518337, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003042968317458872, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00021289544809405925, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.000161716967340908, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00012900423323117138, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00010038821485068183, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.804260589182377e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.597765639800855e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.102730188331293e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.452338126025968e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.5462696417895558e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00016004896072681682, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0009153972179173356, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0008010696569572491, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0006357767127613405, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00261886579195813, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0018126226126352956, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.001343686619437609, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0023401474813947447, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.002336156127273833, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.008380260803733528, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.011982126684056592, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.0443514585494995, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9463452696800232, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.8011355400085449, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6671779751777649, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.545576274394989, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3985835909843445, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.25949525833129883, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.18223920464515686, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.14170540869235992, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.11935043334960938, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.105836883187294, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.09276371449232101, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.08419536799192429, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07465848326683044, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06716539710760117, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.061727650463581085, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.056636545807123184, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.05228939652442932, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04864758998155594, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.046025678515434265, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04341024532914162, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.041646987199783325, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.040528278797864914, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04026242718100548, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.040504105389118195, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04132438823580742, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04218011349439621, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.042900897562503815, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.043803829699754715, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04499040171504021, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04609866440296173, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04672225937247276, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.048198822885751724, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05011371150612831, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0538649708032608, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07164375483989716, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10944382846355438, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10877672582864761, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14694657921791077, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.22424784302711487, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2458203285932541, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3375012278556824, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2817988991737366, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.42232921719551086, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5128964781761169, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9828816056251526, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7980488538742065, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.2852756977081299, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.216370701789856, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7420634920634921, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7854662698412699, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8268849206349206, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8489583333333334, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.873015873015873, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9151785714285714, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9474206349206349, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.957093253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9640376984126984, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9675099206349206, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9722222222222222, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9737103174603174, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9806547619047619, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9818948412698413, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.986359126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.986359126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.984375, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.982390873015873, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9776785714285714, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6280839115404214, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7040169658371159, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7781348510540476, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8134525966188471, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8520939755654711, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9035592217281992, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9393146650564617, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9497883966450861, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9569921440631433, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.961145335769176, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9635366267997004, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9657775101722181, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9680474331920147, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9713916507809792, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9744294052107507, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9767545010352918, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9785305874983404, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9808832834935186, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9820675900601986, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9825070597755343, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9836063581016555, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9835469350470488, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9850245024866601, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9850400306178579, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9843315441027488, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9849107418897405, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9845674492224842, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9843619301952822, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9843760225052317, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9853601561966162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9856323047994354, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9859226365421604, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9855486942674739, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9847682233598993, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.985176440799807, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9834689929152594, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9838405314083543, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9817347043980666, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9830092605566821, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9829144154928043, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9820644236506533, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9802483664808567, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9842559879483799, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9801447639119283, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9819389854721623, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9762629118767727, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9791227258487138, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9740263085490781, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9755315779403921, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.0002731491252779961, "validation/loss_best": 0.048198822885751724, "validation/acc_best": 0.9878472222222222, "validation/f1_best": 0.9855486942674739} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.12019470866769552, "train/grad": 0.0553708684630692, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.039562931060791, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9419720268249512, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7968389081954956, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6633455276489257, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5434693050384521, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3988151812553406, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.26272351637482644, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.1897170888260007, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.15049434658139943, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.12875645762309432, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.11515613621100783, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10117665101774037, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.09123177128843964, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07933776852674783, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0695414170064032, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.061796220364049075, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.053537339521572, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04522089404053986, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03690806078724563, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.030032649673521518, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02253060780465603, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.015993451504036783, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.010358061762526632, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.006363578205928207, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0038700394798070192, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0022508085425943136, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.00151197899132967, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0011139438208192588, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0007887065876275301, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005677614081650973, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0004358241707086563, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00034894315525889395, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00026867320761084554, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.000203970056027174, "train/loss_034_lr5.1e+00_wd1.0e+00": 8.935682475566864e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.2417822405695916e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2564854696393013e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0291114449501038e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 5.382625386118889e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.7016855999827384e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 4.1215494275093075e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.3955851793289183e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.99081502109766e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0002957148291170597, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0002655654493719339, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0060197746381163595, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0070903040189296005, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0014361984189599753, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.007969269575551153, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018079110137186945, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0175406337575987, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.016786820334382356, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.016113593368791045, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015437761256471276, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014278441886417567, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012040264271199703, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010313244061544538, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009529139713849872, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009117297732736916, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008849470834247768, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008562323332298546, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008313200067495927, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.007915867023402825, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007524792146869004, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00721260248683393, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006838449141941965, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006393427932052873, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005824237823253498, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005240410170517862, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004489937699399888, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0036849370939307847, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.002789811241236748, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0019295707752462476, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0012538615200901404, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.000762223748897668, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005185457521656644, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00038340461869665885, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00027205213207707855, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00019246584346547025, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001475117945483362, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001184482669668796, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.302719876359333e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.353754353061958e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.5743156866819846e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.098545774960826e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.7965126947050435e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.3276572529582753e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 3.28588611407517e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 5.84864791584243e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 5.960487815899542e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.125989678433605e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0001469752127498307, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00016523391987083096, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00034688592582856947, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.001123219233122643, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0020273653861134756, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0018768242831067774, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.00398602901236649, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.0214718580245972, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9234022498130798, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7769677639007568, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6418811082839966, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5208850502967834, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3741667866706848, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.24075095355510712, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.17354534566402435, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.13692283630371094, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.11630446463823318, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.10344068706035614, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.09086700528860092, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.08249621093273163, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07293310016393661, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06563717871904373, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.06053509563207626, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.055607497692108154, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.05144881084561348, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.0480356439948082, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04573771357536316, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04359137639403343, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04223768040537834, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04154559597373009, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04139111191034317, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04156738519668579, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04215766862034798, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04300207644701004, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04384845867753029, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.044849369674921036, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04594836384057999, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.046832527965307236, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04742148891091347, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04858526214957237, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.050252240151166916, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.053550515323877335, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07054565101861954, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10738692432641983, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10614383220672607, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13807743787765503, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.21733437478542328, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.23596793413162231, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3218546211719513, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2565038204193115, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3894155025482178, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.46074458956718445, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9294116497039795, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7025734186172485, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.0554560422897339, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.0883502960205078, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7529761904761905, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7951388888888888, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8333333333333334, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8531746031746031, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.878968253968254, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9221230158730159, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9471726190476191, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9580853174603174, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9652777777777778, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9677579365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.970734126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9729662698412699, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9749503968253969, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9809027777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.986359126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.986359126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.984375, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9771825396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6485134476612224, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.72245902045017, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7886004318645637, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8216792327392693, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8601319460054442, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9110654696141179, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9384388011502998, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9505051407495608, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9581785192242639, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9624720350980389, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9648698416867512, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9670890127236857, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9694984256012085, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9716331650364326, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9734075329515586, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9775872012134408, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9792072106919211, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9805340333271906, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.983020061832501, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9838004253229423, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9834979761538556, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9834508189261412, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9835884664403154, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.985097262700186, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9848831537906855, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9849781856356249, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9846012047197428, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9845193652772108, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9850991410983293, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9853188090457012, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9860369439322315, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9859884441851661, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9859597844228991, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9847682233598993, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.985176440799807, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9835740750975223, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.984167504393226, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9826432128029973, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9831202778225425, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.982636536905532, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9816588646781619, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9806579713266329, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9847532341635018, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9808654084688578, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9827408096346277, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9764380544496415, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9793918781365236, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9772560927680777, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9747876585610507, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00026867320761084554, "validation/loss_best": 0.04858526214957237, "validation/acc_best": 0.9878472222222222, "validation/f1_best": 0.9859597844228991} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.11387585978955031, "train/grad": 0.05141706926748157, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.0146993064880372, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9162419986724853, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7684370565414429, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6325053882598877, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5126571846008301, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3690358865261078, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.24021295465528966, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.17622379370033742, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.1401730776950717, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.11944537408649922, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.10611254109069705, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.09236228469759226, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.08276880371384322, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07173401715233922, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06325749984011055, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.05667847380973399, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04949921083636582, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04208588462322951, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.034570359773933884, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0282203746214509, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.021260053478181364, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.015150996129959822, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.009857665775343775, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.005952414451166987, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0034921596013009547, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002012775568291545, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0013598678167909383, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0009983032383024692, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0007063110079616309, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005055672395974397, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003908649925142527, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003134369570761919, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00023825231939554215, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00018592409789562226, "train/loss_034_lr5.1e+00_wd1.0e+00": 8.424537256360054e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.908004611730575e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.1848518624901772e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.092652790248394e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.662332892417908e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.4942105412483216e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 8.464884012937545e-07, "train/loss_041_lr1.6e+01_wd1.0e+00": 8.033681660890579e-07, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.2850541174411775e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.1784639209508896e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 4.965255036950111e-05, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.00014888213016092777, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.3833856210112572e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0001757392380386591, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.3096127659082413e-05, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017732778545469046, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017234249697066842, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.016529469867236912, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015864815539680423, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015148635171353817, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01393536297371611, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01177678925683722, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01036776923807338, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009631538162939251, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009182902341708541, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008854407556355, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008476138316327706, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00816198298591189, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.007702440350549296, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007305739604635164, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007012806988786906, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006672172229154966, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006253460214938968, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005723893462563865, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005144686523126438, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004363411301746964, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0035149464130518025, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0025995906941534484, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0017786950933077604, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0011504708340362413, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0007006715388706653, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00048483233604201814, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003588721167761832, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002539944724230736, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00018136955410227528, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001404644082140294, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00011278458055130614, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.76067552871973e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.929040137947596e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.274376765749821e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.8575215697941303e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.327036132151349e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.2882027057145029e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 6.658030483506025e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 6.052698991576334e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.7498251852843199e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 6.03274526124069e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 4.383335063635337e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 4.528889582207447e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 9.336878084072509e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0002379317799510686, "train/grad_046_lr3.6e+01_wd1.0e+00": 3.8264638038861774e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00025130758190179805, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0004836420351806843, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.0058376789093018, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9076007008552551, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7604446411132812, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6247063279151917, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5038990378379822, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3572991192340851, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.22875754535198212, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.16732802987098694, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.13292263448238373, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.11333276331424713, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.10103685408830643, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.08912581950426102, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.0809435322880745, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07155586034059525, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06451011449098587, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.059346821159124374, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.0546857975423336, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.050440020859241486, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.046866875141859055, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.044249020516872406, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04200035333633423, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04066230356693268, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04007767140865326, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04013989493250847, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04067420959472656, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04169263690710068, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.0426538921892643, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.043541938066482544, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.044507551938295364, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.045727383345365524, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.046618442982435226, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04719366505742073, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04841773957014084, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.050037771463394165, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.053076427429914474, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06979356706142426, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10559792071580887, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10483625531196594, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13533812761306763, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.21068517863750458, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.23046378791332245, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3108861446380615, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.24718786776065826, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3656756579875946, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.444850355386734, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8832459449768066, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6464676856994629, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.0025231838226318, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.0658859014511108, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7591765873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7946428571428571, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8355654761904762, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8563988095238095, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8802083333333334, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9283234126984127, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.949156746031746, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9603174603174603, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9662698412698413, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.96875, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9732142857142857, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9754464285714286, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9816468253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.984375, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.986359126984127, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.986359126984127, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.980406746031746, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9769345238095238, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6578269631421568, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7214663698237551, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7926308939583457, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8260203664619011, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.861711429580193, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9183714272723081, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9415405221256599, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.95335002559259, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9601254444207309, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9631406631244827, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9664607173680725, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.968185751351291, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9704292174318534, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9719871198051516, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9740628413744796, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.977609014050618, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9787482301849815, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9817218171833529, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9828259996915976, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9830534729219904, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9843844142788966, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9856145482554495, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9850761969688674, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9854344162314819, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.985393908582588, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.985407958969666, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9848624581915979, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9845610203038673, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9845609305245983, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9853601561966162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9858869601961024, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9845075568426823, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.986139862261585, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9851974188175525, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9853992851734465, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9841531418831332, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.984347151314376, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9824646149025854, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9836582226641527, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.983040451998894, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.981884737894503, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9806579713266329, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9849342240308298, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9815544961751395, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.982000899907251, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9771693126986627, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.980836811483129, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9755136331027568, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9741426578842529, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00023825231939554215, "validation/loss_best": 0.04841773957014084, "validation/acc_best": 0.9880952380952381, "validation/f1_best": 0.986139862261585} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.11409216437488795, "train/grad": 0.05112512450665235, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.0072395133972167, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9093007898330688, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7622505187988281, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6271343326568604, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5077258443832398, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.36347308933734895, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.2381268722563982, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.17851730924099685, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.14446968536823987, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.12474703190848231, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.11201108258217574, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.09872829099185765, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.08901709727942944, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07742366570048034, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06837054618634283, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06109035479836166, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05301393799483776, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.044342032875865696, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03558056785725057, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02836558868177235, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02065899534150958, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.014180660396814346, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.008942164918407798, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0053075504675507545, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0031711048539727928, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0019136789999902248, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0013170179165899753, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.000978311700746417, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006977695692330599, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005033998657017946, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003858889825642109, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003080669417977333, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0002363471407443285, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00018653287552297115, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.016457013785839e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.1774259880185124e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.1380305513739586e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0513355955481528e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.774922177195549e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.314150333404541e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.0964088141918182e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.2624339908361435e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 4.736240953207016e-07, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.638816997408867e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 8.477549999952316e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.460554242134094e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 6.513744592666626e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0006263612303882837, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.57281756401062e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017895110337994992, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01738110449630767, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01665004570968449, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01597494594287127, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015246102288365364, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013992801699787378, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011707132407464087, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010299298495519906, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009544585831463337, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009068159726448357, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008730446043191478, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008339998949086293, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008015908797970041, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0075455635332036764, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007166853410308249, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006877912356867455, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006542335927952081, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006100635837065056, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005530220901709982, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00491865498421248, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004130809664784465, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003307144129648805, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0024186849237594287, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0016193322166509461, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0010376610617822734, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0006424161574977915, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00044746870022208894, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003341255312625435, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00023752862676701625, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0001704292022259324, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00013193703822253155, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00010687291735848703, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.512531114320154e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.774721872716327e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.2077096729826735e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.8890250108162262e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.2717780555309144e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.2532806604915806e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 7.684983105480974e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 6.039661829127979e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 2.1504492438270068e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 5.70465042701665e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.7171711433085015e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 8.586957951591121e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 4.449950640590158e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 2.239995699043399e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.1765158903617104e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00029818442270433535, "train/grad_048_lr5.0e+01_wd1.0e+00": 5.483397575847241e-07, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.996185302734375, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8979274034500122, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7502784729003906, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6141646504402161, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.4936305582523346, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3472515940666199, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.22190766036510468, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.16394667327404022, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.13084563612937927, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.11180741339921951, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.09994242340326309, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.08805932104587555, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.08014104515314102, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07055290043354034, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06361674517393112, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.05858684331178665, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.053888436406850815, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04977024719119072, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.046304091811180115, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.043825700879096985, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.041883427649736404, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.040833793580532074, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.040459953248500824, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04059534892439842, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04117520898580551, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04204000160098076, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.042972132563591, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04387851431965828, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04480713978409767, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04590911045670509, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04686547815799713, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.047506287693977356, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.048720236867666245, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05026738718152046, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05311683565378189, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06958974897861481, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1045670285820961, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10356085002422333, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13397125899791718, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.20732930302619934, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.22605744004249573, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3044876158237457, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2422076314687729, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3557857275009155, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4307841956615448, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8525161743164062, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6185630559921265, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9501674175262451, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.01591157913208, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7638888888888888, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7986111111111112, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8360615079365079, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8598710317460317, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8829365079365079, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9315476190476191, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9506448412698413, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9610615079365079, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9672619047619048, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.96875, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9737103174603174, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9756944444444444, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.982390873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.986359126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.986359126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.984375, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9771825396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6649097677355295, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7293039474608525, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7940373468830244, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8325213401448246, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8662495900057297, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9218598122242286, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.943787401132659, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.954292528627621, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9614775368294419, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9628843200648515, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9666650927152942, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9685443161724675, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9702108477805714, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9734921139776391, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9750697445706012, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9784985513544229, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9791477879878385, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9818906665799508, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9834046251745066, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9840731561307569, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9844327007731063, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9862830802165663, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9849161685854023, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9852959633451805, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9853002308174775, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9848861992592537, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9847220779571669, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9845420185905531, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9844340238304512, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9853601561966162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9862586662635219, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9848853018009655, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.986139862261585, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9846464917553089, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9853992851734465, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9840519865798085, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.984347151314376, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9829245891384849, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.983605562982839, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9830398910425883, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9824463821046543, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.980279380635106, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9847532341635018, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9815544961751395, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9819645023449962, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9777166749409403, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9806088610811938, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9764674716071691, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9745098807636265, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 0.014180660396814346, "validation/loss_best": 0.040833793580532074, "validation/acc_best": 0.9880952380952381, "validation/f1_best": 0.9862830802165663} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.11186120167374611, "train/grad": 0.04928977217525244, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9995423793792725, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9013429498672485, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7536080956459046, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6181793522834778, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4992461860179901, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3547468401491642, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.23038916885852814, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.17239696405827998, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.1388897306472063, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.1196626217942685, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.10721784799359739, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.09439176534302533, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0851142053771764, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07379182182252407, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06480195176787674, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.05776142238639295, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05007246439345181, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04208315826021135, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.034046691274270416, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02735530076548457, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02006717214360833, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.01382724368944764, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.008577809473499655, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.004950603479519486, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0029214523453265427, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0017559064459055662, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0012093378510326146, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0009042274579405784, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006514251697808504, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.00047191129066050054, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003656477946788073, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00029469198547303675, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00022761942818760872, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001776273548603058, "train/loss_034_lr5.1e+00_wd1.0e+00": 8.363357745110988e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.4371092915534973e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 9.257569909095764e-06, "train/loss_037_lr8.3e+00_wd1.0e+00": 8.465414866805077e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.4325476735830308e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2530572712421418e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 9.499769657850265e-07, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.8421746790409088e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.392847090959549e-07, "train/loss_043_lr2.2e+01_wd1.0e+00": 4.802504554390907e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 6.016157567501068e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.16751529276371e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.155507147312164e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.0920857787132264e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.5848316252231597e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017262079562060537, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01673744238447398, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01601859594229609, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015377154778689147, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014702370185405015, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013462818711996078, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01119642200646922, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.009898734481539577, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009187380021903663, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008752124638995155, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008464582896558569, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008150261752307415, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007861457339022309, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00743029250879772, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007063407562091015, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006768126505194232, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.00642425047524739, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005999677767977119, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005438979844329879, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0048504963878076525, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00407056508003734, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0032113095847307706, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0022985880853957498, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0015127554156060797, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0009675867738405941, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0005999806579347933, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0004185720240639057, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003138498131011147, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00022527233368236922, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00016198410727156443, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00012660475147640681, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00010193788741162279, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.177943957889511e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.529777616833599e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.082820679059296e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.7009390458800907e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.2202225235715681e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.2519041529682196e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 7.2031793220109565e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 5.02245221436742e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 2.3249796225514877e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.504284044601668e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.9857751960977885e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.4165595691054893e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 4.954200919095091e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 3.452013185356291e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.7586465373043778e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 7.139055661007851e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.094836424379618e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9909851551055908, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.892770528793335, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7449555397033691, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6086297035217285, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.48828211426734924, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.34185969829559326, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.2184213399887085, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.16226941347122192, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.12975651025772095, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.11108813434839249, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.09928644448518753, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.0877004936337471, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.07969644665718079, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07014307379722595, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06319605559110641, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.05823773518204689, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.05355382338166237, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04936812072992325, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04586903378367424, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04359027370810509, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04158001020550728, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.040597423911094666, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04022932052612305, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.040498144924640656, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.041025783866643906, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04205919802188873, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04299522191286087, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04374002665281296, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04481405392289162, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.046017520129680634, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04678110033273697, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04746982827782631, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04864910989999771, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05035143718123436, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.053034380078315735, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06920564919710159, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10415807366371155, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10280820727348328, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13342437148094177, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.20523828268051147, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.22435986995697021, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3011138439178467, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.23879316449165344, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.350330650806427, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4219494163990021, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8359172940254211, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6032310128211975, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9289364218711853, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.9913876056671143, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7668650793650794, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8033234126984127, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8373015873015873, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8601190476190477, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8849206349206349, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9325396825396826, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9513888888888888, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9610615079365079, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9670138888888888, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9689980158730159, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9734623015873016, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9754464285714286, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.982390873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.984375, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.986359126984127, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9771825396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6711154149514319, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7375268645551559, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7962663734497569, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8337838215608312, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8680990291434195, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9234206329159576, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.94410719378649, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9543823490826323, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9609454606193996, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9637956409320727, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9663537984927786, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9682024664025499, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9700213452366381, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.973671150233095, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9744481129186223, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9784985513544229, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9794640207414183, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9824281097753155, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9835808605601672, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9838118578483487, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.984734681809541, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9853376716784291, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9847517805976079, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9856550475845539, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9858161819957202, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9856293318419037, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.985084180522888, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9844202990955603, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9844340238304512, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9855405313769913, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9862586662635219, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9852600027873513, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.986139862261585, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9846464917553089, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9853992851734465, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9845269028264009, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.984347151314376, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9825667821562182, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9836599634826787, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.983040451998894, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9824463821046543, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.980279380635106, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9849342240308298, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9815544961751395, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9821794401757606, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9777166749409403, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9806088610811938, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9762877416048878, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9743216470417427, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00022761942818760872, "validation/loss_best": 0.04864910989999771, "validation/acc_best": 0.9880952380952381, "validation/f1_best": 0.986139862261585} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.10927544806152582, "train/grad": 0.049960434529930355, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9870494747161865, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8894594621658325, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7426150918006897, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.608354012966156, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4907065236568451, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.34726208209991455, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.22503793805837632, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.1684505542367697, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.13521392641589045, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.11569033212959766, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.10289867352694274, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.08962038662284613, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0801340806297958, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.068839873643592, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06015648214146495, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.053436296228319406, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04607430573552847, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03849477991461754, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.030886287027969958, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02448169651441276, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.017542229359969498, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.011810725582763552, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.007323157703503966, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.004372109426185489, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0026756766345351936, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0016445933096110822, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0011492541711777449, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0008609866537153721, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006186054274439812, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004516058601438999, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003533796779811382, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00028576225973665716, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00022085214965045452, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001710923481732607, "train/loss_034_lr5.1e+00_wd1.0e+00": 7.749008014798164e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.4701135009527204e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.0281018912792206e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0285889729857444e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.862522378563881e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.1211057901382446e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 8.014962077140808e-07, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.626914694905281e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5141239166259765e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.4989526718854904e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 5.418248474597931e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.5881454348564146e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.9531925469636915e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.6499217599630355e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.735147088766098e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017592672472819686, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01707810311578214, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01634003367740661, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015661895261146128, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014950103694573044, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013670124346390367, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011247387370094657, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.009950475718360394, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009294502947013825, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.00890668501611799, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008632311893161387, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008305578604340553, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008006392638199031, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00753294721711427, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0071301260677864775, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006809076832141727, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006415731412707828, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005931235694733914, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005317678259743843, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004689652764936909, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003877398292243015, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003025847103563137, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.002144625953369541, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0014075750902702566, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0009018415988975903, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0005683730822784128, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0004020237553777406, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003013179284607759, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00021658427382135415, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00015655027202228665, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00012237899477440806, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.960293485164584e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.870074818129069e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.279968532908242e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.046278254804747e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.741876271414867e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.1696041952173885e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.3190422577658012e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 6.5398046635225746e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 3.889737865074928e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 2.1187951014710366e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 5.7495975956562555e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 6.178254232341302e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.1520412777244165e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 6.561566749390134e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 8.313599308347314e-07, "train/grad_046_lr3.6e+01_wd1.0e+00": 3.319189989724388e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.8303269806083143e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 7.094256992064689e-07, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.989124596118927, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.890789270401001, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7428486943244934, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.606720507144928, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.4864427149295807, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.339956134557724, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.21701008081436157, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.16151799261569977, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.12925001978874207, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.110611692070961, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.09908094257116318, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.08742587268352509, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.07947365194559097, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.0700116977095604, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06305404007434845, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.058131150901317596, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.05345185473561287, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04928625002503395, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04581957682967186, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04353383183479309, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04156295955181122, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04044535383582115, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04017499089241028, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04030761867761612, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04092884808778763, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04194526746869087, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04285505786538124, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.043890856206417084, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.044768333435058594, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04599601402878761, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.046771224588155746, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.047475241124629974, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.048610869795084, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05033993721008301, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05306413397192955, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.0690770223736763, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10365622490644455, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10284196585416794, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13345536589622498, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.20419062674045563, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.223781555891037, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3009415864944458, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2380731850862503, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3475973904132843, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4188472032546997, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8316266536712646, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5992243885993958, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9235479235649109, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.986843466758728, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7666170634920635, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8018353174603174, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8395337301587301, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8618551587301587, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8849206349206349, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9332837301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9513888888888888, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9610615079365079, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.966765873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9694940476190477, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9737103174603174, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.986359126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.986359126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.984375, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.986359126984127, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9769345238095238, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6696461511019592, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7345307470299837, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7983799897706829, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8359380712296831, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8684975134663213, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9240238804115222, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9441741457354492, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9540270183680503, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9607185133267161, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9644962858360808, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.96635544724082, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9687516240744, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.971065593446579, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9733169985055509, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9746199929549924, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9786788019101942, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9798845996879721, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9822504492019406, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9836251936570939, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.984252631717311, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.98402591958478, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9853585350209124, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9843719858378298, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9856550475845539, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9853891464575691, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9854249450273513, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.985084180522888, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9847827426351575, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9844340238304512, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9855405313769913, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9858869601961024, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9848853018009655, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.986139862261585, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9846464917553089, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9853992851734465, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9847489893573154, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9847301400484774, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9829245891384849, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9836073038013651, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9832203553858524, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9824463821046543, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.980279380635106, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9849342240308298, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9815544961751395, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9820022164439696, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9779444424270709, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9806088610811938, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9764674716071691, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.974152256706553, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00022085214965045452, "validation/loss_best": 0.048610869795084, "validation/acc_best": 0.9880952380952381, "validation/f1_best": 0.986139862261585} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.109696853607893, "train/grad": 0.04898130999878049, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9896071338653565, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8920638608932495, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7451926350593567, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6108011746406555, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4926331686973572, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3482095578312874, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.22588598139584065, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.16942031178623437, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.13625304838642477, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.11694036388769746, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.10425295284949243, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.09103690445423127, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.08136896743439138, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06970941396430134, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.060738289803266524, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.053749956199899314, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.046198578476905824, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03838971795514226, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.030514286756515505, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.023917512549087406, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.016999694732949138, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.011378754638135433, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.007033324558287859, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.004253943329676986, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.002654730752110481, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.001653568297624588, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0011520957853645086, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0008653884287923574, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006266319192945957, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.000455063721165061, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003535536862909794, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00028680301271378995, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00021994290873408317, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00016800722107291222, "train/loss_034_lr5.1e+00_wd1.0e+00": 8.035535924136639e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.442580811679363e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.1145202443003654e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 9.458158165216447e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.7662591785192488e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.749655395746231e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 9.67038795351982e-07, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.682499423623085e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.8799956887960435e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 4.402529448270798e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 7.320661097764968e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.0318664610385894e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.791550502181053e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.8851069509983064e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.946277916431427e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017219421681948007, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01668889531865716, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01593841810710728, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015278305471874774, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014619757784530521, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013413512352854014, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011157157784327864, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0099049501423724, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00921172764385119, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008782655342947691, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008480001360876486, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008143839493859559, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007849469915963709, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0073688893346115945, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0069753200287232175, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00666086386074312, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006281377032282762, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005818750518665183, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005239148140826728, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004634361394273583, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003837585406145081, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0029954380267008672, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0021237075493263547, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0013918193144490942, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0008875255826569628, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0005597977861907566, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00039354299238766546, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00029698539510718546, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00021454108067700873, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00015620956859493162, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00012149898741881771, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.820895175835176e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.813553749656421e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.205210893313051e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 3.924047609984882e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.6098238215155334e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.1087709542954016e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.3705305431264414e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 6.723773103989149e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 5.2870030188545335e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 2.6664219011624323e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.213314905489297e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 6.2621585998809534e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.1229464523108386e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 3.6591754490625973e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.5475807885638552e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.65317883231408e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 9.761026129635395e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 7.056619925742977e-07, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9888098835945129, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8905174136161804, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7425864338874817, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6064080595970154, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.48609334230422974, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3396461308002472, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.21680422127246857, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.1614212691783905, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.12919101119041443, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.11057217419147491, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.09901019185781479, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.08738052099943161, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.07944834977388382, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.06992873549461365, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06301837414503098, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.058075133711099625, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.05340605974197388, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.049259722232818604, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04576190933585167, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04340645670890808, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04143254831433296, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04036999121308327, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04005430266261101, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04037666320800781, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04097631573677063, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04188162460923195, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04279431700706482, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04374754801392555, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04485068470239639, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04596459120512009, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04674509912729263, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04748697951436043, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04859401285648346, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05032411590218544, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05307602137327194, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.0689486712217331, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10372164100408554, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10286152362823486, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1331644505262375, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.20413120090961456, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.22353920340538025, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.30036062002182007, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.23748467862606049, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.34653812646865845, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.41828015446662903, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8303323984146118, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5974048376083374, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9218462705612183, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.9861732125282288, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7673611111111112, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8015873015873016, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.839781746031746, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8618551587301587, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8851686507936508, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.933531746031746, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.951140873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9610615079365079, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.966765873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9694940476190477, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9737103174603174, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.986359126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.986359126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.984375, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.986359126984127, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9771825396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6712416063955362, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7344183128158959, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7991349322029939, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8356678314203385, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8686114480026691, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9242065304268245, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9436571358125716, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9540270183680503, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9607185133267161, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9644962858360808, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.96635544724082, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9687516240744, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.970948352411473, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9733169985055509, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9746199929549924, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9786788019101942, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9798845996879721, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9824296180223616, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9838043470769604, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.984252631717311, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9842054298433986, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9851779523343284, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9843708095879564, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9856550475845539, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9855866480074738, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9854249450273513, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.985084180522888, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9847827426351575, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9844340238304512, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9855405313769913, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9858869601961024, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9848853018009655, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.986139862261585, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9846464917553089, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9853992851734465, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9847489893573154, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9847301400484774, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9829245891384849, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9836073038013651, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9832203553858524, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9824463821046543, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.980279380635106, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9849342240308298, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9815544961751395, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9820022164439696, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9779444424270709, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9806088610811938, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9764674716071691, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.974463424119472, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00021994290873408317, "validation/loss_best": 0.04859401285648346, "validation/acc_best": 0.9880952380952381, "validation/f1_best": 0.986139862261585} diff --git a/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/config.yaml b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9f42c81a5d0ebdad0d9768de4681296d8dcd3ce --- /dev/null +++ b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n800_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..051b3fe13672e6a6a143b0a7ad0f10b189e77055 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 5, "eval/id_best": 26, "eval/lr_best": 0.00041999999999999996, "eval/wd_best": 0.05, "eval/train/loss": 2.097738265991211, "eval/train/acc": 0.370232643904238, "eval/train/acc_std": 0.002471604508938681, "eval/train/f1": 0.3092126652150962, "eval/train/f1_std": 0.002491458271523646, "eval/validation/loss": 2.3706860542297363, "eval/validation/acc": 0.2862679955703212, "eval/validation/acc_std": 0.005476649126067028, "eval/validation/f1": 0.21881163265593262, "eval/validation/f1_std": 0.005316580779719462, "eval/test/loss": 2.351391077041626, "eval/test/acc": 0.2855287569573284, "eval/test/acc_std": 0.005354620971114648, "eval/test/f1": 0.2202173896859306, "eval/test/f1_std": 0.005180939177282511, "eval/testid/loss": 2.3106331825256348, "eval/testid/acc": 0.2959321380374012, "eval/testid/acc_std": 0.005774897294308081, "eval/testid/f1": 0.23944871818440439, "eval/testid/f1_std": 0.005730383656711987} diff --git a/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..9557b12e7afabdd6c1ef3094b4a71a54f36eb616 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 5, "eval/best/id_best": 26, "eval/best/lr_best": 0.00041999999999999996, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.097738265991211, "eval/best/train/acc": 0.370232643904238, "eval/best/train/acc_std": 0.002471604508938681, "eval/best/train/f1": 0.3092126652150962, "eval/best/train/f1_std": 0.002491458271523646, "eval/best/validation/loss": 2.3706860542297363, "eval/best/validation/acc": 0.2862679955703212, "eval/best/validation/acc_std": 0.005476649126067028, "eval/best/validation/f1": 0.21881163265593262, "eval/best/validation/f1_std": 0.005316580779719462, "eval/best/test/loss": 2.351391077041626, "eval/best/test/acc": 0.2855287569573284, "eval/best/test/acc_std": 0.005354620971114648, "eval/best/test/f1": 0.2202173896859306, "eval/best/test/f1_std": 0.005180939177282511, "eval/best/testid/loss": 2.3106331825256348, "eval/best/testid/acc": 0.2959321380374012, "eval/best/testid/acc_std": 0.005774897294308081, "eval/best/testid/f1": 0.23944871818440439, "eval/best/testid/f1_std": 0.005730383656711987} diff --git a/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..d3134df75ffcee81124a3d179a17127d55814a2e --- /dev/null +++ b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 18, "eval/last/lr_best": 0.00011399999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 2.0890188217163086, "eval/last/train/acc": 0.37262976735609576, "eval/last/train/acc_std": 0.002399306317819814, "eval/last/train/f1": 0.31216785601736996, "eval/last/train/f1_std": 0.002493923312760216, "eval/last/validation/loss": 2.3983216285705566, "eval/last/validation/acc": 0.27445551864156514, "eval/last/validation/acc_std": 0.005281221483487775, "eval/last/validation/f1": 0.20935826360407148, "eval/last/validation/f1_std": 0.004894935029749872, "eval/last/test/loss": 2.3361010551452637, "eval/last/test/acc": 0.3007421150278293, "eval/last/test/acc_std": 0.005188524376932158, "eval/last/test/f1": 0.22225186954878193, "eval/last/test/f1_std": 0.005071978126023731, "eval/last/testid/loss": 2.280708074569702, "eval/last/testid/acc": 0.30229419703103916, "eval/last/testid/acc_std": 0.00580966765465548, "eval/last/testid/f1": 0.24281670994563687, "eval/last/testid/f1_std": 0.005829004152180844} diff --git a/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..921da6c9d8d8417bd2c4ecaf2dfa33159845a4dc --- /dev/null +++ b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",train,2.097738265991211,0.370232643904238,0.002471604508938681,0.3092126652150962,0.002491458271523646 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",validation,2.3706860542297363,0.2862679955703212,0.005476649126067028,0.21881163265593262,0.005316580779719462 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",test,2.351391077041626,0.2855287569573284,0.005354620971114648,0.2202173896859306,0.005180939177282511 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",testid,2.3106331825256348,0.2959321380374012,0.005774897294308081,0.23944871818440439,0.005730383656711987 diff --git a/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..921da6c9d8d8417bd2c4ecaf2dfa33159845a4dc --- /dev/null +++ b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",train,2.097738265991211,0.370232643904238,0.002471604508938681,0.3092126652150962,0.002491458271523646 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",validation,2.3706860542297363,0.2862679955703212,0.005476649126067028,0.21881163265593262,0.005316580779719462 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",test,2.351391077041626,0.2855287569573284,0.005354620971114648,0.2202173896859306,0.005180939177282511 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",testid,2.3106331825256348,0.2959321380374012,0.005774897294308081,0.23944871818440439,0.005730383656711987 diff --git a/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..1cd1341a2d47556ec45e2ae9773396015378153e --- /dev/null +++ b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",train,2.0890188217163086,0.37262976735609576,0.002399306317819814,0.31216785601736996,0.002493923312760216 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",validation,2.3983216285705566,0.27445551864156514,0.005281221483487775,0.20935826360407148,0.004894935029749872 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",test,2.3361010551452637,0.3007421150278293,0.005188524376932158,0.22225186954878193,0.005071978126023731 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",testid,2.280708074569702,0.30229419703103916,0.00580966765465548,0.24281670994563687,0.005829004152180844 diff --git a/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/log.txt b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..807c37e3e5680c695a959d583fcf08bb4a3d53e3 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,962 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 20:27:25 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n800_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:24:28 lr: nan time: 3.6723 data: 3.0563 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:53 lr: 0.000003 loss: 3.1766 (3.1794) grad: 0.1683 (0.1711) time: 0.4622 data: 0.0038 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:09 lr: 0.000006 loss: 3.1717 (3.1678) grad: 0.1672 (0.1694) time: 0.4327 data: 0.0050 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:48 lr: 0.000009 loss: 3.1595 (3.1694) grad: 0.1672 (0.1698) time: 0.4337 data: 0.0046 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:34 lr: 0.000012 loss: 3.1557 (3.1660) grad: 0.1662 (0.1682) time: 0.4449 data: 0.0046 max mem: 22448 +train: [0] [100/400] eta: 0:02:22 lr: 0.000015 loss: 3.1585 (3.1660) grad: 0.1596 (0.1669) time: 0.4376 data: 0.0044 max mem: 22448 +train: [0] [120/400] eta: 0:02:11 lr: 0.000018 loss: 3.1636 (3.1653) grad: 0.1512 (0.1646) time: 0.4462 data: 0.0045 max mem: 22448 +train: [0] [140/400] eta: 0:02:00 lr: 0.000021 loss: 3.1523 (3.1632) grad: 0.1543 (0.1638) time: 0.4350 data: 0.0047 max mem: 22448 +train: [0] [160/400] eta: 0:01:50 lr: 0.000024 loss: 3.1490 (3.1613) grad: 0.1652 (0.1653) time: 0.4462 data: 0.0048 max mem: 22448 +train: [0] [180/400] eta: 0:01:41 lr: 0.000027 loss: 3.1369 (3.1596) grad: 0.1652 (0.1649) time: 0.4517 data: 0.0047 max mem: 22448 +train: [0] [200/400] eta: 0:01:32 lr: 0.000030 loss: 3.1593 (3.1596) grad: 0.1501 (0.1633) time: 0.4640 data: 0.0049 max mem: 22448 +train: [0] [220/400] eta: 0:01:22 lr: 0.000033 loss: 3.1644 (3.1598) grad: 0.1556 (0.1628) time: 0.4514 data: 0.0045 max mem: 22448 +train: [0] [240/400] eta: 0:01:13 lr: 0.000036 loss: 3.1466 (3.1584) grad: 0.1556 (0.1621) time: 0.4426 data: 0.0047 max mem: 22448 +train: [0] [260/400] eta: 0:01:04 lr: 0.000039 loss: 3.1269 (3.1568) grad: 0.1521 (0.1612) time: 0.4508 data: 0.0046 max mem: 22448 +train: [0] [280/400] eta: 0:00:54 lr: 0.000042 loss: 3.1189 (3.1542) grad: 0.1514 (0.1607) time: 0.4537 data: 0.0047 max mem: 22448 +train: [0] [300/400] eta: 0:00:45 lr: 0.000045 loss: 3.1081 (3.1502) grad: 0.1538 (0.1605) time: 0.4384 data: 0.0045 max mem: 22448 +train: [0] [320/400] eta: 0:00:36 lr: 0.000048 loss: 3.0989 (3.1478) grad: 0.1590 (0.1610) time: 0.4484 data: 0.0046 max mem: 22448 +train: [0] [340/400] eta: 0:00:27 lr: 0.000051 loss: 3.1067 (3.1450) grad: 0.1612 (0.1609) time: 0.4506 data: 0.0049 max mem: 22448 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 3.0844 (3.1412) grad: 0.1637 (0.1615) time: 0.4389 data: 0.0046 max mem: 22448 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 3.0844 (3.1381) grad: 0.1719 (0.1623) time: 0.4519 data: 0.0048 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.0849 (3.1358) grad: 0.1769 (0.1631) time: 0.4533 data: 0.0050 max mem: 22448 +train: [0] Total time: 0:03:02 (0.4552 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.0849 (3.1358) grad: 0.1769 (0.1631) +eval (validation): [0] [ 0/85] eta: 0:04:33 time: 3.2158 data: 2.9788 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:30 time: 0.3392 data: 0.0049 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:18 time: 0.3528 data: 0.0035 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:09 time: 0.3368 data: 0.0040 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3116 data: 0.0041 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3152 data: 0.0041 max mem: 22448 +eval (validation): [0] Total time: 0:00:31 (0.3721 s / it) +cv: [0] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 2.589 acc: 0.222 f1: 0.156 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:21:48 lr: nan time: 3.2707 data: 2.8803 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:44 lr: 0.000063 loss: 3.0332 (3.0362) grad: 0.1755 (0.1712) time: 0.4561 data: 0.0032 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:05 lr: 0.000066 loss: 3.0417 (3.0344) grad: 0.1666 (0.1676) time: 0.4391 data: 0.0049 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:48 lr: 0.000069 loss: 3.0319 (3.0241) grad: 0.1640 (0.1688) time: 0.4496 data: 0.0047 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:34 lr: 0.000072 loss: 3.0065 (3.0238) grad: 0.1713 (0.1711) time: 0.4468 data: 0.0047 max mem: 22448 +train: [1] [100/400] eta: 0:02:22 lr: 0.000075 loss: 3.0141 (3.0203) grad: 0.1728 (0.1731) time: 0.4405 data: 0.0045 max mem: 22448 +train: [1] [120/400] eta: 0:02:10 lr: 0.000078 loss: 3.0210 (3.0192) grad: 0.1819 (0.1747) time: 0.4327 data: 0.0046 max mem: 22448 +train: [1] [140/400] eta: 0:02:00 lr: 0.000081 loss: 3.0058 (3.0162) grad: 0.1811 (0.1757) time: 0.4367 data: 0.0046 max mem: 22448 +train: [1] [160/400] eta: 0:01:51 lr: 0.000084 loss: 3.0058 (3.0172) grad: 0.1780 (0.1761) time: 0.4591 data: 0.0050 max mem: 22448 +train: [1] [180/400] eta: 0:01:41 lr: 0.000087 loss: 3.0186 (3.0180) grad: 0.1766 (0.1771) time: 0.4591 data: 0.0048 max mem: 22448 +train: [1] [200/400] eta: 0:01:32 lr: 0.000090 loss: 2.9912 (3.0156) grad: 0.1832 (0.1780) time: 0.4411 data: 0.0048 max mem: 22448 +train: [1] [220/400] eta: 0:01:22 lr: 0.000093 loss: 2.9499 (3.0093) grad: 0.1895 (0.1803) time: 0.4372 data: 0.0047 max mem: 22448 +train: [1] [240/400] eta: 0:01:13 lr: 0.000096 loss: 2.9477 (3.0060) grad: 0.1916 (0.1805) time: 0.4502 data: 0.0048 max mem: 22448 +train: [1] [260/400] eta: 0:01:03 lr: 0.000099 loss: 2.9697 (3.0052) grad: 0.1833 (0.1812) time: 0.4372 data: 0.0048 max mem: 22448 +train: [1] [280/400] eta: 0:00:54 lr: 0.000102 loss: 2.9714 (3.0020) grad: 0.1817 (0.1815) time: 0.4405 data: 0.0047 max mem: 22448 +train: [1] [300/400] eta: 0:00:45 lr: 0.000105 loss: 2.9524 (3.0002) grad: 0.1894 (0.1822) time: 0.4375 data: 0.0047 max mem: 22448 +train: [1] [320/400] eta: 0:00:36 lr: 0.000108 loss: 2.9373 (2.9959) grad: 0.1927 (0.1831) time: 0.4438 data: 0.0046 max mem: 22448 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 2.9206 (2.9916) grad: 0.1928 (0.1836) time: 0.4378 data: 0.0046 max mem: 22448 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 2.9264 (2.9896) grad: 0.1906 (0.1840) time: 0.4509 data: 0.0048 max mem: 22448 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 2.9325 (2.9859) grad: 0.1924 (0.1850) time: 0.4448 data: 0.0048 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.9325 (2.9841) grad: 0.2096 (0.1870) time: 0.4786 data: 0.0048 max mem: 22448 +train: [1] Total time: 0:03:01 (0.4534 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.9325 (2.9841) grad: 0.2096 (0.1870) +eval (validation): [1] [ 0/85] eta: 0:04:38 time: 3.2708 data: 2.9763 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:32 time: 0.3590 data: 0.0057 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:18 time: 0.3218 data: 0.0036 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:09 time: 0.3399 data: 0.0040 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3366 data: 0.0042 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3205 data: 0.0041 max mem: 22448 +eval (validation): [1] Total time: 0:00:31 (0.3753 s / it) +cv: [1] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 2.490 acc: 0.247 f1: 0.180 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:21:36 lr: nan time: 3.2419 data: 2.8591 max mem: 22448 +train: [2] [ 20/400] eta: 0:03:40 lr: 0.000123 loss: 2.9749 (2.9589) grad: 0.2997 (0.2971) time: 0.4478 data: 0.0033 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:04 lr: 0.000126 loss: 3.0032 (3.0440) grad: 0.3263 (0.4552) time: 0.4422 data: 0.0048 max mem: 22448 +train: [2] [ 60/400] eta: 0:02:46 lr: 0.000129 loss: 3.3107 (3.2153) grad: 0.9220 (0.7509) time: 0.4374 data: 0.0044 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=68.03 > 63.56) at step 433. Freezing. +train: [2] [ 80/400] eta: 0:02:32 lr: 0.000132 loss: 3.4484 (3.2163) grad: 1.1682 (0.7349) time: 0.4362 data: 0.0046 max mem: 22448 +train: [2] [100/400] eta: 0:02:19 lr: 0.000135 loss: 2.9462 (3.1517) grad: 0.2116 (0.6309) time: 0.4294 data: 0.0048 max mem: 22448 +train: [2] [120/400] eta: 0:02:09 lr: 0.000138 loss: 2.8875 (3.1074) grad: 0.2113 (0.5617) time: 0.4349 data: 0.0047 max mem: 22448 +train: [2] [140/400] eta: 0:01:59 lr: 0.000141 loss: 2.9057 (3.0832) grad: 0.2199 (0.5143) time: 0.4476 data: 0.0046 max mem: 22448 +train: [2] [160/400] eta: 0:01:50 lr: 0.000144 loss: 2.9059 (3.0580) grad: 0.2238 (0.4781) time: 0.4547 data: 0.0046 max mem: 22448 +train: [2] [180/400] eta: 0:01:40 lr: 0.000147 loss: 2.8646 (3.0371) grad: 0.2162 (0.4490) time: 0.4521 data: 0.0047 max mem: 22448 +train: [2] [200/400] eta: 0:01:31 lr: 0.000150 loss: 2.8597 (3.0193) grad: 0.2095 (0.4256) time: 0.4437 data: 0.0048 max mem: 22448 +train: [2] [220/400] eta: 0:01:21 lr: 0.000153 loss: 2.8656 (3.0061) grad: 0.2163 (0.4074) time: 0.4445 data: 0.0045 max mem: 22448 +train: [2] [240/400] eta: 0:01:12 lr: 0.000156 loss: 2.9038 (2.9981) grad: 0.2227 (0.3923) time: 0.4464 data: 0.0047 max mem: 22448 +train: [2] [260/400] eta: 0:01:03 lr: 0.000159 loss: 2.9000 (2.9872) grad: 0.2176 (0.3789) time: 0.4387 data: 0.0045 max mem: 22448 +train: [2] [280/400] eta: 0:00:54 lr: 0.000162 loss: 2.8452 (2.9771) grad: 0.2239 (0.3691) time: 0.4290 data: 0.0046 max mem: 22448 +train: [2] [300/400] eta: 0:00:45 lr: 0.000165 loss: 2.8343 (2.9682) grad: 0.2287 (0.3595) time: 0.4364 data: 0.0047 max mem: 22448 +train: [2] [320/400] eta: 0:00:35 lr: 0.000168 loss: 2.8285 (2.9598) grad: 0.2261 (0.3512) time: 0.4388 data: 0.0047 max mem: 22448 +train: [2] [340/400] eta: 0:00:26 lr: 0.000171 loss: 2.8379 (2.9546) grad: 0.2262 (0.3442) time: 0.4410 data: 0.0045 max mem: 22448 +train: [2] [360/400] eta: 0:00:17 lr: 0.000174 loss: 2.8413 (2.9503) grad: 0.2379 (0.3386) time: 0.4469 data: 0.0048 max mem: 22448 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 2.8614 (2.9464) grad: 0.2469 (0.3345) time: 0.4630 data: 0.0050 max mem: 22448 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 2.8614 (2.9418) grad: 0.2630 (0.3313) time: 0.4520 data: 0.0047 max mem: 22448 +train: [2] Total time: 0:03:00 (0.4506 s / it) +train: [2] Summary: lr: 0.000180 loss: 2.8614 (2.9418) grad: 0.2630 (0.3313) +eval (validation): [2] [ 0/85] eta: 0:04:30 time: 3.1867 data: 2.9208 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:31 time: 0.3522 data: 0.0042 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:19 time: 0.3591 data: 0.0040 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:09 time: 0.3319 data: 0.0040 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3094 data: 0.0039 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3032 data: 0.0039 max mem: 22448 +eval (validation): [2] Total time: 0:00:31 (0.3728 s / it) +cv: [2] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 2.453 acc: 0.260 f1: 0.196 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:21:15 lr: nan time: 3.1899 data: 2.8158 max mem: 22448 +train: [3] [ 20/400] eta: 0:03:30 lr: 0.000183 loss: 2.8638 (2.8681) grad: 0.3195 (0.3527) time: 0.4233 data: 0.0031 max mem: 22448 +train: [3] [ 40/400] eta: 0:02:58 lr: 0.000186 loss: 2.9905 (3.1238) grad: 0.5799 (0.7978) time: 0.4309 data: 0.0046 max mem: 22448 +WARNING: classifier 46 (36, 1.0) diverged (loss=78.93 > 63.56) at step 625. Freezing. +train: [3] [ 60/400] eta: 0:02:41 lr: 0.000189 loss: 3.3278 (3.1857) grad: 1.1468 (0.8514) time: 0.4330 data: 0.0046 max mem: 22448 +train: [3] [ 80/400] eta: 0:02:28 lr: 0.000192 loss: 2.8307 (3.0965) grad: 0.2496 (0.7003) time: 0.4307 data: 0.0047 max mem: 22448 +train: [3] [100/400] eta: 0:02:17 lr: 0.000195 loss: 2.8531 (3.0600) grad: 0.2885 (0.6496) time: 0.4336 data: 0.0048 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=65.68 > 63.56) at step 655. Freezing. +train: [3] [120/400] eta: 0:02:06 lr: 0.000198 loss: 2.9905 (3.1188) grad: 0.6558 (0.7382) time: 0.4281 data: 0.0049 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=75.28 > 63.56) at step 670. Freezing. +train: [3] [140/400] eta: 0:01:57 lr: 0.000201 loss: 3.5217 (3.1994) grad: 1.3567 (0.8752) time: 0.4500 data: 0.0050 max mem: 22448 +train: [3] [160/400] eta: 0:01:48 lr: 0.000204 loss: 2.9544 (3.1580) grad: 0.2619 (0.7950) time: 0.4384 data: 0.0047 max mem: 22448 +train: [3] [180/400] eta: 0:01:38 lr: 0.000207 loss: 2.8510 (3.1213) grad: 0.2353 (0.7339) time: 0.4435 data: 0.0050 max mem: 22448 +train: [3] [200/400] eta: 0:01:29 lr: 0.000210 loss: 2.8303 (3.0956) grad: 0.2565 (0.6863) time: 0.4241 data: 0.0045 max mem: 22448 +train: [3] [220/400] eta: 0:01:20 lr: 0.000213 loss: 2.8894 (3.0816) grad: 0.2834 (0.6694) time: 0.4513 data: 0.0047 max mem: 22448 +train: [3] [240/400] eta: 0:01:11 lr: 0.000216 loss: 3.0236 (3.0987) grad: 0.6923 (0.6967) time: 0.4402 data: 0.0049 max mem: 22448 +train: [3] [260/400] eta: 0:01:02 lr: 0.000219 loss: 3.5146 (3.1426) grad: 1.3213 (0.7655) time: 0.4329 data: 0.0049 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=68.20 > 63.56) at step 732. Freezing. +train: [3] [280/400] eta: 0:00:53 lr: 0.000222 loss: 3.3900 (3.1365) grad: 1.3213 (0.7480) time: 0.4330 data: 0.0048 max mem: 22448 +train: [3] [300/400] eta: 0:00:44 lr: 0.000225 loss: 2.8427 (3.1161) grad: 0.2092 (0.7117) time: 0.4253 data: 0.0048 max mem: 22448 +train: [3] [320/400] eta: 0:00:35 lr: 0.000228 loss: 2.8192 (3.0964) grad: 0.2093 (0.6809) time: 0.4260 data: 0.0048 max mem: 22448 +train: [3] [340/400] eta: 0:00:26 lr: 0.000231 loss: 2.8228 (3.0812) grad: 0.2230 (0.6545) time: 0.4297 data: 0.0048 max mem: 22448 +train: [3] [360/400] eta: 0:00:17 lr: 0.000234 loss: 2.8495 (3.0688) grad: 0.2313 (0.6307) time: 0.4596 data: 0.0050 max mem: 22448 +train: [3] [380/400] eta: 0:00:08 lr: 0.000237 loss: 2.8315 (3.0552) grad: 0.2313 (0.6098) time: 0.4480 data: 0.0049 max mem: 22448 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 2.7935 (3.0422) grad: 0.2298 (0.5903) time: 0.4465 data: 0.0050 max mem: 22448 +train: [3] Total time: 0:02:57 (0.4438 s / it) +train: [3] Summary: lr: 0.000240 loss: 2.7935 (3.0422) grad: 0.2298 (0.5903) +eval (validation): [3] [ 0/85] eta: 0:04:16 time: 3.0206 data: 2.7900 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:31 time: 0.3529 data: 0.0038 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:19 time: 0.3702 data: 0.0039 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3244 data: 0.0040 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3196 data: 0.0042 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3127 data: 0.0042 max mem: 22448 +eval (validation): [3] Total time: 0:00:31 (0.3742 s / it) +cv: [3] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 2.462 acc: 0.268 f1: 0.185 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:21:28 lr: nan time: 3.2213 data: 2.8909 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:32 lr: 0.000243 loss: 2.7261 (2.7674) grad: 0.2153 (0.2210) time: 0.4270 data: 0.0032 max mem: 22448 +train: [4] [ 40/400] eta: 0:02:58 lr: 0.000246 loss: 2.7507 (2.7738) grad: 0.2196 (0.2195) time: 0.4271 data: 0.0047 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:40 lr: 0.000249 loss: 2.7678 (2.7712) grad: 0.2161 (0.2186) time: 0.4250 data: 0.0050 max mem: 22448 +train: [4] [ 80/400] eta: 0:02:28 lr: 0.000252 loss: 2.7380 (2.7712) grad: 0.2125 (0.2158) time: 0.4391 data: 0.0048 max mem: 22448 +train: [4] [100/400] eta: 0:02:16 lr: 0.000255 loss: 2.7825 (2.7772) grad: 0.2134 (0.2165) time: 0.4263 data: 0.0049 max mem: 22448 +train: [4] [120/400] eta: 0:02:06 lr: 0.000258 loss: 2.7825 (2.7730) grad: 0.2199 (0.2182) time: 0.4291 data: 0.0050 max mem: 22448 +train: [4] [140/400] eta: 0:01:57 lr: 0.000261 loss: 2.7628 (2.7711) grad: 0.2307 (0.2207) time: 0.4449 data: 0.0048 max mem: 22448 +train: [4] [160/400] eta: 0:01:47 lr: 0.000264 loss: 2.7737 (2.7728) grad: 0.2360 (0.2231) time: 0.4381 data: 0.0048 max mem: 22448 +train: [4] [180/400] eta: 0:01:38 lr: 0.000267 loss: 2.7844 (2.7752) grad: 0.2405 (0.2251) time: 0.4356 data: 0.0046 max mem: 22448 +train: [4] [200/400] eta: 0:01:29 lr: 0.000270 loss: 2.7565 (2.7715) grad: 0.2345 (0.2256) time: 0.4275 data: 0.0046 max mem: 22448 +train: [4] [220/400] eta: 0:01:20 lr: 0.000273 loss: 2.7565 (2.7734) grad: 0.2359 (0.2273) time: 0.4429 data: 0.0050 max mem: 22448 +train: [4] [240/400] eta: 0:01:11 lr: 0.000276 loss: 2.7820 (2.7734) grad: 0.2422 (0.2289) time: 0.4330 data: 0.0047 max mem: 22448 +train: [4] [260/400] eta: 0:01:02 lr: 0.000279 loss: 2.7946 (2.7755) grad: 0.2541 (0.2322) time: 0.4339 data: 0.0047 max mem: 22448 +train: [4] [280/400] eta: 0:00:53 lr: 0.000282 loss: 2.8238 (2.7847) grad: 0.3220 (0.2559) time: 0.4310 data: 0.0049 max mem: 22448 +WARNING: classifier 43 (22, 1.0) diverged (loss=65.03 > 63.56) at step 946. Freezing. +train: [4] [300/400] eta: 0:00:44 lr: 0.000285 loss: 2.9135 (2.8130) grad: 0.5273 (0.3066) time: 0.4263 data: 0.0048 max mem: 22448 +train: [4] [320/400] eta: 0:00:35 lr: 0.000288 loss: 2.7846 (2.8100) grad: 0.2159 (0.3003) time: 0.4292 data: 0.0047 max mem: 22448 +train: [4] [340/400] eta: 0:00:26 lr: 0.000291 loss: 2.7803 (2.8078) grad: 0.2083 (0.2951) time: 0.4394 data: 0.0046 max mem: 22448 +train: [4] [360/400] eta: 0:00:17 lr: 0.000294 loss: 2.7864 (2.8071) grad: 0.2156 (0.2910) time: 0.4562 data: 0.0047 max mem: 22448 +train: [4] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.7760 (2.8046) grad: 0.2205 (0.2872) time: 0.4433 data: 0.0049 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.7368 (2.8018) grad: 0.2203 (0.2839) time: 0.4392 data: 0.0049 max mem: 22448 +train: [4] Total time: 0:02:56 (0.4422 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.7368 (2.8018) grad: 0.2203 (0.2839) +eval (validation): [4] [ 0/85] eta: 0:04:13 time: 2.9812 data: 2.6931 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:34 time: 0.4105 data: 0.0044 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:19 time: 0.3470 data: 0.0039 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:10 time: 0.3274 data: 0.0039 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3115 data: 0.0040 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3071 data: 0.0040 max mem: 22448 +eval (validation): [4] Total time: 0:00:32 (0.3812 s / it) +cv: [4] best hparam: (1.6, 1.0) (027) ('027_lr1.6e+00_wd1.0e+00') loss: 2.436 acc: 0.267 f1: 0.201 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:21:17 lr: nan time: 3.1931 data: 2.8168 max mem: 22448 +train: [5] [ 20/400] eta: 0:03:32 lr: 0.000300 loss: 2.6751 (2.6663) grad: 0.2161 (0.2208) time: 0.4266 data: 0.0035 max mem: 22448 +train: [5] [ 40/400] eta: 0:02:57 lr: 0.000300 loss: 2.6831 (2.7100) grad: 0.2189 (0.2233) time: 0.4273 data: 0.0043 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:40 lr: 0.000300 loss: 2.7122 (2.7162) grad: 0.2247 (0.2258) time: 0.4266 data: 0.0050 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:27 lr: 0.000300 loss: 2.7234 (2.7135) grad: 0.2307 (0.2265) time: 0.4242 data: 0.0049 max mem: 22448 +train: [5] [100/400] eta: 0:02:16 lr: 0.000300 loss: 2.7270 (2.7178) grad: 0.2329 (0.2303) time: 0.4279 data: 0.0048 max mem: 22448 +train: [5] [120/400] eta: 0:02:05 lr: 0.000300 loss: 2.7156 (2.7125) grad: 0.2331 (0.2300) time: 0.4281 data: 0.0049 max mem: 22448 +train: [5] [140/400] eta: 0:01:56 lr: 0.000300 loss: 2.6773 (2.7052) grad: 0.2241 (0.2291) time: 0.4477 data: 0.0051 max mem: 22448 +train: [5] [160/400] eta: 0:01:47 lr: 0.000299 loss: 2.6725 (2.7039) grad: 0.2242 (0.2296) time: 0.4377 data: 0.0047 max mem: 22448 +train: [5] [180/400] eta: 0:01:38 lr: 0.000299 loss: 2.7073 (2.7078) grad: 0.2332 (0.2297) time: 0.4382 data: 0.0049 max mem: 22448 +train: [5] [200/400] eta: 0:01:28 lr: 0.000299 loss: 2.7063 (2.7068) grad: 0.2319 (0.2298) time: 0.4240 data: 0.0048 max mem: 22448 +train: [5] [220/400] eta: 0:01:19 lr: 0.000299 loss: 2.6854 (2.7059) grad: 0.2257 (0.2291) time: 0.4409 data: 0.0049 max mem: 22448 +train: [5] [240/400] eta: 0:01:10 lr: 0.000299 loss: 2.6634 (2.7049) grad: 0.2232 (0.2288) time: 0.4268 data: 0.0048 max mem: 22448 +train: [5] [260/400] eta: 0:01:01 lr: 0.000299 loss: 2.6645 (2.7017) grad: 0.2232 (0.2281) time: 0.4254 data: 0.0047 max mem: 22448 +train: [5] [280/400] eta: 0:00:52 lr: 0.000298 loss: 2.6761 (2.7027) grad: 0.2254 (0.2288) time: 0.4280 data: 0.0048 max mem: 22448 +train: [5] [300/400] eta: 0:00:43 lr: 0.000298 loss: 2.6761 (2.7000) grad: 0.2349 (0.2292) time: 0.4264 data: 0.0049 max mem: 22448 +train: [5] [320/400] eta: 0:00:35 lr: 0.000298 loss: 2.6745 (2.7007) grad: 0.2381 (0.2300) time: 0.4247 data: 0.0047 max mem: 22448 +train: [5] [340/400] eta: 0:00:26 lr: 0.000298 loss: 2.6964 (2.6998) grad: 0.2399 (0.2302) time: 0.4278 data: 0.0047 max mem: 22448 +train: [5] [360/400] eta: 0:00:17 lr: 0.000297 loss: 2.6803 (2.7000) grad: 0.2385 (0.2306) time: 0.4582 data: 0.0050 max mem: 22448 +train: [5] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.6676 (2.6993) grad: 0.2359 (0.2309) time: 0.4259 data: 0.0047 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.6345 (2.6959) grad: 0.2224 (0.2302) time: 0.4312 data: 0.0049 max mem: 22448 +train: [5] Total time: 0:02:55 (0.4386 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.6345 (2.6959) grad: 0.2224 (0.2302) +eval (validation): [5] [ 0/85] eta: 0:04:07 time: 2.9165 data: 2.6391 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:32 time: 0.3719 data: 0.0042 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:19 time: 0.3746 data: 0.0045 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:10 time: 0.3348 data: 0.0043 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3198 data: 0.0041 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3091 data: 0.0041 max mem: 22448 +eval (validation): [5] Total time: 0:00:32 (0.3808 s / it) +cv: [5] best hparam: (1.4, 1.0) (026) ('026_lr1.4e+00_wd1.0e+00') loss: 2.371 acc: 0.286 f1: 0.219 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:21:02 lr: nan time: 3.1565 data: 2.7880 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:29 lr: 0.000296 loss: 2.6400 (2.6195) grad: 0.2278 (0.2286) time: 0.4216 data: 0.0041 max mem: 22448 +train: [6] [ 40/400] eta: 0:02:56 lr: 0.000296 loss: 2.6400 (2.6396) grad: 0.2260 (0.2291) time: 0.4257 data: 0.0040 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:39 lr: 0.000296 loss: 2.6166 (2.6294) grad: 0.2241 (0.2301) time: 0.4249 data: 0.0045 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:26 lr: 0.000295 loss: 2.6036 (2.6197) grad: 0.2332 (0.2319) time: 0.4259 data: 0.0049 max mem: 22448 +train: [6] [100/400] eta: 0:02:15 lr: 0.000295 loss: 2.6115 (2.6177) grad: 0.2333 (0.2313) time: 0.4331 data: 0.0048 max mem: 22448 +train: [6] [120/400] eta: 0:02:05 lr: 0.000295 loss: 2.6066 (2.6200) grad: 0.2324 (0.2323) time: 0.4336 data: 0.0047 max mem: 22448 +train: [6] [140/400] eta: 0:01:56 lr: 0.000294 loss: 2.6475 (2.6243) grad: 0.2337 (0.2324) time: 0.4483 data: 0.0051 max mem: 22448 +train: [6] [160/400] eta: 0:01:47 lr: 0.000294 loss: 2.6527 (2.6291) grad: 0.2343 (0.2324) time: 0.4427 data: 0.0049 max mem: 22448 +train: [6] [180/400] eta: 0:01:38 lr: 0.000293 loss: 2.6405 (2.6281) grad: 0.2380 (0.2335) time: 0.4345 data: 0.0045 max mem: 22448 +train: [6] [200/400] eta: 0:01:29 lr: 0.000293 loss: 2.6657 (2.6315) grad: 0.2420 (0.2345) time: 0.4322 data: 0.0046 max mem: 22448 +train: [6] [220/400] eta: 0:01:20 lr: 0.000292 loss: 2.6744 (2.6314) grad: 0.2474 (0.2355) time: 0.4428 data: 0.0047 max mem: 22448 +train: [6] [240/400] eta: 0:01:11 lr: 0.000292 loss: 2.6565 (2.6349) grad: 0.2394 (0.2362) time: 0.4284 data: 0.0050 max mem: 22448 +train: [6] [260/400] eta: 0:01:02 lr: 0.000291 loss: 2.6410 (2.6320) grad: 0.2360 (0.2364) time: 0.4288 data: 0.0049 max mem: 22448 +train: [6] [280/400] eta: 0:00:53 lr: 0.000291 loss: 2.6285 (2.6322) grad: 0.2363 (0.2367) time: 0.4291 data: 0.0048 max mem: 22448 +train: [6] [300/400] eta: 0:00:44 lr: 0.000290 loss: 2.6581 (2.6341) grad: 0.2365 (0.2369) time: 0.4335 data: 0.0045 max mem: 22448 +train: [6] [320/400] eta: 0:00:35 lr: 0.000290 loss: 2.6675 (2.6347) grad: 0.2365 (0.2372) time: 0.4359 data: 0.0047 max mem: 22448 +train: [6] [340/400] eta: 0:00:26 lr: 0.000289 loss: 2.6547 (2.6351) grad: 0.2357 (0.2373) time: 0.4491 data: 0.0047 max mem: 22448 +train: [6] [360/400] eta: 0:00:17 lr: 0.000288 loss: 2.6052 (2.6333) grad: 0.2397 (0.2375) time: 0.4600 data: 0.0049 max mem: 22448 +train: [6] [380/400] eta: 0:00:08 lr: 0.000288 loss: 2.6052 (2.6350) grad: 0.2406 (0.2374) time: 0.4377 data: 0.0051 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.6443 (2.6349) grad: 0.2251 (0.2367) time: 0.4239 data: 0.0049 max mem: 22448 +train: [6] Total time: 0:02:56 (0.4420 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.6443 (2.6349) grad: 0.2251 (0.2367) +eval (validation): [6] [ 0/85] eta: 0:04:11 time: 2.9569 data: 2.7036 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:34 time: 0.4056 data: 0.0038 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:19 time: 0.3311 data: 0.0041 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:09 time: 0.3335 data: 0.0043 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3242 data: 0.0039 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3174 data: 0.0039 max mem: 22448 +eval (validation): [6] Total time: 0:00:32 (0.3800 s / it) +cv: [6] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 2.390 acc: 0.278 f1: 0.219 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [7] [ 0/400] eta: 0:21:19 lr: nan time: 3.1999 data: 2.8727 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:32 lr: 0.000286 loss: 2.5160 (2.5488) grad: 0.2242 (0.2322) time: 0.4273 data: 0.0044 max mem: 22448 +train: [7] [ 40/400] eta: 0:02:58 lr: 0.000286 loss: 2.5380 (2.5601) grad: 0.2262 (0.2358) time: 0.4273 data: 0.0041 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:41 lr: 0.000285 loss: 2.5577 (2.5521) grad: 0.2465 (0.2409) time: 0.4310 data: 0.0050 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:28 lr: 0.000284 loss: 2.5715 (2.5666) grad: 0.2448 (0.2400) time: 0.4388 data: 0.0049 max mem: 22448 +train: [7] [100/400] eta: 0:02:17 lr: 0.000284 loss: 2.5723 (2.5620) grad: 0.2398 (0.2400) time: 0.4318 data: 0.0048 max mem: 22448 +train: [7] [120/400] eta: 0:02:07 lr: 0.000283 loss: 2.5572 (2.5603) grad: 0.2461 (0.2410) time: 0.4321 data: 0.0047 max mem: 22448 +train: [7] [140/400] eta: 0:01:58 lr: 0.000282 loss: 2.5572 (2.5642) grad: 0.2383 (0.2403) time: 0.4671 data: 0.0050 max mem: 22448 +train: [7] [160/400] eta: 0:01:49 lr: 0.000282 loss: 2.5880 (2.5643) grad: 0.2359 (0.2406) time: 0.4539 data: 0.0049 max mem: 22448 +train: [7] [180/400] eta: 0:01:39 lr: 0.000281 loss: 2.5964 (2.5706) grad: 0.2454 (0.2416) time: 0.4442 data: 0.0051 max mem: 22448 +train: [7] [200/400] eta: 0:01:30 lr: 0.000280 loss: 2.5993 (2.5707) grad: 0.2454 (0.2419) time: 0.4316 data: 0.0049 max mem: 22448 +train: [7] [220/400] eta: 0:01:21 lr: 0.000279 loss: 2.5806 (2.5701) grad: 0.2394 (0.2427) time: 0.4555 data: 0.0048 max mem: 22448 +train: [7] [240/400] eta: 0:01:12 lr: 0.000278 loss: 2.5870 (2.5749) grad: 0.2487 (0.2437) time: 0.4523 data: 0.0048 max mem: 22448 +train: [7] [260/400] eta: 0:01:03 lr: 0.000278 loss: 2.5868 (2.5736) grad: 0.2426 (0.2432) time: 0.4374 data: 0.0050 max mem: 22448 +train: [7] [280/400] eta: 0:00:54 lr: 0.000277 loss: 2.5357 (2.5694) grad: 0.2366 (0.2426) time: 0.4336 data: 0.0048 max mem: 22448 +train: [7] [300/400] eta: 0:00:44 lr: 0.000276 loss: 2.4943 (2.5690) grad: 0.2405 (0.2428) time: 0.4446 data: 0.0049 max mem: 22448 +train: [7] [320/400] eta: 0:00:35 lr: 0.000275 loss: 2.5172 (2.5693) grad: 0.2448 (0.2427) time: 0.4360 data: 0.0045 max mem: 22448 +train: [7] [340/400] eta: 0:00:26 lr: 0.000274 loss: 2.5273 (2.5672) grad: 0.2380 (0.2423) time: 0.4424 data: 0.0044 max mem: 22448 +train: [7] [360/400] eta: 0:00:17 lr: 0.000273 loss: 2.5554 (2.5677) grad: 0.2379 (0.2424) time: 0.4652 data: 0.0049 max mem: 22448 +train: [7] [380/400] eta: 0:00:08 lr: 0.000272 loss: 2.5614 (2.5675) grad: 0.2447 (0.2428) time: 0.4543 data: 0.0049 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.5984 (2.5685) grad: 0.2451 (0.2432) time: 0.4336 data: 0.0046 max mem: 22448 +train: [7] Total time: 0:02:59 (0.4494 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.5984 (2.5685) grad: 0.2451 (0.2432) +eval (validation): [7] [ 0/85] eta: 0:04:22 time: 3.0920 data: 2.7970 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:31 time: 0.3485 data: 0.0052 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:18 time: 0.3308 data: 0.0040 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:09 time: 0.3235 data: 0.0032 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3170 data: 0.0036 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3085 data: 0.0035 max mem: 22448 +eval (validation): [7] Total time: 0:00:30 (0.3639 s / it) +cv: [7] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.402 acc: 0.267 f1: 0.207 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:22:35 lr: nan time: 3.3881 data: 2.9933 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:45 lr: 0.000270 loss: 2.4263 (2.4577) grad: 0.2243 (0.2290) time: 0.4550 data: 0.0038 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:06 lr: 0.000270 loss: 2.4711 (2.4715) grad: 0.2323 (0.2329) time: 0.4350 data: 0.0047 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:45 lr: 0.000269 loss: 2.4944 (2.4832) grad: 0.2379 (0.2352) time: 0.4238 data: 0.0047 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:32 lr: 0.000268 loss: 2.5330 (2.4950) grad: 0.2422 (0.2381) time: 0.4429 data: 0.0048 max mem: 22448 +train: [8] [100/400] eta: 0:02:20 lr: 0.000267 loss: 2.5330 (2.4953) grad: 0.2490 (0.2416) time: 0.4370 data: 0.0048 max mem: 22448 +train: [8] [120/400] eta: 0:02:09 lr: 0.000266 loss: 2.4835 (2.4949) grad: 0.2550 (0.2443) time: 0.4443 data: 0.0049 max mem: 22448 +train: [8] [140/400] eta: 0:01:59 lr: 0.000265 loss: 2.4871 (2.4993) grad: 0.2533 (0.2459) time: 0.4395 data: 0.0050 max mem: 22448 +train: [8] [160/400] eta: 0:01:50 lr: 0.000264 loss: 2.5120 (2.5023) grad: 0.2605 (0.2492) time: 0.4454 data: 0.0050 max mem: 22448 +train: [8] [180/400] eta: 0:01:40 lr: 0.000263 loss: 2.4884 (2.4981) grad: 0.2570 (0.2495) time: 0.4379 data: 0.0048 max mem: 22448 +train: [8] [200/400] eta: 0:01:30 lr: 0.000262 loss: 2.4884 (2.5023) grad: 0.2477 (0.2499) time: 0.4385 data: 0.0049 max mem: 22448 +train: [8] [220/400] eta: 0:01:21 lr: 0.000260 loss: 2.5395 (2.5042) grad: 0.2472 (0.2496) time: 0.4334 data: 0.0048 max mem: 22448 +train: [8] [240/400] eta: 0:01:12 lr: 0.000259 loss: 2.4902 (2.5023) grad: 0.2442 (0.2492) time: 0.4459 data: 0.0050 max mem: 22448 +train: [8] [260/400] eta: 0:01:03 lr: 0.000258 loss: 2.5188 (2.5043) grad: 0.2411 (0.2488) time: 0.4378 data: 0.0050 max mem: 22448 +train: [8] [280/400] eta: 0:00:54 lr: 0.000257 loss: 2.5088 (2.5034) grad: 0.2411 (0.2489) time: 0.4481 data: 0.0050 max mem: 22448 +train: [8] [300/400] eta: 0:00:45 lr: 0.000256 loss: 2.4890 (2.5041) grad: 0.2433 (0.2490) time: 0.4438 data: 0.0048 max mem: 22448 +train: [8] [320/400] eta: 0:00:35 lr: 0.000255 loss: 2.4957 (2.5035) grad: 0.2404 (0.2484) time: 0.4428 data: 0.0049 max mem: 22448 +train: [8] [340/400] eta: 0:00:26 lr: 0.000254 loss: 2.4848 (2.5030) grad: 0.2459 (0.2489) time: 0.4355 data: 0.0047 max mem: 22448 +train: [8] [360/400] eta: 0:00:17 lr: 0.000253 loss: 2.4854 (2.5027) grad: 0.2455 (0.2485) time: 0.4535 data: 0.0050 max mem: 22448 +train: [8] [380/400] eta: 0:00:08 lr: 0.000252 loss: 2.4945 (2.5033) grad: 0.2368 (0.2481) time: 0.4488 data: 0.0050 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.5069 (2.5036) grad: 0.2472 (0.2487) time: 0.4398 data: 0.0047 max mem: 22448 +train: [8] Total time: 0:02:59 (0.4494 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.5069 (2.5036) grad: 0.2472 (0.2487) +eval (validation): [8] [ 0/85] eta: 0:04:14 time: 2.9964 data: 2.7588 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:32 time: 0.3763 data: 0.0045 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:19 time: 0.3596 data: 0.0041 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:10 time: 0.3560 data: 0.0044 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3369 data: 0.0040 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3346 data: 0.0039 max mem: 22448 +eval (validation): [8] Total time: 0:00:33 (0.3898 s / it) +cv: [8] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.432 acc: 0.264 f1: 0.205 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:22:55 lr: nan time: 3.4376 data: 3.0431 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:52 lr: 0.000249 loss: 2.4501 (2.4668) grad: 0.2515 (0.2637) time: 0.4707 data: 0.0049 max mem: 22448 +train: [9] [ 40/400] eta: 0:03:11 lr: 0.000248 loss: 2.4686 (2.4728) grad: 0.2483 (0.2525) time: 0.4479 data: 0.0051 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:50 lr: 0.000247 loss: 2.4485 (2.4618) grad: 0.2397 (0.2486) time: 0.4375 data: 0.0049 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:35 lr: 0.000246 loss: 2.4359 (2.4693) grad: 0.2411 (0.2491) time: 0.4438 data: 0.0046 max mem: 22448 +train: [9] [100/400] eta: 0:02:23 lr: 0.000244 loss: 2.4641 (2.4674) grad: 0.2494 (0.2491) time: 0.4447 data: 0.0047 max mem: 22448 +train: [9] [120/400] eta: 0:02:12 lr: 0.000243 loss: 2.4256 (2.4638) grad: 0.2421 (0.2477) time: 0.4497 data: 0.0047 max mem: 22448 +train: [9] [140/400] eta: 0:02:01 lr: 0.000242 loss: 2.4445 (2.4635) grad: 0.2414 (0.2483) time: 0.4370 data: 0.0048 max mem: 22448 +train: [9] [160/400] eta: 0:01:52 lr: 0.000241 loss: 2.4552 (2.4598) grad: 0.2434 (0.2493) time: 0.4679 data: 0.0048 max mem: 22448 +train: [9] [180/400] eta: 0:01:42 lr: 0.000240 loss: 2.4463 (2.4620) grad: 0.2588 (0.2501) time: 0.4533 data: 0.0048 max mem: 22448 +train: [9] [200/400] eta: 0:01:33 lr: 0.000238 loss: 2.4419 (2.4595) grad: 0.2587 (0.2512) time: 0.4537 data: 0.0049 max mem: 22448 +train: [9] [220/400] eta: 0:01:23 lr: 0.000237 loss: 2.4187 (2.4560) grad: 0.2585 (0.2521) time: 0.4278 data: 0.0047 max mem: 22448 +train: [9] [240/400] eta: 0:01:13 lr: 0.000236 loss: 2.4727 (2.4606) grad: 0.2585 (0.2520) time: 0.4464 data: 0.0050 max mem: 22448 +train: [9] [260/400] eta: 0:01:04 lr: 0.000234 loss: 2.4861 (2.4602) grad: 0.2484 (0.2514) time: 0.4396 data: 0.0047 max mem: 22448 +train: [9] [280/400] eta: 0:00:54 lr: 0.000233 loss: 2.4450 (2.4607) grad: 0.2484 (0.2519) time: 0.4392 data: 0.0048 max mem: 22448 +train: [9] [300/400] eta: 0:00:45 lr: 0.000232 loss: 2.4413 (2.4598) grad: 0.2518 (0.2522) time: 0.4368 data: 0.0046 max mem: 22448 +train: [9] [320/400] eta: 0:00:36 lr: 0.000230 loss: 2.4365 (2.4600) grad: 0.2492 (0.2518) time: 0.4320 data: 0.0049 max mem: 22448 +train: [9] [340/400] eta: 0:00:27 lr: 0.000229 loss: 2.4284 (2.4583) grad: 0.2466 (0.2520) time: 0.4365 data: 0.0044 max mem: 22448 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 2.4382 (2.4592) grad: 0.2594 (0.2527) time: 0.4395 data: 0.0047 max mem: 22448 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 2.4550 (2.4586) grad: 0.2509 (0.2526) time: 0.4358 data: 0.0049 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.4694 (2.4606) grad: 0.2507 (0.2530) time: 0.4353 data: 0.0046 max mem: 22448 +train: [9] Total time: 0:03:00 (0.4519 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.4694 (2.4606) grad: 0.2507 (0.2530) +eval (validation): [9] [ 0/85] eta: 0:04:05 time: 2.8854 data: 2.6511 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:29 time: 0.3365 data: 0.0033 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:18 time: 0.3401 data: 0.0034 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:09 time: 0.3330 data: 0.0041 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3314 data: 0.0043 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3256 data: 0.0042 max mem: 22448 +eval (validation): [9] Total time: 0:00:31 (0.3678 s / it) +cv: [9] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.397 acc: 0.272 f1: 0.205 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:20:57 lr: nan time: 3.1437 data: 2.7641 max mem: 22448 +train: [10] [ 20/400] eta: 0:03:40 lr: 0.000224 loss: 2.3911 (2.4031) grad: 0.2507 (0.2553) time: 0.4519 data: 0.0041 max mem: 22448 +train: [10] [ 40/400] eta: 0:03:05 lr: 0.000222 loss: 2.3850 (2.3952) grad: 0.2517 (0.2519) time: 0.4473 data: 0.0048 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:48 lr: 0.000221 loss: 2.4031 (2.4144) grad: 0.2408 (0.2480) time: 0.4538 data: 0.0048 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:33 lr: 0.000220 loss: 2.4025 (2.4028) grad: 0.2371 (0.2473) time: 0.4300 data: 0.0048 max mem: 22448 +train: [10] [100/400] eta: 0:02:22 lr: 0.000218 loss: 2.3947 (2.4003) grad: 0.2436 (0.2479) time: 0.4590 data: 0.0049 max mem: 22448 +train: [10] [120/400] eta: 0:02:11 lr: 0.000217 loss: 2.4041 (2.4026) grad: 0.2510 (0.2494) time: 0.4524 data: 0.0051 max mem: 22448 +train: [10] [140/400] eta: 0:02:02 lr: 0.000215 loss: 2.4163 (2.4048) grad: 0.2525 (0.2494) time: 0.4579 data: 0.0049 max mem: 22448 +train: [10] [160/400] eta: 0:01:52 lr: 0.000214 loss: 2.4134 (2.4072) grad: 0.2473 (0.2493) time: 0.4660 data: 0.0052 max mem: 22448 +train: [10] [180/400] eta: 0:01:42 lr: 0.000213 loss: 2.4123 (2.4098) grad: 0.2488 (0.2505) time: 0.4559 data: 0.0048 max mem: 22448 +train: [10] [200/400] eta: 0:01:33 lr: 0.000211 loss: 2.4131 (2.4094) grad: 0.2534 (0.2513) time: 0.4495 data: 0.0048 max mem: 22448 +train: [10] [220/400] eta: 0:01:23 lr: 0.000210 loss: 2.3956 (2.4088) grad: 0.2529 (0.2512) time: 0.4408 data: 0.0046 max mem: 22448 +train: [10] [240/400] eta: 0:01:14 lr: 0.000208 loss: 2.3956 (2.4084) grad: 0.2479 (0.2506) time: 0.4670 data: 0.0050 max mem: 22448 +train: [10] [260/400] eta: 0:01:04 lr: 0.000207 loss: 2.4080 (2.4073) grad: 0.2432 (0.2503) time: 0.4567 data: 0.0047 max mem: 22448 +train: [10] [280/400] eta: 0:00:55 lr: 0.000205 loss: 2.4080 (2.4076) grad: 0.2487 (0.2505) time: 0.4527 data: 0.0050 max mem: 22448 +train: [10] [300/400] eta: 0:00:46 lr: 0.000204 loss: 2.3634 (2.4049) grad: 0.2518 (0.2504) time: 0.4595 data: 0.0050 max mem: 22448 +train: [10] [320/400] eta: 0:00:36 lr: 0.000202 loss: 2.3484 (2.4030) grad: 0.2524 (0.2511) time: 0.4563 data: 0.0049 max mem: 22448 +train: [10] [340/400] eta: 0:00:27 lr: 0.000201 loss: 2.3985 (2.4024) grad: 0.2538 (0.2512) time: 0.4423 data: 0.0048 max mem: 22448 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 2.3893 (2.4005) grad: 0.2471 (0.2510) time: 0.4763 data: 0.0049 max mem: 22448 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 2.3637 (2.3994) grad: 0.2496 (0.2509) time: 0.4576 data: 0.0051 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.3906 (2.4000) grad: 0.2520 (0.2510) time: 0.4610 data: 0.0050 max mem: 22448 +train: [10] Total time: 0:03:04 (0.4619 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.3906 (2.4000) grad: 0.2520 (0.2510) +eval (validation): [10] [ 0/85] eta: 0:04:44 time: 3.3442 data: 3.0904 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:32 time: 0.3642 data: 0.0049 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:19 time: 0.3698 data: 0.0038 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:10 time: 0.3601 data: 0.0042 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3507 data: 0.0040 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3402 data: 0.0039 max mem: 22448 +eval (validation): [10] Total time: 0:00:33 (0.3978 s / it) +cv: [10] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.403 acc: 0.271 f1: 0.204 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:21:17 lr: nan time: 3.1927 data: 2.8138 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:43 lr: 0.000195 loss: 2.3145 (2.3375) grad: 0.2391 (0.2459) time: 0.4587 data: 0.0048 max mem: 22448 +train: [11] [ 40/400] eta: 0:03:10 lr: 0.000193 loss: 2.3481 (2.3543) grad: 0.2430 (0.2482) time: 0.4667 data: 0.0048 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:51 lr: 0.000192 loss: 2.3331 (2.3369) grad: 0.2489 (0.2501) time: 0.4513 data: 0.0051 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:36 lr: 0.000190 loss: 2.3407 (2.3538) grad: 0.2522 (0.2517) time: 0.4434 data: 0.0048 max mem: 22448 +train: [11] [100/400] eta: 0:02:25 lr: 0.000189 loss: 2.3814 (2.3511) grad: 0.2535 (0.2517) time: 0.4758 data: 0.0052 max mem: 22448 +train: [11] [120/400] eta: 0:02:15 lr: 0.000187 loss: 2.3125 (2.3373) grad: 0.2501 (0.2514) time: 0.4677 data: 0.0050 max mem: 22448 +train: [11] [140/400] eta: 0:02:04 lr: 0.000186 loss: 2.3133 (2.3420) grad: 0.2514 (0.2516) time: 0.4465 data: 0.0048 max mem: 22448 +train: [11] [160/400] eta: 0:01:53 lr: 0.000184 loss: 2.3498 (2.3442) grad: 0.2568 (0.2527) time: 0.4289 data: 0.0043 max mem: 22448 +train: [11] [180/400] eta: 0:01:44 lr: 0.000183 loss: 2.3487 (2.3470) grad: 0.2623 (0.2548) time: 0.4826 data: 0.0049 max mem: 22448 +train: [11] [200/400] eta: 0:01:34 lr: 0.000181 loss: 2.3945 (2.3511) grad: 0.2563 (0.2547) time: 0.4488 data: 0.0048 max mem: 22448 +train: [11] [220/400] eta: 0:01:24 lr: 0.000180 loss: 2.4360 (2.3593) grad: 0.2517 (0.2547) time: 0.4390 data: 0.0046 max mem: 22448 +train: [11] [240/400] eta: 0:01:14 lr: 0.000178 loss: 2.4192 (2.3603) grad: 0.2479 (0.2551) time: 0.4257 data: 0.0043 max mem: 22448 +train: [11] [260/400] eta: 0:01:04 lr: 0.000177 loss: 2.3399 (2.3602) grad: 0.2577 (0.2558) time: 0.4489 data: 0.0047 max mem: 22448 +train: [11] [280/400] eta: 0:00:55 lr: 0.000175 loss: 2.3385 (2.3604) grad: 0.2592 (0.2562) time: 0.4393 data: 0.0047 max mem: 22448 +train: [11] [300/400] eta: 0:00:45 lr: 0.000174 loss: 2.3734 (2.3646) grad: 0.2587 (0.2565) time: 0.4335 data: 0.0047 max mem: 22448 +train: [11] [320/400] eta: 0:00:36 lr: 0.000172 loss: 2.3994 (2.3666) grad: 0.2604 (0.2574) time: 0.4331 data: 0.0046 max mem: 22448 +train: [11] [340/400] eta: 0:00:27 lr: 0.000170 loss: 2.3753 (2.3664) grad: 0.2669 (0.2578) time: 0.4433 data: 0.0047 max mem: 22448 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 2.3753 (2.3659) grad: 0.2572 (0.2579) time: 0.4359 data: 0.0047 max mem: 22448 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 2.3218 (2.3623) grad: 0.2512 (0.2576) time: 0.4266 data: 0.0046 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.3473 (2.3655) grad: 0.2487 (0.2571) time: 0.4384 data: 0.0047 max mem: 22448 +train: [11] Total time: 0:03:01 (0.4542 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.3473 (2.3655) grad: 0.2487 (0.2571) +eval (validation): [11] [ 0/85] eta: 0:04:18 time: 3.0371 data: 2.7955 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:30 time: 0.3392 data: 0.0037 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:18 time: 0.3423 data: 0.0037 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:09 time: 0.3629 data: 0.0042 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3559 data: 0.0042 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3487 data: 0.0043 max mem: 22448 +eval (validation): [11] Total time: 0:00:32 (0.3850 s / it) +cv: [11] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.402 acc: 0.275 f1: 0.209 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:22:59 lr: nan time: 3.4500 data: 3.1079 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:47 lr: 0.000164 loss: 2.2736 (2.2594) grad: 0.2378 (0.2396) time: 0.4574 data: 0.0041 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:11 lr: 0.000163 loss: 2.2761 (2.2748) grad: 0.2412 (0.2442) time: 0.4596 data: 0.0045 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:52 lr: 0.000161 loss: 2.2843 (2.2829) grad: 0.2464 (0.2442) time: 0.4578 data: 0.0051 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:37 lr: 0.000160 loss: 2.2902 (2.2921) grad: 0.2437 (0.2444) time: 0.4467 data: 0.0049 max mem: 22448 +train: [12] [100/400] eta: 0:02:26 lr: 0.000158 loss: 2.3241 (2.2951) grad: 0.2513 (0.2461) time: 0.4663 data: 0.0052 max mem: 22448 +train: [12] [120/400] eta: 0:02:15 lr: 0.000156 loss: 2.3090 (2.3002) grad: 0.2452 (0.2450) time: 0.4568 data: 0.0050 max mem: 22448 +train: [12] [140/400] eta: 0:02:04 lr: 0.000155 loss: 2.2908 (2.2983) grad: 0.2490 (0.2476) time: 0.4534 data: 0.0050 max mem: 22448 +train: [12] [160/400] eta: 0:01:54 lr: 0.000153 loss: 2.3022 (2.3012) grad: 0.2658 (0.2501) time: 0.4602 data: 0.0051 max mem: 22448 +train: [12] [180/400] eta: 0:01:44 lr: 0.000152 loss: 2.3008 (2.2994) grad: 0.2634 (0.2509) time: 0.4577 data: 0.0051 max mem: 22448 +train: [12] [200/400] eta: 0:01:34 lr: 0.000150 loss: 2.3287 (2.3063) grad: 0.2545 (0.2520) time: 0.4444 data: 0.0050 max mem: 22448 +train: [12] [220/400] eta: 0:01:24 lr: 0.000149 loss: 2.3339 (2.3082) grad: 0.2536 (0.2520) time: 0.4673 data: 0.0049 max mem: 22448 +train: [12] [240/400] eta: 0:01:15 lr: 0.000147 loss: 2.3192 (2.3075) grad: 0.2559 (0.2535) time: 0.4554 data: 0.0049 max mem: 22448 +train: [12] [260/400] eta: 0:01:05 lr: 0.000145 loss: 2.3137 (2.3090) grad: 0.2586 (0.2534) time: 0.4565 data: 0.0051 max mem: 22448 +train: [12] [280/400] eta: 0:00:56 lr: 0.000144 loss: 2.2934 (2.3045) grad: 0.2487 (0.2535) time: 0.4655 data: 0.0051 max mem: 22448 +train: [12] [300/400] eta: 0:00:46 lr: 0.000142 loss: 2.2981 (2.3073) grad: 0.2544 (0.2544) time: 0.4530 data: 0.0048 max mem: 22448 +train: [12] [320/400] eta: 0:00:37 lr: 0.000141 loss: 2.3200 (2.3082) grad: 0.2582 (0.2546) time: 0.4509 data: 0.0047 max mem: 22448 +train: [12] [340/400] eta: 0:00:27 lr: 0.000139 loss: 2.2724 (2.3083) grad: 0.2559 (0.2543) time: 0.4596 data: 0.0050 max mem: 22448 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 2.2786 (2.3074) grad: 0.2490 (0.2542) time: 0.4692 data: 0.0051 max mem: 22448 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 2.3085 (2.3078) grad: 0.2490 (0.2542) time: 0.4592 data: 0.0049 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.2823 (2.3061) grad: 0.2548 (0.2545) time: 0.4359 data: 0.0047 max mem: 22448 +train: [12] Total time: 0:03:05 (0.4648 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.2823 (2.3061) grad: 0.2548 (0.2545) +eval (validation): [12] [ 0/85] eta: 0:04:49 time: 3.4023 data: 3.1035 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:35 time: 0.4006 data: 0.0064 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:20 time: 0.3601 data: 0.0038 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:10 time: 0.3567 data: 0.0044 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3289 data: 0.0039 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3227 data: 0.0036 max mem: 22448 +eval (validation): [12] Total time: 0:00:33 (0.3992 s / it) +cv: [12] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.418 acc: 0.273 f1: 0.207 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:23:16 lr: nan time: 3.4922 data: 3.1461 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:45 lr: 0.000133 loss: 2.2448 (2.2601) grad: 0.2501 (0.2531) time: 0.4493 data: 0.0028 max mem: 22448 +train: [13] [ 40/400] eta: 0:03:09 lr: 0.000131 loss: 2.2448 (2.2635) grad: 0.2505 (0.2545) time: 0.4543 data: 0.0047 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:48 lr: 0.000130 loss: 2.2416 (2.2648) grad: 0.2517 (0.2537) time: 0.4292 data: 0.0047 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:33 lr: 0.000128 loss: 2.2275 (2.2634) grad: 0.2497 (0.2540) time: 0.4375 data: 0.0047 max mem: 22448 +train: [13] [100/400] eta: 0:02:21 lr: 0.000127 loss: 2.2520 (2.2618) grad: 0.2455 (0.2526) time: 0.4373 data: 0.0048 max mem: 22448 +train: [13] [120/400] eta: 0:02:10 lr: 0.000125 loss: 2.2520 (2.2586) grad: 0.2455 (0.2529) time: 0.4352 data: 0.0047 max mem: 22448 +train: [13] [140/400] eta: 0:02:00 lr: 0.000124 loss: 2.2730 (2.2664) grad: 0.2602 (0.2542) time: 0.4495 data: 0.0049 max mem: 22448 +train: [13] [160/400] eta: 0:01:50 lr: 0.000122 loss: 2.2812 (2.2644) grad: 0.2625 (0.2554) time: 0.4329 data: 0.0048 max mem: 22448 +train: [13] [180/400] eta: 0:01:40 lr: 0.000120 loss: 2.2670 (2.2708) grad: 0.2610 (0.2561) time: 0.4446 data: 0.0047 max mem: 22448 +train: [13] [200/400] eta: 0:01:31 lr: 0.000119 loss: 2.2432 (2.2636) grad: 0.2576 (0.2559) time: 0.4450 data: 0.0049 max mem: 22448 +train: [13] [220/400] eta: 0:01:21 lr: 0.000117 loss: 2.2608 (2.2652) grad: 0.2573 (0.2567) time: 0.4396 data: 0.0046 max mem: 22448 +train: [13] [240/400] eta: 0:01:12 lr: 0.000116 loss: 2.2729 (2.2658) grad: 0.2586 (0.2570) time: 0.4475 data: 0.0048 max mem: 22448 +train: [13] [260/400] eta: 0:01:03 lr: 0.000114 loss: 2.2596 (2.2674) grad: 0.2534 (0.2562) time: 0.4354 data: 0.0045 max mem: 22448 +train: [13] [280/400] eta: 0:00:54 lr: 0.000113 loss: 2.2330 (2.2638) grad: 0.2404 (0.2551) time: 0.4448 data: 0.0043 max mem: 22448 +train: [13] [300/400] eta: 0:00:45 lr: 0.000111 loss: 2.2179 (2.2625) grad: 0.2407 (0.2542) time: 0.4419 data: 0.0044 max mem: 22448 +train: [13] [320/400] eta: 0:00:36 lr: 0.000110 loss: 2.2714 (2.2646) grad: 0.2519 (0.2548) time: 0.4470 data: 0.0049 max mem: 22448 +train: [13] [340/400] eta: 0:00:27 lr: 0.000108 loss: 2.2602 (2.2641) grad: 0.2521 (0.2541) time: 0.4690 data: 0.0047 max mem: 22448 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 2.2560 (2.2653) grad: 0.2521 (0.2543) time: 0.4446 data: 0.0047 max mem: 22448 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 2.2748 (2.2658) grad: 0.2611 (0.2547) time: 0.4537 data: 0.0047 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.2625 (2.2655) grad: 0.2582 (0.2547) time: 0.4678 data: 0.0051 max mem: 22448 +train: [13] Total time: 0:03:01 (0.4535 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.2625 (2.2655) grad: 0.2582 (0.2547) +eval (validation): [13] [ 0/85] eta: 0:04:44 time: 3.3473 data: 3.0509 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:34 time: 0.3845 data: 0.0046 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:19 time: 0.3590 data: 0.0044 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:10 time: 0.3684 data: 0.0046 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:02 time: 0.3511 data: 0.0044 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3365 data: 0.0043 max mem: 22448 +eval (validation): [13] Total time: 0:00:34 (0.4025 s / it) +cv: [13] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.406 acc: 0.275 f1: 0.208 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:22:39 lr: nan time: 3.3995 data: 3.0223 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:50 lr: 0.000102 loss: 2.1721 (2.1783) grad: 0.2393 (0.2404) time: 0.4675 data: 0.0034 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:10 lr: 0.000101 loss: 2.1981 (2.1913) grad: 0.2390 (0.2416) time: 0.4448 data: 0.0049 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:52 lr: 0.000099 loss: 2.2036 (2.1931) grad: 0.2424 (0.2442) time: 0.4629 data: 0.0050 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:37 lr: 0.000098 loss: 2.2148 (2.2059) grad: 0.2424 (0.2440) time: 0.4425 data: 0.0048 max mem: 22448 +train: [14] [100/400] eta: 0:02:25 lr: 0.000096 loss: 2.2340 (2.2094) grad: 0.2424 (0.2451) time: 0.4674 data: 0.0053 max mem: 22448 +train: [14] [120/400] eta: 0:02:14 lr: 0.000095 loss: 2.1955 (2.2063) grad: 0.2495 (0.2472) time: 0.4550 data: 0.0050 max mem: 22448 +train: [14] [140/400] eta: 0:02:04 lr: 0.000093 loss: 2.1940 (2.2054) grad: 0.2567 (0.2489) time: 0.4587 data: 0.0051 max mem: 22448 +train: [14] [160/400] eta: 0:01:54 lr: 0.000092 loss: 2.1747 (2.2027) grad: 0.2570 (0.2493) time: 0.4725 data: 0.0052 max mem: 22448 +train: [14] [180/400] eta: 0:01:44 lr: 0.000090 loss: 2.1747 (2.2011) grad: 0.2525 (0.2498) time: 0.4399 data: 0.0046 max mem: 22448 +train: [14] [200/400] eta: 0:01:33 lr: 0.000089 loss: 2.1704 (2.1999) grad: 0.2525 (0.2505) time: 0.4422 data: 0.0049 max mem: 22448 +train: [14] [220/400] eta: 0:01:24 lr: 0.000088 loss: 2.1904 (2.2021) grad: 0.2556 (0.2510) time: 0.4609 data: 0.0050 max mem: 22448 +train: [14] [240/400] eta: 0:01:14 lr: 0.000086 loss: 2.2053 (2.2057) grad: 0.2536 (0.2508) time: 0.4580 data: 0.0051 max mem: 22448 +train: [14] [260/400] eta: 0:01:05 lr: 0.000085 loss: 2.2074 (2.2064) grad: 0.2496 (0.2510) time: 0.4553 data: 0.0051 max mem: 22448 +train: [14] [280/400] eta: 0:00:55 lr: 0.000083 loss: 2.2074 (2.2072) grad: 0.2495 (0.2509) time: 0.4575 data: 0.0050 max mem: 22448 +train: [14] [300/400] eta: 0:00:46 lr: 0.000082 loss: 2.2570 (2.2125) grad: 0.2520 (0.2514) time: 0.4476 data: 0.0049 max mem: 22448 +train: [14] [320/400] eta: 0:00:37 lr: 0.000081 loss: 2.2572 (2.2138) grad: 0.2561 (0.2517) time: 0.4619 data: 0.0052 max mem: 22448 +train: [14] [340/400] eta: 0:00:27 lr: 0.000079 loss: 2.2453 (2.2139) grad: 0.2593 (0.2525) time: 0.4521 data: 0.0050 max mem: 22448 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 2.2169 (2.2139) grad: 0.2589 (0.2527) time: 0.4595 data: 0.0051 max mem: 22448 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 2.1713 (2.2116) grad: 0.2528 (0.2525) time: 0.4644 data: 0.0052 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.1964 (2.2124) grad: 0.2554 (0.2528) time: 0.4464 data: 0.0049 max mem: 22448 +train: [14] Total time: 0:03:05 (0.4638 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.1964 (2.2124) grad: 0.2554 (0.2528) +eval (validation): [14] [ 0/85] eta: 0:04:42 time: 3.3237 data: 3.0368 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:32 time: 0.3614 data: 0.0056 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:19 time: 0.3582 data: 0.0033 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:10 time: 0.3476 data: 0.0043 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3666 data: 0.0042 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3464 data: 0.0038 max mem: 22448 +eval (validation): [14] Total time: 0:00:33 (0.3952 s / it) +cv: [14] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.401 acc: 0.275 f1: 0.208 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:21:50 lr: nan time: 3.2759 data: 2.9249 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:46 lr: 0.000074 loss: 2.1341 (2.1836) grad: 0.2449 (0.2478) time: 0.4626 data: 0.0038 max mem: 22448 +train: [15] [ 40/400] eta: 0:03:11 lr: 0.000072 loss: 2.1341 (2.1735) grad: 0.2449 (0.2450) time: 0.4657 data: 0.0043 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:50 lr: 0.000071 loss: 2.1346 (2.1630) grad: 0.2407 (0.2454) time: 0.4381 data: 0.0044 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:36 lr: 0.000070 loss: 2.1466 (2.1585) grad: 0.2407 (0.2449) time: 0.4559 data: 0.0045 max mem: 22448 +train: [15] [100/400] eta: 0:02:24 lr: 0.000068 loss: 2.1115 (2.1483) grad: 0.2460 (0.2460) time: 0.4493 data: 0.0047 max mem: 22448 +train: [15] [120/400] eta: 0:02:14 lr: 0.000067 loss: 2.1271 (2.1542) grad: 0.2519 (0.2480) time: 0.4609 data: 0.0047 max mem: 22448 +train: [15] [140/400] eta: 0:02:03 lr: 0.000066 loss: 2.1912 (2.1597) grad: 0.2542 (0.2501) time: 0.4527 data: 0.0048 max mem: 22448 +train: [15] [160/400] eta: 0:01:53 lr: 0.000064 loss: 2.1495 (2.1597) grad: 0.2520 (0.2506) time: 0.4535 data: 0.0052 max mem: 22448 +train: [15] [180/400] eta: 0:01:43 lr: 0.000063 loss: 2.1974 (2.1678) grad: 0.2568 (0.2520) time: 0.4470 data: 0.0048 max mem: 22448 +train: [15] [200/400] eta: 0:01:34 lr: 0.000062 loss: 2.2131 (2.1684) grad: 0.2520 (0.2515) time: 0.4912 data: 0.0051 max mem: 22448 +train: [15] [220/400] eta: 0:01:24 lr: 0.000061 loss: 2.1956 (2.1691) grad: 0.2475 (0.2515) time: 0.4636 data: 0.0050 max mem: 22448 +train: [15] [240/400] eta: 0:01:15 lr: 0.000059 loss: 2.1838 (2.1703) grad: 0.2458 (0.2514) time: 0.4559 data: 0.0051 max mem: 22448 +train: [15] [260/400] eta: 0:01:05 lr: 0.000058 loss: 2.2200 (2.1752) grad: 0.2487 (0.2513) time: 0.4543 data: 0.0051 max mem: 22448 +train: [15] [280/400] eta: 0:00:56 lr: 0.000057 loss: 2.1910 (2.1721) grad: 0.2438 (0.2506) time: 0.4502 data: 0.0049 max mem: 22448 +train: [15] [300/400] eta: 0:00:46 lr: 0.000056 loss: 2.1157 (2.1709) grad: 0.2413 (0.2505) time: 0.4520 data: 0.0048 max mem: 22448 +train: [15] [320/400] eta: 0:00:37 lr: 0.000054 loss: 2.1780 (2.1709) grad: 0.2490 (0.2505) time: 0.4644 data: 0.0050 max mem: 22448 +train: [15] [340/400] eta: 0:00:27 lr: 0.000053 loss: 2.1830 (2.1710) grad: 0.2511 (0.2504) time: 0.4664 data: 0.0051 max mem: 22448 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 2.1676 (2.1721) grad: 0.2511 (0.2506) time: 0.4549 data: 0.0054 max mem: 22448 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 2.1635 (2.1704) grad: 0.2433 (0.2503) time: 0.4604 data: 0.0051 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 2.1560 (2.1712) grad: 0.2433 (0.2501) time: 0.4670 data: 0.0050 max mem: 22448 +train: [15] Total time: 0:03:06 (0.4660 s / it) +train: [15] Summary: lr: 0.000050 loss: 2.1560 (2.1712) grad: 0.2433 (0.2501) +eval (validation): [15] [ 0/85] eta: 0:04:44 time: 3.3420 data: 3.0522 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:33 time: 0.3676 data: 0.0036 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:19 time: 0.3624 data: 0.0043 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:10 time: 0.3587 data: 0.0046 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:01 time: 0.3594 data: 0.0044 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3324 data: 0.0041 max mem: 22448 +eval (validation): [15] Total time: 0:00:33 (0.3988 s / it) +cv: [15] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.406 acc: 0.274 f1: 0.206 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:21:56 lr: nan time: 3.2920 data: 2.9634 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:43 lr: 0.000048 loss: 2.1064 (2.1327) grad: 0.2310 (0.2346) time: 0.4519 data: 0.0054 max mem: 22448 +train: [16] [ 40/400] eta: 0:03:09 lr: 0.000047 loss: 2.1240 (2.1282) grad: 0.2319 (0.2344) time: 0.4611 data: 0.0044 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:50 lr: 0.000046 loss: 2.1140 (2.1204) grad: 0.2315 (0.2360) time: 0.4488 data: 0.0049 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:37 lr: 0.000045 loss: 2.1390 (2.1295) grad: 0.2429 (0.2392) time: 0.4651 data: 0.0050 max mem: 22448 +train: [16] [100/400] eta: 0:02:25 lr: 0.000044 loss: 2.1390 (2.1280) grad: 0.2470 (0.2404) time: 0.4546 data: 0.0048 max mem: 22448 +train: [16] [120/400] eta: 0:02:14 lr: 0.000043 loss: 2.1274 (2.1283) grad: 0.2424 (0.2414) time: 0.4526 data: 0.0048 max mem: 22448 +train: [16] [140/400] eta: 0:02:03 lr: 0.000042 loss: 2.1182 (2.1310) grad: 0.2361 (0.2408) time: 0.4577 data: 0.0051 max mem: 22448 +train: [16] [160/400] eta: 0:01:53 lr: 0.000041 loss: 2.1535 (2.1401) grad: 0.2444 (0.2429) time: 0.4557 data: 0.0049 max mem: 22448 +train: [16] [180/400] eta: 0:01:43 lr: 0.000040 loss: 2.1494 (2.1398) grad: 0.2464 (0.2430) time: 0.4592 data: 0.0049 max mem: 22448 +train: [16] [200/400] eta: 0:01:33 lr: 0.000039 loss: 2.1202 (2.1366) grad: 0.2413 (0.2421) time: 0.4473 data: 0.0050 max mem: 22448 +train: [16] [220/400] eta: 0:01:24 lr: 0.000038 loss: 2.1134 (2.1366) grad: 0.2302 (0.2417) time: 0.4629 data: 0.0050 max mem: 22448 +train: [16] [240/400] eta: 0:01:14 lr: 0.000036 loss: 2.1414 (2.1365) grad: 0.2416 (0.2422) time: 0.4582 data: 0.0051 max mem: 22448 +train: [16] [260/400] eta: 0:01:05 lr: 0.000035 loss: 2.1414 (2.1395) grad: 0.2522 (0.2430) time: 0.4576 data: 0.0050 max mem: 22448 +train: [16] [280/400] eta: 0:00:56 lr: 0.000034 loss: 2.1334 (2.1393) grad: 0.2505 (0.2430) time: 0.4677 data: 0.0053 max mem: 22448 +train: [16] [300/400] eta: 0:00:46 lr: 0.000033 loss: 2.1391 (2.1418) grad: 0.2442 (0.2434) time: 0.4448 data: 0.0050 max mem: 22448 +train: [16] [320/400] eta: 0:00:37 lr: 0.000032 loss: 2.1709 (2.1445) grad: 0.2529 (0.2438) time: 0.4457 data: 0.0046 max mem: 22448 +train: [16] [340/400] eta: 0:00:27 lr: 0.000031 loss: 2.1268 (2.1430) grad: 0.2431 (0.2438) time: 0.4735 data: 0.0051 max mem: 22448 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 2.1133 (2.1442) grad: 0.2429 (0.2443) time: 0.4614 data: 0.0053 max mem: 22448 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 2.1606 (2.1437) grad: 0.2476 (0.2447) time: 0.4572 data: 0.0051 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 2.1349 (2.1430) grad: 0.2560 (0.2453) time: 0.4555 data: 0.0052 max mem: 22448 +train: [16] Total time: 0:03:05 (0.4646 s / it) +train: [16] Summary: lr: 0.000029 loss: 2.1349 (2.1430) grad: 0.2560 (0.2453) +eval (validation): [16] [ 0/85] eta: 0:04:50 time: 3.4216 data: 3.1724 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:33 time: 0.3654 data: 0.0048 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:19 time: 0.3618 data: 0.0038 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:10 time: 0.3503 data: 0.0045 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3506 data: 0.0044 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3362 data: 0.0044 max mem: 22448 +eval (validation): [16] Total time: 0:00:33 (0.3944 s / it) +cv: [16] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.401 acc: 0.276 f1: 0.211 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:27 lr: nan time: 3.3700 data: 3.0278 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:53 lr: 0.000028 loss: 2.0858 (2.0887) grad: 0.2200 (0.2299) time: 0.4763 data: 0.0054 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:11 lr: 0.000027 loss: 2.1000 (2.1093) grad: 0.2342 (0.2349) time: 0.4454 data: 0.0040 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:55 lr: 0.000026 loss: 2.1165 (2.1167) grad: 0.2325 (0.2331) time: 0.4844 data: 0.0048 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:39 lr: 0.000025 loss: 2.1120 (2.1078) grad: 0.2263 (0.2332) time: 0.4430 data: 0.0049 max mem: 22448 +train: [17] [100/400] eta: 0:02:27 lr: 0.000024 loss: 2.1120 (2.1183) grad: 0.2295 (0.2332) time: 0.4697 data: 0.0053 max mem: 22448 +train: [17] [120/400] eta: 0:02:16 lr: 0.000023 loss: 2.0934 (2.1150) grad: 0.2338 (0.2339) time: 0.4519 data: 0.0052 max mem: 22448 +train: [17] [140/400] eta: 0:02:05 lr: 0.000023 loss: 2.0842 (2.1148) grad: 0.2333 (0.2344) time: 0.4690 data: 0.0050 max mem: 22448 +train: [17] [160/400] eta: 0:01:55 lr: 0.000022 loss: 2.1095 (2.1134) grad: 0.2343 (0.2352) time: 0.4560 data: 0.0050 max mem: 22448 +train: [17] [180/400] eta: 0:01:44 lr: 0.000021 loss: 2.1095 (2.1132) grad: 0.2370 (0.2349) time: 0.4301 data: 0.0045 max mem: 22448 +train: [17] [200/400] eta: 0:01:35 lr: 0.000020 loss: 2.0825 (2.1115) grad: 0.2344 (0.2347) time: 0.4805 data: 0.0052 max mem: 22448 +train: [17] [220/400] eta: 0:01:25 lr: 0.000019 loss: 2.0996 (2.1122) grad: 0.2355 (0.2355) time: 0.4563 data: 0.0049 max mem: 22448 +train: [17] [240/400] eta: 0:01:15 lr: 0.000019 loss: 2.1397 (2.1110) grad: 0.2355 (0.2361) time: 0.4535 data: 0.0050 max mem: 22448 +train: [17] [260/400] eta: 0:01:05 lr: 0.000018 loss: 2.0925 (2.1103) grad: 0.2365 (0.2364) time: 0.4552 data: 0.0053 max mem: 22448 +train: [17] [280/400] eta: 0:00:56 lr: 0.000017 loss: 2.1102 (2.1114) grad: 0.2386 (0.2366) time: 0.4483 data: 0.0049 max mem: 22448 +train: [17] [300/400] eta: 0:00:46 lr: 0.000016 loss: 2.1191 (2.1132) grad: 0.2346 (0.2365) time: 0.4361 data: 0.0050 max mem: 22448 +train: [17] [320/400] eta: 0:00:37 lr: 0.000016 loss: 2.1191 (2.1131) grad: 0.2319 (0.2359) time: 0.4576 data: 0.0052 max mem: 22448 +train: [17] [340/400] eta: 0:00:27 lr: 0.000015 loss: 2.1144 (2.1132) grad: 0.2304 (0.2362) time: 0.4601 data: 0.0048 max mem: 22448 +train: [17] [360/400] eta: 0:00:18 lr: 0.000014 loss: 2.1172 (2.1144) grad: 0.2391 (0.2366) time: 0.4530 data: 0.0049 max mem: 22448 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 2.1129 (2.1133) grad: 0.2419 (0.2373) time: 0.4519 data: 0.0051 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 2.0841 (2.1114) grad: 0.2381 (0.2370) time: 0.4413 data: 0.0049 max mem: 22448 +train: [17] Total time: 0:03:05 (0.4639 s / it) +train: [17] Summary: lr: 0.000013 loss: 2.0841 (2.1114) grad: 0.2381 (0.2370) +eval (validation): [17] [ 0/85] eta: 0:04:58 time: 3.5133 data: 3.1893 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:34 time: 0.3823 data: 0.0116 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:20 time: 0.3643 data: 0.0036 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:10 time: 0.3696 data: 0.0046 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:02 time: 0.3463 data: 0.0042 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3350 data: 0.0044 max mem: 22448 +eval (validation): [17] Total time: 0:00:34 (0.4043 s / it) +cv: [17] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.397 acc: 0.276 f1: 0.211 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:21:06 lr: nan time: 3.1655 data: 2.8354 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:55 lr: 0.000012 loss: 2.1099 (2.1301) grad: 0.2336 (0.2365) time: 0.4929 data: 0.0038 max mem: 22448 +train: [18] [ 40/400] eta: 0:03:12 lr: 0.000012 loss: 2.1093 (2.0971) grad: 0.2336 (0.2355) time: 0.4448 data: 0.0046 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:49 lr: 0.000011 loss: 2.0657 (2.0945) grad: 0.2415 (0.2389) time: 0.4290 data: 0.0046 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:36 lr: 0.000011 loss: 2.0660 (2.0949) grad: 0.2335 (0.2363) time: 0.4600 data: 0.0051 max mem: 22448 +train: [18] [100/400] eta: 0:02:25 lr: 0.000010 loss: 2.0971 (2.0945) grad: 0.2290 (0.2360) time: 0.4596 data: 0.0051 max mem: 22448 +train: [18] [120/400] eta: 0:02:14 lr: 0.000009 loss: 2.0901 (2.0906) grad: 0.2344 (0.2355) time: 0.4565 data: 0.0053 max mem: 22448 +train: [18] [140/400] eta: 0:02:03 lr: 0.000009 loss: 2.0750 (2.0917) grad: 0.2308 (0.2354) time: 0.4588 data: 0.0046 max mem: 22448 +train: [18] [160/400] eta: 0:01:53 lr: 0.000008 loss: 2.0871 (2.0920) grad: 0.2270 (0.2345) time: 0.4570 data: 0.0052 max mem: 22448 +train: [18] [180/400] eta: 0:01:43 lr: 0.000008 loss: 2.0728 (2.0878) grad: 0.2345 (0.2350) time: 0.4521 data: 0.0050 max mem: 22448 +train: [18] [200/400] eta: 0:01:34 lr: 0.000007 loss: 2.0734 (2.0887) grad: 0.2345 (0.2348) time: 0.4693 data: 0.0052 max mem: 22448 +train: [18] [220/400] eta: 0:01:24 lr: 0.000007 loss: 2.0792 (2.0883) grad: 0.2368 (0.2350) time: 0.4576 data: 0.0049 max mem: 22448 +train: [18] [240/400] eta: 0:01:15 lr: 0.000006 loss: 2.1036 (2.0918) grad: 0.2368 (0.2349) time: 0.4577 data: 0.0050 max mem: 22448 +train: [18] [260/400] eta: 0:01:05 lr: 0.000006 loss: 2.1036 (2.0929) grad: 0.2311 (0.2347) time: 0.4547 data: 0.0051 max mem: 22448 +train: [18] [280/400] eta: 0:00:56 lr: 0.000006 loss: 2.0922 (2.0926) grad: 0.2344 (0.2349) time: 0.4744 data: 0.0050 max mem: 22448 +train: [18] [300/400] eta: 0:00:46 lr: 0.000005 loss: 2.0790 (2.0913) grad: 0.2329 (0.2347) time: 0.4640 data: 0.0049 max mem: 22448 +train: [18] [320/400] eta: 0:00:37 lr: 0.000005 loss: 2.0905 (2.0937) grad: 0.2293 (0.2344) time: 0.4435 data: 0.0049 max mem: 22448 +train: [18] [340/400] eta: 0:00:27 lr: 0.000004 loss: 2.1063 (2.0922) grad: 0.2304 (0.2342) time: 0.4583 data: 0.0051 max mem: 22448 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 2.0770 (2.0923) grad: 0.2306 (0.2341) time: 0.4545 data: 0.0052 max mem: 22448 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 2.0816 (2.0924) grad: 0.2321 (0.2340) time: 0.4624 data: 0.0049 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 2.0754 (2.0906) grad: 0.2324 (0.2342) time: 0.4628 data: 0.0052 max mem: 22448 +train: [18] Total time: 0:03:06 (0.4660 s / it) +train: [18] Summary: lr: 0.000003 loss: 2.0754 (2.0906) grad: 0.2324 (0.2342) +eval (validation): [18] [ 0/85] eta: 0:04:19 time: 3.0577 data: 2.8222 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:31 time: 0.3617 data: 0.0037 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:18 time: 0.3475 data: 0.0036 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:10 time: 0.3743 data: 0.0042 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:01 time: 0.3697 data: 0.0044 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3432 data: 0.0042 max mem: 22448 +eval (validation): [18] Total time: 0:00:33 (0.3961 s / it) +cv: [18] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.399 acc: 0.275 f1: 0.210 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:27:04 lr: nan time: 4.0615 data: 3.7010 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:54 lr: 0.000003 loss: 2.0844 (2.0862) grad: 0.2220 (0.2274) time: 0.4449 data: 0.0044 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:11 lr: 0.000003 loss: 2.0608 (2.0656) grad: 0.2266 (0.2303) time: 0.4443 data: 0.0045 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:52 lr: 0.000002 loss: 2.0734 (2.0834) grad: 0.2274 (0.2303) time: 0.4556 data: 0.0050 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:39 lr: 0.000002 loss: 2.1113 (2.0927) grad: 0.2293 (0.2298) time: 0.4652 data: 0.0052 max mem: 22448 +train: [19] [100/400] eta: 0:02:26 lr: 0.000002 loss: 2.0729 (2.0904) grad: 0.2280 (0.2304) time: 0.4610 data: 0.0053 max mem: 22448 +train: [19] [120/400] eta: 0:02:15 lr: 0.000002 loss: 2.0893 (2.0941) grad: 0.2311 (0.2316) time: 0.4578 data: 0.0049 max mem: 22448 +train: [19] [140/400] eta: 0:02:04 lr: 0.000001 loss: 2.1243 (2.0948) grad: 0.2290 (0.2311) time: 0.4429 data: 0.0047 max mem: 22448 +train: [19] [160/400] eta: 0:01:55 lr: 0.000001 loss: 2.0892 (2.0909) grad: 0.2230 (0.2305) time: 0.4841 data: 0.0051 max mem: 22448 +train: [19] [180/400] eta: 0:01:44 lr: 0.000001 loss: 2.0561 (2.0869) grad: 0.2330 (0.2316) time: 0.4430 data: 0.0047 max mem: 22448 +train: [19] [200/400] eta: 0:01:34 lr: 0.000001 loss: 2.0684 (2.0854) grad: 0.2344 (0.2313) time: 0.4486 data: 0.0049 max mem: 22448 +train: [19] [220/400] eta: 0:01:25 lr: 0.000001 loss: 2.0729 (2.0843) grad: 0.2311 (0.2311) time: 0.4720 data: 0.0052 max mem: 22448 +train: [19] [240/400] eta: 0:01:15 lr: 0.000001 loss: 2.0891 (2.0830) grad: 0.2338 (0.2313) time: 0.4587 data: 0.0052 max mem: 22448 +train: [19] [260/400] eta: 0:01:05 lr: 0.000000 loss: 2.0551 (2.0823) grad: 0.2310 (0.2312) time: 0.4580 data: 0.0049 max mem: 22448 +train: [19] [280/400] eta: 0:00:56 lr: 0.000000 loss: 2.0945 (2.0855) grad: 0.2310 (0.2311) time: 0.4536 data: 0.0050 max mem: 22448 +train: [19] [300/400] eta: 0:00:46 lr: 0.000000 loss: 2.1145 (2.0865) grad: 0.2327 (0.2309) time: 0.4554 data: 0.0052 max mem: 22448 +train: [19] [320/400] eta: 0:00:37 lr: 0.000000 loss: 2.1103 (2.0886) grad: 0.2343 (0.2314) time: 0.4462 data: 0.0050 max mem: 22448 +train: [19] [340/400] eta: 0:00:27 lr: 0.000000 loss: 2.1130 (2.0907) grad: 0.2329 (0.2312) time: 0.4602 data: 0.0049 max mem: 22448 +train: [19] [360/400] eta: 0:00:18 lr: 0.000000 loss: 2.1190 (2.0902) grad: 0.2227 (0.2308) time: 0.4557 data: 0.0051 max mem: 22448 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 2.0940 (2.0904) grad: 0.2227 (0.2306) time: 0.4564 data: 0.0051 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 2.1132 (2.0924) grad: 0.2258 (0.2306) time: 0.4591 data: 0.0051 max mem: 22448 +train: [19] Total time: 0:03:06 (0.4657 s / it) +train: [19] Summary: lr: 0.000000 loss: 2.1132 (2.0924) grad: 0.2258 (0.2306) +eval (validation): [19] [ 0/85] eta: 0:04:29 time: 3.1721 data: 2.9351 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:35 time: 0.4123 data: 0.0190 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:20 time: 0.3648 data: 0.0041 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:10 time: 0.3771 data: 0.0044 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:02 time: 0.3520 data: 0.0040 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3458 data: 0.0039 max mem: 22448 +eval (validation): [19] Total time: 0:00:34 (0.4099 s / it) +cv: [19] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.398 acc: 0.274 f1: 0.209 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.27445551864156514, "hparam": [0.38, 1.0], "hparam_id": 18, "epoch": 19, "is_best": false, "best_score": 0.2862679955703212} +eval (train): [20] [ 0/509] eta: 0:25:34 time: 3.0155 data: 2.7512 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:05 time: 0.3762 data: 0.0051 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:34 time: 0.4087 data: 0.0046 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:10 time: 0.3558 data: 0.0043 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:54 time: 0.3566 data: 0.0041 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:44 time: 0.3887 data: 0.0045 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:34 time: 0.3613 data: 0.0045 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:24 time: 0.3703 data: 0.0044 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:17 time: 0.4080 data: 0.0047 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:09 time: 0.3833 data: 0.0045 max mem: 22448 +eval (train): [20] [200/509] eta: 0:02:00 time: 0.3578 data: 0.0039 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:52 time: 0.3826 data: 0.0047 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:43 time: 0.3464 data: 0.0040 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:35 time: 0.3844 data: 0.0045 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:28 time: 0.3855 data: 0.0043 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:20 time: 0.3771 data: 0.0044 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:12 time: 0.3836 data: 0.0048 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:04 time: 0.3747 data: 0.0042 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:56 time: 0.3488 data: 0.0039 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:49 time: 0.3837 data: 0.0043 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:41 time: 0.3761 data: 0.0045 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:34 time: 0.3894 data: 0.0043 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:26 time: 0.3771 data: 0.0046 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3644 data: 0.0041 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:11 time: 0.3451 data: 0.0038 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3699 data: 0.0045 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3462 data: 0.0043 max mem: 22448 +eval (train): [20] Total time: 0:03:13 (0.3802 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:09 time: 2.9390 data: 2.7067 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:29 time: 0.3374 data: 0.0034 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:18 time: 0.3406 data: 0.0048 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3553 data: 0.0039 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3426 data: 0.0042 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3406 data: 0.0042 max mem: 22448 +eval (validation): [20] Total time: 0:00:32 (0.3769 s / it) +eval (test): [20] [ 0/85] eta: 0:04:15 time: 3.0093 data: 2.7573 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:33 time: 0.3854 data: 0.0170 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:19 time: 0.3403 data: 0.0036 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3732 data: 0.0042 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3499 data: 0.0046 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3418 data: 0.0045 max mem: 22448 +eval (test): [20] Total time: 0:00:33 (0.3958 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:13 time: 3.0955 data: 2.8512 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3734 data: 0.0279 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3909 data: 0.0066 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3652 data: 0.0033 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3365 data: 0.0040 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3257 data: 0.0040 max mem: 22448 +eval (testid): [20] Total time: 0:00:32 (0.4011 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.2862679955703212, "hparam": [1.4, 1.0], "hparam_id": 26, "epoch": 5, "is_best": true, "best_score": 0.2862679955703212} +eval (train): [20] [ 0/509] eta: 0:24:02 time: 2.8331 data: 2.5856 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:03:58 time: 0.3695 data: 0.0037 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:22 time: 0.3735 data: 0.0038 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:03 time: 0.3588 data: 0.0045 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:50 time: 0.3707 data: 0.0043 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:41 time: 0.3827 data: 0.0042 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:31 time: 0.3560 data: 0.0042 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:22 time: 0.3617 data: 0.0042 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:13 time: 0.3762 data: 0.0044 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:04 time: 0.3418 data: 0.0039 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:56 time: 0.3644 data: 0.0041 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:48 time: 0.3499 data: 0.0042 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:40 time: 0.3478 data: 0.0042 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:32 time: 0.3662 data: 0.0042 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:25 time: 0.3564 data: 0.0045 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:17 time: 0.3491 data: 0.0041 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:09 time: 0.3589 data: 0.0040 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:02 time: 0.3755 data: 0.0042 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:55 time: 0.3708 data: 0.0041 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:47 time: 0.3929 data: 0.0046 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:40 time: 0.3724 data: 0.0043 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:32 time: 0.3621 data: 0.0043 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:25 time: 0.3626 data: 0.0041 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3630 data: 0.0044 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3594 data: 0.0043 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3598 data: 0.0040 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3501 data: 0.0039 max mem: 22448 +eval (train): [20] Total time: 0:03:08 (0.3703 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:27 time: 3.1422 data: 2.8879 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:33 time: 0.3868 data: 0.0060 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:20 time: 0.3677 data: 0.0035 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3659 data: 0.0043 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:02 time: 0.3513 data: 0.0039 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3515 data: 0.0041 max mem: 22448 +eval (validation): [20] Total time: 0:00:34 (0.4043 s / it) +eval (test): [20] [ 0/85] eta: 0:04:28 time: 3.1638 data: 2.9155 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:32 time: 0.3723 data: 0.0041 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:20 time: 0.3880 data: 0.0042 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3922 data: 0.0046 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:02 time: 0.3482 data: 0.0042 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3351 data: 0.0041 max mem: 22448 +eval (test): [20] Total time: 0:00:34 (0.4076 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:20 time: 3.1756 data: 2.9158 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3695 data: 0.0095 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3924 data: 0.0048 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3574 data: 0.0040 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3550 data: 0.0046 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3420 data: 0.0044 max mem: 22448 +eval (testid): [20] Total time: 0:00:33 (0.4036 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00042 | 0.05 | 26 | [1.4, 1.0] | train | 2.0977 | 0.37023 | 0.0024716 | 0.30921 | 0.0024915 | +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00042 | 0.05 | 26 | [1.4, 1.0] | validation | 2.3707 | 0.28627 | 0.0054766 | 0.21881 | 0.0053166 | +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00042 | 0.05 | 26 | [1.4, 1.0] | test | 2.3514 | 0.28553 | 0.0053546 | 0.22022 | 0.0051809 | +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00042 | 0.05 | 26 | [1.4, 1.0] | testid | 2.3106 | 0.29593 | 0.0057749 | 0.23945 | 0.0057304 | + + +done! total time: 1:24:24 diff --git a/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/train_log.json b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..153906439b256cd66e5060d52b0a9f1988861221 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.135808854103088, "train/grad": 0.1630547048151493, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.17670654296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.176031494140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.174805908203125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.17377197265625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.172664794921875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.171263427734375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.16978759765625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.168302001953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.166455078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.164715576171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.163056640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.160897216796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.15902099609375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.15677734375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.154930419921875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.153475341796875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.151861572265625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.150283203125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.148740234375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.147506103515625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.14625244140625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.14518798828125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.14424072265625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.143411865234375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.142720947265625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.14228271484375, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.141988525390625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.14197509765625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.14211669921875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.142430419921875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.142723388671875, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.143287353515625, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.143858642578125, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.14451171875, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.1453369140625, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.146219482421875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.147435302734375, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.148668212890625, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1497454833984375, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.1448898315429688, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.1318728637695314, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.1178692626953124, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0974822998046876, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.075073699951172, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.0517690277099607, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.0335293579101563, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.013229751586914, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.9953548431396486, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.9881188583374025, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024584956993348898, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02452203994616866, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.024421777678653597, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.024325241181068123, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.024233612325042488, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.024111586599610747, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023982232264243068, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023848258377984165, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.023685669312253594, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023529578587040304, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023390552713535726, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02320743238553405, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023053508712910117, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022868612883612513, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02272251517046243, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022608735160902144, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022487813839688897, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022372683947905897, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02226897768676281, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02219280135817826, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02211571210063994, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022053078138269483, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02199877308681607, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.021954624955542387, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02192182425875217, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021897764671593906, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02188659377861768, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.021880694758147003, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.021882219947874548, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02188559680711478, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02188534983433783, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02187745762988925, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.021847888743504882, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.021797186248004436, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02170834526885301, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.021587549187242986, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02143013009335846, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.021264268588274716, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021094568921253085, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.021134986407123506, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.021641204548068345, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.022252080645412207, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.023228314057923853, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.023803362217731775, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.024515085075981914, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.025783195574767888, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.027058388604782522, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.028162888577207924, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.029748036088421942, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.162855863571167, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1608083248138428, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.157644271850586, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1546807289123535, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1519882678985596, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.148610830307007, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.145231246948242, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.141958713531494, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1383161544799805, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1351542472839355, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.132716178894043, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1299426555633545, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1280572414398193, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.126260280609131, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1252803802490234, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1247153282165527, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1243555545806885, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.124234676361084, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1242663860321045, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1244752407073975, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1249465942382812, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.125706911087036, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.126919984817505, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1285557746887207, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1304967403411865, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.132835626602173, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.134854316711426, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1363730430603027, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1376516819000244, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.137394666671753, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1358563899993896, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.134141683578491, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.131896495819092, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.130377769470215, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1293978691101074, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.129962921142578, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1314520835876465, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.130272388458252, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.111893653869629, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9415955543518066, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8302724361419678, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7754857540130615, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.698953866958618, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.679860830307007, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.6901588439941406, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.6288909912109375, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.5891923904418945, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.588529348373413, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.633247137069702, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05297157622739018, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05389442598744924, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.055740125507567365, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.057770394979697305, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06016980435585087, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06090808416389812, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.061461794019933555, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.067921742340347, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.067921742340347, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.067921742340347, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06035437430786268, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.055924695459579184, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.05463270579549649, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.05334071613141381, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.05167958656330749, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.051310446659283866, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.05278700627537837, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.055740125507567365, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.060723514211886306, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.06552233296419344, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.07050572166851236, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.0710594315245478, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.07308970099667775, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.0710594315245478, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.07124400147655961, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.07364341085271318, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.1271686969361388, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.16057585825027684, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.17220376522702105, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.1967515688445921, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20191952750092285, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.20062753783684018, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.21133259505352528, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.21908453303802142, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.22240679217423404, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.2146548541897379, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013214461388157454, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013438241442797276, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013706770252114205, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014081284059556587, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014533070371567007, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014088894911501232, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013933054554132572, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013666921970514487, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01408707560933191, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014592110532308931, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015318072518875332, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.014674618878781007, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014054676911801877, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.014213038556561803, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.014038028182539894, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.013250648390518491, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.012584620351308718, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01249699478477801, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.012111274178032752, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.011491859610515189, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.011865510962842876, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.014421150363872726, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.015850370691941562, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.014585874766387341, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.011775982918561169, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.008993009691501313, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.006870022187099698, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.005148655593927219, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.0045029757763434845, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.006136984996078604, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.008567646551505046, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.01121072207549839, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.014842514406880678, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.01772155487644124, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.020535590900267593, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.025522893112453187, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.02078389311468818, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.01767214562660693, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.01962174037363706, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.05973965064841886, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.08058145991453163, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.08849465056306684, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.11708143046593011, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.11773047493166955, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.12728084740292792, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.1435037678862758, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.1472686950968833, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1560157561918817, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.14357982927235777, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 2.9953548431396486, "validation/loss_best": 2.588529348373413, "validation/acc_best": 0.22240679217423404, "validation/f1_best": 0.1560157561918817} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.984144439697266, "train/grad": 0.18702676624059678, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.15393798828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1513818359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1476708984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1447509765625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1422998046875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.139677734375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13740234375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.135548095703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1337890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.13250732421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.131778564453125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.13095947265625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.130545654296875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.13018798828125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1300439453125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.12994873046875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.129896240234375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.129906005859375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.129737548828125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.129764404296875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.129681396484375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.129642333984375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1295458984375, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.129669189453125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.129808349609375, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.12993408203125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.130205078125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1304705810546873, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.1307867431640624, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.128887939453125, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.1124017333984373, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.076337890625, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.018722915649414, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.9685984802246095, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.906515655517578, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.8390495681762697, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.7716021728515625, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.717032527923584, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.662186622619629, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.6210058689117433, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.602157678604126, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.5912555694580077, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5768902492523194, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5728922748565672, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.577745990753174, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.592480707168579, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.6063394808769225, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.6643931078910827, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.725105973482132, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022154425461776553, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021928270417265593, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021612173495814204, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021360839973203836, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02115993494633585, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020944738872349263, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02076766088604927, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020633575478568672, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020518685695715248, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02044596467167139, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02040844323579222, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020385001483373345, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020382167110219597, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020395252062007783, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020417973892763258, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02044202007818967, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020472641745582224, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02050760146230459, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020544593622907996, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02057466265745461, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.020610271650366486, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020640577138401568, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02066296327393502, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.020668108467943966, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.020640442771837117, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.020557817495428024, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.020436448934487997, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02028919693082571, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.020041439407505093, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01971574765164405, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.019757512086071075, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.020352699351496993, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.021286107385531067, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02227201921865344, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.023590132850222288, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.025189667660742997, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02682998705189675, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02833297388162464, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.030324123278260232, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03228266092948615, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03345629228278994, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03440672398544848, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03595311676152051, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03698896703310311, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.038262670878320935, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.040551319597288964, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.043119964608922604, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04780830120667815, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.05331785087473691, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1365575790405273, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.133972644805908, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.130819797515869, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1286964416503906, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.127302885055542, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1260616779327393, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.125321626663208, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1249380111694336, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1247477531433105, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1246819496154785, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.124682664871216, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.124681234359741, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.124630928039551, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.124478816986084, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1242711544036865, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1240756511688232, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1238901615142822, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.123793363571167, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.123976945877075, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1244499683380127, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1253085136413574, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1264777183532715, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.128321647644043, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.131089448928833, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.135117530822754, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1410439014434814, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.146028757095337, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.149371862411499, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1491000652313232, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1147210597991943, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.913224935531616, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.787105083465576, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.7051405906677246, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.6380929946899414, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5793650150299072, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.543825387954712, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.49788236618042, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.47700572013855, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5123095512390137, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5022075176239014, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.4939820766448975, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.4904062747955322, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.532533884048462, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6116786003112793, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.6316964626312256, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.641554594039917, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.8779690265655518, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.4059345722198486, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.4807534217834473, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06829088224437062, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06902916205241787, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.0664451827242525, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.06902916205241787, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.07124400147655961, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09099298634182355, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.1389811738648948, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.16020671834625322, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.1792174234034699, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.19361387966039129, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24529346622369877, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.24363233665559247, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.24492432631967515, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.24713916574381692, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23421926910299004, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22702104097452935, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2222222222222222, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.22591362126245848, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.18143226282761166, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.1419342930970838, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.17552602436323367, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013804795570036091, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01365532300097618, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012294533192608738, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011127766666122384, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010504934437986136, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010047357183481211, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009262109255768312, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009111274784380142, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.009269646304879402, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.009201750193283184, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.009385999242642298, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.010156631430411128, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.010675792865224012, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.011403620302123545, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.012811327554549883, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.014526848766587976, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.015053539056579872, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.016127089997315668, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.015731853744628762, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01649564390254037, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.016881540985292123, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.017271159703820347, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.016912183513281494, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.014243328962109189, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.013912191813618439, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.014441158951667801, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.014037609930061512, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.014723282221878723, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.015936800758796575, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.029302988758792947, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.06008283040400003, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.0778679456187753, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.09928849762993452, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.122686683054497, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.15082984864502433, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1611040037601843, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17473257111031013, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1796999260585289, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1766618340173435, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17697655298376933, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18146413407912962, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17970119610981883, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1696648994528258, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15825079062257943, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.15995231493455972, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.1674348153862668, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.12795747483162234, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.07544641700476097, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.10583069021166631, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 2.5912555694580077, "validation/loss_best": 2.4904062747955322, "validation/acc_best": 0.24713916574381692, "validation/f1_best": 0.17970119610981883} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 2.941826868057251, "train/grad": 0.3312811806797981, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1374365234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.136209716796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.134840087890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.134227294921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.13376220703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.13348876953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.133411865234375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1333056640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.133193359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1330615234375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.13295166015625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.132664794921875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.132493896484375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.13220458984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1319580078125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.131712646484375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.131365966796875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.131158447265625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.130938720703125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1307568359375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.130621337890625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.1303466796875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.129991455078125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.128975830078125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.12383544921875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0823269653320313, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.011829528808594, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.939952087402344, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.843938980102539, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.7277265930175782, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.639453659057617, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.5845015335083006, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.52140962600708, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.4710754585266113, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.4214443874359133, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.389161672592163, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.374854211807251, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3677814769744874, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3670194339752197, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3834316301345826, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4091411781311036, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4302625703811644, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4583357191085815, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.493625099658966, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5509014439582827, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.6464220869541166, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.88635041475296, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.828530433177948, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.015130171775818, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020772054293192923, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02066051577683538, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020558039559982717, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020511542386375366, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020495678670704363, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020495737306773662, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020510527538135648, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020530206626281144, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020553822373040022, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020576606607064604, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020595703409053385, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020621118182316423, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020642078472301362, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02066852822434157, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020689686299301683, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02070655391085893, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020718984426930546, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020724337217397987, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020713261966593562, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0206827131472528, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.020614501805976033, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020508085442706943, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.020338783296756445, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02010554391425103, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01987196085508913, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02052137061022222, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.021917193131521344, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02311740579083562, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0247884306544438, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02693519840016961, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.029013880798593163, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.030115177929401396, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0313632473256439, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.032811371264979246, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.034618569854646924, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.03577263902872801, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03669892058707774, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.03778924415819347, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03903810859657824, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03971744015812874, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.041080597564578054, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04195212655700743, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04363521747291088, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.045648371316492554, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05015619507059455, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05695256257429719, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07575329542160034, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07098926885053515, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.13166029181331396, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.126267433166504, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1256110668182373, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1251466274261475, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1249988079071045, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.124997138977051, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1250455379486084, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.125178813934326, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.125297784805298, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.12548565864563, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.12571382522583, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1259729862213135, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.126432180404663, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.126925468444824, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1277003288269043, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.128345012664795, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1288139820098877, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1291561126708984, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1289422512054443, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1281678676605225, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1269242763519287, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1248183250427246, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1221940517425537, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.118563413619995, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.110828161239624, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0625836849212646, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.802443504333496, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7055752277374268, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.653498649597168, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.597440242767334, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5195677280426025, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4747419357299805, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.45646071434021, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.442911386489868, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.452951192855835, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.510624647140503, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5135443210601807, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.506765604019165, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.4807214736938477, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.4889626502990723, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5281896591186523, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.590183973312378, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.5657103061676025, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6546716690063477, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6587841510772705, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.6655237674713135, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.731161117553711, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.802330017089844, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.857863187789917, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.067921742340347, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07456626061277224, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.09634551495016612, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.16205241786637137, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.18586194167589518, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2059800664451827, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2207456626061277, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2572905131044666, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.25802879291251385, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2587670727205611, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2602436323366556, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.24603174603174602, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24492432631967515, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24806201550387597, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2513842746400886, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.24953857511997046, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2412329272794389, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23237356958287192, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22997416020671835, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22388335179032853, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.22517534145441123, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.21410114433370248, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.13362864525655224, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.20801033591731266, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01138126182635909, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010922204658882802, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010652455221686778, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010882604832113204, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010828597396682968, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010719564220504069, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010823886411682124, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01163279613030181, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01225120878907978, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.013050853035447924, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013513145376136583, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013628658394791146, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01354195944908825, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013060720384292299, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.012537159694458911, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.011730233001120317, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01134245617742031, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01075920799306592, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.010301576813008348, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.008732143214217412, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.009617933905292938, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.011273960592326729, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.012862439324116228, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.015140579723207473, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.022678329915200215, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.0740741557048535, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.09783251292085604, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.12544475999316607, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.14398937573614706, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1691888799271881, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1831281702083164, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18431665307596093, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19236264404793676, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1956726852853484, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18412106066917852, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18402836884101814, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1898051787960082, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19968672969195686, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19949025339016932, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18813278304866035, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15534978955781878, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1602524599456134, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15570676852760648, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1648225875119912, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.16636806104766264, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.15548835065553612, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.06429889894633016, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1525854164174043, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 2.4710754585266113, "validation/loss_best": 2.452951192855835, "validation/acc_best": 0.2602436323366556, "validation/f1_best": 0.1956726852853484} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 3.0421810567379, "train/grad": 0.5903165694326162, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.131165771484375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1308544921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1303271484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1300341796875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.129852294921875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.129583740234375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.129356689453125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.129056396484375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.128714599609375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.128359375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.128056640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.127510986328125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1271435546875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.12657470703125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1261474609375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.12565185546875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.125003662109375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.124378662109375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1234619140625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.12214599609375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.11806884765625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.090299072265625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.002119140625, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.887777557373047, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.7658338928222657, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.6503746795654295, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.582585792541504, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.528789215087891, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.4710151863098146, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.4070825576782227, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.3660590744018553, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.341012315750122, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.3109674739837645, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.283160800933838, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.262316427230835, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.258374333381653, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.273290600776672, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2785114455223083, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.294254410266876, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.354861705303192, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4124774026870726, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.460129382610321, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.540236564874649, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.6123451602458956, "train/loss_044_lr2.6e+01_wd1.0e+00": 7.0964806628227235, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.809957251548767, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.7208549952507015, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.162384282350541, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020390907158143817, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020394258941523732, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020411245343275368, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02042815732304007, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020443563177250327, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020460623232647777, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02047860886901617, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020496462662704287, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020514415591023863, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02053102510981262, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02054480993654579, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02055992600042373, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020569340344518423, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02057092237751931, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020559105598367752, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020534994192421437, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020485896659083666, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020403531845659018, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020272177904844285, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020107188401743768, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01987596889026463, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0201520913047716, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021798421991989017, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.024027513186447323, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02637885184958577, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.028493377389386298, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02970614518970251, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03094567868858576, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0323763017822057, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.033849173970520494, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.034948867354542015, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.035652954457327725, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03678004282526672, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03798947120085359, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03939385076984763, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04059442630037666, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0416953886859119, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04266953276470303, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04401455908082426, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04578318824991584, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04772965744137764, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.050798459965735676, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.055690739043056964, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06035496613010764, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.20866565017029642, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.14273105839267372, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.11603598698973655, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06781420011073351, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1249353885650635, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1249213218688965, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1248888969421387, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.124826192855835, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.124730348587036, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.124573230743408, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.124326705932617, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1240415573120117, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.123692512512207, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1232714653015137, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1229207515716553, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1224355697631836, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.122068405151367, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1217801570892334, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.121772289276123, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.121981620788574, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.122504234313965, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.123166561126709, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1232643127441406, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1211190223693848, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.10093092918396, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.918027400970459, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.752793073654175, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6748414039611816, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6153101921081543, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.54355525970459, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4999406337738037, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4823007583618164, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4856972694396973, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.475635051727295, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.46481990814209, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.443126916885376, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4460532665252686, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.462395668029785, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.4998104572296143, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.6112403869628906, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6139705181121826, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6848304271698, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7058372497558594, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7266056537628174, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6418747901916504, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.6685540676116943, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.839933395385742, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.789642810821533, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.067921742340347, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0784422296050203, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.1496862310815799, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.17238833517903285, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.18973791066814322, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24160206718346253, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24880029531192321, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25193798449612403, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2593207825765965, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2576596530084902, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26836471022517533, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2552602436323367, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23883351790328536, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23754152823920266, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2249907715023994, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2102251753414544, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21779254337393872, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22997416020671835, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21945367294204504, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21539313399778517, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2011812476928756, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012172889031476147, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01225228361816314, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01234411927485612, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012260388688277972, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01227930291941306, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01251296329196723, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01272593097830754, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012628130101795402, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012098658856020432, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01218444758621949, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013197188068624568, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.014593814555934212, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.015706951206508793, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.016884279116392863, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01613536981555984, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01660964471477892, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01629411599761308, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01744950500153927, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.017682139716382942, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.018148360182586788, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.02869637418313094, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.07509398371377042, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.09847583214789446, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.11428217241415477, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.12897187900822868, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1497743662028714, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1717452339787272, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18086099589365548, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18426379053954878, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1857362604888748, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18553629466355234, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1867637113969347, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17970514502699578, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18502726110570142, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18157667513701223, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17852693693562613, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1867356017745144, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18089446605557893, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1728102301746869, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16407139916936964, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17067250463276337, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16369375981781895, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1546048792163624, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14999411755738026, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 2.283160800933838, "validation/loss_best": 2.462395668029785, "validation/acc_best": 0.26836471022517533, "validation/f1_best": 0.18502726110570142} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.801768844127655, "train/grad": 0.2839372941851616, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13318359375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.132933349609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.13267333984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1322509765625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.132099609375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.131715087890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13123046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.130850830078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.130233154296875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12969970703125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1291943359375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12842529296875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.127733154296875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.126810302734375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1258056640625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.124893798828125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.12329833984375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.119375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.089217529296875, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.9971209716796876, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.8558714294433596, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.7259170532226564, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.6386541748046874, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.5602333831787107, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.4869794845581055, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.412350654602051, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.3609106826782225, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.3265210723876955, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.2892321586608886, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.257093915939331, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.229274339675903, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.215995242595673, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.202169812917709, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.1922001242637634, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.1858963346481324, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.1895498633384705, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2230379366874695, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2475126683712006, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.285283212661743, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.368976340293884, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.433886082172394, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4900261771678927, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5921032249927523, "train/loss_043_lr2.2e+01_wd1.0e+00": 4.248881175518036, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020224725822918118, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020236506857909262, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02025123404338956, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020262028500437738, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020271044201217593, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02028157113585621, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020290560671128333, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020300892326049508, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02031154361087829, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02032003114465624, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020324004665017128, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02031870236620307, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020300476029515267, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020251580574549736, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02017489480320364, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020077266772277655, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01991768490988761, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019697530781850218, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01990221070125699, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021479438291862607, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.024095643842592836, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.026495509808883072, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0279901650454849, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.029397759931162, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.030949042877182364, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03253409259952605, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03341552010737359, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.033890163777396086, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03475048000924289, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03589054712094367, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03663408816792071, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03719000897370279, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03838124586269259, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04006812730804086, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04180401490069926, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04288446834310889, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.044306821469217536, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04592880135402083, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04897668043151498, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.052348905205726624, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.054184092581272124, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.056979142986238004, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06321578620001674, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.12245526166632771, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1256654262542725, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.125802516937256, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1259806156158447, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1261229515075684, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.126224994659424, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.126303195953369, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.126296281814575, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1262502670288086, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1260063648223877, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.125690221786499, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1252598762512207, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.124567747116089, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1239423751831055, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.12325382232666, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.122868537902832, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1226284503936768, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1214005947113037, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1099307537078857, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.946989059448242, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.765683889389038, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.672377109527588, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.6185998916625977, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.564985513687134, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4999163150787354, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.448937177658081, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4324898719787598, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4372332096099854, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4356789588928223, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.446843385696411, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4771945476531982, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4913835525512695, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.535736083984375, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.592559576034546, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5914885997772217, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5514345169067383, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5587029457092285, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.5726451873779297, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6481385231018066, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7619667053222656, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.6996941566467285, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.807695150375366, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8783838748931885, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9048564434051514, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.13270579549649317, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.17072720561092655, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.1877076411960133, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.20321151716500555, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2216685123661868, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2469545957918051, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26375046142488, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2643041712809155, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.262827611664821, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26688815060908083, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2648578811369509, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25655223329641935, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.25193798449612403, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23864894795127353, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23477297895902546, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23938722775932078, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.24381690660760427, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2469545957918051, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2469545957918051, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2188999630860096, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23477297895902546, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22517534145441123, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20782576596530086, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2087486157253599, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010264456827633104, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010324641890314205, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010184916994765003, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010683651416587609, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01065776194115759, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010965442181061288, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010994929426912483, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011843598936851779, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012099028448069794, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01137458035537909, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01161846651835731, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.012281249509421032, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013031882419065869, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.012912324682929828, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.013334034825258141, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01258422319537567, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01168436852593252, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.014620594088929018, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.06830551452328197, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.09441831961760515, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1108570318781625, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.12733154285621243, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1464716654764384, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.175377154091196, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18817784370605936, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19113561671828147, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19215314961867783, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20141670591054242, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20608703311559828, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19916368359589373, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19782463900087766, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18354788480877096, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18269139011777905, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1847776487013673, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19021850500836837, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18237425790400122, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18791225622836105, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17378611852683565, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16067360067154382, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16404257172133122, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16351761578463983, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.14675526977418718, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.14040588540396093, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 27, "lr_best": 0.00047999999999999996, "wd_best": 0.05, "train/loss_best": 2.3265210723876955, "validation/loss_best": 2.4356789588928223, "validation/acc_best": 0.26688815060908083, "validation/f1_best": 0.20141670591054242} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.6959142661094666, "train/grad": 0.2302344324439764, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1260302734375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.125709228515625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.125257568359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.124808349609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.124495849609375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.124046630859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.123394775390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1229541015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.122349853515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.121630859375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.120999755859375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.120032958984375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.119232177734375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.11765380859375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.115562744140625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1107763671875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0725311279296874, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.9208111572265625, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.74765625, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.656272125244141, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.578967514038086, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.502266540527344, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.4230263137817385, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.3557432746887206, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.3036182022094724, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.250078115463257, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2123070764541626, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.1865799188613892, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.1500870633125304, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.1271479153633117, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.108899737596512, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.102008410692215, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.094595282673836, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.083716051578522, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.0884571826457976, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.095592513680458, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.1315554082393646, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.1615524530410766, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2101545906066895, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.2728737771511076, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.369132400751114, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4022049736976623, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.515781219005585, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02049360580276698, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020503930184058845, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020519528863951565, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02053243648726493, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020545096127316354, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02056063254363835, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020574463820084928, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020586084038950504, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02059426345396787, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020593117121607066, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020586448702961207, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020558856343850494, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02051105529535562, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020409707790240646, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020273654372431338, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02012920998968184, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02059906471055001, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02325515398290008, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.026308138910681008, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02769011627882719, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.028854598579928278, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.030929099554196, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03288164749741554, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03389932481572032, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03445954266935587, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03522235446609557, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.035989287849515675, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03640412108972669, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.036976675475016235, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.037885262239724396, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03933346170932055, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.040318820970132944, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.042049698885530236, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.043450817689299585, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.045316642262041566, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04692250598222017, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04824231840670109, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04928150225430727, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05160249276086688, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.052251384891569615, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0556706384755671, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.055853763464838266, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06265664856880904, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.124000310897827, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1237754821777344, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1234123706817627, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.12306809425354, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1227996349334717, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122382879257202, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1219794750213623, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1215274333953857, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.12096905708313, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1203296184539795, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.119661808013916, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1186320781707764, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1175522804260254, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1156013011932373, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1119542121887207, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.0981295108795166, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.939908027648926, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.7424395084381104, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.6513993740081787, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.595026731491089, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5262184143066406, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.475173234939575, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4361252784729004, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.407893180847168, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3890771865844727, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.37803316116333, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.3706860542297363, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.390589952468872, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4511311054229736, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.430509328842163, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.43265438079834, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4466054439544678, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.476724624633789, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5530946254730225, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.633126974105835, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.6710500717163086, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.738464593887329, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8086283206939697, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7645914554595947, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.774426221847534, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.778019905090332, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.771893262863159, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9447102546691895, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07678110003691399, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.1391657438169066, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.1699889258028793, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.19121447028423771, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.20764119601328904, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.23311184939091917, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24640088593576967, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.257844222960502, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2681801402731635, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2757475083056478, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.28183831672203763, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2862679955703212, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2809154669619786, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2678110003691399, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.27464008859357697, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.27408637873754155, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2713178294573643, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26393503137689184, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.24455518641565152, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23901808785529716, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2216685123661868, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2115171650055371, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21926910299003322, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2188999630860096, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21207087486157253, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2117017349575489, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19102990033222592, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008499331463532787, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008397068915271621, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0081340948038505, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008052708713129067, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008168110130157015, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008398763500006736, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.007955744376293825, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.007913871992954222, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.008148943561569014, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.008325520591256389, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.008488132616315824, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.0094793385590892, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.009828759182424391, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.012665426418040616, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.016194825091223598, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.020423370129140545, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.06356358126723108, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.08872345691504561, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.10965264440248179, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1267567645840106, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1552941655662412, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.16942253929745377, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18277646953523663, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19079366637708248, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20001476378747388, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21186310194185273, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21881163265593262, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21575887061132415, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20372959037442762, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21450509055465514, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.22017997573525358, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.22154058242118438, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.21256279086253982, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19397081730123786, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18153676713093167, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18041116193836873, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16907962177781136, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16940123265437182, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17263502426434604, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17434283324732505, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16130470387237072, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17252065627590416, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.14435002013100334, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 26, "lr_best": 0.00041999999999999996, "wd_best": 0.05, "train/loss_best": 2.2123070764541626, "validation/loss_best": 2.3706860542297363, "validation/acc_best": 0.2862679955703212, "validation/f1_best": 0.21881163265593262} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.63493355512619, "train/grad": 0.23666838862001896, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.130682373046875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.130328369140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.129627685546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12904296875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.128441162109375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.127652587890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12685302734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.125963134765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1247509765625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12371826171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.122650146484375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.120950927734375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.11914306640625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.114334716796875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0928955078125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.983117980957031, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.794583282470703, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.6752970886230467, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.589385299682617, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.513859748840332, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.430748710632324, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.3533732986450193, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.287078685760498, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.233695583343506, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.1991103744506835, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.1564465379714965, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.1243578720092775, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.102490779161453, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0627191293239595, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.039350230693817, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.0177140867710115, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.008731046915054, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.9984526205062867, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.995212664604187, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.0006266683340073, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.018088059425354, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.058839404582977, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.087992709875107, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.145085480213165, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.246998138427734, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.316664822101593, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.3745341658592225, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.418907823562622, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020476903673261403, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020480666956864298, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020486350706778466, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02048943637870252, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02049085880164057, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020490536228753627, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020487351142801345, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02047923643141985, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020458667604252696, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02042670676484704, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02038421545177698, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020292277047410608, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02017385732382536, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019955978239886464, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019988511768169702, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02194274344481528, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.025440510967746376, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.027359429756179453, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.028840022375807167, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.030771791590377687, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03259664429351687, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03396220271475613, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03455770961940288, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03494354737922549, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03512706661596894, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03586443052627146, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.036721700020134446, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03723141783848405, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03861712522804737, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03972916852682829, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04071343814954162, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04149036230519414, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.043008424900472166, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04435723787173629, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04600540878251195, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.047819984816014766, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04923209503293038, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05066400216892362, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05374126674607396, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0556794767268002, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05581124948337674, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.058162729702889916, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.059731725472956895, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123627185821533, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.123453140258789, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123213052749634, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1230051517486572, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.122835874557495, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122624635696411, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1223835945129395, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1221280097961426, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1217310428619385, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1212379932403564, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.120603084564209, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1192145347595215, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.116828203201294, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1070876121520996, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.026369333267212, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.8033134937286377, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.68228816986084, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.60705828666687, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5424892902374268, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.484300136566162, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.429908275604248, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4090678691864014, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.403017520904541, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.399991035461426, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3901801109313965, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.403400421142578, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4114692211151123, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.422365188598633, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4371368885040283, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4549789428710938, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4869003295898438, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.52786922454834, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.56886625289917, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.606745481491089, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.637423515319824, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.6734442710876465, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7027742862701416, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.756833076477051, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.769178628921509, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.691295862197876, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7589075565338135, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7737021446228027, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.819727897644043, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0769656699889258, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.12089331856773718, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.16795865633074936, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.19435215946843853, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.21483942414174972, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.23458840900701367, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24898486526393504, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.268733850129199, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2733480989294943, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26891842008121075, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2703949796973053, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2783314876338132, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2731635289774825, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2781469176818014, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.27648578811369506, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.27113325950535255, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.262827611664821, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2572905131044666, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2499077150239941, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.24953857511997046, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.24658545588778147, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23791066814322628, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22812846068660023, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2279438907345884, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2131782945736434, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21705426356589147, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2146548541897379, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22240679217423404, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21853082318198597, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009528417652008231, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.00947156293366125, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009193651703901025, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.009650944882811525, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.00976346250486316, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009668943141041365, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010251745990285888, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011363311927056453, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013007158233252906, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014246087636355485, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015898994139665217, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.016912382047699726, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.021191375831777378, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.026049875000333914, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.057996449524971544, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.0874036097003254, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.11292291788313509, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.14078534545913438, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.16247368144828392, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1818355790018313, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20365036990384677, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20948030144183902, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20926587210999129, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2124079540678008, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2188416946168905, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21246154631276926, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.22053449825925023, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21820585843079207, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21442917945837867, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21196642675002017, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.21164329109589242, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.2049247287380004, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.20496195625586147, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19584432823505746, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.20023329279115543, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18854322191529774, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1910756847485977, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18146006360040223, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18523911624578748, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19368576114876876, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18338439107062152, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17233315847621658, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15716104616689067, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 2.1991103744506835, "validation/loss_best": 2.3901801109313965, "validation/acc_best": 0.2783314876338132, "validation/f1_best": 0.2188416946168905} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.5684578812122343, "train/grad": 0.24319927282631398, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1293212890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12876708984375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.127930908203125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.127056884765625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.126279296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.125218505859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.124066162109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.122890625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.121221923828125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.119520263671875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.117708740234375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1145263671875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.109334716796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0686279296875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.877862548828125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.732738800048828, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.6370538330078124, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.551789093017578, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.456005630493164, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.374560317993164, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.3024346351623537, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.235082817077637, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.175417022705078, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.128345365524292, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.099282886981964, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.0554353046417235, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.023378460407257, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.0004972076416014, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.9605623245239259, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.9406203854084014, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.9218197631835938, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9173712611198426, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.912939382791519, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.917236555814743, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9194835370779038, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9378842437267303, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9758668267726898, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.020653535723686, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0781214559078216, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.145699799656868, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.2132661736011503, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.2937367296218873, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.355571928024292, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01998054824769497, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01998304921668023, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019985164641402663, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019984822091646492, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019981343699619174, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019973952462896703, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019959334023296832, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019938364909030497, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01989755803719163, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019838952273130418, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019768065423704684, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01962690460495651, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019463988919742405, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019778712149709464, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023240461042150856, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.025881636906415224, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0273777580820024, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.029058508584275843, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03156627863645554, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.033405914660543205, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03413762222975492, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03494950459338725, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0354066354315728, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03592860375531018, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03641425094567239, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03738517734222114, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03826599550433457, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03892704701051116, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.040766067355871204, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04276583097875118, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04313558965921402, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04370489465072751, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04457754893228412, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04637160683050752, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.047743358444422485, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04939228473231196, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05041256716474891, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05262887515127659, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05541581321507692, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05510966438800097, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05508959051221609, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0574229459092021, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0603768428042531, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123605728149414, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1234679222106934, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123274803161621, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1231279373168945, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.123016595840454, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1228442192077637, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.122678518295288, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.122506618499756, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.122330904006958, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1220145225524902, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.121539354324341, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1196587085723877, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.112903356552124, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.9996702671051025, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.7701470851898193, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.68058180809021, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.606825828552246, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5377111434936523, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4690983295440674, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4304397106170654, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4028093814849854, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4018373489379883, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4135680198669434, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.41933274269104, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.434936285018921, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.444870948791504, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4504690170288086, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.469886541366577, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4853804111480713, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.515958070755005, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.606172561645508, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6356356143951416, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.698741912841797, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7333624362945557, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.7405214309692383, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.751683235168457, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.736248731613159, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8850133419036865, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8079495429992676, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8541150093078613, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.885429620742798, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9788336753845215, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.172363519668579, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07493540051679587, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.12440014765596161, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.1760797342192691, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.19435215946843853, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.21502399409376152, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.23034330011074197, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.25064599483204136, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.25802879291251385, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2663344407530454, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26504245108896274, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26596530084902176, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.26651901070505724, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2646733111849391, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2633813215208564, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2596899224806202, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2589516426725729, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23421926910299004, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23255813953488372, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2321889996308601, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23292727943890734, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20007382798080472, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2172388335179033, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2233296419342931, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2216685123661868, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2100406053894426, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19509043927648578, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01023291783885807, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009960059679214555, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011188188405613577, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010850071769472095, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.011620723289568105, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.012950300829746094, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013460404100458305, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014155189925677483, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.014357647322659066, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.015256172763642497, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015307756655990254, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.015297919739787329, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.018141278074219034, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.04826164071814954, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.09132906807104618, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.11444892415818837, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.13480750389925877, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1552209168001282, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1834423183130338, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19458891860744257, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.202940964553624, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20700861513197863, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20696550064394806, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20912238873932584, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20869579381676107, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2158200749740018, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2209691953173023, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21791008308920187, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21348841152403317, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21746203511813345, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19425008430765348, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.190910519406381, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18167937207221865, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17634077000463952, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18372340342308288, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17857252768335283, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.19160890141190046, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1641077963140866, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16388962242819768, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16773130722428722, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15722660386399231, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.14259940408483235, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.12869460910282252, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 2.235082817077637, "validation/loss_best": 2.4018373489379883, "validation/acc_best": 0.26688815060908083, "validation/f1_best": 0.20700861513197863} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.503604805469513, "train/grad": 0.24872348695993424, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.125606689453125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12494140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.124163818359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.123299560546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12250244140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.121483154296875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.120362548828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.119078369140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11740478515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11552734375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.113306884765625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.107647705078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0856689453125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.87203125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.7117538452148438, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.6331878662109376, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.555031280517578, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.464674644470215, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.368388195037842, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.295952072143555, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.230658597946167, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.166302299499512, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.1056836462020874, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.056161253452301, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.0217740964889526, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.9743922364711761, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.9366300010681152, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.910880638360977, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8643682116270066, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.837030183672905, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8149869084358214, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.809959447979927, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8017539566755294, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7982621216773986, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.806184492111206, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.82268829703331, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.8600377595424653, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8877558410167694, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9335280215740205, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.0412522107362747, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.135460708141327, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.182389351129532, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.225235606431961, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020647873678244652, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02065092956647277, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020655210870318116, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020656723342835904, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020653004799969496, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020644790730439126, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020628453274257483, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020601154691539704, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02055236234329641, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020477005718275904, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020385467088781296, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020212606280110776, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020240136375650763, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02396142913028598, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.027090996326878666, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02847409712150693, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.029966012034565212, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03236461297608912, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03465660471469164, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03554630094207823, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.035788564747199415, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.036396417301148176, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03681931822560727, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03741166641935706, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.037923285802826286, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03908728687092662, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.040385090578347445, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.041226627510041, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.042803520038723945, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04434006435796618, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04454412657767534, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04507348716259003, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04596563382074237, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.047025171872228384, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04838033428415656, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05002941865473986, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05102940831333399, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.051334536150097844, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.053093251679092644, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05533571125939488, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05590494165197015, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.056665326748043296, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05667330188676715, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123511791229248, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.123321294784546, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1229562759399414, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1226069927215576, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1222879886627197, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1218161582946777, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.121269702911377, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120630979537964, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1196482181549072, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1182634830474854, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.11614990234375, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1081597805023193, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.057478427886963, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.783414125442505, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6749391555786133, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6125292778015137, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.547370672225952, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.484215259552002, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4447402954101562, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.434954881668091, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4315848350524902, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.447859525680542, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4631826877593994, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4706978797912598, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4784960746765137, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4989240169525146, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.50858473777771, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5174524784088135, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.563722610473633, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.62481427192688, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.692636013031006, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.694443702697754, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.763728618621826, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8308608531951904, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.899675130844116, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.961454153060913, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.993790864944458, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0034854412078857, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9889333248138428, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.004810333251953, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9495849609375, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8730013370513916, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9268290996551514, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07364341085271318, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.10741971207087486, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.16814322628276115, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.18881506090808417, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.20616463639719454, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.22517534145441123, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2440014765596161, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2567368032484312, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.25802879291251385, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26393503137689184, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25839793281653745, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2572905131044666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2576596530084902, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25655223329641935, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2547065337763012, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24935400516795866, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25083056478405313, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2425249169435216, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23569582871908454, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23181985972683647, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23477297895902546, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2320044296788483, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21779254337393872, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2187153931339978, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21262458471760798, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2157622739018088, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2146548541897379, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2054263565891473, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009456602616313798, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010315406262069256, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010853151780531646, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0125987926695511, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013271930706211665, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015524463202196198, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01688887805026281, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.018342845776918942, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01970168211098287, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.020192969366620444, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.0199096229343534, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.024753440346134997, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.049756323610433896, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.08704195391563165, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1124739515575968, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1355340120619318, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.15664165321250612, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1801875552651273, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19470571269713455, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19806334348159257, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20469160291636787, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2011371186732626, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20437522963901636, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20309569789924123, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20375794475928563, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20311660936251444, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2069532933080743, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20596539417877077, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20755397835348535, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19800871952044483, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18904627710283775, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18161649465048058, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17952223895043082, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16984746503707826, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1640625947339756, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1603641285261476, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16268435999250527, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16643352693554736, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1634722921041681, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1744171908815242, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16337194520380877, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1677063773726628, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.156781622598607, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.230658597946167, "validation/loss_best": 2.4315848350524902, "validation/acc_best": 0.26393503137689184, "validation/f1_best": 0.20469160291636787} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.4606193041801454, "train/grad": 0.25297140270471574, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12329345703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1226123046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12155029296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.120426025390625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.119451904296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1181103515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.116732177734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.115169677734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.113026123046875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.110411376953125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.106746826171875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0911651611328126, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9742364501953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.735924072265625, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6352232360839842, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5651808166503907, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.479053192138672, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3819058990478514, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.295304126739502, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.2306838607788086, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.1714520835876465, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.1116329717636106, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.054244110584259, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.004897425174713, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9738062500953675, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.921467183828354, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.877102108001709, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.847092053294182, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.7923150646686554, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.759309964776039, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7403896069526672, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7339687263965606, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.7321605414152146, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7278627282381058, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.7335586643218994, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.7410736775398254, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7884031695127487, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8390044456720351, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8912504917383195, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.9896903073787688, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.094079087972641, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.14963210940361, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.1884981459379196, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020543623738922177, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020543742328882217, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020542170451954007, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020537444110959768, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020529859773814677, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020514339208602905, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020487585086375475, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020449025803245605, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020380697147920727, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020284005110152065, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020169427702203393, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020064763403497637, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02186957382131368, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02634124957025051, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.028126426292583346, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.029427701886743308, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.031444923747330905, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03393095961771905, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03530648713931441, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03593540676869452, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.036197171332314614, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0369371057394892, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.037651534080505374, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03844195210374892, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03899169661104679, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04014137580990791, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04132238736376166, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04211572211235762, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04359334096312523, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04542950209230184, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04593199018388987, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0462299913726747, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.047322038263082504, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.048117273319512606, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04978643655776978, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05037087010219693, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05123686529695988, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05266533302143216, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05445794677361846, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.055863268561661246, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.055856570061296226, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.056429362650960685, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.056828373800963165, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.122976541519165, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1227123737335205, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.122313976287842, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1219308376312256, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1216042041778564, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1211023330688477, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1205074787139893, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.119734764099121, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.11845064163208, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1163198947906494, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1121983528137207, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0844080448150635, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.889382839202881, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7039873600006104, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6252264976501465, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5693893432617188, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.501187324523926, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.441544771194458, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4083142280578613, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3973453044891357, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.399676561355591, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4199821949005127, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.43974232673645, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4519598484039307, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4593334197998047, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.506059169769287, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5337064266204834, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5647776126861572, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.640817642211914, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.712393283843994, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7664268016815186, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.782109498977661, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.7512612342834473, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.796358823776245, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.873260259628296, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.95327091217041, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.917344331741333, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.949535608291626, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9046995639801025, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8715734481811523, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.854933738708496, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7792062759399414, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8446168899536133, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07641196013289037, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0917312661498708, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.15227021040974528, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1849390919158361, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2011812476928756, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.21834625322997417, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2368032484311554, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26947212993724623, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2722406792174234, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2702104097452935, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2705795496493171, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2661498708010336, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26264304171280917, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2646733111849391, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.262827611664821, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25858250276854927, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2530454042081949, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23421926910299004, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21853082318198597, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22517534145441123, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21502399409376152, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21797711332595054, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2159468438538206, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21650055370985605, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20376522702104097, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21908453303802142, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20671834625322996, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009482146941341144, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009830869679532584, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010401135520260623, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011542163951696915, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012336851500923852, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013484342992094304, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.015108057628824152, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.016403187110469713, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01751279862576598, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02064385246365245, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02433490086035323, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.034494330273933234, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.07408831021558403, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1020409536597489, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.11939336142423312, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.13812404967784725, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1630860218246708, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1881039969831788, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20126647858399416, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20507844005397213, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20475763363100297, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20977404188296123, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2098603183102842, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21210650525638775, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2186597388854622, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.22114284001474638, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.22145618287980282, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2174292533983361, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20441430556143017, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19496889977377255, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19210074226566598, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1910566406007612, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19622694220009662, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19047522139818182, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1828955668688815, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1730818504351955, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18249225698811541, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17767746901872994, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17432340444570502, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1801901426265279, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17273711036157122, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18108932175627215, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16681147631100254, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.2306838607788086, "validation/loss_best": 2.3973453044891357, "validation/acc_best": 0.2722406792174234, "validation/f1_best": 0.20507844005397213} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.400029338598251, "train/grad": 0.25104636669158936, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.123292236328125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122535400390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.121265869140625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.120091552734375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11911376953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.117564697265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.116031494140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1141796875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1115234375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.107900390625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.101258544921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.042481689453125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.828166198730469, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6716291809082033, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.585092010498047, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5110813903808595, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4164151763916015, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3216526985168455, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.238377265930176, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1738674354553225, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.11372540473938, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.0509803390502928, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.989485239982605, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9345733666419982, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.898644014596939, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.840015504360199, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7864971333742141, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7507845306396483, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.693826788663864, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.6534484845399857, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.636050935983658, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.631148945093155, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6219347488880158, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.6148413026332855, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.621363417506218, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.636696754693985, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.6727293330430983, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.7185509294271468, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.7638067644834519, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8563116043806076, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.9508904498815536, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.0080423307418824, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.0623212701082227, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020231948578730227, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020232085613533856, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020227554733864964, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020220989347435533, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020211927513591944, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02019233430735767, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02016000997275114, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020118469540029764, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020040535461157562, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019934075227938594, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019826132515445353, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02040493056643754, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.024086542390286923, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.027142949840053915, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.028696133978664874, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.030383723275735974, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03281333930790424, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03475424399599433, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03568548322655261, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03618333909660578, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03640534291043877, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.037127710608765484, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03790112173184752, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.038701340686529875, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.039269985482096675, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.040526828821748496, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.041536906640976666, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.042207926101982596, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04440578600391745, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04618436912074685, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04691067896783352, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.047218664549291134, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.047647580299526454, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04870535863563418, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0497139436006546, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0506000704690814, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.051093339417129756, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05219980321824551, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.053187309093773366, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0531366891041398, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.052115354668349025, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.052487231772392985, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05312840178608894, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.12320876121521, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1230180263519287, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.12271785736084, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1224091053009033, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.12207293510437, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1215569972991943, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1208503246307373, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.119884967803955, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1180529594421387, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1146202087402344, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.106204032897949, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0054516792297363, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7831075191497803, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6583714485168457, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5897791385650635, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.531952142715454, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4668128490448, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4230735301971436, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4025895595550537, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4019534587860107, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4073147773742676, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.426454544067383, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.446376085281372, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4691827297210693, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.502060651779175, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.558755874633789, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.552812337875366, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5803427696228027, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6516551971435547, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.694282293319702, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7614996433258057, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.8204798698425293, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8713114261627197, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8981785774230957, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.934502363204956, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.004587173461914, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0031020641326904, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.979874610900879, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9600589275360107, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0144686698913574, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0737292766571045, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.008195400238037, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.891624689102173, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0753045404208195, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.12329272794389073, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1699889258028793, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1967515688445921, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.21502399409376152, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2321889996308601, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.268733850129199, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2705795496493171, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2702104097452935, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2691029900332226, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2678110003691399, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26596530084902176, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2571059431524548, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25378368401624213, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24750830564784054, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22886674049464747, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23237356958287192, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23477297895902546, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2397563676633444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21963824289405684, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2201919527500923, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2100406053894426, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19490586932447398, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19730527870062753, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20579549649317092, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010498091062194535, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010619554297286844, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010308032716010261, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010483056662772229, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012462880145662644, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013193633388336921, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01317771453718312, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0151858342626304, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015487886205714277, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01919137850052356, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.023023993301743508, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.0552981833397251, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0893145080452707, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.11763248455511444, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.13729074046848128, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.15821269619906228, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1806878644847176, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.197581538473244, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20417813003016247, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20605248947798352, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20483455036805176, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20843078476348728, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21075699187100685, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2112155389991733, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20418797608113715, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20346388868092838, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21003635759343223, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2096146366042737, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20269567015666376, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20090038544657904, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19771920548999578, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1926964383796547, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18676037660231082, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1886666401516629, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18618961747649831, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1723931551754702, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17478157064382968, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17869356294793767, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18271262856454404, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17448834139424907, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15652976512407635, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15503319740631347, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.171413992746802, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.238377265930176, "validation/loss_best": 2.4025895595550537, "validation/acc_best": 0.2705795496493171, "validation/f1_best": 0.20417813003016247} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.3654715847969054, "train/grad": 0.25710263386368754, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.128189697265625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.127352294921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.126201171875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.125018310546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12382568359375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.122283935546875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.120697021484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.118626708984375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.115572509765625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11053466796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.098082275390625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.950264892578125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7591571044921874, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6370339965820313, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.555156707763672, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4746157836914064, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.378623924255371, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.290844612121582, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.211741638183594, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1478605461120606, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0859670448303222, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.02160493850708, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9574025416374206, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9003557395935058, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8659498381614685, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8013485884666443, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.743554835319519, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7061448138952255, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.636607033610344, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.5914666283130645, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.5717350578308105, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.570964525938034, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5603992933034896, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.5471688824892045, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5499816232919692, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5628951394557953, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.5962579500675202, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.632093002796173, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.6707465422153474, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.7782104176282882, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8799631595611572, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9363429820537568, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.9880153799057008, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0203234129678458, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020321790101006626, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020315497810952366, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02030795662663877, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020297287479043007, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020276322583667933, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0202431851811707, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020193888866342605, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020105444085784255, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01998700003605336, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019923401293344797, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02212791041471064, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.025671550799161197, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.028080529626458885, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.029863969432190062, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.032085522450506686, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.034377404749393464, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.035860933810472485, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.036591371400281784, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03708255742676556, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03734634903259575, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038212192580103874, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03904325904324651, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.039963710717856885, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04059356046840548, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.042087978776544334, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04358198497444391, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04441813984885812, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04616658860817552, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.047710421606898305, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049060708992183205, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04950364699587226, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04959493318572641, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.050180101562291385, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05129483290016651, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05248923921957612, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05243052300065756, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05251941634342074, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.052511752601712945, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.053393400982022284, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0517873053625226, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05194294482469559, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0525749309360981, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123300552368164, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1230223178863525, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1224989891052246, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.12200927734375, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.121471881866455, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1207165718078613, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.119769334793091, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1184613704681396, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1159660816192627, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1105782985687256, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.092863082885742, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.8941421508789062, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7332234382629395, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6333727836608887, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5691606998443604, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5092272758483887, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4519035816192627, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4191665649414062, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.402198553085327, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4028894901275635, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4101297855377197, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4290850162506104, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4530038833618164, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.469421625137329, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4862310886383057, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.537168502807617, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.539198398590088, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5690677165985107, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6353938579559326, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.693269968032837, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.766766309738159, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.843494176864624, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8485565185546875, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8654260635375977, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8915576934814453, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.935548782348633, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.926950216293335, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9072039127349854, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8965930938720703, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8147056102752686, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7978622913360596, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7628214359283447, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7373316287994385, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.08490217792543374, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.150609080841639, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.18014027316352899, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2026578073089701, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2222222222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23606496862310816, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25452196382428943, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26965669988925806, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2748246585455888, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2698412698412698, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.271686969361388, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2702104097452935, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26744186046511625, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26393503137689184, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.260797342192691, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2617201919527501, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25802879291251385, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25230712440014763, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24437061646363972, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24344776670358065, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23772609819121446, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.24012550756736803, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23034330011074197, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2364341085271318, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24141749723145073, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23920265780730898, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23717238833517904, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23255813953488372, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23163528977482467, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0110174314449702, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011391274031272993, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011114186686401453, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011715752390716976, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012401477197957768, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013982565405054164, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013919709836567566, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01541670831342743, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.017682071170559463, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.020522606439319385, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.0335049296713155, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.07024501396354256, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09660712249365698, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12027274004789397, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.14248130181331167, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.15837110121540185, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18104741090947574, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20039345551751545, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20867543030405397, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20841673761399537, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2129252308702709, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21190685666501755, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21160107239395506, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21621229475071121, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21648870545194418, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2163444930055655, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2181742386988542, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21364201652368217, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20470051315569315, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20153999391767563, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19716777539383046, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19094526764944866, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19210036280555318, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19262154654601707, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19437647221647447, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18517444501736255, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.19332119995186883, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19467639330347156, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.20220581419570371, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19766901763231925, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18960757367008227, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18607364741417542, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1873616274602882, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.211741638183594, "validation/loss_best": 2.402198553085327, "validation/acc_best": 0.2748246585455888, "validation/f1_best": 0.20867543030405397} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.3061044824123385, "train/grad": 0.25450229078531267, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12113525390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12027099609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11889404296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.117626953125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11633056640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.114510498046875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11254150390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.110069580078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10590576171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0981982421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0718585205078126, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.8493728637695312, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7049577331542967, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5950665283203125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.507265777587891, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4192792892456056, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3302106857299805, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2474300384521486, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.17018009185791, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1071005916595458, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0446642684936522, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9754502391815185, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9066608500480653, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.844558653831482, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8059595382213594, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7339463394880295, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6673697674274444, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6235075187683106, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.543710777759552, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4915155684947967, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4579662042856216, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.445799852013588, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.4382305842638017, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.4247724264860153, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4253251373767852, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4289192843437195, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.4660346800088881, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.4982000982761383, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.5375269836187362, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.6332733470201493, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.7475294369459151, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.8136471658945084, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.8750975161790848, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02030235351063311, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02030018496327102, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020293666082434356, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02028421437833458, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02027086881455034, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020246133524924517, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020205590864643453, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020149766807444392, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020047286828048527, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019925978635437786, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020027479440905153, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023766513112932444, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026721448488533497, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02889543849043548, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030994198247790337, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03352860377170146, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03533482470549643, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036541995191946626, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037101300181820986, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03741617566905916, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03772749044001102, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038460336420685054, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03926676819100976, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04014866724610329, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04082077745348215, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04193942803889513, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04314704665914178, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04403827356174588, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045593322832137344, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04716023268178105, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04780823977664113, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04827420214191079, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04935182888060808, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05029488772153854, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05080938577651978, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.051071559973061086, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05125890161842108, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.051404830906540154, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05157747568562627, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05095447078347206, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0501076377928257, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04946873323991895, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.050245448611676694, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1233503818511963, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1230804920196533, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.122603416442871, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.122119426727295, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.121626853942871, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1208460330963135, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.119797468185425, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.118312120437622, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1151881217956543, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1072657108306885, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.071883201599121, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.826780319213867, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.702655553817749, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.613497257232666, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.550241470336914, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.491976022720337, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4464590549468994, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4243180751800537, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4179084300994873, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4282171726226807, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.443716526031494, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4689860343933105, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.503498077392578, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.526052951812744, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5508015155792236, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6002073287963867, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.619032382965088, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6612131595611572, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7068092823028564, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7635135650634766, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.8482611179351807, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.891684055328369, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.9128551483154297, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.978484630584717, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.9958736896514893, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.063756227493286, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.075843095779419, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.092097520828247, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0851645469665527, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.903193473815918, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8907980918884277, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8416004180908203, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.89896821975708, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07364341085271318, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.09800664451827243, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.15928386858619417, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.17995570321151716, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.20487264673311184, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22572905131044665, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23901808785529716, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25839793281653745, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26707272056109266, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2727943890734588, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26854928017718716, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2643041712809155, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2563676633444075, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2567368032484312, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25341454411221853, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25378368401624213, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2552602436323367, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2558139534883721, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2543373938722776, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24713916574381692, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23809523809523808, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23274270948689554, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23440383905500184, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2410483573274271, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23661867847914358, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22739018087855298, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2203765227021041, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23015873015873015, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2264673311184939, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22978959025470652, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2201919527500923, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.00959553192745814, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009775347748028308, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00984310262851121, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.00962409801969307, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009626122215612012, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009893575266383693, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01178384693466837, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012041984977584094, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.0153496213425914, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.019768047194103047, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.04424391605779032, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.07675133238047697, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09728379433026757, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12247213540483072, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.14601091626024212, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.162387970566979, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18481852155291156, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19685606727865176, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20710902078136853, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20757818388371993, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20561116249233855, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1994860480519164, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20225017645642893, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20166991838200496, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19807384306216902, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19918788049545835, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20070083938230168, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1997091523930972, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19735581361524454, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1934554430580494, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18952910747461985, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18413735414471344, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18562575498152808, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18917837415021324, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19426814314760377, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.19205409075832647, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18089079993459645, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17839079147248826, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18064179717614812, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19181925008023856, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17835589216838763, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18452390595313847, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17717926182708477, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.17018009185791, "validation/loss_best": 2.4179084300994873, "validation/acc_best": 0.2727943890734588, "validation/f1_best": 0.20710902078136853} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.265468400716782, "train/grad": 0.2547194667905569, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12350830078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12268310546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1210693359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.119666748046875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.116348876953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.114134521484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11144287109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10676513671875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0964208984375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0487445068359373, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.8030526733398435, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6825387573242185, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5787423706054686, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.487633590698242, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.397285041809082, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3107673263549806, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.228058671951294, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1499419021606445, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0852896690368654, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.021572680473328, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9496587657928466, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8779788041114807, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8130027294158935, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7696241843700409, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6914088183641434, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6217630195617676, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5715175914764403, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.486214631795883, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4199471807479858, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.380533738732338, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.3646342468261718, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.3528105998039246, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.3305017060041429, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.3222594040632247, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.3244114905595779, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.354313851594925, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.379020134806633, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4151402932405472, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5173213344812393, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.6353378707170487, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.7040583246946335, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7512972217798233, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020679682302288713, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020675255325622855, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020668076705187558, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020656327442266045, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02064115877263248, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02060962880961597, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020564782745204867, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02050099811516702, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020392281292006372, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020279370783828198, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020694065243005752, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024924549628049134, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0274055428057909, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029442195566371083, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03174996183253825, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03419602076523006, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03559822130948305, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036605842467397454, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03711779683828354, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037487653661519287, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037792098419740794, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038533366452902555, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03929333398118615, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04013980308547616, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04080063547939062, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04222315192222595, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.043630411569029096, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04466984393075109, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04594768760725856, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04715246440842748, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04802987141534686, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0485732608102262, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04920927248895168, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04997315336018801, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0502623744122684, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05068984229117632, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.050792414713650945, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0506591671705246, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.050880764313042164, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.051637765150517226, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05029799526557326, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04907924581319094, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0482117591612041, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123197317123413, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1228418350219727, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1221983432769775, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1215083599090576, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1207950115203857, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.119706869125366, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1183342933654785, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1163899898529053, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1122984886169434, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1010043621063232, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0402419567108154, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.786639928817749, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.680662155151367, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.596374988555908, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.529784679412842, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4710779190063477, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4284801483154297, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.410168409347534, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4061200618743896, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.416220188140869, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4295120239257812, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4573493003845215, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4790079593658447, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4956443309783936, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.512293815612793, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.563110589981079, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5874338150024414, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6234512329101562, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.724213123321533, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.798365592956543, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.882215738296509, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.999980926513672, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0382437705993652, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1209523677825928, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1471850872039795, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.206404685974121, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.2536752223968506, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2405664920806885, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1564042568206787, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.032259702682495, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.938157796859741, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9175522327423096, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9556198120117188, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.067921742340347, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.067921742340347, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0784422296050203, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.11258767072720562, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.16980435585086748, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19139904023624954, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21391657438169065, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2334809892949428, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2517534145441122, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26596530084902176, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.271686969361388, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2751937984496124, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26947212993724623, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26744186046511625, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.262827611664821, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26356589147286824, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2604282022886674, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26301218161683276, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2591362126245847, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.257844222960502, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25396825396825395, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24492432631967515, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23145071982281284, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21797711332595054, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21945367294204504, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22000738279808046, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2188999630860096, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22259136212624583, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23255813953488372, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.24215577703949798, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23181985972683647, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23754152823920266, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2207456626061277, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010303172957896073, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010592281395703092, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011001819905230538, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012118587354437058, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012225024209519037, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013789730942071027, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014954708163253075, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01604340213228334, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01862610622535501, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.026869152908459665, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.0507296329820758, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.08557804718289432, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.10788158198259563, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.13310307142295527, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15606628891657068, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17586740314182356, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1924282559046396, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20216035694254952, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20826525724795752, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2069094726243936, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2081720074380956, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20644835691511906, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20859294804973758, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20790560479480966, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21110178841849084, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21124246235646568, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2097935790285906, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20738927817015465, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20355515363069984, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19352421472215278, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19563093316495803, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1847923818802951, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18304588595428758, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18235036704866436, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18784658436788035, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17927193635029312, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17807128453087204, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17234561584512065, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1850619785466742, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1940266819308171, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18364855126773313, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17995858011243304, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17180317632349942, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.1499419021606445, "validation/loss_best": 2.4061200618743896, "validation/acc_best": 0.2751937984496124, "validation/f1_best": 0.20826525724795752} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.212404264807701, "train/grad": 0.2528245572000742, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12166748046875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12065185546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.118958740234375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11743896484375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.115986328125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11388427734375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11150390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.108465576171875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1030029296875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.089244384765625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0149493408203125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.7677386474609373, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6548310852050783, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.550404510498047, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4537627410888674, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3643774032592773, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.281604042053223, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2010720062255857, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.120487356185913, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0552266120910643, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9893274354934691, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9169231271743774, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8380522906780243, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7652045953273774, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7220782452821732, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6385744094848633, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.557606619000435, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5003371220827102, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.403970991373062, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3263244277238846, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2776464837789536, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2531234657764434, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2282513964176178, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1979573172330857, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1818505030870439, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1797786474227905, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.205455785393715, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2370818543434143, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.279911590218544, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.3822463059425354, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.4996741729974747, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.5792430210113526, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.6306828683614731, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020236888215877116, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020230988175608217, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020220258589833975, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020207644691690802, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020190943568013608, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020160846491344274, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020114784836769106, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020049101235345007, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01994362421799451, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01986967271193862, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020742060458287595, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025130262393504382, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027351278373971583, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02945301535539329, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03204693110659718, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0344193169567734, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03570586927235127, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03666648919694126, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037165820943191645, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037484884429723024, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037794103352352976, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03857078641653061, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039362422730773686, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0403392388112843, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.041018696445971725, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04242902539670467, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04376013658940792, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04447143582627177, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.046305165495723485, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04756001362577081, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04814788708463311, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0479932657442987, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0485211724601686, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04899218594655395, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04909514812752604, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.049506877232342955, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04962755935266614, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05023710882291198, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05078181540593505, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05063112240284681, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.049607337005436423, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04816793126985431, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04781202057376504, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1226017475128174, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122192621231079, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121464729309082, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1207499504089355, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.120016574859619, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1188864707946777, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1174979209899902, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.115413188934326, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1107635498046875, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.095940589904785, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.00909161567688, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7636382579803467, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.666395664215088, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5836682319641113, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.513904571533203, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4577393531799316, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.41764760017395, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.400841236114502, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3946497440338135, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4031436443328857, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.413564920425415, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.440399408340454, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.465728282928467, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4845447540283203, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.509958505630493, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5716423988342285, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6035454273223877, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.641692876815796, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7513911724090576, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8459486961364746, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9307117462158203, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.029069662094116, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.08760404586792, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1412134170532227, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.176328420639038, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.266070604324341, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.293504476547241, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.3480210304260254, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.3050389289855957, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2579827308654785, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1111061573028564, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.982435941696167, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.107912540435791, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07954964931709117, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.12495385751199704, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.17497231450719822, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19509043927648578, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2172388335179033, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24049464747139165, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.25378368401624213, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26891842008121075, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2748246585455888, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27408637873754155, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26854928017718716, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26744186046511625, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26799557032115173, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26578073089701, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26651901070505724, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2591362126245847, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25839793281653745, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25193798449612403, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23569582871908454, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2207456626061277, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21945367294204504, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21686969361387967, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21502399409376152, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20634920634920634, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2024732373569583, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21354743447766703, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21705426356589147, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22129937246216316, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20634920634920634, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010143374837274103, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01014249284959517, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009869132507710399, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010312999321225652, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.011118278130961229, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.011661040430670358, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.012812718108537915, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014550747281935327, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.017577706363101128, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.027003001713027316, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.05673596264951012, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.0905665521496017, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11249813867332743, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.13653724403756046, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16255461689136527, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17881900917048643, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19658491663450797, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20777095756118205, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20960095547979316, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2082360672465988, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20990401855185006, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21101662281637326, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21736602925746298, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21733786334344385, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21779662410315095, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21611935325936135, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21640853546160274, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21282009369391997, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2023739019710469, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18987826228154922, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19324806128563163, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18702561288014605, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1841266080210678, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1807589636299782, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17984798328338783, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17445167329821307, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1764640564489128, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17545117764030094, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1756955286004164, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17516810792179496, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1783623565094857, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18764285052310745, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17085223940752678, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.2010720062255857, "validation/loss_best": 2.400841236114502, "validation/acc_best": 0.2748246585455888, "validation/f1_best": 0.20777095756118205} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.1711577904224395, "train/grad": 0.2500962678343058, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.117393798828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1163232421875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1147705078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.113194580078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.111695556640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.109781494140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1072314453125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.104158935546875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.098177490234375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0815960693359377, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.9857147216796873, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.7514866638183593, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6461776733398437, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5398003387451173, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.437186431884766, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3466615295410156, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2629134368896486, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.181611270904541, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1029268741607665, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.037387547492981, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.971228733062744, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8989037704467773, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8212260329723358, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7476376187801361, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6960560297966003, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6043096560239791, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5207987475395202, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4574756741523742, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.351136526465416, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2661112701892854, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.20037383466959, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1742170411348343, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1382235616445542, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1005661565065383, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0824913761019708, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.070415624976158, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.0881478410959244, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1133465492725372, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.1472265508770942, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.247556383907795, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.3447617888450623, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.4439311254024505, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.503152562379837, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02065548687707633, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020649920077994467, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02063790652900934, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02062118105124682, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02059882036410272, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020561609803698956, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02050648642703891, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02043237035628408, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020312566333450376, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020246865567751227, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02142702316865325, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02582247435115278, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027892933124676347, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03002926390618086, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.032773989159613846, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035046858582645654, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03619867631234228, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03703709146939218, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037422777935862545, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037745120320469144, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.038068332113325595, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03877138454467058, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03951284136623144, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04039450615644455, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04104218058288098, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.042252457216382024, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0433607411570847, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04400091972202062, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04527879931032658, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04601767571642995, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.046333280093967914, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04627231877297163, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.046960744615644215, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04721062483265996, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.047978077940642835, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04796334762126207, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04869468150660396, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04882213814184069, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04940501462668181, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.049473139867186544, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.048593888767063616, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.047826899252831935, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04737664729356766, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.122683525085449, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1222856044769287, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121652603149414, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.12099289894104, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.120300769805908, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1192517280578613, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.117845058441162, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1156792640686035, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1105175018310547, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.09279727935791, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.986855983734131, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.751760721206665, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6593475341796875, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.577740430831909, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5073556900024414, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4555373191833496, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.419726610183716, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4056296348571777, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4022839069366455, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4126837253570557, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.424773693084717, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4520881175994873, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.475985050201416, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.493814468383789, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5205671787261963, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5868821144104004, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6088008880615234, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.656224489212036, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.761784076690674, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8580117225646973, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.954566478729248, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.046485662460327, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.079751968383789, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1446380615234375, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1852731704711914, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.2964892387390137, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.3196563720703125, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2840347290039062, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.2795181274414062, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2000889778137207, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.125188112258911, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0193123817443848, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.999821186065674, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08545588778146918, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.12919896640826872, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.17977113325950536, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1937984496124031, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2233296419342931, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24012550756736803, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26947212993724623, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27408637873754155, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2733480989294943, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26578073089701, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26393503137689184, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.262827611664821, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26116648209671467, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2591362126245847, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25655223329641935, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25341454411221853, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23145071982281284, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21963824289405684, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21336286452565523, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21280915466961978, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20653377630121816, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20468807678110004, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20616463639719454, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21225544481358435, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22185308231819859, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21631598375784422, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21557770394979697, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21040974529346623, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01029591382969513, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010366141896588852, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010222789616775264, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011378358103232502, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012797619266757583, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01461439515641634, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.016454220943242826, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017571795022933042, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.020670163447700392, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03208498656845233, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.05804908893427402, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.09784639564535795, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11395535674735052, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.14279274869011052, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16277609579096927, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18036019100013134, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19588539352993972, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2055137066803783, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20847645554874972, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20816316209537986, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20748341518273558, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2093348971552489, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21147762582460347, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21208530036088535, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21218334223394134, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21396250696148778, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2159640880641802, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21228002185655526, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19646276430766096, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19523337537431526, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1915211709855619, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18634185901116193, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18592850699720329, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18785157465040117, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1880218752236742, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18556675052902527, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18368976638419446, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1853408040554058, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.186392555093869, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19257405091538157, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18400804985805777, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18409665015533108, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.189309099879712, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.181611270904541, "validation/loss_best": 2.4056296348571777, "validation/acc_best": 0.27408637873754155, "validation/f1_best": 0.2055137066803783} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 2.142985505461693, "train/grad": 0.2453058895468712, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12505859375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1239208984375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.122271728515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12058837890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11904296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11673095703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.114166259765625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11084228515625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.104376220703125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0851495361328123, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.97370361328125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.746574249267578, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6455416870117188, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.542476806640625, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.443390007019043, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3566212844848633, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2749308013916014, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1933111000061034, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.112749252319336, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0437184238433836, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9733912086486816, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8965823078155517, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8155565345287323, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7397819185256957, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6847901582717895, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.586236509680748, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4971217375993728, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4292166250944138, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3150630187988281, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2230253863334655, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1590251693129539, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1257000494003295, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0791369727253914, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0324385929107667, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0026610207557678, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.9784616202116012, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.9899042275547981, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.9998721757531166, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0159566193819045, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.101975140273571, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2045601761341096, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.2911920261383056, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.3582272386550904, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020628575291484593, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02062124480959028, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020607515526935457, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020590102509595455, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020567865730263292, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02053041192702949, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02047549775801599, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020399688365869223, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020282395700924098, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020251508462242784, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021709577506408097, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02605255872011185, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02809110703878105, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03031291170977056, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03317866149358451, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03534899048507214, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0364146154653281, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03720153590664268, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03761703037656844, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0378769853245467, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.038127928944304584, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038802592009305956, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03950482962653041, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04031831925734877, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04087971469387412, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.042034911792725324, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042983769290149215, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04356626709923148, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.044640371855348346, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04518084207549691, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04537300439551473, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0454912506043911, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04566856727004051, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.045741769056767224, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.046126219779253005, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04616420842707157, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04629187887534499, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04610575309023261, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04637752901762724, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.046945161428302525, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04709502901881933, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04614152643829584, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.045763605311512946, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1226882934570312, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1222753524780273, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121643543243408, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.120990037918091, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1202917098999023, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1192033290863037, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1177282333374023, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.115452766418457, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.109919786453247, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0900909900665283, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.972352981567383, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7439377307891846, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6536738872528076, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5720407962799072, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.500298023223877, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4491264820098877, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4143598079681396, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4013924598693848, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.400712251663208, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4165258407592773, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4323689937591553, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4593660831451416, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.485464334487915, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5047693252563477, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5339884757995605, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.605616807937622, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6366162300109863, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6849098205566406, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.794456958770752, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.888601541519165, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.988684892654419, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.086416482925415, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1325738430023193, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2294535636901855, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.277771234512329, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4142701625823975, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.4557132720947266, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.451624631881714, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.404942035675049, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2969560623168945, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2081286907196045, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.113741159439087, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.142904758453369, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.073827980804725, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08564045773348099, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1345514950166113, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.1788482834994463, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19878183831672203, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.22240679217423404, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24381690660760427, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2589516426725729, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26947212993724623, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27464008859357697, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27593207825765964, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2692875599852344, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2661498708010336, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2648578811369509, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26578073089701, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2617201919527501, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25950535252860835, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25599852344038393, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25378368401624213, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2526762643041713, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23108157991878922, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22683647102251753, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21059431524547803, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2069029162052418, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2024732373569583, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19785898855666298, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20191952750092285, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20911775562938353, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21428571428571427, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21908453303802142, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21816168327796234, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21114802510151348, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009778137248618594, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010107523781467603, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010238382090911083, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010596453977492074, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01140950459507326, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013557960268781718, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01506507420138005, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.016956418892324493, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.019878607897184375, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.031766781796865526, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.062122116393009345, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.09704817322279864, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11726771264967455, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.14302012081792306, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16753081293409536, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18607558526505263, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19758271108092917, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20673789182821378, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2109108689108866, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20837255445764605, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20745955241837424, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20818555245045756, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2119999404634801, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21089075046246528, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21080684306434183, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21216336791030757, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.213028854870387, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21256450117417883, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19561810491004375, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1950078376908894, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1926306767109971, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18152517372234864, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18631195112982143, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1804635119365401, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17601251317872244, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1755456370448182, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17046093883651348, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17046116095231093, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17938653074203825, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1861911758665615, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18803405229747291, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1855923003276927, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18251105081418273, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.112749252319336, "validation/loss_best": 2.400712251663208, "validation/acc_best": 0.27593207825765964, "validation/f1_best": 0.2109108689108866} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 2.1114395201206206, "train/grad": 0.23702083364129067, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.121265869140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1202880859375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.118624267578125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1168896484375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.115556640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11338134765625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11091064453125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.107646484375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10113037109375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0809503173828126, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.962302551269531, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.738051452636719, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.636726837158203, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5321349334716796, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4312384033203127, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.346653366088867, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2652073287963868, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.183384017944336, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1023460674285888, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.035848045349121, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9644047355651855, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8879321336746215, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8058169031143187, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7257271552085875, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6643897038698197, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5644362163543701, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4713326305150987, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4031123894453048, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2885913586616515, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1940018153190612, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1167855882644653, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0707094144821168, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0220329663157464, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.970077645778656, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.9265332043170929, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.8931981307268143, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8922533142566681, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8974857172369957, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.9002694550156594, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.9824161925911903, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.0711566656827927, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.1602729251980781, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.2158152043819428, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020405902736820282, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02039755036123097, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020380565021187066, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02035917928442359, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02033672935795039, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02029573209118098, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020238283732905985, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02016281699296087, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020044622141867877, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020028472426347435, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021597202140837907, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025841505760326983, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02780094100162387, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030012612352147697, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03286482389084995, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03495834575034678, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03598050977103412, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036738063720986244, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03709070513956249, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0374188185390085, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037624947167932986, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03831326067447662, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03902459058910608, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.039791000429540875, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0403223624266684, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04142371267080307, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04234540006145835, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.042870172392576934, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04378889968618751, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04416467724367976, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.043982954174280164, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.043546104338020086, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04345806198194623, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04332302328199148, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04323736807331443, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.043101408425718545, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04294709565117955, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04282301228493452, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.043262255638837815, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04372471489012241, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04401123709976673, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.044020573683083056, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.043442137558013204, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1227033138275146, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122304916381836, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1216824054718018, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1209726333618164, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.120274782180786, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.119154214859009, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1176013946533203, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1152701377868652, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1095075607299805, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.088442087173462, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9648468494415283, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7400758266448975, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.650949716567993, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5693907737731934, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.496933698654175, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4464683532714844, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.411818504333496, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.398768901824951, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3969309329986572, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.410735845565796, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.424777030944824, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4516093730926514, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4772391319274902, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4984984397888184, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.530704975128174, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6058096885681152, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.633169174194336, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.686034679412842, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.799536943435669, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.895496129989624, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.997385263442993, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1055896282196045, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.148430347442627, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2455177307128906, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.295830249786377, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4463796615600586, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5010907649993896, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.5030698776245117, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.4843497276306152, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.3712854385375977, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2776386737823486, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2054786682128906, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.164034843444824, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07364341085271318, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0873015873015873, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1362126245847176, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.18124769287559986, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19712070874861573, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.22462163159837578, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24492432631967515, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.25802879291251385, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27260981912144705, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27408637873754155, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27593207825765964, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2698412698412698, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.266703580657069, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26541159099298633, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2606127722406792, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2617201919527501, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2532299741602067, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25396825396825395, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2513842746400886, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22369878183831673, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2201919527500923, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21336286452565523, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20948689553340716, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20191952750092285, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19915097822074565, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21354743447766703, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21834625322997417, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21502399409376152, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22111480251015134, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010341225868545472, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010175974954947075, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010497888926760115, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011344516445400188, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012522836986205729, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013852057893605751, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01572450182991677, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017736317298353912, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021106481543792948, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03439935532202251, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06286186884755436, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.09960538922946995, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1173070596253488, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.14575260808197418, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16866117498576094, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18596802430983508, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20116764106879356, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20714633105106225, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21102714640156348, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20956434678554955, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20954795465495205, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21198373991053765, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21215341744547941, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21305003953065804, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2142223422539535, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21010830948341142, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21332285880733395, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2116354472059514, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19748756873751716, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19230231416901675, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18973147400512339, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18150564945649159, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18524798067419398, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1809586556107562, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18323475759760074, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1749894387563212, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17545123908285223, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1701909199985374, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17613922223286593, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18226416671232867, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18571814919733268, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18072107540603385, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19046189043580267, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.1023460674285888, "validation/loss_best": 2.3969309329986572, "validation/acc_best": 0.27593207825765964, "validation/f1_best": 0.21102714640156348} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 2.0905636292696, "train/grad": 0.23418330952525138, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.123170166015625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122125244140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12042236328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.118760986328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11720947265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1149560546875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1122998046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.108909912109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.102109375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0810699462890625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.959638671875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.739391632080078, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.638834991455078, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.533784408569336, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.430862045288086, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.343295021057129, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2604408645629883, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1774210929870605, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0949352169036866, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.024217519760132, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9514276361465455, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8727807116508484, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7905962538719178, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7115412497520446, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6529466992616653, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5486797106266021, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4556873613595962, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.386003162264824, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2675071388483048, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.165465114712715, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.0866984859108926, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0463618609309195, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9877806237339973, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.9258239850401878, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.8839473491907119, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.840066086947918, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8403820222616196, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8376756954193115, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.832904824912548, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.8924546986818314, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.9642750358581543, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.0430197414755822, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0984890562295915, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020510262856259943, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02050169470719993, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0204832698777318, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020463019139133395, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020438749454915525, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0203973023686558, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020339892921037973, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020263519128784536, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020146167683415116, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02014046055264771, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021792278792709112, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.026022856999188663, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027951670354232194, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030187672106549143, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.033037495715543624, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03508987387642264, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03610950888134539, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03687629096210003, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03722946943715215, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037568984013050796, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03779370803385973, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03842767620459199, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03906694548204541, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.039799584392458204, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04027331471443176, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04132680585607886, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042200773656368255, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04262243630364537, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0434506681188941, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04378453126177192, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.043641261998564, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04336506368592381, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04301390694454312, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0427058413811028, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04247604073956609, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.041795427035540345, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04161979777738452, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.041378326639533045, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04140546437352896, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04175522109493613, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04188045369461179, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04217569639906287, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.041579281836748125, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1226985454559326, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.12229061126709, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1216604709625244, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.120959997177124, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.120255708694458, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.11910343170166, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1175787448883057, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.115222930908203, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1093826293945312, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.087886333465576, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9622859954833984, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.739095687866211, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6506428718566895, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5694382190704346, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4973416328430176, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.447333335876465, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.413248062133789, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4007978439331055, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.39861798286438, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4128475189208984, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.427464485168457, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4540584087371826, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4789905548095703, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4991941452026367, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.532755136489868, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.603623628616333, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6287238597869873, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6791932582855225, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7929346561431885, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.894324779510498, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9976747035980225, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.102962017059326, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.152900457382202, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2574265003204346, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3135294914245605, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.465535879135132, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5120532512664795, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.520948648452759, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.5017075538635254, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.389204263687134, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3100011348724365, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.239377737045288, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.239381790161133, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07511997046880768, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08803986710963455, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1363971945367294, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.1806939830195644, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19712070874861573, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2248062015503876, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24473975636766335, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2576596530084902, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27150239940937615, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27297895902547065, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2753783684016242, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2692875599852344, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2643041712809155, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25452196382428943, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25489110372831303, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25064599483204136, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23385012919896642, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22000738279808046, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2117017349575489, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21354743447766703, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2039497969730528, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20284237726098192, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20358065706902917, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21539313399778517, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21816168327796234, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21373200442967885, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21225544481358435, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010289971522581955, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01019695542466117, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010565225023040784, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011299764565841032, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012478420518556336, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014145484507229071, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.015883945216129552, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017245010812007404, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02230511659471642, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03466222223920617, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06293140736399006, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.09912512840036643, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11639286470283798, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.14542116320688273, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.167945733746606, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18499193457033067, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19889703765189504, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20519436018113327, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2099806172929872, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20869383284121268, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2085752048650913, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21092059132368193, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21186974822405455, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2124035269767377, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21018635017228338, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2113962517753869, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21455337447129944, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2118550318400001, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19831251236370276, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1958475727539143, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1904590245128258, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1839580773351596, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18599154566479892, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18357311925075112, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1834069508262912, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17510725102085542, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17639642607010197, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17528928884887085, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1775495768947476, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18711231643868378, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18360596605675208, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17999155868899389, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18349301249063465, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.0949352169036866, "validation/loss_best": 2.39861798286438, "validation/acc_best": 0.2753783684016242, "validation/f1_best": 0.2099806172929872} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 2.0923751431703566, "train/grad": 0.23064339213073254, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.123939208984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.123004150390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.121412353515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.119825439453125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.118343505859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.116162109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.113609619140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.110255126953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10341796875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.081915283203125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.956990051269531, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.7353286743164062, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.636241455078125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5333517456054686, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4316860961914064, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3469939041137695, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2654619598388672, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1838676738739013, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.103505640029907, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.034795160293579, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9647075939178467, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8882596111297607, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8047941160202026, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7258643364906312, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6672615170478822, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5655320405960083, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4744357693195342, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.401416838169098, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2773078733682632, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1775929421186446, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.09851422727108, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0464196673035622, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9909472036361694, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.9343723419308663, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.8866797116398811, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.8427747756242752, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8347021439671516, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8322697067260743, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.8207709556818008, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.8744250079989433, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.9481249141693116, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.0202817264199258, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0575718128681182, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020130309122614563, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020121873570606113, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02010448154527694, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020083320806734264, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02005874172318727, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02001859836280346, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01995922711212188, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019881775537505746, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019762367559596896, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01975717917084694, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021388604119420052, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025576517311856152, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027472185995429756, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02967563291080296, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03250225907191634, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03456961493939161, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03564199032261968, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036501368870958686, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.036972770830616355, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03729079023003578, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037602538280189036, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038277400163933634, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03894885405898094, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03971212099306285, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04012737512588501, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.041159678604453805, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042069066148251295, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.042494769636541606, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04314862979575992, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04350223705172539, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.043177821580320595, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04264977980405092, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04227211045101285, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04187172006815672, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04155108151957393, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04102499792352319, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04080256460234523, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04043906340375543, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.040091842878609896, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.040285078808665276, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04062543524429202, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04081408377736807, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.040435128826647994, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1226871013641357, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122286558151245, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.12164306640625, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1209495067596436, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1202268600463867, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1190989017486572, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.117555856704712, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.115178108215332, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1093435287475586, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0877795219421387, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.961884021759033, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.738835334777832, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.650463581085205, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5692176818847656, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4970595836639404, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4470584392547607, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4128577709198, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4003336429595947, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3983216285705566, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4127197265625, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.427426815032959, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4542880058288574, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.479780912399292, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5001890659332275, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5342319011688232, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6055941581726074, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.630957841873169, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6806955337524414, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.794532060623169, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8953850269317627, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9998371601104736, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1092185974121094, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.16007399559021, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2629740238189697, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3111863136291504, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4606518745422363, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5062313079833984, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.511948823928833, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.497105360031128, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.391353130340576, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3199527263641357, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2424232959747314, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.231713056564331, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08803986710963455, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1362126245847176, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.18087855297157623, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1969361387966039, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2249907715023994, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24510889627168697, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2713178294573643, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2727943890734588, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27445551864156514, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.268733850129199, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26707272056109266, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26578073089701, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26393503137689184, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26098191214470284, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2604282022886674, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25341454411221853, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25341454411221853, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2502768549280177, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23440383905500184, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21908453303802142, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21779254337393872, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21114802510151348, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21225544481358435, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20210409745293467, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20044296788482835, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20191952750092285, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21539313399778517, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21963824289405684, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21483942414174972, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21354743447766703, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010329124675397207, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01019785974185968, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010554887150815847, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01138739120557846, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012068943805670801, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014193535612486709, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.015593171702324849, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017195087250049607, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021988652529264203, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03455784685896155, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06291408326441186, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.09960764029342113, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11623481599500189, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.14567662310219984, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.167944500490082, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1852344824825587, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19927597837647334, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20493669016215812, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20935826360407148, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2082160188887429, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2086767104720706, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21045163720096535, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21081558789628044, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21148459360816807, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21154611253089328, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21053106873960248, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21298084203904888, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2122465044973282, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1983191401902706, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19344113141801453, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18884377686096698, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18317831560572764, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18675523438952846, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18282703216042906, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1817890695336728, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17386537066970073, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17454597970880834, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17481876323872017, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17814625223983035, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18728873340385668, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18688493093372438, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18258348705688376, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18433843436886252, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.103505640029907, "validation/loss_best": 2.3983216285705566, "validation/acc_best": 0.27445551864156514, "validation/f1_best": 0.20935826360407148} diff --git a/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic/config.yaml b/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7bec4a481e0f46989a8d94499ac5fe53e559b1f7 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..970a279b432981832f8cbca8de153a6fb8985906 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,2.782559402207126,train,0.994661921708185,0.0032038333791449135,0.994365029762402,0.003385488840597332,0.9939424297068056,0.0036897057254305574 +flat_mae,patch,logistic,ppmi_dx,,2.782559402207126,test,0.63,0.046637199744410045,0.5960257670051315,0.050176774519914445,0.5948090948090948,0.04929443690345563 +flat_mae,patch,logistic,ppmi_dx,1,0.046415888336127774,train,0.8096085409252669,0.015863098200706943,0.7906802699776875,0.018038859180354347,0.7818855705416399,0.017942658460238164 +flat_mae,patch,logistic,ppmi_dx,1,0.046415888336127774,test,0.66,0.042965567609424174,0.609375,0.05299364494136126,0.6086587436332768,0.048006286292201016 +flat_mae,patch,logistic,ppmi_dx,2,0.005994842503189409,train,0.7224199288256228,0.016351761343544404,0.666575392479007,0.022072814037731606,0.663241275958039,0.019028129990131364 +flat_mae,patch,logistic,ppmi_dx,2,0.005994842503189409,test,0.64,0.03863583310865705,0.5535714285714286,0.05005477390651157,0.567062818336163,0.041567976915763034 +flat_mae,patch,logistic,ppmi_dx,3,0.005994842503189409,train,0.7419928825622776,0.015123886943580848,0.6947974367329206,0.02002992612087956,0.6878345108113895,0.017804902007378286 +flat_mae,patch,logistic,ppmi_dx,3,0.005994842503189409,test,0.65,0.0413418141837051,0.5872154735228211,0.05246277692692931,0.5904074702886248,0.045910437735547725 +flat_mae,patch,logistic,ppmi_dx,4,0.046415888336127774,train,0.8202846975088968,0.014878326867537048,0.8004955801978806,0.017318788035437915,0.7896863626632413,0.017259400288715834 +flat_mae,patch,logistic,ppmi_dx,4,0.046415888336127774,test,0.66,0.04326215898449822,0.6155585707824514,0.04954627740140329,0.6137521222410866,0.04620503161625719 +flat_mae,patch,logistic,ppmi_dx,5,0.3593813663804626,train,0.9306049822064056,0.010843817550715862,0.9250500111135808,0.01201046561175187,0.9166800470991223,0.013139825248232425 +flat_mae,patch,logistic,ppmi_dx,5,0.3593813663804626,test,0.63,0.047990378202302175,0.6053333333333333,0.05093381344697975,0.6048387096774194,0.05046411617172397 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,train,0.7295373665480427,0.01571211092456402,0.6805910770105144,0.020754905483152952,0.6751097195461357,0.018386442177864372 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,test,0.67,0.04098531932289901,0.6033177064551027,0.05521668794452621,0.6065365025466893,0.046686074058680437 +flat_mae,patch,logistic,ppmi_dx,7,0.046415888336127774,train,0.8185053380782918,0.015137151965881026,0.798268581081081,0.01748834175091146,0.7873715478484264,0.017354472994928465 +flat_mae,patch,logistic,ppmi_dx,7,0.046415888336127774,test,0.57,0.05024753128264114,0.5459824728117411,0.05247098811937789,0.5462648556876061,0.052674062075452346 +flat_mae,patch,logistic,ppmi_dx,8,0.3593813663804626,train,0.9234875444839857,0.01106144396585015,0.9183479015958076,0.011928777507903606,0.914378612716763,0.012572214086923217 +flat_mae,patch,logistic,ppmi_dx,8,0.3593813663804626,test,0.53,0.043916917013834204,0.4643874643874644,0.04683613009120331,0.4732597623089983,0.04384504715126099 +flat_mae,patch,logistic,ppmi_dx,9,0.3593813663804626,train,0.9323843416370107,0.010179802232894417,0.9273457168129551,0.011113872291851642,0.920734318133162,0.01191229133792892 +flat_mae,patch,logistic,ppmi_dx,9,0.3593813663804626,test,0.58,0.047870237099893284,0.5543293718166383,0.05054998888663162,0.5543293718166383,0.05021844943448105 +flat_mae,patch,logistic,ppmi_dx,10,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,10,21.54434690031882,test,0.56,0.04944954600398268,0.537620849096259,0.051066610748901835,0.5382003395585738,0.05151089390957462 +flat_mae,patch,logistic,ppmi_dx,11,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,11,10000.0,test,0.51,0.04955338131752464,0.49541756770672435,0.049931720433012,0.49787775891341257,0.05121415756015187 +flat_mae,patch,logistic,ppmi_dx,12,0.046415888336127774,train,0.8167259786476868,0.016338490279448826,0.7955158027857554,0.019261664013065463,0.7841870049239992,0.019103110081313122 +flat_mae,patch,logistic,ppmi_dx,12,0.046415888336127774,test,0.6,0.044890515702094576,0.5324918186068257,0.052732320149245934,0.5398981324278438,0.047046723729376114 +flat_mae,patch,logistic,ppmi_dx,13,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,13,1291.5496650148827,test,0.63,0.04747174317422945,0.6093337556752191,0.04961095468182389,0.6099320882852293,0.049949176985838936 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,train,0.8291814946619217,0.014679329661709052,0.8106070179872783,0.017034656692474323,0.7995209805180903,0.017131043678412197 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,test,0.59,0.04158605535513076,0.5071523019593701,0.05077033841497736,0.5216468590831919,0.04365291317576021 +flat_mae,patch,logistic,ppmi_dx,15,0.005994842503189409,train,0.7402135231316725,0.014485426668558654,0.6911483505728955,0.01946566753808946,0.6846499678869621,0.017147979450700148 +flat_mae,patch,logistic,ppmi_dx,15,0.005994842503189409,test,0.67,0.041939117778036286,0.6176572818908586,0.05091092198925104,0.616723259762309,0.046046198262830476 +flat_mae,patch,logistic,ppmi_dx,16,0.005994842503189409,train,0.7241992882562278,0.01568177277900789,0.670431114390027,0.020798793137464855,0.6664258188824663,0.01812074800081546 +flat_mae,patch,logistic,ppmi_dx,16,0.005994842503189409,test,0.58,0.040741777084462076,0.5,0.04814962677601584,0.5135823429541596,0.04221757963008243 +flat_mae,patch,logistic,ppmi_dx,17,0.046415888336127774,train,0.8149466192170819,0.01641609816685505,0.7937934830160456,0.01914841301498474,0.7827419182187968,0.01883603772906804 +flat_mae,patch,logistic,ppmi_dx,17,0.046415888336127774,test,0.61,0.04746618164546207,0.5793334052421529,0.05088704836640471,0.5785229202037352,0.05009765026933367 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,train,0.7455516014234875,0.015344615941218788,0.7028708753119512,0.019818626684667755,0.6950733247698566,0.017943672964998048 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,test,0.6,0.04561422585115306,0.5324918186068257,0.05226466588304116,0.5398981324278438,0.04719937984179418 +flat_mae,patch,logistic,ppmi_dx,19,0.3593813663804626,train,0.9323843416370107,0.010144455671776346,0.9276332732423385,0.011002166342232658,0.9224737743523871,0.01186496620573 +flat_mae,patch,logistic,ppmi_dx,19,0.3593813663804626,test,0.61,0.047430703136259744,0.5983935742971888,0.04777752157865447,0.6039898132427843,0.04881459704786047 +flat_mae,patch,logistic,ppmi_dx,20,0.046415888336127774,train,0.8149466192170819,0.015676461152069762,0.7937934830160456,0.01827532314587067,0.7827419182187968,0.018037192298602456 +flat_mae,patch,logistic,ppmi_dx,20,0.046415888336127774,test,0.62,0.04350290105268843,0.5558672276764843,0.05026843191706141,0.5611205432937181,0.04514776767559833 +flat_mae,patch,logistic,ppmi_dx,21,0.005994842503189409,train,0.7188612099644128,0.01627330967079983,0.6634730538922156,0.021785705694362074,0.6603511025476343,0.018793630406885413 +flat_mae,patch,logistic,ppmi_dx,21,0.005994842503189409,test,0.69,0.03672751012524534,0.627359057579036,0.04921084824823078,0.6277589134125636,0.0421428631868603 +flat_mae,patch,logistic,ppmi_dx,22,0.3593813663804626,train,0.9288256227758007,0.011057696576976616,0.9233664230391623,0.01213558498337245,0.9161046885035324,0.013095859781258993 +flat_mae,patch,logistic,ppmi_dx,22,0.3593813663804626,test,0.57,0.04726206089454838,0.5361881134721174,0.05022732780455318,0.5360780984719864,0.04938003938971144 +flat_mae,patch,logistic,ppmi_dx,23,0.3593813663804626,train,0.9270462633451957,0.010759802219494402,0.9215305660274551,0.011803786795219677,0.9146596017983302,0.012787448104345218 +flat_mae,patch,logistic,ppmi_dx,23,0.3593813663804626,test,0.57,0.045104727025002594,0.5174503422735944,0.05193065602054449,0.5207979626485568,0.04834043813460352 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,train,0.7224199288256228,0.015942241892430976,0.6699941280093952,0.020946300321308575,0.6658504602868764,0.01836866436856448 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,test,0.64,0.04025905612405736,0.5628946090335114,0.051945077781129216,0.5721561969439728,0.04373107491527483 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,train,0.8113879003558719,0.015092272434108095,0.7898279730740463,0.017636477457637013,0.7789820166987798,0.017421057306077525 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,test,0.6,0.036888176967695224,0.49264332825976664,0.048560508614813945,0.5195246179966044,0.038775645580889744 +flat_mae,patch,logistic,ppmi_dx,26,0.3593813663804626,train,0.9217081850533808,0.01102254844662422,0.916041938287701,0.011988952717526114,0.9103243416827231,0.012708704439239162 +flat_mae,patch,logistic,ppmi_dx,26,0.3593813663804626,test,0.61,0.0476229986456124,0.5953937130407718,0.04892093195615898,0.5988964346349746,0.050064785258821076 +flat_mae,patch,logistic,ppmi_dx,27,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,27,21.54434690031882,test,0.6,0.04594289934255347,0.5796553173602353,0.047755605485833345,0.5806451612903225,0.04822029507144469 +flat_mae,patch,logistic,ppmi_dx,28,0.3593813663804626,train,0.9288256227758007,0.010508468970904397,0.9238244981498298,0.011386037435707654,0.9187138728323699,0.012092833674475507 +flat_mae,patch,logistic,ppmi_dx,28,0.3593813663804626,test,0.68,0.04505951619802415,0.64349376114082,0.05048414411563327,0.6400679117147707,0.04778205075273614 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,train,0.8131672597864769,0.015714459529199674,0.7910077739016486,0.01872767933585234,0.7795573752943695,0.01850217012688141 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,test,0.65,0.04371073552343863,0.5944849959448499,0.052296286299911975,0.5955008488964346,0.04733851143118218 +flat_mae,patch,logistic,ppmi_dx,30,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,30,166.81005372000556,test,0.6,0.04750684582247068,0.5833333333333333,0.048420210459806,0.5857385398981324,0.04904910831331844 +flat_mae,patch,logistic,ppmi_dx,31,0.005994842503189409,train,0.7224199288256228,0.01633012446782592,0.666575392479007,0.022171123617382566,0.663241275958039,0.019084213087287125 +flat_mae,patch,logistic,ppmi_dx,31,0.005994842503189409,test,0.65,0.042786778331629506,0.5792763553311696,0.05428795720402253,0.5853140916808149,0.04667641164031771 +flat_mae,patch,logistic,ppmi_dx,32,0.3593813663804626,train,0.9252669039145908,0.011191234989079311,0.9196978975301082,0.012273379849367234,0.9132145150931279,0.013322380829397521 +flat_mae,patch,logistic,ppmi_dx,32,0.3593813663804626,test,0.65,0.048499117517744585,0.6338529134846741,0.05042591606654472,0.6362478777589134,0.051163687277664086 +flat_mae,patch,logistic,ppmi_dx,33,0.3593813663804626,train,0.9288256227758007,0.011298638155722981,0.9238244981498298,0.012256515924815168,0.9187138728323699,0.012953071946897576 +flat_mae,patch,logistic,ppmi_dx,33,0.3593813663804626,test,0.61,0.04827877380381569,0.6010230179028133,0.048363015895970835,0.6090831918505942,0.049278894692317915 +flat_mae,patch,logistic,ppmi_dx,34,0.046415888336127774,train,0.8185053380782918,0.015529670114561316,0.7972410865874364,0.018292869556701497,0.7856320916292014,0.018112044378200014 +flat_mae,patch,logistic,ppmi_dx,34,0.046415888336127774,test,0.68,0.0437291481737296,0.6259934548854604,0.05424627460274398,0.6247877758913413,0.048463300105676795 +flat_mae,patch,logistic,ppmi_dx,35,2.782559402207126,train,0.998220640569395,0.0017470373962467744,0.9981184064710746,0.0018504803644432994,0.9976851851851851,0.0022727662423395537 +flat_mae,patch,logistic,ppmi_dx,35,2.782559402207126,test,0.53,0.04716819267260512,0.5219204557013528,0.04701890959904438,0.5292869269949065,0.048159979897378975 +flat_mae,patch,logistic,ppmi_dx,36,2.782559402207126,train,0.998220640569395,0.0017988223810135307,0.9981184064710746,0.0019052255440978531,0.9976851851851851,0.0023401346715963035 +flat_mae,patch,logistic,ppmi_dx,36,2.782559402207126,test,0.63,0.04755828003618298,0.6009060511271707,0.051628775929055494,0.5997453310696095,0.050841925333703915 +flat_mae,patch,logistic,ppmi_dx,37,0.3593813663804626,train,0.9288256227758007,0.010244420511632355,0.9236744893524554,0.011179388258332073,0.9178441447227574,0.01207291950840747 +flat_mae,patch,logistic,ppmi_dx,37,0.3593813663804626,test,0.64,0.0466523997239156,0.6179966044142615,0.04860304902811176,0.6179966044142615,0.048815718106102045 +flat_mae,patch,logistic,ppmi_dx,38,0.046415888336127774,train,0.8238434163701067,0.0151409314732896,0.8029501868215544,0.017874045992265882,0.7908370798544209,0.017739608368692216 +flat_mae,patch,logistic,ppmi_dx,38,0.046415888336127774,test,0.64,0.04329711306773236,0.6043956043956044,0.04882610229383651,0.6027164685908319,0.047021555456442564 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,train,0.9252669039145908,0.010772634067921264,0.9191998028261584,0.011930057282385017,0.9106053307642903,0.012929488177790851 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,test,0.7,0.04331602936558243,0.6744791666666667,0.04796313233968357,0.6714770797962648,0.047114111441141936 +flat_mae,patch,logistic,ppmi_dx,40,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,40,166.81005372000556,test,0.68,0.04695926319694549,0.6715927750410509,0.047267852612549933,0.6808149405772496,0.0478177571602733 +flat_mae,patch,logistic,ppmi_dx,41,0.046415888336127774,train,0.802491103202847,0.01527085034725932,0.7779012016021362,0.018343335001124627,0.7665382145150932,0.017964737307952488 +flat_mae,patch,logistic,ppmi_dx,41,0.046415888336127774,test,0.71,0.04631265917651458,0.6991389148251893,0.04729671624722055,0.7050084889643464,0.047695987562894865 +flat_mae,patch,logistic,ppmi_dx,42,0.3593813663804626,train,0.9217081850533808,0.010786661201579525,0.9158739878886848,0.011844423649910995,0.9094546135731107,0.012749586821417384 +flat_mae,patch,logistic,ppmi_dx,42,0.3593813663804626,test,0.67,0.039112841880896344,0.6108031607500884,0.049384140758112746,0.6116298811544991,0.04345042400738811 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,train,0.7295373665480427,0.01671244208802489,0.6805910770105144,0.021587261578795564,0.6751097195461357,0.01919390451909479 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,test,0.65,0.04132077443611143,0.5872154735228211,0.05065967853883074,0.5904074702886248,0.044789943742739244 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,train,0.7295373665480427,0.015153155290964861,0.67953181272509,0.020201922137299044,0.6742399914365232,0.017807380309783898 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,test,0.65,0.037723260728627375,0.5706048337627285,0.05220094945757204,0.580220713073005,0.042820348045713094 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,train,0.8202846975088968,0.015713178076018453,0.7999950670007012,0.018500714560789003,0.7888166345536287,0.01845080728810227 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,test,0.72,0.038990352652931985,0.6666666666666667,0.05115915779720026,0.6621392190152802,0.045250851195623644 +flat_mae,patch,logistic,ppmi_dx,46,2.782559402207126,train,0.998220640569395,0.001830064037895007,0.9981184064710746,0.0019385049183659492,0.9976851851851851,0.002380777753002301 +flat_mae,patch,logistic,ppmi_dx,46,2.782559402207126,test,0.63,0.047945681765931746,0.6093337556752191,0.050230771319847774,0.6099320882852293,0.05033746027429677 +flat_mae,patch,logistic,ppmi_dx,47,0.3593813663804626,train,0.9217081850533808,0.01101169058627063,0.9158739878886848,0.012024663875243514,0.9094546135731107,0.012887891609008157 +flat_mae,patch,logistic,ppmi_dx,47,0.3593813663804626,test,0.6,0.046589938828034534,0.5894909688013137,0.04717150228119544,0.5959252971137521,0.04844875322948227 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,train,0.7384341637010676,0.016042825802306603,0.6874411213892515,0.02151272853648058,0.6814654249625348,0.018818595813776044 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,test,0.69,0.04563640651935689,0.6615351020853806,0.05057358453156994,0.6583191850594228,0.049230488071132394 +flat_mae,patch,logistic,ppmi_dx,49,0.046415888336127774,train,0.8202846975088968,0.015373335965890021,0.7979101023587005,0.018435843875316216,0.7853377221151787,0.018173438857487602 +flat_mae,patch,logistic,ppmi_dx,49,0.046415888336127774,test,0.69,0.037703108625151846,0.6343908479773559,0.049320581959225784,0.6328522920203735,0.04303108604330188 +flat_mae,patch,logistic,ppmi_dx,50,2.782559402207126,train,0.99644128113879,0.002424830183941676,0.9962334964144495,0.002572733656768152,0.9953703703703703,0.0031545244522574543 +flat_mae,patch,logistic,ppmi_dx,50,2.782559402207126,test,0.6,0.046699361880008596,0.570999570999571,0.05064447039305465,0.5704584040747029,0.049886715500986376 +flat_mae,patch,logistic,ppmi_dx,51,0.3593813663804626,train,0.9234875444839857,0.010068014418185603,0.9177027887605017,0.01108523816405107,0.910899700278313,0.0120481942784993 +flat_mae,patch,logistic,ppmi_dx,51,0.3593813663804626,test,0.64,0.04774268949273805,0.6138996138996139,0.05122204295673622,0.6129032258064516,0.05049821237560773 +flat_mae,patch,logistic,ppmi_dx,52,0.005994842503189409,train,0.7170818505338078,0.016830890781910732,0.6630784370652179,0.021980220100453494,0.6597757439520445,0.019225243455643064 +flat_mae,patch,logistic,ppmi_dx,52,0.005994842503189409,test,0.62,0.03606170267749431,0.5287698412698413,0.04859288924269462,0.5458404074702886,0.03930999153478549 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,train,0.8202846975088968,0.014866677846994074,0.7989693825149191,0.017549444156426154,0.7870771783344037,0.017351006274914697 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,test,0.65,0.04540944395167155,0.612789025334661,0.05143258623545436,0.6107809847198642,0.048943603009776376 +flat_mae,patch,logistic,ppmi_dx,54,0.005994842503189409,train,0.7455516014234875,0.016439047159161044,0.7028708753119512,0.020626816772946328,0.6950733247698566,0.01863562152513086 +flat_mae,patch,logistic,ppmi_dx,54,0.005994842503189409,test,0.61,0.041378695001171806,0.5481404240528328,0.04793255709748489,0.5530560271646858,0.04338646328621009 +flat_mae,patch,logistic,ppmi_dx,55,0.005994842503189409,train,0.7295373665480427,0.015691851926776627,0.6751247413898016,0.021429469742710733,0.6707610789980732,0.01853858444469571 +flat_mae,patch,logistic,ppmi_dx,55,0.005994842503189409,test,0.66,0.03772784117863093,0.5952380952380952,0.04856114856135563,0.5984719864176571,0.041840866525981686 +flat_mae,patch,logistic,ppmi_dx,56,2.782559402207126,train,0.994661921708185,0.003019882779156831,0.9943452231222015,0.003209979372448885,0.9930555555555556,0.003928643800662361 +flat_mae,patch,logistic,ppmi_dx,56,2.782559402207126,test,0.65,0.047806321757692255,0.612789025334661,0.05337121094195392,0.6107809847198642,0.05109208746864999 +flat_mae,patch,logistic,ppmi_dx,57,0.3593813663804626,train,0.9163701067615658,0.011359436736858287,0.9096756544189306,0.012523159696547596,0.9016404410190537,0.013321325022129238 +flat_mae,patch,logistic,ppmi_dx,57,0.3593813663804626,test,0.66,0.04314209081627824,0.6212121212121212,0.049615855897525075,0.6188455008488964,0.04691725244017084 +flat_mae,patch,logistic,ppmi_dx,58,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,58,166.81005372000556,test,0.53,0.04730014376299506,0.4986666666666667,0.0498073174604663,0.4987266553480475,0.04956325965565883 +flat_mae,patch,logistic,ppmi_dx,59,0.046415888336127774,train,0.8185053380782918,0.01482699588565822,0.798268581081081,0.01735891091056715,0.7873715478484264,0.017317563606472332 +flat_mae,patch,logistic,ppmi_dx,59,0.046415888336127774,test,0.61,0.04647268014651189,0.5623386825272135,0.0517875028203542,0.5632427843803056,0.04853642738330733 +flat_mae,patch,logistic,ppmi_dx,60,0.046415888336127774,train,0.8131672597864769,0.015208144144921864,0.7910077739016486,0.017914969379987468,0.7795573752943695,0.017647437222421302 +flat_mae,patch,logistic,ppmi_dx,60,0.046415888336127774,test,0.63,0.04377285003286855,0.5713127099988413,0.05211834284298713,0.5742784380305602,0.04700746600357921 +flat_mae,patch,logistic,ppmi_dx,61,2.782559402207126,train,0.994661921708185,0.0030760405649016475,0.9943452231222015,0.0032699649841431587,0.9930555555555556,0.0040017009200803855 +flat_mae,patch,logistic,ppmi_dx,61,2.782559402207126,test,0.65,0.04449130701609023,0.6419437340153453,0.04459271530769582,0.6515280135823429,0.04552994289255248 +flat_mae,patch,logistic,ppmi_dx,62,0.046415888336127774,train,0.8131672597864769,0.015450142586426428,0.7915452358495565,0.017967554331117724,0.780427103403982,0.017671747495424855 +flat_mae,patch,logistic,ppmi_dx,62,0.046415888336127774,test,0.63,0.044981218302753874,0.5783475783475784,0.05259371490050221,0.5793718166383701,0.048232809413455535 +flat_mae,patch,logistic,ppmi_dx,63,0.046415888336127774,train,0.8220640569395018,0.014289278159542954,0.8027156437367482,0.016431340771342146,0.7920011774780561,0.016318198448813152 +flat_mae,patch,logistic,ppmi_dx,63,0.046415888336127774,test,0.57,0.0446347443142671,0.50997150997151,0.04985826886762561,0.515704584040747,0.04585897834071706 +flat_mae,patch,logistic,ppmi_dx,64,0.046415888336127774,train,0.8131672597864769,0.015484889527382235,0.793106349857478,0.01790411002799389,0.7830362877328195,0.017805896256724168 +flat_mae,patch,logistic,ppmi_dx,64,0.046415888336127774,test,0.56,0.04311419255883147,0.5164835164835164,0.047698756519742716,0.5178268251273345,0.045669107686708295 +flat_mae,patch,logistic,ppmi_dx,65,0.046415888336127774,train,0.8291814946619217,0.014996954305516919,0.809655522784042,0.01757333107080405,0.7977815242988653,0.01752468184530245 +flat_mae,patch,logistic,ppmi_dx,65,0.046415888336127774,test,0.56,0.048057544673027135,0.5225694444444444,0.050227335134383655,0.5229202037351443,0.04886333606550463 +flat_mae,patch,logistic,ppmi_dx,66,0.046415888336127774,train,0.8167259786476868,0.015593496749190357,0.7970471812887641,0.017964646576309255,0.7867961892528367,0.01785495304400555 +flat_mae,patch,logistic,ppmi_dx,66,0.046415888336127774,test,0.62,0.04492035618736789,0.5766488413547237,0.04979178847644729,0.5764006791171477,0.04714141921543799 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,train,0.800711743772242,0.016002654571395068,0.7756007130124778,0.01910657337926004,0.7642233997002783,0.018624912764337767 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,test,0.68,0.0417577537710064,0.64349376114082,0.04743842131872774,0.6400679117147707,0.04537473097325511 +flat_mae,patch,logistic,ppmi_dx,68,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,68,166.81005372000556,test,0.49,0.04732716344764389,0.4615140956604371,0.04785001760218066,0.4613752122241087,0.04799125816343391 +flat_mae,patch,logistic,ppmi_dx,69,2.782559402207126,train,0.99644128113879,0.002509870838911609,0.9962334964144495,0.0026632116902545726,0.9953703703703703,0.0032651560450655624 +flat_mae,patch,logistic,ppmi_dx,69,2.782559402207126,test,0.62,0.04823500388721867,0.6100164203612479,0.04839652902488723,0.6171477079796265,0.049481507036448734 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,train,0.7419928825622776,0.015042252939243434,0.6916936231390576,0.02037733783053222,0.685225326482552,0.017836541258127322 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,test,0.62,0.04137088348101839,0.5558672276764843,0.05024800786282175,0.5611205432937181,0.0444619989746408 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,train,0.7135231316725978,0.016282364265568772,0.6588404299842772,0.02138740266161696,0.6560158424320274,0.018576848111234452 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,test,0.64,0.04382873942973948,0.5792426367461431,0.052995812791048244,0.5823429541595926,0.04709789515804875 +flat_mae,patch,logistic,ppmi_dx,72,0.046415888336127774,train,0.8256227758007118,0.014902961149896393,0.8056900128420429,0.017387778511504276,0.7940216227788482,0.0172553009232007 +flat_mae,patch,logistic,ppmi_dx,72,0.046415888336127774,test,0.61,0.04850927746318224,0.584,0.050834067044355176,0.583616298811545,0.05053627715343163 +flat_mae,patch,logistic,ppmi_dx,73,0.046415888336127774,train,0.8416370106761566,0.01373758409631155,0.824633001307767,0.01598269347065495,0.8131154998929565,0.016166329140607182 +flat_mae,patch,logistic,ppmi_dx,73,0.046415888336127774,test,0.67,0.04544877996162272,0.6440513428972063,0.049351515600487145,0.6421901528013583,0.04885573943048031 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,train,0.7402135231316725,0.016817712777803907,0.695187006850231,0.02135071335874646,0.6881288803254121,0.019270929658693217 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,test,0.57,0.03884685830282803,0.49286472461375164,0.046950021807104655,0.5055178268251274,0.04078457431236075 +flat_mae,patch,logistic,ppmi_dx,75,0.3593813663804626,train,0.9306049822064056,0.010191347583007871,0.9258010460569746,0.011008164041154195,0.9210286876471847,0.011670962193498344 +flat_mae,patch,logistic,ppmi_dx,75,0.3593813663804626,test,0.63,0.04558333028641062,0.6053333333333333,0.04829309239815553,0.6048387096774194,0.04828085694865461 +flat_mae,patch,logistic,ppmi_dx,76,0.005994842503189409,train,0.7206405693950177,0.015632323868332843,0.6695392935659898,0.02064459054625324,0.6652751016912867,0.018183828570631725 +flat_mae,patch,logistic,ppmi_dx,76,0.005994842503189409,test,0.7,0.04001661654862889,0.6553308823529411,0.04934657780882658,0.6511035653650254,0.04503407587085904 +flat_mae,patch,logistic,ppmi_dx,77,0.046415888336127774,train,0.8185053380782918,0.014255935932966889,0.798268581081081,0.016676403930333962,0.7873715478484264,0.016586888976982673 +flat_mae,patch,logistic,ppmi_dx,77,0.046415888336127774,test,0.65,0.04226156646410542,0.6072270227808326,0.049257171846027174,0.6056876061120543,0.046233704220901464 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,train,0.7277580071174378,0.016539692532206142,0.6830862108999237,0.020839411427949706,0.6771435452793835,0.0188305612315209 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,test,0.64,0.036805494155085064,0.5535714285714286,0.04814877497758012,0.567062818336163,0.0397539121109246 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,train,0.7384341637010676,0.015468736535589193,0.6863589252969784,0.02092388596659383,0.6805956968529223,0.018198736642880197 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,test,0.62,0.039943179643088016,0.5386109762020399,0.05029535151828152,0.5509337860780985,0.04254534517406859 +flat_mae,patch,logistic,ppmi_dx,80,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,80,1291.5496650148827,test,0.54,0.05188918577121826,0.5245969408846631,0.05259700765927105,0.5271646859083192,0.05381489675583566 +flat_mae,patch,logistic,ppmi_dx,81,0.3593813663804626,train,0.9252669039145908,0.01082416737018887,0.9198582138200783,0.01179442498680172,0.9140842432027403,0.012558496851326827 +flat_mae,patch,logistic,ppmi_dx,81,0.3593813663804626,test,0.59,0.04745218646174272,0.5710848415106182,0.0490039270137317,0.5725806451612903,0.049692458813771996 +flat_mae,patch,logistic,ppmi_dx,82,2.782559402207126,train,0.99644128113879,0.002705270105064991,0.9962334964144495,0.002872049515166732,0.9953703703703703,0.003519356016311396 +flat_mae,patch,logistic,ppmi_dx,82,2.782559402207126,test,0.64,0.04532469084285077,0.6043956043956044,0.0506779186406389,0.6027164685908319,0.04859076739847815 +flat_mae,patch,logistic,ppmi_dx,83,0.046415888336127774,train,0.8309608540925267,0.014805938090444004,0.8137144412305524,0.01679733293983982,0.8035752515521302,0.01677894876820212 +flat_mae,patch,logistic,ppmi_dx,83,0.046415888336127774,test,0.55,0.05260833013886679,0.529239460194581,0.053436522066153305,0.5301358234295416,0.053994028322876676 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,train,0.7348754448398577,0.015613790897136617,0.6904039190313338,0.020000758930861968,0.6837936202098052,0.018047773714408084 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,test,0.56,0.04549241695052044,0.5024875621890548,0.05066687496904613,0.5076400679117148,0.047237717912039665 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,train,0.8309608540925267,0.014901852549504565,0.811398070530551,0.01758455514973016,0.7992266110040677,0.0175714018144904 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,test,0.62,0.04015236481205062,0.5558672276764843,0.04804415077218196,0.5611205432937181,0.0427945142803952 +flat_mae,patch,logistic,ppmi_dx,86,0.046415888336127774,train,0.8131672597864769,0.015654446291126178,0.7920740795551844,0.018350645151204926,0.7812968315135945,0.018187201969749812 +flat_mae,patch,logistic,ppmi_dx,86,0.046415888336127774,test,0.61,0.04254076162928915,0.5555555555555556,0.04981387907152777,0.5581494057724957,0.045593037350450505 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,train,0.7330960854092526,0.01608879440335029,0.6858228980322003,0.021207112597189555,0.6797393491757653,0.018850725285671114 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,test,0.66,0.04226391368531788,0.5952380952380952,0.05431710646340025,0.5984719864176571,0.047106547147884086 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,train,0.8274021352313167,0.015021896101871064,0.8069309911281897,0.01770738781892857,0.794596981374438,0.01755592516746886 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,test,0.61,0.04417569920216317,0.5481404240528328,0.05257902073077197,0.5530560271646858,0.047213707845376485 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,train,0.7259786476868327,0.016281251968569922,0.6753151260504202,0.021245591497645478,0.6704800899165061,0.018683545500614598 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,test,0.65,0.04139381113161725,0.5792763553311696,0.05376786389819831,0.5853140916808149,0.045584148301501655 +flat_mae,patch,logistic,ppmi_dx,90,0.046415888336127774,train,0.8380782918149466,0.01385738986825499,0.8202484930495756,0.016178800348085342,0.8084858702633269,0.01635294995912621 +flat_mae,patch,logistic,ppmi_dx,90,0.046415888336127774,test,0.66,0.047318140284673064,0.6353496353496353,0.04973433739510369,0.634125636672326,0.04900133093246928 +flat_mae,patch,logistic,ppmi_dx,91,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,91,1291.5496650148827,test,0.6,0.04454884959232954,0.586606035551881,0.045406610530680916,0.5908319185059423,0.046566616857737615 +flat_mae,patch,logistic,ppmi_dx,92,0.000774263682681127,train,0.6725978647686833,0.012613743837782559,0.5537834852250759,0.022170040562468688,0.5845108113894241,0.015162644525997377 +flat_mae,patch,logistic,ppmi_dx,92,0.000774263682681127,test,0.68,0.03329549519079119,0.5841995841995842,0.05182629756013288,0.599320882852292,0.039237404267155565 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,train,0.7402135231316725,0.015572854114978638,0.6900985013294658,0.020634907043731953,0.6837802397773496,0.018143970665213973 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,test,0.62,0.04486100756781997,0.5824175824175825,0.05060696295667338,0.5814940577249575,0.04860400608342765 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,train,0.7153024911032029,0.01626461873113908,0.6648777579010137,0.020855922837151434,0.6609398415756798,0.018485084211756198 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,test,0.71,0.037772344380512045,0.6579785352046232,0.04909882078645419,0.6540747028862479,0.04345807527823008 +flat_mae,patch,logistic,ppmi_dx,95,0.046415888336127774,train,0.8149466192170819,0.01521036407650141,0.7916292335115864,0.018107058307068033,0.7792630057803468,0.017736678039991575 +flat_mae,patch,logistic,ppmi_dx,95,0.046415888336127774,test,0.62,0.041778133036314585,0.5703301673450927,0.04889219125540403,0.5713073005093379,0.045235089399417255 +flat_mae,patch,logistic,ppmi_dx,96,0.046415888336127774,train,0.8327402135231317,0.013913005161273438,0.8145527051125434,0.01619326909187275,0.8032808820381074,0.016271227298704435 +flat_mae,patch,logistic,ppmi_dx,96,0.046415888336127774,test,0.67,0.04113612038099849,0.6239316239316239,0.04794758612700136,0.6218166383701189,0.04422469294741887 +flat_mae,patch,logistic,ppmi_dx,97,0.005994842503189409,train,0.7295373665480427,0.015134166508578297,0.6751247413898016,0.020155086637665662,0.6707610789980732,0.017492395937834606 +flat_mae,patch,logistic,ppmi_dx,97,0.005994842503189409,test,0.69,0.040199283575705666,0.6343908479773559,0.051190097743301896,0.6328522920203735,0.045240590250925604 +flat_mae,patch,logistic,ppmi_dx,98,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,98,166.81005372000556,test,0.65,0.045501556017349565,0.6224786970121885,0.04907615910467546,0.6209677419354839,0.04828559827208222 +flat_mae,patch,logistic,ppmi_dx,99,0.046415888336127774,train,0.8256227758007118,0.014766275154183385,0.8051924165251839,0.01746081034643236,0.7931518946692357,0.01740673917870979 +flat_mae,patch,logistic,ppmi_dx,99,0.046415888336127774,test,0.59,0.045177870689088476,0.5464100011063171,0.05004934047381878,0.5471137521222411,0.047525267886794835 +flat_mae,patch,logistic,ppmi_dx,100,0.046415888336127774,train,0.8202846975088968,0.014592786507968485,0.7999950670007012,0.016962791194364408,0.7888166345536287,0.016831759549880487 +flat_mae,patch,logistic,ppmi_dx,100,0.046415888336127774,test,0.64,0.04015714631295406,0.5863970588235294,0.048791337572164785,0.5874363327674024,0.04444214281008521 diff --git a/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic/log.txt b/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..570999f04a7498c46ad51474119351d08357c1e1 --- /dev/null +++ b/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:35 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:15:50 time: 4.0968 data: 3.1600 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:15 time: 0.1714 data: 0.0494 max mem: 2851 +extract (train) [ 40/232] eta: 0:00:48 time: 0.1380 data: 0.0335 max mem: 2851 +extract (train) [ 60/232] eta: 0:00:38 time: 0.1692 data: 0.0486 max mem: 2851 +extract (train) [ 80/232] eta: 0:00:31 time: 0.1485 data: 0.0393 max mem: 2851 +extract (train) [100/232] eta: 0:00:25 time: 0.1526 data: 0.0399 max mem: 2851 +extract (train) [120/232] eta: 0:00:21 time: 0.1547 data: 0.0409 max mem: 2851 +extract (train) [140/232] eta: 0:00:17 time: 0.1760 data: 0.0486 max mem: 2851 +extract (train) [160/232] eta: 0:00:13 time: 0.1556 data: 0.0405 max mem: 2851 +extract (train) [180/232] eta: 0:00:09 time: 0.1465 data: 0.0387 max mem: 2851 +extract (train) [200/232] eta: 0:00:05 time: 0.1630 data: 0.0450 max mem: 2851 +extract (train) [220/232] eta: 0:00:02 time: 0.1475 data: 0.0402 max mem: 2851 +extract (train) [231/232] eta: 0:00:00 time: 0.1357 data: 0.0355 max mem: 2851 +extract (train) Total time: 0:00:40 (0.1739 s / it) +extract (validation) [ 0/50] eta: 0:02:46 time: 3.3309 data: 3.2094 max mem: 2851 +extract (validation) [20/50] eta: 0:00:10 time: 0.2000 data: 0.0607 max mem: 2851 +extract (validation) [40/50] eta: 0:00:02 time: 0.1312 data: 0.0321 max mem: 2851 +extract (validation) [49/50] eta: 0:00:00 time: 0.1309 data: 0.0341 max mem: 2851 +extract (validation) Total time: 0:00:11 (0.2278 s / it) +extract (test) [ 0/50] eta: 0:02:42 time: 3.2469 data: 3.1200 max mem: 2851 +extract (test) [20/50] eta: 0:00:10 time: 0.1918 data: 0.0552 max mem: 2851 +extract (test) [40/50] eta: 0:00:02 time: 0.1316 data: 0.0327 max mem: 2851 +extract (test) [49/50] eta: 0:00:00 time: 0.1317 data: 0.0326 max mem: 2851 +extract (test) Total time: 0:00:11 (0.2228 s / it) +feature extraction time: 0:01:02 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-------:|:--------|--------:|----------:|--------:|----------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 2.7826 | train | 0.99466 | 0.0032038 | 0.99437 | 0.0033855 | 0.99394 | 0.0036897 | +| flat_mae | patch | logistic | ppmi_dx | | 2.7826 | test | 0.63 | 0.046637 | 0.59603 | 0.050177 | 0.59481 | 0.049294 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.042965567609424174, "f1": 0.609375, "f1_std": 0.05299364494136126, "bacc": 0.6086587436332768, "bacc_std": 0.048006286292201016} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.03863583310865705, "f1": 0.5535714285714286, "f1_std": 0.05005477390651157, "bacc": 0.567062818336163, "bacc_std": 0.041567976915763034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.0413418141837051, "f1": 0.5872154735228211, "f1_std": 0.05246277692692931, "bacc": 0.5904074702886248, "bacc_std": 0.045910437735547725} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04326215898449822, "f1": 0.6155585707824514, "f1_std": 0.04954627740140329, "bacc": 0.6137521222410866, "bacc_std": 0.04620503161625719} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.047990378202302175, "f1": 0.6053333333333333, "f1_std": 0.05093381344697975, "bacc": 0.6048387096774194, "bacc_std": 0.05046411617172397} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04098531932289901, "f1": 0.6033177064551027, "f1_std": 0.05521668794452621, "bacc": 0.6065365025466893, "bacc_std": 0.046686074058680437} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.57, "acc_std": 0.05024753128264114, "f1": 0.5459824728117411, "f1_std": 0.05247098811937789, "bacc": 0.5462648556876061, "bacc_std": 0.052674062075452346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.53, "acc_std": 0.043916917013834204, "f1": 0.4643874643874644, "f1_std": 0.04683613009120331, "bacc": 0.4732597623089983, "bacc_std": 0.04384504715126099} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.047870237099893284, "f1": 0.5543293718166383, "f1_std": 0.05054998888663162, "bacc": 0.5543293718166383, "bacc_std": 0.05021844943448105} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 21.54434690031882, "split": "test", "acc": 0.56, "acc_std": 0.04944954600398268, "f1": 0.537620849096259, "f1_std": 0.051066610748901835, "bacc": 0.5382003395585738, "bacc_std": 0.05151089390957462} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 10000.0, "split": "test", "acc": 0.51, "acc_std": 0.04955338131752464, "f1": 0.49541756770672435, "f1_std": 0.049931720433012, "bacc": 0.49787775891341257, "bacc_std": 0.05121415756015187} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.044890515702094576, "f1": 0.5324918186068257, "f1_std": 0.052732320149245934, "bacc": 0.5398981324278438, "bacc_std": 0.047046723729376114} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 1291.5496650148827, "split": "test", "acc": 0.63, "acc_std": 0.04747174317422945, "f1": 0.6093337556752191, "f1_std": 0.04961095468182389, "bacc": 0.6099320882852293, "bacc_std": 0.049949176985838936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.04158605535513076, "f1": 0.5071523019593701, "f1_std": 0.05077033841497736, "bacc": 0.5216468590831919, "bacc_std": 0.04365291317576021} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.041939117778036286, "f1": 0.6176572818908586, "f1_std": 0.05091092198925104, "bacc": 0.616723259762309, "bacc_std": 0.046046198262830476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.040741777084462076, "f1": 0.5, "f1_std": 0.04814962677601584, "bacc": 0.5135823429541596, "bacc_std": 0.04221757963008243} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04746618164546207, "f1": 0.5793334052421529, "f1_std": 0.05088704836640471, "bacc": 0.5785229202037352, "bacc_std": 0.05009765026933367} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04561422585115306, "f1": 0.5324918186068257, "f1_std": 0.05226466588304116, "bacc": 0.5398981324278438, "bacc_std": 0.04719937984179418} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.047430703136259744, "f1": 0.5983935742971888, "f1_std": 0.04777752157865447, "bacc": 0.6039898132427843, "bacc_std": 0.04881459704786047} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04350290105268843, "f1": 0.5558672276764843, "f1_std": 0.05026843191706141, "bacc": 0.5611205432937181, "bacc_std": 0.04514776767559833} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.03672751012524534, "f1": 0.627359057579036, "f1_std": 0.04921084824823078, "bacc": 0.6277589134125636, "bacc_std": 0.0421428631868603} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.57, "acc_std": 0.04726206089454838, "f1": 0.5361881134721174, "f1_std": 0.05022732780455318, "bacc": 0.5360780984719864, "bacc_std": 0.04938003938971144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.57, "acc_std": 0.045104727025002594, "f1": 0.5174503422735944, "f1_std": 0.05193065602054449, "bacc": 0.5207979626485568, "bacc_std": 0.04834043813460352} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04025905612405736, "f1": 0.5628946090335114, "f1_std": 0.051945077781129216, "bacc": 0.5721561969439728, "bacc_std": 0.04373107491527483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.036888176967695224, "f1": 0.49264332825976664, "f1_std": 0.048560508614813945, "bacc": 0.5195246179966044, "bacc_std": 0.038775645580889744} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.0476229986456124, "f1": 0.5953937130407718, "f1_std": 0.04892093195615898, "bacc": 0.5988964346349746, "bacc_std": 0.050064785258821076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 21.54434690031882, "split": "test", "acc": 0.6, "acc_std": 0.04594289934255347, "f1": 0.5796553173602353, "f1_std": 0.047755605485833345, "bacc": 0.5806451612903225, "bacc_std": 0.04822029507144469} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.68, "acc_std": 0.04505951619802415, "f1": 0.64349376114082, "f1_std": 0.05048414411563327, "bacc": 0.6400679117147707, "bacc_std": 0.04778205075273614} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04371073552343863, "f1": 0.5944849959448499, "f1_std": 0.052296286299911975, "bacc": 0.5955008488964346, "bacc_std": 0.04733851143118218} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 166.81005372000556, "split": "test", "acc": 0.6, "acc_std": 0.04750684582247068, "f1": 0.5833333333333333, "f1_std": 0.048420210459806, "bacc": 0.5857385398981324, "bacc_std": 0.04904910831331844} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.042786778331629506, "f1": 0.5792763553311696, "f1_std": 0.05428795720402253, "bacc": 0.5853140916808149, "bacc_std": 0.04667641164031771} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.65, "acc_std": 0.048499117517744585, "f1": 0.6338529134846741, "f1_std": 0.05042591606654472, "bacc": 0.6362478777589134, "bacc_std": 0.051163687277664086} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.04827877380381569, "f1": 0.6010230179028133, "f1_std": 0.048363015895970835, "bacc": 0.6090831918505942, "bacc_std": 0.049278894692317915} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.0437291481737296, "f1": 0.6259934548854604, "f1_std": 0.05424627460274398, "bacc": 0.6247877758913413, "bacc_std": 0.048463300105676795} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 2.782559402207126, "split": "test", "acc": 0.53, "acc_std": 0.04716819267260512, "f1": 0.5219204557013528, "f1_std": 0.04701890959904438, "bacc": 0.5292869269949065, "bacc_std": 0.048159979897378975} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 2.782559402207126, "split": "test", "acc": 0.63, "acc_std": 0.04755828003618298, "f1": 0.6009060511271707, "f1_std": 0.051628775929055494, "bacc": 0.5997453310696095, "bacc_std": 0.050841925333703915} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.0466523997239156, "f1": 0.6179966044142615, "f1_std": 0.04860304902811176, "bacc": 0.6179966044142615, "bacc_std": 0.048815718106102045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04329711306773236, "f1": 0.6043956043956044, "f1_std": 0.04882610229383651, "bacc": 0.6027164685908319, "bacc_std": 0.047021555456442564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.7, "acc_std": 0.04331602936558243, "f1": 0.6744791666666667, "f1_std": 0.04796313233968357, "bacc": 0.6714770797962648, "bacc_std": 0.047114111441141936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 166.81005372000556, "split": "test", "acc": 0.68, "acc_std": 0.04695926319694549, "f1": 0.6715927750410509, "f1_std": 0.047267852612549933, "bacc": 0.6808149405772496, "bacc_std": 0.0478177571602733} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.71, "acc_std": 0.04631265917651458, "f1": 0.6991389148251893, "f1_std": 0.04729671624722055, "bacc": 0.7050084889643464, "bacc_std": 0.047695987562894865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.67, "acc_std": 0.039112841880896344, "f1": 0.6108031607500884, "f1_std": 0.049384140758112746, "bacc": 0.6116298811544991, "bacc_std": 0.04345042400738811} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04132077443611143, "f1": 0.5872154735228211, "f1_std": 0.05065967853883074, "bacc": 0.5904074702886248, "bacc_std": 0.044789943742739244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.037723260728627375, "f1": 0.5706048337627285, "f1_std": 0.05220094945757204, "bacc": 0.580220713073005, "bacc_std": 0.042820348045713094} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.72, "acc_std": 0.038990352652931985, "f1": 0.6666666666666667, "f1_std": 0.05115915779720026, "bacc": 0.6621392190152802, "bacc_std": 0.045250851195623644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 2.782559402207126, "split": "test", "acc": 0.63, "acc_std": 0.047945681765931746, "f1": 0.6093337556752191, "f1_std": 0.050230771319847774, "bacc": 0.6099320882852293, "bacc_std": 0.05033746027429677} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.046589938828034534, "f1": 0.5894909688013137, "f1_std": 0.04717150228119544, "bacc": 0.5959252971137521, "bacc_std": 0.04844875322948227} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.04563640651935689, "f1": 0.6615351020853806, "f1_std": 0.05057358453156994, "bacc": 0.6583191850594228, "bacc_std": 0.049230488071132394} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.037703108625151846, "f1": 0.6343908479773559, "f1_std": 0.049320581959225784, "bacc": 0.6328522920203735, "bacc_std": 0.04303108604330188} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 2.782559402207126, "split": "test", "acc": 0.6, "acc_std": 0.046699361880008596, "f1": 0.570999570999571, "f1_std": 0.05064447039305465, "bacc": 0.5704584040747029, "bacc_std": 0.049886715500986376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04774268949273805, "f1": 0.6138996138996139, "f1_std": 0.05122204295673622, "bacc": 0.6129032258064516, "bacc_std": 0.05049821237560773} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.03606170267749431, "f1": 0.5287698412698413, "f1_std": 0.04859288924269462, "bacc": 0.5458404074702886, "bacc_std": 0.03930999153478549} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04540944395167155, "f1": 0.612789025334661, "f1_std": 0.05143258623545436, "bacc": 0.6107809847198642, "bacc_std": 0.048943603009776376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.041378695001171806, "f1": 0.5481404240528328, "f1_std": 0.04793255709748489, "bacc": 0.5530560271646858, "bacc_std": 0.04338646328621009} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.03772784117863093, "f1": 0.5952380952380952, "f1_std": 0.04856114856135563, "bacc": 0.5984719864176571, "bacc_std": 0.041840866525981686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.047806321757692255, "f1": 0.612789025334661, "f1_std": 0.05337121094195392, "bacc": 0.6107809847198642, "bacc_std": 0.05109208746864999} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04314209081627824, "f1": 0.6212121212121212, "f1_std": 0.049615855897525075, "bacc": 0.6188455008488964, "bacc_std": 0.04691725244017084} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 166.81005372000556, "split": "test", "acc": 0.53, "acc_std": 0.04730014376299506, "f1": 0.4986666666666667, "f1_std": 0.0498073174604663, "bacc": 0.4987266553480475, "bacc_std": 0.04956325965565883} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04647268014651189, "f1": 0.5623386825272135, "f1_std": 0.0517875028203542, "bacc": 0.5632427843803056, "bacc_std": 0.04853642738330733} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04377285003286855, "f1": 0.5713127099988413, "f1_std": 0.05211834284298713, "bacc": 0.5742784380305602, "bacc_std": 0.04700746600357921} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.04449130701609023, "f1": 0.6419437340153453, "f1_std": 0.04459271530769582, "bacc": 0.6515280135823429, "bacc_std": 0.04552994289255248} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.044981218302753874, "f1": 0.5783475783475784, "f1_std": 0.05259371490050221, "bacc": 0.5793718166383701, "bacc_std": 0.048232809413455535} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.57, "acc_std": 0.0446347443142671, "f1": 0.50997150997151, "f1_std": 0.04985826886762561, "bacc": 0.515704584040747, "bacc_std": 0.04585897834071706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.56, "acc_std": 0.04311419255883147, "f1": 0.5164835164835164, "f1_std": 0.047698756519742716, "bacc": 0.5178268251273345, "bacc_std": 0.045669107686708295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.56, "acc_std": 0.048057544673027135, "f1": 0.5225694444444444, "f1_std": 0.050227335134383655, "bacc": 0.5229202037351443, "bacc_std": 0.04886333606550463} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04492035618736789, "f1": 0.5766488413547237, "f1_std": 0.04979178847644729, "bacc": 0.5764006791171477, "bacc_std": 0.04714141921543799} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.0417577537710064, "f1": 0.64349376114082, "f1_std": 0.04743842131872774, "bacc": 0.6400679117147707, "bacc_std": 0.04537473097325511} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 166.81005372000556, "split": "test", "acc": 0.49, "acc_std": 0.04732716344764389, "f1": 0.4615140956604371, "f1_std": 0.04785001760218066, "bacc": 0.4613752122241087, "bacc_std": 0.04799125816343391} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 2.782559402207126, "split": "test", "acc": 0.62, "acc_std": 0.04823500388721867, "f1": 0.6100164203612479, "f1_std": 0.04839652902488723, "bacc": 0.6171477079796265, "bacc_std": 0.049481507036448734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04137088348101839, "f1": 0.5558672276764843, "f1_std": 0.05024800786282175, "bacc": 0.5611205432937181, "bacc_std": 0.0444619989746408} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04382873942973948, "f1": 0.5792426367461431, "f1_std": 0.052995812791048244, "bacc": 0.5823429541595926, "bacc_std": 0.04709789515804875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04850927746318224, "f1": 0.584, "f1_std": 0.050834067044355176, "bacc": 0.583616298811545, "bacc_std": 0.05053627715343163} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04544877996162272, "f1": 0.6440513428972063, "f1_std": 0.049351515600487145, "bacc": 0.6421901528013583, "bacc_std": 0.04885573943048031} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.57, "acc_std": 0.03884685830282803, "f1": 0.49286472461375164, "f1_std": 0.046950021807104655, "bacc": 0.5055178268251274, "bacc_std": 0.04078457431236075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.04558333028641062, "f1": 0.6053333333333333, "f1_std": 0.04829309239815553, "bacc": 0.6048387096774194, "bacc_std": 0.04828085694865461} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.04001661654862889, "f1": 0.6553308823529411, "f1_std": 0.04934657780882658, "bacc": 0.6511035653650254, "bacc_std": 0.04503407587085904} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04226156646410542, "f1": 0.6072270227808326, "f1_std": 0.049257171846027174, "bacc": 0.6056876061120543, "bacc_std": 0.046233704220901464} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.036805494155085064, "f1": 0.5535714285714286, "f1_std": 0.04814877497758012, "bacc": 0.567062818336163, "bacc_std": 0.0397539121109246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.039943179643088016, "f1": 0.5386109762020399, "f1_std": 0.05029535151828152, "bacc": 0.5509337860780985, "bacc_std": 0.04254534517406859} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 1291.5496650148827, "split": "test", "acc": 0.54, "acc_std": 0.05188918577121826, "f1": 0.5245969408846631, "f1_std": 0.05259700765927105, "bacc": 0.5271646859083192, "bacc_std": 0.05381489675583566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04745218646174272, "f1": 0.5710848415106182, "f1_std": 0.0490039270137317, "bacc": 0.5725806451612903, "bacc_std": 0.049692458813771996} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.64, "acc_std": 0.04532469084285077, "f1": 0.6043956043956044, "f1_std": 0.0506779186406389, "bacc": 0.6027164685908319, "bacc_std": 0.04859076739847815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.55, "acc_std": 0.05260833013886679, "f1": 0.529239460194581, "f1_std": 0.053436522066153305, "bacc": 0.5301358234295416, "bacc_std": 0.053994028322876676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.56, "acc_std": 0.04549241695052044, "f1": 0.5024875621890548, "f1_std": 0.05066687496904613, "bacc": 0.5076400679117148, "bacc_std": 0.047237717912039665} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04015236481205062, "f1": 0.5558672276764843, "f1_std": 0.04804415077218196, "bacc": 0.5611205432937181, "bacc_std": 0.0427945142803952} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04254076162928915, "f1": 0.5555555555555556, "f1_std": 0.04981387907152777, "bacc": 0.5581494057724957, "bacc_std": 0.045593037350450505} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04226391368531788, "f1": 0.5952380952380952, "f1_std": 0.05431710646340025, "bacc": 0.5984719864176571, "bacc_std": 0.047106547147884086} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04417569920216317, "f1": 0.5481404240528328, "f1_std": 0.05257902073077197, "bacc": 0.5530560271646858, "bacc_std": 0.047213707845376485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04139381113161725, "f1": 0.5792763553311696, "f1_std": 0.05376786389819831, "bacc": 0.5853140916808149, "bacc_std": 0.045584148301501655} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.047318140284673064, "f1": 0.6353496353496353, "f1_std": 0.04973433739510369, "bacc": 0.634125636672326, "bacc_std": 0.04900133093246928} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 1291.5496650148827, "split": "test", "acc": 0.6, "acc_std": 0.04454884959232954, "f1": 0.586606035551881, "f1_std": 0.045406610530680916, "bacc": 0.5908319185059423, "bacc_std": 0.046566616857737615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 0.000774263682681127, "split": "test", "acc": 0.68, "acc_std": 0.03329549519079119, "f1": 0.5841995841995842, "f1_std": 0.05182629756013288, "bacc": 0.599320882852292, "bacc_std": 0.039237404267155565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04486100756781997, "f1": 0.5824175824175825, "f1_std": 0.05060696295667338, "bacc": 0.5814940577249575, "bacc_std": 0.04860400608342765} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.037772344380512045, "f1": 0.6579785352046232, "f1_std": 0.04909882078645419, "bacc": 0.6540747028862479, "bacc_std": 0.04345807527823008} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.041778133036314585, "f1": 0.5703301673450927, "f1_std": 0.04889219125540403, "bacc": 0.5713073005093379, "bacc_std": 0.045235089399417255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04113612038099849, "f1": 0.6239316239316239, "f1_std": 0.04794758612700136, "bacc": 0.6218166383701189, "bacc_std": 0.04422469294741887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.040199283575705666, "f1": 0.6343908479773559, "f1_std": 0.051190097743301896, "bacc": 0.6328522920203735, "bacc_std": 0.045240590250925604} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 166.81005372000556, "split": "test", "acc": 0.65, "acc_std": 0.045501556017349565, "f1": 0.6224786970121885, "f1_std": 0.04907615910467546, "bacc": 0.6209677419354839, "bacc_std": 0.04828559827208222} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.045177870689088476, "f1": 0.5464100011063171, "f1_std": 0.05004934047381878, "bacc": 0.5471137521222411, "bacc_std": 0.047525267886794835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04015714631295406, "f1": 0.5863970588235294, "f1_std": 0.048791337572164785, "bacc": 0.5874363327674024, "bacc_std": 0.04444214281008521} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 147.82 | 1019.8 | 0.84738 | 0.099419 | 0.82444 | 0.1193 | 0.81819 | 0.12022 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 147.82 | 1019.8 | 0.6255 | 0.046305 | 0.58108 | 0.048686 | 0.58399 | 0.046303 | + + +done! total time: 0:05:01 diff --git a/data_scaling/n800_2/pretrain/config.yaml b/data_scaling/n800_2/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d755a160eea22d0919b980210dc0a97208dae13 --- /dev/null +++ b/data_scaling/n800_2/pretrain/config.yaml @@ -0,0 +1,109 @@ +name: data_scaling/n800_2/pretrain +notes: data scaling experiment n800_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n800_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..01599}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 diff --git a/data_scaling/n800_2/pretrain/log.json b/data_scaling/n800_2/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..52448a121dd0bd59e3d9c86334dd87a579c5b7a2 --- /dev/null +++ b/data_scaling/n800_2/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.05098469931155443, "train/loss": 0.9934885198974609, "eval/hcp-train-subset/loss": 0.9902102908780498, "eval/hcp-val/loss": 0.9901342843809435, "eval/nsd-val/loss": 0.9907482381789915} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.07953966767311096, "train/loss": 0.9885122758102417, "eval/hcp-train-subset/loss": 0.9873889915404781, "eval/hcp-val/loss": 0.9873921909639912, "eval/nsd-val/loss": 0.9879477966216302} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.1287920453046686, "train/loss": 0.9851705249595643, "eval/hcp-train-subset/loss": 0.9839521781090768, "eval/hcp-val/loss": 0.9828871623162301, "eval/nsd-val/loss": 0.9820270451807207} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.20426896531793792, "train/loss": 0.9753198632144928, "eval/hcp-train-subset/loss": 0.9710049340801854, "eval/hcp-val/loss": 0.970623726806333, "eval/nsd-val/loss": 0.9632984899705456} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.26035058386254073, "train/loss": 0.9408004950904846, "eval/hcp-train-subset/loss": 0.921384145175257, "eval/hcp-val/loss": 0.9200737485962529, "eval/nsd-val/loss": 0.8896638279961001} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.181959671494235, "train/loss": 0.9020892810726165, "eval/hcp-train-subset/loss": 0.8824390490208903, "eval/hcp-val/loss": 0.8803831367723404, "eval/nsd-val/loss": 0.8462768841174341} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.11743162475212762, "train/loss": 0.8735933842372894, "eval/hcp-train-subset/loss": 0.8668222465822774, "eval/hcp-val/loss": 0.8648504893625936, "eval/nsd-val/loss": 0.8343504830714195} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.09399318378833135, "train/loss": 0.8620397812366486, "eval/hcp-train-subset/loss": 0.8606643542166679, "eval/hcp-val/loss": 0.8584034067969168, "eval/nsd-val/loss": 0.8268223664452953} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.08503866423381715, "train/loss": 0.8552363811969758, "eval/hcp-train-subset/loss": 0.8565763456206168, "eval/hcp-val/loss": 0.8545147324762037, "eval/nsd-val/loss": 0.8223581006450038} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.07922675374406937, "train/loss": 0.8517686851787567, "eval/hcp-train-subset/loss": 0.8537775806842312, "eval/hcp-val/loss": 0.8519887232011364, "eval/nsd-val/loss": 0.8166634113557877} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.07443710844713369, "train/loss": 0.8484348146152496, "eval/hcp-train-subset/loss": 0.8526360440638757, "eval/hcp-val/loss": 0.8504398811248041, "eval/nsd-val/loss": 0.8170408289278707} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.07308620871172121, "train/loss": 0.8471039885234832, "eval/hcp-train-subset/loss": 0.8506367860301849, "eval/hcp-val/loss": 0.8483857656678846, "eval/nsd-val/loss": 0.8183306868999235} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.07267813884637941, "train/loss": 0.8431984373283387, "eval/hcp-train-subset/loss": 0.8495152544590735, "eval/hcp-val/loss": 0.8471457179515592, "eval/nsd-val/loss": 0.8185621471174301} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.07175791359574518, "train/loss": 0.8439267409515381, "eval/hcp-train-subset/loss": 0.8501720582285235, "eval/hcp-val/loss": 0.8480460749518487, "eval/nsd-val/loss": 0.8200230483085879} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.07053452558251529, "train/loss": 0.8430005910110474, "eval/hcp-train-subset/loss": 0.8478179997013461, "eval/hcp-val/loss": 0.8457503530286974, "eval/nsd-val/loss": 0.8181626316039793} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.07321041871988718, "train/loss": 0.838122406206131, "eval/hcp-train-subset/loss": 0.8473840990374165, "eval/hcp-val/loss": 0.8450958978745245, "eval/nsd-val/loss": 0.8175119997993592} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.07244404142909437, "train/loss": 0.8367186047172547, "eval/hcp-train-subset/loss": 0.8464752089592719, "eval/hcp-val/loss": 0.8446893538198164, "eval/nsd-val/loss": 0.8178253875624749} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.07265476700038333, "train/loss": 0.8361964552974701, "eval/hcp-train-subset/loss": 0.8467465917910298, "eval/hcp-val/loss": 0.8446059976854632, "eval/nsd-val/loss": 0.8177284963669316} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.0736751946982468, "train/loss": 0.8366876863956452, "eval/hcp-train-subset/loss": 0.8454516347377531, "eval/hcp-val/loss": 0.844482232486048, "eval/nsd-val/loss": 0.8141418445494867} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.07560572217526036, "train/loss": 0.8329440010261535, "eval/hcp-train-subset/loss": 0.8458958200870021, "eval/hcp-val/loss": 0.843685987495607, "eval/nsd-val/loss": 0.8169836017393297} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.07419760419828718, "train/loss": 0.8341310118293762, "eval/hcp-train-subset/loss": 0.8460954571923902, "eval/hcp-val/loss": 0.8436165682731136, "eval/nsd-val/loss": 0.8198120344069696} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.07494731936427866, "train/loss": 0.8306362971782685, "eval/hcp-train-subset/loss": 0.8451821515637059, "eval/hcp-val/loss": 0.8431281351274059, "eval/nsd-val/loss": 0.8158333551499152} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.07705757522465649, "train/loss": 0.8292590542316437, "eval/hcp-train-subset/loss": 0.8457233675064579, "eval/hcp-val/loss": 0.8432603949500669, "eval/nsd-val/loss": 0.8163766034187809} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.0783461550602784, "train/loss": 0.8291799073028564, "eval/hcp-train-subset/loss": 0.8455486960949437, "eval/hcp-val/loss": 0.843242795236649, "eval/nsd-val/loss": 0.8133091609324178} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.07928959175076207, "train/loss": 0.8279069058418274, "eval/hcp-train-subset/loss": 0.8464457758011357, "eval/hcp-val/loss": 0.8441989431458135, "eval/nsd-val/loss": 0.8190398081656425} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.07976362226523755, "train/loss": 0.8285144947814942, "eval/hcp-train-subset/loss": 0.8449495223260695, "eval/hcp-val/loss": 0.8423991155239844, "eval/nsd-val/loss": 0.8170179618943122} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.081237383852431, "train/loss": 0.8243327783107758, "eval/hcp-train-subset/loss": 0.8438753716407283, "eval/hcp-val/loss": 0.8420155654030461, "eval/nsd-val/loss": 0.8177155640817457} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.08485726716232636, "train/loss": 0.8237346849918366, "eval/hcp-train-subset/loss": 0.844212434945568, "eval/hcp-val/loss": 0.8422406815713451, "eval/nsd-val/loss": 0.8221738934516907} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.08284053149067397, "train/loss": 0.8247884890079499, "eval/hcp-train-subset/loss": 0.8443274507599492, "eval/hcp-val/loss": 0.8422910436507194, "eval/nsd-val/loss": 0.8191583156585693} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.08585456695665758, "train/loss": 0.8222009373664856, "eval/hcp-train-subset/loss": 0.8447320893887551, "eval/hcp-val/loss": 0.8427495091192184, "eval/nsd-val/loss": 0.8249363216661638} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.08657234158932152, "train/loss": 0.8230069062805175, "eval/hcp-train-subset/loss": 0.843877024227573, "eval/hcp-val/loss": 0.841970439880125, "eval/nsd-val/loss": 0.815079782278307} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.08826114315641538, "train/loss": 0.8215577190589904, "eval/hcp-train-subset/loss": 0.8433078815860133, "eval/hcp-val/loss": 0.841177063603555, "eval/nsd-val/loss": 0.8169773749766811} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.08928151037505251, "train/loss": 0.8196566376972199, "eval/hcp-train-subset/loss": 0.8437221511717765, "eval/hcp-val/loss": 0.8414053926544804, "eval/nsd-val/loss": 0.822688183476848} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.08949579266055749, "train/loss": 0.8195216508102418, "eval/hcp-train-subset/loss": 0.8455671206597359, "eval/hcp-val/loss": 0.8427383966984288, "eval/nsd-val/loss": 0.8234832161857236} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.0909292176431428, "train/loss": 0.8208085696697235, "eval/hcp-train-subset/loss": 0.8435362663961226, "eval/hcp-val/loss": 0.8421521850170628, "eval/nsd-val/loss": 0.817135202307855} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.09100930875831267, "train/loss": 0.8175050644874573, "eval/hcp-train-subset/loss": 0.8446491552937415, "eval/hcp-val/loss": 0.8426403951260352, "eval/nsd-val/loss": 0.8185918292691631} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.09257715624410101, "train/loss": 0.8159727197360992, "eval/hcp-train-subset/loss": 0.8428767208130129, "eval/hcp-val/loss": 0.8414468428780956, "eval/nsd-val/loss": 0.8132813476747082} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.09433259605724506, "train/loss": 0.816439276304245, "eval/hcp-train-subset/loss": 0.8445392750924633, "eval/hcp-val/loss": 0.8419067398194344, "eval/nsd-val/loss": 0.8221943801449191} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.09795117366844909, "train/loss": 0.8148298243045807, "eval/hcp-train-subset/loss": 0.8436413493848616, "eval/hcp-val/loss": 0.8414211061693007, "eval/nsd-val/loss": 0.8274644834379996} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.09691536543704808, "train/loss": 0.8156349912834168, "eval/hcp-train-subset/loss": 0.8436287804957359, "eval/hcp-val/loss": 0.841114156669186, "eval/nsd-val/loss": 0.8295753117530577} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.09976760653967322, "train/loss": 0.8127594681453705, "eval/hcp-train-subset/loss": 0.843525045341061, "eval/hcp-val/loss": 0.8413848367429548, "eval/nsd-val/loss": 0.8306991246438795} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.10111115552024694, "train/loss": 0.8129432416629792, "eval/hcp-train-subset/loss": 0.843137639184152, "eval/hcp-val/loss": 0.8402856761409391, "eval/nsd-val/loss": 0.8172005386121811} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.10185658783066438, "train/loss": 0.8124497267913818, "eval/hcp-train-subset/loss": 0.8425052348644503, "eval/hcp-val/loss": 0.8404677173783702, "eval/nsd-val/loss": 0.8239226331633906} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.10470314192133699, "train/loss": 0.8110775420284271, "eval/hcp-train-subset/loss": 0.8428715102134212, "eval/hcp-val/loss": 0.8405435806320559, "eval/nsd-val/loss": 0.8214718084181508} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.10678436166812065, "train/loss": 0.8074876543331146, "eval/hcp-train-subset/loss": 0.8443741538832265, "eval/hcp-val/loss": 0.8423017974822752, "eval/nsd-val/loss": 0.8240216730102417} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.1074155809780264, "train/loss": 0.8086731369495392, "eval/hcp-train-subset/loss": 0.8425911684190074, "eval/hcp-val/loss": 0.8405990177585233, "eval/nsd-val/loss": 0.826582049169848} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.11285051308084493, "train/loss": 0.8057534601402283, "eval/hcp-train-subset/loss": 0.8433406391451436, "eval/hcp-val/loss": 0.841125052782797, "eval/nsd-val/loss": 0.8468668556982472} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.114188050482155, "train/loss": 0.8057515702915191, "eval/hcp-train-subset/loss": 0.8440623735227892, "eval/hcp-val/loss": 0.8415794459081465, "eval/nsd-val/loss": 0.8210695491683099} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.11096123617889404, "train/loss": 0.8090394573783874, "eval/hcp-train-subset/loss": 0.8436014979116379, "eval/hcp-val/loss": 0.842042677825497, "eval/nsd-val/loss": 0.8229707066089876} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.11474478043936645, "train/loss": 0.8064780487632751, "eval/hcp-train-subset/loss": 0.8444455669772241, "eval/hcp-val/loss": 0.8419626518603294, "eval/nsd-val/loss": 0.8231260968792823} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.11564800290214362, "train/loss": 0.8062601385498047, "eval/hcp-train-subset/loss": 0.8433469341647241, "eval/hcp-val/loss": 0.8408673319124407, "eval/nsd-val/loss": 0.8197620895601088} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.11766397461893846, "train/loss": 0.8050249599456787, "eval/hcp-train-subset/loss": 0.8431119611186366, "eval/hcp-val/loss": 0.8408786442971998, "eval/nsd-val/loss": 0.8201677549269891} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.12151808318926065, "train/loss": 0.8028133444023132, "eval/hcp-train-subset/loss": 0.8427937944089213, "eval/hcp-val/loss": 0.8409540114864227, "eval/nsd-val/loss": 0.8307019356758364} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.12182535644180108, "train/loss": 0.8037726181125641, "eval/hcp-train-subset/loss": 0.8421001088234686, "eval/hcp-val/loss": 0.8404201778673357, "eval/nsd-val/loss": 0.8331245770377498} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.12509398463632235, "train/loss": 0.800190130405426, "eval/hcp-train-subset/loss": 0.8430019684376255, "eval/hcp-val/loss": 0.8413837830866536, "eval/nsd-val/loss": 0.8250461028468224} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.12493026092596038, "train/loss": 0.8011229005146027, "eval/hcp-train-subset/loss": 0.8436620610375558, "eval/hcp-val/loss": 0.8418379060683712, "eval/nsd-val/loss": 0.8253894275234591} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.12686508633530247, "train/loss": 0.7997561521339417, "eval/hcp-train-subset/loss": 0.8421973461104978, "eval/hcp-val/loss": 0.8409453793879478, "eval/nsd-val/loss": 0.8250780057522559} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.12987577412563653, "train/loss": 0.8019630799865722, "eval/hcp-train-subset/loss": 0.8431943049353938, "eval/hcp-val/loss": 0.8417241544492783, "eval/nsd-val/loss": 0.8373211872193121} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.13120909772332676, "train/loss": 0.7981452948760986, "eval/hcp-train-subset/loss": 0.842899392689428, "eval/hcp-val/loss": 0.8408517904819981, "eval/nsd-val/loss": 0.8292867848950047} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.13945020883846146, "train/loss": 0.7924873653888702, "eval/hcp-train-subset/loss": 0.8440605603879497, "eval/hcp-val/loss": 0.8415436533189589, "eval/nsd-val/loss": 0.8356711191515769} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.14041676265290445, "train/loss": 0.7924974319458008, "eval/hcp-train-subset/loss": 0.8433164223547904, "eval/hcp-val/loss": 0.8414538339261086, "eval/nsd-val/loss": 0.836162272960909} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.14095196249504474, "train/loss": 0.7934636704730987, "eval/hcp-train-subset/loss": 0.8427988540741705, "eval/hcp-val/loss": 0.8408546668867911, "eval/nsd-val/loss": 0.8300950123417762} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.14232108703839488, "train/loss": 0.7916637021160126, "eval/hcp-train-subset/loss": 0.8427223765080974, "eval/hcp-val/loss": 0.8407690525054932, "eval/nsd-val/loss": 0.8392817031952643} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.14296667235473337, "train/loss": 0.7950353825855255, "eval/hcp-train-subset/loss": 0.8428673282746346, "eval/hcp-val/loss": 0.8405877447897389, "eval/nsd-val/loss": 0.8272721479015965} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.14817403000909857, "train/loss": 0.7908260770893097, "eval/hcp-train-subset/loss": 0.8430321101219423, "eval/hcp-val/loss": 0.841394264851847, "eval/nsd-val/loss": 0.8329089216647609} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.14601490632527273, "train/loss": 0.7928474463653564, "eval/hcp-train-subset/loss": 0.8434614289191461, "eval/hcp-val/loss": 0.8416210884048093, "eval/nsd-val/loss": 0.8277506395693748} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.14728852387679298, "train/loss": 0.7929807115364075, "eval/hcp-train-subset/loss": 0.8416977120983985, "eval/hcp-val/loss": 0.840493478121296, "eval/nsd-val/loss": 0.84490974872343} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.15565755097350528, "train/loss": 0.7897666271972656, "eval/hcp-train-subset/loss": 0.8416866554367927, "eval/hcp-val/loss": 0.8404190376881631, "eval/nsd-val/loss": 0.8290546257649699} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.1570381360359862, "train/loss": 0.7879301334953308, "eval/hcp-train-subset/loss": 0.8420457638079121, "eval/hcp-val/loss": 0.8408514693860085, "eval/nsd-val/loss": 0.8376765972183596} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.15801996613581987, "train/loss": 0.7894675493240356, "eval/hcp-train-subset/loss": 0.8421359735150491, "eval/hcp-val/loss": 0.8408125869689449, "eval/nsd-val/loss": 0.8288647484394812} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.1603826898735651, "train/loss": 0.7872672140979767, "eval/hcp-train-subset/loss": 0.8423042287749629, "eval/hcp-val/loss": 0.8413743415186482, "eval/nsd-val/loss": 0.8244126958231772} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.15869783357181677, "train/loss": 0.7917103542900086, "eval/hcp-train-subset/loss": 0.84211235757797, "eval/hcp-val/loss": 0.8395244988702959, "eval/nsd-val/loss": 0.8233700375403127} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.1614448767014078, "train/loss": 0.7891969047069549, "eval/hcp-train-subset/loss": 0.8429608489236524, "eval/hcp-val/loss": 0.8414271291225187, "eval/nsd-val/loss": 0.8282984331730874} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.16505436749864136, "train/loss": 0.7862840759563446, "eval/hcp-train-subset/loss": 0.8428716736455117, "eval/hcp-val/loss": 0.8413473136963383, "eval/nsd-val/loss": 0.8288438733546964} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.165566777076667, "train/loss": 0.7867110289764404, "eval/hcp-train-subset/loss": 0.8424237024399542, "eval/hcp-val/loss": 0.8409657372582343, "eval/nsd-val/loss": 0.837528173961947} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.16719866003492306, "train/loss": 0.7884409356594085, "eval/hcp-train-subset/loss": 0.8424048394926132, "eval/hcp-val/loss": 0.8409663063864554, "eval/nsd-val/loss": 0.8296769472860521} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.16785319926624662, "train/loss": 0.7890805594444275, "eval/hcp-train-subset/loss": 0.8439437074045981, "eval/hcp-val/loss": 0.8420140502914306, "eval/nsd-val/loss": 0.8360825950099576} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.1713220970390586, "train/loss": 0.7883131935024261, "eval/hcp-train-subset/loss": 0.8424387776082561, "eval/hcp-val/loss": 0.8416969593494169, "eval/nsd-val/loss": 0.8399562364624392} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.17767688348321733, "train/loss": 0.782272384262085, "eval/hcp-train-subset/loss": 0.8422876509927935, "eval/hcp-val/loss": 0.8409331056379503, "eval/nsd-val/loss": 0.8300209276137813} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.17422625225123195, "train/loss": 0.7862755047035217, "eval/hcp-train-subset/loss": 0.8423484534986557, "eval/hcp-val/loss": 0.840327883920362, "eval/nsd-val/loss": 0.8292245105389626} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.1734416868501288, "train/loss": 0.7876783741092682, "eval/hcp-train-subset/loss": 0.8419932598067869, "eval/hcp-val/loss": 0.8405677753110086, "eval/nsd-val/loss": 0.8336970133166159} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.18218625031920915, "train/loss": 0.7820092810630799, "eval/hcp-train-subset/loss": 0.8422991806460965, "eval/hcp-val/loss": 0.8412260576601951, "eval/nsd-val/loss": 0.8364903696121708} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.1801007508070283, "train/loss": 0.7845840793609619, "eval/hcp-train-subset/loss": 0.8418723392871118, "eval/hcp-val/loss": 0.8401570435493223, "eval/nsd-val/loss": 0.8329783331963324} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.17978437064203315, "train/loss": 0.7868997272205353, "eval/hcp-train-subset/loss": 0.8422141796158206, "eval/hcp-val/loss": 0.84082642389882, "eval/nsd-val/loss": 0.832224428653717} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.18189704711816587, "train/loss": 0.7866698765087128, "eval/hcp-train-subset/loss": 0.8418046591743347, "eval/hcp-val/loss": 0.8410524245231382, "eval/nsd-val/loss": 0.8307895429672734} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.18315649044754068, "train/loss": 0.7842455362701416, "eval/hcp-train-subset/loss": 0.8422102697433964, "eval/hcp-val/loss": 0.8404801920537026, "eval/nsd-val/loss": 0.8326462141929134} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.18668252439446814, "train/loss": 0.7873305184555054, "eval/hcp-train-subset/loss": 0.8419908035186029, "eval/hcp-val/loss": 0.840511744060824, "eval/nsd-val/loss": 0.8292310881999231} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.18552495869013158, "train/loss": 0.7846665522766113, "eval/hcp-train-subset/loss": 0.8423668126906118, "eval/hcp-val/loss": 0.8403524102703217, "eval/nsd-val/loss": 0.8323641284819572} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.19169863628249773, "train/loss": 0.7834697227573395, "eval/hcp-train-subset/loss": 0.8426830220607019, "eval/hcp-val/loss": 0.8409425645105301, "eval/nsd-val/loss": 0.836781257583249} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.18782861619872895, "train/loss": 0.7866548924827576, "eval/hcp-train-subset/loss": 0.8425134872236559, "eval/hcp-val/loss": 0.8407469930187348, "eval/nsd-val/loss": 0.8345742600579416} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.18634410905824658, "train/loss": 0.7870230414581298, "eval/hcp-train-subset/loss": 0.8426731805647573, "eval/hcp-val/loss": 0.8406772257820252, "eval/nsd-val/loss": 0.829420413701765} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.19217269975117765, "train/loss": 0.7849380237102509, "eval/hcp-train-subset/loss": 0.8419400299749067, "eval/hcp-val/loss": 0.8408345628169275, "eval/nsd-val/loss": 0.8344816998127969} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.19308476994307794, "train/loss": 0.786316442270279, "eval/hcp-train-subset/loss": 0.8417019834441524, "eval/hcp-val/loss": 0.8400879261955139, "eval/nsd-val/loss": 0.8351359357756953} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.1894531275395642, "train/loss": 0.786865400390625, "eval/hcp-train-subset/loss": 0.8417422521498895, "eval/hcp-val/loss": 0.8399498904905012, "eval/nsd-val/loss": 0.832010769074963} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.18847570516331247, "train/loss": 0.788023424654007, "eval/hcp-train-subset/loss": 0.8413444026823966, "eval/hcp-val/loss": 0.8399091343725881, "eval/nsd-val/loss": 0.8309096847811053} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.19292650565271227, "train/loss": 0.7845235390090942, "eval/hcp-train-subset/loss": 0.8415541860365099, "eval/hcp-val/loss": 0.8398698722162554, "eval/nsd-val/loss": 0.83267129236652} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.19349908181086867, "train/loss": 0.787151837425232, "eval/hcp-train-subset/loss": 0.8416543872125687, "eval/hcp-val/loss": 0.8399008695156344, "eval/nsd-val/loss": 0.8309409743355166} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.1957570533929977, "train/loss": 0.7847154878425598, "eval/hcp-train-subset/loss": 0.8414170347875164, "eval/hcp-val/loss": 0.8399324955478791, "eval/nsd-val/loss": 0.8318648761318576} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.20059686467013335, "train/loss": 0.7830841287136078, "eval/hcp-train-subset/loss": 0.8412752593717268, "eval/hcp-val/loss": 0.839759002770147, "eval/nsd-val/loss": 0.8320935791538607} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.19677981934850028, "train/loss": 0.7877548532581329, "eval/hcp-train-subset/loss": 0.8418162368958996, "eval/hcp-val/loss": 0.8394508688680588, "eval/nsd-val/loss": 0.8318568362343696} diff --git a/data_scaling/n800_2/pretrain/log.txt b/data_scaling/n800_2/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..05becafeac58e3a0bd15a7026dbe85a82d4f71e9 --- /dev/null +++ b/data_scaling/n800_2/pretrain/log.txt @@ -0,0 +1,8252 @@ +pretraining fmri mae +start: 2026-01-17 20:36:16 +cwd: /admin/home/connor/fmri-fm +sha: 4c3ccfb0b63e4f01e9758042b5299530a6d93949, status: has uncommitted changes, branch: dev/clane9 +config: +name: data_scaling/n800_2/pretrain +notes: data scaling experiment n800_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n800_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..01599}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +val transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..01599}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [8543, 6917, 6772, 3955, 6165, 1554, 1082, 5811, 6919, 3150] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +loading dataset: nsd-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1493, 4276, 245, 3092, 3905, 1862, 2362, 4411, 1138, 2824] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=0, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=True, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 9:41:56 lr: 0.000000 grad: 0.0120 (0.0120) loss: 0.9965 (0.9965) time: 5.5867 data: 3.5930 max mem: 8570 +Train: [0] [ 100/6250] eta: 0:20:59 lr: 0.000000 grad: 0.0131 (0.0147) loss: 0.9962 (0.9960) time: 0.1400 data: 0.0632 max mem: 9377 +Train: [0] [ 200/6250] eta: 0:18:03 lr: 0.000001 grad: 0.0138 (0.0143) loss: 0.9954 (0.9959) time: 0.1515 data: 0.0621 max mem: 9377 +Train: [0] [ 300/6250] eta: 0:16:26 lr: 0.000001 grad: 0.0123 (0.0139) loss: 0.9954 (0.9958) time: 0.1450 data: 0.0566 max mem: 9377 +Train: [0] [ 400/6250] eta: 0:15:37 lr: 0.000002 grad: 0.0128 (0.0136) loss: 0.9957 (0.9958) time: 0.1535 data: 0.0634 max mem: 9377 +Train: [0] [ 500/6250] eta: 0:15:03 lr: 0.000002 grad: 0.0129 (0.0134) loss: 0.9955 (0.9958) time: 0.1525 data: 0.0707 max mem: 9377 +Train: [0] [ 600/6250] eta: 0:14:37 lr: 0.000002 grad: 0.0128 (0.0133) loss: 0.9964 (0.9959) time: 0.1523 data: 0.0630 max mem: 9377 +Train: [0] [ 700/6250] eta: 0:14:12 lr: 0.000003 grad: 0.0126 (0.0133) loss: 0.9957 (0.9958) time: 0.1077 data: 0.0144 max mem: 9377 +Train: [0] [ 800/6250] eta: 0:13:47 lr: 0.000003 grad: 0.0132 (0.0132) loss: 0.9959 (0.9958) time: 0.1410 data: 0.0603 max mem: 9377 +Train: [0] [ 900/6250] eta: 0:13:24 lr: 0.000004 grad: 0.0134 (0.0132) loss: 0.9955 (0.9958) time: 0.1338 data: 0.0460 max mem: 9377 +Train: [0] [1000/6250] eta: 0:12:59 lr: 0.000004 grad: 0.0144 (0.0133) loss: 0.9958 (0.9958) time: 0.1302 data: 0.0460 max mem: 9377 +Train: [0] [1100/6250] eta: 0:12:35 lr: 0.000004 grad: 0.0147 (0.0134) loss: 0.9957 (0.9958) time: 0.1182 data: 0.0340 max mem: 9377 +Train: [0] [1200/6250] eta: 0:12:14 lr: 0.000005 grad: 0.0176 (0.0136) loss: 0.9953 (0.9958) time: 0.1408 data: 0.0609 max mem: 9377 +Train: [0] [1300/6250] eta: 0:11:52 lr: 0.000005 grad: 0.0198 (0.0141) loss: 0.9956 (0.9958) time: 0.1305 data: 0.0469 max mem: 9377 +Train: [0] [1400/6250] eta: 0:11:32 lr: 0.000006 grad: 0.0208 (0.0146) loss: 0.9955 (0.9958) time: 0.1312 data: 0.0481 max mem: 9377 +Train: [0] [1500/6250] eta: 0:11:13 lr: 0.000006 grad: 0.0255 (0.0153) loss: 0.9949 (0.9958) time: 0.1199 data: 0.0291 max mem: 9377 +Train: [0] [1600/6250] eta: 0:10:56 lr: 0.000006 grad: 0.0266 (0.0161) loss: 0.9957 (0.9957) time: 0.1249 data: 0.0395 max mem: 9377 +Train: [0] [1700/6250] eta: 0:10:39 lr: 0.000007 grad: 0.0297 (0.0170) loss: 0.9951 (0.9957) time: 0.1334 data: 0.0490 max mem: 9377 +Train: [0] [1800/6250] eta: 0:10:22 lr: 0.000007 grad: 0.0337 (0.0182) loss: 0.9953 (0.9957) time: 0.1272 data: 0.0408 max mem: 9377 +Train: [0] [1900/6250] eta: 0:10:05 lr: 0.000008 grad: 0.0430 (0.0197) loss: 0.9946 (0.9956) time: 0.1115 data: 0.0311 max mem: 9377 +Train: [0] [2000/6250] eta: 0:09:48 lr: 0.000008 grad: 0.0397 (0.0214) loss: 0.9953 (0.9956) time: 0.1208 data: 0.0311 max mem: 9377 +Train: [0] [2100/6250] eta: 0:09:32 lr: 0.000008 grad: 0.0540 (0.0230) loss: 0.9947 (0.9956) time: 0.1200 data: 0.0314 max mem: 9377 +Train: [0] [2200/6250] eta: 0:09:16 lr: 0.000009 grad: 0.0497 (0.0245) loss: 0.9946 (0.9955) time: 0.1238 data: 0.0366 max mem: 9377 +Train: [0] [2300/6250] eta: 0:09:00 lr: 0.000009 grad: 0.0342 (0.0254) loss: 0.9946 (0.9955) time: 0.1262 data: 0.0433 max mem: 9377 +Train: [0] [2400/6250] eta: 0:08:45 lr: 0.000010 grad: 0.0562 (0.0266) loss: 0.9942 (0.9954) time: 0.1198 data: 0.0353 max mem: 9377 +Train: [0] [2500/6250] eta: 0:08:30 lr: 0.000010 grad: 0.0533 (0.0277) loss: 0.9934 (0.9954) time: 0.1243 data: 0.0415 max mem: 9377 +Train: [0] [2600/6250] eta: 0:08:15 lr: 0.000010 grad: 0.0490 (0.0287) loss: 0.9936 (0.9953) time: 0.1376 data: 0.0548 max mem: 9377 +Train: [0] [2700/6250] eta: 0:08:00 lr: 0.000011 grad: 0.0514 (0.0298) loss: 0.9951 (0.9953) time: 0.1289 data: 0.0370 max mem: 9377 +Train: [0] [2800/6250] eta: 0:07:46 lr: 0.000011 grad: 0.0478 (0.0307) loss: 0.9933 (0.9952) time: 0.1191 data: 0.0303 max mem: 9377 +Train: [0] [2900/6250] eta: 0:07:32 lr: 0.000012 grad: 0.0365 (0.0314) loss: 0.9939 (0.9952) time: 0.1358 data: 0.0452 max mem: 9377 +Train: [0] [3000/6250] eta: 0:07:18 lr: 0.000012 grad: 0.0584 (0.0323) loss: 0.9939 (0.9952) time: 0.1274 data: 0.0427 max mem: 9377 +Train: [0] [3100/6250] eta: 0:07:05 lr: 0.000012 grad: 0.0527 (0.0331) loss: 0.9940 (0.9951) time: 0.1456 data: 0.0611 max mem: 9377 +Train: [0] [3200/6250] eta: 0:06:51 lr: 0.000013 grad: 0.0507 (0.0339) loss: 0.9933 (0.9951) time: 0.1532 data: 0.0563 max mem: 9377 +Train: [0] [3300/6250] eta: 0:06:38 lr: 0.000013 grad: 0.0562 (0.0345) loss: 0.9942 (0.9950) time: 0.1511 data: 0.0618 max mem: 9377 +Train: [0] [3400/6250] eta: 0:06:24 lr: 0.000014 grad: 0.0519 (0.0351) loss: 0.9933 (0.9950) time: 0.1379 data: 0.0588 max mem: 9377 +Train: [0] [3500/6250] eta: 0:06:11 lr: 0.000014 grad: 0.0487 (0.0357) loss: 0.9938 (0.9950) time: 0.1179 data: 0.0320 max mem: 9377 +Train: [0] [3600/6250] eta: 0:05:58 lr: 0.000014 grad: 0.0534 (0.0361) loss: 0.9930 (0.9949) time: 0.1225 data: 0.0334 max mem: 9377 +Train: [0] [3700/6250] eta: 0:05:44 lr: 0.000015 grad: 0.0564 (0.0367) loss: 0.9927 (0.9949) time: 0.1440 data: 0.0546 max mem: 9377 +Train: [0] [3800/6250] eta: 0:05:31 lr: 0.000015 grad: 0.0522 (0.0373) loss: 0.9931 (0.9948) time: 0.1262 data: 0.0332 max mem: 9377 +Train: [0] [3900/6250] eta: 0:05:18 lr: 0.000016 grad: 0.0548 (0.0378) loss: 0.9925 (0.9948) time: 0.1468 data: 0.0563 max mem: 9377 +Train: [0] [4000/6250] eta: 0:05:04 lr: 0.000016 grad: 0.0545 (0.0384) loss: 0.9923 (0.9947) time: 0.1325 data: 0.0394 max mem: 9377 +Train: [0] [4100/6250] eta: 0:04:51 lr: 0.000016 grad: 0.0656 (0.0391) loss: 0.9922 (0.9947) time: 0.1490 data: 0.0630 max mem: 9377 +Train: [0] [4200/6250] eta: 0:04:37 lr: 0.000017 grad: 0.0672 (0.0397) loss: 0.9929 (0.9946) time: 0.1499 data: 0.0659 max mem: 9377 +Train: [0] [4300/6250] eta: 0:04:23 lr: 0.000017 grad: 0.0591 (0.0403) loss: 0.9920 (0.9946) time: 0.1248 data: 0.0327 max mem: 9377 +Train: [0] [4400/6250] eta: 0:04:09 lr: 0.000018 grad: 0.0634 (0.0409) loss: 0.9927 (0.9945) time: 0.1397 data: 0.0549 max mem: 9377 +Train: [0] [4500/6250] eta: 0:03:56 lr: 0.000018 grad: 0.0671 (0.0414) loss: 0.9923 (0.9945) time: 0.1205 data: 0.0370 max mem: 9377 +Train: [0] [4600/6250] eta: 0:03:42 lr: 0.000018 grad: 0.0654 (0.0420) loss: 0.9925 (0.9944) time: 0.1321 data: 0.0492 max mem: 9377 +Train: [0] [4700/6250] eta: 0:03:29 lr: 0.000019 grad: 0.0648 (0.0426) loss: 0.9912 (0.9944) time: 0.1498 data: 0.0639 max mem: 9377 +Train: [0] [4800/6250] eta: 0:03:15 lr: 0.000019 grad: 0.0647 (0.0432) loss: 0.9922 (0.9943) time: 0.1348 data: 0.0477 max mem: 9377 +Train: [0] [4900/6250] eta: 0:03:01 lr: 0.000020 grad: 0.0628 (0.0438) loss: 0.9932 (0.9943) time: 0.1422 data: 0.0666 max mem: 9377 +Train: [0] [5000/6250] eta: 0:02:48 lr: 0.000020 grad: 0.0705 (0.0443) loss: 0.9910 (0.9942) time: 0.1211 data: 0.0429 max mem: 9377 +Train: [0] [5100/6250] eta: 0:02:34 lr: 0.000020 grad: 0.0572 (0.0448) loss: 0.9912 (0.9941) time: 0.1381 data: 0.0561 max mem: 9377 +Train: [0] [5200/6250] eta: 0:02:21 lr: 0.000021 grad: 0.0643 (0.0454) loss: 0.9911 (0.9941) time: 0.1134 data: 0.0291 max mem: 9377 +Train: [0] [5300/6250] eta: 0:02:07 lr: 0.000021 grad: 0.0779 (0.0460) loss: 0.9905 (0.9940) time: 0.1298 data: 0.0479 max mem: 9377 +Train: [0] [5400/6250] eta: 0:01:54 lr: 0.000022 grad: 0.0844 (0.0466) loss: 0.9908 (0.9940) time: 0.1392 data: 0.0592 max mem: 9377 +Train: [0] [5500/6250] eta: 0:01:40 lr: 0.000022 grad: 0.0738 (0.0471) loss: 0.9914 (0.9939) time: 0.1345 data: 0.0551 max mem: 9377 +Train: [0] [5600/6250] eta: 0:01:27 lr: 0.000022 grad: 0.0727 (0.0477) loss: 0.9913 (0.9938) time: 0.1377 data: 0.0543 max mem: 9377 +Train: [0] [5700/6250] eta: 0:01:14 lr: 0.000023 grad: 0.0677 (0.0483) loss: 0.9929 (0.9938) time: 0.1378 data: 0.0606 max mem: 9377 +Train: [0] [5800/6250] eta: 0:01:00 lr: 0.000023 grad: 0.0761 (0.0488) loss: 0.9911 (0.9937) time: 0.1280 data: 0.0402 max mem: 9377 +Train: [0] [5900/6250] eta: 0:00:47 lr: 0.000024 grad: 0.0749 (0.0494) loss: 0.9923 (0.9937) time: 0.1474 data: 0.0716 max mem: 9377 +Train: [0] [6000/6250] eta: 0:00:33 lr: 0.000024 grad: 0.0687 (0.0499) loss: 0.9906 (0.9936) time: 0.1522 data: 0.0731 max mem: 9377 +Train: [0] [6100/6250] eta: 0:00:20 lr: 0.000024 grad: 0.0806 (0.0504) loss: 0.9900 (0.9936) time: 0.1893 data: 0.1080 max mem: 9377 +Train: [0] [6200/6250] eta: 0:00:06 lr: 0.000025 grad: 0.0638 (0.0508) loss: 0.9915 (0.9935) time: 0.1469 data: 0.0620 max mem: 9377 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.0663 (0.0510) loss: 0.9902 (0.9935) time: 0.1443 data: 0.0699 max mem: 9377 +Train: [0] Total time: 0:14:15 (0.1369 s / it) +Averaged stats: lr: 0.000025 grad: 0.0663 (0.0510) loss: 0.9902 (0.9935) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:02:53 loss: 0.9915 (0.9915) time: 2.7927 data: 2.7200 max mem: 9377 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9910 (0.9902) time: 0.1320 data: 0.1023 max mem: 9377 +Eval (hcp-train-subset): [0] Total time: 0:00:13 (0.2160 s / it) +Averaged stats (hcp-train-subset): loss: 0.9910 (0.9902) +Eval (hcp-val): [0] [ 0/62] eta: 0:03:02 loss: 0.9883 (0.9883) time: 2.9462 data: 2.8725 max mem: 9377 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9899 (0.9901) time: 0.1237 data: 0.0988 max mem: 9377 +Eval (hcp-val): [0] Total time: 0:00:12 (0.2023 s / it) +Averaged stats (hcp-val): loss: 0.9899 (0.9901) +Eval (nsd-val): [0] [ 0/62] eta: 0:04:01 loss: 0.9919 (0.9919) time: 3.8936 data: 3.8310 max mem: 9377 +Eval (nsd-val): [0] [61/62] eta: 0:00:00 loss: 0.9915 (0.9907) time: 0.1057 data: 0.0810 max mem: 9377 +Eval (nsd-val): [0] Total time: 0:00:12 (0.2086 s / it) +Averaged stats (nsd-val): loss: 0.9915 (0.9907) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [1] [ 0/6250] eta: 8:23:41 lr: 0.000025 grad: 0.0703 (0.0703) loss: 0.9936 (0.9936) time: 4.8354 data: 4.6857 max mem: 9377 +Train: [1] [ 100/6250] eta: 0:19:10 lr: 0.000025 grad: 0.0671 (0.0786) loss: 0.9917 (0.9911) time: 0.1497 data: 0.0603 max mem: 9377 +Train: [1] [ 200/6250] eta: 0:16:11 lr: 0.000026 grad: 0.0720 (0.0867) loss: 0.9902 (0.9896) time: 0.1494 data: 0.0696 max mem: 9377 +Train: [1] [ 300/6250] eta: 0:15:19 lr: 0.000026 grad: 0.0651 (0.0865) loss: 0.9904 (0.9893) time: 0.1269 data: 0.0372 max mem: 9377 +Train: [1] [ 400/6250] eta: 0:14:31 lr: 0.000027 grad: 0.0743 (0.0837) loss: 0.9903 (0.9896) time: 0.1318 data: 0.0394 max mem: 9377 +Train: [1] [ 500/6250] eta: 0:14:03 lr: 0.000027 grad: 0.0682 (0.0831) loss: 0.9911 (0.9896) time: 0.1453 data: 0.0580 max mem: 9377 +Train: [1] [ 600/6250] eta: 0:14:28 lr: 0.000027 grad: 0.0550 (0.0814) loss: 0.9929 (0.9896) time: 0.1337 data: 0.0477 max mem: 9377 +Train: [1] [ 700/6250] eta: 0:14:37 lr: 0.000028 grad: 0.0679 (0.0796) loss: 0.9918 (0.9897) time: 0.1786 data: 0.0921 max mem: 9377 +Train: [1] [ 800/6250] eta: 0:14:41 lr: 0.000028 grad: 0.0649 (0.0790) loss: 0.9910 (0.9897) time: 0.1864 data: 0.1016 max mem: 9377 +Train: [1] [ 900/6250] eta: 0:14:44 lr: 0.000029 grad: 0.0634 (0.0776) loss: 0.9904 (0.9898) time: 0.1810 data: 0.0909 max mem: 9377 +Train: [1] [1000/6250] eta: 0:14:31 lr: 0.000029 grad: 0.0687 (0.0773) loss: 0.9912 (0.9899) time: 0.1254 data: 0.0340 max mem: 9377 +Train: [1] [1100/6250] eta: 0:14:13 lr: 0.000029 grad: 0.0764 (0.0776) loss: 0.9911 (0.9899) time: 0.1993 data: 0.1195 max mem: 9377 +Train: [1] [1200/6250] eta: 0:13:47 lr: 0.000030 grad: 0.0684 (0.0771) loss: 0.9903 (0.9899) time: 0.1623 data: 0.0745 max mem: 9377 +Train: [1] [1300/6250] eta: 0:13:25 lr: 0.000030 grad: 0.0747 (0.0768) loss: 0.9889 (0.9898) time: 0.1742 data: 0.0911 max mem: 9377 +Train: [1] [1400/6250] eta: 0:13:05 lr: 0.000031 grad: 0.0677 (0.0768) loss: 0.9901 (0.9899) time: 0.1472 data: 0.0614 max mem: 9377 +Train: [1] [1500/6250] eta: 0:12:45 lr: 0.000031 grad: 0.0642 (0.0763) loss: 0.9907 (0.9899) time: 0.1422 data: 0.0604 max mem: 9377 +Train: [1] [1600/6250] eta: 0:12:25 lr: 0.000031 grad: 0.0597 (0.0758) loss: 0.9911 (0.9899) time: 0.1549 data: 0.0741 max mem: 9377 +Train: [1] [1700/6250] eta: 0:12:07 lr: 0.000032 grad: 0.0647 (0.0759) loss: 0.9903 (0.9899) time: 0.1741 data: 0.0927 max mem: 9377 +Train: [1] [1800/6250] eta: 0:11:48 lr: 0.000032 grad: 0.0695 (0.0756) loss: 0.9905 (0.9899) time: 0.1719 data: 0.0861 max mem: 9377 +Train: [1] [1900/6250] eta: 0:11:29 lr: 0.000033 grad: 0.0566 (0.0755) loss: 0.9897 (0.9899) time: 0.1343 data: 0.0513 max mem: 9377 +Train: [1] [2000/6250] eta: 0:11:12 lr: 0.000033 grad: 0.0743 (0.0756) loss: 0.9905 (0.9899) time: 0.1707 data: 0.0912 max mem: 9377 +Train: [1] [2100/6250] eta: 0:10:53 lr: 0.000033 grad: 0.0702 (0.0758) loss: 0.9909 (0.9899) time: 0.1380 data: 0.0557 max mem: 9377 +Train: [1] [2200/6250] eta: 0:10:36 lr: 0.000034 grad: 0.0669 (0.0756) loss: 0.9901 (0.9899) time: 0.1648 data: 0.0824 max mem: 9377 +Train: [1] [2300/6250] eta: 0:10:18 lr: 0.000034 grad: 0.0674 (0.0755) loss: 0.9891 (0.9898) time: 0.1169 data: 0.0357 max mem: 9377 +Train: [1] [2400/6250] eta: 0:10:02 lr: 0.000035 grad: 0.0623 (0.0754) loss: 0.9895 (0.9898) time: 0.1468 data: 0.0657 max mem: 9377 +Train: [1] [2500/6250] eta: 0:09:44 lr: 0.000035 grad: 0.0701 (0.0756) loss: 0.9902 (0.9898) time: 0.1391 data: 0.0567 max mem: 9377 +Train: [1] [2600/6250] eta: 0:09:26 lr: 0.000035 grad: 0.0721 (0.0757) loss: 0.9902 (0.9897) time: 0.1423 data: 0.0603 max mem: 9377 +Train: [1] [2700/6250] eta: 0:09:09 lr: 0.000036 grad: 0.0788 (0.0759) loss: 0.9884 (0.9897) time: 0.1423 data: 0.0597 max mem: 9377 +Train: [1] [2800/6250] eta: 0:08:52 lr: 0.000036 grad: 0.0704 (0.0760) loss: 0.9884 (0.9896) time: 0.1213 data: 0.0395 max mem: 9377 +Train: [1] [2900/6250] eta: 0:08:35 lr: 0.000037 grad: 0.0783 (0.0762) loss: 0.9894 (0.9895) time: 0.1346 data: 0.0508 max mem: 9377 +Train: [1] [3000/6250] eta: 0:08:18 lr: 0.000037 grad: 0.0728 (0.0764) loss: 0.9875 (0.9895) time: 0.1478 data: 0.0713 max mem: 9377 +Train: [1] [3100/6250] eta: 0:08:02 lr: 0.000037 grad: 0.0800 (0.0766) loss: 0.9887 (0.9894) time: 0.1530 data: 0.0668 max mem: 9377 +Train: [1] [3200/6250] eta: 0:07:46 lr: 0.000038 grad: 0.0722 (0.0767) loss: 0.9882 (0.9894) time: 0.1436 data: 0.0613 max mem: 9377 +Train: [1] [3300/6250] eta: 0:07:29 lr: 0.000038 grad: 0.0771 (0.0768) loss: 0.9882 (0.9893) time: 0.1282 data: 0.0505 max mem: 9377 +Train: [1] [3400/6250] eta: 0:07:13 lr: 0.000039 grad: 0.0800 (0.0770) loss: 0.9861 (0.9893) time: 0.1523 data: 0.0701 max mem: 9377 +Train: [1] [3500/6250] eta: 0:06:57 lr: 0.000039 grad: 0.0720 (0.0772) loss: 0.9887 (0.9892) time: 0.1595 data: 0.0733 max mem: 9377 +Train: [1] [3600/6250] eta: 0:06:43 lr: 0.000039 grad: 0.0751 (0.0773) loss: 0.9889 (0.9892) time: 0.1380 data: 0.0519 max mem: 9377 +Train: [1] [3700/6250] eta: 0:06:28 lr: 0.000040 grad: 0.0765 (0.0775) loss: 0.9883 (0.9891) time: 0.1597 data: 0.0760 max mem: 9377 +Train: [1] [3800/6250] eta: 0:06:13 lr: 0.000040 grad: 0.0829 (0.0777) loss: 0.9866 (0.9891) time: 0.1457 data: 0.0599 max mem: 9377 +Train: [1] [3900/6250] eta: 0:05:57 lr: 0.000041 grad: 0.0867 (0.0779) loss: 0.9850 (0.9890) time: 0.1349 data: 0.0533 max mem: 9377 +Train: [1] [4000/6250] eta: 0:05:42 lr: 0.000041 grad: 0.0800 (0.0782) loss: 0.9889 (0.9890) time: 0.1547 data: 0.0640 max mem: 9377 +Train: [1] [4100/6250] eta: 0:05:27 lr: 0.000041 grad: 0.0700 (0.0782) loss: 0.9876 (0.9889) time: 0.1471 data: 0.0594 max mem: 9377 +Train: [1] [4200/6250] eta: 0:05:12 lr: 0.000042 grad: 0.0667 (0.0784) loss: 0.9893 (0.9889) time: 0.1526 data: 0.0683 max mem: 9377 +Train: [1] [4300/6250] eta: 0:04:57 lr: 0.000042 grad: 0.0786 (0.0786) loss: 0.9880 (0.9889) time: 0.1465 data: 0.0593 max mem: 9377 +Train: [1] [4400/6250] eta: 0:04:41 lr: 0.000043 grad: 0.0821 (0.0787) loss: 0.9855 (0.9888) time: 0.1379 data: 0.0583 max mem: 9377 +Train: [1] [4500/6250] eta: 0:04:26 lr: 0.000043 grad: 0.0885 (0.0789) loss: 0.9889 (0.9888) time: 0.1652 data: 0.0855 max mem: 9377 +Train: [1] [4600/6250] eta: 0:04:10 lr: 0.000043 grad: 0.0836 (0.0790) loss: 0.9878 (0.9887) time: 0.1679 data: 0.0814 max mem: 9377 +Train: [1] [4700/6250] eta: 0:03:55 lr: 0.000044 grad: 0.0762 (0.0792) loss: 0.9867 (0.9887) time: 0.1428 data: 0.0561 max mem: 9377 +Train: [1] [4800/6250] eta: 0:03:40 lr: 0.000044 grad: 0.0749 (0.0793) loss: 0.9879 (0.9887) time: 0.1397 data: 0.0546 max mem: 9377 +Train: [1] [4900/6250] eta: 0:03:24 lr: 0.000045 grad: 0.0903 (0.0795) loss: 0.9865 (0.9887) time: 0.1588 data: 0.0812 max mem: 9377 +Train: [1] [5000/6250] eta: 0:03:09 lr: 0.000045 grad: 0.0734 (0.0796) loss: 0.9888 (0.9887) time: 0.1697 data: 0.0938 max mem: 9377 +Train: [1] [5100/6250] eta: 0:02:54 lr: 0.000045 grad: 0.0727 (0.0797) loss: 0.9874 (0.9886) time: 0.1435 data: 0.0601 max mem: 9377 +Train: [1] [5200/6250] eta: 0:02:38 lr: 0.000046 grad: 0.0853 (0.0799) loss: 0.9883 (0.9886) time: 0.1525 data: 0.0696 max mem: 9377 +Train: [1] [5300/6250] eta: 0:02:23 lr: 0.000046 grad: 0.0738 (0.0799) loss: 0.9896 (0.9886) time: 0.1437 data: 0.0560 max mem: 9377 +Train: [1] [5400/6250] eta: 0:02:08 lr: 0.000047 grad: 0.0857 (0.0799) loss: 0.9876 (0.9886) time: 0.1702 data: 0.0880 max mem: 9377 +Train: [1] [5500/6250] eta: 0:01:53 lr: 0.000047 grad: 0.0676 (0.0799) loss: 0.9890 (0.9886) time: 0.1532 data: 0.0694 max mem: 9377 +Train: [1] [5600/6250] eta: 0:01:38 lr: 0.000047 grad: 0.0802 (0.0799) loss: 0.9880 (0.9886) time: 0.1410 data: 0.0513 max mem: 9377 +Train: [1] [5700/6250] eta: 0:01:23 lr: 0.000048 grad: 0.0756 (0.0798) loss: 0.9859 (0.9886) time: 0.1536 data: 0.0701 max mem: 9377 +Train: [1] [5800/6250] eta: 0:01:08 lr: 0.000048 grad: 0.0693 (0.0798) loss: 0.9896 (0.9885) time: 0.1528 data: 0.0678 max mem: 9377 +Train: [1] [5900/6250] eta: 0:00:52 lr: 0.000049 grad: 0.0678 (0.0797) loss: 0.9874 (0.9885) time: 0.1380 data: 0.0538 max mem: 9377 +Train: [1] [6000/6250] eta: 0:00:37 lr: 0.000049 grad: 0.0629 (0.0797) loss: 0.9905 (0.9885) time: 0.1392 data: 0.0587 max mem: 9377 +Train: [1] [6100/6250] eta: 0:00:22 lr: 0.000049 grad: 0.0786 (0.0797) loss: 0.9875 (0.9885) time: 0.1733 data: 0.0943 max mem: 9377 +Train: [1] [6200/6250] eta: 0:00:07 lr: 0.000050 grad: 0.0667 (0.0796) loss: 0.9884 (0.9885) time: 0.2154 data: 0.1453 max mem: 9377 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0747 (0.0795) loss: 0.9888 (0.9885) time: 0.1846 data: 0.0947 max mem: 9377 +Train: [1] Total time: 0:15:55 (0.1528 s / it) +Averaged stats: lr: 0.000050 grad: 0.0747 (0.0795) loss: 0.9888 (0.9885) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:04:57 loss: 0.9877 (0.9877) time: 4.7934 data: 4.7598 max mem: 9377 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9876 (0.9874) time: 0.1130 data: 0.0884 max mem: 9377 +Eval (hcp-train-subset): [1] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (hcp-train-subset): loss: 0.9876 (0.9874) +Eval (hcp-val): [1] [ 0/62] eta: 0:03:26 loss: 0.9819 (0.9819) time: 3.3246 data: 3.2550 max mem: 9377 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9885 (0.9874) time: 0.1143 data: 0.0876 max mem: 9377 +Eval (hcp-val): [1] Total time: 0:00:12 (0.2049 s / it) +Averaged stats (hcp-val): loss: 0.9885 (0.9874) +Eval (nsd-val): [1] [ 0/62] eta: 0:03:24 loss: 0.9869 (0.9869) time: 3.3062 data: 3.2310 max mem: 9377 +Eval (nsd-val): [1] [61/62] eta: 0:00:00 loss: 0.9899 (0.9879) time: 0.0799 data: 0.0551 max mem: 9377 +Eval (nsd-val): [1] Total time: 0:00:13 (0.2139 s / it) +Averaged stats (nsd-val): loss: 0.9899 (0.9879) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [2] [ 0/6250] eta: 8:47:23 lr: 0.000050 grad: 0.2206 (0.2206) loss: 0.9643 (0.9643) time: 5.0629 data: 4.9597 max mem: 9377 +Train: [2] [ 100/6250] eta: 0:20:11 lr: 0.000050 grad: 0.0696 (0.0930) loss: 0.9874 (0.9860) time: 0.1528 data: 0.0608 max mem: 9377 +Train: [2] [ 200/6250] eta: 0:17:54 lr: 0.000051 grad: 0.0920 (0.0944) loss: 0.9842 (0.9855) time: 0.1341 data: 0.0379 max mem: 9377 +Train: [2] [ 300/6250] eta: 0:17:02 lr: 0.000051 grad: 0.0835 (0.0956) loss: 0.9852 (0.9853) time: 0.1651 data: 0.0729 max mem: 9377 +Train: [2] [ 400/6250] eta: 0:16:16 lr: 0.000052 grad: 0.0847 (0.0940) loss: 0.9864 (0.9855) time: 0.1297 data: 0.0284 max mem: 9377 +Train: [2] [ 500/6250] eta: 0:15:45 lr: 0.000052 grad: 0.0797 (0.0932) loss: 0.9854 (0.9856) time: 0.1126 data: 0.0167 max mem: 9377 +Train: [2] [ 600/6250] eta: 0:15:09 lr: 0.000052 grad: 0.0837 (0.0930) loss: 0.9827 (0.9855) time: 0.1286 data: 0.0458 max mem: 9377 +Train: [2] [ 700/6250] eta: 0:14:46 lr: 0.000053 grad: 0.0814 (0.0922) loss: 0.9866 (0.9856) time: 0.1589 data: 0.0673 max mem: 9377 +Train: [2] [ 800/6250] eta: 0:14:53 lr: 0.000053 grad: 0.0851 (0.0917) loss: 0.9867 (0.9858) time: 0.2306 data: 0.1460 max mem: 9377 +Train: [2] [ 900/6250] eta: 0:14:58 lr: 0.000054 grad: 0.0815 (0.0912) loss: 0.9850 (0.9858) time: 0.1915 data: 0.1072 max mem: 9377 +Train: [2] [1000/6250] eta: 0:14:51 lr: 0.000054 grad: 0.0712 (0.0902) loss: 0.9846 (0.9859) time: 0.1903 data: 0.1100 max mem: 9377 +Train: [2] [1100/6250] eta: 0:14:42 lr: 0.000054 grad: 0.0712 (0.0895) loss: 0.9859 (0.9859) time: 0.1421 data: 0.0640 max mem: 9377 +Train: [2] [1200/6250] eta: 0:14:16 lr: 0.000055 grad: 0.0672 (0.0885) loss: 0.9878 (0.9860) time: 0.1525 data: 0.0641 max mem: 9377 +Train: [2] [1300/6250] eta: 0:13:55 lr: 0.000055 grad: 0.0690 (0.0884) loss: 0.9863 (0.9861) time: 0.1710 data: 0.0894 max mem: 9377 +Train: [2] [1400/6250] eta: 0:13:38 lr: 0.000056 grad: 0.0684 (0.0881) loss: 0.9875 (0.9862) time: 0.1585 data: 0.0746 max mem: 9377 +Train: [2] [1500/6250] eta: 0:13:22 lr: 0.000056 grad: 0.0717 (0.0880) loss: 0.9869 (0.9862) time: 0.1742 data: 0.0914 max mem: 9377 +Train: [2] [1600/6250] eta: 0:13:06 lr: 0.000056 grad: 0.0768 (0.0878) loss: 0.9886 (0.9862) time: 0.1625 data: 0.0815 max mem: 9377 +Train: [2] [1700/6250] eta: 0:12:50 lr: 0.000057 grad: 0.0744 (0.0880) loss: 0.9864 (0.9863) time: 0.2276 data: 0.1397 max mem: 9377 +Train: [2] [1800/6250] eta: 0:12:30 lr: 0.000057 grad: 0.0770 (0.0878) loss: 0.9872 (0.9863) time: 0.1751 data: 0.0844 max mem: 9377 +Train: [2] [1900/6250] eta: 0:12:12 lr: 0.000058 grad: 0.0642 (0.0878) loss: 0.9871 (0.9863) time: 0.2051 data: 0.1126 max mem: 9377 +Train: [2] [2000/6250] eta: 0:11:52 lr: 0.000058 grad: 0.0845 (0.0875) loss: 0.9872 (0.9863) time: 0.1598 data: 0.0602 max mem: 9377 +Train: [2] [2100/6250] eta: 0:11:31 lr: 0.000058 grad: 0.0761 (0.0875) loss: 0.9855 (0.9863) time: 0.1503 data: 0.0720 max mem: 9377 +Train: [2] [2200/6250] eta: 0:11:08 lr: 0.000059 grad: 0.0745 (0.0875) loss: 0.9880 (0.9864) time: 0.1292 data: 0.0451 max mem: 9377 +Train: [2] [2300/6250] eta: 0:10:49 lr: 0.000059 grad: 0.0797 (0.0872) loss: 0.9875 (0.9864) time: 0.1252 data: 0.0437 max mem: 9377 +Train: [2] [2400/6250] eta: 0:10:31 lr: 0.000060 grad: 0.0823 (0.0869) loss: 0.9869 (0.9864) time: 0.1644 data: 0.0801 max mem: 9377 +Train: [2] [2500/6250] eta: 0:10:15 lr: 0.000060 grad: 0.0702 (0.0868) loss: 0.9875 (0.9864) time: 0.1875 data: 0.1024 max mem: 9377 +Train: [2] [2600/6250] eta: 0:09:56 lr: 0.000060 grad: 0.0777 (0.0868) loss: 0.9834 (0.9864) time: 0.1417 data: 0.0533 max mem: 9377 +Train: [2] [2700/6250] eta: 0:09:38 lr: 0.000061 grad: 0.0901 (0.0867) loss: 0.9847 (0.9864) time: 0.1635 data: 0.0762 max mem: 9377 +Train: [2] [2800/6250] eta: 0:09:18 lr: 0.000061 grad: 0.0796 (0.0869) loss: 0.9844 (0.9864) time: 0.1200 data: 0.0407 max mem: 9377 +Train: [2] [2900/6250] eta: 0:09:01 lr: 0.000062 grad: 0.0894 (0.0870) loss: 0.9857 (0.9864) time: 0.1815 data: 0.0997 max mem: 9377 +Train: [2] [3000/6250] eta: 0:08:43 lr: 0.000062 grad: 0.0948 (0.0873) loss: 0.9861 (0.9864) time: 0.1493 data: 0.0578 max mem: 9377 +Train: [2] [3100/6250] eta: 0:08:27 lr: 0.000062 grad: 0.0780 (0.0873) loss: 0.9868 (0.9864) time: 0.1571 data: 0.0736 max mem: 9377 +Train: [2] [3200/6250] eta: 0:08:09 lr: 0.000063 grad: 0.0798 (0.0874) loss: 0.9878 (0.9864) time: 0.1360 data: 0.0354 max mem: 9377 +Train: [2] [3300/6250] eta: 0:07:52 lr: 0.000063 grad: 0.0806 (0.0875) loss: 0.9861 (0.9864) time: 0.1203 data: 0.0292 max mem: 9377 +Train: [2] [3400/6250] eta: 0:07:36 lr: 0.000064 grad: 0.0919 (0.0877) loss: 0.9863 (0.9864) time: 0.1935 data: 0.1076 max mem: 9377 +Train: [2] [3500/6250] eta: 0:07:19 lr: 0.000064 grad: 0.0807 (0.0880) loss: 0.9866 (0.9864) time: 0.1551 data: 0.0694 max mem: 9377 +Train: [2] [3600/6250] eta: 0:07:02 lr: 0.000064 grad: 0.0883 (0.0883) loss: 0.9868 (0.9864) time: 0.1535 data: 0.0693 max mem: 9377 +Train: [2] [3700/6250] eta: 0:06:45 lr: 0.000065 grad: 0.1094 (0.0889) loss: 0.9856 (0.9863) time: 0.1570 data: 0.0651 max mem: 9377 +Train: [2] [3800/6250] eta: 0:06:29 lr: 0.000065 grad: 0.1222 (0.0896) loss: 0.9859 (0.9863) time: 0.1185 data: 0.0348 max mem: 9377 +Train: [2] [3900/6250] eta: 0:06:12 lr: 0.000066 grad: 0.1096 (0.0905) loss: 0.9865 (0.9863) time: 0.1583 data: 0.0836 max mem: 9377 +Train: [2] [4000/6250] eta: 0:05:56 lr: 0.000066 grad: 0.1035 (0.0909) loss: 0.9860 (0.9863) time: 0.1583 data: 0.0669 max mem: 9377 +Train: [2] [4100/6250] eta: 0:05:40 lr: 0.000066 grad: 0.0888 (0.0916) loss: 0.9860 (0.9863) time: 0.1347 data: 0.0522 max mem: 9377 +Train: [2] [4200/6250] eta: 0:05:23 lr: 0.000067 grad: 0.1157 (0.0922) loss: 0.9857 (0.9863) time: 0.1259 data: 0.0325 max mem: 9377 +Train: [2] [4300/6250] eta: 0:05:07 lr: 0.000067 grad: 0.1139 (0.0933) loss: 0.9831 (0.9863) time: 0.1395 data: 0.0508 max mem: 9377 +Train: [2] [4400/6250] eta: 0:04:51 lr: 0.000068 grad: 0.1204 (0.0948) loss: 0.9831 (0.9862) time: 0.1432 data: 0.0550 max mem: 9377 +Train: [2] [4500/6250] eta: 0:04:34 lr: 0.000068 grad: 0.1493 (0.0963) loss: 0.9843 (0.9862) time: 0.1552 data: 0.0714 max mem: 9377 +Train: [2] [4600/6250] eta: 0:04:19 lr: 0.000068 grad: 0.2335 (0.0986) loss: 0.9835 (0.9861) time: 0.1559 data: 0.0705 max mem: 9377 +Train: [2] [4700/6250] eta: 0:04:03 lr: 0.000069 grad: 0.1613 (0.1005) loss: 0.9824 (0.9861) time: 0.1425 data: 0.0525 max mem: 9377 +Train: [2] [4800/6250] eta: 0:03:47 lr: 0.000069 grad: 0.1821 (0.1025) loss: 0.9836 (0.9861) time: 0.1620 data: 0.0741 max mem: 9377 +Train: [2] [4900/6250] eta: 0:03:31 lr: 0.000070 grad: 0.1517 (0.1046) loss: 0.9842 (0.9860) time: 0.1415 data: 0.0518 max mem: 9377 +Train: [2] [5000/6250] eta: 0:03:15 lr: 0.000070 grad: 0.2429 (0.1068) loss: 0.9856 (0.9860) time: 0.1728 data: 0.0913 max mem: 9377 +Train: [2] [5100/6250] eta: 0:02:59 lr: 0.000070 grad: 0.1352 (0.1087) loss: 0.9844 (0.9859) time: 0.1540 data: 0.0703 max mem: 9377 +Train: [2] [5200/6250] eta: 0:02:44 lr: 0.000071 grad: 0.1604 (0.1105) loss: 0.9856 (0.9859) time: 0.1231 data: 0.0337 max mem: 9377 +Train: [2] [5300/6250] eta: 0:02:28 lr: 0.000071 grad: 0.2058 (0.1130) loss: 0.9837 (0.9858) time: 0.1622 data: 0.0774 max mem: 9377 +Train: [2] [5400/6250] eta: 0:02:12 lr: 0.000072 grad: 0.1476 (0.1144) loss: 0.9813 (0.9858) time: 0.1822 data: 0.0955 max mem: 9377 +Train: [2] [5500/6250] eta: 0:01:57 lr: 0.000072 grad: 0.1559 (0.1164) loss: 0.9836 (0.9857) time: 0.1465 data: 0.0661 max mem: 9377 +Train: [2] [5600/6250] eta: 0:01:41 lr: 0.000072 grad: 0.1685 (0.1182) loss: 0.9827 (0.9856) time: 0.1506 data: 0.0631 max mem: 9377 +Train: [2] [5700/6250] eta: 0:01:25 lr: 0.000073 grad: 0.2053 (0.1202) loss: 0.9813 (0.9856) time: 0.1483 data: 0.0555 max mem: 9377 +Train: [2] [5800/6250] eta: 0:01:10 lr: 0.000073 grad: 0.2338 (0.1217) loss: 0.9815 (0.9855) time: 0.1342 data: 0.0476 max mem: 9377 +Train: [2] [5900/6250] eta: 0:00:54 lr: 0.000074 grad: 0.1612 (0.1228) loss: 0.9825 (0.9854) time: 0.1565 data: 0.0742 max mem: 9377 +Train: [2] [6000/6250] eta: 0:00:38 lr: 0.000074 grad: 0.2480 (0.1247) loss: 0.9816 (0.9853) time: 0.1410 data: 0.0631 max mem: 9377 +Train: [2] [6100/6250] eta: 0:00:23 lr: 0.000074 grad: 0.1930 (0.1266) loss: 0.9801 (0.9853) time: 0.1613 data: 0.0793 max mem: 9377 +Train: [2] [6200/6250] eta: 0:00:07 lr: 0.000075 grad: 0.1156 (0.1282) loss: 0.9826 (0.9852) time: 0.1848 data: 0.0960 max mem: 9377 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.1671 (0.1288) loss: 0.9816 (0.9852) time: 0.1538 data: 0.0729 max mem: 9377 +Train: [2] Total time: 0:16:19 (0.1568 s / it) +Averaged stats: lr: 0.000075 grad: 0.1671 (0.1288) loss: 0.9816 (0.9852) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:03:44 loss: 0.9884 (0.9884) time: 3.6145 data: 3.5443 max mem: 9377 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9848 (0.9840) time: 0.1254 data: 0.1002 max mem: 9377 +Eval (hcp-train-subset): [2] Total time: 0:00:14 (0.2333 s / it) +Averaged stats (hcp-train-subset): loss: 0.9848 (0.9840) +Eval (hcp-val): [2] [ 0/62] eta: 0:02:56 loss: 0.9798 (0.9798) time: 2.8530 data: 2.7822 max mem: 9377 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9824 (0.9829) time: 0.1347 data: 0.1092 max mem: 9377 +Eval (hcp-val): [2] Total time: 0:00:13 (0.2207 s / it) +Averaged stats (hcp-val): loss: 0.9824 (0.9829) +Eval (nsd-val): [2] [ 0/62] eta: 0:05:43 loss: 0.9782 (0.9782) time: 5.5469 data: 5.5037 max mem: 9377 +Eval (nsd-val): [2] [61/62] eta: 0:00:00 loss: 0.9837 (0.9820) time: 0.1217 data: 0.0969 max mem: 9377 +Eval (nsd-val): [2] Total time: 0:00:15 (0.2425 s / it) +Averaged stats (nsd-val): loss: 0.9837 (0.9820) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [3] [ 0/6250] eta: 9:20:32 lr: 0.000075 grad: 0.2395 (0.2395) loss: 0.9887 (0.9887) time: 5.3811 data: 5.2017 max mem: 9377 +Train: [3] [ 100/6250] eta: 0:22:44 lr: 0.000075 grad: 0.1810 (0.2221) loss: 0.9851 (0.9847) time: 0.2165 data: 0.1296 max mem: 9377 +Train: [3] [ 200/6250] eta: 0:18:45 lr: 0.000076 grad: 0.1706 (0.2103) loss: 0.9830 (0.9833) time: 0.1877 data: 0.1023 max mem: 9377 +Train: [3] [ 300/6250] eta: 0:17:43 lr: 0.000076 grad: 0.1575 (0.2109) loss: 0.9824 (0.9825) time: 0.1378 data: 0.0320 max mem: 9377 +Train: [3] [ 400/6250] eta: 0:17:00 lr: 0.000077 grad: 0.1422 (0.2070) loss: 0.9814 (0.9821) time: 0.1508 data: 0.0663 max mem: 9377 +Train: [3] [ 500/6250] eta: 0:16:27 lr: 0.000077 grad: 0.1917 (0.2049) loss: 0.9790 (0.9813) time: 0.1720 data: 0.0848 max mem: 9377 +Train: [3] [ 600/6250] eta: 0:16:05 lr: 0.000077 grad: 0.1550 (0.2036) loss: 0.9758 (0.9809) time: 0.1506 data: 0.0437 max mem: 9377 +Train: [3] [ 700/6250] eta: 0:15:52 lr: 0.000078 grad: 0.1919 (0.2079) loss: 0.9797 (0.9808) time: 0.1730 data: 0.0832 max mem: 9377 +Train: [3] [ 800/6250] eta: 0:15:25 lr: 0.000078 grad: 0.1430 (0.2091) loss: 0.9796 (0.9806) time: 0.1577 data: 0.0644 max mem: 9377 +Train: [3] [ 900/6250] eta: 0:14:58 lr: 0.000079 grad: 0.2178 (0.2108) loss: 0.9787 (0.9805) time: 0.1378 data: 0.0524 max mem: 9377 +Train: [3] [1000/6250] eta: 0:14:28 lr: 0.000079 grad: 0.1487 (0.2102) loss: 0.9786 (0.9804) time: 0.1543 data: 0.0593 max mem: 9377 +Train: [3] [1100/6250] eta: 0:14:02 lr: 0.000079 grad: 0.1260 (0.2097) loss: 0.9786 (0.9802) time: 0.1240 data: 0.0292 max mem: 9377 +Train: [3] [1200/6250] eta: 0:13:37 lr: 0.000080 grad: 0.2442 (0.2115) loss: 0.9778 (0.9801) time: 0.1424 data: 0.0521 max mem: 9377 +Train: [3] [1300/6250] eta: 0:13:23 lr: 0.000080 grad: 0.1847 (0.2134) loss: 0.9791 (0.9800) time: 0.1758 data: 0.0960 max mem: 9377 +Train: [3] [1400/6250] eta: 0:13:05 lr: 0.000081 grad: 0.1904 (0.2120) loss: 0.9829 (0.9799) time: 0.1753 data: 0.0946 max mem: 9377 +Train: [3] [1500/6250] eta: 0:12:48 lr: 0.000081 grad: 0.1569 (0.2111) loss: 0.9798 (0.9798) time: 0.1550 data: 0.0761 max mem: 9377 +Train: [3] [1600/6250] eta: 0:12:34 lr: 0.000081 grad: 0.2105 (0.2123) loss: 0.9793 (0.9796) time: 0.1885 data: 0.1072 max mem: 9377 +Train: [3] [1700/6250] eta: 0:12:16 lr: 0.000082 grad: 0.1448 (0.2121) loss: 0.9777 (0.9795) time: 0.1274 data: 0.0465 max mem: 9377 +Train: [3] [1800/6250] eta: 0:11:59 lr: 0.000082 grad: 0.1410 (0.2127) loss: 0.9783 (0.9793) time: 0.1698 data: 0.0929 max mem: 9377 +Train: [3] [1900/6250] eta: 0:11:42 lr: 0.000083 grad: 0.1819 (0.2133) loss: 0.9759 (0.9791) time: 0.1705 data: 0.0881 max mem: 9377 +Train: [3] [2000/6250] eta: 0:11:25 lr: 0.000083 grad: 0.1609 (0.2140) loss: 0.9769 (0.9789) time: 0.1445 data: 0.0632 max mem: 9377 +Train: [3] [2100/6250] eta: 0:11:07 lr: 0.000083 grad: 0.2053 (0.2134) loss: 0.9762 (0.9788) time: 0.1545 data: 0.0735 max mem: 9377 +Train: [3] [2200/6250] eta: 0:10:48 lr: 0.000084 grad: 0.1883 (0.2142) loss: 0.9790 (0.9787) time: 0.1519 data: 0.0642 max mem: 9377 +Train: [3] [2300/6250] eta: 0:10:29 lr: 0.000084 grad: 0.2068 (0.2136) loss: 0.9774 (0.9786) time: 0.1730 data: 0.0887 max mem: 9377 +Train: [3] [2400/6250] eta: 0:10:10 lr: 0.000085 grad: 0.2577 (0.2141) loss: 0.9777 (0.9785) time: 0.1561 data: 0.0708 max mem: 9377 +Train: [3] [2500/6250] eta: 0:09:51 lr: 0.000085 grad: 0.1646 (0.2141) loss: 0.9745 (0.9784) time: 0.1289 data: 0.0447 max mem: 9377 +Train: [3] [2600/6250] eta: 0:09:35 lr: 0.000085 grad: 0.1433 (0.2140) loss: 0.9795 (0.9783) time: 0.2113 data: 0.1256 max mem: 9377 +Train: [3] [2700/6250] eta: 0:09:17 lr: 0.000086 grad: 0.1901 (0.2133) loss: 0.9754 (0.9781) time: 0.1244 data: 0.0337 max mem: 9377 +Train: [3] [2800/6250] eta: 0:09:01 lr: 0.000086 grad: 0.1647 (0.2125) loss: 0.9782 (0.9780) time: 0.1636 data: 0.0799 max mem: 9377 +Train: [3] [2900/6250] eta: 0:08:45 lr: 0.000087 grad: 0.1491 (0.2123) loss: 0.9747 (0.9779) time: 0.1504 data: 0.0656 max mem: 9377 +Train: [3] [3000/6250] eta: 0:08:28 lr: 0.000087 grad: 0.1345 (0.2117) loss: 0.9719 (0.9778) time: 0.1419 data: 0.0573 max mem: 9377 +Train: [3] [3100/6250] eta: 0:08:12 lr: 0.000087 grad: 0.1950 (0.2108) loss: 0.9754 (0.9777) time: 0.1339 data: 0.0531 max mem: 9377 +Train: [3] [3200/6250] eta: 0:07:57 lr: 0.000088 grad: 0.1997 (0.2107) loss: 0.9720 (0.9776) time: 0.1714 data: 0.0889 max mem: 9377 +Train: [3] [3300/6250] eta: 0:07:42 lr: 0.000088 grad: 0.1881 (0.2105) loss: 0.9731 (0.9776) time: 0.1756 data: 0.0957 max mem: 9377 +Train: [3] [3400/6250] eta: 0:07:27 lr: 0.000089 grad: 0.1339 (0.2094) loss: 0.9740 (0.9775) time: 0.1741 data: 0.0876 max mem: 9377 +Train: [3] [3500/6250] eta: 0:07:12 lr: 0.000089 grad: 0.1901 (0.2092) loss: 0.9746 (0.9774) time: 0.1523 data: 0.0644 max mem: 9377 +Train: [3] [3600/6250] eta: 0:06:56 lr: 0.000089 grad: 0.1862 (0.2096) loss: 0.9753 (0.9773) time: 0.1523 data: 0.0651 max mem: 9377 +Train: [3] [3700/6250] eta: 0:06:40 lr: 0.000090 grad: 0.1915 (0.2094) loss: 0.9760 (0.9772) time: 0.1833 data: 0.0987 max mem: 9377 +Train: [3] [3800/6250] eta: 0:06:24 lr: 0.000090 grad: 0.1483 (0.2089) loss: 0.9725 (0.9772) time: 0.1784 data: 0.0981 max mem: 9377 +Train: [3] [3900/6250] eta: 0:06:09 lr: 0.000091 grad: 0.1625 (0.2086) loss: 0.9731 (0.9771) time: 0.1692 data: 0.0839 max mem: 9377 +Train: [3] [4000/6250] eta: 0:05:53 lr: 0.000091 grad: 0.1131 (0.2084) loss: 0.9770 (0.9771) time: 0.1551 data: 0.0671 max mem: 9377 +Train: [3] [4100/6250] eta: 0:05:37 lr: 0.000091 grad: 0.1393 (0.2081) loss: 0.9752 (0.9770) time: 0.1463 data: 0.0607 max mem: 9377 +Train: [3] [4200/6250] eta: 0:05:21 lr: 0.000092 grad: 0.1481 (0.2073) loss: 0.9742 (0.9770) time: 0.1546 data: 0.0643 max mem: 9377 +Train: [3] [4300/6250] eta: 0:05:04 lr: 0.000092 grad: 0.1378 (0.2072) loss: 0.9750 (0.9769) time: 0.1344 data: 0.0495 max mem: 9377 +Train: [3] [4400/6250] eta: 0:04:48 lr: 0.000093 grad: 0.1767 (0.2069) loss: 0.9762 (0.9769) time: 0.1478 data: 0.0567 max mem: 9377 +Train: [3] [4500/6250] eta: 0:04:33 lr: 0.000093 grad: 0.2025 (0.2069) loss: 0.9722 (0.9768) time: 0.1607 data: 0.0724 max mem: 9377 +Train: [3] [4600/6250] eta: 0:04:17 lr: 0.000093 grad: 0.1389 (0.2068) loss: 0.9726 (0.9768) time: 0.1530 data: 0.0677 max mem: 9377 +Train: [3] [4700/6250] eta: 0:04:01 lr: 0.000094 grad: 0.1317 (0.2065) loss: 0.9767 (0.9767) time: 0.1489 data: 0.0677 max mem: 9377 +Train: [3] [4800/6250] eta: 0:03:46 lr: 0.000094 grad: 0.1232 (0.2064) loss: 0.9731 (0.9766) time: 0.1770 data: 0.0918 max mem: 9377 +Train: [3] [4900/6250] eta: 0:03:30 lr: 0.000095 grad: 0.1683 (0.2060) loss: 0.9706 (0.9765) time: 0.1350 data: 0.0568 max mem: 9377 +Train: [3] [5000/6250] eta: 0:03:14 lr: 0.000095 grad: 0.1646 (0.2055) loss: 0.9695 (0.9764) time: 0.1573 data: 0.0699 max mem: 9377 +Train: [3] [5100/6250] eta: 0:02:58 lr: 0.000095 grad: 0.1189 (0.2051) loss: 0.9726 (0.9764) time: 0.1465 data: 0.0591 max mem: 9377 +Train: [3] [5200/6250] eta: 0:02:43 lr: 0.000096 grad: 0.2213 (0.2055) loss: 0.9701 (0.9763) time: 0.1611 data: 0.0791 max mem: 9377 +Train: [3] [5300/6250] eta: 0:02:27 lr: 0.000096 grad: 0.1796 (0.2055) loss: 0.9718 (0.9762) time: 0.1540 data: 0.0609 max mem: 9377 +Train: [3] [5400/6250] eta: 0:02:11 lr: 0.000097 grad: 0.1938 (0.2054) loss: 0.9686 (0.9761) time: 0.1385 data: 0.0547 max mem: 9377 +Train: [3] [5500/6250] eta: 0:01:56 lr: 0.000097 grad: 0.1581 (0.2052) loss: 0.9719 (0.9760) time: 0.1349 data: 0.0551 max mem: 9377 +Train: [3] [5600/6250] eta: 0:01:40 lr: 0.000097 grad: 0.1313 (0.2051) loss: 0.9703 (0.9759) time: 0.1512 data: 0.0696 max mem: 9377 +Train: [3] [5700/6250] eta: 0:01:25 lr: 0.000098 grad: 0.1265 (0.2046) loss: 0.9738 (0.9758) time: 0.1452 data: 0.0563 max mem: 9377 +Train: [3] [5800/6250] eta: 0:01:09 lr: 0.000098 grad: 0.1574 (0.2044) loss: 0.9713 (0.9757) time: 0.1346 data: 0.0481 max mem: 9377 +Train: [3] [5900/6250] eta: 0:00:54 lr: 0.000099 grad: 0.1398 (0.2045) loss: 0.9711 (0.9756) time: 0.1369 data: 0.0445 max mem: 9377 +Train: [3] [6000/6250] eta: 0:00:38 lr: 0.000099 grad: 0.2268 (0.2041) loss: 0.9691 (0.9755) time: 0.1476 data: 0.0545 max mem: 9377 +Train: [3] [6100/6250] eta: 0:00:23 lr: 0.000099 grad: 0.1944 (0.2044) loss: 0.9706 (0.9755) time: 0.1415 data: 0.0542 max mem: 9377 +Train: [3] [6200/6250] eta: 0:00:07 lr: 0.000100 grad: 0.1835 (0.2043) loss: 0.9692 (0.9754) time: 0.1577 data: 0.0734 max mem: 9377 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1751 (0.2043) loss: 0.9720 (0.9753) time: 0.1789 data: 0.0882 max mem: 9377 +Train: [3] Total time: 0:16:14 (0.1559 s / it) +Averaged stats: lr: 0.000100 grad: 0.1751 (0.2043) loss: 0.9720 (0.9753) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:02:59 loss: 0.9748 (0.9748) time: 2.8958 data: 2.8332 max mem: 9377 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9695 (0.9710) time: 0.1410 data: 0.1157 max mem: 9377 +Eval (hcp-train-subset): [3] Total time: 0:00:13 (0.2190 s / it) +Averaged stats (hcp-train-subset): loss: 0.9695 (0.9710) +Eval (hcp-val): [3] [ 0/62] eta: 0:04:02 loss: 0.9661 (0.9661) time: 3.9034 data: 3.8314 max mem: 9377 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9711 (0.9706) time: 0.1357 data: 0.1091 max mem: 9377 +Eval (hcp-val): [3] Total time: 0:00:12 (0.2093 s / it) +Averaged stats (hcp-val): loss: 0.9711 (0.9706) +Eval (nsd-val): [3] [ 0/62] eta: 0:05:14 loss: 0.9578 (0.9578) time: 5.0754 data: 5.0417 max mem: 9377 +Eval (nsd-val): [3] [61/62] eta: 0:00:00 loss: 0.9637 (0.9633) time: 0.1665 data: 0.1409 max mem: 9377 +Eval (nsd-val): [3] Total time: 0:00:15 (0.2532 s / it) +Averaged stats (nsd-val): loss: 0.9637 (0.9633) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [4] [ 0/6250] eta: 9:12:12 lr: 0.000100 grad: 0.2403 (0.2403) loss: 0.9763 (0.9763) time: 5.3013 data: 5.0991 max mem: 9377 +Train: [4] [ 100/6250] eta: 0:21:55 lr: 0.000100 grad: 0.1662 (0.1900) loss: 0.9687 (0.9702) time: 0.1768 data: 0.0874 max mem: 9377 +Train: [4] [ 200/6250] eta: 0:20:07 lr: 0.000101 grad: 0.1600 (0.2063) loss: 0.9709 (0.9687) time: 0.1528 data: 0.0634 max mem: 9377 +Train: [4] [ 300/6250] eta: 0:18:33 lr: 0.000101 grad: 0.1589 (0.2074) loss: 0.9675 (0.9685) time: 0.1765 data: 0.0929 max mem: 9377 +Train: [4] [ 400/6250] eta: 0:17:38 lr: 0.000102 grad: 0.1776 (0.2031) loss: 0.9677 (0.9678) time: 0.1585 data: 0.0697 max mem: 9377 +Train: [4] [ 500/6250] eta: 0:16:53 lr: 0.000102 grad: 0.1655 (0.1997) loss: 0.9626 (0.9669) time: 0.1476 data: 0.0647 max mem: 9377 +Train: [4] [ 600/6250] eta: 0:16:39 lr: 0.000102 grad: 0.1633 (0.2023) loss: 0.9596 (0.9663) time: 0.1999 data: 0.1025 max mem: 9377 +Train: [4] [ 700/6250] eta: 0:16:15 lr: 0.000103 grad: 0.1476 (0.1990) loss: 0.9614 (0.9659) time: 0.1605 data: 0.0653 max mem: 9377 +Train: [4] [ 800/6250] eta: 0:15:56 lr: 0.000103 grad: 0.2555 (0.2051) loss: 0.9662 (0.9656) time: 0.1954 data: 0.0856 max mem: 9377 +Train: [4] [ 900/6250] eta: 0:15:22 lr: 0.000104 grad: 0.1470 (0.2061) loss: 0.9613 (0.9652) time: 0.1479 data: 0.0554 max mem: 9377 +Train: [4] [1000/6250] eta: 0:14:55 lr: 0.000104 grad: 0.2691 (0.2090) loss: 0.9613 (0.9648) time: 0.1629 data: 0.0723 max mem: 9377 +Train: [4] [1100/6250] eta: 0:14:29 lr: 0.000104 grad: 0.1452 (0.2107) loss: 0.9581 (0.9644) time: 0.1287 data: 0.0347 max mem: 9377 +Train: [4] [1200/6250] eta: 0:14:03 lr: 0.000105 grad: 0.2205 (0.2163) loss: 0.9568 (0.9639) time: 0.1423 data: 0.0526 max mem: 9377 +Train: [4] [1300/6250] eta: 0:13:37 lr: 0.000105 grad: 0.2033 (0.2187) loss: 0.9562 (0.9634) time: 0.1170 data: 0.0281 max mem: 9377 +Train: [4] [1400/6250] eta: 0:13:12 lr: 0.000106 grad: 0.2012 (0.2216) loss: 0.9560 (0.9628) time: 0.1235 data: 0.0362 max mem: 9377 +Train: [4] [1500/6250] eta: 0:12:54 lr: 0.000106 grad: 0.2047 (0.2238) loss: 0.9536 (0.9622) time: 0.1513 data: 0.0647 max mem: 9377 +Train: [4] [1600/6250] eta: 0:12:38 lr: 0.000106 grad: 0.2124 (0.2294) loss: 0.9476 (0.9617) time: 0.1758 data: 0.0965 max mem: 9377 +Train: [4] [1700/6250] eta: 0:12:25 lr: 0.000107 grad: 0.3017 (0.2336) loss: 0.9513 (0.9610) time: 0.1730 data: 0.0951 max mem: 9377 +Train: [4] [1800/6250] eta: 0:12:08 lr: 0.000107 grad: 0.3799 (0.2394) loss: 0.9479 (0.9604) time: 0.1440 data: 0.0692 max mem: 9377 +Train: [4] [1900/6250] eta: 0:11:47 lr: 0.000108 grad: 0.3086 (0.2433) loss: 0.9466 (0.9598) time: 0.1253 data: 0.0397 max mem: 9377 +Train: [4] [2000/6250] eta: 0:11:27 lr: 0.000108 grad: 0.3119 (0.2462) loss: 0.9479 (0.9591) time: 0.1119 data: 0.0184 max mem: 9377 +Train: [4] [2100/6250] eta: 0:11:13 lr: 0.000108 grad: 0.3666 (0.2497) loss: 0.9478 (0.9585) time: 0.1688 data: 0.0873 max mem: 9377 +Train: [4] [2200/6250] eta: 0:10:58 lr: 0.000109 grad: 0.3479 (0.2522) loss: 0.9445 (0.9579) time: 0.2027 data: 0.1236 max mem: 9377 +Train: [4] [2300/6250] eta: 0:10:42 lr: 0.000109 grad: 0.3170 (0.2548) loss: 0.9435 (0.9573) time: 0.1601 data: 0.0741 max mem: 9377 +Train: [4] [2400/6250] eta: 0:10:25 lr: 0.000110 grad: 0.2578 (0.2558) loss: 0.9423 (0.9567) time: 0.1540 data: 0.0751 max mem: 9377 +Train: [4] [2500/6250] eta: 0:10:06 lr: 0.000110 grad: 0.2434 (0.2583) loss: 0.9384 (0.9561) time: 0.1411 data: 0.0532 max mem: 9377 +Train: [4] [2600/6250] eta: 0:09:49 lr: 0.000110 grad: 0.4204 (0.2609) loss: 0.9413 (0.9555) time: 0.1754 data: 0.0948 max mem: 9377 +Train: [4] [2700/6250] eta: 0:09:32 lr: 0.000111 grad: 0.2359 (0.2628) loss: 0.9405 (0.9549) time: 0.1413 data: 0.0608 max mem: 9377 +Train: [4] [2800/6250] eta: 0:09:16 lr: 0.000111 grad: 0.2571 (0.2639) loss: 0.9381 (0.9544) time: 0.1335 data: 0.0468 max mem: 9377 +Train: [4] [2900/6250] eta: 0:08:59 lr: 0.000112 grad: 0.2671 (0.2643) loss: 0.9413 (0.9539) time: 0.1443 data: 0.0550 max mem: 9377 +Train: [4] [3000/6250] eta: 0:08:41 lr: 0.000112 grad: 0.3021 (0.2650) loss: 0.9405 (0.9534) time: 0.1513 data: 0.0618 max mem: 9377 +Train: [4] [3100/6250] eta: 0:08:24 lr: 0.000112 grad: 0.2242 (0.2650) loss: 0.9324 (0.9528) time: 0.1473 data: 0.0601 max mem: 9377 +Train: [4] [3200/6250] eta: 0:08:06 lr: 0.000113 grad: 0.2641 (0.2659) loss: 0.9386 (0.9524) time: 0.1539 data: 0.0677 max mem: 9377 +Train: [4] [3300/6250] eta: 0:07:49 lr: 0.000113 grad: 0.2698 (0.2670) loss: 0.9377 (0.9519) time: 0.1323 data: 0.0449 max mem: 9377 +Train: [4] [3400/6250] eta: 0:07:32 lr: 0.000114 grad: 0.3837 (0.2674) loss: 0.9365 (0.9515) time: 0.1454 data: 0.0616 max mem: 9377 +Train: [4] [3500/6250] eta: 0:07:15 lr: 0.000114 grad: 0.2520 (0.2682) loss: 0.9363 (0.9510) time: 0.1636 data: 0.0866 max mem: 9377 +Train: [4] [3600/6250] eta: 0:06:59 lr: 0.000114 grad: 0.2069 (0.2688) loss: 0.9340 (0.9505) time: 0.1617 data: 0.0712 max mem: 9377 +Train: [4] [3700/6250] eta: 0:06:42 lr: 0.000115 grad: 0.2481 (0.2690) loss: 0.9329 (0.9501) time: 0.1392 data: 0.0537 max mem: 9377 +Train: [4] [3800/6250] eta: 0:06:26 lr: 0.000115 grad: 0.2280 (0.2687) loss: 0.9344 (0.9497) time: 0.1507 data: 0.0689 max mem: 9377 +Train: [4] [3900/6250] eta: 0:06:11 lr: 0.000116 grad: 0.2531 (0.2685) loss: 0.9309 (0.9493) time: 0.1539 data: 0.0590 max mem: 9377 +Train: [4] [4000/6250] eta: 0:05:55 lr: 0.000116 grad: 0.2122 (0.2683) loss: 0.9306 (0.9489) time: 0.1557 data: 0.0734 max mem: 9377 +Train: [4] [4100/6250] eta: 0:05:39 lr: 0.000116 grad: 0.2594 (0.2688) loss: 0.9329 (0.9485) time: 0.1492 data: 0.0597 max mem: 9377 +Train: [4] [4200/6250] eta: 0:05:23 lr: 0.000117 grad: 0.2308 (0.2685) loss: 0.9305 (0.9481) time: 0.1498 data: 0.0597 max mem: 9377 +Train: [4] [4300/6250] eta: 0:05:07 lr: 0.000117 grad: 0.2497 (0.2681) loss: 0.9316 (0.9476) time: 0.1611 data: 0.0778 max mem: 9377 +Train: [4] [4400/6250] eta: 0:04:51 lr: 0.000118 grad: 0.3282 (0.2681) loss: 0.9311 (0.9472) time: 0.1395 data: 0.0536 max mem: 9377 +Train: [4] [4500/6250] eta: 0:04:35 lr: 0.000118 grad: 0.2193 (0.2680) loss: 0.9305 (0.9468) time: 0.1516 data: 0.0664 max mem: 9377 +Train: [4] [4600/6250] eta: 0:04:19 lr: 0.000118 grad: 0.2075 (0.2675) loss: 0.9304 (0.9465) time: 0.1432 data: 0.0620 max mem: 9377 +Train: [4] [4700/6250] eta: 0:04:03 lr: 0.000119 grad: 0.2657 (0.2675) loss: 0.9307 (0.9461) time: 0.1390 data: 0.0532 max mem: 9377 +Train: [4] [4800/6250] eta: 0:03:47 lr: 0.000119 grad: 0.2809 (0.2676) loss: 0.9292 (0.9457) time: 0.1470 data: 0.0562 max mem: 9377 +Train: [4] [4900/6250] eta: 0:03:31 lr: 0.000120 grad: 0.2262 (0.2673) loss: 0.9255 (0.9454) time: 0.1491 data: 0.0553 max mem: 9377 +Train: [4] [5000/6250] eta: 0:03:15 lr: 0.000120 grad: 0.1944 (0.2662) loss: 0.9276 (0.9450) time: 0.1409 data: 0.0556 max mem: 9377 +Train: [4] [5100/6250] eta: 0:02:59 lr: 0.000120 grad: 0.2215 (0.2658) loss: 0.9262 (0.9446) time: 0.1547 data: 0.0704 max mem: 9377 +Train: [4] [5200/6250] eta: 0:02:43 lr: 0.000121 grad: 0.1770 (0.2655) loss: 0.9275 (0.9443) time: 0.1350 data: 0.0530 max mem: 9377 +Train: [4] [5300/6250] eta: 0:02:27 lr: 0.000121 grad: 0.2168 (0.2651) loss: 0.9240 (0.9439) time: 0.1364 data: 0.0500 max mem: 9377 +Train: [4] [5400/6250] eta: 0:02:12 lr: 0.000122 grad: 0.2541 (0.2644) loss: 0.9241 (0.9436) time: 0.1433 data: 0.0570 max mem: 9377 +Train: [4] [5500/6250] eta: 0:01:56 lr: 0.000122 grad: 0.2447 (0.2642) loss: 0.9230 (0.9432) time: 0.1446 data: 0.0563 max mem: 9377 +Train: [4] [5600/6250] eta: 0:01:41 lr: 0.000122 grad: 0.2087 (0.2635) loss: 0.9223 (0.9429) time: 0.1436 data: 0.0546 max mem: 9377 +Train: [4] [5700/6250] eta: 0:01:25 lr: 0.000123 grad: 0.1875 (0.2627) loss: 0.9262 (0.9426) time: 0.1431 data: 0.0495 max mem: 9377 +Train: [4] [5800/6250] eta: 0:01:09 lr: 0.000123 grad: 0.1944 (0.2621) loss: 0.9235 (0.9423) time: 0.1466 data: 0.0662 max mem: 9377 +Train: [4] [5900/6250] eta: 0:00:54 lr: 0.000124 grad: 0.2197 (0.2621) loss: 0.9224 (0.9420) time: 0.1319 data: 0.0480 max mem: 9377 +Train: [4] [6000/6250] eta: 0:00:38 lr: 0.000124 grad: 0.1693 (0.2615) loss: 0.9204 (0.9417) time: 0.1608 data: 0.0827 max mem: 9377 +Train: [4] [6100/6250] eta: 0:00:23 lr: 0.000124 grad: 0.1722 (0.2608) loss: 0.9236 (0.9413) time: 0.2197 data: 0.1466 max mem: 9377 +Train: [4] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.2194 (0.2603) loss: 0.9222 (0.9410) time: 0.1578 data: 0.0725 max mem: 9377 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.2730 (0.2604) loss: 0.9213 (0.9408) time: 0.1620 data: 0.0701 max mem: 9377 +Train: [4] Total time: 0:16:15 (0.1561 s / it) +Averaged stats: lr: 0.000125 grad: 0.2730 (0.2604) loss: 0.9213 (0.9408) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:04:13 loss: 0.9203 (0.9203) time: 4.0913 data: 4.0289 max mem: 9377 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9223 (0.9214) time: 0.1598 data: 0.1344 max mem: 9377 +Eval (hcp-train-subset): [4] Total time: 0:00:14 (0.2353 s / it) +Averaged stats (hcp-train-subset): loss: 0.9223 (0.9214) +Making plots (hcp-train-subset): example=27 +Eval (hcp-val): [4] [ 0/62] eta: 0:03:56 loss: 0.9169 (0.9169) time: 3.8165 data: 3.7067 max mem: 9377 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9207 (0.9201) time: 0.1174 data: 0.0924 max mem: 9377 +Eval (hcp-val): [4] Total time: 0:00:13 (0.2118 s / it) +Averaged stats (hcp-val): loss: 0.9207 (0.9201) +Making plots (hcp-val): example=1 +Eval (nsd-val): [4] [ 0/62] eta: 0:04:58 loss: 0.8797 (0.8797) time: 4.8219 data: 4.7793 max mem: 9377 +Eval (nsd-val): [4] [61/62] eta: 0:00:00 loss: 0.8901 (0.8897) time: 0.1470 data: 0.1213 max mem: 9377 +Eval (nsd-val): [4] Total time: 0:00:14 (0.2285 s / it) +Averaged stats (nsd-val): loss: 0.8901 (0.8897) +Making plots (nsd-val): example=28 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00004.pth +Train: [5] [ 0/6250] eta: 8:52:38 lr: 0.000125 grad: 0.2842 (0.2842) loss: 0.9289 (0.9289) time: 5.1134 data: 4.9718 max mem: 9377 +Train: [5] [ 100/6250] eta: 0:21:27 lr: 0.000125 grad: 0.1637 (0.2045) loss: 0.9250 (0.9289) time: 0.1447 data: 0.0575 max mem: 9377 +Train: [5] [ 200/6250] eta: 0:19:11 lr: 0.000125 grad: 0.2216 (0.2049) loss: 0.9238 (0.9265) time: 0.1793 data: 0.0900 max mem: 9377 +Train: [5] [ 300/6250] eta: 0:18:44 lr: 0.000125 grad: 0.1949 (0.2110) loss: 0.9222 (0.9238) time: 0.1921 data: 0.1206 max mem: 9377 +Train: [5] [ 400/6250] eta: 0:17:50 lr: 0.000125 grad: 0.1693 (0.2107) loss: 0.9211 (0.9228) time: 0.1836 data: 0.1010 max mem: 9377 +Train: [5] [ 500/6250] eta: 0:17:13 lr: 0.000125 grad: 0.1820 (0.2112) loss: 0.9210 (0.9223) time: 0.1569 data: 0.0690 max mem: 9377 +Train: [5] [ 600/6250] eta: 0:16:44 lr: 0.000125 grad: 0.1780 (0.2101) loss: 0.9188 (0.9219) time: 0.1406 data: 0.0555 max mem: 9377 +Train: [5] [ 700/6250] eta: 0:16:03 lr: 0.000125 grad: 0.2153 (0.2114) loss: 0.9155 (0.9217) time: 0.1657 data: 0.0859 max mem: 9377 +Train: [5] [ 800/6250] eta: 0:15:42 lr: 0.000125 grad: 0.2392 (0.2157) loss: 0.9223 (0.9215) time: 0.1621 data: 0.0723 max mem: 9377 +Train: [5] [ 900/6250] eta: 0:15:24 lr: 0.000125 grad: 0.1713 (0.2129) loss: 0.9191 (0.9211) time: 0.1576 data: 0.0634 max mem: 9377 +Train: [5] [1000/6250] eta: 0:14:50 lr: 0.000125 grad: 0.1744 (0.2100) loss: 0.9208 (0.9209) time: 0.1404 data: 0.0360 max mem: 9377 +Train: [5] [1100/6250] eta: 0:14:22 lr: 0.000125 grad: 0.2046 (0.2093) loss: 0.9176 (0.9206) time: 0.1291 data: 0.0222 max mem: 9377 +Train: [5] [1200/6250] eta: 0:13:52 lr: 0.000125 grad: 0.1561 (0.2082) loss: 0.9168 (0.9204) time: 0.1149 data: 0.0257 max mem: 9377 +Train: [5] [1300/6250] eta: 0:13:23 lr: 0.000125 grad: 0.1680 (0.2082) loss: 0.9146 (0.9201) time: 0.1326 data: 0.0499 max mem: 9377 +Train: [5] [1400/6250] eta: 0:13:00 lr: 0.000125 grad: 0.1800 (0.2057) loss: 0.9181 (0.9199) time: 0.1511 data: 0.0679 max mem: 9377 +Train: [5] [1500/6250] eta: 0:12:45 lr: 0.000125 grad: 0.1604 (0.2044) loss: 0.9190 (0.9197) time: 0.1684 data: 0.0764 max mem: 9377 +Train: [5] [1600/6250] eta: 0:12:28 lr: 0.000125 grad: 0.1797 (0.2034) loss: 0.9103 (0.9195) time: 0.1628 data: 0.0785 max mem: 9377 +Train: [5] [1700/6250] eta: 0:12:11 lr: 0.000125 grad: 0.1948 (0.2059) loss: 0.9153 (0.9192) time: 0.1595 data: 0.0760 max mem: 9377 +Train: [5] [1800/6250] eta: 0:11:52 lr: 0.000125 grad: 0.1545 (0.2039) loss: 0.9145 (0.9190) time: 0.1263 data: 0.0352 max mem: 9377 +Train: [5] [1900/6250] eta: 0:11:33 lr: 0.000125 grad: 0.1830 (0.2049) loss: 0.9121 (0.9188) time: 0.1675 data: 0.0876 max mem: 9377 +Train: [5] [2000/6250] eta: 0:11:19 lr: 0.000125 grad: 0.1614 (0.2036) loss: 0.9115 (0.9185) time: 0.1838 data: 0.1042 max mem: 9377 +Train: [5] [2100/6250] eta: 0:11:03 lr: 0.000125 grad: 0.1534 (0.2029) loss: 0.9095 (0.9182) time: 0.1647 data: 0.0872 max mem: 9377 +Train: [5] [2200/6250] eta: 0:10:46 lr: 0.000125 grad: 0.1515 (0.2022) loss: 0.9106 (0.9179) time: 0.1294 data: 0.0425 max mem: 9377 +Train: [5] [2300/6250] eta: 0:10:28 lr: 0.000125 grad: 0.2007 (0.2019) loss: 0.9087 (0.9176) time: 0.1406 data: 0.0619 max mem: 9377 +Train: [5] [2400/6250] eta: 0:10:10 lr: 0.000125 grad: 0.1743 (0.2010) loss: 0.9107 (0.9173) time: 0.1483 data: 0.0642 max mem: 9377 +Train: [5] [2500/6250] eta: 0:09:53 lr: 0.000125 grad: 0.1654 (0.2009) loss: 0.9088 (0.9170) time: 0.1495 data: 0.0608 max mem: 9377 +Train: [5] [2600/6250] eta: 0:09:36 lr: 0.000125 grad: 0.1488 (0.2002) loss: 0.9065 (0.9166) time: 0.1025 data: 0.0112 max mem: 9377 +Train: [5] [2700/6250] eta: 0:09:21 lr: 0.000125 grad: 0.1656 (0.1997) loss: 0.9077 (0.9163) time: 0.1626 data: 0.0808 max mem: 9377 +Train: [5] [2800/6250] eta: 0:09:05 lr: 0.000125 grad: 0.1678 (0.1991) loss: 0.9056 (0.9159) time: 0.1572 data: 0.0758 max mem: 9377 +Train: [5] [2900/6250] eta: 0:08:48 lr: 0.000125 grad: 0.1564 (0.1986) loss: 0.9042 (0.9156) time: 0.1518 data: 0.0668 max mem: 9377 +Train: [5] [3000/6250] eta: 0:08:32 lr: 0.000125 grad: 0.1514 (0.1985) loss: 0.9070 (0.9152) time: 0.1634 data: 0.0757 max mem: 9377 +Train: [5] [3100/6250] eta: 0:08:15 lr: 0.000125 grad: 0.1601 (0.1981) loss: 0.9025 (0.9148) time: 0.1487 data: 0.0668 max mem: 9377 +Train: [5] [3200/6250] eta: 0:08:00 lr: 0.000125 grad: 0.1691 (0.1975) loss: 0.9013 (0.9144) time: 0.1696 data: 0.0898 max mem: 9377 +Train: [5] [3300/6250] eta: 0:07:43 lr: 0.000125 grad: 0.1658 (0.1969) loss: 0.8998 (0.9140) time: 0.1412 data: 0.0648 max mem: 9377 +Train: [5] [3400/6250] eta: 0:07:25 lr: 0.000125 grad: 0.1771 (0.1967) loss: 0.8968 (0.9136) time: 0.1337 data: 0.0494 max mem: 9377 +Train: [5] [3500/6250] eta: 0:07:08 lr: 0.000125 grad: 0.1586 (0.1961) loss: 0.9024 (0.9133) time: 0.1487 data: 0.0670 max mem: 9377 +Train: [5] [3600/6250] eta: 0:06:53 lr: 0.000125 grad: 0.1633 (0.1961) loss: 0.9011 (0.9129) time: 0.1589 data: 0.0636 max mem: 9377 +Train: [5] [3700/6250] eta: 0:06:37 lr: 0.000125 grad: 0.1519 (0.1956) loss: 0.8962 (0.9124) time: 0.1328 data: 0.0371 max mem: 9377 +Train: [5] [3800/6250] eta: 0:06:21 lr: 0.000125 grad: 0.1743 (0.1955) loss: 0.8999 (0.9121) time: 0.1367 data: 0.0456 max mem: 9377 +Train: [5] [3900/6250] eta: 0:06:05 lr: 0.000125 grad: 0.1933 (0.1951) loss: 0.8979 (0.9117) time: 0.1368 data: 0.0459 max mem: 9377 +Train: [5] [4000/6250] eta: 0:05:49 lr: 0.000125 grad: 0.1388 (0.1946) loss: 0.8944 (0.9113) time: 0.1600 data: 0.0760 max mem: 9377 +Train: [5] [4100/6250] eta: 0:05:32 lr: 0.000125 grad: 0.1477 (0.1939) loss: 0.8984 (0.9109) time: 0.1449 data: 0.0540 max mem: 9377 +Train: [5] [4200/6250] eta: 0:05:16 lr: 0.000125 grad: 0.1808 (0.1937) loss: 0.8917 (0.9105) time: 0.1461 data: 0.0556 max mem: 9377 +Train: [5] [4300/6250] eta: 0:05:00 lr: 0.000125 grad: 0.2051 (0.1934) loss: 0.8974 (0.9101) time: 0.1260 data: 0.0454 max mem: 9377 +Train: [5] [4400/6250] eta: 0:04:44 lr: 0.000125 grad: 0.1513 (0.1926) loss: 0.8913 (0.9097) time: 0.1356 data: 0.0526 max mem: 9377 +Train: [5] [4500/6250] eta: 0:04:29 lr: 0.000125 grad: 0.1767 (0.1921) loss: 0.8891 (0.9093) time: 0.1442 data: 0.0583 max mem: 9377 +Train: [5] [4600/6250] eta: 0:04:13 lr: 0.000125 grad: 0.1382 (0.1916) loss: 0.8910 (0.9089) time: 0.1594 data: 0.0737 max mem: 9377 +Train: [5] [4700/6250] eta: 0:03:58 lr: 0.000125 grad: 0.1445 (0.1909) loss: 0.8894 (0.9085) time: 0.1441 data: 0.0531 max mem: 9377 +Train: [5] [4800/6250] eta: 0:03:42 lr: 0.000125 grad: 0.1484 (0.1903) loss: 0.8844 (0.9081) time: 0.1282 data: 0.0464 max mem: 9377 +Train: [5] [4900/6250] eta: 0:03:27 lr: 0.000125 grad: 0.1285 (0.1899) loss: 0.8877 (0.9077) time: 0.1582 data: 0.0668 max mem: 9377 +Train: [5] [5000/6250] eta: 0:03:12 lr: 0.000125 grad: 0.1546 (0.1895) loss: 0.8867 (0.9072) time: 0.1495 data: 0.0590 max mem: 9377 +Train: [5] [5100/6250] eta: 0:02:56 lr: 0.000125 grad: 0.1572 (0.1889) loss: 0.8854 (0.9068) time: 0.1434 data: 0.0635 max mem: 9377 +Train: [5] [5200/6250] eta: 0:02:41 lr: 0.000125 grad: 0.1780 (0.1883) loss: 0.8833 (0.9063) time: 0.1667 data: 0.0820 max mem: 9377 +Train: [5] [5300/6250] eta: 0:02:25 lr: 0.000125 grad: 0.1667 (0.1878) loss: 0.8875 (0.9059) time: 0.1574 data: 0.0743 max mem: 9377 +Train: [5] [5400/6250] eta: 0:02:10 lr: 0.000125 grad: 0.1838 (0.1873) loss: 0.8891 (0.9055) time: 0.1542 data: 0.0728 max mem: 9377 +Train: [5] [5500/6250] eta: 0:01:54 lr: 0.000125 grad: 0.1414 (0.1869) loss: 0.8831 (0.9051) time: 0.1470 data: 0.0630 max mem: 9377 +Train: [5] [5600/6250] eta: 0:01:39 lr: 0.000125 grad: 0.1400 (0.1864) loss: 0.8827 (0.9048) time: 0.1532 data: 0.0663 max mem: 9377 +Train: [5] [5700/6250] eta: 0:01:24 lr: 0.000125 grad: 0.1599 (0.1857) loss: 0.8819 (0.9043) time: 0.1507 data: 0.0722 max mem: 9377 +Train: [5] [5800/6250] eta: 0:01:08 lr: 0.000125 grad: 0.1343 (0.1850) loss: 0.8801 (0.9039) time: 0.1553 data: 0.0753 max mem: 9377 +Train: [5] [5900/6250] eta: 0:00:53 lr: 0.000125 grad: 0.1424 (0.1845) loss: 0.8789 (0.9035) time: 0.1490 data: 0.0580 max mem: 9377 +Train: [5] [6000/6250] eta: 0:00:38 lr: 0.000125 grad: 0.1236 (0.1838) loss: 0.8789 (0.9031) time: 0.1356 data: 0.0529 max mem: 9377 +Train: [5] [6100/6250] eta: 0:00:22 lr: 0.000125 grad: 0.1261 (0.1830) loss: 0.8803 (0.9027) time: 0.1431 data: 0.0610 max mem: 9377 +Train: [5] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.1331 (0.1823) loss: 0.8741 (0.9023) time: 0.1533 data: 0.0703 max mem: 9377 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1284 (0.1820) loss: 0.8784 (0.9021) time: 0.1950 data: 0.1035 max mem: 9377 +Train: [5] Total time: 0:16:02 (0.1541 s / it) +Averaged stats: lr: 0.000125 grad: 0.1284 (0.1820) loss: 0.8784 (0.9021) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:04:00 loss: 0.8835 (0.8835) time: 3.8811 data: 3.7911 max mem: 9377 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.8812 (0.8824) time: 0.1359 data: 0.1088 max mem: 9377 +Eval (hcp-train-subset): [5] Total time: 0:00:15 (0.2493 s / it) +Averaged stats (hcp-train-subset): loss: 0.8812 (0.8824) +Eval (hcp-val): [5] [ 0/62] eta: 0:03:58 loss: 0.8758 (0.8758) time: 3.8420 data: 3.7761 max mem: 9377 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.8799 (0.8804) time: 0.1298 data: 0.1043 max mem: 9377 +Eval (hcp-val): [5] Total time: 0:00:13 (0.2243 s / it) +Averaged stats (hcp-val): loss: 0.8799 (0.8804) +Eval (nsd-val): [5] [ 0/62] eta: 0:05:49 loss: 0.8367 (0.8367) time: 5.6351 data: 5.6030 max mem: 9377 +Eval (nsd-val): [5] [61/62] eta: 0:00:00 loss: 0.8480 (0.8463) time: 0.1498 data: 0.1229 max mem: 9377 +Eval (nsd-val): [5] Total time: 0:00:14 (0.2344 s / it) +Averaged stats (nsd-val): loss: 0.8480 (0.8463) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [6] [ 0/6250] eta: 9:33:18 lr: 0.000125 grad: 0.0829 (0.0829) loss: 0.9122 (0.9122) time: 5.5038 data: 5.2555 max mem: 9377 +Train: [6] [ 100/6250] eta: 0:22:14 lr: 0.000125 grad: 0.1306 (0.1770) loss: 0.8905 (0.8802) time: 0.1490 data: 0.0577 max mem: 9377 +Train: [6] [ 200/6250] eta: 0:19:22 lr: 0.000125 grad: 0.1136 (0.1554) loss: 0.8850 (0.8825) time: 0.1635 data: 0.0752 max mem: 9377 +Train: [6] [ 300/6250] eta: 0:18:23 lr: 0.000125 grad: 0.1293 (0.1545) loss: 0.8831 (0.8838) time: 0.1922 data: 0.0955 max mem: 9377 +Train: [6] [ 400/6250] eta: 0:17:19 lr: 0.000125 grad: 0.1098 (0.1491) loss: 0.8869 (0.8836) time: 0.1429 data: 0.0478 max mem: 9377 +Train: [6] [ 500/6250] eta: 0:16:38 lr: 0.000125 grad: 0.1224 (0.1504) loss: 0.8790 (0.8833) time: 0.1524 data: 0.0626 max mem: 9377 +Train: [6] [ 600/6250] eta: 0:16:02 lr: 0.000125 grad: 0.1385 (0.1490) loss: 0.8797 (0.8826) time: 0.1407 data: 0.0488 max mem: 9377 +Train: [6] [ 700/6250] eta: 0:15:45 lr: 0.000125 grad: 0.1142 (0.1463) loss: 0.8807 (0.8825) time: 0.1853 data: 0.0958 max mem: 9377 +Train: [6] [ 800/6250] eta: 0:15:15 lr: 0.000125 grad: 0.1102 (0.1440) loss: 0.8832 (0.8823) time: 0.1336 data: 0.0433 max mem: 9377 +Train: [6] [ 900/6250] eta: 0:14:46 lr: 0.000125 grad: 0.1217 (0.1433) loss: 0.8812 (0.8820) time: 0.1462 data: 0.0578 max mem: 9377 +Train: [6] [1000/6250] eta: 0:14:16 lr: 0.000125 grad: 0.1036 (0.1419) loss: 0.8830 (0.8819) time: 0.1329 data: 0.0431 max mem: 9377 +Train: [6] [1100/6250] eta: 0:13:52 lr: 0.000125 grad: 0.1108 (0.1402) loss: 0.8798 (0.8817) time: 0.1454 data: 0.0571 max mem: 9377 +Train: [6] [1200/6250] eta: 0:13:31 lr: 0.000125 grad: 0.1092 (0.1382) loss: 0.8819 (0.8815) time: 0.1420 data: 0.0547 max mem: 9377 +Train: [6] [1300/6250] eta: 0:13:10 lr: 0.000125 grad: 0.1130 (0.1374) loss: 0.8835 (0.8814) time: 0.1371 data: 0.0516 max mem: 9377 +Train: [6] [1400/6250] eta: 0:12:48 lr: 0.000125 grad: 0.1148 (0.1362) loss: 0.8822 (0.8812) time: 0.1484 data: 0.0656 max mem: 9377 +Train: [6] [1500/6250] eta: 0:12:27 lr: 0.000125 grad: 0.1065 (0.1346) loss: 0.8833 (0.8811) time: 0.1596 data: 0.0814 max mem: 9377 +Train: [6] [1600/6250] eta: 0:12:06 lr: 0.000125 grad: 0.1181 (0.1334) loss: 0.8766 (0.8808) time: 0.1499 data: 0.0529 max mem: 9377 +Train: [6] [1700/6250] eta: 0:11:50 lr: 0.000125 grad: 0.1125 (0.1329) loss: 0.8761 (0.8806) time: 0.1260 data: 0.0356 max mem: 9377 +Train: [6] [1800/6250] eta: 0:11:34 lr: 0.000125 grad: 0.1100 (0.1322) loss: 0.8780 (0.8803) time: 0.1529 data: 0.0587 max mem: 9377 +Train: [6] [1900/6250] eta: 0:11:16 lr: 0.000125 grad: 0.1185 (0.1315) loss: 0.8773 (0.8800) time: 0.1394 data: 0.0572 max mem: 9377 +Train: [6] [2000/6250] eta: 0:10:59 lr: 0.000125 grad: 0.1142 (0.1310) loss: 0.8799 (0.8798) time: 0.1639 data: 0.0831 max mem: 9377 +Train: [6] [2100/6250] eta: 0:10:43 lr: 0.000125 grad: 0.1109 (0.1303) loss: 0.8782 (0.8796) time: 0.1479 data: 0.0598 max mem: 9377 +Train: [6] [2200/6250] eta: 0:10:25 lr: 0.000125 grad: 0.1067 (0.1296) loss: 0.8759 (0.8794) time: 0.1486 data: 0.0669 max mem: 9377 +Train: [6] [2300/6250] eta: 0:10:09 lr: 0.000125 grad: 0.1068 (0.1303) loss: 0.8744 (0.8792) time: 0.1322 data: 0.0459 max mem: 9377 +Train: [6] [2400/6250] eta: 0:09:53 lr: 0.000125 grad: 0.1069 (0.1295) loss: 0.8744 (0.8790) time: 0.1585 data: 0.0739 max mem: 9377 +Train: [6] [2500/6250] eta: 0:09:40 lr: 0.000125 grad: 0.1000 (0.1288) loss: 0.8765 (0.8787) time: 0.1610 data: 0.0809 max mem: 9377 +Train: [6] [2600/6250] eta: 0:09:23 lr: 0.000125 grad: 0.1058 (0.1289) loss: 0.8728 (0.8785) time: 0.1466 data: 0.0637 max mem: 9377 +Train: [6] [2700/6250] eta: 0:09:08 lr: 0.000125 grad: 0.1067 (0.1284) loss: 0.8718 (0.8783) time: 0.1586 data: 0.0823 max mem: 9377 +Train: [6] [2800/6250] eta: 0:08:53 lr: 0.000125 grad: 0.1193 (0.1277) loss: 0.8711 (0.8781) time: 0.1734 data: 0.0937 max mem: 9377 +Train: [6] [2900/6250] eta: 0:08:37 lr: 0.000125 grad: 0.0998 (0.1269) loss: 0.8746 (0.8779) time: 0.1513 data: 0.0688 max mem: 9377 +Train: [6] [3000/6250] eta: 0:08:21 lr: 0.000125 grad: 0.1099 (0.1266) loss: 0.8695 (0.8777) time: 0.1475 data: 0.0651 max mem: 9377 +Train: [6] [3100/6250] eta: 0:08:05 lr: 0.000125 grad: 0.1145 (0.1263) loss: 0.8718 (0.8774) time: 0.1451 data: 0.0691 max mem: 9377 +Train: [6] [3200/6250] eta: 0:07:48 lr: 0.000125 grad: 0.1063 (0.1260) loss: 0.8684 (0.8773) time: 0.1346 data: 0.0571 max mem: 9377 +Train: [6] [3300/6250] eta: 0:07:33 lr: 0.000125 grad: 0.1038 (0.1256) loss: 0.8721 (0.8770) time: 0.1954 data: 0.1092 max mem: 9377 +Train: [6] [3400/6250] eta: 0:07:17 lr: 0.000125 grad: 0.1092 (0.1253) loss: 0.8744 (0.8769) time: 0.1416 data: 0.0509 max mem: 9377 +Train: [6] [3500/6250] eta: 0:07:02 lr: 0.000125 grad: 0.0966 (0.1248) loss: 0.8747 (0.8767) time: 0.1601 data: 0.0777 max mem: 9377 +Train: [6] [3600/6250] eta: 0:06:46 lr: 0.000125 grad: 0.1214 (0.1243) loss: 0.8709 (0.8766) time: 0.1620 data: 0.0808 max mem: 9377 +Train: [6] [3700/6250] eta: 0:06:30 lr: 0.000125 grad: 0.0994 (0.1239) loss: 0.8729 (0.8764) time: 0.1642 data: 0.0834 max mem: 9377 +Train: [6] [3800/6250] eta: 0:06:15 lr: 0.000125 grad: 0.1117 (0.1237) loss: 0.8728 (0.8763) time: 0.1390 data: 0.0552 max mem: 9377 +Train: [6] [3900/6250] eta: 0:05:59 lr: 0.000125 grad: 0.0991 (0.1233) loss: 0.8689 (0.8761) time: 0.1509 data: 0.0670 max mem: 9377 +Train: [6] [4000/6250] eta: 0:05:43 lr: 0.000125 grad: 0.1113 (0.1229) loss: 0.8688 (0.8759) time: 0.1423 data: 0.0522 max mem: 9377 +Train: [6] [4100/6250] eta: 0:05:28 lr: 0.000125 grad: 0.1080 (0.1226) loss: 0.8694 (0.8758) time: 0.1394 data: 0.0582 max mem: 9377 +Train: [6] [4200/6250] eta: 0:05:12 lr: 0.000125 grad: 0.0973 (0.1225) loss: 0.8728 (0.8757) time: 0.1762 data: 0.0970 max mem: 9377 +Train: [6] [4300/6250] eta: 0:04:57 lr: 0.000125 grad: 0.1367 (0.1225) loss: 0.8697 (0.8756) time: 0.2007 data: 0.1234 max mem: 9377 +Train: [6] [4400/6250] eta: 0:04:43 lr: 0.000125 grad: 0.0978 (0.1220) loss: 0.8710 (0.8755) time: 0.1586 data: 0.0645 max mem: 9377 +Train: [6] [4500/6250] eta: 0:04:28 lr: 0.000125 grad: 0.0973 (0.1215) loss: 0.8700 (0.8755) time: 0.1586 data: 0.0709 max mem: 9377 +Train: [6] [4600/6250] eta: 0:04:12 lr: 0.000125 grad: 0.1265 (0.1217) loss: 0.8760 (0.8754) time: 0.1506 data: 0.0710 max mem: 9377 +Train: [6] [4700/6250] eta: 0:03:57 lr: 0.000125 grad: 0.0944 (0.1213) loss: 0.8712 (0.8753) time: 0.1501 data: 0.0535 max mem: 9377 +Train: [6] [4800/6250] eta: 0:03:42 lr: 0.000125 grad: 0.0919 (0.1209) loss: 0.8728 (0.8752) time: 0.1368 data: 0.0393 max mem: 9377 +Train: [6] [4900/6250] eta: 0:03:26 lr: 0.000125 grad: 0.0983 (0.1206) loss: 0.8716 (0.8751) time: 0.1451 data: 0.0518 max mem: 9377 +Train: [6] [5000/6250] eta: 0:03:11 lr: 0.000125 grad: 0.1133 (0.1203) loss: 0.8667 (0.8750) time: 0.1412 data: 0.0510 max mem: 9377 +Train: [6] [5100/6250] eta: 0:02:55 lr: 0.000125 grad: 0.0976 (0.1200) loss: 0.8659 (0.8750) time: 0.1593 data: 0.0775 max mem: 9377 +Train: [6] [5200/6250] eta: 0:02:40 lr: 0.000125 grad: 0.0975 (0.1197) loss: 0.8669 (0.8748) time: 0.1357 data: 0.0568 max mem: 9377 +Train: [6] [5300/6250] eta: 0:02:24 lr: 0.000125 grad: 0.0938 (0.1195) loss: 0.8699 (0.8747) time: 0.1436 data: 0.0600 max mem: 9377 +Train: [6] [5400/6250] eta: 0:02:09 lr: 0.000125 grad: 0.0932 (0.1192) loss: 0.8701 (0.8746) time: 0.1533 data: 0.0632 max mem: 9377 +Train: [6] [5500/6250] eta: 0:01:54 lr: 0.000125 grad: 0.1010 (0.1190) loss: 0.8697 (0.8745) time: 0.1462 data: 0.0605 max mem: 9377 +Train: [6] [5600/6250] eta: 0:01:39 lr: 0.000125 grad: 0.0946 (0.1188) loss: 0.8668 (0.8744) time: 0.1290 data: 0.0362 max mem: 9377 +Train: [6] [5700/6250] eta: 0:01:23 lr: 0.000125 grad: 0.0992 (0.1187) loss: 0.8675 (0.8742) time: 0.1473 data: 0.0648 max mem: 9377 +Train: [6] [5800/6250] eta: 0:01:08 lr: 0.000125 grad: 0.1113 (0.1186) loss: 0.8659 (0.8741) time: 0.1465 data: 0.0617 max mem: 9377 +Train: [6] [5900/6250] eta: 0:00:53 lr: 0.000125 grad: 0.0912 (0.1183) loss: 0.8666 (0.8740) time: 0.1252 data: 0.0414 max mem: 9377 +Train: [6] [6000/6250] eta: 0:00:38 lr: 0.000125 grad: 0.1071 (0.1181) loss: 0.8676 (0.8739) time: 0.1748 data: 0.0864 max mem: 9377 +Train: [6] [6100/6250] eta: 0:00:22 lr: 0.000125 grad: 0.0949 (0.1179) loss: 0.8643 (0.8738) time: 0.2137 data: 0.1383 max mem: 9377 +Train: [6] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.0914 (0.1176) loss: 0.8659 (0.8737) time: 0.1660 data: 0.0766 max mem: 9377 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0911 (0.1174) loss: 0.8632 (0.8736) time: 0.1581 data: 0.0683 max mem: 9377 +Train: [6] Total time: 0:16:02 (0.1540 s / it) +Averaged stats: lr: 0.000125 grad: 0.0911 (0.1174) loss: 0.8632 (0.8736) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:04:00 loss: 0.8669 (0.8669) time: 3.8774 data: 3.7754 max mem: 9377 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.8657 (0.8668) time: 0.1357 data: 0.1106 max mem: 9377 +Eval (hcp-train-subset): [6] Total time: 0:00:15 (0.2445 s / it) +Averaged stats (hcp-train-subset): loss: 0.8657 (0.8668) +Eval (hcp-val): [6] [ 0/62] eta: 0:03:58 loss: 0.8610 (0.8610) time: 3.8498 data: 3.7845 max mem: 9377 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.8650 (0.8649) time: 0.1293 data: 0.1042 max mem: 9377 +Eval (hcp-val): [6] Total time: 0:00:13 (0.2130 s / it) +Averaged stats (hcp-val): loss: 0.8650 (0.8649) +Eval (nsd-val): [6] [ 0/62] eta: 0:08:03 loss: 0.8292 (0.8292) time: 7.7953 data: 7.5823 max mem: 9377 +Eval (nsd-val): [6] [61/62] eta: 0:00:00 loss: 0.8352 (0.8344) time: 0.1476 data: 0.1225 max mem: 9377 +Eval (nsd-val): [6] Total time: 0:00:15 (0.2517 s / it) +Averaged stats (nsd-val): loss: 0.8352 (0.8344) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [7] [ 0/6250] eta: 10:06:41 lr: 0.000125 grad: 0.0712 (0.0712) loss: 0.8877 (0.8877) time: 5.8243 data: 5.6681 max mem: 9377 +Train: [7] [ 100/6250] eta: 0:22:36 lr: 0.000125 grad: 0.0857 (0.1657) loss: 0.8629 (0.8684) time: 0.1536 data: 0.0647 max mem: 9377 +Train: [7] [ 200/6250] eta: 0:19:01 lr: 0.000125 grad: 0.1218 (0.1378) loss: 0.8618 (0.8649) time: 0.1680 data: 0.0817 max mem: 9377 +Train: [7] [ 300/6250] eta: 0:17:42 lr: 0.000125 grad: 0.1028 (0.1275) loss: 0.8642 (0.8645) time: 0.1813 data: 0.0927 max mem: 9377 +Train: [7] [ 400/6250] eta: 0:17:26 lr: 0.000125 grad: 0.0941 (0.1209) loss: 0.8650 (0.8643) time: 0.1587 data: 0.0633 max mem: 9377 +Train: [7] [ 500/6250] eta: 0:17:04 lr: 0.000125 grad: 0.0893 (0.1160) loss: 0.8702 (0.8646) time: 0.1564 data: 0.0637 max mem: 9377 +Train: [7] [ 600/6250] eta: 0:16:26 lr: 0.000125 grad: 0.1035 (0.1139) loss: 0.8676 (0.8650) time: 0.1521 data: 0.0583 max mem: 9377 +Train: [7] [ 700/6250] eta: 0:15:56 lr: 0.000125 grad: 0.1054 (0.1115) loss: 0.8609 (0.8650) time: 0.1521 data: 0.0693 max mem: 9377 +Train: [7] [ 800/6250] eta: 0:15:25 lr: 0.000125 grad: 0.1043 (0.1106) loss: 0.8656 (0.8651) time: 0.1427 data: 0.0527 max mem: 9377 +Train: [7] [ 900/6250] eta: 0:15:11 lr: 0.000125 grad: 0.0877 (0.1085) loss: 0.8691 (0.8652) time: 0.1911 data: 0.1039 max mem: 9377 +Train: [7] [1000/6250] eta: 0:14:45 lr: 0.000125 grad: 0.0962 (0.1072) loss: 0.8664 (0.8654) time: 0.1531 data: 0.0722 max mem: 9377 +Train: [7] [1100/6250] eta: 0:14:20 lr: 0.000125 grad: 0.0837 (0.1057) loss: 0.8701 (0.8655) time: 0.1666 data: 0.0758 max mem: 9377 +Train: [7] [1200/6250] eta: 0:13:52 lr: 0.000125 grad: 0.0847 (0.1048) loss: 0.8657 (0.8656) time: 0.1419 data: 0.0657 max mem: 9377 +Train: [7] [1300/6250] eta: 0:13:37 lr: 0.000125 grad: 0.0925 (0.1041) loss: 0.8635 (0.8656) time: 0.1609 data: 0.0771 max mem: 9377 +Train: [7] [1400/6250] eta: 0:13:16 lr: 0.000125 grad: 0.0940 (0.1032) loss: 0.8645 (0.8657) time: 0.1568 data: 0.0622 max mem: 9377 +Train: [7] [1500/6250] eta: 0:12:57 lr: 0.000125 grad: 0.0843 (0.1027) loss: 0.8648 (0.8657) time: 0.1400 data: 0.0548 max mem: 9377 +Train: [7] [1600/6250] eta: 0:12:37 lr: 0.000125 grad: 0.0926 (0.1019) loss: 0.8627 (0.8656) time: 0.1484 data: 0.0567 max mem: 9377 +Train: [7] [1700/6250] eta: 0:12:16 lr: 0.000125 grad: 0.0970 (0.1018) loss: 0.8574 (0.8654) time: 0.1236 data: 0.0320 max mem: 9377 +Train: [7] [1800/6250] eta: 0:11:55 lr: 0.000125 grad: 0.0894 (0.1013) loss: 0.8634 (0.8653) time: 0.1405 data: 0.0567 max mem: 9377 +Train: [7] [1900/6250] eta: 0:11:36 lr: 0.000125 grad: 0.0903 (0.1010) loss: 0.8618 (0.8653) time: 0.1638 data: 0.0807 max mem: 9377 +Train: [7] [2000/6250] eta: 0:11:16 lr: 0.000125 grad: 0.0839 (0.1004) loss: 0.8637 (0.8652) time: 0.1259 data: 0.0423 max mem: 9377 +Train: [7] [2100/6250] eta: 0:10:56 lr: 0.000125 grad: 0.0858 (0.1000) loss: 0.8619 (0.8651) time: 0.1158 data: 0.0280 max mem: 9377 +Train: [7] [2200/6250] eta: 0:10:37 lr: 0.000125 grad: 0.0964 (0.1002) loss: 0.8594 (0.8651) time: 0.1407 data: 0.0570 max mem: 9377 +Train: [7] [2300/6250] eta: 0:10:17 lr: 0.000125 grad: 0.0836 (0.0997) loss: 0.8583 (0.8650) time: 0.1398 data: 0.0508 max mem: 9377 +Train: [7] [2400/6250] eta: 0:10:01 lr: 0.000125 grad: 0.0843 (0.1002) loss: 0.8657 (0.8649) time: 0.1686 data: 0.0880 max mem: 9377 +Train: [7] [2500/6250] eta: 0:09:45 lr: 0.000125 grad: 0.0931 (0.1000) loss: 0.8681 (0.8648) time: 0.1580 data: 0.0739 max mem: 9377 +Train: [7] [2600/6250] eta: 0:09:28 lr: 0.000125 grad: 0.0913 (0.0998) loss: 0.8639 (0.8647) time: 0.1526 data: 0.0743 max mem: 9377 +Train: [7] [2700/6250] eta: 0:09:12 lr: 0.000125 grad: 0.0870 (0.0995) loss: 0.8648 (0.8646) time: 0.1577 data: 0.0733 max mem: 9377 +Train: [7] [2800/6250] eta: 0:08:56 lr: 0.000125 grad: 0.0826 (0.0992) loss: 0.8639 (0.8646) time: 0.1555 data: 0.0730 max mem: 9377 +Train: [7] [2900/6250] eta: 0:08:39 lr: 0.000125 grad: 0.0841 (0.0989) loss: 0.8655 (0.8645) time: 0.1362 data: 0.0534 max mem: 9377 +Train: [7] [3000/6250] eta: 0:08:23 lr: 0.000125 grad: 0.0863 (0.0987) loss: 0.8570 (0.8645) time: 0.1545 data: 0.0706 max mem: 9377 +Train: [7] [3100/6250] eta: 0:08:07 lr: 0.000125 grad: 0.0904 (0.0987) loss: 0.8641 (0.8644) time: 0.1448 data: 0.0482 max mem: 9377 +Train: [7] [3200/6250] eta: 0:07:51 lr: 0.000125 grad: 0.0804 (0.0987) loss: 0.8631 (0.8643) time: 0.1522 data: 0.0708 max mem: 9377 +Train: [7] [3300/6250] eta: 0:07:36 lr: 0.000125 grad: 0.0838 (0.0984) loss: 0.8611 (0.8643) time: 0.1393 data: 0.0519 max mem: 9377 +Train: [7] [3400/6250] eta: 0:07:20 lr: 0.000125 grad: 0.0877 (0.0981) loss: 0.8606 (0.8642) time: 0.1384 data: 0.0571 max mem: 9377 +Train: [7] [3500/6250] eta: 0:07:05 lr: 0.000125 grad: 0.0871 (0.0981) loss: 0.8589 (0.8640) time: 0.1473 data: 0.0653 max mem: 9377 +Train: [7] [3600/6250] eta: 0:06:50 lr: 0.000125 grad: 0.0821 (0.0979) loss: 0.8610 (0.8640) time: 0.1740 data: 0.1003 max mem: 9377 +Train: [7] [3700/6250] eta: 0:06:33 lr: 0.000125 grad: 0.1076 (0.0979) loss: 0.8593 (0.8639) time: 0.1550 data: 0.0730 max mem: 9377 +Train: [7] [3800/6250] eta: 0:06:17 lr: 0.000125 grad: 0.0862 (0.0976) loss: 0.8609 (0.8638) time: 0.1317 data: 0.0494 max mem: 9377 +Train: [7] [3900/6250] eta: 0:06:01 lr: 0.000125 grad: 0.1025 (0.0976) loss: 0.8612 (0.8637) time: 0.1539 data: 0.0691 max mem: 9377 +Train: [7] [4000/6250] eta: 0:05:45 lr: 0.000125 grad: 0.0786 (0.0974) loss: 0.8581 (0.8636) time: 0.1426 data: 0.0603 max mem: 9377 +Train: [7] [4100/6250] eta: 0:05:30 lr: 0.000125 grad: 0.0824 (0.0971) loss: 0.8602 (0.8636) time: 0.1543 data: 0.0678 max mem: 9377 +Train: [7] [4200/6250] eta: 0:05:14 lr: 0.000125 grad: 0.0832 (0.0969) loss: 0.8597 (0.8635) time: 0.1442 data: 0.0589 max mem: 9377 +Train: [7] [4300/6250] eta: 0:04:58 lr: 0.000125 grad: 0.0855 (0.0967) loss: 0.8556 (0.8634) time: 0.1215 data: 0.0362 max mem: 9377 +Train: [7] [4400/6250] eta: 0:04:42 lr: 0.000125 grad: 0.0872 (0.0965) loss: 0.8598 (0.8633) time: 0.1358 data: 0.0553 max mem: 9377 +Train: [7] [4500/6250] eta: 0:04:26 lr: 0.000125 grad: 0.0849 (0.0963) loss: 0.8582 (0.8632) time: 0.1359 data: 0.0547 max mem: 9377 +Train: [7] [4600/6250] eta: 0:04:11 lr: 0.000125 grad: 0.0834 (0.0961) loss: 0.8591 (0.8631) time: 0.1436 data: 0.0629 max mem: 9377 +Train: [7] [4700/6250] eta: 0:03:55 lr: 0.000125 grad: 0.0854 (0.0960) loss: 0.8550 (0.8630) time: 0.1520 data: 0.0652 max mem: 9377 +Train: [7] [4800/6250] eta: 0:03:40 lr: 0.000125 grad: 0.0812 (0.0957) loss: 0.8579 (0.8629) time: 0.1823 data: 0.0912 max mem: 9377 +Train: [7] [4900/6250] eta: 0:03:25 lr: 0.000125 grad: 0.0828 (0.0955) loss: 0.8547 (0.8628) time: 0.1616 data: 0.0688 max mem: 9377 +Train: [7] [5000/6250] eta: 0:03:10 lr: 0.000125 grad: 0.0924 (0.0954) loss: 0.8596 (0.8627) time: 0.1578 data: 0.0687 max mem: 9377 +Train: [7] [5100/6250] eta: 0:02:55 lr: 0.000125 grad: 0.0792 (0.0953) loss: 0.8650 (0.8627) time: 0.1469 data: 0.0522 max mem: 9377 +Train: [7] [5200/6250] eta: 0:02:40 lr: 0.000125 grad: 0.0858 (0.0952) loss: 0.8635 (0.8626) time: 0.1479 data: 0.0507 max mem: 9377 +Train: [7] [5300/6250] eta: 0:02:24 lr: 0.000125 grad: 0.0891 (0.0952) loss: 0.8582 (0.8625) time: 0.1311 data: 0.0426 max mem: 9377 +Train: [7] [5400/6250] eta: 0:02:09 lr: 0.000125 grad: 0.0932 (0.0951) loss: 0.8593 (0.8625) time: 0.1344 data: 0.0420 max mem: 9377 +Train: [7] [5500/6250] eta: 0:01:54 lr: 0.000125 grad: 0.0811 (0.0950) loss: 0.8598 (0.8624) time: 0.1228 data: 0.0398 max mem: 9377 +Train: [7] [5600/6250] eta: 0:01:38 lr: 0.000125 grad: 0.0929 (0.0949) loss: 0.8586 (0.8623) time: 0.1331 data: 0.0429 max mem: 9377 +Train: [7] [5700/6250] eta: 0:01:23 lr: 0.000125 grad: 0.0805 (0.0947) loss: 0.8612 (0.8623) time: 0.1600 data: 0.0753 max mem: 9377 +Train: [7] [5800/6250] eta: 0:01:08 lr: 0.000125 grad: 0.0877 (0.0946) loss: 0.8613 (0.8623) time: 0.1510 data: 0.0657 max mem: 9377 +Train: [7] [5900/6250] eta: 0:00:53 lr: 0.000125 grad: 0.0804 (0.0944) loss: 0.8572 (0.8622) time: 0.1712 data: 0.0893 max mem: 9377 +Train: [7] [6000/6250] eta: 0:00:38 lr: 0.000125 grad: 0.0777 (0.0943) loss: 0.8618 (0.8622) time: 0.1496 data: 0.0668 max mem: 9377 +Train: [7] [6100/6250] eta: 0:00:22 lr: 0.000125 grad: 0.0795 (0.0942) loss: 0.8597 (0.8621) time: 0.2045 data: 0.1266 max mem: 9377 +Train: [7] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.0873 (0.0941) loss: 0.8594 (0.8621) time: 0.1576 data: 0.0806 max mem: 9377 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0812 (0.0940) loss: 0.8563 (0.8620) time: 0.1781 data: 0.0886 max mem: 9377 +Train: [7] Total time: 0:15:58 (0.1534 s / it) +Averaged stats: lr: 0.000125 grad: 0.0812 (0.0940) loss: 0.8563 (0.8620) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:03:40 loss: 0.8618 (0.8618) time: 3.5487 data: 3.4604 max mem: 9377 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.8604 (0.8607) time: 0.1362 data: 0.1108 max mem: 9377 +Eval (hcp-train-subset): [7] Total time: 0:00:13 (0.2181 s / it) +Averaged stats (hcp-train-subset): loss: 0.8604 (0.8607) +Eval (hcp-val): [7] [ 0/62] eta: 0:04:11 loss: 0.8547 (0.8547) time: 4.0569 data: 4.0266 max mem: 9377 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.8590 (0.8584) time: 0.1305 data: 0.1055 max mem: 9377 +Eval (hcp-val): [7] Total time: 0:00:12 (0.2066 s / it) +Averaged stats (hcp-val): loss: 0.8590 (0.8584) +Eval (nsd-val): [7] [ 0/62] eta: 0:05:14 loss: 0.8160 (0.8160) time: 5.0753 data: 5.0444 max mem: 9377 +Eval (nsd-val): [7] [61/62] eta: 0:00:00 loss: 0.8252 (0.8268) time: 0.1514 data: 0.1246 max mem: 9377 +Eval (nsd-val): [7] Total time: 0:00:15 (0.2487 s / it) +Averaged stats (nsd-val): loss: 0.8252 (0.8268) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [8] [ 0/6250] eta: 11:07:47 lr: 0.000125 grad: 0.0783 (0.0783) loss: 0.8527 (0.8527) time: 6.4109 data: 6.3020 max mem: 9377 +Train: [8] [ 100/6250] eta: 0:23:26 lr: 0.000125 grad: 0.0845 (0.0985) loss: 0.8528 (0.8586) time: 0.1557 data: 0.0669 max mem: 9377 +Train: [8] [ 200/6250] eta: 0:19:55 lr: 0.000125 grad: 0.0855 (0.0917) loss: 0.8615 (0.8598) time: 0.1686 data: 0.0703 max mem: 9377 +Train: [8] [ 300/6250] eta: 0:18:38 lr: 0.000125 grad: 0.0813 (0.0888) loss: 0.8669 (0.8597) time: 0.1859 data: 0.0919 max mem: 9377 +Train: [8] [ 400/6250] eta: 0:17:37 lr: 0.000125 grad: 0.1188 (0.0909) loss: 0.8551 (0.8593) time: 0.1623 data: 0.0760 max mem: 9377 +Train: [8] [ 500/6250] eta: 0:16:53 lr: 0.000125 grad: 0.0790 (0.0894) loss: 0.8597 (0.8593) time: 0.1502 data: 0.0522 max mem: 9377 +Train: [8] [ 600/6250] eta: 0:16:18 lr: 0.000125 grad: 0.0930 (0.0906) loss: 0.8606 (0.8597) time: 0.1458 data: 0.0528 max mem: 9377 +Train: [8] [ 700/6250] eta: 0:15:45 lr: 0.000125 grad: 0.0738 (0.0894) loss: 0.8648 (0.8600) time: 0.1457 data: 0.0609 max mem: 9377 +Train: [8] [ 800/6250] eta: 0:15:20 lr: 0.000125 grad: 0.0876 (0.0893) loss: 0.8612 (0.8601) time: 0.1667 data: 0.0808 max mem: 9377 +Train: [8] [ 900/6250] eta: 0:14:55 lr: 0.000125 grad: 0.0887 (0.0888) loss: 0.8563 (0.8600) time: 0.1346 data: 0.0485 max mem: 9377 +Train: [8] [1000/6250] eta: 0:14:29 lr: 0.000125 grad: 0.0901 (0.0887) loss: 0.8584 (0.8597) time: 0.1387 data: 0.0535 max mem: 9377 +Train: [8] [1100/6250] eta: 0:14:03 lr: 0.000125 grad: 0.0752 (0.0888) loss: 0.8606 (0.8595) time: 0.1573 data: 0.0685 max mem: 9377 +Train: [8] [1200/6250] eta: 0:13:51 lr: 0.000125 grad: 0.0772 (0.0886) loss: 0.8575 (0.8593) time: 0.1606 data: 0.0738 max mem: 9377 +Train: [8] [1300/6250] eta: 0:13:31 lr: 0.000125 grad: 0.0806 (0.0882) loss: 0.8556 (0.8589) time: 0.1486 data: 0.0641 max mem: 9377 +Train: [8] [1400/6250] eta: 0:13:14 lr: 0.000125 grad: 0.0775 (0.0882) loss: 0.8548 (0.8586) time: 0.1607 data: 0.0844 max mem: 9377 +Train: [8] [1500/6250] eta: 0:12:56 lr: 0.000125 grad: 0.0800 (0.0880) loss: 0.8546 (0.8584) time: 0.1759 data: 0.0854 max mem: 9377 +Train: [8] [1600/6250] eta: 0:12:44 lr: 0.000125 grad: 0.0823 (0.0879) loss: 0.8539 (0.8582) time: 0.1660 data: 0.0738 max mem: 9377 +Train: [8] [1700/6250] eta: 0:12:23 lr: 0.000125 grad: 0.0931 (0.0882) loss: 0.8577 (0.8580) time: 0.1489 data: 0.0635 max mem: 9377 +Train: [8] [1800/6250] eta: 0:12:04 lr: 0.000125 grad: 0.0928 (0.0891) loss: 0.8532 (0.8578) time: 0.1637 data: 0.0797 max mem: 9377 +Train: [8] [1900/6250] eta: 0:11:47 lr: 0.000125 grad: 0.0797 (0.0893) loss: 0.8511 (0.8576) time: 0.1521 data: 0.0586 max mem: 9377 +Train: [8] [2000/6250] eta: 0:11:30 lr: 0.000125 grad: 0.0812 (0.0892) loss: 0.8541 (0.8575) time: 0.1359 data: 0.0535 max mem: 9377 +Train: [8] [2100/6250] eta: 0:11:10 lr: 0.000125 grad: 0.0920 (0.0893) loss: 0.8524 (0.8573) time: 0.1610 data: 0.0812 max mem: 9377 +Train: [8] [2200/6250] eta: 0:10:52 lr: 0.000125 grad: 0.0854 (0.0890) loss: 0.8543 (0.8571) time: 0.1450 data: 0.0609 max mem: 9377 +Train: [8] [2300/6250] eta: 0:10:35 lr: 0.000125 grad: 0.0908 (0.0889) loss: 0.8530 (0.8570) time: 0.1598 data: 0.0695 max mem: 9377 +Train: [8] [2400/6250] eta: 0:10:17 lr: 0.000125 grad: 0.0767 (0.0887) loss: 0.8573 (0.8569) time: 0.1411 data: 0.0575 max mem: 9377 +Train: [8] [2500/6250] eta: 0:09:59 lr: 0.000125 grad: 0.0812 (0.0886) loss: 0.8512 (0.8566) time: 0.1548 data: 0.0681 max mem: 9377 +Train: [8] [2600/6250] eta: 0:09:42 lr: 0.000125 grad: 0.0789 (0.0883) loss: 0.8583 (0.8566) time: 0.1473 data: 0.0654 max mem: 9377 +Train: [8] [2700/6250] eta: 0:09:25 lr: 0.000125 grad: 0.0802 (0.0881) loss: 0.8577 (0.8565) time: 0.1496 data: 0.0609 max mem: 9377 +Train: [8] [2800/6250] eta: 0:09:06 lr: 0.000125 grad: 0.0814 (0.0879) loss: 0.8515 (0.8564) time: 0.1397 data: 0.0523 max mem: 9377 +Train: [8] [2900/6250] eta: 0:08:48 lr: 0.000125 grad: 0.0933 (0.0880) loss: 0.8523 (0.8563) time: 0.1215 data: 0.0325 max mem: 9377 +Train: [8] [3000/6250] eta: 0:08:31 lr: 0.000125 grad: 0.0835 (0.0879) loss: 0.8528 (0.8563) time: 0.1408 data: 0.0543 max mem: 9377 +Train: [8] [3100/6250] eta: 0:08:15 lr: 0.000125 grad: 0.0808 (0.0878) loss: 0.8547 (0.8563) time: 0.1467 data: 0.0617 max mem: 9377 +Train: [8] [3200/6250] eta: 0:07:59 lr: 0.000125 grad: 0.0814 (0.0879) loss: 0.8583 (0.8562) time: 0.1691 data: 0.0876 max mem: 9377 +Train: [8] [3300/6250] eta: 0:07:42 lr: 0.000125 grad: 0.0933 (0.0881) loss: 0.8521 (0.8562) time: 0.1541 data: 0.0755 max mem: 9377 +Train: [8] [3400/6250] eta: 0:07:24 lr: 0.000125 grad: 0.0812 (0.0881) loss: 0.8547 (0.8561) time: 0.1339 data: 0.0509 max mem: 9377 +Train: [8] [3500/6250] eta: 0:07:08 lr: 0.000125 grad: 0.0806 (0.0880) loss: 0.8576 (0.8560) time: 0.1504 data: 0.0600 max mem: 9377 +Train: [8] [3600/6250] eta: 0:06:52 lr: 0.000125 grad: 0.0820 (0.0880) loss: 0.8555 (0.8560) time: 0.1455 data: 0.0527 max mem: 9377 +Train: [8] [3700/6250] eta: 0:06:36 lr: 0.000125 grad: 0.0763 (0.0878) loss: 0.8570 (0.8560) time: 0.1702 data: 0.0851 max mem: 9377 +Train: [8] [3800/6250] eta: 0:06:21 lr: 0.000125 grad: 0.0808 (0.0878) loss: 0.8521 (0.8559) time: 0.1543 data: 0.0742 max mem: 9377 +Train: [8] [3900/6250] eta: 0:06:05 lr: 0.000125 grad: 0.0762 (0.0877) loss: 0.8520 (0.8559) time: 0.1511 data: 0.0661 max mem: 9377 +Train: [8] [4000/6250] eta: 0:05:50 lr: 0.000125 grad: 0.0835 (0.0878) loss: 0.8518 (0.8559) time: 0.1527 data: 0.0698 max mem: 9377 +Train: [8] [4100/6250] eta: 0:05:34 lr: 0.000125 grad: 0.0751 (0.0876) loss: 0.8517 (0.8559) time: 0.1619 data: 0.0809 max mem: 9377 +Train: [8] [4200/6250] eta: 0:05:19 lr: 0.000125 grad: 0.0762 (0.0876) loss: 0.8549 (0.8559) time: 0.1586 data: 0.0748 max mem: 9377 +Train: [8] [4300/6250] eta: 0:05:03 lr: 0.000125 grad: 0.0762 (0.0874) loss: 0.8575 (0.8558) time: 0.1844 data: 0.1060 max mem: 9377 +Train: [8] [4400/6250] eta: 0:04:48 lr: 0.000125 grad: 0.0785 (0.0873) loss: 0.8484 (0.8558) time: 0.1317 data: 0.0531 max mem: 9377 +Train: [8] [4500/6250] eta: 0:04:32 lr: 0.000125 grad: 0.0740 (0.0873) loss: 0.8537 (0.8557) time: 0.1375 data: 0.0505 max mem: 9377 +Train: [8] [4600/6250] eta: 0:04:16 lr: 0.000125 grad: 0.0783 (0.0872) loss: 0.8509 (0.8557) time: 0.1349 data: 0.0490 max mem: 9377 +Train: [8] [4700/6250] eta: 0:04:00 lr: 0.000125 grad: 0.0745 (0.0870) loss: 0.8549 (0.8557) time: 0.1517 data: 0.0662 max mem: 9377 +Train: [8] [4800/6250] eta: 0:03:44 lr: 0.000125 grad: 0.0776 (0.0870) loss: 0.8568 (0.8557) time: 0.1361 data: 0.0458 max mem: 9377 +Train: [8] [4900/6250] eta: 0:03:29 lr: 0.000125 grad: 0.0853 (0.0869) loss: 0.8553 (0.8557) time: 0.1507 data: 0.0688 max mem: 9377 +Train: [8] [5000/6250] eta: 0:03:13 lr: 0.000125 grad: 0.0731 (0.0866) loss: 0.8557 (0.8556) time: 0.1309 data: 0.0544 max mem: 9377 +Train: [8] [5100/6250] eta: 0:02:57 lr: 0.000125 grad: 0.0756 (0.0864) loss: 0.8536 (0.8556) time: 0.1525 data: 0.0638 max mem: 9377 +Train: [8] [5200/6250] eta: 0:02:42 lr: 0.000124 grad: 0.0766 (0.0863) loss: 0.8538 (0.8556) time: 0.1886 data: 0.1001 max mem: 9377 +Train: [8] [5300/6250] eta: 0:02:26 lr: 0.000124 grad: 0.0729 (0.0861) loss: 0.8560 (0.8556) time: 0.1658 data: 0.0826 max mem: 9377 +Train: [8] [5400/6250] eta: 0:02:11 lr: 0.000124 grad: 0.0810 (0.0860) loss: 0.8591 (0.8556) time: 0.1761 data: 0.0965 max mem: 9377 +Train: [8] [5500/6250] eta: 0:01:56 lr: 0.000124 grad: 0.0748 (0.0858) loss: 0.8551 (0.8555) time: 0.1498 data: 0.0508 max mem: 9377 +Train: [8] [5600/6250] eta: 0:01:40 lr: 0.000124 grad: 0.0770 (0.0857) loss: 0.8475 (0.8555) time: 0.1533 data: 0.0662 max mem: 9377 +Train: [8] [5700/6250] eta: 0:01:25 lr: 0.000124 grad: 0.0768 (0.0856) loss: 0.8567 (0.8554) time: 0.1630 data: 0.0767 max mem: 9377 +Train: [8] [5800/6250] eta: 0:01:09 lr: 0.000124 grad: 0.0808 (0.0854) loss: 0.8504 (0.8554) time: 0.1533 data: 0.0690 max mem: 9377 +Train: [8] [5900/6250] eta: 0:00:54 lr: 0.000124 grad: 0.0741 (0.0853) loss: 0.8541 (0.8553) time: 0.1510 data: 0.0663 max mem: 9377 +Train: [8] [6000/6250] eta: 0:00:38 lr: 0.000124 grad: 0.0744 (0.0852) loss: 0.8542 (0.8553) time: 0.1487 data: 0.0619 max mem: 9377 +Train: [8] [6100/6250] eta: 0:00:23 lr: 0.000124 grad: 0.0723 (0.0851) loss: 0.8610 (0.8553) time: 0.1575 data: 0.0774 max mem: 9377 +Train: [8] [6200/6250] eta: 0:00:07 lr: 0.000124 grad: 0.0752 (0.0850) loss: 0.8568 (0.8553) time: 0.1719 data: 0.0782 max mem: 9377 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0815 (0.0850) loss: 0.8500 (0.8552) time: 0.1627 data: 0.0813 max mem: 9377 +Train: [8] Total time: 0:16:14 (0.1559 s / it) +Averaged stats: lr: 0.000124 grad: 0.0815 (0.0850) loss: 0.8500 (0.8552) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:08:05 loss: 0.8561 (0.8561) time: 7.8308 data: 7.7986 max mem: 9377 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.8569 (0.8566) time: 0.1353 data: 0.1100 max mem: 9377 +Eval (hcp-train-subset): [8] Total time: 0:00:14 (0.2378 s / it) +Averaged stats (hcp-train-subset): loss: 0.8569 (0.8566) +Eval (hcp-val): [8] [ 0/62] eta: 0:04:38 loss: 0.8498 (0.8498) time: 4.4954 data: 4.4625 max mem: 9377 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8529 (0.8545) time: 0.1515 data: 0.1241 max mem: 9377 +Eval (hcp-val): [8] Total time: 0:00:13 (0.2157 s / it) +Averaged stats (hcp-val): loss: 0.8529 (0.8545) +Eval (nsd-val): [8] [ 0/62] eta: 0:04:59 loss: 0.8115 (0.8115) time: 4.8277 data: 4.7827 max mem: 9377 +Eval (nsd-val): [8] [61/62] eta: 0:00:00 loss: 0.8214 (0.8224) time: 0.1443 data: 0.1170 max mem: 9377 +Eval (nsd-val): [8] Total time: 0:00:14 (0.2359 s / it) +Averaged stats (nsd-val): loss: 0.8214 (0.8224) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [9] [ 0/6250] eta: 10:38:23 lr: 0.000124 grad: 0.1365 (0.1365) loss: 0.8092 (0.8092) time: 6.1285 data: 5.9597 max mem: 9377 +Train: [9] [ 100/6250] eta: 0:22:48 lr: 0.000124 grad: 0.0874 (0.1127) loss: 0.8393 (0.8438) time: 0.1901 data: 0.1000 max mem: 9377 +Train: [9] [ 200/6250] eta: 0:19:52 lr: 0.000124 grad: 0.0800 (0.0986) loss: 0.8425 (0.8442) time: 0.1986 data: 0.1080 max mem: 9377 +Train: [9] [ 300/6250] eta: 0:18:32 lr: 0.000124 grad: 0.0812 (0.0961) loss: 0.8420 (0.8443) time: 0.1502 data: 0.0571 max mem: 9377 +Train: [9] [ 400/6250] eta: 0:17:19 lr: 0.000124 grad: 0.0764 (0.0929) loss: 0.8529 (0.8459) time: 0.1515 data: 0.0532 max mem: 9377 +Train: [9] [ 500/6250] eta: 0:16:36 lr: 0.000124 grad: 0.0749 (0.0895) loss: 0.8587 (0.8473) time: 0.1549 data: 0.0663 max mem: 9377 +Train: [9] [ 600/6250] eta: 0:16:21 lr: 0.000124 grad: 0.0781 (0.0881) loss: 0.8506 (0.8480) time: 0.1892 data: 0.0962 max mem: 9377 +Train: [9] [ 700/6250] eta: 0:15:49 lr: 0.000124 grad: 0.0774 (0.0867) loss: 0.8505 (0.8484) time: 0.1634 data: 0.0678 max mem: 9377 +Train: [9] [ 800/6250] eta: 0:15:19 lr: 0.000124 grad: 0.0841 (0.0861) loss: 0.8541 (0.8489) time: 0.1452 data: 0.0545 max mem: 9377 +Train: [9] [ 900/6250] eta: 0:14:57 lr: 0.000124 grad: 0.0732 (0.0858) loss: 0.8477 (0.8491) time: 0.1529 data: 0.0617 max mem: 9377 +Train: [9] [1000/6250] eta: 0:14:32 lr: 0.000124 grad: 0.0731 (0.0851) loss: 0.8535 (0.8496) time: 0.1620 data: 0.0791 max mem: 9377 +Train: [9] [1100/6250] eta: 0:14:04 lr: 0.000124 grad: 0.0765 (0.0844) loss: 0.8577 (0.8499) time: 0.1403 data: 0.0542 max mem: 9377 +Train: [9] [1200/6250] eta: 0:13:41 lr: 0.000124 grad: 0.0703 (0.0839) loss: 0.8504 (0.8501) time: 0.1352 data: 0.0544 max mem: 9377 +Train: [9] [1300/6250] eta: 0:13:19 lr: 0.000124 grad: 0.0728 (0.0832) loss: 0.8526 (0.8504) time: 0.1407 data: 0.0489 max mem: 9377 +Train: [9] [1400/6250] eta: 0:13:05 lr: 0.000124 grad: 0.0739 (0.0827) loss: 0.8480 (0.8504) time: 0.1605 data: 0.0776 max mem: 9377 +Train: [9] [1500/6250] eta: 0:12:47 lr: 0.000124 grad: 0.0724 (0.0824) loss: 0.8523 (0.8506) time: 0.1701 data: 0.0833 max mem: 9377 +Train: [9] [1600/6250] eta: 0:12:28 lr: 0.000124 grad: 0.0745 (0.0823) loss: 0.8543 (0.8506) time: 0.1427 data: 0.0572 max mem: 9377 +Train: [9] [1700/6250] eta: 0:12:08 lr: 0.000124 grad: 0.0695 (0.0826) loss: 0.8533 (0.8509) time: 0.1603 data: 0.0771 max mem: 9377 +Train: [9] [1800/6250] eta: 0:11:53 lr: 0.000124 grad: 0.0770 (0.0825) loss: 0.8532 (0.8510) time: 0.1891 data: 0.1068 max mem: 9377 +Train: [9] [1900/6250] eta: 0:11:35 lr: 0.000124 grad: 0.0754 (0.0821) loss: 0.8494 (0.8512) time: 0.1469 data: 0.0636 max mem: 9377 +Train: [9] [2000/6250] eta: 0:11:18 lr: 0.000124 grad: 0.0759 (0.0823) loss: 0.8551 (0.8513) time: 0.1395 data: 0.0510 max mem: 9377 +Train: [9] [2100/6250] eta: 0:11:00 lr: 0.000124 grad: 0.0739 (0.0820) loss: 0.8510 (0.8514) time: 0.1304 data: 0.0471 max mem: 9377 +Train: [9] [2200/6250] eta: 0:10:46 lr: 0.000124 grad: 0.0756 (0.0818) loss: 0.8518 (0.8514) time: 0.1786 data: 0.0884 max mem: 9377 +Train: [9] [2300/6250] eta: 0:10:30 lr: 0.000124 grad: 0.0716 (0.0818) loss: 0.8504 (0.8514) time: 0.1591 data: 0.0747 max mem: 9377 +Train: [9] [2400/6250] eta: 0:10:12 lr: 0.000124 grad: 0.0796 (0.0817) loss: 0.8489 (0.8515) time: 0.1506 data: 0.0722 max mem: 9377 +Train: [9] [2500/6250] eta: 0:09:55 lr: 0.000124 grad: 0.0700 (0.0815) loss: 0.8499 (0.8515) time: 0.1490 data: 0.0595 max mem: 9377 +Train: [9] [2600/6250] eta: 0:09:39 lr: 0.000124 grad: 0.0768 (0.0813) loss: 0.8548 (0.8515) time: 0.1635 data: 0.0788 max mem: 9377 +Train: [9] [2700/6250] eta: 0:09:22 lr: 0.000124 grad: 0.0782 (0.0814) loss: 0.8502 (0.8515) time: 0.1728 data: 0.0991 max mem: 9377 +Train: [9] [2800/6250] eta: 0:09:05 lr: 0.000124 grad: 0.0767 (0.0816) loss: 0.8514 (0.8515) time: 0.1346 data: 0.0426 max mem: 9377 +Train: [9] [2900/6250] eta: 0:08:49 lr: 0.000124 grad: 0.0710 (0.0816) loss: 0.8493 (0.8515) time: 0.1562 data: 0.0725 max mem: 9377 +Train: [9] [3000/6250] eta: 0:08:32 lr: 0.000124 grad: 0.0731 (0.0816) loss: 0.8536 (0.8514) time: 0.1448 data: 0.0557 max mem: 9377 +Train: [9] [3100/6250] eta: 0:08:15 lr: 0.000124 grad: 0.0714 (0.0815) loss: 0.8504 (0.8514) time: 0.1459 data: 0.0586 max mem: 9377 +Train: [9] [3200/6250] eta: 0:07:58 lr: 0.000124 grad: 0.0759 (0.0815) loss: 0.8519 (0.8514) time: 0.1353 data: 0.0472 max mem: 9377 +Train: [9] [3300/6250] eta: 0:07:42 lr: 0.000124 grad: 0.0734 (0.0814) loss: 0.8505 (0.8514) time: 0.1521 data: 0.0701 max mem: 9377 +Train: [9] [3400/6250] eta: 0:07:25 lr: 0.000124 grad: 0.0735 (0.0814) loss: 0.8549 (0.8514) time: 0.1472 data: 0.0643 max mem: 9377 +Train: [9] [3500/6250] eta: 0:07:08 lr: 0.000124 grad: 0.0729 (0.0812) loss: 0.8510 (0.8515) time: 0.1354 data: 0.0486 max mem: 9377 +Train: [9] [3600/6250] eta: 0:06:53 lr: 0.000124 grad: 0.0726 (0.0811) loss: 0.8526 (0.8515) time: 0.1842 data: 0.1094 max mem: 9377 +Train: [9] [3700/6250] eta: 0:06:37 lr: 0.000124 grad: 0.0716 (0.0808) loss: 0.8525 (0.8515) time: 0.1720 data: 0.0888 max mem: 9377 +Train: [9] [3800/6250] eta: 0:06:21 lr: 0.000124 grad: 0.0682 (0.0806) loss: 0.8507 (0.8516) time: 0.1485 data: 0.0645 max mem: 9377 +Train: [9] [3900/6250] eta: 0:06:05 lr: 0.000124 grad: 0.0827 (0.0805) loss: 0.8530 (0.8516) time: 0.1619 data: 0.0835 max mem: 9377 +Train: [9] [4000/6250] eta: 0:05:49 lr: 0.000124 grad: 0.0737 (0.0807) loss: 0.8516 (0.8517) time: 0.1489 data: 0.0697 max mem: 9377 +Train: [9] [4100/6250] eta: 0:05:32 lr: 0.000124 grad: 0.0679 (0.0806) loss: 0.8555 (0.8517) time: 0.1053 data: 0.0152 max mem: 9377 +Train: [9] [4200/6250] eta: 0:05:17 lr: 0.000124 grad: 0.0668 (0.0805) loss: 0.8552 (0.8518) time: 0.1455 data: 0.0572 max mem: 9377 +Train: [9] [4300/6250] eta: 0:05:01 lr: 0.000124 grad: 0.0676 (0.0804) loss: 0.8550 (0.8518) time: 0.1570 data: 0.0813 max mem: 9377 +Train: [9] [4400/6250] eta: 0:04:46 lr: 0.000124 grad: 0.0694 (0.0802) loss: 0.8555 (0.8519) time: 0.1721 data: 0.0919 max mem: 9377 +Train: [9] [4500/6250] eta: 0:04:31 lr: 0.000124 grad: 0.0699 (0.0801) loss: 0.8569 (0.8520) time: 0.1924 data: 0.1148 max mem: 9377 +Train: [9] [4600/6250] eta: 0:04:16 lr: 0.000124 grad: 0.0681 (0.0800) loss: 0.8530 (0.8520) time: 0.1653 data: 0.0844 max mem: 9377 +Train: [9] [4700/6250] eta: 0:04:00 lr: 0.000124 grad: 0.0681 (0.0799) loss: 0.8524 (0.8520) time: 0.1538 data: 0.0707 max mem: 9377 +Train: [9] [4800/6250] eta: 0:03:44 lr: 0.000124 grad: 0.0715 (0.0798) loss: 0.8485 (0.8520) time: 0.1545 data: 0.0694 max mem: 9377 +Train: [9] [4900/6250] eta: 0:03:29 lr: 0.000124 grad: 0.0760 (0.0799) loss: 0.8532 (0.8519) time: 0.1317 data: 0.0510 max mem: 9377 +Train: [9] [5000/6250] eta: 0:03:13 lr: 0.000124 grad: 0.0768 (0.0799) loss: 0.8500 (0.8519) time: 0.1352 data: 0.0459 max mem: 9377 +Train: [9] [5100/6250] eta: 0:02:57 lr: 0.000124 grad: 0.0728 (0.0798) loss: 0.8478 (0.8519) time: 0.1545 data: 0.0749 max mem: 9377 +Train: [9] [5200/6250] eta: 0:02:42 lr: 0.000124 grad: 0.0708 (0.0798) loss: 0.8512 (0.8518) time: 0.1578 data: 0.0728 max mem: 9377 +Train: [9] [5300/6250] eta: 0:02:26 lr: 0.000124 grad: 0.0749 (0.0797) loss: 0.8524 (0.8518) time: 0.1494 data: 0.0677 max mem: 9377 +Train: [9] [5400/6250] eta: 0:02:11 lr: 0.000124 grad: 0.0740 (0.0796) loss: 0.8484 (0.8518) time: 0.1777 data: 0.0910 max mem: 9377 +Train: [9] [5500/6250] eta: 0:01:55 lr: 0.000124 grad: 0.0757 (0.0796) loss: 0.8501 (0.8517) time: 0.1718 data: 0.0814 max mem: 9377 +Train: [9] [5600/6250] eta: 0:01:40 lr: 0.000124 grad: 0.0761 (0.0796) loss: 0.8520 (0.8517) time: 0.1491 data: 0.0630 max mem: 9377 +Train: [9] [5700/6250] eta: 0:01:24 lr: 0.000124 grad: 0.0682 (0.0796) loss: 0.8490 (0.8517) time: 0.1573 data: 0.0760 max mem: 9377 +Train: [9] [5800/6250] eta: 0:01:09 lr: 0.000124 grad: 0.0731 (0.0795) loss: 0.8504 (0.8518) time: 0.1737 data: 0.0918 max mem: 9377 +Train: [9] [5900/6250] eta: 0:00:54 lr: 0.000124 grad: 0.0752 (0.0796) loss: 0.8500 (0.8517) time: 0.1609 data: 0.0817 max mem: 9377 +Train: [9] [6000/6250] eta: 0:00:38 lr: 0.000124 grad: 0.0735 (0.0795) loss: 0.8537 (0.8518) time: 0.1465 data: 0.0583 max mem: 9377 +Train: [9] [6100/6250] eta: 0:00:23 lr: 0.000124 grad: 0.0730 (0.0794) loss: 0.8508 (0.8517) time: 0.1784 data: 0.1027 max mem: 9377 +Train: [9] [6200/6250] eta: 0:00:07 lr: 0.000124 grad: 0.0688 (0.0793) loss: 0.8501 (0.8518) time: 0.1622 data: 0.0751 max mem: 9377 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0702 (0.0792) loss: 0.8534 (0.8518) time: 0.2026 data: 0.1177 max mem: 9377 +Train: [9] Total time: 0:16:23 (0.1574 s / it) +Averaged stats: lr: 0.000124 grad: 0.0702 (0.0792) loss: 0.8534 (0.8518) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:04:50 loss: 0.8489 (0.8489) time: 4.6784 data: 4.6443 max mem: 9377 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.8526 (0.8538) time: 0.1679 data: 0.1408 max mem: 9377 +Eval (hcp-train-subset): [9] Total time: 0:00:15 (0.2514 s / it) +Averaged stats (hcp-train-subset): loss: 0.8526 (0.8538) +Making plots (hcp-train-subset): example=43 +Eval (hcp-val): [9] [ 0/62] eta: 0:05:03 loss: 0.8470 (0.8470) time: 4.8969 data: 4.8641 max mem: 9377 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8523 (0.8520) time: 0.1117 data: 0.0857 max mem: 9377 +Eval (hcp-val): [9] Total time: 0:00:14 (0.2389 s / it) +Averaged stats (hcp-val): loss: 0.8523 (0.8520) +Making plots (hcp-val): example=26 +Eval (nsd-val): [9] [ 0/62] eta: 0:05:17 loss: 0.8053 (0.8053) time: 5.1288 data: 5.0978 max mem: 9377 +Eval (nsd-val): [9] [61/62] eta: 0:00:00 loss: 0.8147 (0.8167) time: 0.1586 data: 0.1331 max mem: 9377 +Eval (nsd-val): [9] Total time: 0:00:15 (0.2471 s / it) +Averaged stats (nsd-val): loss: 0.8147 (0.8167) +Making plots (nsd-val): example=36 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00009.pth +Train: [10] [ 0/6250] eta: 13:59:04 lr: 0.000124 grad: 0.0796 (0.0796) loss: 0.8544 (0.8544) time: 8.0552 data: 7.9513 max mem: 9377 +Train: [10] [ 100/6250] eta: 0:26:47 lr: 0.000124 grad: 0.0723 (0.0875) loss: 0.8526 (0.8546) time: 0.1909 data: 0.1003 max mem: 9377 +Train: [10] [ 200/6250] eta: 0:22:20 lr: 0.000124 grad: 0.0773 (0.0822) loss: 0.8572 (0.8529) time: 0.1779 data: 0.0799 max mem: 9377 +Train: [10] [ 300/6250] eta: 0:21:02 lr: 0.000124 grad: 0.0695 (0.0801) loss: 0.8544 (0.8535) time: 0.1984 data: 0.1038 max mem: 9377 +Train: [10] [ 400/6250] eta: 0:19:53 lr: 0.000124 grad: 0.0725 (0.0788) loss: 0.8519 (0.8529) time: 0.1567 data: 0.0634 max mem: 9377 +Train: [10] [ 500/6250] eta: 0:18:57 lr: 0.000124 grad: 0.0692 (0.0771) loss: 0.8496 (0.8528) time: 0.1661 data: 0.0723 max mem: 9377 +Train: [10] [ 600/6250] eta: 0:17:57 lr: 0.000124 grad: 0.0692 (0.0765) loss: 0.8526 (0.8525) time: 0.1603 data: 0.0754 max mem: 9377 +Train: [10] [ 700/6250] eta: 0:17:04 lr: 0.000124 grad: 0.0679 (0.0762) loss: 0.8486 (0.8522) time: 0.1462 data: 0.0612 max mem: 9377 +Train: [10] [ 800/6250] eta: 0:16:28 lr: 0.000124 grad: 0.0678 (0.0767) loss: 0.8496 (0.8519) time: 0.1563 data: 0.0645 max mem: 9377 +Train: [10] [ 900/6250] eta: 0:16:00 lr: 0.000124 grad: 0.0742 (0.0765) loss: 0.8458 (0.8517) time: 0.1467 data: 0.0646 max mem: 9377 +Train: [10] [1000/6250] eta: 0:15:23 lr: 0.000124 grad: 0.0721 (0.0768) loss: 0.8538 (0.8515) time: 0.1605 data: 0.0810 max mem: 9377 +Train: [10] [1100/6250] eta: 0:14:54 lr: 0.000124 grad: 0.0740 (0.0766) loss: 0.8499 (0.8514) time: 0.1496 data: 0.0644 max mem: 9377 +Train: [10] [1200/6250] eta: 0:14:28 lr: 0.000124 grad: 0.0706 (0.0771) loss: 0.8546 (0.8514) time: 0.1412 data: 0.0600 max mem: 9377 +Train: [10] [1300/6250] eta: 0:14:11 lr: 0.000124 grad: 0.0694 (0.0767) loss: 0.8467 (0.8514) time: 0.2184 data: 0.1461 max mem: 9377 +Train: [10] [1400/6250] eta: 0:13:51 lr: 0.000124 grad: 0.0691 (0.0768) loss: 0.8497 (0.8513) time: 0.1512 data: 0.0672 max mem: 9377 +Train: [10] [1500/6250] eta: 0:13:27 lr: 0.000124 grad: 0.0813 (0.0770) loss: 0.8496 (0.8512) time: 0.1394 data: 0.0586 max mem: 9377 +Train: [10] [1600/6250] eta: 0:13:07 lr: 0.000124 grad: 0.0656 (0.0767) loss: 0.8511 (0.8512) time: 0.1621 data: 0.0822 max mem: 9377 +Train: [10] [1700/6250] eta: 0:12:43 lr: 0.000124 grad: 0.0696 (0.0764) loss: 0.8480 (0.8512) time: 0.1315 data: 0.0541 max mem: 9377 +Train: [10] [1800/6250] eta: 0:12:24 lr: 0.000124 grad: 0.0720 (0.0762) loss: 0.8519 (0.8511) time: 0.1636 data: 0.0837 max mem: 9377 +Train: [10] [1900/6250] eta: 0:12:04 lr: 0.000124 grad: 0.0751 (0.0764) loss: 0.8495 (0.8509) time: 0.1416 data: 0.0585 max mem: 9377 +Train: [10] [2000/6250] eta: 0:11:44 lr: 0.000124 grad: 0.0709 (0.0762) loss: 0.8468 (0.8508) time: 0.1298 data: 0.0480 max mem: 9377 +Train: [10] [2100/6250] eta: 0:11:24 lr: 0.000124 grad: 0.0702 (0.0761) loss: 0.8524 (0.8508) time: 0.1361 data: 0.0478 max mem: 9377 +Train: [10] [2200/6250] eta: 0:11:04 lr: 0.000124 grad: 0.0745 (0.0761) loss: 0.8453 (0.8507) time: 0.1339 data: 0.0403 max mem: 9377 +Train: [10] [2300/6250] eta: 0:10:44 lr: 0.000124 grad: 0.0671 (0.0762) loss: 0.8479 (0.8507) time: 0.1486 data: 0.0592 max mem: 9377 +Train: [10] [2400/6250] eta: 0:10:24 lr: 0.000124 grad: 0.0704 (0.0762) loss: 0.8478 (0.8506) time: 0.1534 data: 0.0718 max mem: 9377 +Train: [10] [2500/6250] eta: 0:10:04 lr: 0.000124 grad: 0.0682 (0.0761) loss: 0.8526 (0.8506) time: 0.1442 data: 0.0590 max mem: 9377 +Train: [10] [2600/6250] eta: 0:09:44 lr: 0.000124 grad: 0.0737 (0.0760) loss: 0.8489 (0.8506) time: 0.1540 data: 0.0724 max mem: 9377 +Train: [10] [2700/6250] eta: 0:09:25 lr: 0.000124 grad: 0.0687 (0.0759) loss: 0.8516 (0.8506) time: 0.1432 data: 0.0629 max mem: 9377 +Train: [10] [2800/6250] eta: 0:09:07 lr: 0.000124 grad: 0.0680 (0.0757) loss: 0.8514 (0.8507) time: 0.1445 data: 0.0647 max mem: 9377 +Train: [10] [2900/6250] eta: 0:08:49 lr: 0.000124 grad: 0.0702 (0.0757) loss: 0.8551 (0.8507) time: 0.1299 data: 0.0500 max mem: 9377 +Train: [10] [3000/6250] eta: 0:08:32 lr: 0.000124 grad: 0.0696 (0.0757) loss: 0.8480 (0.8507) time: 0.1476 data: 0.0683 max mem: 9377 +Train: [10] [3100/6250] eta: 0:08:14 lr: 0.000124 grad: 0.0719 (0.0756) loss: 0.8486 (0.8506) time: 0.1453 data: 0.0640 max mem: 9377 +Train: [10] [3200/6250] eta: 0:07:56 lr: 0.000124 grad: 0.0705 (0.0757) loss: 0.8491 (0.8505) time: 0.1271 data: 0.0441 max mem: 9377 +Train: [10] [3300/6250] eta: 0:07:39 lr: 0.000124 grad: 0.0731 (0.0757) loss: 0.8472 (0.8504) time: 0.1246 data: 0.0403 max mem: 9377 +Train: [10] [3400/6250] eta: 0:07:22 lr: 0.000124 grad: 0.0738 (0.0757) loss: 0.8442 (0.8502) time: 0.1372 data: 0.0555 max mem: 9377 +Train: [10] [3500/6250] eta: 0:07:05 lr: 0.000124 grad: 0.0684 (0.0756) loss: 0.8462 (0.8502) time: 0.1363 data: 0.0590 max mem: 9377 +Train: [10] [3600/6250] eta: 0:06:49 lr: 0.000124 grad: 0.0706 (0.0756) loss: 0.8513 (0.8500) time: 0.1570 data: 0.0815 max mem: 9377 +Train: [10] [3700/6250] eta: 0:06:32 lr: 0.000124 grad: 0.0716 (0.0755) loss: 0.8440 (0.8499) time: 0.1115 data: 0.0330 max mem: 9377 +Train: [10] [3800/6250] eta: 0:06:16 lr: 0.000124 grad: 0.0719 (0.0754) loss: 0.8450 (0.8498) time: 0.1788 data: 0.1030 max mem: 9377 +Train: [10] [3900/6250] eta: 0:06:00 lr: 0.000124 grad: 0.0721 (0.0754) loss: 0.8402 (0.8497) time: 0.1586 data: 0.0811 max mem: 9377 +Train: [10] [4000/6250] eta: 0:05:44 lr: 0.000124 grad: 0.0757 (0.0754) loss: 0.8463 (0.8496) time: 0.1442 data: 0.0658 max mem: 9377 +Train: [10] [4100/6250] eta: 0:05:29 lr: 0.000124 grad: 0.0700 (0.0754) loss: 0.8488 (0.8496) time: 0.1249 data: 0.0480 max mem: 9377 +Train: [10] [4200/6250] eta: 0:05:13 lr: 0.000124 grad: 0.0736 (0.0753) loss: 0.8478 (0.8495) time: 0.1229 data: 0.0430 max mem: 9377 +Train: [10] [4300/6250] eta: 0:04:57 lr: 0.000124 grad: 0.0676 (0.0752) loss: 0.8449 (0.8494) time: 0.1448 data: 0.0664 max mem: 9377 +Train: [10] [4400/6250] eta: 0:04:42 lr: 0.000124 grad: 0.0663 (0.0751) loss: 0.8546 (0.8494) time: 0.1634 data: 0.0822 max mem: 9377 +Train: [10] [4500/6250] eta: 0:04:27 lr: 0.000124 grad: 0.0687 (0.0751) loss: 0.8468 (0.8494) time: 0.1526 data: 0.0716 max mem: 9377 +Train: [10] [4600/6250] eta: 0:04:12 lr: 0.000124 grad: 0.0734 (0.0750) loss: 0.8494 (0.8494) time: 0.1363 data: 0.0553 max mem: 9377 +Train: [10] [4700/6250] eta: 0:03:56 lr: 0.000124 grad: 0.0672 (0.0749) loss: 0.8494 (0.8493) time: 0.1350 data: 0.0562 max mem: 9377 +Train: [10] [4800/6250] eta: 0:03:41 lr: 0.000124 grad: 0.0644 (0.0748) loss: 0.8483 (0.8493) time: 0.1740 data: 0.0973 max mem: 9377 +Train: [10] [4900/6250] eta: 0:03:26 lr: 0.000124 grad: 0.0745 (0.0748) loss: 0.8441 (0.8493) time: 0.1851 data: 0.1089 max mem: 9377 +Train: [10] [5000/6250] eta: 0:03:11 lr: 0.000124 grad: 0.0712 (0.0747) loss: 0.8460 (0.8492) time: 0.1537 data: 0.0766 max mem: 9377 +Train: [10] [5100/6250] eta: 0:02:55 lr: 0.000124 grad: 0.0682 (0.0747) loss: 0.8438 (0.8492) time: 0.1466 data: 0.0657 max mem: 9377 +Train: [10] [5200/6250] eta: 0:02:40 lr: 0.000124 grad: 0.0685 (0.0746) loss: 0.8506 (0.8491) time: 0.1343 data: 0.0583 max mem: 9377 +Train: [10] [5300/6250] eta: 0:02:25 lr: 0.000124 grad: 0.0724 (0.0745) loss: 0.8454 (0.8491) time: 0.1420 data: 0.0618 max mem: 9377 +Train: [10] [5400/6250] eta: 0:02:09 lr: 0.000124 grad: 0.0688 (0.0745) loss: 0.8471 (0.8490) time: 0.1547 data: 0.0720 max mem: 9377 +Train: [10] [5500/6250] eta: 0:01:54 lr: 0.000124 grad: 0.0670 (0.0744) loss: 0.8457 (0.8489) time: 0.1551 data: 0.0783 max mem: 9377 +Train: [10] [5600/6250] eta: 0:01:38 lr: 0.000124 grad: 0.0719 (0.0744) loss: 0.8471 (0.8489) time: 0.1392 data: 0.0589 max mem: 9377 +Train: [10] [5700/6250] eta: 0:01:23 lr: 0.000124 grad: 0.0692 (0.0744) loss: 0.8462 (0.8488) time: 0.1379 data: 0.0541 max mem: 9377 +Train: [10] [5800/6250] eta: 0:01:08 lr: 0.000124 grad: 0.0724 (0.0745) loss: 0.8438 (0.8487) time: 0.1458 data: 0.0597 max mem: 9377 +Train: [10] [5900/6250] eta: 0:00:53 lr: 0.000124 grad: 0.0652 (0.0746) loss: 0.8482 (0.8487) time: 0.1439 data: 0.0671 max mem: 9377 +Train: [10] [6000/6250] eta: 0:00:37 lr: 0.000124 grad: 0.0698 (0.0745) loss: 0.8434 (0.8486) time: 0.1840 data: 0.1028 max mem: 9377 +Train: [10] [6100/6250] eta: 0:00:22 lr: 0.000124 grad: 0.0725 (0.0745) loss: 0.8418 (0.8485) time: 0.1574 data: 0.0759 max mem: 9377 +Train: [10] [6200/6250] eta: 0:00:07 lr: 0.000124 grad: 0.0726 (0.0745) loss: 0.8478 (0.8485) time: 0.1734 data: 0.0967 max mem: 9377 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0674 (0.0744) loss: 0.8445 (0.8484) time: 0.1852 data: 0.1011 max mem: 9377 +Train: [10] Total time: 0:16:00 (0.1536 s / it) +Averaged stats: lr: 0.000124 grad: 0.0674 (0.0744) loss: 0.8445 (0.8484) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:06:02 loss: 0.8513 (0.8513) time: 5.8489 data: 5.8164 max mem: 9377 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.8537 (0.8526) time: 0.1168 data: 0.0921 max mem: 9377 +Eval (hcp-train-subset): [10] Total time: 0:00:14 (0.2386 s / it) +Averaged stats (hcp-train-subset): loss: 0.8537 (0.8526) +Eval (hcp-val): [10] [ 0/62] eta: 0:06:30 loss: 0.8453 (0.8453) time: 6.3054 data: 6.2731 max mem: 9377 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8500 (0.8504) time: 0.1571 data: 0.1318 max mem: 9377 +Eval (hcp-val): [10] Total time: 0:00:15 (0.2514 s / it) +Averaged stats (hcp-val): loss: 0.8500 (0.8504) +Eval (nsd-val): [10] [ 0/62] eta: 0:06:12 loss: 0.8066 (0.8066) time: 6.0035 data: 5.9706 max mem: 9377 +Eval (nsd-val): [10] [61/62] eta: 0:00:00 loss: 0.8146 (0.8170) time: 0.1448 data: 0.1196 max mem: 9377 +Eval (nsd-val): [10] Total time: 0:00:14 (0.2371 s / it) +Averaged stats (nsd-val): loss: 0.8146 (0.8170) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [11] [ 0/6250] eta: 10:55:26 lr: 0.000124 grad: 0.0873 (0.0873) loss: 0.8430 (0.8430) time: 6.2923 data: 6.1855 max mem: 9377 +Train: [11] [ 100/6250] eta: 0:22:43 lr: 0.000124 grad: 0.0736 (0.0866) loss: 0.8480 (0.8467) time: 0.1626 data: 0.0679 max mem: 9377 +Train: [11] [ 200/6250] eta: 0:19:50 lr: 0.000124 grad: 0.0765 (0.0854) loss: 0.8472 (0.8470) time: 0.1774 data: 0.0921 max mem: 9377 +Train: [11] [ 300/6250] eta: 0:18:37 lr: 0.000124 grad: 0.0717 (0.0847) loss: 0.8502 (0.8468) time: 0.1911 data: 0.1023 max mem: 9377 +Train: [11] [ 400/6250] eta: 0:17:29 lr: 0.000124 grad: 0.0667 (0.0842) loss: 0.8506 (0.8469) time: 0.1376 data: 0.0477 max mem: 9377 +Train: [11] [ 500/6250] eta: 0:16:42 lr: 0.000124 grad: 0.0678 (0.0823) loss: 0.8480 (0.8473) time: 0.1253 data: 0.0394 max mem: 9377 +Train: [11] [ 600/6250] eta: 0:16:03 lr: 0.000124 grad: 0.0714 (0.0809) loss: 0.8452 (0.8474) time: 0.1619 data: 0.0774 max mem: 9377 +Train: [11] [ 700/6250] eta: 0:15:25 lr: 0.000124 grad: 0.0656 (0.0797) loss: 0.8518 (0.8474) time: 0.1393 data: 0.0509 max mem: 9377 +Train: [11] [ 800/6250] eta: 0:14:54 lr: 0.000124 grad: 0.0697 (0.0790) loss: 0.8389 (0.8474) time: 0.1582 data: 0.0769 max mem: 9377 +Train: [11] [ 900/6250] eta: 0:14:29 lr: 0.000124 grad: 0.0699 (0.0785) loss: 0.8430 (0.8475) time: 0.1607 data: 0.0753 max mem: 9377 +Train: [11] [1000/6250] eta: 0:14:00 lr: 0.000124 grad: 0.0628 (0.0777) loss: 0.8470 (0.8475) time: 0.1518 data: 0.0714 max mem: 9377 +Train: [11] [1100/6250] eta: 0:13:35 lr: 0.000124 grad: 0.0789 (0.0776) loss: 0.8468 (0.8473) time: 0.1353 data: 0.0541 max mem: 9377 +Train: [11] [1200/6250] eta: 0:13:13 lr: 0.000124 grad: 0.0699 (0.0771) loss: 0.8478 (0.8473) time: 0.1207 data: 0.0471 max mem: 9377 +Train: [11] [1300/6250] eta: 0:12:52 lr: 0.000124 grad: 0.0724 (0.0766) loss: 0.8470 (0.8472) time: 0.1374 data: 0.0543 max mem: 9377 +Train: [11] [1400/6250] eta: 0:12:30 lr: 0.000124 grad: 0.0687 (0.0762) loss: 0.8417 (0.8471) time: 0.1285 data: 0.0498 max mem: 9377 +Train: [11] [1500/6250] eta: 0:12:15 lr: 0.000124 grad: 0.0697 (0.0761) loss: 0.8483 (0.8472) time: 0.1387 data: 0.0593 max mem: 9377 +Train: [11] [1600/6250] eta: 0:11:59 lr: 0.000124 grad: 0.0726 (0.0758) loss: 0.8438 (0.8473) time: 0.1538 data: 0.0678 max mem: 9377 +Train: [11] [1700/6250] eta: 0:11:43 lr: 0.000124 grad: 0.0701 (0.0761) loss: 0.8455 (0.8474) time: 0.1574 data: 0.0742 max mem: 9377 +Train: [11] [1800/6250] eta: 0:11:28 lr: 0.000124 grad: 0.0660 (0.0758) loss: 0.8515 (0.8474) time: 0.1618 data: 0.0886 max mem: 9377 +Train: [11] [1900/6250] eta: 0:11:12 lr: 0.000124 grad: 0.0685 (0.0754) loss: 0.8480 (0.8475) time: 0.1539 data: 0.0723 max mem: 9377 +Train: [11] [2000/6250] eta: 0:10:55 lr: 0.000124 grad: 0.0677 (0.0752) loss: 0.8497 (0.8476) time: 0.1328 data: 0.0477 max mem: 9377 +Train: [11] [2100/6250] eta: 0:10:39 lr: 0.000124 grad: 0.0686 (0.0750) loss: 0.8494 (0.8476) time: 0.1654 data: 0.0805 max mem: 9377 +Train: [11] [2200/6250] eta: 0:10:21 lr: 0.000124 grad: 0.0725 (0.0748) loss: 0.8495 (0.8476) time: 0.1479 data: 0.0731 max mem: 9377 +Train: [11] [2300/6250] eta: 0:10:04 lr: 0.000124 grad: 0.0704 (0.0748) loss: 0.8487 (0.8477) time: 0.1478 data: 0.0590 max mem: 9377 +Train: [11] [2400/6250] eta: 0:09:48 lr: 0.000124 grad: 0.0681 (0.0748) loss: 0.8477 (0.8477) time: 0.1491 data: 0.0583 max mem: 9377 +Train: [11] [2500/6250] eta: 0:09:30 lr: 0.000124 grad: 0.0694 (0.0746) loss: 0.8470 (0.8478) time: 0.1376 data: 0.0479 max mem: 9377 +Train: [11] [2600/6250] eta: 0:09:14 lr: 0.000124 grad: 0.0697 (0.0745) loss: 0.8494 (0.8479) time: 0.1609 data: 0.0817 max mem: 9377 +Train: [11] [2700/6250] eta: 0:08:57 lr: 0.000124 grad: 0.0678 (0.0744) loss: 0.8518 (0.8479) time: 0.1625 data: 0.0790 max mem: 9377 +Train: [11] [2800/6250] eta: 0:08:40 lr: 0.000124 grad: 0.0632 (0.0743) loss: 0.8536 (0.8480) time: 0.1512 data: 0.0694 max mem: 9377 +Train: [11] [2900/6250] eta: 0:08:23 lr: 0.000124 grad: 0.0740 (0.0742) loss: 0.8531 (0.8481) time: 0.1103 data: 0.0293 max mem: 9377 +Train: [11] [3000/6250] eta: 0:08:08 lr: 0.000124 grad: 0.0648 (0.0741) loss: 0.8489 (0.8481) time: 0.1553 data: 0.0800 max mem: 9377 +Train: [11] [3100/6250] eta: 0:07:51 lr: 0.000124 grad: 0.0673 (0.0744) loss: 0.8504 (0.8482) time: 0.1166 data: 0.0326 max mem: 9377 +Train: [11] [3200/6250] eta: 0:07:35 lr: 0.000124 grad: 0.0662 (0.0743) loss: 0.8540 (0.8482) time: 0.1509 data: 0.0726 max mem: 9377 +Train: [11] [3300/6250] eta: 0:07:19 lr: 0.000124 grad: 0.0694 (0.0742) loss: 0.8495 (0.8482) time: 0.1427 data: 0.0606 max mem: 9377 +Train: [11] [3400/6250] eta: 0:07:03 lr: 0.000124 grad: 0.0704 (0.0742) loss: 0.8459 (0.8482) time: 0.1137 data: 0.0249 max mem: 9377 +Train: [11] [3500/6250] eta: 0:06:48 lr: 0.000124 grad: 0.0674 (0.0741) loss: 0.8441 (0.8481) time: 0.1498 data: 0.0712 max mem: 9377 +Train: [11] [3600/6250] eta: 0:06:32 lr: 0.000124 grad: 0.0685 (0.0740) loss: 0.8482 (0.8481) time: 0.1458 data: 0.0572 max mem: 9377 +Train: [11] [3700/6250] eta: 0:06:16 lr: 0.000124 grad: 0.0707 (0.0738) loss: 0.8476 (0.8481) time: 0.1311 data: 0.0451 max mem: 9377 +Train: [11] [3800/6250] eta: 0:06:01 lr: 0.000124 grad: 0.0636 (0.0738) loss: 0.8477 (0.8481) time: 0.1318 data: 0.0494 max mem: 9377 +Train: [11] [3900/6250] eta: 0:05:46 lr: 0.000124 grad: 0.0633 (0.0737) loss: 0.8506 (0.8481) time: 0.1432 data: 0.0620 max mem: 9377 +Train: [11] [4000/6250] eta: 0:05:30 lr: 0.000123 grad: 0.0672 (0.0736) loss: 0.8437 (0.8481) time: 0.1511 data: 0.0718 max mem: 9377 +Train: [11] [4100/6250] eta: 0:05:15 lr: 0.000123 grad: 0.0687 (0.0736) loss: 0.8481 (0.8481) time: 0.1386 data: 0.0552 max mem: 9377 +Train: [11] [4200/6250] eta: 0:05:00 lr: 0.000123 grad: 0.0657 (0.0736) loss: 0.8455 (0.8480) time: 0.1324 data: 0.0468 max mem: 9377 +Train: [11] [4300/6250] eta: 0:04:45 lr: 0.000123 grad: 0.0647 (0.0735) loss: 0.8492 (0.8480) time: 0.1272 data: 0.0492 max mem: 9377 +Train: [11] [4400/6250] eta: 0:04:30 lr: 0.000123 grad: 0.0711 (0.0735) loss: 0.8459 (0.8479) time: 0.1195 data: 0.0398 max mem: 9377 +Train: [11] [4500/6250] eta: 0:04:15 lr: 0.000123 grad: 0.0721 (0.0734) loss: 0.8476 (0.8479) time: 0.1396 data: 0.0624 max mem: 9377 +Train: [11] [4600/6250] eta: 0:04:00 lr: 0.000123 grad: 0.0681 (0.0735) loss: 0.8475 (0.8478) time: 0.1279 data: 0.0462 max mem: 9377 +Train: [11] [4700/6250] eta: 0:03:45 lr: 0.000123 grad: 0.0683 (0.0733) loss: 0.8476 (0.8478) time: 0.1298 data: 0.0536 max mem: 9377 +Train: [11] [4800/6250] eta: 0:03:31 lr: 0.000123 grad: 0.0682 (0.0733) loss: 0.8417 (0.8478) time: 0.1609 data: 0.0844 max mem: 9377 +Train: [11] [4900/6250] eta: 0:03:16 lr: 0.000123 grad: 0.0718 (0.0732) loss: 0.8457 (0.8477) time: 0.1421 data: 0.0644 max mem: 9377 +Train: [11] [5000/6250] eta: 0:03:01 lr: 0.000123 grad: 0.0696 (0.0732) loss: 0.8445 (0.8476) time: 0.1519 data: 0.0691 max mem: 9377 +Train: [11] [5100/6250] eta: 0:02:47 lr: 0.000123 grad: 0.0681 (0.0732) loss: 0.8508 (0.8476) time: 0.1498 data: 0.0730 max mem: 9377 +Train: [11] [5200/6250] eta: 0:02:32 lr: 0.000123 grad: 0.0716 (0.0732) loss: 0.8437 (0.8475) time: 0.1629 data: 0.0864 max mem: 9377 +Train: [11] [5300/6250] eta: 0:02:18 lr: 0.000123 grad: 0.0730 (0.0732) loss: 0.8396 (0.8475) time: 0.1664 data: 0.0847 max mem: 9377 +Train: [11] [5400/6250] eta: 0:02:03 lr: 0.000123 grad: 0.0677 (0.0732) loss: 0.8455 (0.8474) time: 0.1517 data: 0.0730 max mem: 9377 +Train: [11] [5500/6250] eta: 0:01:49 lr: 0.000123 grad: 0.0713 (0.0731) loss: 0.8454 (0.8474) time: 0.1636 data: 0.0816 max mem: 9377 +Train: [11] [5600/6250] eta: 0:01:34 lr: 0.000123 grad: 0.0670 (0.0731) loss: 0.8443 (0.8474) time: 0.1472 data: 0.0706 max mem: 9377 +Train: [11] [5700/6250] eta: 0:01:20 lr: 0.000123 grad: 0.0701 (0.0731) loss: 0.8429 (0.8474) time: 0.1321 data: 0.0538 max mem: 9377 +Train: [11] [5800/6250] eta: 0:01:05 lr: 0.000123 grad: 0.0719 (0.0731) loss: 0.8447 (0.8473) time: 0.1346 data: 0.0554 max mem: 9377 +Train: [11] [5900/6250] eta: 0:00:50 lr: 0.000123 grad: 0.0693 (0.0731) loss: 0.8458 (0.8473) time: 0.1368 data: 0.0577 max mem: 9377 +Train: [11] [6000/6250] eta: 0:00:36 lr: 0.000123 grad: 0.0704 (0.0730) loss: 0.8419 (0.8472) time: 0.1564 data: 0.0752 max mem: 9377 +Train: [11] [6100/6250] eta: 0:00:21 lr: 0.000123 grad: 0.0749 (0.0731) loss: 0.8460 (0.8472) time: 0.1588 data: 0.0705 max mem: 9377 +Train: [11] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0711 (0.0731) loss: 0.8441 (0.8471) time: 0.1287 data: 0.0449 max mem: 9377 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0702 (0.0731) loss: 0.8443 (0.8471) time: 0.1477 data: 0.0670 max mem: 9377 +Train: [11] Total time: 0:15:15 (0.1466 s / it) +Averaged stats: lr: 0.000123 grad: 0.0702 (0.0731) loss: 0.8443 (0.8471) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:05:09 loss: 0.8502 (0.8502) time: 4.9907 data: 4.9583 max mem: 9377 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8509 (0.8506) time: 0.1292 data: 0.1022 max mem: 9377 +Eval (hcp-train-subset): [11] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (hcp-train-subset): loss: 0.8509 (0.8506) +Eval (hcp-val): [11] [ 0/62] eta: 0:05:25 loss: 0.8454 (0.8454) time: 5.2432 data: 5.1968 max mem: 9377 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8470 (0.8484) time: 0.1057 data: 0.0807 max mem: 9377 +Eval (hcp-val): [11] Total time: 0:00:13 (0.2215 s / it) +Averaged stats (hcp-val): loss: 0.8470 (0.8484) +Eval (nsd-val): [11] [ 0/62] eta: 0:05:36 loss: 0.8056 (0.8056) time: 5.4265 data: 5.3958 max mem: 9377 +Eval (nsd-val): [11] [61/62] eta: 0:00:00 loss: 0.8170 (0.8183) time: 0.1407 data: 0.1150 max mem: 9377 +Eval (nsd-val): [11] Total time: 0:00:14 (0.2337 s / it) +Averaged stats (nsd-val): loss: 0.8170 (0.8183) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [12] [ 0/6250] eta: 13:02:18 lr: 0.000123 grad: 0.0880 (0.0880) loss: 0.8802 (0.8802) time: 7.5101 data: 7.4099 max mem: 9377 +Train: [12] [ 100/6250] eta: 0:25:01 lr: 0.000123 grad: 0.0717 (0.0765) loss: 0.8542 (0.8557) time: 0.2124 data: 0.1192 max mem: 9377 +Train: [12] [ 200/6250] eta: 0:21:14 lr: 0.000123 grad: 0.0706 (0.0754) loss: 0.8497 (0.8550) time: 0.1578 data: 0.0677 max mem: 9377 +Train: [12] [ 300/6250] eta: 0:19:00 lr: 0.000123 grad: 0.0723 (0.0746) loss: 0.8470 (0.8530) time: 0.1489 data: 0.0636 max mem: 9377 +Train: [12] [ 400/6250] eta: 0:17:39 lr: 0.000123 grad: 0.0757 (0.0762) loss: 0.8436 (0.8504) time: 0.1326 data: 0.0385 max mem: 9377 +Train: [12] [ 500/6250] eta: 0:16:49 lr: 0.000123 grad: 0.0777 (0.0763) loss: 0.8427 (0.8485) time: 0.1475 data: 0.0630 max mem: 9377 +Train: [12] [ 600/6250] eta: 0:16:18 lr: 0.000123 grad: 0.0759 (0.0760) loss: 0.8406 (0.8473) time: 0.1512 data: 0.0733 max mem: 9377 +Train: [12] [ 700/6250] eta: 0:15:39 lr: 0.000123 grad: 0.0690 (0.0757) loss: 0.8411 (0.8464) time: 0.1680 data: 0.0838 max mem: 9377 +Train: [12] [ 800/6250] eta: 0:15:15 lr: 0.000123 grad: 0.0668 (0.0751) loss: 0.8477 (0.8459) time: 0.1343 data: 0.0484 max mem: 9377 +Train: [12] [ 900/6250] eta: 0:14:47 lr: 0.000123 grad: 0.0684 (0.0749) loss: 0.8417 (0.8455) time: 0.1386 data: 0.0533 max mem: 9377 +Train: [12] [1000/6250] eta: 0:14:18 lr: 0.000123 grad: 0.0726 (0.0746) loss: 0.8387 (0.8450) time: 0.1400 data: 0.0551 max mem: 9377 +Train: [12] [1100/6250] eta: 0:13:51 lr: 0.000123 grad: 0.0661 (0.0741) loss: 0.8385 (0.8445) time: 0.1418 data: 0.0603 max mem: 9377 +Train: [12] [1200/6250] eta: 0:13:23 lr: 0.000123 grad: 0.0713 (0.0738) loss: 0.8409 (0.8442) time: 0.1413 data: 0.0549 max mem: 9377 +Train: [12] [1300/6250] eta: 0:12:59 lr: 0.000123 grad: 0.0662 (0.0735) loss: 0.8408 (0.8442) time: 0.1374 data: 0.0570 max mem: 9377 +Train: [12] [1400/6250] eta: 0:12:37 lr: 0.000123 grad: 0.0685 (0.0732) loss: 0.8395 (0.8441) time: 0.1364 data: 0.0504 max mem: 9377 +Train: [12] [1500/6250] eta: 0:12:15 lr: 0.000123 grad: 0.0710 (0.0732) loss: 0.8405 (0.8441) time: 0.1423 data: 0.0592 max mem: 9377 +Train: [12] [1600/6250] eta: 0:11:55 lr: 0.000123 grad: 0.0631 (0.0730) loss: 0.8464 (0.8440) time: 0.1414 data: 0.0643 max mem: 9377 +Train: [12] [1700/6250] eta: 0:11:43 lr: 0.000123 grad: 0.0680 (0.0728) loss: 0.8402 (0.8439) time: 0.1754 data: 0.0931 max mem: 9377 +Train: [12] [1800/6250] eta: 0:11:26 lr: 0.000123 grad: 0.0678 (0.0726) loss: 0.8461 (0.8439) time: 0.1680 data: 0.0839 max mem: 9377 +Train: [12] [1900/6250] eta: 0:11:11 lr: 0.000123 grad: 0.0671 (0.0724) loss: 0.8436 (0.8440) time: 0.1407 data: 0.0629 max mem: 9377 +Train: [12] [2000/6250] eta: 0:10:55 lr: 0.000123 grad: 0.0665 (0.0723) loss: 0.8464 (0.8441) time: 0.1470 data: 0.0614 max mem: 9377 +Train: [12] [2100/6250] eta: 0:10:40 lr: 0.000123 grad: 0.0699 (0.0723) loss: 0.8428 (0.8440) time: 0.1553 data: 0.0755 max mem: 9377 +Train: [12] [2200/6250] eta: 0:10:23 lr: 0.000123 grad: 0.0712 (0.0723) loss: 0.8445 (0.8440) time: 0.1414 data: 0.0631 max mem: 9377 +Train: [12] [2300/6250] eta: 0:10:07 lr: 0.000123 grad: 0.0727 (0.0728) loss: 0.8488 (0.8440) time: 0.1575 data: 0.0754 max mem: 9377 +Train: [12] [2400/6250] eta: 0:09:51 lr: 0.000123 grad: 0.0654 (0.0727) loss: 0.8417 (0.8441) time: 0.1345 data: 0.0497 max mem: 9377 +Train: [12] [2500/6250] eta: 0:09:34 lr: 0.000123 grad: 0.0665 (0.0727) loss: 0.8455 (0.8440) time: 0.1531 data: 0.0673 max mem: 9377 +Train: [12] [2600/6250] eta: 0:09:17 lr: 0.000123 grad: 0.0694 (0.0727) loss: 0.8414 (0.8440) time: 0.1370 data: 0.0547 max mem: 9377 +Train: [12] [2700/6250] eta: 0:08:59 lr: 0.000123 grad: 0.0732 (0.0728) loss: 0.8435 (0.8439) time: 0.1212 data: 0.0389 max mem: 9377 +Train: [12] [2800/6250] eta: 0:08:42 lr: 0.000123 grad: 0.0694 (0.0728) loss: 0.8387 (0.8438) time: 0.1403 data: 0.0611 max mem: 9377 +Train: [12] [2900/6250] eta: 0:08:25 lr: 0.000123 grad: 0.0658 (0.0729) loss: 0.8428 (0.8437) time: 0.1495 data: 0.0695 max mem: 9377 +Train: [12] [3000/6250] eta: 0:08:09 lr: 0.000123 grad: 0.0662 (0.0728) loss: 0.8445 (0.8437) time: 0.1395 data: 0.0560 max mem: 9377 +Train: [12] [3100/6250] eta: 0:07:52 lr: 0.000123 grad: 0.0765 (0.0729) loss: 0.8458 (0.8437) time: 0.1358 data: 0.0557 max mem: 9377 +Train: [12] [3200/6250] eta: 0:07:36 lr: 0.000123 grad: 0.0670 (0.0730) loss: 0.8452 (0.8437) time: 0.1337 data: 0.0565 max mem: 9377 +Train: [12] [3300/6250] eta: 0:07:20 lr: 0.000123 grad: 0.0649 (0.0730) loss: 0.8431 (0.8436) time: 0.1371 data: 0.0590 max mem: 9377 +Train: [12] [3400/6250] eta: 0:07:04 lr: 0.000123 grad: 0.0719 (0.0731) loss: 0.8439 (0.8435) time: 0.1415 data: 0.0596 max mem: 9377 +Train: [12] [3500/6250] eta: 0:06:48 lr: 0.000123 grad: 0.0717 (0.0732) loss: 0.8393 (0.8434) time: 0.1416 data: 0.0644 max mem: 9377 +Train: [12] [3600/6250] eta: 0:06:32 lr: 0.000123 grad: 0.0680 (0.0733) loss: 0.8419 (0.8434) time: 0.1214 data: 0.0346 max mem: 9377 +Train: [12] [3700/6250] eta: 0:06:17 lr: 0.000123 grad: 0.0737 (0.0733) loss: 0.8379 (0.8433) time: 0.1339 data: 0.0500 max mem: 9377 +Train: [12] [3800/6250] eta: 0:06:01 lr: 0.000123 grad: 0.0746 (0.0736) loss: 0.8402 (0.8432) time: 0.1288 data: 0.0443 max mem: 9377 +Train: [12] [3900/6250] eta: 0:05:47 lr: 0.000123 grad: 0.0759 (0.0736) loss: 0.8446 (0.8432) time: 0.1624 data: 0.0770 max mem: 9377 +Train: [12] [4000/6250] eta: 0:05:31 lr: 0.000123 grad: 0.0746 (0.0736) loss: 0.8360 (0.8431) time: 0.1251 data: 0.0378 max mem: 9377 +Train: [12] [4100/6250] eta: 0:05:16 lr: 0.000123 grad: 0.0695 (0.0736) loss: 0.8350 (0.8430) time: 0.1151 data: 0.0381 max mem: 9377 +Train: [12] [4200/6250] eta: 0:05:01 lr: 0.000123 grad: 0.0692 (0.0736) loss: 0.8421 (0.8430) time: 0.1409 data: 0.0604 max mem: 9377 +Train: [12] [4300/6250] eta: 0:04:45 lr: 0.000123 grad: 0.0726 (0.0737) loss: 0.8392 (0.8430) time: 0.1375 data: 0.0548 max mem: 9377 +Train: [12] [4400/6250] eta: 0:04:30 lr: 0.000123 grad: 0.0658 (0.0737) loss: 0.8383 (0.8429) time: 0.1549 data: 0.0743 max mem: 9377 +Train: [12] [4500/6250] eta: 0:04:16 lr: 0.000123 grad: 0.0671 (0.0736) loss: 0.8481 (0.8429) time: 0.1564 data: 0.0742 max mem: 9377 +Train: [12] [4600/6250] eta: 0:04:00 lr: 0.000123 grad: 0.0680 (0.0736) loss: 0.8455 (0.8430) time: 0.1418 data: 0.0589 max mem: 9377 +Train: [12] [4700/6250] eta: 0:03:46 lr: 0.000123 grad: 0.0672 (0.0735) loss: 0.8470 (0.8430) time: 0.1314 data: 0.0470 max mem: 9377 +Train: [12] [4800/6250] eta: 0:03:31 lr: 0.000123 grad: 0.0646 (0.0735) loss: 0.8488 (0.8431) time: 0.1351 data: 0.0544 max mem: 9377 +Train: [12] [4900/6250] eta: 0:03:16 lr: 0.000123 grad: 0.0701 (0.0734) loss: 0.8428 (0.8431) time: 0.1571 data: 0.0812 max mem: 9377 +Train: [12] [5000/6250] eta: 0:03:01 lr: 0.000123 grad: 0.0649 (0.0734) loss: 0.8472 (0.8431) time: 0.1272 data: 0.0462 max mem: 9377 +Train: [12] [5100/6250] eta: 0:02:47 lr: 0.000123 grad: 0.0657 (0.0733) loss: 0.8486 (0.8432) time: 0.1449 data: 0.0670 max mem: 9377 +Train: [12] [5200/6250] eta: 0:02:32 lr: 0.000123 grad: 0.0668 (0.0732) loss: 0.8459 (0.8433) time: 0.1222 data: 0.0405 max mem: 9377 +Train: [12] [5300/6250] eta: 0:02:17 lr: 0.000123 grad: 0.0651 (0.0731) loss: 0.8472 (0.8434) time: 0.1572 data: 0.0764 max mem: 9377 +Train: [12] [5400/6250] eta: 0:02:03 lr: 0.000123 grad: 0.0639 (0.0730) loss: 0.8434 (0.8434) time: 0.1625 data: 0.0834 max mem: 9377 +Train: [12] [5500/6250] eta: 0:01:48 lr: 0.000123 grad: 0.0690 (0.0730) loss: 0.8420 (0.8434) time: 0.1322 data: 0.0505 max mem: 9377 +Train: [12] [5600/6250] eta: 0:01:34 lr: 0.000123 grad: 0.0661 (0.0729) loss: 0.8479 (0.8434) time: 0.1385 data: 0.0640 max mem: 9377 +Train: [12] [5700/6250] eta: 0:01:19 lr: 0.000123 grad: 0.0671 (0.0729) loss: 0.8451 (0.8434) time: 0.1372 data: 0.0521 max mem: 9377 +Train: [12] [5800/6250] eta: 0:01:05 lr: 0.000123 grad: 0.0697 (0.0728) loss: 0.8410 (0.8433) time: 0.1310 data: 0.0475 max mem: 9377 +Train: [12] [5900/6250] eta: 0:00:50 lr: 0.000123 grad: 0.0676 (0.0728) loss: 0.8426 (0.8433) time: 0.1838 data: 0.1045 max mem: 9377 +Train: [12] [6000/6250] eta: 0:00:36 lr: 0.000123 grad: 0.0660 (0.0727) loss: 0.8433 (0.8433) time: 0.1642 data: 0.0823 max mem: 9377 +Train: [12] [6100/6250] eta: 0:00:21 lr: 0.000123 grad: 0.0725 (0.0727) loss: 0.8414 (0.8433) time: 0.1532 data: 0.0717 max mem: 9377 +Train: [12] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0689 (0.0727) loss: 0.8334 (0.8432) time: 0.1360 data: 0.0534 max mem: 9377 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0668 (0.0727) loss: 0.8370 (0.8432) time: 0.1569 data: 0.0776 max mem: 9377 +Train: [12] Total time: 0:15:14 (0.1463 s / it) +Averaged stats: lr: 0.000123 grad: 0.0668 (0.0727) loss: 0.8370 (0.8432) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:05:59 loss: 0.8491 (0.8491) time: 5.8061 data: 5.7753 max mem: 9377 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8490 (0.8495) time: 0.1264 data: 0.1014 max mem: 9377 +Eval (hcp-train-subset): [12] Total time: 0:00:13 (0.2124 s / it) +Averaged stats (hcp-train-subset): loss: 0.8490 (0.8495) +Eval (hcp-val): [12] [ 0/62] eta: 0:04:06 loss: 0.8447 (0.8447) time: 3.9762 data: 3.9055 max mem: 9377 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8465 (0.8471) time: 0.0991 data: 0.0744 max mem: 9377 +Eval (hcp-val): [12] Total time: 0:00:12 (0.1974 s / it) +Averaged stats (hcp-val): loss: 0.8465 (0.8471) +Eval (nsd-val): [12] [ 0/62] eta: 0:02:53 loss: 0.8108 (0.8108) time: 2.8013 data: 2.7266 max mem: 9377 +Eval (nsd-val): [12] [61/62] eta: 0:00:00 loss: 0.8190 (0.8186) time: 0.1165 data: 0.0882 max mem: 9377 +Eval (nsd-val): [12] Total time: 0:00:11 (0.1916 s / it) +Averaged stats (nsd-val): loss: 0.8190 (0.8186) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [13] [ 0/6250] eta: 6:27:20 lr: 0.000123 grad: 0.0546 (0.0546) loss: 0.8653 (0.8653) time: 3.7184 data: 3.4822 max mem: 9377 +Train: [13] [ 100/6250] eta: 0:19:22 lr: 0.000123 grad: 0.0834 (0.0845) loss: 0.8345 (0.8472) time: 0.1287 data: 0.0410 max mem: 9377 +Train: [13] [ 200/6250] eta: 0:16:26 lr: 0.000123 grad: 0.0669 (0.0778) loss: 0.8444 (0.8461) time: 0.1513 data: 0.0669 max mem: 9377 +Train: [13] [ 300/6250] eta: 0:15:13 lr: 0.000123 grad: 0.0640 (0.0739) loss: 0.8434 (0.8465) time: 0.1429 data: 0.0543 max mem: 9377 +Train: [13] [ 400/6250] eta: 0:14:35 lr: 0.000123 grad: 0.0649 (0.0725) loss: 0.8391 (0.8464) time: 0.1518 data: 0.0560 max mem: 9377 +Train: [13] [ 500/6250] eta: 0:14:09 lr: 0.000123 grad: 0.0659 (0.0722) loss: 0.8425 (0.8458) time: 0.1479 data: 0.0510 max mem: 9377 +Train: [13] [ 600/6250] eta: 0:13:38 lr: 0.000123 grad: 0.0666 (0.0724) loss: 0.8399 (0.8455) time: 0.1259 data: 0.0329 max mem: 9377 +Train: [13] [ 700/6250] eta: 0:13:20 lr: 0.000123 grad: 0.0653 (0.0723) loss: 0.8457 (0.8454) time: 0.1295 data: 0.0414 max mem: 9377 +Train: [13] [ 800/6250] eta: 0:13:08 lr: 0.000123 grad: 0.0860 (0.0721) loss: 0.8429 (0.8454) time: 0.1607 data: 0.0748 max mem: 9377 +Train: [13] [ 900/6250] eta: 0:12:50 lr: 0.000123 grad: 0.0676 (0.0722) loss: 0.8484 (0.8456) time: 0.1315 data: 0.0500 max mem: 9377 +Train: [13] [1000/6250] eta: 0:12:37 lr: 0.000123 grad: 0.0738 (0.0722) loss: 0.8434 (0.8456) time: 0.1421 data: 0.0607 max mem: 9377 +Train: [13] [1100/6250] eta: 0:12:19 lr: 0.000123 grad: 0.0687 (0.0722) loss: 0.8473 (0.8456) time: 0.1390 data: 0.0556 max mem: 9377 +Train: [13] [1200/6250] eta: 0:12:03 lr: 0.000123 grad: 0.0682 (0.0721) loss: 0.8431 (0.8457) time: 0.1387 data: 0.0570 max mem: 9377 +Train: [13] [1300/6250] eta: 0:11:47 lr: 0.000123 grad: 0.0697 (0.0725) loss: 0.8470 (0.8456) time: 0.1381 data: 0.0566 max mem: 9377 +Train: [13] [1400/6250] eta: 0:11:29 lr: 0.000123 grad: 0.0719 (0.0725) loss: 0.8429 (0.8455) time: 0.1300 data: 0.0483 max mem: 9377 +Train: [13] [1500/6250] eta: 0:11:14 lr: 0.000123 grad: 0.0647 (0.0724) loss: 0.8446 (0.8454) time: 0.1272 data: 0.0414 max mem: 9377 +Train: [13] [1600/6250] eta: 0:10:58 lr: 0.000123 grad: 0.0691 (0.0722) loss: 0.8398 (0.8453) time: 0.1324 data: 0.0503 max mem: 9377 +Train: [13] [1700/6250] eta: 0:10:44 lr: 0.000123 grad: 0.0676 (0.0720) loss: 0.8454 (0.8453) time: 0.1668 data: 0.0926 max mem: 9377 +Train: [13] [1800/6250] eta: 0:10:31 lr: 0.000123 grad: 0.0667 (0.0719) loss: 0.8498 (0.8452) time: 0.1864 data: 0.1141 max mem: 9377 +Train: [13] [1900/6250] eta: 0:10:20 lr: 0.000123 grad: 0.0635 (0.0720) loss: 0.8437 (0.8452) time: 0.1570 data: 0.0752 max mem: 9377 +Train: [13] [2000/6250] eta: 0:10:08 lr: 0.000123 grad: 0.0702 (0.0720) loss: 0.8414 (0.8452) time: 0.1621 data: 0.0682 max mem: 9377 +Train: [13] [2100/6250] eta: 0:09:56 lr: 0.000123 grad: 0.0646 (0.0718) loss: 0.8482 (0.8452) time: 0.1541 data: 0.0738 max mem: 9377 +Train: [13] [2200/6250] eta: 0:09:42 lr: 0.000123 grad: 0.0717 (0.0718) loss: 0.8463 (0.8452) time: 0.1664 data: 0.0845 max mem: 9377 +Train: [13] [2300/6250] eta: 0:09:29 lr: 0.000123 grad: 0.0671 (0.0720) loss: 0.8450 (0.8452) time: 0.1449 data: 0.0624 max mem: 9377 +Train: [13] [2400/6250] eta: 0:09:14 lr: 0.000123 grad: 0.0673 (0.0722) loss: 0.8476 (0.8451) time: 0.1526 data: 0.0766 max mem: 9377 +Train: [13] [2500/6250] eta: 0:09:00 lr: 0.000123 grad: 0.0661 (0.0720) loss: 0.8445 (0.8450) time: 0.1487 data: 0.0684 max mem: 9377 +Train: [13] [2600/6250] eta: 0:08:46 lr: 0.000123 grad: 0.0666 (0.0719) loss: 0.8436 (0.8450) time: 0.1511 data: 0.0734 max mem: 9377 +Train: [13] [2700/6250] eta: 0:08:31 lr: 0.000123 grad: 0.0646 (0.0720) loss: 0.8473 (0.8449) time: 0.1336 data: 0.0479 max mem: 9377 +Train: [13] [2800/6250] eta: 0:08:16 lr: 0.000123 grad: 0.0662 (0.0720) loss: 0.8438 (0.8449) time: 0.1348 data: 0.0566 max mem: 9377 +Train: [13] [2900/6250] eta: 0:08:01 lr: 0.000123 grad: 0.0644 (0.0720) loss: 0.8444 (0.8449) time: 0.1491 data: 0.0691 max mem: 9377 +Train: [13] [3000/6250] eta: 0:07:46 lr: 0.000123 grad: 0.0762 (0.0719) loss: 0.8453 (0.8448) time: 0.1265 data: 0.0475 max mem: 9377 +Train: [13] [3100/6250] eta: 0:07:31 lr: 0.000123 grad: 0.0690 (0.0721) loss: 0.8440 (0.8448) time: 0.1378 data: 0.0616 max mem: 9377 +Train: [13] [3200/6250] eta: 0:07:15 lr: 0.000123 grad: 0.0652 (0.0721) loss: 0.8471 (0.8448) time: 0.1334 data: 0.0611 max mem: 9377 +Train: [13] [3300/6250] eta: 0:07:01 lr: 0.000123 grad: 0.0702 (0.0720) loss: 0.8405 (0.8447) time: 0.1447 data: 0.0598 max mem: 9377 +Train: [13] [3400/6250] eta: 0:06:45 lr: 0.000123 grad: 0.0656 (0.0720) loss: 0.8406 (0.8447) time: 0.1101 data: 0.0254 max mem: 9377 +Train: [13] [3500/6250] eta: 0:06:30 lr: 0.000123 grad: 0.0693 (0.0719) loss: 0.8428 (0.8446) time: 0.1331 data: 0.0480 max mem: 9377 +Train: [13] [3600/6250] eta: 0:06:16 lr: 0.000123 grad: 0.0663 (0.0718) loss: 0.8430 (0.8447) time: 0.1314 data: 0.0413 max mem: 9377 +Train: [13] [3700/6250] eta: 0:06:01 lr: 0.000122 grad: 0.0664 (0.0718) loss: 0.8443 (0.8446) time: 0.1319 data: 0.0536 max mem: 9377 +Train: [13] [3800/6250] eta: 0:05:47 lr: 0.000122 grad: 0.0669 (0.0718) loss: 0.8396 (0.8445) time: 0.1467 data: 0.0664 max mem: 9377 +Train: [13] [3900/6250] eta: 0:05:32 lr: 0.000122 grad: 0.0710 (0.0717) loss: 0.8426 (0.8445) time: 0.1295 data: 0.0511 max mem: 9377 +Train: [13] [4000/6250] eta: 0:05:18 lr: 0.000122 grad: 0.0697 (0.0716) loss: 0.8438 (0.8445) time: 0.1257 data: 0.0421 max mem: 9377 +Train: [13] [4100/6250] eta: 0:05:03 lr: 0.000122 grad: 0.0658 (0.0716) loss: 0.8443 (0.8445) time: 0.1412 data: 0.0569 max mem: 9377 +Train: [13] [4200/6250] eta: 0:04:49 lr: 0.000122 grad: 0.0651 (0.0715) loss: 0.8427 (0.8444) time: 0.1349 data: 0.0472 max mem: 9377 +Train: [13] [4300/6250] eta: 0:04:35 lr: 0.000122 grad: 0.0665 (0.0714) loss: 0.8472 (0.8444) time: 0.1293 data: 0.0459 max mem: 9377 +Train: [13] [4400/6250] eta: 0:04:20 lr: 0.000122 grad: 0.0731 (0.0715) loss: 0.8409 (0.8444) time: 0.1462 data: 0.0635 max mem: 9377 +Train: [13] [4500/6250] eta: 0:04:06 lr: 0.000122 grad: 0.0670 (0.0714) loss: 0.8453 (0.8443) time: 0.1731 data: 0.0989 max mem: 9377 +Train: [13] [4600/6250] eta: 0:03:52 lr: 0.000122 grad: 0.0682 (0.0714) loss: 0.8452 (0.8443) time: 0.1376 data: 0.0542 max mem: 9377 +Train: [13] [4700/6250] eta: 0:03:38 lr: 0.000122 grad: 0.0678 (0.0714) loss: 0.8436 (0.8443) time: 0.1077 data: 0.0221 max mem: 9377 +Train: [13] [4800/6250] eta: 0:03:24 lr: 0.000122 grad: 0.0646 (0.0714) loss: 0.8433 (0.8442) time: 0.1336 data: 0.0532 max mem: 9377 +Train: [13] [4900/6250] eta: 0:03:09 lr: 0.000122 grad: 0.0679 (0.0714) loss: 0.8429 (0.8442) time: 0.1344 data: 0.0557 max mem: 9377 +Train: [13] [5000/6250] eta: 0:02:55 lr: 0.000122 grad: 0.0668 (0.0713) loss: 0.8416 (0.8442) time: 0.1372 data: 0.0547 max mem: 9377 +Train: [13] [5100/6250] eta: 0:02:41 lr: 0.000122 grad: 0.0664 (0.0713) loss: 0.8431 (0.8442) time: 0.1442 data: 0.0672 max mem: 9377 +Train: [13] [5200/6250] eta: 0:02:27 lr: 0.000122 grad: 0.0673 (0.0713) loss: 0.8440 (0.8442) time: 0.1539 data: 0.0743 max mem: 9377 +Train: [13] [5300/6250] eta: 0:02:13 lr: 0.000122 grad: 0.0660 (0.0713) loss: 0.8433 (0.8442) time: 0.1311 data: 0.0516 max mem: 9377 +Train: [13] [5400/6250] eta: 0:01:59 lr: 0.000122 grad: 0.0702 (0.0713) loss: 0.8450 (0.8443) time: 0.1418 data: 0.0572 max mem: 9377 +Train: [13] [5500/6250] eta: 0:01:45 lr: 0.000122 grad: 0.0751 (0.0713) loss: 0.8436 (0.8442) time: 0.1254 data: 0.0440 max mem: 9377 +Train: [13] [5600/6250] eta: 0:01:31 lr: 0.000122 grad: 0.0702 (0.0714) loss: 0.8427 (0.8442) time: 0.1327 data: 0.0522 max mem: 9377 +Train: [13] [5700/6250] eta: 0:01:17 lr: 0.000122 grad: 0.0675 (0.0714) loss: 0.8435 (0.8442) time: 0.1299 data: 0.0378 max mem: 9377 +Train: [13] [5800/6250] eta: 0:01:03 lr: 0.000122 grad: 0.0766 (0.0715) loss: 0.8456 (0.8441) time: 0.1759 data: 0.0932 max mem: 9377 +Train: [13] [5900/6250] eta: 0:00:49 lr: 0.000122 grad: 0.0713 (0.0715) loss: 0.8405 (0.8441) time: 0.1992 data: 0.1215 max mem: 9377 +Train: [13] [6000/6250] eta: 0:00:35 lr: 0.000122 grad: 0.0738 (0.0716) loss: 0.8405 (0.8440) time: 0.1378 data: 0.0561 max mem: 9377 +Train: [13] [6100/6250] eta: 0:00:21 lr: 0.000122 grad: 0.0704 (0.0717) loss: 0.8420 (0.8440) time: 0.1543 data: 0.0718 max mem: 9377 +Train: [13] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.0763 (0.0717) loss: 0.8422 (0.8440) time: 0.1278 data: 0.0484 max mem: 9377 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0716 (0.0718) loss: 0.8400 (0.8439) time: 0.1561 data: 0.0810 max mem: 9377 +Train: [13] Total time: 0:14:47 (0.1420 s / it) +Averaged stats: lr: 0.000122 grad: 0.0716 (0.0718) loss: 0.8400 (0.8439) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:04:46 loss: 0.8483 (0.8483) time: 4.6186 data: 4.5891 max mem: 9377 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8529 (0.8502) time: 0.1047 data: 0.0786 max mem: 9377 +Eval (hcp-train-subset): [13] Total time: 0:00:12 (0.2001 s / it) +Averaged stats (hcp-train-subset): loss: 0.8529 (0.8502) +Eval (hcp-val): [13] [ 0/62] eta: 0:04:28 loss: 0.8441 (0.8441) time: 4.3232 data: 4.2931 max mem: 9377 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8478 (0.8480) time: 0.1217 data: 0.0966 max mem: 9377 +Eval (hcp-val): [13] Total time: 0:00:12 (0.1977 s / it) +Averaged stats (hcp-val): loss: 0.8478 (0.8480) +Eval (nsd-val): [13] [ 0/62] eta: 0:04:27 loss: 0.8123 (0.8123) time: 4.3077 data: 4.2773 max mem: 9377 +Eval (nsd-val): [13] [61/62] eta: 0:00:00 loss: 0.8186 (0.8200) time: 0.1149 data: 0.0901 max mem: 9377 +Eval (nsd-val): [13] Total time: 0:00:11 (0.1910 s / it) +Averaged stats (nsd-val): loss: 0.8186 (0.8200) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [14] [ 0/6250] eta: 6:50:58 lr: 0.000122 grad: 0.0456 (0.0456) loss: 0.8876 (0.8876) time: 3.9454 data: 3.6548 max mem: 9377 +Train: [14] [ 100/6250] eta: 0:18:47 lr: 0.000122 grad: 0.0736 (0.0788) loss: 0.8340 (0.8521) time: 0.1292 data: 0.0378 max mem: 9377 +Train: [14] [ 200/6250] eta: 0:16:32 lr: 0.000122 grad: 0.0715 (0.0760) loss: 0.8375 (0.8464) time: 0.1383 data: 0.0506 max mem: 9377 +Train: [14] [ 300/6250] eta: 0:15:31 lr: 0.000122 grad: 0.0705 (0.0756) loss: 0.8439 (0.8458) time: 0.1522 data: 0.0693 max mem: 9377 +Train: [14] [ 400/6250] eta: 0:14:43 lr: 0.000122 grad: 0.0689 (0.0761) loss: 0.8452 (0.8456) time: 0.1364 data: 0.0416 max mem: 9377 +Train: [14] [ 500/6250] eta: 0:14:06 lr: 0.000122 grad: 0.0706 (0.0748) loss: 0.8464 (0.8458) time: 0.1295 data: 0.0444 max mem: 9377 +Train: [14] [ 600/6250] eta: 0:13:42 lr: 0.000122 grad: 0.0694 (0.0738) loss: 0.8502 (0.8456) time: 0.1639 data: 0.0291 max mem: 9377 +Train: [14] [ 700/6250] eta: 0:13:17 lr: 0.000122 grad: 0.0601 (0.0727) loss: 0.8477 (0.8459) time: 0.1254 data: 0.0291 max mem: 9377 +Train: [14] [ 800/6250] eta: 0:13:04 lr: 0.000122 grad: 0.0638 (0.0719) loss: 0.8456 (0.8458) time: 0.1832 data: 0.0915 max mem: 9377 +Train: [14] [ 900/6250] eta: 0:12:49 lr: 0.000122 grad: 0.0632 (0.0716) loss: 0.8425 (0.8458) time: 0.1448 data: 0.0566 max mem: 9377 +Train: [14] [1000/6250] eta: 0:12:41 lr: 0.000122 grad: 0.0660 (0.0713) loss: 0.8443 (0.8458) time: 0.1195 data: 0.0338 max mem: 9377 +Train: [14] [1100/6250] eta: 0:12:25 lr: 0.000122 grad: 0.0638 (0.0711) loss: 0.8443 (0.8457) time: 0.1423 data: 0.0545 max mem: 9377 +Train: [14] [1200/6250] eta: 0:12:12 lr: 0.000122 grad: 0.0663 (0.0710) loss: 0.8486 (0.8456) time: 0.1420 data: 0.0524 max mem: 9377 +Train: [14] [1300/6250] eta: 0:11:55 lr: 0.000122 grad: 0.0656 (0.0706) loss: 0.8456 (0.8454) time: 0.1360 data: 0.0512 max mem: 9377 +Train: [14] [1400/6250] eta: 0:11:40 lr: 0.000122 grad: 0.0654 (0.0707) loss: 0.8389 (0.8452) time: 0.1370 data: 0.0529 max mem: 9377 +Train: [14] [1500/6250] eta: 0:11:26 lr: 0.000122 grad: 0.0637 (0.0704) loss: 0.8406 (0.8450) time: 0.1433 data: 0.0576 max mem: 9377 +Train: [14] [1600/6250] eta: 0:11:14 lr: 0.000122 grad: 0.0678 (0.0703) loss: 0.8413 (0.8449) time: 0.1493 data: 0.0659 max mem: 9377 +Train: [14] [1700/6250] eta: 0:10:59 lr: 0.000122 grad: 0.0662 (0.0701) loss: 0.8453 (0.8448) time: 0.1504 data: 0.0671 max mem: 9377 +Train: [14] [1800/6250] eta: 0:10:45 lr: 0.000122 grad: 0.0647 (0.0700) loss: 0.8398 (0.8446) time: 0.1257 data: 0.0439 max mem: 9377 +Train: [14] [1900/6250] eta: 0:10:30 lr: 0.000122 grad: 0.0724 (0.0699) loss: 0.8369 (0.8445) time: 0.1534 data: 0.0740 max mem: 9377 +Train: [14] [2000/6250] eta: 0:10:15 lr: 0.000122 grad: 0.0782 (0.0701) loss: 0.8351 (0.8442) time: 0.1245 data: 0.0410 max mem: 9377 +Train: [14] [2100/6250] eta: 0:10:04 lr: 0.000122 grad: 0.0667 (0.0701) loss: 0.8419 (0.8441) time: 0.1715 data: 0.0959 max mem: 9377 +Train: [14] [2200/6250] eta: 0:09:52 lr: 0.000122 grad: 0.0718 (0.0701) loss: 0.8409 (0.8439) time: 0.1664 data: 0.0801 max mem: 9377 +Train: [14] [2300/6250] eta: 0:09:39 lr: 0.000122 grad: 0.0624 (0.0700) loss: 0.8480 (0.8438) time: 0.1628 data: 0.0878 max mem: 9377 +Train: [14] [2400/6250] eta: 0:09:26 lr: 0.000122 grad: 0.0711 (0.0699) loss: 0.8380 (0.8437) time: 0.1685 data: 0.0871 max mem: 9377 +Train: [14] [2500/6250] eta: 0:09:12 lr: 0.000122 grad: 0.0664 (0.0698) loss: 0.8460 (0.8436) time: 0.1450 data: 0.0680 max mem: 9377 +Train: [14] [2600/6250] eta: 0:08:57 lr: 0.000122 grad: 0.0669 (0.0698) loss: 0.8388 (0.8434) time: 0.1541 data: 0.0716 max mem: 9377 +Train: [14] [2700/6250] eta: 0:08:42 lr: 0.000122 grad: 0.0664 (0.0697) loss: 0.8418 (0.8433) time: 0.1537 data: 0.0754 max mem: 9377 +Train: [14] [2800/6250] eta: 0:08:28 lr: 0.000122 grad: 0.0669 (0.0697) loss: 0.8406 (0.8432) time: 0.1550 data: 0.0735 max mem: 9377 +Train: [14] [2900/6250] eta: 0:08:12 lr: 0.000122 grad: 0.0699 (0.0697) loss: 0.8457 (0.8431) time: 0.1263 data: 0.0360 max mem: 9377 +Train: [14] [3000/6250] eta: 0:07:57 lr: 0.000122 grad: 0.0715 (0.0697) loss: 0.8364 (0.8430) time: 0.1350 data: 0.0567 max mem: 9377 +Train: [14] [3100/6250] eta: 0:07:41 lr: 0.000122 grad: 0.0683 (0.0698) loss: 0.8402 (0.8430) time: 0.1181 data: 0.0306 max mem: 9377 +Train: [14] [3200/6250] eta: 0:07:25 lr: 0.000122 grad: 0.0617 (0.0698) loss: 0.8454 (0.8429) time: 0.1353 data: 0.0526 max mem: 9377 +Train: [14] [3300/6250] eta: 0:07:10 lr: 0.000122 grad: 0.0645 (0.0697) loss: 0.8452 (0.8429) time: 0.1357 data: 0.0536 max mem: 9377 +Train: [14] [3400/6250] eta: 0:06:55 lr: 0.000122 grad: 0.0678 (0.0697) loss: 0.8454 (0.8430) time: 0.1429 data: 0.0571 max mem: 9377 +Train: [14] [3500/6250] eta: 0:06:40 lr: 0.000122 grad: 0.0670 (0.0696) loss: 0.8492 (0.8431) time: 0.1327 data: 0.0487 max mem: 9377 +Train: [14] [3600/6250] eta: 0:06:25 lr: 0.000122 grad: 0.0657 (0.0697) loss: 0.8444 (0.8431) time: 0.1333 data: 0.0503 max mem: 9377 +Train: [14] [3700/6250] eta: 0:06:10 lr: 0.000122 grad: 0.0753 (0.0699) loss: 0.8438 (0.8431) time: 0.1505 data: 0.0712 max mem: 9377 +Train: [14] [3800/6250] eta: 0:05:55 lr: 0.000122 grad: 0.0690 (0.0700) loss: 0.8464 (0.8431) time: 0.1392 data: 0.0633 max mem: 9377 +Train: [14] [3900/6250] eta: 0:05:40 lr: 0.000122 grad: 0.0680 (0.0701) loss: 0.8353 (0.8431) time: 0.1504 data: 0.0690 max mem: 9377 +Train: [14] [4000/6250] eta: 0:05:25 lr: 0.000122 grad: 0.0674 (0.0701) loss: 0.8387 (0.8431) time: 0.1526 data: 0.0712 max mem: 9377 +Train: [14] [4100/6250] eta: 0:05:10 lr: 0.000122 grad: 0.0725 (0.0702) loss: 0.8427 (0.8430) time: 0.1445 data: 0.0664 max mem: 9377 +Train: [14] [4200/6250] eta: 0:04:55 lr: 0.000122 grad: 0.0670 (0.0702) loss: 0.8440 (0.8431) time: 0.1587 data: 0.0792 max mem: 9377 +Train: [14] [4300/6250] eta: 0:04:40 lr: 0.000122 grad: 0.0701 (0.0702) loss: 0.8413 (0.8430) time: 0.1191 data: 0.0367 max mem: 9377 +Train: [14] [4400/6250] eta: 0:04:26 lr: 0.000122 grad: 0.0664 (0.0702) loss: 0.8432 (0.8430) time: 0.1416 data: 0.0574 max mem: 9377 +Train: [14] [4500/6250] eta: 0:04:11 lr: 0.000122 grad: 0.0665 (0.0701) loss: 0.8494 (0.8431) time: 0.1486 data: 0.0732 max mem: 9377 +Train: [14] [4600/6250] eta: 0:03:57 lr: 0.000122 grad: 0.0648 (0.0701) loss: 0.8436 (0.8431) time: 0.1298 data: 0.0498 max mem: 9377 +Train: [14] [4700/6250] eta: 0:03:42 lr: 0.000122 grad: 0.0676 (0.0701) loss: 0.8426 (0.8431) time: 0.1170 data: 0.0348 max mem: 9377 +Train: [14] [4800/6250] eta: 0:03:28 lr: 0.000122 grad: 0.0688 (0.0701) loss: 0.8410 (0.8431) time: 0.1548 data: 0.0730 max mem: 9377 +Train: [14] [4900/6250] eta: 0:03:13 lr: 0.000122 grad: 0.0676 (0.0702) loss: 0.8483 (0.8431) time: 0.1312 data: 0.0527 max mem: 9377 +Train: [14] [5000/6250] eta: 0:02:59 lr: 0.000122 grad: 0.0700 (0.0702) loss: 0.8378 (0.8431) time: 0.1412 data: 0.0620 max mem: 9377 +Train: [14] [5100/6250] eta: 0:02:44 lr: 0.000122 grad: 0.0685 (0.0702) loss: 0.8422 (0.8431) time: 0.1440 data: 0.0671 max mem: 9377 +Train: [14] [5200/6250] eta: 0:02:30 lr: 0.000122 grad: 0.0696 (0.0702) loss: 0.8480 (0.8431) time: 0.1259 data: 0.0438 max mem: 9377 +Train: [14] [5300/6250] eta: 0:02:15 lr: 0.000122 grad: 0.0727 (0.0704) loss: 0.8424 (0.8430) time: 0.1212 data: 0.0360 max mem: 9377 +Train: [14] [5400/6250] eta: 0:02:01 lr: 0.000122 grad: 0.0705 (0.0704) loss: 0.8446 (0.8430) time: 0.1562 data: 0.0735 max mem: 9377 +Train: [14] [5500/6250] eta: 0:01:46 lr: 0.000122 grad: 0.0665 (0.0703) loss: 0.8473 (0.8430) time: 0.1191 data: 0.0352 max mem: 9377 +Train: [14] [5600/6250] eta: 0:01:32 lr: 0.000122 grad: 0.0698 (0.0704) loss: 0.8426 (0.8430) time: 0.1517 data: 0.0700 max mem: 9377 +Train: [14] [5700/6250] eta: 0:01:18 lr: 0.000122 grad: 0.0632 (0.0704) loss: 0.8462 (0.8430) time: 0.1428 data: 0.0572 max mem: 9377 +Train: [14] [5800/6250] eta: 0:01:03 lr: 0.000122 grad: 0.0649 (0.0704) loss: 0.8412 (0.8430) time: 0.1349 data: 0.0564 max mem: 9377 +Train: [14] [5900/6250] eta: 0:00:49 lr: 0.000122 grad: 0.0642 (0.0704) loss: 0.8422 (0.8430) time: 0.1352 data: 0.0517 max mem: 9377 +Train: [14] [6000/6250] eta: 0:00:35 lr: 0.000122 grad: 0.0672 (0.0705) loss: 0.8420 (0.8430) time: 0.1691 data: 0.0796 max mem: 9377 +Train: [14] [6100/6250] eta: 0:00:21 lr: 0.000122 grad: 0.0734 (0.0705) loss: 0.8424 (0.8430) time: 0.1478 data: 0.0642 max mem: 9377 +Train: [14] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.0698 (0.0705) loss: 0.8421 (0.8430) time: 0.1516 data: 0.0719 max mem: 9377 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0699 (0.0705) loss: 0.8443 (0.8430) time: 0.1500 data: 0.0696 max mem: 9377 +Train: [14] Total time: 0:15:00 (0.1441 s / it) +Averaged stats: lr: 0.000122 grad: 0.0699 (0.0705) loss: 0.8443 (0.8430) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:03:10 loss: 0.8468 (0.8468) time: 3.0792 data: 3.0108 max mem: 9377 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8483 (0.8478) time: 0.1291 data: 0.1046 max mem: 9377 +Eval (hcp-train-subset): [14] Total time: 0:00:13 (0.2165 s / it) +Averaged stats (hcp-train-subset): loss: 0.8483 (0.8478) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [14] [ 0/62] eta: 0:04:49 loss: 0.8400 (0.8400) time: 4.6629 data: 4.6320 max mem: 9377 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8460 (0.8458) time: 0.1131 data: 0.0883 max mem: 9377 +Eval (hcp-val): [14] Total time: 0:00:13 (0.2157 s / it) +Averaged stats (hcp-val): loss: 0.8460 (0.8458) +Making plots (hcp-val): example=24 +Eval (nsd-val): [14] [ 0/62] eta: 0:03:50 loss: 0.8071 (0.8071) time: 3.7195 data: 3.6292 max mem: 9377 +Eval (nsd-val): [14] [61/62] eta: 0:00:00 loss: 0.8163 (0.8182) time: 0.1082 data: 0.0835 max mem: 9377 +Eval (nsd-val): [14] Total time: 0:00:12 (0.1957 s / it) +Averaged stats (nsd-val): loss: 0.8163 (0.8182) +Making plots (nsd-val): example=6 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00014.pth +Train: [15] [ 0/6250] eta: 6:14:15 lr: 0.000122 grad: 0.0449 (0.0449) loss: 0.8764 (0.8764) time: 3.5930 data: 3.3992 max mem: 9377 +Train: [15] [ 100/6250] eta: 0:18:58 lr: 0.000122 grad: 0.0662 (0.0812) loss: 0.8443 (0.8479) time: 0.1458 data: 0.0603 max mem: 9377 +Train: [15] [ 200/6250] eta: 0:16:29 lr: 0.000122 grad: 0.0680 (0.0766) loss: 0.8437 (0.8471) time: 0.1254 data: 0.0422 max mem: 9377 +Train: [15] [ 300/6250] eta: 0:15:16 lr: 0.000122 grad: 0.0698 (0.0751) loss: 0.8391 (0.8449) time: 0.1237 data: 0.0382 max mem: 9377 +Train: [15] [ 400/6250] eta: 0:14:36 lr: 0.000122 grad: 0.0736 (0.0759) loss: 0.8315 (0.8434) time: 0.1250 data: 0.0382 max mem: 9377 +Train: [15] [ 500/6250] eta: 0:14:00 lr: 0.000122 grad: 0.0761 (0.0760) loss: 0.8328 (0.8418) time: 0.1293 data: 0.0454 max mem: 9377 +Train: [15] [ 600/6250] eta: 0:13:32 lr: 0.000122 grad: 0.0746 (0.0760) loss: 0.8396 (0.8405) time: 0.1263 data: 0.0405 max mem: 9377 +Train: [15] [ 700/6250] eta: 0:13:11 lr: 0.000122 grad: 0.0772 (0.0764) loss: 0.8376 (0.8398) time: 0.1401 data: 0.0527 max mem: 9377 +Train: [15] [ 800/6250] eta: 0:12:57 lr: 0.000122 grad: 0.0732 (0.0762) loss: 0.8384 (0.8391) time: 0.1608 data: 0.0769 max mem: 9377 +Train: [15] [ 900/6250] eta: 0:12:51 lr: 0.000122 grad: 0.0738 (0.0758) loss: 0.8369 (0.8389) time: 0.1451 data: 0.0603 max mem: 9377 +Train: [15] [1000/6250] eta: 0:12:41 lr: 0.000122 grad: 0.0667 (0.0756) loss: 0.8385 (0.8387) time: 0.1577 data: 0.0749 max mem: 9377 +Train: [15] [1100/6250] eta: 0:12:29 lr: 0.000121 grad: 0.0681 (0.0753) loss: 0.8328 (0.8384) time: 0.1409 data: 0.0608 max mem: 9377 +Train: [15] [1200/6250] eta: 0:12:21 lr: 0.000121 grad: 0.0739 (0.0753) loss: 0.8322 (0.8382) time: 0.1421 data: 0.0622 max mem: 9377 +Train: [15] [1300/6250] eta: 0:12:14 lr: 0.000121 grad: 0.0718 (0.0752) loss: 0.8419 (0.8380) time: 0.1393 data: 0.0535 max mem: 9377 +Train: [15] [1400/6250] eta: 0:12:03 lr: 0.000121 grad: 0.0680 (0.0752) loss: 0.8396 (0.8379) time: 0.1695 data: 0.0913 max mem: 9377 +Train: [15] [1500/6250] eta: 0:11:47 lr: 0.000121 grad: 0.0708 (0.0753) loss: 0.8350 (0.8379) time: 0.1642 data: 0.0859 max mem: 9377 +Train: [15] [1600/6250] eta: 0:11:34 lr: 0.000121 grad: 0.0684 (0.0752) loss: 0.8360 (0.8378) time: 0.1379 data: 0.0608 max mem: 9377 +Train: [15] [1700/6250] eta: 0:11:18 lr: 0.000121 grad: 0.0720 (0.0750) loss: 0.8353 (0.8377) time: 0.1238 data: 0.0427 max mem: 9377 +Train: [15] [1800/6250] eta: 0:11:02 lr: 0.000121 grad: 0.0678 (0.0750) loss: 0.8395 (0.8378) time: 0.1280 data: 0.0449 max mem: 9377 +Train: [15] [1900/6250] eta: 0:10:44 lr: 0.000121 grad: 0.0742 (0.0749) loss: 0.8346 (0.8377) time: 0.1367 data: 0.0563 max mem: 9377 +Train: [15] [2000/6250] eta: 0:10:28 lr: 0.000121 grad: 0.0699 (0.0747) loss: 0.8384 (0.8378) time: 0.1419 data: 0.0590 max mem: 9377 +Train: [15] [2100/6250] eta: 0:10:18 lr: 0.000121 grad: 0.0688 (0.0746) loss: 0.8376 (0.8378) time: 0.1683 data: 0.0928 max mem: 9377 +Train: [15] [2200/6250] eta: 0:10:09 lr: 0.000121 grad: 0.0700 (0.0744) loss: 0.8405 (0.8379) time: 0.1964 data: 0.1137 max mem: 9377 +Train: [15] [2300/6250] eta: 0:10:00 lr: 0.000121 grad: 0.0743 (0.0743) loss: 0.8400 (0.8378) time: 0.2206 data: 0.1334 max mem: 9377 +Train: [15] [2400/6250] eta: 0:09:49 lr: 0.000121 grad: 0.0666 (0.0742) loss: 0.8343 (0.8378) time: 0.1726 data: 0.0875 max mem: 9377 +Train: [15] [2500/6250] eta: 0:09:34 lr: 0.000121 grad: 0.0691 (0.0742) loss: 0.8441 (0.8378) time: 0.1576 data: 0.0753 max mem: 9377 +Train: [15] [2600/6250] eta: 0:09:23 lr: 0.000121 grad: 0.0691 (0.0741) loss: 0.8385 (0.8378) time: 0.1723 data: 0.0872 max mem: 9377 +Train: [15] [2700/6250] eta: 0:09:09 lr: 0.000121 grad: 0.0711 (0.0740) loss: 0.8369 (0.8377) time: 0.1789 data: 0.0831 max mem: 9377 +Train: [15] [2800/6250] eta: 0:08:54 lr: 0.000121 grad: 0.0725 (0.0742) loss: 0.8371 (0.8376) time: 0.1709 data: 0.0854 max mem: 9377 +Train: [15] [2900/6250] eta: 0:08:38 lr: 0.000121 grad: 0.0663 (0.0742) loss: 0.8399 (0.8376) time: 0.1392 data: 0.0495 max mem: 9377 +Train: [15] [3000/6250] eta: 0:08:22 lr: 0.000121 grad: 0.0687 (0.0741) loss: 0.8373 (0.8376) time: 0.1513 data: 0.0699 max mem: 9377 +Train: [15] [3100/6250] eta: 0:08:05 lr: 0.000121 grad: 0.0680 (0.0740) loss: 0.8393 (0.8376) time: 0.1391 data: 0.0501 max mem: 9377 +Train: [15] [3200/6250] eta: 0:07:50 lr: 0.000121 grad: 0.0656 (0.0739) loss: 0.8365 (0.8376) time: 0.1753 data: 0.0948 max mem: 9377 +Train: [15] [3300/6250] eta: 0:07:33 lr: 0.000121 grad: 0.0678 (0.0738) loss: 0.8369 (0.8376) time: 0.1491 data: 0.0678 max mem: 9377 +Train: [15] [3400/6250] eta: 0:07:18 lr: 0.000121 grad: 0.0703 (0.0738) loss: 0.8357 (0.8376) time: 0.1511 data: 0.0816 max mem: 9377 +Train: [15] [3500/6250] eta: 0:07:02 lr: 0.000121 grad: 0.0680 (0.0737) loss: 0.8350 (0.8376) time: 0.1557 data: 0.0733 max mem: 9377 +Train: [15] [3600/6250] eta: 0:06:46 lr: 0.000121 grad: 0.0668 (0.0736) loss: 0.8373 (0.8376) time: 0.1475 data: 0.0651 max mem: 9377 +Train: [15] [3700/6250] eta: 0:06:30 lr: 0.000121 grad: 0.0669 (0.0735) loss: 0.8408 (0.8376) time: 0.1505 data: 0.0703 max mem: 9377 +Train: [15] [3800/6250] eta: 0:06:14 lr: 0.000121 grad: 0.0726 (0.0735) loss: 0.8327 (0.8376) time: 0.1218 data: 0.0431 max mem: 9377 +Train: [15] [3900/6250] eta: 0:05:58 lr: 0.000121 grad: 0.0690 (0.0734) loss: 0.8400 (0.8376) time: 0.1150 data: 0.0355 max mem: 9377 +Train: [15] [4000/6250] eta: 0:05:42 lr: 0.000121 grad: 0.0718 (0.0734) loss: 0.8328 (0.8376) time: 0.1174 data: 0.0309 max mem: 9377 +Train: [15] [4100/6250] eta: 0:05:26 lr: 0.000121 grad: 0.0685 (0.0734) loss: 0.8363 (0.8376) time: 0.1398 data: 0.0610 max mem: 9377 +Train: [15] [4200/6250] eta: 0:05:10 lr: 0.000121 grad: 0.0728 (0.0735) loss: 0.8353 (0.8376) time: 0.1304 data: 0.0442 max mem: 9377 +Train: [15] [4300/6250] eta: 0:04:54 lr: 0.000121 grad: 0.0715 (0.0735) loss: 0.8313 (0.8376) time: 0.1217 data: 0.0338 max mem: 9377 +Train: [15] [4400/6250] eta: 0:04:39 lr: 0.000121 grad: 0.0694 (0.0735) loss: 0.8334 (0.8375) time: 0.1400 data: 0.0566 max mem: 9377 +Train: [15] [4500/6250] eta: 0:04:23 lr: 0.000121 grad: 0.0684 (0.0735) loss: 0.8387 (0.8375) time: 0.1412 data: 0.0594 max mem: 9377 +Train: [15] [4600/6250] eta: 0:04:08 lr: 0.000121 grad: 0.0655 (0.0735) loss: 0.8410 (0.8375) time: 0.1575 data: 0.0799 max mem: 9377 +Train: [15] [4700/6250] eta: 0:03:52 lr: 0.000121 grad: 0.0760 (0.0735) loss: 0.8413 (0.8375) time: 0.1477 data: 0.0666 max mem: 9377 +Train: [15] [4800/6250] eta: 0:03:37 lr: 0.000121 grad: 0.0732 (0.0736) loss: 0.8366 (0.8375) time: 0.1389 data: 0.0534 max mem: 9377 +Train: [15] [4900/6250] eta: 0:03:22 lr: 0.000121 grad: 0.0698 (0.0735) loss: 0.8412 (0.8376) time: 0.1272 data: 0.0457 max mem: 9377 +Train: [15] [5000/6250] eta: 0:03:07 lr: 0.000121 grad: 0.0700 (0.0736) loss: 0.8321 (0.8377) time: 0.1274 data: 0.0492 max mem: 9377 +Train: [15] [5100/6250] eta: 0:02:51 lr: 0.000121 grad: 0.0726 (0.0735) loss: 0.8435 (0.8377) time: 0.1495 data: 0.0683 max mem: 9377 +Train: [15] [5200/6250] eta: 0:02:36 lr: 0.000121 grad: 0.0685 (0.0735) loss: 0.8401 (0.8378) time: 0.1349 data: 0.0543 max mem: 9377 +Train: [15] [5300/6250] eta: 0:02:21 lr: 0.000121 grad: 0.0741 (0.0735) loss: 0.8403 (0.8379) time: 0.1310 data: 0.0524 max mem: 9377 +Train: [15] [5400/6250] eta: 0:02:06 lr: 0.000121 grad: 0.0694 (0.0734) loss: 0.8429 (0.8379) time: 0.1127 data: 0.0343 max mem: 9377 +Train: [15] [5500/6250] eta: 0:01:51 lr: 0.000121 grad: 0.0698 (0.0734) loss: 0.8380 (0.8380) time: 0.1357 data: 0.0545 max mem: 9377 +Train: [15] [5600/6250] eta: 0:01:36 lr: 0.000121 grad: 0.0693 (0.0734) loss: 0.8369 (0.8380) time: 0.1590 data: 0.0774 max mem: 9377 +Train: [15] [5700/6250] eta: 0:01:21 lr: 0.000121 grad: 0.0691 (0.0734) loss: 0.8380 (0.8380) time: 0.1354 data: 0.0607 max mem: 9377 +Train: [15] [5800/6250] eta: 0:01:06 lr: 0.000121 grad: 0.0682 (0.0733) loss: 0.8406 (0.8380) time: 0.1357 data: 0.0547 max mem: 9377 +Train: [15] [5900/6250] eta: 0:00:52 lr: 0.000121 grad: 0.0671 (0.0733) loss: 0.8342 (0.8380) time: 0.1593 data: 0.0823 max mem: 9377 +Train: [15] [6000/6250] eta: 0:00:37 lr: 0.000121 grad: 0.0698 (0.0733) loss: 0.8404 (0.8380) time: 0.1579 data: 0.0776 max mem: 9377 +Train: [15] [6100/6250] eta: 0:00:22 lr: 0.000121 grad: 0.0662 (0.0733) loss: 0.8445 (0.8380) time: 0.1574 data: 0.0726 max mem: 9377 +Train: [15] [6200/6250] eta: 0:00:07 lr: 0.000121 grad: 0.0659 (0.0732) loss: 0.8436 (0.8381) time: 0.1369 data: 0.0564 max mem: 9377 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.0676 (0.0732) loss: 0.8444 (0.8381) time: 0.1411 data: 0.0582 max mem: 9377 +Train: [15] Total time: 0:15:37 (0.1500 s / it) +Averaged stats: lr: 0.000121 grad: 0.0676 (0.0732) loss: 0.8444 (0.8381) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:03:10 loss: 0.8438 (0.8438) time: 3.0660 data: 2.9776 max mem: 9377 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8490 (0.8474) time: 0.1249 data: 0.1002 max mem: 9377 +Eval (hcp-train-subset): [15] Total time: 0:00:12 (0.2073 s / it) +Averaged stats (hcp-train-subset): loss: 0.8490 (0.8474) +Eval (hcp-val): [15] [ 0/62] eta: 0:04:49 loss: 0.8400 (0.8400) time: 4.6661 data: 4.6358 max mem: 9377 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8443 (0.8451) time: 0.1010 data: 0.0745 max mem: 9377 +Eval (hcp-val): [15] Total time: 0:00:12 (0.2094 s / it) +Averaged stats (hcp-val): loss: 0.8443 (0.8451) +Eval (nsd-val): [15] [ 0/62] eta: 0:03:48 loss: 0.8066 (0.8066) time: 3.6792 data: 3.5864 max mem: 9377 +Eval (nsd-val): [15] [61/62] eta: 0:00:00 loss: 0.8173 (0.8175) time: 0.1213 data: 0.0964 max mem: 9377 +Eval (nsd-val): [15] Total time: 0:00:13 (0.2098 s / it) +Averaged stats (nsd-val): loss: 0.8173 (0.8175) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [16] [ 0/6250] eta: 9:20:58 lr: 0.000121 grad: 0.1143 (0.1143) loss: 0.8319 (0.8319) time: 5.3854 data: 5.2236 max mem: 9377 +Train: [16] [ 100/6250] eta: 0:20:46 lr: 0.000121 grad: 0.0758 (0.0798) loss: 0.8435 (0.8425) time: 0.1699 data: 0.0734 max mem: 9377 +Train: [16] [ 200/6250] eta: 0:17:51 lr: 0.000121 grad: 0.0763 (0.0788) loss: 0.8394 (0.8397) time: 0.1445 data: 0.0587 max mem: 9377 +Train: [16] [ 300/6250] eta: 0:16:35 lr: 0.000121 grad: 0.0648 (0.0771) loss: 0.8372 (0.8385) time: 0.1365 data: 0.0494 max mem: 9377 +Train: [16] [ 400/6250] eta: 0:15:29 lr: 0.000121 grad: 0.0688 (0.0761) loss: 0.8256 (0.8377) time: 0.1351 data: 0.0458 max mem: 9377 +Train: [16] [ 500/6250] eta: 0:14:50 lr: 0.000121 grad: 0.0690 (0.0753) loss: 0.8392 (0.8371) time: 0.1021 data: 0.0150 max mem: 9377 +Train: [16] [ 600/6250] eta: 0:14:34 lr: 0.000121 grad: 0.0719 (0.0749) loss: 0.8375 (0.8365) time: 0.1754 data: 0.0934 max mem: 9377 +Train: [16] [ 700/6250] eta: 0:14:23 lr: 0.000121 grad: 0.0650 (0.0743) loss: 0.8389 (0.8363) time: 0.1468 data: 0.0616 max mem: 9377 +Train: [16] [ 800/6250] eta: 0:14:15 lr: 0.000121 grad: 0.0673 (0.0736) loss: 0.8376 (0.8363) time: 0.1610 data: 0.0815 max mem: 9377 +Train: [16] [ 900/6250] eta: 0:14:13 lr: 0.000121 grad: 0.0669 (0.0729) loss: 0.8334 (0.8363) time: 0.1493 data: 0.0670 max mem: 9377 +Train: [16] [1000/6250] eta: 0:14:04 lr: 0.000121 grad: 0.0683 (0.0726) loss: 0.8332 (0.8362) time: 0.1820 data: 0.1012 max mem: 9377 +Train: [16] [1100/6250] eta: 0:13:48 lr: 0.000121 grad: 0.0673 (0.0721) loss: 0.8342 (0.8360) time: 0.1568 data: 0.0688 max mem: 9377 +Train: [16] [1200/6250] eta: 0:13:30 lr: 0.000121 grad: 0.0674 (0.0721) loss: 0.8348 (0.8360) time: 0.1659 data: 0.0847 max mem: 9377 +Train: [16] [1300/6250] eta: 0:13:11 lr: 0.000121 grad: 0.0701 (0.0719) loss: 0.8338 (0.8361) time: 0.1622 data: 0.0788 max mem: 9377 +Train: [16] [1400/6250] eta: 0:12:50 lr: 0.000121 grad: 0.0661 (0.0718) loss: 0.8378 (0.8359) time: 0.1501 data: 0.0653 max mem: 9377 +Train: [16] [1500/6250] eta: 0:12:30 lr: 0.000121 grad: 0.0658 (0.0715) loss: 0.8384 (0.8360) time: 0.1346 data: 0.0490 max mem: 9377 +Train: [16] [1600/6250] eta: 0:12:08 lr: 0.000121 grad: 0.0732 (0.0714) loss: 0.8336 (0.8361) time: 0.1518 data: 0.0721 max mem: 9377 +Train: [16] [1700/6250] eta: 0:11:48 lr: 0.000121 grad: 0.0707 (0.0714) loss: 0.8367 (0.8361) time: 0.1341 data: 0.0470 max mem: 9377 +Train: [16] [1800/6250] eta: 0:11:30 lr: 0.000121 grad: 0.0751 (0.0714) loss: 0.8403 (0.8361) time: 0.1282 data: 0.0441 max mem: 9377 +Train: [16] [1900/6250] eta: 0:11:12 lr: 0.000121 grad: 0.0643 (0.0714) loss: 0.8393 (0.8363) time: 0.1271 data: 0.0426 max mem: 9377 +Train: [16] [2000/6250] eta: 0:10:53 lr: 0.000121 grad: 0.0686 (0.0714) loss: 0.8393 (0.8365) time: 0.1483 data: 0.0672 max mem: 9377 +Train: [16] [2100/6250] eta: 0:10:33 lr: 0.000121 grad: 0.0675 (0.0714) loss: 0.8342 (0.8365) time: 0.1277 data: 0.0462 max mem: 9377 +Train: [16] [2200/6250] eta: 0:10:16 lr: 0.000121 grad: 0.0696 (0.0714) loss: 0.8400 (0.8365) time: 0.1484 data: 0.0672 max mem: 9377 +Train: [16] [2300/6250] eta: 0:10:02 lr: 0.000121 grad: 0.0667 (0.0714) loss: 0.8467 (0.8366) time: 0.1503 data: 0.0676 max mem: 9377 +Train: [16] [2400/6250] eta: 0:09:46 lr: 0.000121 grad: 0.0715 (0.0715) loss: 0.8329 (0.8365) time: 0.1474 data: 0.0641 max mem: 9377 +Train: [16] [2500/6250] eta: 0:09:31 lr: 0.000121 grad: 0.0753 (0.0716) loss: 0.8356 (0.8365) time: 0.1650 data: 0.0823 max mem: 9377 +Train: [16] [2600/6250] eta: 0:09:15 lr: 0.000121 grad: 0.0715 (0.0718) loss: 0.8338 (0.8365) time: 0.1245 data: 0.0445 max mem: 9377 +Train: [16] [2700/6250] eta: 0:09:00 lr: 0.000121 grad: 0.0684 (0.0718) loss: 0.8282 (0.8365) time: 0.1525 data: 0.0626 max mem: 9377 +Train: [16] [2800/6250] eta: 0:08:46 lr: 0.000121 grad: 0.0703 (0.0719) loss: 0.8383 (0.8364) time: 0.1545 data: 0.0637 max mem: 9377 +Train: [16] [2900/6250] eta: 0:08:31 lr: 0.000121 grad: 0.0699 (0.0719) loss: 0.8382 (0.8364) time: 0.1413 data: 0.0531 max mem: 9377 +Train: [16] [3000/6250] eta: 0:08:15 lr: 0.000121 grad: 0.0687 (0.0720) loss: 0.8355 (0.8362) time: 0.1427 data: 0.0554 max mem: 9377 +Train: [16] [3100/6250] eta: 0:07:59 lr: 0.000121 grad: 0.0673 (0.0720) loss: 0.8415 (0.8362) time: 0.1378 data: 0.0614 max mem: 9377 +Train: [16] [3200/6250] eta: 0:07:43 lr: 0.000121 grad: 0.0702 (0.0721) loss: 0.8377 (0.8361) time: 0.1498 data: 0.0673 max mem: 9377 +Train: [16] [3300/6250] eta: 0:07:26 lr: 0.000121 grad: 0.0664 (0.0720) loss: 0.8382 (0.8362) time: 0.1101 data: 0.0243 max mem: 9377 +Train: [16] [3400/6250] eta: 0:07:10 lr: 0.000121 grad: 0.0780 (0.0723) loss: 0.8316 (0.8361) time: 0.1343 data: 0.0461 max mem: 9377 +Train: [16] [3500/6250] eta: 0:06:55 lr: 0.000120 grad: 0.0649 (0.0722) loss: 0.8405 (0.8361) time: 0.1407 data: 0.0587 max mem: 9377 +Train: [16] [3600/6250] eta: 0:06:39 lr: 0.000120 grad: 0.0694 (0.0722) loss: 0.8349 (0.8361) time: 0.1411 data: 0.0649 max mem: 9377 +Train: [16] [3700/6250] eta: 0:06:23 lr: 0.000120 grad: 0.0745 (0.0722) loss: 0.8384 (0.8362) time: 0.1313 data: 0.0557 max mem: 9377 +Train: [16] [3800/6250] eta: 0:06:08 lr: 0.000120 grad: 0.0683 (0.0723) loss: 0.8370 (0.8363) time: 0.1249 data: 0.0417 max mem: 9377 +Train: [16] [3900/6250] eta: 0:05:52 lr: 0.000120 grad: 0.0695 (0.0723) loss: 0.8341 (0.8363) time: 0.1223 data: 0.0380 max mem: 9377 +Train: [16] [4000/6250] eta: 0:05:37 lr: 0.000120 grad: 0.0720 (0.0722) loss: 0.8377 (0.8364) time: 0.1666 data: 0.0917 max mem: 9377 +Train: [16] [4100/6250] eta: 0:05:21 lr: 0.000120 grad: 0.0678 (0.0722) loss: 0.8355 (0.8364) time: 0.1430 data: 0.0563 max mem: 9377 +Train: [16] [4200/6250] eta: 0:05:06 lr: 0.000120 grad: 0.0668 (0.0721) loss: 0.8428 (0.8365) time: 0.1433 data: 0.0586 max mem: 9377 +Train: [16] [4300/6250] eta: 0:04:50 lr: 0.000120 grad: 0.0674 (0.0722) loss: 0.8335 (0.8365) time: 0.1073 data: 0.0245 max mem: 9377 +Train: [16] [4400/6250] eta: 0:04:35 lr: 0.000120 grad: 0.0714 (0.0722) loss: 0.8358 (0.8365) time: 0.1531 data: 0.0726 max mem: 9377 +Train: [16] [4500/6250] eta: 0:04:20 lr: 0.000120 grad: 0.0686 (0.0722) loss: 0.8405 (0.8365) time: 0.1378 data: 0.0660 max mem: 9377 +Train: [16] [4600/6250] eta: 0:04:04 lr: 0.000120 grad: 0.0696 (0.0722) loss: 0.8378 (0.8366) time: 0.1253 data: 0.0411 max mem: 9377 +Train: [16] [4700/6250] eta: 0:03:49 lr: 0.000120 grad: 0.0699 (0.0722) loss: 0.8353 (0.8366) time: 0.1525 data: 0.0730 max mem: 9377 +Train: [16] [4800/6250] eta: 0:03:34 lr: 0.000120 grad: 0.0717 (0.0722) loss: 0.8343 (0.8366) time: 0.1244 data: 0.0443 max mem: 9377 +Train: [16] [4900/6250] eta: 0:03:19 lr: 0.000120 grad: 0.0648 (0.0722) loss: 0.8387 (0.8367) time: 0.1492 data: 0.0736 max mem: 9377 +Train: [16] [5000/6250] eta: 0:03:04 lr: 0.000120 grad: 0.0732 (0.0721) loss: 0.8324 (0.8367) time: 0.1339 data: 0.0557 max mem: 9377 +Train: [16] [5100/6250] eta: 0:02:49 lr: 0.000120 grad: 0.0708 (0.0721) loss: 0.8389 (0.8367) time: 0.1317 data: 0.0494 max mem: 9377 +Train: [16] [5200/6250] eta: 0:02:34 lr: 0.000120 grad: 0.0681 (0.0721) loss: 0.8359 (0.8367) time: 0.1300 data: 0.0514 max mem: 9377 +Train: [16] [5300/6250] eta: 0:02:19 lr: 0.000120 grad: 0.0747 (0.0721) loss: 0.8400 (0.8368) time: 0.1389 data: 0.0582 max mem: 9377 +Train: [16] [5400/6250] eta: 0:02:04 lr: 0.000120 grad: 0.0714 (0.0722) loss: 0.8361 (0.8367) time: 0.1559 data: 0.0761 max mem: 9377 +Train: [16] [5500/6250] eta: 0:01:50 lr: 0.000120 grad: 0.0705 (0.0722) loss: 0.8376 (0.8368) time: 0.1325 data: 0.0582 max mem: 9377 +Train: [16] [5600/6250] eta: 0:01:35 lr: 0.000120 grad: 0.0703 (0.0722) loss: 0.8375 (0.8367) time: 0.1452 data: 0.0707 max mem: 9377 +Train: [16] [5700/6250] eta: 0:01:20 lr: 0.000120 grad: 0.0667 (0.0722) loss: 0.8366 (0.8367) time: 0.1514 data: 0.0648 max mem: 9377 +Train: [16] [5800/6250] eta: 0:01:06 lr: 0.000120 grad: 0.0696 (0.0723) loss: 0.8363 (0.8367) time: 0.1868 data: 0.1149 max mem: 9377 +Train: [16] [5900/6250] eta: 0:00:51 lr: 0.000120 grad: 0.0725 (0.0723) loss: 0.8374 (0.8366) time: 0.1284 data: 0.0482 max mem: 9377 +Train: [16] [6000/6250] eta: 0:00:36 lr: 0.000120 grad: 0.0721 (0.0724) loss: 0.8361 (0.8366) time: 0.1220 data: 0.0390 max mem: 9377 +Train: [16] [6100/6250] eta: 0:00:22 lr: 0.000120 grad: 0.0646 (0.0724) loss: 0.8397 (0.8367) time: 0.1342 data: 0.0545 max mem: 9377 +Train: [16] [6200/6250] eta: 0:00:07 lr: 0.000120 grad: 0.0678 (0.0724) loss: 0.8402 (0.8367) time: 0.1654 data: 0.0860 max mem: 9377 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.0704 (0.0724) loss: 0.8398 (0.8367) time: 0.1445 data: 0.0654 max mem: 9377 +Train: [16] Total time: 0:15:23 (0.1477 s / it) +Averaged stats: lr: 0.000120 grad: 0.0704 (0.0724) loss: 0.8398 (0.8367) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:04:58 loss: 0.8452 (0.8452) time: 4.8130 data: 4.7830 max mem: 9377 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8458 (0.8465) time: 0.1199 data: 0.0936 max mem: 9377 +Eval (hcp-train-subset): [16] Total time: 0:00:12 (0.2044 s / it) +Averaged stats (hcp-train-subset): loss: 0.8458 (0.8465) +Eval (hcp-val): [16] [ 0/62] eta: 0:05:25 loss: 0.8393 (0.8393) time: 5.2516 data: 5.2215 max mem: 9377 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8440 (0.8447) time: 0.0946 data: 0.0700 max mem: 9377 +Eval (hcp-val): [16] Total time: 0:00:12 (0.1970 s / it) +Averaged stats (hcp-val): loss: 0.8440 (0.8447) +Eval (nsd-val): [16] [ 0/62] eta: 0:03:19 loss: 0.8040 (0.8040) time: 3.2117 data: 3.1317 max mem: 9377 +Eval (nsd-val): [16] [61/62] eta: 0:00:00 loss: 0.8158 (0.8178) time: 0.1048 data: 0.0800 max mem: 9377 +Eval (nsd-val): [16] Total time: 0:00:11 (0.1921 s / it) +Averaged stats (nsd-val): loss: 0.8158 (0.8178) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [17] [ 0/6250] eta: 6:44:40 lr: 0.000120 grad: 0.0540 (0.0540) loss: 0.8580 (0.8580) time: 3.8848 data: 3.6812 max mem: 9377 +Train: [17] [ 100/6250] eta: 0:19:23 lr: 0.000120 grad: 0.0657 (0.0700) loss: 0.8415 (0.8429) time: 0.1484 data: 0.0649 max mem: 9377 +Train: [17] [ 200/6250] eta: 0:17:17 lr: 0.000120 grad: 0.0635 (0.0701) loss: 0.8336 (0.8383) time: 0.1581 data: 0.0721 max mem: 9377 +Train: [17] [ 300/6250] eta: 0:16:00 lr: 0.000120 grad: 0.0706 (0.0711) loss: 0.8387 (0.8374) time: 0.1258 data: 0.0389 max mem: 9377 +Train: [17] [ 400/6250] eta: 0:15:12 lr: 0.000120 grad: 0.0673 (0.0704) loss: 0.8398 (0.8380) time: 0.1452 data: 0.0592 max mem: 9377 +Train: [17] [ 500/6250] eta: 0:14:44 lr: 0.000120 grad: 0.0692 (0.0704) loss: 0.8332 (0.8381) time: 0.1391 data: 0.0535 max mem: 9377 +Train: [17] [ 600/6250] eta: 0:14:12 lr: 0.000120 grad: 0.0674 (0.0703) loss: 0.8372 (0.8379) time: 0.1386 data: 0.0489 max mem: 9377 +Train: [17] [ 700/6250] eta: 0:13:47 lr: 0.000120 grad: 0.0682 (0.0701) loss: 0.8369 (0.8379) time: 0.1395 data: 0.0578 max mem: 9377 +Train: [17] [ 800/6250] eta: 0:13:31 lr: 0.000120 grad: 0.0666 (0.0700) loss: 0.8447 (0.8379) time: 0.1557 data: 0.0728 max mem: 9377 +Train: [17] [ 900/6250] eta: 0:13:13 lr: 0.000120 grad: 0.0652 (0.0700) loss: 0.8409 (0.8382) time: 0.1273 data: 0.0537 max mem: 9377 +Train: [17] [1000/6250] eta: 0:12:56 lr: 0.000120 grad: 0.0682 (0.0700) loss: 0.8372 (0.8382) time: 0.1349 data: 0.0563 max mem: 9377 +Train: [17] [1100/6250] eta: 0:12:40 lr: 0.000120 grad: 0.0692 (0.0701) loss: 0.8430 (0.8381) time: 0.1388 data: 0.0649 max mem: 9377 +Train: [17] [1200/6250] eta: 0:12:23 lr: 0.000120 grad: 0.0660 (0.0702) loss: 0.8373 (0.8380) time: 0.1405 data: 0.0572 max mem: 9377 +Train: [17] [1300/6250] eta: 0:12:11 lr: 0.000120 grad: 0.0686 (0.0704) loss: 0.8345 (0.8377) time: 0.1238 data: 0.0457 max mem: 9377 +Train: [17] [1400/6250] eta: 0:11:59 lr: 0.000120 grad: 0.0657 (0.0704) loss: 0.8358 (0.8376) time: 0.1462 data: 0.0669 max mem: 9377 +Train: [17] [1500/6250] eta: 0:11:43 lr: 0.000120 grad: 0.0693 (0.0704) loss: 0.8418 (0.8376) time: 0.1473 data: 0.0749 max mem: 9377 +Train: [17] [1600/6250] eta: 0:11:29 lr: 0.000120 grad: 0.0686 (0.0703) loss: 0.8395 (0.8376) time: 0.1522 data: 0.0712 max mem: 9377 +Train: [17] [1700/6250] eta: 0:11:15 lr: 0.000120 grad: 0.0672 (0.0703) loss: 0.8394 (0.8376) time: 0.1725 data: 0.0934 max mem: 9377 +Train: [17] [1800/6250] eta: 0:10:58 lr: 0.000120 grad: 0.0707 (0.0706) loss: 0.8344 (0.8376) time: 0.1565 data: 0.0744 max mem: 9377 +Train: [17] [1900/6250] eta: 0:10:43 lr: 0.000120 grad: 0.0699 (0.0706) loss: 0.8394 (0.8376) time: 0.1510 data: 0.0725 max mem: 9377 +Train: [17] [2000/6250] eta: 0:10:28 lr: 0.000120 grad: 0.0703 (0.0707) loss: 0.8314 (0.8375) time: 0.1597 data: 0.0822 max mem: 9377 +Train: [17] [2100/6250] eta: 0:10:13 lr: 0.000120 grad: 0.0697 (0.0708) loss: 0.8377 (0.8374) time: 0.1641 data: 0.0861 max mem: 9377 +Train: [17] [2200/6250] eta: 0:09:57 lr: 0.000120 grad: 0.0696 (0.0709) loss: 0.8389 (0.8373) time: 0.1559 data: 0.0797 max mem: 9377 +Train: [17] [2300/6250] eta: 0:09:44 lr: 0.000120 grad: 0.0673 (0.0709) loss: 0.8389 (0.8373) time: 0.1443 data: 0.0621 max mem: 9377 +Train: [17] [2400/6250] eta: 0:09:31 lr: 0.000120 grad: 0.0724 (0.0710) loss: 0.8329 (0.8372) time: 0.1483 data: 0.0676 max mem: 9377 +Train: [17] [2500/6250] eta: 0:09:17 lr: 0.000120 grad: 0.0713 (0.0711) loss: 0.8305 (0.8372) time: 0.1579 data: 0.0702 max mem: 9377 +Train: [17] [2600/6250] eta: 0:09:03 lr: 0.000120 grad: 0.0742 (0.0714) loss: 0.8386 (0.8372) time: 0.1686 data: 0.0869 max mem: 9377 +Train: [17] [2700/6250] eta: 0:08:48 lr: 0.000120 grad: 0.0689 (0.0716) loss: 0.8351 (0.8371) time: 0.1261 data: 0.0425 max mem: 9377 +Train: [17] [2800/6250] eta: 0:08:32 lr: 0.000120 grad: 0.0702 (0.0716) loss: 0.8337 (0.8370) time: 0.1383 data: 0.0555 max mem: 9377 +Train: [17] [2900/6250] eta: 0:08:16 lr: 0.000120 grad: 0.0682 (0.0717) loss: 0.8357 (0.8370) time: 0.1307 data: 0.0460 max mem: 9377 +Train: [17] [3000/6250] eta: 0:08:00 lr: 0.000120 grad: 0.0756 (0.0717) loss: 0.8317 (0.8368) time: 0.1456 data: 0.0652 max mem: 9377 +Train: [17] [3100/6250] eta: 0:07:44 lr: 0.000120 grad: 0.0678 (0.0718) loss: 0.8350 (0.8367) time: 0.1461 data: 0.0602 max mem: 9377 +Train: [17] [3200/6250] eta: 0:07:28 lr: 0.000120 grad: 0.0739 (0.0718) loss: 0.8329 (0.8366) time: 0.1408 data: 0.0580 max mem: 9377 +Train: [17] [3300/6250] eta: 0:07:12 lr: 0.000120 grad: 0.0684 (0.0718) loss: 0.8349 (0.8365) time: 0.1414 data: 0.0583 max mem: 9377 +Train: [17] [3400/6250] eta: 0:06:56 lr: 0.000120 grad: 0.0666 (0.0719) loss: 0.8395 (0.8364) time: 0.1335 data: 0.0376 max mem: 9377 +Train: [17] [3500/6250] eta: 0:06:41 lr: 0.000120 grad: 0.0709 (0.0719) loss: 0.8329 (0.8363) time: 0.1328 data: 0.0602 max mem: 9377 +Train: [17] [3600/6250] eta: 0:06:26 lr: 0.000120 grad: 0.0696 (0.0719) loss: 0.8368 (0.8363) time: 0.1359 data: 0.0463 max mem: 9377 +Train: [17] [3700/6250] eta: 0:06:11 lr: 0.000120 grad: 0.0721 (0.0720) loss: 0.8344 (0.8363) time: 0.1692 data: 0.0900 max mem: 9377 +Train: [17] [3800/6250] eta: 0:05:56 lr: 0.000120 grad: 0.0752 (0.0720) loss: 0.8317 (0.8362) time: 0.1301 data: 0.0457 max mem: 9377 +Train: [17] [3900/6250] eta: 0:05:41 lr: 0.000120 grad: 0.0728 (0.0721) loss: 0.8301 (0.8362) time: 0.1446 data: 0.0583 max mem: 9377 +Train: [17] [4000/6250] eta: 0:05:26 lr: 0.000120 grad: 0.0766 (0.0722) loss: 0.8378 (0.8362) time: 0.1467 data: 0.0613 max mem: 9377 +Train: [17] [4100/6250] eta: 0:05:11 lr: 0.000120 grad: 0.0709 (0.0722) loss: 0.8350 (0.8362) time: 0.1201 data: 0.0292 max mem: 9377 +Train: [17] [4200/6250] eta: 0:04:57 lr: 0.000120 grad: 0.0713 (0.0722) loss: 0.8369 (0.8363) time: 0.1468 data: 0.0713 max mem: 9377 +Train: [17] [4300/6250] eta: 0:04:42 lr: 0.000120 grad: 0.0697 (0.0723) loss: 0.8366 (0.8363) time: 0.1481 data: 0.0682 max mem: 9377 +Train: [17] [4400/6250] eta: 0:04:27 lr: 0.000120 grad: 0.0704 (0.0723) loss: 0.8421 (0.8363) time: 0.1490 data: 0.0750 max mem: 9377 +Train: [17] [4500/6250] eta: 0:04:13 lr: 0.000120 grad: 0.0715 (0.0723) loss: 0.8392 (0.8363) time: 0.1405 data: 0.0586 max mem: 9377 +Train: [17] [4600/6250] eta: 0:03:58 lr: 0.000120 grad: 0.0684 (0.0722) loss: 0.8344 (0.8363) time: 0.1378 data: 0.0575 max mem: 9377 +Train: [17] [4700/6250] eta: 0:03:43 lr: 0.000120 grad: 0.0676 (0.0722) loss: 0.8343 (0.8363) time: 0.1454 data: 0.0633 max mem: 9377 +Train: [17] [4800/6250] eta: 0:03:29 lr: 0.000120 grad: 0.0732 (0.0722) loss: 0.8383 (0.8363) time: 0.1437 data: 0.0627 max mem: 9377 +Train: [17] [4900/6250] eta: 0:03:14 lr: 0.000119 grad: 0.0666 (0.0722) loss: 0.8380 (0.8364) time: 0.1485 data: 0.0673 max mem: 9377 +Train: [17] [5000/6250] eta: 0:02:59 lr: 0.000119 grad: 0.0732 (0.0723) loss: 0.8366 (0.8364) time: 0.1288 data: 0.0416 max mem: 9377 +Train: [17] [5100/6250] eta: 0:02:45 lr: 0.000119 grad: 0.0682 (0.0722) loss: 0.8400 (0.8364) time: 0.1294 data: 0.0412 max mem: 9377 +Train: [17] [5200/6250] eta: 0:02:30 lr: 0.000119 grad: 0.0671 (0.0722) loss: 0.8325 (0.8365) time: 0.1462 data: 0.0622 max mem: 9377 +Train: [17] [5300/6250] eta: 0:02:16 lr: 0.000119 grad: 0.0676 (0.0722) loss: 0.8397 (0.8365) time: 0.1244 data: 0.0430 max mem: 9377 +Train: [17] [5400/6250] eta: 0:02:01 lr: 0.000119 grad: 0.0710 (0.0723) loss: 0.8368 (0.8365) time: 0.1510 data: 0.0701 max mem: 9377 +Train: [17] [5500/6250] eta: 0:01:47 lr: 0.000119 grad: 0.0679 (0.0722) loss: 0.8382 (0.8365) time: 0.1512 data: 0.0679 max mem: 9377 +Train: [17] [5600/6250] eta: 0:01:33 lr: 0.000119 grad: 0.0740 (0.0723) loss: 0.8322 (0.8365) time: 0.1496 data: 0.0672 max mem: 9377 +Train: [17] [5700/6250] eta: 0:01:19 lr: 0.000119 grad: 0.0700 (0.0724) loss: 0.8308 (0.8365) time: 0.1384 data: 0.0474 max mem: 9377 +Train: [17] [5800/6250] eta: 0:01:04 lr: 0.000119 grad: 0.0700 (0.0724) loss: 0.8328 (0.8364) time: 0.1124 data: 0.0283 max mem: 9377 +Train: [17] [5900/6250] eta: 0:00:50 lr: 0.000119 grad: 0.0717 (0.0725) loss: 0.8348 (0.8364) time: 0.1404 data: 0.0644 max mem: 9377 +Train: [17] [6000/6250] eta: 0:00:36 lr: 0.000119 grad: 0.0723 (0.0725) loss: 0.8363 (0.8363) time: 0.1283 data: 0.0491 max mem: 9377 +Train: [17] [6100/6250] eta: 0:00:21 lr: 0.000119 grad: 0.0760 (0.0726) loss: 0.8369 (0.8363) time: 0.1526 data: 0.0629 max mem: 9377 +Train: [17] [6200/6250] eta: 0:00:07 lr: 0.000119 grad: 0.0778 (0.0726) loss: 0.8329 (0.8362) time: 0.1239 data: 0.0458 max mem: 9377 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.0728 (0.0727) loss: 0.8303 (0.8362) time: 0.1507 data: 0.0706 max mem: 9377 +Train: [17] Total time: 0:15:09 (0.1455 s / it) +Averaged stats: lr: 0.000119 grad: 0.0728 (0.0727) loss: 0.8303 (0.8362) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:03:12 loss: 0.8471 (0.8471) time: 3.1071 data: 3.0339 max mem: 9377 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8450 (0.8467) time: 0.1104 data: 0.0858 max mem: 9377 +Eval (hcp-train-subset): [17] Total time: 0:00:12 (0.2036 s / it) +Averaged stats (hcp-train-subset): loss: 0.8450 (0.8467) +Eval (hcp-val): [17] [ 0/62] eta: 0:03:25 loss: 0.8417 (0.8417) time: 3.3069 data: 3.2200 max mem: 9377 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8443 (0.8446) time: 0.1058 data: 0.0811 max mem: 9377 +Eval (hcp-val): [17] Total time: 0:00:12 (0.2068 s / it) +Averaged stats (hcp-val): loss: 0.8443 (0.8446) +Eval (nsd-val): [17] [ 0/62] eta: 0:04:37 loss: 0.8090 (0.8090) time: 4.4725 data: 4.4426 max mem: 9377 +Eval (nsd-val): [17] [61/62] eta: 0:00:00 loss: 0.8148 (0.8177) time: 0.1253 data: 0.1004 max mem: 9377 +Eval (nsd-val): [17] Total time: 0:00:12 (0.1996 s / it) +Averaged stats (nsd-val): loss: 0.8148 (0.8177) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [18] [ 0/6250] eta: 11:14:33 lr: 0.000119 grad: 0.1052 (0.1052) loss: 0.8004 (0.8004) time: 6.4758 data: 6.3818 max mem: 9377 +Train: [18] [ 100/6250] eta: 0:20:52 lr: 0.000119 grad: 0.0695 (0.0817) loss: 0.8334 (0.8446) time: 0.1573 data: 0.0665 max mem: 9377 +Train: [18] [ 200/6250] eta: 0:18:05 lr: 0.000119 grad: 0.0603 (0.0770) loss: 0.8447 (0.8431) time: 0.1603 data: 0.0636 max mem: 9377 +Train: [18] [ 300/6250] eta: 0:16:51 lr: 0.000119 grad: 0.0686 (0.0754) loss: 0.8460 (0.8435) time: 0.1719 data: 0.0816 max mem: 9377 +Train: [18] [ 400/6250] eta: 0:15:58 lr: 0.000119 grad: 0.0695 (0.0746) loss: 0.8467 (0.8434) time: 0.1557 data: 0.0726 max mem: 9377 +Train: [18] [ 500/6250] eta: 0:15:09 lr: 0.000119 grad: 0.0680 (0.0742) loss: 0.8422 (0.8424) time: 0.1486 data: 0.0601 max mem: 9377 +Train: [18] [ 600/6250] eta: 0:14:38 lr: 0.000119 grad: 0.0693 (0.0743) loss: 0.8389 (0.8416) time: 0.1244 data: 0.0366 max mem: 9377 +Train: [18] [ 700/6250] eta: 0:14:16 lr: 0.000119 grad: 0.0687 (0.0740) loss: 0.8376 (0.8412) time: 0.1500 data: 0.0646 max mem: 9377 +Train: [18] [ 800/6250] eta: 0:13:56 lr: 0.000119 grad: 0.0696 (0.0738) loss: 0.8362 (0.8408) time: 0.1219 data: 0.0384 max mem: 9377 +Train: [18] [ 900/6250] eta: 0:13:39 lr: 0.000119 grad: 0.0674 (0.0737) loss: 0.8396 (0.8405) time: 0.1684 data: 0.0829 max mem: 9377 +Train: [18] [1000/6250] eta: 0:13:18 lr: 0.000119 grad: 0.0670 (0.0736) loss: 0.8418 (0.8405) time: 0.1465 data: 0.0656 max mem: 9377 +Train: [18] [1100/6250] eta: 0:12:59 lr: 0.000119 grad: 0.0700 (0.0736) loss: 0.8388 (0.8405) time: 0.1379 data: 0.0551 max mem: 9377 +Train: [18] [1200/6250] eta: 0:12:38 lr: 0.000119 grad: 0.0674 (0.0733) loss: 0.8428 (0.8405) time: 0.1332 data: 0.0488 max mem: 9377 +Train: [18] [1300/6250] eta: 0:12:19 lr: 0.000119 grad: 0.0690 (0.0734) loss: 0.8373 (0.8404) time: 0.1335 data: 0.0505 max mem: 9377 +Train: [18] [1400/6250] eta: 0:12:00 lr: 0.000119 grad: 0.0710 (0.0735) loss: 0.8365 (0.8403) time: 0.1570 data: 0.0766 max mem: 9377 +Train: [18] [1500/6250] eta: 0:11:42 lr: 0.000119 grad: 0.0744 (0.0736) loss: 0.8312 (0.8399) time: 0.1353 data: 0.0517 max mem: 9377 +Train: [18] [1600/6250] eta: 0:11:25 lr: 0.000119 grad: 0.0689 (0.0734) loss: 0.8455 (0.8399) time: 0.1511 data: 0.0728 max mem: 9377 +Train: [18] [1700/6250] eta: 0:11:08 lr: 0.000119 grad: 0.0675 (0.0732) loss: 0.8342 (0.8399) time: 0.1311 data: 0.0483 max mem: 9377 +Train: [18] [1800/6250] eta: 0:10:52 lr: 0.000119 grad: 0.0723 (0.0731) loss: 0.8402 (0.8399) time: 0.1509 data: 0.0703 max mem: 9377 +Train: [18] [1900/6250] eta: 0:10:40 lr: 0.000119 grad: 0.0696 (0.0730) loss: 0.8361 (0.8398) time: 0.1477 data: 0.0681 max mem: 9377 +Train: [18] [2000/6250] eta: 0:10:24 lr: 0.000119 grad: 0.0696 (0.0730) loss: 0.8364 (0.8398) time: 0.1327 data: 0.0528 max mem: 9377 +Train: [18] [2100/6250] eta: 0:10:10 lr: 0.000119 grad: 0.0689 (0.0730) loss: 0.8388 (0.8396) time: 0.1587 data: 0.0785 max mem: 9377 +Train: [18] [2200/6250] eta: 0:09:56 lr: 0.000119 grad: 0.0685 (0.0730) loss: 0.8359 (0.8395) time: 0.1516 data: 0.0695 max mem: 9377 +Train: [18] [2300/6250] eta: 0:09:42 lr: 0.000119 grad: 0.0686 (0.0730) loss: 0.8388 (0.8395) time: 0.1448 data: 0.0670 max mem: 9377 +Train: [18] [2400/6250] eta: 0:09:29 lr: 0.000119 grad: 0.0701 (0.0730) loss: 0.8345 (0.8394) time: 0.1427 data: 0.0578 max mem: 9377 +Train: [18] [2500/6250] eta: 0:09:14 lr: 0.000119 grad: 0.0771 (0.0730) loss: 0.8326 (0.8393) time: 0.1507 data: 0.0722 max mem: 9377 +Train: [18] [2600/6250] eta: 0:09:01 lr: 0.000119 grad: 0.0690 (0.0730) loss: 0.8366 (0.8392) time: 0.1499 data: 0.0670 max mem: 9377 +Train: [18] [2700/6250] eta: 0:08:44 lr: 0.000119 grad: 0.0710 (0.0730) loss: 0.8359 (0.8392) time: 0.1333 data: 0.0523 max mem: 9377 +Train: [18] [2800/6250] eta: 0:08:29 lr: 0.000119 grad: 0.0759 (0.0731) loss: 0.8361 (0.8391) time: 0.1458 data: 0.0620 max mem: 9377 +Train: [18] [2900/6250] eta: 0:08:13 lr: 0.000119 grad: 0.0702 (0.0731) loss: 0.8408 (0.8391) time: 0.1284 data: 0.0431 max mem: 9377 +Train: [18] [3000/6250] eta: 0:07:57 lr: 0.000119 grad: 0.0706 (0.0731) loss: 0.8334 (0.8389) time: 0.1273 data: 0.0460 max mem: 9377 +Train: [18] [3100/6250] eta: 0:07:41 lr: 0.000119 grad: 0.0646 (0.0731) loss: 0.8333 (0.8387) time: 0.1280 data: 0.0440 max mem: 9377 +Train: [18] [3200/6250] eta: 0:07:25 lr: 0.000119 grad: 0.0686 (0.0731) loss: 0.8386 (0.8387) time: 0.1201 data: 0.0268 max mem: 9377 +Train: [18] [3300/6250] eta: 0:07:09 lr: 0.000119 grad: 0.0725 (0.0731) loss: 0.8386 (0.8386) time: 0.1210 data: 0.0456 max mem: 9377 +Train: [18] [3400/6250] eta: 0:06:54 lr: 0.000119 grad: 0.0704 (0.0732) loss: 0.8340 (0.8385) time: 0.1396 data: 0.0587 max mem: 9377 +Train: [18] [3500/6250] eta: 0:06:41 lr: 0.000119 grad: 0.0720 (0.0732) loss: 0.8375 (0.8384) time: 0.1483 data: 0.0724 max mem: 9377 +Train: [18] [3600/6250] eta: 0:06:27 lr: 0.000119 grad: 0.0743 (0.0732) loss: 0.8352 (0.8383) time: 0.1556 data: 0.0752 max mem: 9377 +Train: [18] [3700/6250] eta: 0:06:12 lr: 0.000119 grad: 0.0726 (0.0732) loss: 0.8361 (0.8382) time: 0.1520 data: 0.0735 max mem: 9377 +Train: [18] [3800/6250] eta: 0:05:58 lr: 0.000119 grad: 0.0706 (0.0732) loss: 0.8328 (0.8381) time: 0.1456 data: 0.0639 max mem: 9377 +Train: [18] [3900/6250] eta: 0:05:44 lr: 0.000119 grad: 0.0774 (0.0733) loss: 0.8340 (0.8381) time: 0.1486 data: 0.0703 max mem: 9377 +Train: [18] [4000/6250] eta: 0:05:29 lr: 0.000119 grad: 0.0750 (0.0734) loss: 0.8268 (0.8379) time: 0.1272 data: 0.0505 max mem: 9377 +Train: [18] [4100/6250] eta: 0:05:14 lr: 0.000119 grad: 0.0750 (0.0735) loss: 0.8273 (0.8378) time: 0.1400 data: 0.0547 max mem: 9377 +Train: [18] [4200/6250] eta: 0:04:59 lr: 0.000119 grad: 0.0739 (0.0736) loss: 0.8394 (0.8377) time: 0.1397 data: 0.0577 max mem: 9377 +Train: [18] [4300/6250] eta: 0:04:44 lr: 0.000119 grad: 0.0688 (0.0736) loss: 0.8355 (0.8376) time: 0.1423 data: 0.0583 max mem: 9377 +Train: [18] [4400/6250] eta: 0:04:29 lr: 0.000119 grad: 0.0745 (0.0736) loss: 0.8319 (0.8376) time: 0.1492 data: 0.0706 max mem: 9377 +Train: [18] [4500/6250] eta: 0:04:14 lr: 0.000119 grad: 0.0692 (0.0736) loss: 0.8381 (0.8375) time: 0.1186 data: 0.0375 max mem: 9377 +Train: [18] [4600/6250] eta: 0:04:00 lr: 0.000119 grad: 0.0735 (0.0736) loss: 0.8289 (0.8375) time: 0.1386 data: 0.0564 max mem: 9377 +Train: [18] [4700/6250] eta: 0:03:45 lr: 0.000119 grad: 0.0720 (0.0736) loss: 0.8354 (0.8375) time: 0.1532 data: 0.0724 max mem: 9377 +Train: [18] [4800/6250] eta: 0:03:30 lr: 0.000119 grad: 0.0759 (0.0737) loss: 0.8339 (0.8374) time: 0.1402 data: 0.0590 max mem: 9377 +Train: [18] [4900/6250] eta: 0:03:16 lr: 0.000119 grad: 0.0710 (0.0737) loss: 0.8362 (0.8373) time: 0.1484 data: 0.0665 max mem: 9377 +Train: [18] [5000/6250] eta: 0:03:01 lr: 0.000119 grad: 0.0723 (0.0737) loss: 0.8400 (0.8373) time: 0.1452 data: 0.0636 max mem: 9377 +Train: [18] [5100/6250] eta: 0:02:46 lr: 0.000119 grad: 0.0730 (0.0738) loss: 0.8325 (0.8372) time: 0.1126 data: 0.0307 max mem: 9377 +Train: [18] [5200/6250] eta: 0:02:32 lr: 0.000119 grad: 0.0733 (0.0738) loss: 0.8335 (0.8372) time: 0.1629 data: 0.0806 max mem: 9377 +Train: [18] [5300/6250] eta: 0:02:17 lr: 0.000119 grad: 0.0750 (0.0738) loss: 0.8328 (0.8371) time: 0.1588 data: 0.0794 max mem: 9377 +Train: [18] [5400/6250] eta: 0:02:02 lr: 0.000119 grad: 0.0698 (0.0738) loss: 0.8356 (0.8371) time: 0.1362 data: 0.0510 max mem: 9377 +Train: [18] [5500/6250] eta: 0:01:48 lr: 0.000119 grad: 0.0725 (0.0738) loss: 0.8343 (0.8370) time: 0.1532 data: 0.0722 max mem: 9377 +Train: [18] [5600/6250] eta: 0:01:34 lr: 0.000119 grad: 0.0739 (0.0738) loss: 0.8348 (0.8370) time: 0.1695 data: 0.0886 max mem: 9377 +Train: [18] [5700/6250] eta: 0:01:19 lr: 0.000119 grad: 0.0691 (0.0738) loss: 0.8360 (0.8370) time: 0.1510 data: 0.0726 max mem: 9377 +Train: [18] [5800/6250] eta: 0:01:05 lr: 0.000118 grad: 0.0690 (0.0737) loss: 0.8339 (0.8369) time: 0.1397 data: 0.0610 max mem: 9377 +Train: [18] [5900/6250] eta: 0:00:50 lr: 0.000118 grad: 0.0723 (0.0737) loss: 0.8355 (0.8369) time: 0.1561 data: 0.0713 max mem: 9377 +Train: [18] [6000/6250] eta: 0:00:36 lr: 0.000118 grad: 0.0725 (0.0737) loss: 0.8337 (0.8368) time: 0.1383 data: 0.0489 max mem: 9377 +Train: [18] [6100/6250] eta: 0:00:21 lr: 0.000118 grad: 0.0713 (0.0737) loss: 0.8351 (0.8368) time: 0.1435 data: 0.0509 max mem: 9377 +Train: [18] [6200/6250] eta: 0:00:07 lr: 0.000118 grad: 0.0693 (0.0737) loss: 0.8341 (0.8367) time: 0.1494 data: 0.0602 max mem: 9377 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.0689 (0.0737) loss: 0.8299 (0.8367) time: 0.1335 data: 0.0504 max mem: 9377 +Train: [18] Total time: 0:15:15 (0.1465 s / it) +Averaged stats: lr: 0.000118 grad: 0.0689 (0.0737) loss: 0.8299 (0.8367) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:03:23 loss: 0.8431 (0.8431) time: 3.2895 data: 3.2152 max mem: 9377 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8463 (0.8455) time: 0.1304 data: 0.1058 max mem: 9377 +Eval (hcp-train-subset): [18] Total time: 0:00:12 (0.2029 s / it) +Averaged stats (hcp-train-subset): loss: 0.8463 (0.8455) +Eval (hcp-val): [18] [ 0/62] eta: 0:03:50 loss: 0.8417 (0.8417) time: 3.7171 data: 3.6459 max mem: 9377 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8444 (0.8445) time: 0.1226 data: 0.0963 max mem: 9377 +Eval (hcp-val): [18] Total time: 0:00:12 (0.2054 s / it) +Averaged stats (hcp-val): loss: 0.8444 (0.8445) +Eval (nsd-val): [18] [ 0/62] eta: 0:05:20 loss: 0.8044 (0.8044) time: 5.1683 data: 5.1384 max mem: 9377 +Eval (nsd-val): [18] [61/62] eta: 0:00:00 loss: 0.8136 (0.8141) time: 0.1129 data: 0.0881 max mem: 9377 +Eval (nsd-val): [18] Total time: 0:00:12 (0.2025 s / it) +Averaged stats (nsd-val): loss: 0.8136 (0.8141) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [19] [ 0/6250] eta: 7:34:22 lr: 0.000118 grad: 0.1061 (0.1061) loss: 0.7845 (0.7845) time: 4.3620 data: 4.1630 max mem: 9377 +Train: [19] [ 100/6250] eta: 0:20:18 lr: 0.000118 grad: 0.0717 (0.0762) loss: 0.8377 (0.8421) time: 0.1595 data: 0.0617 max mem: 9377 +Train: [19] [ 200/6250] eta: 0:17:01 lr: 0.000118 grad: 0.0714 (0.0747) loss: 0.8367 (0.8414) time: 0.1478 data: 0.0678 max mem: 9377 +Train: [19] [ 300/6250] eta: 0:16:11 lr: 0.000118 grad: 0.0747 (0.0741) loss: 0.8412 (0.8407) time: 0.1609 data: 0.0821 max mem: 9377 +Train: [19] [ 400/6250] eta: 0:15:19 lr: 0.000118 grad: 0.0711 (0.0740) loss: 0.8257 (0.8393) time: 0.1356 data: 0.0514 max mem: 9377 +Train: [19] [ 500/6250] eta: 0:14:42 lr: 0.000118 grad: 0.0766 (0.0740) loss: 0.8404 (0.8383) time: 0.1231 data: 0.0364 max mem: 9377 +Train: [19] [ 600/6250] eta: 0:14:29 lr: 0.000118 grad: 0.0721 (0.0738) loss: 0.8341 (0.8377) time: 0.1609 data: 0.0811 max mem: 9377 +Train: [19] [ 700/6250] eta: 0:14:06 lr: 0.000118 grad: 0.0739 (0.0738) loss: 0.8364 (0.8373) time: 0.1495 data: 0.0631 max mem: 9377 +Train: [19] [ 800/6250] eta: 0:13:43 lr: 0.000118 grad: 0.0717 (0.0735) loss: 0.8356 (0.8369) time: 0.1211 data: 0.0333 max mem: 9377 +Train: [19] [ 900/6250] eta: 0:13:26 lr: 0.000118 grad: 0.0708 (0.0736) loss: 0.8326 (0.8367) time: 0.1436 data: 0.0552 max mem: 9377 +Train: [19] [1000/6250] eta: 0:13:06 lr: 0.000118 grad: 0.0714 (0.0737) loss: 0.8384 (0.8367) time: 0.1361 data: 0.0510 max mem: 9377 +Train: [19] [1100/6250] eta: 0:12:45 lr: 0.000118 grad: 0.0699 (0.0734) loss: 0.8390 (0.8368) time: 0.1406 data: 0.0583 max mem: 9377 +Train: [19] [1200/6250] eta: 0:12:26 lr: 0.000118 grad: 0.0654 (0.0733) loss: 0.8377 (0.8367) time: 0.1556 data: 0.0794 max mem: 9377 +Train: [19] [1300/6250] eta: 0:12:06 lr: 0.000118 grad: 0.0732 (0.0732) loss: 0.8346 (0.8367) time: 0.1389 data: 0.0574 max mem: 9377 +Train: [19] [1400/6250] eta: 0:11:50 lr: 0.000118 grad: 0.0714 (0.0731) loss: 0.8416 (0.8367) time: 0.1532 data: 0.0760 max mem: 9377 +Train: [19] [1500/6250] eta: 0:11:32 lr: 0.000118 grad: 0.0744 (0.0732) loss: 0.8363 (0.8366) time: 0.1326 data: 0.0484 max mem: 9377 +Train: [19] [1600/6250] eta: 0:11:14 lr: 0.000118 grad: 0.0698 (0.0732) loss: 0.8307 (0.8365) time: 0.1310 data: 0.0449 max mem: 9377 +Train: [19] [1700/6250] eta: 0:10:58 lr: 0.000118 grad: 0.0666 (0.0733) loss: 0.8416 (0.8364) time: 0.1466 data: 0.0678 max mem: 9377 +Train: [19] [1800/6250] eta: 0:10:41 lr: 0.000118 grad: 0.0709 (0.0735) loss: 0.8316 (0.8362) time: 0.1477 data: 0.0660 max mem: 9377 +Train: [19] [1900/6250] eta: 0:10:25 lr: 0.000118 grad: 0.0681 (0.0737) loss: 0.8386 (0.8361) time: 0.1232 data: 0.0415 max mem: 9377 +Train: [19] [2000/6250] eta: 0:10:09 lr: 0.000118 grad: 0.0740 (0.0738) loss: 0.8340 (0.8359) time: 0.1412 data: 0.0599 max mem: 9377 +Train: [19] [2100/6250] eta: 0:09:54 lr: 0.000118 grad: 0.0728 (0.0739) loss: 0.8328 (0.8359) time: 0.1367 data: 0.0593 max mem: 9377 +Train: [19] [2200/6250] eta: 0:09:38 lr: 0.000118 grad: 0.0767 (0.0739) loss: 0.8326 (0.8358) time: 0.1271 data: 0.0411 max mem: 9377 +Train: [19] [2300/6250] eta: 0:09:22 lr: 0.000118 grad: 0.0717 (0.0739) loss: 0.8372 (0.8358) time: 0.1322 data: 0.0500 max mem: 9377 +Train: [19] [2400/6250] eta: 0:09:07 lr: 0.000118 grad: 0.0705 (0.0740) loss: 0.8320 (0.8357) time: 0.1312 data: 0.0548 max mem: 9377 +Train: [19] [2500/6250] eta: 0:08:54 lr: 0.000118 grad: 0.0708 (0.0740) loss: 0.8309 (0.8356) time: 0.1212 data: 0.0376 max mem: 9377 +Train: [19] [2600/6250] eta: 0:08:41 lr: 0.000118 grad: 0.0712 (0.0741) loss: 0.8299 (0.8356) time: 0.1474 data: 0.0702 max mem: 9377 +Train: [19] [2700/6250] eta: 0:08:28 lr: 0.000118 grad: 0.0721 (0.0741) loss: 0.8374 (0.8355) time: 0.1421 data: 0.0475 max mem: 9377 +Train: [19] [2800/6250] eta: 0:08:15 lr: 0.000118 grad: 0.0694 (0.0741) loss: 0.8357 (0.8354) time: 0.1420 data: 0.0586 max mem: 9377 +Train: [19] [2900/6250] eta: 0:08:01 lr: 0.000118 grad: 0.0743 (0.0741) loss: 0.8347 (0.8353) time: 0.1323 data: 0.0599 max mem: 9377 +Train: [19] [3000/6250] eta: 0:07:46 lr: 0.000118 grad: 0.0694 (0.0741) loss: 0.8358 (0.8352) time: 0.1535 data: 0.0695 max mem: 9377 +Train: [19] [3100/6250] eta: 0:07:31 lr: 0.000118 grad: 0.0704 (0.0740) loss: 0.8297 (0.8352) time: 0.1267 data: 0.0407 max mem: 9377 +Train: [19] [3200/6250] eta: 0:07:16 lr: 0.000118 grad: 0.0735 (0.0741) loss: 0.8291 (0.8351) time: 0.1361 data: 0.0552 max mem: 9377 +Train: [19] [3300/6250] eta: 0:07:01 lr: 0.000118 grad: 0.0760 (0.0742) loss: 0.8341 (0.8349) time: 0.1304 data: 0.0487 max mem: 9377 +Train: [19] [3400/6250] eta: 0:06:46 lr: 0.000118 grad: 0.0712 (0.0743) loss: 0.8368 (0.8349) time: 0.1147 data: 0.0296 max mem: 9377 +Train: [19] [3500/6250] eta: 0:06:31 lr: 0.000118 grad: 0.0787 (0.0744) loss: 0.8341 (0.8348) time: 0.1365 data: 0.0568 max mem: 9377 +Train: [19] [3600/6250] eta: 0:06:15 lr: 0.000118 grad: 0.0755 (0.0744) loss: 0.8307 (0.8347) time: 0.1341 data: 0.0551 max mem: 9377 +Train: [19] [3700/6250] eta: 0:06:02 lr: 0.000118 grad: 0.0701 (0.0744) loss: 0.8314 (0.8346) time: 0.1690 data: 0.0909 max mem: 9377 +Train: [19] [3800/6250] eta: 0:05:49 lr: 0.000118 grad: 0.0685 (0.0743) loss: 0.8366 (0.8347) time: 0.1615 data: 0.0833 max mem: 9377 +Train: [19] [3900/6250] eta: 0:05:35 lr: 0.000118 grad: 0.0686 (0.0743) loss: 0.8360 (0.8346) time: 0.1495 data: 0.0698 max mem: 9377 +Train: [19] [4000/6250] eta: 0:05:21 lr: 0.000118 grad: 0.0738 (0.0742) loss: 0.8296 (0.8346) time: 0.1354 data: 0.0468 max mem: 9377 +Train: [19] [4100/6250] eta: 0:05:07 lr: 0.000118 grad: 0.0716 (0.0743) loss: 0.8324 (0.8346) time: 0.1327 data: 0.0544 max mem: 9377 +Train: [19] [4200/6250] eta: 0:04:53 lr: 0.000118 grad: 0.0711 (0.0743) loss: 0.8346 (0.8346) time: 0.1114 data: 0.0315 max mem: 9377 +Train: [19] [4300/6250] eta: 0:04:39 lr: 0.000118 grad: 0.0679 (0.0743) loss: 0.8336 (0.8345) time: 0.1496 data: 0.0714 max mem: 9377 +Train: [19] [4400/6250] eta: 0:04:25 lr: 0.000118 grad: 0.0736 (0.0744) loss: 0.8318 (0.8345) time: 0.1440 data: 0.0665 max mem: 9377 +Train: [19] [4500/6250] eta: 0:04:11 lr: 0.000118 grad: 0.0756 (0.0745) loss: 0.8283 (0.8344) time: 0.1390 data: 0.0567 max mem: 9377 +Train: [19] [4600/6250] eta: 0:03:57 lr: 0.000118 grad: 0.0754 (0.0747) loss: 0.8327 (0.8343) time: 0.1331 data: 0.0488 max mem: 9377 +Train: [19] [4700/6250] eta: 0:03:42 lr: 0.000118 grad: 0.0743 (0.0747) loss: 0.8351 (0.8343) time: 0.1338 data: 0.0540 max mem: 9377 +Train: [19] [4800/6250] eta: 0:03:28 lr: 0.000118 grad: 0.0760 (0.0748) loss: 0.8275 (0.8342) time: 0.1451 data: 0.0659 max mem: 9377 +Train: [19] [4900/6250] eta: 0:03:13 lr: 0.000118 grad: 0.0752 (0.0748) loss: 0.8284 (0.8341) time: 0.1358 data: 0.0558 max mem: 9377 +Train: [19] [5000/6250] eta: 0:02:59 lr: 0.000118 grad: 0.0787 (0.0749) loss: 0.8271 (0.8340) time: 0.1313 data: 0.0514 max mem: 9377 +Train: [19] [5100/6250] eta: 0:02:45 lr: 0.000118 grad: 0.0724 (0.0749) loss: 0.8314 (0.8339) time: 0.1676 data: 0.0900 max mem: 9377 +Train: [19] [5200/6250] eta: 0:02:30 lr: 0.000118 grad: 0.0744 (0.0750) loss: 0.8228 (0.8338) time: 0.1546 data: 0.0706 max mem: 9377 +Train: [19] [5300/6250] eta: 0:02:16 lr: 0.000118 grad: 0.0747 (0.0750) loss: 0.8244 (0.8337) time: 0.1300 data: 0.0448 max mem: 9377 +Train: [19] [5400/6250] eta: 0:02:02 lr: 0.000118 grad: 0.0838 (0.0752) loss: 0.8259 (0.8335) time: 0.2082 data: 0.1294 max mem: 9377 +Train: [19] [5500/6250] eta: 0:01:48 lr: 0.000118 grad: 0.0703 (0.0753) loss: 0.8287 (0.8334) time: 0.1675 data: 0.0780 max mem: 9377 +Train: [19] [5600/6250] eta: 0:01:34 lr: 0.000118 grad: 0.0692 (0.0753) loss: 0.8280 (0.8333) time: 0.1693 data: 0.0763 max mem: 9377 +Train: [19] [5700/6250] eta: 0:01:20 lr: 0.000118 grad: 0.0702 (0.0754) loss: 0.8275 (0.8332) time: 0.1723 data: 0.0910 max mem: 9377 +Train: [19] [5800/6250] eta: 0:01:05 lr: 0.000118 grad: 0.0731 (0.0754) loss: 0.8329 (0.8331) time: 0.1769 data: 0.0950 max mem: 9377 +Train: [19] [5900/6250] eta: 0:00:51 lr: 0.000118 grad: 0.0733 (0.0754) loss: 0.8314 (0.8331) time: 0.1828 data: 0.1009 max mem: 9377 +Train: [19] [6000/6250] eta: 0:00:36 lr: 0.000118 grad: 0.0745 (0.0754) loss: 0.8337 (0.8330) time: 0.1355 data: 0.0504 max mem: 9377 +Train: [19] [6100/6250] eta: 0:00:22 lr: 0.000117 grad: 0.0786 (0.0755) loss: 0.8331 (0.8330) time: 0.1349 data: 0.0490 max mem: 9377 +Train: [19] [6200/6250] eta: 0:00:07 lr: 0.000117 grad: 0.0750 (0.0756) loss: 0.8249 (0.8330) time: 0.1351 data: 0.0455 max mem: 9377 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.0723 (0.0756) loss: 0.8350 (0.8329) time: 0.1450 data: 0.0614 max mem: 9377 +Train: [19] Total time: 0:15:22 (0.1476 s / it) +Averaged stats: lr: 0.000117 grad: 0.0723 (0.0756) loss: 0.8350 (0.8329) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:05:13 loss: 0.8486 (0.8486) time: 5.0585 data: 5.0279 max mem: 9377 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8445 (0.8459) time: 0.1285 data: 0.1018 max mem: 9377 +Eval (hcp-train-subset): [19] Total time: 0:00:13 (0.2125 s / it) +Averaged stats (hcp-train-subset): loss: 0.8445 (0.8459) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [19] [ 0/62] eta: 0:04:46 loss: 0.8453 (0.8453) time: 4.6244 data: 4.5930 max mem: 9377 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8429 (0.8437) time: 0.1257 data: 0.1006 max mem: 9377 +Eval (hcp-val): [19] Total time: 0:00:13 (0.2133 s / it) +Averaged stats (hcp-val): loss: 0.8429 (0.8437) +Making plots (hcp-val): example=6 +Eval (nsd-val): [19] [ 0/62] eta: 0:05:21 loss: 0.8048 (0.8048) time: 5.1796 data: 5.1474 max mem: 9377 +Eval (nsd-val): [19] [61/62] eta: 0:00:00 loss: 0.8165 (0.8170) time: 0.1402 data: 0.1145 max mem: 9377 +Eval (nsd-val): [19] Total time: 0:00:13 (0.2208 s / it) +Averaged stats (nsd-val): loss: 0.8165 (0.8170) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 8:38:12 lr: 0.000117 grad: 0.0643 (0.0643) loss: 0.8624 (0.8624) time: 4.9748 data: 4.6935 max mem: 9377 +Train: [20] [ 100/6250] eta: 0:20:41 lr: 0.000117 grad: 0.0761 (0.0893) loss: 0.8339 (0.8357) time: 0.1493 data: 0.0611 max mem: 9377 +Train: [20] [ 200/6250] eta: 0:17:43 lr: 0.000117 grad: 0.0681 (0.0810) loss: 0.8391 (0.8368) time: 0.1412 data: 0.0459 max mem: 9377 +Train: [20] [ 300/6250] eta: 0:16:40 lr: 0.000117 grad: 0.0702 (0.0791) loss: 0.8450 (0.8380) time: 0.1412 data: 0.0511 max mem: 9377 +Train: [20] [ 400/6250] eta: 0:15:53 lr: 0.000117 grad: 0.0642 (0.0764) loss: 0.8376 (0.8385) time: 0.1428 data: 0.0547 max mem: 9377 +Train: [20] [ 500/6250] eta: 0:15:11 lr: 0.000117 grad: 0.0664 (0.0749) loss: 0.8398 (0.8385) time: 0.1474 data: 0.0643 max mem: 9377 +Train: [20] [ 600/6250] eta: 0:14:46 lr: 0.000117 grad: 0.0716 (0.0740) loss: 0.8400 (0.8385) time: 0.1588 data: 0.0801 max mem: 9377 +Train: [20] [ 700/6250] eta: 0:14:17 lr: 0.000117 grad: 0.0687 (0.0733) loss: 0.8383 (0.8377) time: 0.1353 data: 0.0515 max mem: 9377 +Train: [20] [ 800/6250] eta: 0:13:55 lr: 0.000117 grad: 0.0640 (0.0726) loss: 0.8328 (0.8376) time: 0.1432 data: 0.0580 max mem: 9377 +Train: [20] [ 900/6250] eta: 0:13:34 lr: 0.000117 grad: 0.0661 (0.0720) loss: 0.8383 (0.8374) time: 0.1510 data: 0.0672 max mem: 9377 +Train: [20] [1000/6250] eta: 0:13:17 lr: 0.000117 grad: 0.0661 (0.0717) loss: 0.8369 (0.8373) time: 0.1721 data: 0.0951 max mem: 9377 +Train: [20] [1100/6250] eta: 0:12:54 lr: 0.000117 grad: 0.0675 (0.0715) loss: 0.8389 (0.8372) time: 0.1444 data: 0.0602 max mem: 9377 +Train: [20] [1200/6250] eta: 0:12:34 lr: 0.000117 grad: 0.0729 (0.0713) loss: 0.8345 (0.8370) time: 0.1549 data: 0.0769 max mem: 9377 +Train: [20] [1300/6250] eta: 0:12:16 lr: 0.000117 grad: 0.0659 (0.0713) loss: 0.8414 (0.8369) time: 0.1583 data: 0.0784 max mem: 9377 +Train: [20] [1400/6250] eta: 0:11:57 lr: 0.000117 grad: 0.0669 (0.0713) loss: 0.8373 (0.8367) time: 0.1306 data: 0.0486 max mem: 9377 +Train: [20] [1500/6250] eta: 0:11:40 lr: 0.000117 grad: 0.0656 (0.0712) loss: 0.8387 (0.8367) time: 0.1542 data: 0.0704 max mem: 9377 +Train: [20] [1600/6250] eta: 0:11:22 lr: 0.000117 grad: 0.0645 (0.0712) loss: 0.8350 (0.8366) time: 0.1403 data: 0.0628 max mem: 9377 +Train: [20] [1700/6250] eta: 0:11:07 lr: 0.000117 grad: 0.0668 (0.0713) loss: 0.8419 (0.8366) time: 0.1476 data: 0.0648 max mem: 9377 +Train: [20] [1800/6250] eta: 0:10:50 lr: 0.000117 grad: 0.0693 (0.0714) loss: 0.8300 (0.8365) time: 0.1477 data: 0.0606 max mem: 9377 +Train: [20] [1900/6250] eta: 0:10:35 lr: 0.000117 grad: 0.0692 (0.0717) loss: 0.8329 (0.8363) time: 0.1907 data: 0.1116 max mem: 9377 +Train: [20] [2000/6250] eta: 0:10:16 lr: 0.000117 grad: 0.0698 (0.0719) loss: 0.8349 (0.8363) time: 0.1491 data: 0.0651 max mem: 9377 +Train: [20] [2100/6250] eta: 0:10:00 lr: 0.000117 grad: 0.0683 (0.0719) loss: 0.8367 (0.8362) time: 0.1273 data: 0.0342 max mem: 9377 +Train: [20] [2200/6250] eta: 0:09:44 lr: 0.000117 grad: 0.0698 (0.0719) loss: 0.8353 (0.8361) time: 0.1389 data: 0.0580 max mem: 9377 +Train: [20] [2300/6250] eta: 0:09:29 lr: 0.000117 grad: 0.0684 (0.0719) loss: 0.8382 (0.8361) time: 0.1421 data: 0.0597 max mem: 9377 +Train: [20] [2400/6250] eta: 0:09:14 lr: 0.000117 grad: 0.0681 (0.0719) loss: 0.8446 (0.8361) time: 0.1579 data: 0.0791 max mem: 9377 +Train: [20] [2500/6250] eta: 0:08:58 lr: 0.000117 grad: 0.0784 (0.0720) loss: 0.8366 (0.8361) time: 0.1171 data: 0.0339 max mem: 9377 +Train: [20] [2600/6250] eta: 0:08:42 lr: 0.000117 grad: 0.0682 (0.0721) loss: 0.8399 (0.8360) time: 0.1257 data: 0.0454 max mem: 9377 +Train: [20] [2700/6250] eta: 0:08:30 lr: 0.000117 grad: 0.0686 (0.0721) loss: 0.8339 (0.8360) time: 0.1490 data: 0.0639 max mem: 9377 +Train: [20] [2800/6250] eta: 0:08:17 lr: 0.000117 grad: 0.0705 (0.0722) loss: 0.8372 (0.8359) time: 0.1492 data: 0.0688 max mem: 9377 +Train: [20] [2900/6250] eta: 0:08:03 lr: 0.000117 grad: 0.0711 (0.0723) loss: 0.8382 (0.8358) time: 0.1702 data: 0.0835 max mem: 9377 +Train: [20] [3000/6250] eta: 0:07:48 lr: 0.000117 grad: 0.0690 (0.0723) loss: 0.8364 (0.8357) time: 0.1327 data: 0.0561 max mem: 9377 +Train: [20] [3100/6250] eta: 0:07:34 lr: 0.000117 grad: 0.0704 (0.0723) loss: 0.8408 (0.8357) time: 0.1664 data: 0.0874 max mem: 9377 +Train: [20] [3200/6250] eta: 0:07:19 lr: 0.000117 grad: 0.0670 (0.0724) loss: 0.8355 (0.8356) time: 0.1434 data: 0.0585 max mem: 9377 +Train: [20] [3300/6250] eta: 0:07:04 lr: 0.000117 grad: 0.0713 (0.0724) loss: 0.8337 (0.8356) time: 0.1347 data: 0.0508 max mem: 9377 +Train: [20] [3400/6250] eta: 0:06:49 lr: 0.000117 grad: 0.0699 (0.0725) loss: 0.8370 (0.8356) time: 0.1378 data: 0.0547 max mem: 9377 +Train: [20] [3500/6250] eta: 0:06:34 lr: 0.000117 grad: 0.0697 (0.0725) loss: 0.8360 (0.8355) time: 0.1249 data: 0.0419 max mem: 9377 +Train: [20] [3600/6250] eta: 0:06:19 lr: 0.000117 grad: 0.0786 (0.0725) loss: 0.8340 (0.8355) time: 0.1380 data: 0.0539 max mem: 9377 +Train: [20] [3700/6250] eta: 0:06:04 lr: 0.000117 grad: 0.0743 (0.0726) loss: 0.8288 (0.8355) time: 0.1400 data: 0.0524 max mem: 9377 +Train: [20] [3800/6250] eta: 0:05:49 lr: 0.000117 grad: 0.0692 (0.0727) loss: 0.8388 (0.8354) time: 0.1484 data: 0.0670 max mem: 9377 +Train: [20] [3900/6250] eta: 0:05:35 lr: 0.000117 grad: 0.0782 (0.0728) loss: 0.8313 (0.8353) time: 0.1489 data: 0.0621 max mem: 9377 +Train: [20] [4000/6250] eta: 0:05:20 lr: 0.000117 grad: 0.0784 (0.0730) loss: 0.8303 (0.8352) time: 0.0894 data: 0.0098 max mem: 9377 +Train: [20] [4100/6250] eta: 0:05:06 lr: 0.000117 grad: 0.0719 (0.0730) loss: 0.8353 (0.8351) time: 0.1148 data: 0.0316 max mem: 9377 +Train: [20] [4200/6250] eta: 0:04:51 lr: 0.000117 grad: 0.0725 (0.0731) loss: 0.8344 (0.8351) time: 0.1380 data: 0.0597 max mem: 9377 +Train: [20] [4300/6250] eta: 0:04:36 lr: 0.000117 grad: 0.0786 (0.0732) loss: 0.8344 (0.8351) time: 0.1497 data: 0.0654 max mem: 9377 +Train: [20] [4400/6250] eta: 0:04:22 lr: 0.000117 grad: 0.0747 (0.0732) loss: 0.8303 (0.8350) time: 0.1626 data: 0.0879 max mem: 9377 +Train: [20] [4500/6250] eta: 0:04:08 lr: 0.000117 grad: 0.0753 (0.0733) loss: 0.8273 (0.8350) time: 0.1483 data: 0.0725 max mem: 9377 +Train: [20] [4600/6250] eta: 0:03:53 lr: 0.000117 grad: 0.0728 (0.0734) loss: 0.8286 (0.8349) time: 0.1332 data: 0.0595 max mem: 9377 +Train: [20] [4700/6250] eta: 0:03:39 lr: 0.000117 grad: 0.0729 (0.0735) loss: 0.8348 (0.8348) time: 0.1335 data: 0.0572 max mem: 9377 +Train: [20] [4800/6250] eta: 0:03:25 lr: 0.000117 grad: 0.0799 (0.0736) loss: 0.8362 (0.8348) time: 0.1607 data: 0.0811 max mem: 9377 +Train: [20] [4900/6250] eta: 0:03:11 lr: 0.000117 grad: 0.0796 (0.0737) loss: 0.8323 (0.8348) time: 0.1555 data: 0.0740 max mem: 9377 +Train: [20] [5000/6250] eta: 0:02:57 lr: 0.000117 grad: 0.0793 (0.0737) loss: 0.8321 (0.8347) time: 0.1448 data: 0.0660 max mem: 9377 +Train: [20] [5100/6250] eta: 0:02:43 lr: 0.000117 grad: 0.0747 (0.0738) loss: 0.8322 (0.8346) time: 0.1557 data: 0.0785 max mem: 9377 +Train: [20] [5200/6250] eta: 0:02:29 lr: 0.000117 grad: 0.0761 (0.0739) loss: 0.8300 (0.8346) time: 0.1212 data: 0.0483 max mem: 9377 +Train: [20] [5300/6250] eta: 0:02:15 lr: 0.000117 grad: 0.0752 (0.0740) loss: 0.8307 (0.8345) time: 0.1479 data: 0.0672 max mem: 9377 +Train: [20] [5400/6250] eta: 0:02:01 lr: 0.000117 grad: 0.0736 (0.0740) loss: 0.8360 (0.8345) time: 0.1628 data: 0.0757 max mem: 9377 +Train: [20] [5500/6250] eta: 0:01:47 lr: 0.000117 grad: 0.0775 (0.0741) loss: 0.8297 (0.8344) time: 0.1427 data: 0.0595 max mem: 9377 +Train: [20] [5600/6250] eta: 0:01:32 lr: 0.000117 grad: 0.0716 (0.0741) loss: 0.8287 (0.8343) time: 0.1376 data: 0.0600 max mem: 9377 +Train: [20] [5700/6250] eta: 0:01:18 lr: 0.000117 grad: 0.0740 (0.0741) loss: 0.8356 (0.8343) time: 0.1409 data: 0.0611 max mem: 9377 +Train: [20] [5800/6250] eta: 0:01:04 lr: 0.000117 grad: 0.0704 (0.0742) loss: 0.8260 (0.8342) time: 0.1381 data: 0.0490 max mem: 9377 +Train: [20] [5900/6250] eta: 0:00:50 lr: 0.000117 grad: 0.0741 (0.0742) loss: 0.8369 (0.8342) time: 0.1539 data: 0.0674 max mem: 9377 +Train: [20] [6000/6250] eta: 0:00:35 lr: 0.000116 grad: 0.0729 (0.0742) loss: 0.8354 (0.8342) time: 0.1572 data: 0.0708 max mem: 9377 +Train: [20] [6100/6250] eta: 0:00:21 lr: 0.000116 grad: 0.0725 (0.0742) loss: 0.8292 (0.8342) time: 0.1438 data: 0.0562 max mem: 9377 +Train: [20] [6200/6250] eta: 0:00:07 lr: 0.000116 grad: 0.0745 (0.0742) loss: 0.8335 (0.8341) time: 0.1160 data: 0.0346 max mem: 9377 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.0710 (0.0742) loss: 0.8317 (0.8341) time: 0.1338 data: 0.0481 max mem: 9377 +Train: [20] Total time: 0:14:57 (0.1436 s / it) +Averaged stats: lr: 0.000116 grad: 0.0710 (0.0742) loss: 0.8317 (0.8341) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:03:13 loss: 0.8459 (0.8459) time: 3.1183 data: 3.0366 max mem: 9377 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8450 (0.8461) time: 0.1100 data: 0.0852 max mem: 9377 +Eval (hcp-train-subset): [20] Total time: 0:00:12 (0.2080 s / it) +Averaged stats (hcp-train-subset): loss: 0.8450 (0.8461) +Eval (hcp-val): [20] [ 0/62] eta: 0:03:31 loss: 0.8408 (0.8408) time: 3.4114 data: 3.3114 max mem: 9377 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8431 (0.8436) time: 0.1333 data: 0.1067 max mem: 9377 +Eval (hcp-val): [20] Total time: 0:00:13 (0.2187 s / it) +Averaged stats (hcp-val): loss: 0.8431 (0.8436) +Eval (nsd-val): [20] [ 0/62] eta: 0:04:36 loss: 0.8104 (0.8104) time: 4.4583 data: 4.3973 max mem: 9377 +Eval (nsd-val): [20] [61/62] eta: 0:00:00 loss: 0.8191 (0.8198) time: 0.1268 data: 0.1019 max mem: 9377 +Eval (nsd-val): [20] Total time: 0:00:12 (0.2079 s / it) +Averaged stats (nsd-val): loss: 0.8191 (0.8198) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [21] [ 0/6250] eta: 9:32:16 lr: 0.000116 grad: 0.0474 (0.0474) loss: 0.8718 (0.8718) time: 5.4938 data: 5.3208 max mem: 9377 +Train: [21] [ 100/6250] eta: 0:20:27 lr: 0.000116 grad: 0.0666 (0.0779) loss: 0.8407 (0.8431) time: 0.1522 data: 0.0573 max mem: 9377 +Train: [21] [ 200/6250] eta: 0:17:47 lr: 0.000116 grad: 0.0654 (0.0766) loss: 0.8303 (0.8388) time: 0.1666 data: 0.0774 max mem: 9377 +Train: [21] [ 300/6250] eta: 0:16:31 lr: 0.000116 grad: 0.0699 (0.0759) loss: 0.8296 (0.8353) time: 0.1655 data: 0.0688 max mem: 9377 +Train: [21] [ 400/6250] eta: 0:15:40 lr: 0.000116 grad: 0.0688 (0.0746) loss: 0.8277 (0.8339) time: 0.1417 data: 0.0598 max mem: 9377 +Train: [21] [ 500/6250] eta: 0:15:04 lr: 0.000116 grad: 0.0708 (0.0738) loss: 0.8336 (0.8338) time: 0.1209 data: 0.0332 max mem: 9377 +Train: [21] [ 600/6250] eta: 0:14:39 lr: 0.000116 grad: 0.0678 (0.0728) loss: 0.8358 (0.8340) time: 0.1521 data: 0.0696 max mem: 9377 +Train: [21] [ 700/6250] eta: 0:14:18 lr: 0.000116 grad: 0.0695 (0.0724) loss: 0.8289 (0.8338) time: 0.1443 data: 0.0617 max mem: 9377 +Train: [21] [ 800/6250] eta: 0:14:02 lr: 0.000116 grad: 0.0697 (0.0722) loss: 0.8319 (0.8335) time: 0.1592 data: 0.0763 max mem: 9377 +Train: [21] [ 900/6250] eta: 0:13:43 lr: 0.000116 grad: 0.0700 (0.0724) loss: 0.8275 (0.8333) time: 0.1504 data: 0.0660 max mem: 9377 +Train: [21] [1000/6250] eta: 0:13:20 lr: 0.000116 grad: 0.0694 (0.0725) loss: 0.8290 (0.8329) time: 0.1304 data: 0.0448 max mem: 9377 +Train: [21] [1100/6250] eta: 0:12:59 lr: 0.000116 grad: 0.0727 (0.0727) loss: 0.8291 (0.8327) time: 0.1495 data: 0.0705 max mem: 9377 +Train: [21] [1200/6250] eta: 0:12:36 lr: 0.000116 grad: 0.0693 (0.0728) loss: 0.8281 (0.8326) time: 0.1362 data: 0.0524 max mem: 9377 +Train: [21] [1300/6250] eta: 0:12:13 lr: 0.000116 grad: 0.0777 (0.0729) loss: 0.8331 (0.8325) time: 0.1309 data: 0.0491 max mem: 9377 +Train: [21] [1400/6250] eta: 0:11:54 lr: 0.000116 grad: 0.0769 (0.0730) loss: 0.8258 (0.8323) time: 0.1214 data: 0.0431 max mem: 9377 +Train: [21] [1500/6250] eta: 0:11:35 lr: 0.000116 grad: 0.0695 (0.0732) loss: 0.8252 (0.8321) time: 0.1213 data: 0.0351 max mem: 9377 +Train: [21] [1600/6250] eta: 0:11:18 lr: 0.000116 grad: 0.0730 (0.0734) loss: 0.8324 (0.8321) time: 0.1449 data: 0.0626 max mem: 9377 +Train: [21] [1700/6250] eta: 0:11:00 lr: 0.000116 grad: 0.0784 (0.0737) loss: 0.8276 (0.8320) time: 0.1198 data: 0.0382 max mem: 9377 +Train: [21] [1800/6250] eta: 0:10:44 lr: 0.000116 grad: 0.0758 (0.0739) loss: 0.8292 (0.8319) time: 0.1492 data: 0.0698 max mem: 9377 +Train: [21] [1900/6250] eta: 0:10:31 lr: 0.000116 grad: 0.0662 (0.0739) loss: 0.8345 (0.8319) time: 0.2029 data: 0.1278 max mem: 9377 +Train: [21] [2000/6250] eta: 0:10:13 lr: 0.000116 grad: 0.0710 (0.0740) loss: 0.8311 (0.8319) time: 0.1338 data: 0.0504 max mem: 9377 +Train: [21] [2100/6250] eta: 0:09:56 lr: 0.000116 grad: 0.0751 (0.0740) loss: 0.8285 (0.8319) time: 0.1246 data: 0.0384 max mem: 9377 +Train: [21] [2200/6250] eta: 0:09:41 lr: 0.000116 grad: 0.0719 (0.0741) loss: 0.8334 (0.8319) time: 0.1297 data: 0.0469 max mem: 9377 +Train: [21] [2300/6250] eta: 0:09:25 lr: 0.000116 grad: 0.0733 (0.0741) loss: 0.8278 (0.8318) time: 0.1381 data: 0.0559 max mem: 9377 +Train: [21] [2400/6250] eta: 0:09:09 lr: 0.000116 grad: 0.0720 (0.0742) loss: 0.8350 (0.8318) time: 0.1429 data: 0.0624 max mem: 9377 +Train: [21] [2500/6250] eta: 0:08:54 lr: 0.000116 grad: 0.0747 (0.0743) loss: 0.8303 (0.8317) time: 0.1334 data: 0.0488 max mem: 9377 +Train: [21] [2600/6250] eta: 0:08:39 lr: 0.000116 grad: 0.0730 (0.0745) loss: 0.8280 (0.8316) time: 0.1473 data: 0.0638 max mem: 9377 +Train: [21] [2700/6250] eta: 0:08:25 lr: 0.000116 grad: 0.0700 (0.0745) loss: 0.8271 (0.8316) time: 0.1550 data: 0.0735 max mem: 9377 +Train: [21] [2800/6250] eta: 0:08:09 lr: 0.000116 grad: 0.0778 (0.0746) loss: 0.8277 (0.8315) time: 0.1446 data: 0.0627 max mem: 9377 +Train: [21] [2900/6250] eta: 0:07:55 lr: 0.000116 grad: 0.0760 (0.0746) loss: 0.8290 (0.8315) time: 0.1451 data: 0.0637 max mem: 9377 +Train: [21] [3000/6250] eta: 0:07:40 lr: 0.000116 grad: 0.0700 (0.0746) loss: 0.8260 (0.8314) time: 0.1337 data: 0.0551 max mem: 9377 +Train: [21] [3100/6250] eta: 0:07:26 lr: 0.000116 grad: 0.0737 (0.0747) loss: 0.8339 (0.8314) time: 0.1736 data: 0.0977 max mem: 9377 +Train: [21] [3200/6250] eta: 0:07:13 lr: 0.000116 grad: 0.0705 (0.0747) loss: 0.8284 (0.8314) time: 0.1638 data: 0.0839 max mem: 9377 +Train: [21] [3300/6250] eta: 0:07:00 lr: 0.000116 grad: 0.0689 (0.0747) loss: 0.8310 (0.8313) time: 0.1669 data: 0.0820 max mem: 9377 +Train: [21] [3400/6250] eta: 0:06:47 lr: 0.000116 grad: 0.0730 (0.0748) loss: 0.8250 (0.8312) time: 0.1597 data: 0.0805 max mem: 9377 +Train: [21] [3500/6250] eta: 0:06:33 lr: 0.000116 grad: 0.0715 (0.0748) loss: 0.8298 (0.8312) time: 0.1501 data: 0.0667 max mem: 9377 +Train: [21] [3600/6250] eta: 0:06:18 lr: 0.000116 grad: 0.0713 (0.0748) loss: 0.8332 (0.8312) time: 0.1202 data: 0.0365 max mem: 9377 +Train: [21] [3700/6250] eta: 0:06:04 lr: 0.000116 grad: 0.0724 (0.0748) loss: 0.8267 (0.8311) time: 0.1463 data: 0.0593 max mem: 9377 +Train: [21] [3800/6250] eta: 0:05:49 lr: 0.000116 grad: 0.0691 (0.0748) loss: 0.8322 (0.8312) time: 0.1204 data: 0.0319 max mem: 9377 +Train: [21] [3900/6250] eta: 0:05:34 lr: 0.000116 grad: 0.0683 (0.0748) loss: 0.8354 (0.8311) time: 0.1331 data: 0.0485 max mem: 9377 +Train: [21] [4000/6250] eta: 0:05:20 lr: 0.000116 grad: 0.0722 (0.0747) loss: 0.8307 (0.8311) time: 0.1338 data: 0.0388 max mem: 9377 +Train: [21] [4100/6250] eta: 0:05:04 lr: 0.000116 grad: 0.0698 (0.0747) loss: 0.8307 (0.8311) time: 0.1256 data: 0.0419 max mem: 9377 +Train: [21] [4200/6250] eta: 0:04:50 lr: 0.000116 grad: 0.0669 (0.0747) loss: 0.8286 (0.8311) time: 0.1634 data: 0.0873 max mem: 9377 +Train: [21] [4300/6250] eta: 0:04:36 lr: 0.000116 grad: 0.0728 (0.0747) loss: 0.8323 (0.8311) time: 0.1765 data: 0.1000 max mem: 9377 +Train: [21] [4400/6250] eta: 0:04:23 lr: 0.000116 grad: 0.0719 (0.0747) loss: 0.8313 (0.8311) time: 0.1682 data: 0.0945 max mem: 9377 +Train: [21] [4500/6250] eta: 0:04:09 lr: 0.000116 grad: 0.0770 (0.0747) loss: 0.8297 (0.8311) time: 0.1497 data: 0.0715 max mem: 9377 +Train: [21] [4600/6250] eta: 0:03:56 lr: 0.000116 grad: 0.0698 (0.0748) loss: 0.8340 (0.8310) time: 0.1848 data: 0.1086 max mem: 9377 +Train: [21] [4700/6250] eta: 0:03:42 lr: 0.000116 grad: 0.0697 (0.0747) loss: 0.8329 (0.8310) time: 0.1609 data: 0.0813 max mem: 9377 +Train: [21] [4800/6250] eta: 0:03:28 lr: 0.000116 grad: 0.0768 (0.0747) loss: 0.8262 (0.8310) time: 0.1311 data: 0.0500 max mem: 9377 +Train: [21] [4900/6250] eta: 0:03:14 lr: 0.000116 grad: 0.0701 (0.0748) loss: 0.8280 (0.8309) time: 0.1536 data: 0.0680 max mem: 9377 +Train: [21] [5000/6250] eta: 0:02:59 lr: 0.000116 grad: 0.0726 (0.0748) loss: 0.8308 (0.8309) time: 0.1352 data: 0.0557 max mem: 9377 +Train: [21] [5100/6250] eta: 0:02:45 lr: 0.000116 grad: 0.0685 (0.0748) loss: 0.8317 (0.8309) time: 0.1270 data: 0.0502 max mem: 9377 +Train: [21] [5200/6250] eta: 0:02:31 lr: 0.000116 grad: 0.0708 (0.0747) loss: 0.8300 (0.8309) time: 0.1834 data: 0.1054 max mem: 9377 +Train: [21] [5300/6250] eta: 0:02:17 lr: 0.000116 grad: 0.0728 (0.0747) loss: 0.8217 (0.8309) time: 0.1322 data: 0.0515 max mem: 9377 +Train: [21] [5400/6250] eta: 0:02:02 lr: 0.000116 grad: 0.0707 (0.0747) loss: 0.8336 (0.8309) time: 0.1525 data: 0.0640 max mem: 9377 +Train: [21] [5500/6250] eta: 0:01:48 lr: 0.000116 grad: 0.0692 (0.0748) loss: 0.8300 (0.8308) time: 0.1837 data: 0.1084 max mem: 9377 +Train: [21] [5600/6250] eta: 0:01:34 lr: 0.000115 grad: 0.0792 (0.0748) loss: 0.8243 (0.8307) time: 0.1526 data: 0.0666 max mem: 9377 +Train: [21] [5700/6250] eta: 0:01:19 lr: 0.000115 grad: 0.0744 (0.0748) loss: 0.8274 (0.8307) time: 0.1337 data: 0.0508 max mem: 9377 +Train: [21] [5800/6250] eta: 0:01:04 lr: 0.000115 grad: 0.0690 (0.0749) loss: 0.8309 (0.8307) time: 0.1268 data: 0.0458 max mem: 9377 +Train: [21] [5900/6250] eta: 0:00:50 lr: 0.000115 grad: 0.0704 (0.0749) loss: 0.8286 (0.8306) time: 0.1361 data: 0.0539 max mem: 9377 +Train: [21] [6000/6250] eta: 0:00:35 lr: 0.000115 grad: 0.0724 (0.0749) loss: 0.8313 (0.8306) time: 0.1297 data: 0.0432 max mem: 9377 +Train: [21] [6100/6250] eta: 0:00:21 lr: 0.000115 grad: 0.0738 (0.0750) loss: 0.8227 (0.8306) time: 0.1320 data: 0.0499 max mem: 9377 +Train: [21] [6200/6250] eta: 0:00:07 lr: 0.000115 grad: 0.0662 (0.0750) loss: 0.8323 (0.8306) time: 0.1069 data: 0.0189 max mem: 9377 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.0725 (0.0749) loss: 0.8326 (0.8306) time: 0.1455 data: 0.0675 max mem: 9377 +Train: [21] Total time: 0:15:00 (0.1441 s / it) +Averaged stats: lr: 0.000115 grad: 0.0725 (0.0749) loss: 0.8326 (0.8306) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:04:55 loss: 0.8469 (0.8469) time: 4.7644 data: 4.7346 max mem: 9377 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8441 (0.8452) time: 0.1327 data: 0.1060 max mem: 9377 +Eval (hcp-train-subset): [21] Total time: 0:00:12 (0.2006 s / it) +Averaged stats (hcp-train-subset): loss: 0.8441 (0.8452) +Eval (hcp-val): [21] [ 0/62] eta: 0:04:23 loss: 0.8400 (0.8400) time: 4.2499 data: 4.2011 max mem: 9377 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8426 (0.8431) time: 0.1301 data: 0.1033 max mem: 9377 +Eval (hcp-val): [21] Total time: 0:00:12 (0.2064 s / it) +Averaged stats (hcp-val): loss: 0.8426 (0.8431) +Eval (nsd-val): [21] [ 0/62] eta: 0:04:03 loss: 0.8060 (0.8060) time: 3.9282 data: 3.8738 max mem: 9377 +Eval (nsd-val): [21] [61/62] eta: 0:00:00 loss: 0.8163 (0.8158) time: 0.0989 data: 0.0740 max mem: 9377 +Eval (nsd-val): [21] Total time: 0:00:12 (0.2031 s / it) +Averaged stats (nsd-val): loss: 0.8163 (0.8158) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [22] [ 0/6250] eta: 6:18:41 lr: 0.000115 grad: 0.0963 (0.0963) loss: 0.8198 (0.8198) time: 3.6355 data: 3.4259 max mem: 9377 +Train: [22] [ 100/6250] eta: 0:18:28 lr: 0.000115 grad: 0.0669 (0.0891) loss: 0.8311 (0.8345) time: 0.1220 data: 0.0309 max mem: 9377 +Train: [22] [ 200/6250] eta: 0:16:00 lr: 0.000115 grad: 0.0728 (0.0826) loss: 0.8378 (0.8344) time: 0.1323 data: 0.0423 max mem: 9377 +Train: [22] [ 300/6250] eta: 0:15:02 lr: 0.000115 grad: 0.0800 (0.0824) loss: 0.8219 (0.8321) time: 0.1351 data: 0.0480 max mem: 9377 +Train: [22] [ 400/6250] eta: 0:14:29 lr: 0.000115 grad: 0.0791 (0.0832) loss: 0.8237 (0.8304) time: 0.1516 data: 0.0632 max mem: 9377 +Train: [22] [ 500/6250] eta: 0:14:03 lr: 0.000115 grad: 0.0747 (0.0824) loss: 0.8258 (0.8298) time: 0.1672 data: 0.0886 max mem: 9377 +Train: [22] [ 600/6250] eta: 0:13:42 lr: 0.000115 grad: 0.0750 (0.0815) loss: 0.8282 (0.8299) time: 0.1449 data: 0.0581 max mem: 9377 +Train: [22] [ 700/6250] eta: 0:13:33 lr: 0.000115 grad: 0.0747 (0.0806) loss: 0.8294 (0.8302) time: 0.1462 data: 0.0568 max mem: 9377 +Train: [22] [ 800/6250] eta: 0:13:14 lr: 0.000115 grad: 0.0737 (0.0801) loss: 0.8334 (0.8300) time: 0.1393 data: 0.0546 max mem: 9377 +Train: [22] [ 900/6250] eta: 0:12:58 lr: 0.000115 grad: 0.0794 (0.0798) loss: 0.8289 (0.8299) time: 0.1459 data: 0.0611 max mem: 9377 +Train: [22] [1000/6250] eta: 0:12:41 lr: 0.000115 grad: 0.0723 (0.0794) loss: 0.8277 (0.8298) time: 0.1186 data: 0.0366 max mem: 9377 +Train: [22] [1100/6250] eta: 0:12:23 lr: 0.000115 grad: 0.0716 (0.0790) loss: 0.8297 (0.8297) time: 0.1410 data: 0.0572 max mem: 9377 +Train: [22] [1200/6250] eta: 0:12:07 lr: 0.000115 grad: 0.0714 (0.0789) loss: 0.8262 (0.8296) time: 0.1435 data: 0.0593 max mem: 9377 +Train: [22] [1300/6250] eta: 0:11:47 lr: 0.000115 grad: 0.0789 (0.0788) loss: 0.8264 (0.8293) time: 0.1324 data: 0.0495 max mem: 9377 +Train: [22] [1400/6250] eta: 0:11:31 lr: 0.000115 grad: 0.0802 (0.0787) loss: 0.8223 (0.8289) time: 0.1321 data: 0.0457 max mem: 9377 +Train: [22] [1500/6250] eta: 0:11:15 lr: 0.000115 grad: 0.0713 (0.0792) loss: 0.8293 (0.8287) time: 0.1393 data: 0.0593 max mem: 9377 +Train: [22] [1600/6250] eta: 0:10:59 lr: 0.000115 grad: 0.0742 (0.0793) loss: 0.8258 (0.8287) time: 0.1388 data: 0.0584 max mem: 9377 +Train: [22] [1700/6250] eta: 0:10:45 lr: 0.000115 grad: 0.0796 (0.0792) loss: 0.8296 (0.8286) time: 0.1216 data: 0.0327 max mem: 9377 +Train: [22] [1800/6250] eta: 0:10:29 lr: 0.000115 grad: 0.0768 (0.0791) loss: 0.8227 (0.8284) time: 0.1452 data: 0.0671 max mem: 9377 +Train: [22] [1900/6250] eta: 0:10:15 lr: 0.000115 grad: 0.0746 (0.0790) loss: 0.8295 (0.8282) time: 0.1467 data: 0.0608 max mem: 9377 +Train: [22] [2000/6250] eta: 0:10:00 lr: 0.000115 grad: 0.0768 (0.0790) loss: 0.8254 (0.8281) time: 0.1499 data: 0.0688 max mem: 9377 +Train: [22] [2100/6250] eta: 0:09:45 lr: 0.000115 grad: 0.0717 (0.0791) loss: 0.8288 (0.8279) time: 0.1585 data: 0.0774 max mem: 9377 +Train: [22] [2200/6250] eta: 0:09:30 lr: 0.000115 grad: 0.0813 (0.0791) loss: 0.8261 (0.8278) time: 0.1296 data: 0.0485 max mem: 9377 +Train: [22] [2300/6250] eta: 0:09:14 lr: 0.000115 grad: 0.0760 (0.0790) loss: 0.8309 (0.8279) time: 0.1416 data: 0.0591 max mem: 9377 +Train: [22] [2400/6250] eta: 0:09:00 lr: 0.000115 grad: 0.0697 (0.0788) loss: 0.8268 (0.8280) time: 0.1109 data: 0.0267 max mem: 9377 +Train: [22] [2500/6250] eta: 0:08:46 lr: 0.000115 grad: 0.0704 (0.0787) loss: 0.8314 (0.8280) time: 0.1380 data: 0.0555 max mem: 9377 +Train: [22] [2600/6250] eta: 0:08:31 lr: 0.000115 grad: 0.0759 (0.0786) loss: 0.8277 (0.8280) time: 0.1345 data: 0.0522 max mem: 9377 +Train: [22] [2700/6250] eta: 0:08:17 lr: 0.000115 grad: 0.0676 (0.0785) loss: 0.8351 (0.8281) time: 0.1423 data: 0.0632 max mem: 9377 +Train: [22] [2800/6250] eta: 0:08:03 lr: 0.000115 grad: 0.0738 (0.0785) loss: 0.8253 (0.8281) time: 0.1508 data: 0.0695 max mem: 9377 +Train: [22] [2900/6250] eta: 0:07:49 lr: 0.000115 grad: 0.0716 (0.0783) loss: 0.8256 (0.8281) time: 0.1462 data: 0.0679 max mem: 9377 +Train: [22] [3000/6250] eta: 0:07:34 lr: 0.000115 grad: 0.0750 (0.0783) loss: 0.8309 (0.8281) time: 0.1282 data: 0.0446 max mem: 9377 +Train: [22] [3100/6250] eta: 0:07:20 lr: 0.000115 grad: 0.0714 (0.0782) loss: 0.8383 (0.8281) time: 0.1499 data: 0.0698 max mem: 9377 +Train: [22] [3200/6250] eta: 0:07:06 lr: 0.000115 grad: 0.0678 (0.0782) loss: 0.8345 (0.8282) time: 0.1361 data: 0.0546 max mem: 9377 +Train: [22] [3300/6250] eta: 0:06:52 lr: 0.000115 grad: 0.0681 (0.0781) loss: 0.8274 (0.8283) time: 0.1407 data: 0.0583 max mem: 9377 +Train: [22] [3400/6250] eta: 0:06:39 lr: 0.000115 grad: 0.0718 (0.0780) loss: 0.8323 (0.8284) time: 0.1421 data: 0.0593 max mem: 9377 +Train: [22] [3500/6250] eta: 0:06:25 lr: 0.000115 grad: 0.0754 (0.0779) loss: 0.8333 (0.8284) time: 0.1449 data: 0.0561 max mem: 9377 +Train: [22] [3600/6250] eta: 0:06:12 lr: 0.000115 grad: 0.0674 (0.0779) loss: 0.8379 (0.8285) time: 0.1686 data: 0.0802 max mem: 9377 +Train: [22] [3700/6250] eta: 0:05:58 lr: 0.000115 grad: 0.0729 (0.0778) loss: 0.8299 (0.8286) time: 0.1480 data: 0.0636 max mem: 9377 +Train: [22] [3800/6250] eta: 0:05:44 lr: 0.000115 grad: 0.0709 (0.0778) loss: 0.8366 (0.8287) time: 0.1434 data: 0.0610 max mem: 9377 +Train: [22] [3900/6250] eta: 0:05:31 lr: 0.000115 grad: 0.0746 (0.0777) loss: 0.8285 (0.8288) time: 0.1601 data: 0.0816 max mem: 9377 +Train: [22] [4000/6250] eta: 0:05:16 lr: 0.000115 grad: 0.0720 (0.0777) loss: 0.8352 (0.8289) time: 0.1366 data: 0.0566 max mem: 9377 +Train: [22] [4100/6250] eta: 0:05:02 lr: 0.000115 grad: 0.0772 (0.0777) loss: 0.8291 (0.8289) time: 0.1291 data: 0.0430 max mem: 9377 +Train: [22] [4200/6250] eta: 0:04:48 lr: 0.000115 grad: 0.0751 (0.0776) loss: 0.8338 (0.8289) time: 0.1284 data: 0.0403 max mem: 9377 +Train: [22] [4300/6250] eta: 0:04:34 lr: 0.000115 grad: 0.0756 (0.0776) loss: 0.8329 (0.8290) time: 0.1290 data: 0.0446 max mem: 9377 +Train: [22] [4400/6250] eta: 0:04:19 lr: 0.000115 grad: 0.0759 (0.0775) loss: 0.8306 (0.8290) time: 0.1371 data: 0.0547 max mem: 9377 +Train: [22] [4500/6250] eta: 0:04:05 lr: 0.000115 grad: 0.0750 (0.0775) loss: 0.8353 (0.8290) time: 0.1284 data: 0.0423 max mem: 9377 +Train: [22] [4600/6250] eta: 0:03:51 lr: 0.000115 grad: 0.0721 (0.0775) loss: 0.8350 (0.8290) time: 0.1490 data: 0.0651 max mem: 9377 +Train: [22] [4700/6250] eta: 0:03:36 lr: 0.000115 grad: 0.0723 (0.0774) loss: 0.8274 (0.8290) time: 0.1232 data: 0.0342 max mem: 9377 +Train: [22] [4800/6250] eta: 0:03:23 lr: 0.000115 grad: 0.0718 (0.0774) loss: 0.8232 (0.8290) time: 0.1344 data: 0.0566 max mem: 9377 +Train: [22] [4900/6250] eta: 0:03:09 lr: 0.000114 grad: 0.0726 (0.0774) loss: 0.8277 (0.8290) time: 0.1420 data: 0.0563 max mem: 9377 +Train: [22] [5000/6250] eta: 0:02:55 lr: 0.000114 grad: 0.0739 (0.0773) loss: 0.8281 (0.8290) time: 0.1461 data: 0.0654 max mem: 9377 +Train: [22] [5100/6250] eta: 0:02:42 lr: 0.000114 grad: 0.0746 (0.0773) loss: 0.8262 (0.8290) time: 0.1758 data: 0.0990 max mem: 9377 +Train: [22] [5200/6250] eta: 0:02:28 lr: 0.000114 grad: 0.0756 (0.0773) loss: 0.8302 (0.8290) time: 0.1850 data: 0.1072 max mem: 9377 +Train: [22] [5300/6250] eta: 0:02:14 lr: 0.000114 grad: 0.0706 (0.0773) loss: 0.8274 (0.8290) time: 0.1436 data: 0.0640 max mem: 9377 +Train: [22] [5400/6250] eta: 0:02:00 lr: 0.000114 grad: 0.0769 (0.0772) loss: 0.8267 (0.8290) time: 0.1532 data: 0.0635 max mem: 9377 +Train: [22] [5500/6250] eta: 0:01:46 lr: 0.000114 grad: 0.0711 (0.0772) loss: 0.8319 (0.8290) time: 0.1357 data: 0.0559 max mem: 9377 +Train: [22] [5600/6250] eta: 0:01:32 lr: 0.000114 grad: 0.0745 (0.0772) loss: 0.8274 (0.8290) time: 0.1410 data: 0.0582 max mem: 9377 +Train: [22] [5700/6250] eta: 0:01:18 lr: 0.000114 grad: 0.0749 (0.0772) loss: 0.8315 (0.8290) time: 0.1232 data: 0.0406 max mem: 9377 +Train: [22] [5800/6250] eta: 0:01:03 lr: 0.000114 grad: 0.0784 (0.0772) loss: 0.8353 (0.8291) time: 0.1441 data: 0.0613 max mem: 9377 +Train: [22] [5900/6250] eta: 0:00:49 lr: 0.000114 grad: 0.0697 (0.0772) loss: 0.8352 (0.8291) time: 0.1233 data: 0.0389 max mem: 9377 +Train: [22] [6000/6250] eta: 0:00:35 lr: 0.000114 grad: 0.0731 (0.0772) loss: 0.8315 (0.8292) time: 0.1182 data: 0.0283 max mem: 9377 +Train: [22] [6100/6250] eta: 0:00:21 lr: 0.000114 grad: 0.0708 (0.0771) loss: 0.8310 (0.8292) time: 0.1405 data: 0.0516 max mem: 9377 +Train: [22] [6200/6250] eta: 0:00:07 lr: 0.000114 grad: 0.0734 (0.0771) loss: 0.8299 (0.8292) time: 0.1485 data: 0.0651 max mem: 9377 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.0715 (0.0771) loss: 0.8321 (0.8293) time: 0.1349 data: 0.0506 max mem: 9377 +Train: [22] Total time: 0:14:48 (0.1421 s / it) +Averaged stats: lr: 0.000114 grad: 0.0715 (0.0771) loss: 0.8321 (0.8293) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:05:22 loss: 0.8441 (0.8441) time: 5.2048 data: 5.1753 max mem: 9377 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8448 (0.8457) time: 0.1239 data: 0.0964 max mem: 9377 +Eval (hcp-train-subset): [22] Total time: 0:00:12 (0.2052 s / it) +Averaged stats (hcp-train-subset): loss: 0.8448 (0.8457) +Eval (hcp-val): [22] [ 0/62] eta: 0:05:31 loss: 0.8423 (0.8423) time: 5.3539 data: 5.3245 max mem: 9377 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8423 (0.8433) time: 0.1370 data: 0.1115 max mem: 9377 +Eval (hcp-val): [22] Total time: 0:00:13 (0.2152 s / it) +Averaged stats (hcp-val): loss: 0.8423 (0.8433) +Eval (nsd-val): [22] [ 0/62] eta: 0:03:03 loss: 0.8040 (0.8040) time: 2.9666 data: 2.8798 max mem: 9377 +Eval (nsd-val): [22] [61/62] eta: 0:00:00 loss: 0.8154 (0.8164) time: 0.1099 data: 0.0847 max mem: 9377 +Eval (nsd-val): [22] Total time: 0:00:12 (0.2064 s / it) +Averaged stats (nsd-val): loss: 0.8154 (0.8164) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [23] [ 0/6250] eta: 7:27:56 lr: 0.000114 grad: 0.1143 (0.1143) loss: 0.8377 (0.8377) time: 4.3002 data: 4.0921 max mem: 9377 +Train: [23] [ 100/6250] eta: 0:18:52 lr: 0.000114 grad: 0.0943 (0.0884) loss: 0.8253 (0.8325) time: 0.1257 data: 0.0377 max mem: 9377 +Train: [23] [ 200/6250] eta: 0:16:31 lr: 0.000114 grad: 0.0698 (0.0830) loss: 0.8340 (0.8324) time: 0.1628 data: 0.0757 max mem: 9377 +Train: [23] [ 300/6250] eta: 0:15:22 lr: 0.000114 grad: 0.0760 (0.0810) loss: 0.8349 (0.8312) time: 0.1443 data: 0.0554 max mem: 9377 +Train: [23] [ 400/6250] eta: 0:14:32 lr: 0.000114 grad: 0.0703 (0.0803) loss: 0.8303 (0.8304) time: 0.1230 data: 0.0256 max mem: 9377 +Train: [23] [ 500/6250] eta: 0:13:54 lr: 0.000114 grad: 0.0773 (0.0796) loss: 0.8299 (0.8296) time: 0.1362 data: 0.0384 max mem: 9377 +Train: [23] [ 600/6250] eta: 0:13:29 lr: 0.000114 grad: 0.0771 (0.0792) loss: 0.8324 (0.8293) time: 0.1409 data: 0.0498 max mem: 9377 +Train: [23] [ 700/6250] eta: 0:13:21 lr: 0.000114 grad: 0.0735 (0.0784) loss: 0.8283 (0.8292) time: 0.1660 data: 0.0803 max mem: 9377 +Train: [23] [ 800/6250] eta: 0:13:10 lr: 0.000114 grad: 0.0712 (0.0781) loss: 0.8312 (0.8290) time: 0.1396 data: 0.0544 max mem: 9377 +Train: [23] [ 900/6250] eta: 0:12:58 lr: 0.000114 grad: 0.0746 (0.0777) loss: 0.8315 (0.8291) time: 0.1497 data: 0.0626 max mem: 9377 +Train: [23] [1000/6250] eta: 0:12:42 lr: 0.000114 grad: 0.0739 (0.0774) loss: 0.8278 (0.8291) time: 0.1463 data: 0.0621 max mem: 9377 +Train: [23] [1100/6250] eta: 0:12:24 lr: 0.000114 grad: 0.0752 (0.0772) loss: 0.8283 (0.8290) time: 0.1333 data: 0.0469 max mem: 9377 +Train: [23] [1200/6250] eta: 0:12:10 lr: 0.000114 grad: 0.0687 (0.0772) loss: 0.8309 (0.8289) time: 0.1419 data: 0.0565 max mem: 9377 +Train: [23] [1300/6250] eta: 0:11:56 lr: 0.000114 grad: 0.0736 (0.0770) loss: 0.8324 (0.8289) time: 0.1420 data: 0.0603 max mem: 9377 +Train: [23] [1400/6250] eta: 0:11:42 lr: 0.000114 grad: 0.0726 (0.0770) loss: 0.8350 (0.8291) time: 0.1231 data: 0.0443 max mem: 9377 +Train: [23] [1500/6250] eta: 0:11:29 lr: 0.000114 grad: 0.0744 (0.0769) loss: 0.8277 (0.8291) time: 0.1589 data: 0.0811 max mem: 9377 +Train: [23] [1600/6250] eta: 0:11:15 lr: 0.000114 grad: 0.0748 (0.0769) loss: 0.8298 (0.8291) time: 0.1679 data: 0.0857 max mem: 9377 +Train: [23] [1700/6250] eta: 0:10:59 lr: 0.000114 grad: 0.0732 (0.0769) loss: 0.8295 (0.8291) time: 0.1395 data: 0.0585 max mem: 9377 +Train: [23] [1800/6250] eta: 0:10:44 lr: 0.000114 grad: 0.0804 (0.0771) loss: 0.8299 (0.8292) time: 0.1458 data: 0.0591 max mem: 9377 +Train: [23] [1900/6250] eta: 0:10:30 lr: 0.000114 grad: 0.0728 (0.0770) loss: 0.8295 (0.8293) time: 0.1359 data: 0.0549 max mem: 9377 +Train: [23] [2000/6250] eta: 0:10:15 lr: 0.000114 grad: 0.0726 (0.0769) loss: 0.8327 (0.8294) time: 0.1246 data: 0.0439 max mem: 9377 +Train: [23] [2100/6250] eta: 0:10:00 lr: 0.000114 grad: 0.0719 (0.0770) loss: 0.8359 (0.8295) time: 0.1427 data: 0.0638 max mem: 9377 +Train: [23] [2200/6250] eta: 0:09:44 lr: 0.000114 grad: 0.0725 (0.0769) loss: 0.8319 (0.8296) time: 0.1378 data: 0.0596 max mem: 9377 +Train: [23] [2300/6250] eta: 0:09:30 lr: 0.000114 grad: 0.0777 (0.0772) loss: 0.8312 (0.8297) time: 0.1465 data: 0.0596 max mem: 9377 +Train: [23] [2400/6250] eta: 0:09:14 lr: 0.000114 grad: 0.0771 (0.0773) loss: 0.8268 (0.8297) time: 0.1428 data: 0.0639 max mem: 9377 +Train: [23] [2500/6250] eta: 0:09:00 lr: 0.000114 grad: 0.0726 (0.0773) loss: 0.8282 (0.8298) time: 0.1547 data: 0.0717 max mem: 9377 +Train: [23] [2600/6250] eta: 0:08:44 lr: 0.000114 grad: 0.0782 (0.0773) loss: 0.8247 (0.8297) time: 0.1403 data: 0.0631 max mem: 9377 +Train: [23] [2700/6250] eta: 0:08:29 lr: 0.000114 grad: 0.0741 (0.0773) loss: 0.8348 (0.8298) time: 0.1457 data: 0.0724 max mem: 9377 +Train: [23] [2800/6250] eta: 0:08:14 lr: 0.000114 grad: 0.0726 (0.0773) loss: 0.8287 (0.8298) time: 0.1364 data: 0.0508 max mem: 9377 +Train: [23] [2900/6250] eta: 0:07:59 lr: 0.000114 grad: 0.0746 (0.0773) loss: 0.8285 (0.8299) time: 0.1336 data: 0.0538 max mem: 9377 +Train: [23] [3000/6250] eta: 0:07:44 lr: 0.000114 grad: 0.0722 (0.0773) loss: 0.8372 (0.8299) time: 0.1344 data: 0.0450 max mem: 9377 +Train: [23] [3100/6250] eta: 0:07:29 lr: 0.000114 grad: 0.0739 (0.0773) loss: 0.8263 (0.8299) time: 0.1411 data: 0.0612 max mem: 9377 +Train: [23] [3200/6250] eta: 0:07:14 lr: 0.000114 grad: 0.0736 (0.0773) loss: 0.8362 (0.8300) time: 0.1422 data: 0.0568 max mem: 9377 +Train: [23] [3300/6250] eta: 0:07:00 lr: 0.000114 grad: 0.0735 (0.0773) loss: 0.8304 (0.8299) time: 0.1489 data: 0.0632 max mem: 9377 +Train: [23] [3400/6250] eta: 0:06:45 lr: 0.000114 grad: 0.0756 (0.0774) loss: 0.8298 (0.8300) time: 0.1448 data: 0.0660 max mem: 9377 +Train: [23] [3500/6250] eta: 0:06:30 lr: 0.000114 grad: 0.0738 (0.0773) loss: 0.8341 (0.8300) time: 0.1383 data: 0.0496 max mem: 9377 +Train: [23] [3600/6250] eta: 0:06:17 lr: 0.000114 grad: 0.0744 (0.0773) loss: 0.8361 (0.8300) time: 0.1191 data: 0.0321 max mem: 9377 +Train: [23] [3700/6250] eta: 0:06:03 lr: 0.000114 grad: 0.0741 (0.0774) loss: 0.8313 (0.8300) time: 0.1603 data: 0.0748 max mem: 9377 +Train: [23] [3800/6250] eta: 0:05:50 lr: 0.000114 grad: 0.0777 (0.0774) loss: 0.8261 (0.8300) time: 0.1553 data: 0.0653 max mem: 9377 +Train: [23] [3900/6250] eta: 0:05:36 lr: 0.000114 grad: 0.0749 (0.0773) loss: 0.8286 (0.8299) time: 0.1718 data: 0.0888 max mem: 9377 +Train: [23] [4000/6250] eta: 0:05:22 lr: 0.000113 grad: 0.0741 (0.0774) loss: 0.8261 (0.8299) time: 0.1388 data: 0.0579 max mem: 9377 +Train: [23] [4100/6250] eta: 0:05:08 lr: 0.000113 grad: 0.0747 (0.0773) loss: 0.8336 (0.8299) time: 0.2254 data: 0.1406 max mem: 9377 +Train: [23] [4200/6250] eta: 0:04:54 lr: 0.000113 grad: 0.0756 (0.0774) loss: 0.8265 (0.8299) time: 0.1431 data: 0.0618 max mem: 9377 +Train: [23] [4300/6250] eta: 0:04:39 lr: 0.000113 grad: 0.0755 (0.0774) loss: 0.8285 (0.8298) time: 0.1447 data: 0.0603 max mem: 9377 +Train: [23] [4400/6250] eta: 0:04:25 lr: 0.000113 grad: 0.0758 (0.0775) loss: 0.8256 (0.8298) time: 0.1078 data: 0.0002 max mem: 9377 +Train: [23] [4500/6250] eta: 0:04:10 lr: 0.000113 grad: 0.0709 (0.0775) loss: 0.8278 (0.8298) time: 0.1140 data: 0.0245 max mem: 9377 +Train: [23] [4600/6250] eta: 0:03:55 lr: 0.000113 grad: 0.0756 (0.0776) loss: 0.8293 (0.8298) time: 0.1376 data: 0.0563 max mem: 9377 +Train: [23] [4700/6250] eta: 0:03:41 lr: 0.000113 grad: 0.0781 (0.0777) loss: 0.8345 (0.8297) time: 0.1220 data: 0.0416 max mem: 9377 +Train: [23] [4800/6250] eta: 0:03:26 lr: 0.000113 grad: 0.0808 (0.0779) loss: 0.8221 (0.8297) time: 0.1317 data: 0.0549 max mem: 9377 +Train: [23] [4900/6250] eta: 0:03:12 lr: 0.000113 grad: 0.0720 (0.0779) loss: 0.8291 (0.8296) time: 0.1259 data: 0.0441 max mem: 9377 +Train: [23] [5000/6250] eta: 0:02:58 lr: 0.000113 grad: 0.0772 (0.0779) loss: 0.8332 (0.8296) time: 0.1397 data: 0.0584 max mem: 9377 +Train: [23] [5100/6250] eta: 0:02:43 lr: 0.000113 grad: 0.0763 (0.0779) loss: 0.8308 (0.8296) time: 0.1378 data: 0.0598 max mem: 9377 +Train: [23] [5200/6250] eta: 0:02:29 lr: 0.000113 grad: 0.0786 (0.0780) loss: 0.8285 (0.8296) time: 0.1306 data: 0.0565 max mem: 9377 +Train: [23] [5300/6250] eta: 0:02:15 lr: 0.000113 grad: 0.0775 (0.0780) loss: 0.8267 (0.8296) time: 0.1559 data: 0.0734 max mem: 9377 +Train: [23] [5400/6250] eta: 0:02:01 lr: 0.000113 grad: 0.0783 (0.0781) loss: 0.8317 (0.8296) time: 0.1766 data: 0.0953 max mem: 9377 +Train: [23] [5500/6250] eta: 0:01:47 lr: 0.000113 grad: 0.0772 (0.0781) loss: 0.8309 (0.8296) time: 0.1307 data: 0.0475 max mem: 9377 +Train: [23] [5600/6250] eta: 0:01:33 lr: 0.000113 grad: 0.0738 (0.0781) loss: 0.8293 (0.8296) time: 0.1508 data: 0.0700 max mem: 9377 +Train: [23] [5700/6250] eta: 0:01:18 lr: 0.000113 grad: 0.0768 (0.0782) loss: 0.8290 (0.8295) time: 0.1322 data: 0.0494 max mem: 9377 +Train: [23] [5800/6250] eta: 0:01:04 lr: 0.000113 grad: 0.0803 (0.0782) loss: 0.8231 (0.8294) time: 0.1282 data: 0.0416 max mem: 9377 +Train: [23] [5900/6250] eta: 0:00:50 lr: 0.000113 grad: 0.0771 (0.0782) loss: 0.8266 (0.8294) time: 0.1552 data: 0.0712 max mem: 9377 +Train: [23] [6000/6250] eta: 0:00:35 lr: 0.000113 grad: 0.0813 (0.0783) loss: 0.8209 (0.8293) time: 0.1359 data: 0.0502 max mem: 9377 +Train: [23] [6100/6250] eta: 0:00:21 lr: 0.000113 grad: 0.0806 (0.0783) loss: 0.8272 (0.8292) time: 0.1271 data: 0.0460 max mem: 9377 +Train: [23] [6200/6250] eta: 0:00:07 lr: 0.000113 grad: 0.0733 (0.0783) loss: 0.8217 (0.8292) time: 0.1218 data: 0.0350 max mem: 9377 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.0740 (0.0783) loss: 0.8271 (0.8292) time: 0.1112 data: 0.0224 max mem: 9377 +Train: [23] Total time: 0:14:55 (0.1433 s / it) +Averaged stats: lr: 0.000113 grad: 0.0740 (0.0783) loss: 0.8271 (0.8292) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:04:30 loss: 0.8466 (0.8466) time: 4.3634 data: 4.3228 max mem: 9377 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8466 (0.8455) time: 0.1128 data: 0.0882 max mem: 9377 +Eval (hcp-train-subset): [23] Total time: 0:00:11 (0.1909 s / it) +Averaged stats (hcp-train-subset): loss: 0.8466 (0.8455) +Eval (hcp-val): [23] [ 0/62] eta: 0:04:59 loss: 0.8411 (0.8411) time: 4.8352 data: 4.8053 max mem: 9377 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8434 (0.8432) time: 0.1187 data: 0.0940 max mem: 9377 +Eval (hcp-val): [23] Total time: 0:00:12 (0.1989 s / it) +Averaged stats (hcp-val): loss: 0.8434 (0.8432) +Eval (nsd-val): [23] [ 0/62] eta: 0:03:01 loss: 0.8035 (0.8035) time: 2.9331 data: 2.8714 max mem: 9377 +Eval (nsd-val): [23] [61/62] eta: 0:00:00 loss: 0.8122 (0.8133) time: 0.1104 data: 0.0857 max mem: 9377 +Eval (nsd-val): [23] Total time: 0:00:11 (0.1907 s / it) +Averaged stats (nsd-val): loss: 0.8122 (0.8133) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 6:21:00 lr: 0.000113 grad: 0.0537 (0.0537) loss: 0.8849 (0.8849) time: 3.6576 data: 3.4766 max mem: 9377 +Train: [24] [ 100/6250] eta: 0:18:42 lr: 0.000113 grad: 0.0626 (0.0746) loss: 0.8420 (0.8432) time: 0.1526 data: 0.0610 max mem: 9377 +Train: [24] [ 200/6250] eta: 0:16:08 lr: 0.000113 grad: 0.0735 (0.0727) loss: 0.8414 (0.8430) time: 0.1469 data: 0.0620 max mem: 9377 +Train: [24] [ 300/6250] eta: 0:15:08 lr: 0.000113 grad: 0.0760 (0.0742) loss: 0.8346 (0.8403) time: 0.1282 data: 0.0384 max mem: 9377 +Train: [24] [ 400/6250] eta: 0:14:33 lr: 0.000113 grad: 0.0745 (0.0769) loss: 0.8286 (0.8373) time: 0.1418 data: 0.0553 max mem: 9377 +Train: [24] [ 500/6250] eta: 0:14:04 lr: 0.000113 grad: 0.0742 (0.0769) loss: 0.8346 (0.8362) time: 0.1473 data: 0.0641 max mem: 9377 +Train: [24] [ 600/6250] eta: 0:13:39 lr: 0.000113 grad: 0.0719 (0.0767) loss: 0.8287 (0.8352) time: 0.1676 data: 0.0832 max mem: 9377 +Train: [24] [ 700/6250] eta: 0:13:15 lr: 0.000113 grad: 0.0700 (0.0766) loss: 0.8226 (0.8342) time: 0.1316 data: 0.0430 max mem: 9377 +Train: [24] [ 800/6250] eta: 0:13:06 lr: 0.000113 grad: 0.0807 (0.0767) loss: 0.8228 (0.8333) time: 0.1451 data: 0.0602 max mem: 9377 +Train: [24] [ 900/6250] eta: 0:12:55 lr: 0.000113 grad: 0.0744 (0.0772) loss: 0.8314 (0.8328) time: 0.1481 data: 0.0597 max mem: 9377 +Train: [24] [1000/6250] eta: 0:12:38 lr: 0.000113 grad: 0.0747 (0.0769) loss: 0.8345 (0.8324) time: 0.1394 data: 0.0574 max mem: 9377 +Train: [24] [1100/6250] eta: 0:12:18 lr: 0.000113 grad: 0.0731 (0.0769) loss: 0.8305 (0.8318) time: 0.1346 data: 0.0518 max mem: 9377 +Train: [24] [1200/6250] eta: 0:12:01 lr: 0.000113 grad: 0.0696 (0.0768) loss: 0.8378 (0.8315) time: 0.1411 data: 0.0576 max mem: 9377 +Train: [24] [1300/6250] eta: 0:11:44 lr: 0.000113 grad: 0.0727 (0.0767) loss: 0.8282 (0.8313) time: 0.1406 data: 0.0588 max mem: 9377 +Train: [24] [1400/6250] eta: 0:11:27 lr: 0.000113 grad: 0.0728 (0.0766) loss: 0.8331 (0.8310) time: 0.1273 data: 0.0501 max mem: 9377 +Train: [24] [1500/6250] eta: 0:11:13 lr: 0.000113 grad: 0.0731 (0.0768) loss: 0.8332 (0.8307) time: 0.1529 data: 0.0762 max mem: 9377 +Train: [24] [1600/6250] eta: 0:10:59 lr: 0.000113 grad: 0.0736 (0.0769) loss: 0.8308 (0.8306) time: 0.1489 data: 0.0678 max mem: 9377 +Train: [24] [1700/6250] eta: 0:10:42 lr: 0.000113 grad: 0.0741 (0.0769) loss: 0.8314 (0.8304) time: 0.1420 data: 0.0616 max mem: 9377 +Train: [24] [1800/6250] eta: 0:10:28 lr: 0.000113 grad: 0.0718 (0.0768) loss: 0.8357 (0.8304) time: 0.1454 data: 0.0705 max mem: 9377 +Train: [24] [1900/6250] eta: 0:10:13 lr: 0.000113 grad: 0.0744 (0.0768) loss: 0.8323 (0.8304) time: 0.1407 data: 0.0592 max mem: 9377 +Train: [24] [2000/6250] eta: 0:10:00 lr: 0.000113 grad: 0.0717 (0.0768) loss: 0.8327 (0.8304) time: 0.1287 data: 0.0481 max mem: 9377 +Train: [24] [2100/6250] eta: 0:09:48 lr: 0.000113 grad: 0.0706 (0.0768) loss: 0.8301 (0.8304) time: 0.1429 data: 0.0644 max mem: 9377 +Train: [24] [2200/6250] eta: 0:09:33 lr: 0.000113 grad: 0.0773 (0.0768) loss: 0.8372 (0.8304) time: 0.1377 data: 0.0558 max mem: 9377 +Train: [24] [2300/6250] eta: 0:09:19 lr: 0.000113 grad: 0.0735 (0.0769) loss: 0.8300 (0.8304) time: 0.1421 data: 0.0605 max mem: 9377 +Train: [24] [2400/6250] eta: 0:09:05 lr: 0.000113 grad: 0.0781 (0.0769) loss: 0.8274 (0.8303) time: 0.1474 data: 0.0676 max mem: 9377 +Train: [24] [2500/6250] eta: 0:08:51 lr: 0.000113 grad: 0.0688 (0.0771) loss: 0.8339 (0.8302) time: 0.1404 data: 0.0546 max mem: 9377 +Train: [24] [2600/6250] eta: 0:08:37 lr: 0.000113 grad: 0.0717 (0.0772) loss: 0.8347 (0.8302) time: 0.1182 data: 0.0379 max mem: 9377 +Train: [24] [2700/6250] eta: 0:08:22 lr: 0.000113 grad: 0.0752 (0.0772) loss: 0.8271 (0.8302) time: 0.1465 data: 0.0634 max mem: 9377 +Train: [24] [2800/6250] eta: 0:08:09 lr: 0.000113 grad: 0.0809 (0.0773) loss: 0.8232 (0.8301) time: 0.1552 data: 0.0742 max mem: 9377 +Train: [24] [2900/6250] eta: 0:07:54 lr: 0.000112 grad: 0.0722 (0.0773) loss: 0.8359 (0.8302) time: 0.1527 data: 0.0727 max mem: 9377 +Train: [24] [3000/6250] eta: 0:07:40 lr: 0.000112 grad: 0.0781 (0.0772) loss: 0.8382 (0.8303) time: 0.1459 data: 0.0645 max mem: 9377 +Train: [24] [3100/6250] eta: 0:07:26 lr: 0.000112 grad: 0.0758 (0.0772) loss: 0.8280 (0.8302) time: 0.1424 data: 0.0593 max mem: 9377 +Train: [24] [3200/6250] eta: 0:07:12 lr: 0.000112 grad: 0.0714 (0.0772) loss: 0.8261 (0.8302) time: 0.1455 data: 0.0615 max mem: 9377 +Train: [24] [3300/6250] eta: 0:06:57 lr: 0.000112 grad: 0.0761 (0.0772) loss: 0.8299 (0.8301) time: 0.1484 data: 0.0624 max mem: 9377 +Train: [24] [3400/6250] eta: 0:06:43 lr: 0.000112 grad: 0.0706 (0.0772) loss: 0.8342 (0.8300) time: 0.1277 data: 0.0438 max mem: 9377 +Train: [24] [3500/6250] eta: 0:06:28 lr: 0.000112 grad: 0.0810 (0.0772) loss: 0.8278 (0.8300) time: 0.1404 data: 0.0539 max mem: 9377 +Train: [24] [3600/6250] eta: 0:06:13 lr: 0.000112 grad: 0.0755 (0.0772) loss: 0.8294 (0.8300) time: 0.1319 data: 0.0500 max mem: 9377 +Train: [24] [3700/6250] eta: 0:05:59 lr: 0.000112 grad: 0.0715 (0.0773) loss: 0.8293 (0.8299) time: 0.1320 data: 0.0526 max mem: 9377 +Train: [24] [3800/6250] eta: 0:05:46 lr: 0.000112 grad: 0.0743 (0.0772) loss: 0.8280 (0.8299) time: 0.1439 data: 0.0637 max mem: 9377 +Train: [24] [3900/6250] eta: 0:05:32 lr: 0.000112 grad: 0.0741 (0.0773) loss: 0.8279 (0.8298) time: 0.1527 data: 0.0718 max mem: 9377 +Train: [24] [4000/6250] eta: 0:05:19 lr: 0.000112 grad: 0.0737 (0.0772) loss: 0.8243 (0.8298) time: 0.1758 data: 0.1008 max mem: 9377 +Train: [24] [4100/6250] eta: 0:05:05 lr: 0.000112 grad: 0.0820 (0.0774) loss: 0.8296 (0.8298) time: 0.1407 data: 0.0633 max mem: 9377 +Train: [24] [4200/6250] eta: 0:04:51 lr: 0.000112 grad: 0.0779 (0.0774) loss: 0.8264 (0.8298) time: 0.1598 data: 0.0737 max mem: 9377 +Train: [24] [4300/6250] eta: 0:04:37 lr: 0.000112 grad: 0.0760 (0.0775) loss: 0.8273 (0.8297) time: 0.1443 data: 0.0610 max mem: 9377 +Train: [24] [4400/6250] eta: 0:04:23 lr: 0.000112 grad: 0.0749 (0.0776) loss: 0.8230 (0.8296) time: 0.1472 data: 0.0626 max mem: 9377 +Train: [24] [4500/6250] eta: 0:04:09 lr: 0.000112 grad: 0.0810 (0.0777) loss: 0.8222 (0.8295) time: 0.1450 data: 0.0603 max mem: 9377 +Train: [24] [4600/6250] eta: 0:03:55 lr: 0.000112 grad: 0.0711 (0.0778) loss: 0.8293 (0.8294) time: 0.1553 data: 0.0701 max mem: 9377 +Train: [24] [4700/6250] eta: 0:03:40 lr: 0.000112 grad: 0.0754 (0.0779) loss: 0.8232 (0.8294) time: 0.1578 data: 0.0697 max mem: 9377 +Train: [24] [4800/6250] eta: 0:03:26 lr: 0.000112 grad: 0.0837 (0.0780) loss: 0.8279 (0.8293) time: 0.1377 data: 0.0533 max mem: 9377 +Train: [24] [4900/6250] eta: 0:03:11 lr: 0.000112 grad: 0.0798 (0.0782) loss: 0.8296 (0.8292) time: 0.1416 data: 0.0592 max mem: 9377 +Train: [24] [5000/6250] eta: 0:02:57 lr: 0.000112 grad: 0.0771 (0.0784) loss: 0.8270 (0.8291) time: 0.1698 data: 0.0907 max mem: 9377 +Train: [24] [5100/6250] eta: 0:02:43 lr: 0.000112 grad: 0.0777 (0.0784) loss: 0.8247 (0.8290) time: 0.1250 data: 0.0444 max mem: 9377 +Train: [24] [5200/6250] eta: 0:02:29 lr: 0.000112 grad: 0.0757 (0.0785) loss: 0.8251 (0.8289) time: 0.1179 data: 0.0357 max mem: 9377 +Train: [24] [5300/6250] eta: 0:02:15 lr: 0.000112 grad: 0.0772 (0.0785) loss: 0.8254 (0.8288) time: 0.1446 data: 0.0609 max mem: 9377 +Train: [24] [5400/6250] eta: 0:02:01 lr: 0.000112 grad: 0.0747 (0.0787) loss: 0.8230 (0.8287) time: 0.1529 data: 0.0699 max mem: 9377 +Train: [24] [5500/6250] eta: 0:01:47 lr: 0.000112 grad: 0.0750 (0.0787) loss: 0.8242 (0.8286) time: 0.1483 data: 0.0722 max mem: 9377 +Train: [24] [5600/6250] eta: 0:01:32 lr: 0.000112 grad: 0.0735 (0.0788) loss: 0.8323 (0.8285) time: 0.1390 data: 0.0572 max mem: 9377 +Train: [24] [5700/6250] eta: 0:01:18 lr: 0.000112 grad: 0.0800 (0.0788) loss: 0.8240 (0.8284) time: 0.1500 data: 0.0679 max mem: 9377 +Train: [24] [5800/6250] eta: 0:01:04 lr: 0.000112 grad: 0.0824 (0.0789) loss: 0.8201 (0.8283) time: 0.1451 data: 0.0594 max mem: 9377 +Train: [24] [5900/6250] eta: 0:00:50 lr: 0.000112 grad: 0.0776 (0.0790) loss: 0.8294 (0.8282) time: 0.1601 data: 0.0800 max mem: 9377 +Train: [24] [6000/6250] eta: 0:00:35 lr: 0.000112 grad: 0.0800 (0.0791) loss: 0.8237 (0.8281) time: 0.1444 data: 0.0621 max mem: 9377 +Train: [24] [6100/6250] eta: 0:00:21 lr: 0.000112 grad: 0.0845 (0.0792) loss: 0.8192 (0.8280) time: 0.1386 data: 0.0503 max mem: 9377 +Train: [24] [6200/6250] eta: 0:00:07 lr: 0.000112 grad: 0.0803 (0.0793) loss: 0.8245 (0.8279) time: 0.1472 data: 0.0662 max mem: 9377 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.0806 (0.0793) loss: 0.8253 (0.8279) time: 0.1412 data: 0.0546 max mem: 9377 +Train: [24] Total time: 0:14:58 (0.1437 s / it) +Averaged stats: lr: 0.000112 grad: 0.0806 (0.0793) loss: 0.8253 (0.8279) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:04:46 loss: 0.8431 (0.8431) time: 4.6147 data: 4.5858 max mem: 9377 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8487 (0.8464) time: 0.1132 data: 0.0869 max mem: 9377 +Eval (hcp-train-subset): [24] Total time: 0:00:11 (0.1925 s / it) +Averaged stats (hcp-train-subset): loss: 0.8487 (0.8464) +Making plots (hcp-train-subset): example=55 +Eval (hcp-val): [24] [ 0/62] eta: 0:04:31 loss: 0.8431 (0.8431) time: 4.3757 data: 4.3457 max mem: 9377 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8440 (0.8442) time: 0.0936 data: 0.0689 max mem: 9377 +Eval (hcp-val): [24] Total time: 0:00:12 (0.1960 s / it) +Averaged stats (hcp-val): loss: 0.8440 (0.8442) +Making plots (hcp-val): example=39 +Eval (nsd-val): [24] [ 0/62] eta: 0:04:10 loss: 0.8046 (0.8046) time: 4.0443 data: 3.9635 max mem: 9377 +Eval (nsd-val): [24] [61/62] eta: 0:00:00 loss: 0.8179 (0.8190) time: 0.1071 data: 0.0824 max mem: 9377 +Eval (nsd-val): [24] Total time: 0:00:11 (0.1914 s / it) +Averaged stats (nsd-val): loss: 0.8179 (0.8190) +Making plots (nsd-val): example=23 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00024.pth +Train: [25] [ 0/6250] eta: 10:12:37 lr: 0.000112 grad: 0.0705 (0.0705) loss: 0.8383 (0.8383) time: 5.8812 data: 5.7630 max mem: 9377 +Train: [25] [ 100/6250] eta: 0:19:32 lr: 0.000112 grad: 0.0754 (0.0922) loss: 0.8332 (0.8315) time: 0.1603 data: 0.0660 max mem: 9377 +Train: [25] [ 200/6250] eta: 0:17:10 lr: 0.000112 grad: 0.0749 (0.0875) loss: 0.8313 (0.8306) time: 0.1622 data: 0.0695 max mem: 9377 +Train: [25] [ 300/6250] eta: 0:15:53 lr: 0.000112 grad: 0.0728 (0.0831) loss: 0.8293 (0.8316) time: 0.1426 data: 0.0552 max mem: 9377 +Train: [25] [ 400/6250] eta: 0:15:04 lr: 0.000112 grad: 0.0735 (0.0814) loss: 0.8346 (0.8316) time: 0.1253 data: 0.0396 max mem: 9377 +Train: [25] [ 500/6250] eta: 0:14:27 lr: 0.000112 grad: 0.0722 (0.0800) loss: 0.8351 (0.8320) time: 0.1423 data: 0.0509 max mem: 9377 +Train: [25] [ 600/6250] eta: 0:13:57 lr: 0.000112 grad: 0.0720 (0.0792) loss: 0.8334 (0.8321) time: 0.1354 data: 0.0501 max mem: 9377 +Train: [25] [ 700/6250] eta: 0:13:27 lr: 0.000112 grad: 0.0696 (0.0787) loss: 0.8354 (0.8324) time: 0.1305 data: 0.0440 max mem: 9377 +Train: [25] [ 800/6250] eta: 0:13:08 lr: 0.000112 grad: 0.0685 (0.0779) loss: 0.8374 (0.8328) time: 0.1547 data: 0.0747 max mem: 9377 +Train: [25] [ 900/6250] eta: 0:12:53 lr: 0.000112 grad: 0.0721 (0.0774) loss: 0.8313 (0.8326) time: 0.1526 data: 0.0644 max mem: 9377 +Train: [25] [1000/6250] eta: 0:12:33 lr: 0.000112 grad: 0.0727 (0.0771) loss: 0.8337 (0.8324) time: 0.1342 data: 0.0550 max mem: 9377 +Train: [25] [1100/6250] eta: 0:12:17 lr: 0.000112 grad: 0.0705 (0.0769) loss: 0.8359 (0.8323) time: 0.1455 data: 0.0644 max mem: 9377 +Train: [25] [1200/6250] eta: 0:12:05 lr: 0.000112 grad: 0.0750 (0.0768) loss: 0.8291 (0.8321) time: 0.1665 data: 0.0934 max mem: 9377 +Train: [25] [1300/6250] eta: 0:11:48 lr: 0.000112 grad: 0.0739 (0.0768) loss: 0.8313 (0.8320) time: 0.1554 data: 0.0741 max mem: 9377 +Train: [25] [1400/6250] eta: 0:11:35 lr: 0.000112 grad: 0.0753 (0.0768) loss: 0.8272 (0.8318) time: 0.1593 data: 0.0829 max mem: 9377 +Train: [25] [1500/6250] eta: 0:11:20 lr: 0.000112 grad: 0.0756 (0.0767) loss: 0.8298 (0.8316) time: 0.1385 data: 0.0586 max mem: 9377 +Train: [25] [1600/6250] eta: 0:11:06 lr: 0.000111 grad: 0.0738 (0.0766) loss: 0.8250 (0.8314) time: 0.1261 data: 0.0481 max mem: 9377 +Train: [25] [1700/6250] eta: 0:10:51 lr: 0.000111 grad: 0.0752 (0.0767) loss: 0.8279 (0.8311) time: 0.1431 data: 0.0626 max mem: 9377 +Train: [25] [1800/6250] eta: 0:10:36 lr: 0.000111 grad: 0.0726 (0.0767) loss: 0.8273 (0.8309) time: 0.1611 data: 0.0821 max mem: 9377 +Train: [25] [1900/6250] eta: 0:10:23 lr: 0.000111 grad: 0.0707 (0.0766) loss: 0.8293 (0.8308) time: 0.1314 data: 0.0451 max mem: 9377 +Train: [25] [2000/6250] eta: 0:10:10 lr: 0.000111 grad: 0.0744 (0.0767) loss: 0.8284 (0.8308) time: 0.1442 data: 0.0619 max mem: 9377 +Train: [25] [2100/6250] eta: 0:09:55 lr: 0.000111 grad: 0.0781 (0.0768) loss: 0.8248 (0.8306) time: 0.1475 data: 0.0673 max mem: 9377 +Train: [25] [2200/6250] eta: 0:09:39 lr: 0.000111 grad: 0.0782 (0.0769) loss: 0.8219 (0.8305) time: 0.1270 data: 0.0472 max mem: 9377 +Train: [25] [2300/6250] eta: 0:09:27 lr: 0.000111 grad: 0.0706 (0.0769) loss: 0.8301 (0.8304) time: 0.1558 data: 0.0806 max mem: 9377 +Train: [25] [2400/6250] eta: 0:09:13 lr: 0.000111 grad: 0.0767 (0.0769) loss: 0.8289 (0.8303) time: 0.1494 data: 0.0694 max mem: 9377 +Train: [25] [2500/6250] eta: 0:08:58 lr: 0.000111 grad: 0.0815 (0.0771) loss: 0.8231 (0.8301) time: 0.1361 data: 0.0551 max mem: 9377 +Train: [25] [2600/6250] eta: 0:08:43 lr: 0.000111 grad: 0.0813 (0.0772) loss: 0.8285 (0.8299) time: 0.1349 data: 0.0539 max mem: 9377 +Train: [25] [2700/6250] eta: 0:08:28 lr: 0.000111 grad: 0.0743 (0.0774) loss: 0.8271 (0.8298) time: 0.1416 data: 0.0628 max mem: 9377 +Train: [25] [2800/6250] eta: 0:08:13 lr: 0.000111 grad: 0.0730 (0.0775) loss: 0.8334 (0.8298) time: 0.1436 data: 0.0629 max mem: 9377 +Train: [25] [2900/6250] eta: 0:07:59 lr: 0.000111 grad: 0.0784 (0.0776) loss: 0.8283 (0.8298) time: 0.1448 data: 0.0627 max mem: 9377 +Train: [25] [3000/6250] eta: 0:07:45 lr: 0.000111 grad: 0.0783 (0.0777) loss: 0.8305 (0.8298) time: 0.1500 data: 0.0600 max mem: 9377 +Train: [25] [3100/6250] eta: 0:07:32 lr: 0.000111 grad: 0.0736 (0.0779) loss: 0.8339 (0.8297) time: 0.1462 data: 0.0660 max mem: 9377 +Train: [25] [3200/6250] eta: 0:07:17 lr: 0.000111 grad: 0.0735 (0.0779) loss: 0.8301 (0.8297) time: 0.1559 data: 0.0689 max mem: 9377 +Train: [25] [3300/6250] eta: 0:07:03 lr: 0.000111 grad: 0.0804 (0.0780) loss: 0.8314 (0.8296) time: 0.1479 data: 0.0701 max mem: 9377 +Train: [25] [3400/6250] eta: 0:06:48 lr: 0.000111 grad: 0.0759 (0.0782) loss: 0.8321 (0.8296) time: 0.1385 data: 0.0539 max mem: 9377 +Train: [25] [3500/6250] eta: 0:06:34 lr: 0.000111 grad: 0.0771 (0.0783) loss: 0.8274 (0.8295) time: 0.1473 data: 0.0641 max mem: 9377 +Train: [25] [3600/6250] eta: 0:06:19 lr: 0.000111 grad: 0.0745 (0.0783) loss: 0.8260 (0.8295) time: 0.1523 data: 0.0685 max mem: 9377 +Train: [25] [3700/6250] eta: 0:06:04 lr: 0.000111 grad: 0.0745 (0.0784) loss: 0.8299 (0.8294) time: 0.1302 data: 0.0525 max mem: 9377 +Train: [25] [3800/6250] eta: 0:05:51 lr: 0.000111 grad: 0.0730 (0.0784) loss: 0.8273 (0.8294) time: 0.1464 data: 0.0650 max mem: 9377 +Train: [25] [3900/6250] eta: 0:05:37 lr: 0.000111 grad: 0.0717 (0.0784) loss: 0.8220 (0.8294) time: 0.1605 data: 0.0794 max mem: 9377 +Train: [25] [4000/6250] eta: 0:05:23 lr: 0.000111 grad: 0.0753 (0.0784) loss: 0.8297 (0.8293) time: 0.1592 data: 0.0726 max mem: 9377 +Train: [25] [4100/6250] eta: 0:05:09 lr: 0.000111 grad: 0.0748 (0.0784) loss: 0.8289 (0.8292) time: 0.1364 data: 0.0478 max mem: 9377 +Train: [25] [4200/6250] eta: 0:04:55 lr: 0.000111 grad: 0.0752 (0.0785) loss: 0.8288 (0.8292) time: 0.1643 data: 0.0870 max mem: 9377 +Train: [25] [4300/6250] eta: 0:04:41 lr: 0.000111 grad: 0.0751 (0.0785) loss: 0.8294 (0.8292) time: 0.1543 data: 0.0744 max mem: 9377 +Train: [25] [4400/6250] eta: 0:04:26 lr: 0.000111 grad: 0.0817 (0.0785) loss: 0.8219 (0.8291) time: 0.1442 data: 0.0591 max mem: 9377 +Train: [25] [4500/6250] eta: 0:04:12 lr: 0.000111 grad: 0.0811 (0.0785) loss: 0.8303 (0.8291) time: 0.1241 data: 0.0408 max mem: 9377 +Train: [25] [4600/6250] eta: 0:03:58 lr: 0.000111 grad: 0.0791 (0.0786) loss: 0.8174 (0.8290) time: 0.1476 data: 0.0595 max mem: 9377 +Train: [25] [4700/6250] eta: 0:03:43 lr: 0.000111 grad: 0.0845 (0.0787) loss: 0.8290 (0.8290) time: 0.1426 data: 0.0543 max mem: 9377 +Train: [25] [4800/6250] eta: 0:03:29 lr: 0.000111 grad: 0.0797 (0.0788) loss: 0.8268 (0.8289) time: 0.1269 data: 0.0369 max mem: 9377 +Train: [25] [4900/6250] eta: 0:03:14 lr: 0.000111 grad: 0.0751 (0.0789) loss: 0.8320 (0.8289) time: 0.1687 data: 0.0756 max mem: 9377 +Train: [25] [5000/6250] eta: 0:02:59 lr: 0.000111 grad: 0.0815 (0.0790) loss: 0.8244 (0.8288) time: 0.1433 data: 0.0595 max mem: 9377 +Train: [25] [5100/6250] eta: 0:02:45 lr: 0.000111 grad: 0.0838 (0.0791) loss: 0.8254 (0.8288) time: 0.1516 data: 0.0770 max mem: 9377 +Train: [25] [5200/6250] eta: 0:02:31 lr: 0.000111 grad: 0.0859 (0.0792) loss: 0.8215 (0.8288) time: 0.1615 data: 0.0807 max mem: 9377 +Train: [25] [5300/6250] eta: 0:02:16 lr: 0.000111 grad: 0.0838 (0.0793) loss: 0.8290 (0.8288) time: 0.1518 data: 0.0678 max mem: 9377 +Train: [25] [5400/6250] eta: 0:02:02 lr: 0.000111 grad: 0.0775 (0.0793) loss: 0.8291 (0.8288) time: 0.1665 data: 0.0850 max mem: 9377 +Train: [25] [5500/6250] eta: 0:01:48 lr: 0.000111 grad: 0.0845 (0.0794) loss: 0.8269 (0.8288) time: 0.1451 data: 0.0607 max mem: 9377 +Train: [25] [5600/6250] eta: 0:01:33 lr: 0.000111 grad: 0.0789 (0.0794) loss: 0.8200 (0.8287) time: 0.1633 data: 0.0785 max mem: 9377 +Train: [25] [5700/6250] eta: 0:01:19 lr: 0.000111 grad: 0.0748 (0.0794) loss: 0.8330 (0.8288) time: 0.1406 data: 0.0598 max mem: 9377 +Train: [25] [5800/6250] eta: 0:01:04 lr: 0.000111 grad: 0.0849 (0.0795) loss: 0.8265 (0.8287) time: 0.1528 data: 0.0723 max mem: 9377 +Train: [25] [5900/6250] eta: 0:00:50 lr: 0.000111 grad: 0.0738 (0.0796) loss: 0.8262 (0.8287) time: 0.1348 data: 0.0493 max mem: 9377 +Train: [25] [6000/6250] eta: 0:00:35 lr: 0.000111 grad: 0.0774 (0.0796) loss: 0.8287 (0.8287) time: 0.1211 data: 0.0338 max mem: 9377 +Train: [25] [6100/6250] eta: 0:00:21 lr: 0.000111 grad: 0.0812 (0.0797) loss: 0.8219 (0.8286) time: 0.1203 data: 0.0381 max mem: 9377 +Train: [25] [6200/6250] eta: 0:00:07 lr: 0.000111 grad: 0.0777 (0.0797) loss: 0.8226 (0.8285) time: 0.1462 data: 0.0632 max mem: 9377 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.0794 (0.0798) loss: 0.8261 (0.8285) time: 0.1465 data: 0.0680 max mem: 9377 +Train: [25] Total time: 0:15:03 (0.1445 s / it) +Averaged stats: lr: 0.000111 grad: 0.0794 (0.0798) loss: 0.8261 (0.8285) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:05:15 loss: 0.8450 (0.8450) time: 5.0961 data: 5.0655 max mem: 9377 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8450 (0.8449) time: 0.1086 data: 0.0837 max mem: 9377 +Eval (hcp-train-subset): [25] Total time: 0:00:12 (0.2017 s / it) +Averaged stats (hcp-train-subset): loss: 0.8450 (0.8449) +Eval (hcp-val): [25] [ 0/62] eta: 0:05:18 loss: 0.8381 (0.8381) time: 5.1408 data: 5.1094 max mem: 9377 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8420 (0.8424) time: 0.1052 data: 0.0782 max mem: 9377 +Eval (hcp-val): [25] Total time: 0:00:12 (0.2054 s / it) +Averaged stats (hcp-val): loss: 0.8420 (0.8424) +Eval (nsd-val): [25] [ 0/62] eta: 0:03:21 loss: 0.8081 (0.8081) time: 3.2456 data: 3.1801 max mem: 9377 +Eval (nsd-val): [25] [61/62] eta: 0:00:00 loss: 0.8159 (0.8170) time: 0.1272 data: 0.1005 max mem: 9377 +Eval (nsd-val): [25] Total time: 0:00:12 (0.2046 s / it) +Averaged stats (nsd-val): loss: 0.8159 (0.8170) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [26] [ 0/6250] eta: 9:18:01 lr: 0.000111 grad: 0.1375 (0.1375) loss: 0.8754 (0.8754) time: 5.3570 data: 5.2582 max mem: 9377 +Train: [26] [ 100/6250] eta: 0:19:28 lr: 0.000111 grad: 0.0786 (0.0881) loss: 0.8391 (0.8368) time: 0.1531 data: 0.0579 max mem: 9377 +Train: [26] [ 200/6250] eta: 0:16:36 lr: 0.000110 grad: 0.0803 (0.0899) loss: 0.8290 (0.8331) time: 0.1551 data: 0.0725 max mem: 9377 +Train: [26] [ 300/6250] eta: 0:15:21 lr: 0.000110 grad: 0.0808 (0.0887) loss: 0.8248 (0.8312) time: 0.1289 data: 0.0418 max mem: 9377 +Train: [26] [ 400/6250] eta: 0:14:38 lr: 0.000110 grad: 0.0688 (0.0855) loss: 0.8259 (0.8304) time: 0.1260 data: 0.0349 max mem: 9377 +Train: [26] [ 500/6250] eta: 0:14:06 lr: 0.000110 grad: 0.0737 (0.0838) loss: 0.8296 (0.8293) time: 0.1409 data: 0.0498 max mem: 9377 +Train: [26] [ 600/6250] eta: 0:13:53 lr: 0.000110 grad: 0.0726 (0.0829) loss: 0.8283 (0.8289) time: 0.1686 data: 0.0906 max mem: 9377 +Train: [26] [ 700/6250] eta: 0:13:41 lr: 0.000110 grad: 0.0762 (0.0822) loss: 0.8275 (0.8286) time: 0.1297 data: 0.0494 max mem: 9377 +Train: [26] [ 800/6250] eta: 0:13:25 lr: 0.000110 grad: 0.0714 (0.0813) loss: 0.8315 (0.8287) time: 0.1282 data: 0.0421 max mem: 9377 +Train: [26] [ 900/6250] eta: 0:13:21 lr: 0.000110 grad: 0.0713 (0.0807) loss: 0.8295 (0.8286) time: 0.1607 data: 0.0768 max mem: 9377 +Train: [26] [1000/6250] eta: 0:13:14 lr: 0.000110 grad: 0.0750 (0.0801) loss: 0.8278 (0.8286) time: 0.1758 data: 0.0971 max mem: 9377 +Train: [26] [1100/6250] eta: 0:13:00 lr: 0.000110 grad: 0.0737 (0.0797) loss: 0.8321 (0.8284) time: 0.1505 data: 0.0664 max mem: 9377 +Train: [26] [1200/6250] eta: 0:12:46 lr: 0.000110 grad: 0.0725 (0.0795) loss: 0.8302 (0.8283) time: 0.1303 data: 0.0453 max mem: 9377 +Train: [26] [1300/6250] eta: 0:12:31 lr: 0.000110 grad: 0.0722 (0.0793) loss: 0.8288 (0.8282) time: 0.1455 data: 0.0599 max mem: 9377 +Train: [26] [1400/6250] eta: 0:12:14 lr: 0.000110 grad: 0.0761 (0.0791) loss: 0.8194 (0.8281) time: 0.1456 data: 0.0592 max mem: 9377 +Train: [26] [1500/6250] eta: 0:11:59 lr: 0.000110 grad: 0.0778 (0.0791) loss: 0.8259 (0.8281) time: 0.1663 data: 0.0847 max mem: 9377 +Train: [26] [1600/6250] eta: 0:11:39 lr: 0.000110 grad: 0.0749 (0.0790) loss: 0.8253 (0.8281) time: 0.1409 data: 0.0580 max mem: 9377 +Train: [26] [1700/6250] eta: 0:11:21 lr: 0.000110 grad: 0.0753 (0.0789) loss: 0.8276 (0.8281) time: 0.1230 data: 0.0372 max mem: 9377 +Train: [26] [1800/6250] eta: 0:11:04 lr: 0.000110 grad: 0.0769 (0.0789) loss: 0.8258 (0.8281) time: 0.1284 data: 0.0484 max mem: 9377 +Train: [26] [1900/6250] eta: 0:10:46 lr: 0.000110 grad: 0.0788 (0.0789) loss: 0.8210 (0.8280) time: 0.1520 data: 0.0671 max mem: 9377 +Train: [26] [2000/6250] eta: 0:10:29 lr: 0.000110 grad: 0.0727 (0.0788) loss: 0.8295 (0.8280) time: 0.1506 data: 0.0724 max mem: 9377 +Train: [26] [2100/6250] eta: 0:10:12 lr: 0.000110 grad: 0.0779 (0.0789) loss: 0.8276 (0.8279) time: 0.1259 data: 0.0442 max mem: 9377 +Train: [26] [2200/6250] eta: 0:09:55 lr: 0.000110 grad: 0.0722 (0.0791) loss: 0.8321 (0.8280) time: 0.1422 data: 0.0590 max mem: 9377 +Train: [26] [2300/6250] eta: 0:09:39 lr: 0.000110 grad: 0.0773 (0.0790) loss: 0.8280 (0.8280) time: 0.1525 data: 0.0728 max mem: 9377 +Train: [26] [2400/6250] eta: 0:09:23 lr: 0.000110 grad: 0.0780 (0.0790) loss: 0.8206 (0.8280) time: 0.1131 data: 0.0337 max mem: 9377 +Train: [26] [2500/6250] eta: 0:09:09 lr: 0.000110 grad: 0.0757 (0.0790) loss: 0.8242 (0.8280) time: 0.1755 data: 0.0931 max mem: 9377 +Train: [26] [2600/6250] eta: 0:08:54 lr: 0.000110 grad: 0.0759 (0.0791) loss: 0.8242 (0.8279) time: 0.1394 data: 0.0593 max mem: 9377 +Train: [26] [2700/6250] eta: 0:08:40 lr: 0.000110 grad: 0.0799 (0.0791) loss: 0.8277 (0.8278) time: 0.1514 data: 0.0719 max mem: 9377 +Train: [26] [2800/6250] eta: 0:08:25 lr: 0.000110 grad: 0.0768 (0.0792) loss: 0.8288 (0.8277) time: 0.1136 data: 0.0292 max mem: 9377 +Train: [26] [2900/6250] eta: 0:08:10 lr: 0.000110 grad: 0.0778 (0.0793) loss: 0.8223 (0.8276) time: 0.1305 data: 0.0463 max mem: 9377 +Train: [26] [3000/6250] eta: 0:07:55 lr: 0.000110 grad: 0.0803 (0.0793) loss: 0.8260 (0.8275) time: 0.1548 data: 0.0740 max mem: 9377 +Train: [26] [3100/6250] eta: 0:07:40 lr: 0.000110 grad: 0.0824 (0.0795) loss: 0.8207 (0.8273) time: 0.1511 data: 0.0719 max mem: 9377 +Train: [26] [3200/6250] eta: 0:07:25 lr: 0.000110 grad: 0.0764 (0.0797) loss: 0.8259 (0.8272) time: 0.1389 data: 0.0559 max mem: 9377 +Train: [26] [3300/6250] eta: 0:07:10 lr: 0.000110 grad: 0.0853 (0.0798) loss: 0.8231 (0.8269) time: 0.1257 data: 0.0410 max mem: 9377 +Train: [26] [3400/6250] eta: 0:06:55 lr: 0.000110 grad: 0.0768 (0.0799) loss: 0.8237 (0.8268) time: 0.1451 data: 0.0648 max mem: 9377 +Train: [26] [3500/6250] eta: 0:06:40 lr: 0.000110 grad: 0.0826 (0.0800) loss: 0.8195 (0.8266) time: 0.1359 data: 0.0558 max mem: 9377 +Train: [26] [3600/6250] eta: 0:06:25 lr: 0.000110 grad: 0.0732 (0.0801) loss: 0.8244 (0.8265) time: 0.1395 data: 0.0571 max mem: 9377 +Train: [26] [3700/6250] eta: 0:06:11 lr: 0.000110 grad: 0.0741 (0.0802) loss: 0.8172 (0.8263) time: 0.1635 data: 0.0816 max mem: 9377 +Train: [26] [3800/6250] eta: 0:05:56 lr: 0.000110 grad: 0.0778 (0.0803) loss: 0.8142 (0.8262) time: 0.1458 data: 0.0665 max mem: 9377 +Train: [26] [3900/6250] eta: 0:05:41 lr: 0.000110 grad: 0.0786 (0.0805) loss: 0.8278 (0.8261) time: 0.1610 data: 0.0858 max mem: 9377 +Train: [26] [4000/6250] eta: 0:05:26 lr: 0.000110 grad: 0.0779 (0.0804) loss: 0.8171 (0.8259) time: 0.1313 data: 0.0580 max mem: 9377 +Train: [26] [4100/6250] eta: 0:05:12 lr: 0.000110 grad: 0.0747 (0.0805) loss: 0.8267 (0.8258) time: 0.1834 data: 0.1050 max mem: 9377 +Train: [26] [4200/6250] eta: 0:04:58 lr: 0.000110 grad: 0.0787 (0.0804) loss: 0.8238 (0.8257) time: 0.1443 data: 0.0559 max mem: 9377 +Train: [26] [4300/6250] eta: 0:04:44 lr: 0.000110 grad: 0.0841 (0.0805) loss: 0.8243 (0.8257) time: 0.1516 data: 0.0653 max mem: 9377 +Train: [26] [4400/6250] eta: 0:04:30 lr: 0.000110 grad: 0.0742 (0.0804) loss: 0.8298 (0.8256) time: 0.1632 data: 0.0769 max mem: 9377 +Train: [26] [4500/6250] eta: 0:04:16 lr: 0.000110 grad: 0.0727 (0.0804) loss: 0.8211 (0.8255) time: 0.1433 data: 0.0649 max mem: 9377 +Train: [26] [4600/6250] eta: 0:04:01 lr: 0.000110 grad: 0.0793 (0.0804) loss: 0.8222 (0.8254) time: 0.1434 data: 0.0608 max mem: 9377 +Train: [26] [4700/6250] eta: 0:03:47 lr: 0.000110 grad: 0.0793 (0.0805) loss: 0.8200 (0.8253) time: 0.1486 data: 0.0614 max mem: 9377 +Train: [26] [4800/6250] eta: 0:03:32 lr: 0.000109 grad: 0.0777 (0.0805) loss: 0.8219 (0.8252) time: 0.1326 data: 0.0453 max mem: 9377 +Train: [26] [4900/6250] eta: 0:03:17 lr: 0.000109 grad: 0.0837 (0.0805) loss: 0.8218 (0.8251) time: 0.1473 data: 0.0631 max mem: 9377 +Train: [26] [5000/6250] eta: 0:03:03 lr: 0.000109 grad: 0.0805 (0.0806) loss: 0.8121 (0.8249) time: 0.1400 data: 0.0536 max mem: 9377 +Train: [26] [5100/6250] eta: 0:02:48 lr: 0.000109 grad: 0.0802 (0.0806) loss: 0.8184 (0.8248) time: 0.1494 data: 0.0681 max mem: 9377 +Train: [26] [5200/6250] eta: 0:02:34 lr: 0.000109 grad: 0.0775 (0.0806) loss: 0.8221 (0.8248) time: 0.1555 data: 0.0756 max mem: 9377 +Train: [26] [5300/6250] eta: 0:02:19 lr: 0.000109 grad: 0.0767 (0.0807) loss: 0.8251 (0.8247) time: 0.1320 data: 0.0532 max mem: 9377 +Train: [26] [5400/6250] eta: 0:02:04 lr: 0.000109 grad: 0.0803 (0.0807) loss: 0.8232 (0.8247) time: 0.1317 data: 0.0462 max mem: 9377 +Train: [26] [5500/6250] eta: 0:01:49 lr: 0.000109 grad: 0.0769 (0.0807) loss: 0.8269 (0.8246) time: 0.1258 data: 0.0425 max mem: 9377 +Train: [26] [5600/6250] eta: 0:01:35 lr: 0.000109 grad: 0.0795 (0.0808) loss: 0.8229 (0.8246) time: 0.1402 data: 0.0534 max mem: 9377 +Train: [26] [5700/6250] eta: 0:01:20 lr: 0.000109 grad: 0.0817 (0.0809) loss: 0.8208 (0.8246) time: 0.1426 data: 0.0587 max mem: 9377 +Train: [26] [5800/6250] eta: 0:01:05 lr: 0.000109 grad: 0.0766 (0.0809) loss: 0.8205 (0.8245) time: 0.1384 data: 0.0566 max mem: 9377 +Train: [26] [5900/6250] eta: 0:00:51 lr: 0.000109 grad: 0.0820 (0.0810) loss: 0.8226 (0.8244) time: 0.1341 data: 0.0521 max mem: 9377 +Train: [26] [6000/6250] eta: 0:00:36 lr: 0.000109 grad: 0.0805 (0.0811) loss: 0.8190 (0.8244) time: 0.1197 data: 0.0392 max mem: 9377 +Train: [26] [6100/6250] eta: 0:00:21 lr: 0.000109 grad: 0.0815 (0.0812) loss: 0.8255 (0.8244) time: 0.1460 data: 0.0640 max mem: 9377 +Train: [26] [6200/6250] eta: 0:00:07 lr: 0.000109 grad: 0.0816 (0.0812) loss: 0.8257 (0.8244) time: 0.1334 data: 0.0561 max mem: 9377 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.0850 (0.0812) loss: 0.8210 (0.8243) time: 0.1136 data: 0.0266 max mem: 9377 +Train: [26] Total time: 0:15:14 (0.1463 s / it) +Averaged stats: lr: 0.000109 grad: 0.0850 (0.0812) loss: 0.8210 (0.8243) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:03:54 loss: 0.8428 (0.8428) time: 3.7789 data: 3.6627 max mem: 9377 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8428 (0.8439) time: 0.1210 data: 0.0962 max mem: 9377 +Eval (hcp-train-subset): [26] Total time: 0:00:12 (0.1995 s / it) +Averaged stats (hcp-train-subset): loss: 0.8428 (0.8439) +Eval (hcp-val): [26] [ 0/62] eta: 0:03:08 loss: 0.8357 (0.8357) time: 3.0357 data: 2.9753 max mem: 9377 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8430 (0.8420) time: 0.1259 data: 0.1008 max mem: 9377 +Eval (hcp-val): [26] Total time: 0:00:12 (0.2041 s / it) +Averaged stats (hcp-val): loss: 0.8430 (0.8420) +Eval (nsd-val): [26] [ 0/62] eta: 0:02:59 loss: 0.8093 (0.8093) time: 2.9002 data: 2.8255 max mem: 9377 +Eval (nsd-val): [26] [61/62] eta: 0:00:00 loss: 0.8164 (0.8177) time: 0.0992 data: 0.0740 max mem: 9377 +Eval (nsd-val): [26] Total time: 0:00:12 (0.2078 s / it) +Averaged stats (nsd-val): loss: 0.8164 (0.8177) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [27] [ 0/6250] eta: 8:20:27 lr: 0.000109 grad: 0.0628 (0.0628) loss: 0.8516 (0.8516) time: 4.8044 data: 4.5692 max mem: 9377 +Train: [27] [ 100/6250] eta: 0:19:04 lr: 0.000109 grad: 0.0840 (0.0968) loss: 0.8267 (0.8346) time: 0.1457 data: 0.0557 max mem: 9377 +Train: [27] [ 200/6250] eta: 0:16:37 lr: 0.000109 grad: 0.0694 (0.0929) loss: 0.8286 (0.8277) time: 0.1537 data: 0.0619 max mem: 9377 +Train: [27] [ 300/6250] eta: 0:15:48 lr: 0.000109 grad: 0.0808 (0.0906) loss: 0.8135 (0.8255) time: 0.1678 data: 0.0819 max mem: 9377 +Train: [27] [ 400/6250] eta: 0:15:02 lr: 0.000109 grad: 0.0822 (0.0895) loss: 0.8191 (0.8238) time: 0.1321 data: 0.0484 max mem: 9377 +Train: [27] [ 500/6250] eta: 0:14:34 lr: 0.000109 grad: 0.0786 (0.0881) loss: 0.8141 (0.8226) time: 0.1601 data: 0.0715 max mem: 9377 +Train: [27] [ 600/6250] eta: 0:14:02 lr: 0.000109 grad: 0.0790 (0.0870) loss: 0.8188 (0.8220) time: 0.1408 data: 0.0586 max mem: 9377 +Train: [27] [ 700/6250] eta: 0:13:39 lr: 0.000109 grad: 0.0833 (0.0862) loss: 0.8272 (0.8225) time: 0.1370 data: 0.0552 max mem: 9377 +Train: [27] [ 800/6250] eta: 0:13:20 lr: 0.000109 grad: 0.0749 (0.0858) loss: 0.8234 (0.8224) time: 0.1363 data: 0.0452 max mem: 9377 +Train: [27] [ 900/6250] eta: 0:13:04 lr: 0.000109 grad: 0.0734 (0.0857) loss: 0.8323 (0.8228) time: 0.1648 data: 0.0819 max mem: 9377 +Train: [27] [1000/6250] eta: 0:12:46 lr: 0.000109 grad: 0.0774 (0.0854) loss: 0.8209 (0.8228) time: 0.1346 data: 0.0526 max mem: 9377 +Train: [27] [1100/6250] eta: 0:12:30 lr: 0.000109 grad: 0.0847 (0.0853) loss: 0.8256 (0.8227) time: 0.1259 data: 0.0419 max mem: 9377 +Train: [27] [1200/6250] eta: 0:12:17 lr: 0.000109 grad: 0.0763 (0.0852) loss: 0.8191 (0.8226) time: 0.1452 data: 0.0631 max mem: 9377 +Train: [27] [1300/6250] eta: 0:12:00 lr: 0.000109 grad: 0.0765 (0.0850) loss: 0.8231 (0.8224) time: 0.1484 data: 0.0689 max mem: 9377 +Train: [27] [1400/6250] eta: 0:11:45 lr: 0.000109 grad: 0.0826 (0.0847) loss: 0.8187 (0.8223) time: 0.1601 data: 0.0840 max mem: 9377 +Train: [27] [1500/6250] eta: 0:11:36 lr: 0.000109 grad: 0.0858 (0.0848) loss: 0.8213 (0.8223) time: 0.1841 data: 0.1082 max mem: 9377 +Train: [27] [1600/6250] eta: 0:11:22 lr: 0.000109 grad: 0.0822 (0.0847) loss: 0.8265 (0.8221) time: 0.1285 data: 0.0525 max mem: 9377 +Train: [27] [1700/6250] eta: 0:11:09 lr: 0.000109 grad: 0.0827 (0.0847) loss: 0.8199 (0.8219) time: 0.1559 data: 0.0732 max mem: 9377 +Train: [27] [1800/6250] eta: 0:10:57 lr: 0.000109 grad: 0.0795 (0.0848) loss: 0.8258 (0.8219) time: 0.1617 data: 0.0806 max mem: 9377 +Train: [27] [1900/6250] eta: 0:10:42 lr: 0.000109 grad: 0.0806 (0.0849) loss: 0.8180 (0.8219) time: 0.1601 data: 0.0800 max mem: 9377 +Train: [27] [2000/6250] eta: 0:10:28 lr: 0.000109 grad: 0.0861 (0.0850) loss: 0.8184 (0.8217) time: 0.1414 data: 0.0597 max mem: 9377 +Train: [27] [2100/6250] eta: 0:10:14 lr: 0.000109 grad: 0.0822 (0.0851) loss: 0.8170 (0.8217) time: 0.1361 data: 0.0519 max mem: 9377 +Train: [27] [2200/6250] eta: 0:09:58 lr: 0.000109 grad: 0.0860 (0.0851) loss: 0.8172 (0.8217) time: 0.1547 data: 0.0762 max mem: 9377 +Train: [27] [2300/6250] eta: 0:09:44 lr: 0.000109 grad: 0.0798 (0.0851) loss: 0.8226 (0.8217) time: 0.1214 data: 0.0371 max mem: 9377 +Train: [27] [2400/6250] eta: 0:09:30 lr: 0.000109 grad: 0.0801 (0.0851) loss: 0.8256 (0.8218) time: 0.1491 data: 0.0686 max mem: 9377 +Train: [27] [2500/6250] eta: 0:09:14 lr: 0.000109 grad: 0.0817 (0.0851) loss: 0.8220 (0.8218) time: 0.1404 data: 0.0536 max mem: 9377 +Train: [27] [2600/6250] eta: 0:09:02 lr: 0.000109 grad: 0.0853 (0.0851) loss: 0.8237 (0.8219) time: 0.1938 data: 0.1183 max mem: 9377 +Train: [27] [2700/6250] eta: 0:08:49 lr: 0.000109 grad: 0.0783 (0.0849) loss: 0.8231 (0.8220) time: 0.1557 data: 0.0771 max mem: 9377 +Train: [27] [2800/6250] eta: 0:08:35 lr: 0.000109 grad: 0.0799 (0.0849) loss: 0.8208 (0.8221) time: 0.1446 data: 0.0589 max mem: 9377 +Train: [27] [2900/6250] eta: 0:08:21 lr: 0.000109 grad: 0.0751 (0.0847) loss: 0.8259 (0.8222) time: 0.1486 data: 0.0766 max mem: 9377 +Train: [27] [3000/6250] eta: 0:08:07 lr: 0.000109 grad: 0.0726 (0.0845) loss: 0.8304 (0.8224) time: 0.1462 data: 0.0671 max mem: 9377 +Train: [27] [3100/6250] eta: 0:07:54 lr: 0.000108 grad: 0.0794 (0.0844) loss: 0.8271 (0.8224) time: 0.1736 data: 0.0929 max mem: 9377 +Train: [27] [3200/6250] eta: 0:07:39 lr: 0.000108 grad: 0.0812 (0.0844) loss: 0.8234 (0.8224) time: 0.1551 data: 0.0731 max mem: 9377 +Train: [27] [3300/6250] eta: 0:07:25 lr: 0.000108 grad: 0.0736 (0.0844) loss: 0.8263 (0.8225) time: 0.1647 data: 0.0840 max mem: 9377 +Train: [27] [3400/6250] eta: 0:07:12 lr: 0.000108 grad: 0.0812 (0.0845) loss: 0.8185 (0.8226) time: 0.1703 data: 0.0927 max mem: 9377 +Train: [27] [3500/6250] eta: 0:06:57 lr: 0.000108 grad: 0.0823 (0.0845) loss: 0.8334 (0.8227) time: 0.1694 data: 0.0899 max mem: 9377 +Train: [27] [3600/6250] eta: 0:06:43 lr: 0.000108 grad: 0.0767 (0.0845) loss: 0.8316 (0.8228) time: 0.1626 data: 0.0846 max mem: 9377 +Train: [27] [3700/6250] eta: 0:06:27 lr: 0.000108 grad: 0.0837 (0.0845) loss: 0.8268 (0.8228) time: 0.1307 data: 0.0543 max mem: 9377 +Train: [27] [3800/6250] eta: 0:06:12 lr: 0.000108 grad: 0.0855 (0.0846) loss: 0.8235 (0.8228) time: 0.1651 data: 0.0911 max mem: 9377 +Train: [27] [3900/6250] eta: 0:05:57 lr: 0.000108 grad: 0.0866 (0.0847) loss: 0.8238 (0.8228) time: 0.1498 data: 0.0679 max mem: 9377 +Train: [27] [4000/6250] eta: 0:05:42 lr: 0.000108 grad: 0.0785 (0.0848) loss: 0.8321 (0.8228) time: 0.1255 data: 0.0392 max mem: 9377 +Train: [27] [4100/6250] eta: 0:05:27 lr: 0.000108 grad: 0.0812 (0.0848) loss: 0.8306 (0.8229) time: 0.1389 data: 0.0566 max mem: 9377 +Train: [27] [4200/6250] eta: 0:05:11 lr: 0.000108 grad: 0.0849 (0.0848) loss: 0.8280 (0.8230) time: 0.1174 data: 0.0332 max mem: 9377 +Train: [27] [4300/6250] eta: 0:04:56 lr: 0.000108 grad: 0.0869 (0.0848) loss: 0.8258 (0.8231) time: 0.1888 data: 0.1071 max mem: 9377 +Train: [27] [4400/6250] eta: 0:04:41 lr: 0.000108 grad: 0.0771 (0.0849) loss: 0.8292 (0.8231) time: 0.1500 data: 0.0677 max mem: 9377 +Train: [27] [4500/6250] eta: 0:04:27 lr: 0.000108 grad: 0.0852 (0.0849) loss: 0.8272 (0.8232) time: 0.1586 data: 0.0748 max mem: 9377 +Train: [27] [4600/6250] eta: 0:04:12 lr: 0.000108 grad: 0.0860 (0.0850) loss: 0.8283 (0.8233) time: 0.1729 data: 0.0892 max mem: 9377 +Train: [27] [4700/6250] eta: 0:03:57 lr: 0.000108 grad: 0.0826 (0.0849) loss: 0.8263 (0.8233) time: 0.1948 data: 0.1111 max mem: 9377 +Train: [27] [4800/6250] eta: 0:03:42 lr: 0.000108 grad: 0.0836 (0.0849) loss: 0.8223 (0.8234) time: 0.2069 data: 0.1238 max mem: 9377 +Train: [27] [4900/6250] eta: 0:03:27 lr: 0.000108 grad: 0.0794 (0.0849) loss: 0.8310 (0.8234) time: 0.1682 data: 0.0803 max mem: 9377 +Train: [27] [5000/6250] eta: 0:03:12 lr: 0.000108 grad: 0.0808 (0.0849) loss: 0.8229 (0.8235) time: 0.1443 data: 0.0581 max mem: 9377 +Train: [27] [5100/6250] eta: 0:02:57 lr: 0.000108 grad: 0.0763 (0.0849) loss: 0.8297 (0.8235) time: 0.1551 data: 0.0740 max mem: 9377 +Train: [27] [5200/6250] eta: 0:02:41 lr: 0.000108 grad: 0.0783 (0.0848) loss: 0.8241 (0.8236) time: 0.1642 data: 0.0714 max mem: 9377 +Train: [27] [5300/6250] eta: 0:02:26 lr: 0.000108 grad: 0.0784 (0.0848) loss: 0.8221 (0.8235) time: 0.1856 data: 0.1005 max mem: 9377 +Train: [27] [5400/6250] eta: 0:02:11 lr: 0.000108 grad: 0.0815 (0.0847) loss: 0.8230 (0.8236) time: 0.1455 data: 0.0620 max mem: 9377 +Train: [27] [5500/6250] eta: 0:01:55 lr: 0.000108 grad: 0.0864 (0.0847) loss: 0.8191 (0.8236) time: 0.1711 data: 0.0849 max mem: 9377 +Train: [27] [5600/6250] eta: 0:01:40 lr: 0.000108 grad: 0.0855 (0.0849) loss: 0.8197 (0.8236) time: 0.1521 data: 0.0690 max mem: 9377 +Train: [27] [5700/6250] eta: 0:01:24 lr: 0.000108 grad: 0.0834 (0.0849) loss: 0.8274 (0.8236) time: 0.1475 data: 0.0688 max mem: 9377 +Train: [27] [5800/6250] eta: 0:01:09 lr: 0.000108 grad: 0.0808 (0.0849) loss: 0.8279 (0.8236) time: 0.1407 data: 0.0587 max mem: 9377 +Train: [27] [5900/6250] eta: 0:00:53 lr: 0.000108 grad: 0.0793 (0.0849) loss: 0.8278 (0.8237) time: 0.1410 data: 0.0618 max mem: 9377 +Train: [27] [6000/6250] eta: 0:00:38 lr: 0.000108 grad: 0.0769 (0.0849) loss: 0.8300 (0.8237) time: 0.1355 data: 0.0547 max mem: 9377 +Train: [27] [6100/6250] eta: 0:00:23 lr: 0.000108 grad: 0.0810 (0.0848) loss: 0.8302 (0.8237) time: 0.1445 data: 0.0651 max mem: 9377 +Train: [27] [6200/6250] eta: 0:00:07 lr: 0.000108 grad: 0.0769 (0.0849) loss: 0.8230 (0.8237) time: 0.1455 data: 0.0634 max mem: 9377 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.0763 (0.0849) loss: 0.8270 (0.8237) time: 0.1460 data: 0.0635 max mem: 9377 +Train: [27] Total time: 0:16:00 (0.1537 s / it) +Averaged stats: lr: 0.000108 grad: 0.0763 (0.0849) loss: 0.8270 (0.8237) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:03:24 loss: 0.8429 (0.8429) time: 3.3020 data: 3.2265 max mem: 9377 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8437 (0.8442) time: 0.1306 data: 0.1038 max mem: 9377 +Eval (hcp-train-subset): [27] Total time: 0:00:12 (0.2043 s / it) +Averaged stats (hcp-train-subset): loss: 0.8437 (0.8442) +Eval (hcp-val): [27] [ 0/62] eta: 0:03:21 loss: 0.8422 (0.8422) time: 3.2522 data: 3.2049 max mem: 9377 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8407 (0.8422) time: 0.1141 data: 0.0889 max mem: 9377 +Eval (hcp-val): [27] Total time: 0:00:12 (0.2075 s / it) +Averaged stats (hcp-val): loss: 0.8407 (0.8422) +Eval (nsd-val): [27] [ 0/62] eta: 0:05:25 loss: 0.8109 (0.8109) time: 5.2516 data: 5.2188 max mem: 9377 +Eval (nsd-val): [27] [61/62] eta: 0:00:00 loss: 0.8199 (0.8222) time: 0.1315 data: 0.1042 max mem: 9377 +Eval (nsd-val): [27] Total time: 0:00:13 (0.2138 s / it) +Averaged stats (nsd-val): loss: 0.8199 (0.8222) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [28] [ 0/6250] eta: 9:41:25 lr: 0.000108 grad: nan (nan) loss: 0.8546 (0.8546) time: 5.5817 data: 5.4790 max mem: 9377 +Train: [28] [ 100/6250] eta: 0:19:46 lr: 0.000108 grad: 0.0820 (0.1042) loss: 0.8290 (0.8273) time: 0.1392 data: 0.0429 max mem: 9377 +Train: [28] [ 200/6250] eta: 0:16:54 lr: 0.000108 grad: 0.0829 (0.0970) loss: 0.8240 (0.8238) time: 0.1328 data: 0.0433 max mem: 9377 +Train: [28] [ 300/6250] eta: 0:15:56 lr: 0.000108 grad: 0.0842 (0.0921) loss: 0.8220 (0.8228) time: 0.1163 data: 0.0002 max mem: 9377 +Train: [28] [ 400/6250] eta: 0:15:09 lr: 0.000108 grad: 0.0795 (0.0889) loss: 0.8237 (0.8225) time: 0.1428 data: 0.0551 max mem: 9377 +Train: [28] [ 500/6250] eta: 0:14:40 lr: 0.000108 grad: 0.0777 (0.0866) loss: 0.8260 (0.8225) time: 0.1580 data: 0.0719 max mem: 9377 +Train: [28] [ 600/6250] eta: 0:14:11 lr: 0.000108 grad: 0.0777 (0.0853) loss: 0.8285 (0.8233) time: 0.1347 data: 0.0455 max mem: 9377 +Train: [28] [ 700/6250] eta: 0:13:48 lr: 0.000108 grad: 0.0793 (0.0845) loss: 0.8201 (0.8238) time: 0.1569 data: 0.0725 max mem: 9377 +Train: [28] [ 800/6250] eta: 0:13:27 lr: 0.000108 grad: 0.0755 (0.0840) loss: 0.8250 (0.8242) time: 0.1236 data: 0.0367 max mem: 9377 +Train: [28] [ 900/6250] eta: 0:13:09 lr: 0.000108 grad: 0.0740 (0.0834) loss: 0.8309 (0.8245) time: 0.1578 data: 0.0715 max mem: 9377 +Train: [28] [1000/6250] eta: 0:12:50 lr: 0.000108 grad: 0.0845 (0.0831) loss: 0.8174 (0.8244) time: 0.1387 data: 0.0580 max mem: 9377 +Train: [28] [1100/6250] eta: 0:12:33 lr: 0.000108 grad: 0.0787 (0.0829) loss: 0.8230 (0.8244) time: 0.1216 data: 0.0390 max mem: 9377 +Train: [28] [1200/6250] eta: 0:12:17 lr: 0.000108 grad: 0.0783 (0.0827) loss: 0.8298 (0.8246) time: 0.1518 data: 0.0751 max mem: 9377 +Train: [28] [1300/6250] eta: 0:11:58 lr: 0.000107 grad: 0.0793 (0.0824) loss: 0.8257 (0.8247) time: 0.1273 data: 0.0451 max mem: 9377 +Train: [28] [1400/6250] eta: 0:11:41 lr: 0.000107 grad: 0.0778 (0.0826) loss: 0.8308 (0.8247) time: 0.1362 data: 0.0454 max mem: 9377 +Train: [28] [1500/6250] eta: 0:11:24 lr: 0.000107 grad: 0.0767 (0.0824) loss: 0.8306 (0.8248) time: 0.1286 data: 0.0496 max mem: 9377 +Train: [28] [1600/6250] eta: 0:11:08 lr: 0.000107 grad: 0.0814 (0.0823) loss: 0.8274 (0.8248) time: 0.1462 data: 0.0624 max mem: 9377 +Train: [28] [1700/6250] eta: 0:10:51 lr: 0.000107 grad: 0.0750 (0.0821) loss: 0.8335 (0.8250) time: 0.1345 data: 0.0573 max mem: 9377 +Train: [28] [1800/6250] eta: 0:10:37 lr: 0.000107 grad: 0.0735 (0.0819) loss: 0.8276 (0.8251) time: 0.1417 data: 0.0654 max mem: 9377 +Train: [28] [1900/6250] eta: 0:10:22 lr: 0.000107 grad: 0.0766 (0.0819) loss: 0.8306 (0.8253) time: 0.1499 data: 0.0719 max mem: 9377 +Train: [28] [2000/6250] eta: 0:10:07 lr: 0.000107 grad: 0.0818 (0.0818) loss: 0.8317 (0.8254) time: 0.1550 data: 0.0747 max mem: 9377 +Train: [28] [2100/6250] eta: 0:09:54 lr: 0.000107 grad: 0.0743 (0.0817) loss: 0.8279 (0.8256) time: 0.1451 data: 0.0628 max mem: 9377 +Train: [28] [2200/6250] eta: 0:09:42 lr: 0.000107 grad: 0.0840 (0.0817) loss: 0.8283 (0.8257) time: 0.1715 data: 0.0921 max mem: 9377 +Train: [28] [2300/6250] eta: 0:09:32 lr: 0.000107 grad: 0.0758 (0.0816) loss: 0.8246 (0.8259) time: 0.1633 data: 0.0900 max mem: 9377 +Train: [28] [2400/6250] eta: 0:09:18 lr: 0.000107 grad: 0.0820 (0.0817) loss: 0.8334 (0.8259) time: 0.1460 data: 0.0678 max mem: 9377 +Train: [28] [2500/6250] eta: 0:09:04 lr: 0.000107 grad: 0.0780 (0.0817) loss: 0.8257 (0.8260) time: 0.1438 data: 0.0634 max mem: 9377 +Train: [28] [2600/6250] eta: 0:08:50 lr: 0.000107 grad: 0.0793 (0.0818) loss: 0.8245 (0.8260) time: 0.1494 data: 0.0657 max mem: 9377 +Train: [28] [2700/6250] eta: 0:08:35 lr: 0.000107 grad: 0.0806 (0.0819) loss: 0.8268 (0.8260) time: 0.1395 data: 0.0581 max mem: 9377 +Train: [28] [2800/6250] eta: 0:08:20 lr: 0.000107 grad: 0.0807 (0.0820) loss: 0.8291 (0.8259) time: 0.1504 data: 0.0667 max mem: 9377 +Train: [28] [2900/6250] eta: 0:08:05 lr: 0.000107 grad: 0.0810 (0.0819) loss: 0.8309 (0.8260) time: 0.1192 data: 0.0430 max mem: 9377 +Train: [28] [3000/6250] eta: 0:07:51 lr: 0.000107 grad: 0.0768 (0.0820) loss: 0.8240 (0.8260) time: 0.1600 data: 0.0802 max mem: 9377 +Train: [28] [3100/6250] eta: 0:07:36 lr: 0.000107 grad: 0.0799 (0.0821) loss: 0.8269 (0.8260) time: 0.1436 data: 0.0642 max mem: 9377 +Train: [28] [3200/6250] eta: 0:07:21 lr: 0.000107 grad: 0.0823 (0.0821) loss: 0.8322 (0.8260) time: 0.1284 data: 0.0380 max mem: 9377 +Train: [28] [3300/6250] eta: 0:07:06 lr: 0.000107 grad: 0.0828 (0.0822) loss: 0.8273 (0.8261) time: 0.1543 data: 0.0741 max mem: 9377 +Train: [28] [3400/6250] eta: 0:06:51 lr: 0.000107 grad: 0.0843 (0.0822) loss: 0.8258 (0.8261) time: 0.1446 data: 0.0676 max mem: 9377 +Train: [28] [3500/6250] eta: 0:06:36 lr: 0.000107 grad: 0.0812 (0.0822) loss: 0.8241 (0.8260) time: 0.1321 data: 0.0473 max mem: 9377 +Train: [28] [3600/6250] eta: 0:06:21 lr: 0.000107 grad: 0.0810 (0.0821) loss: 0.8323 (0.8260) time: 0.1310 data: 0.0438 max mem: 9377 +Train: [28] [3700/6250] eta: 0:06:06 lr: 0.000107 grad: 0.0784 (0.0821) loss: 0.8262 (0.8261) time: 0.1481 data: 0.0657 max mem: 9377 +Train: [28] [3800/6250] eta: 0:05:52 lr: 0.000107 grad: 0.0771 (0.0822) loss: 0.8250 (0.8261) time: 0.1565 data: 0.0777 max mem: 9377 +Train: [28] [3900/6250] eta: 0:05:37 lr: 0.000107 grad: 0.0799 (0.0822) loss: 0.8213 (0.8260) time: 0.1370 data: 0.0560 max mem: 9377 +Train: [28] [4000/6250] eta: 0:05:23 lr: 0.000107 grad: 0.0776 (0.0823) loss: 0.8240 (0.8261) time: 0.1390 data: 0.0574 max mem: 9377 +Train: [28] [4100/6250] eta: 0:05:08 lr: 0.000107 grad: 0.0779 (0.0824) loss: 0.8172 (0.8260) time: 0.1451 data: 0.0655 max mem: 9377 +Train: [28] [4200/6250] eta: 0:04:54 lr: 0.000107 grad: 0.0784 (0.0824) loss: 0.8270 (0.8259) time: 0.1474 data: 0.0646 max mem: 9377 +Train: [28] [4300/6250] eta: 0:04:39 lr: 0.000107 grad: 0.0807 (0.0825) loss: 0.8252 (0.8259) time: 0.1353 data: 0.0540 max mem: 9377 +Train: [28] [4400/6250] eta: 0:04:25 lr: 0.000107 grad: 0.0777 (0.0824) loss: 0.8210 (0.8259) time: 0.1547 data: 0.0704 max mem: 9377 +Train: [28] [4500/6250] eta: 0:04:10 lr: 0.000107 grad: 0.0856 (0.0825) loss: 0.8194 (0.8258) time: 0.1475 data: 0.0683 max mem: 9377 +Train: [28] [4600/6250] eta: 0:03:56 lr: 0.000107 grad: 0.0766 (0.0825) loss: 0.8196 (0.8257) time: 0.1368 data: 0.0586 max mem: 9377 +Train: [28] [4700/6250] eta: 0:03:42 lr: 0.000107 grad: 0.0810 (0.0825) loss: 0.8267 (0.8257) time: 0.1508 data: 0.0708 max mem: 9377 +Train: [28] [4800/6250] eta: 0:03:28 lr: 0.000107 grad: 0.0820 (0.0825) loss: 0.8187 (0.8256) time: 0.1500 data: 0.0627 max mem: 9377 +Train: [28] [4900/6250] eta: 0:03:14 lr: 0.000107 grad: 0.0774 (0.0826) loss: 0.8269 (0.8255) time: 0.1327 data: 0.0512 max mem: 9377 +Train: [28] [5000/6250] eta: 0:02:59 lr: 0.000107 grad: 0.0836 (0.0826) loss: 0.8186 (0.8254) time: 0.1138 data: 0.0327 max mem: 9377 +Train: [28] [5100/6250] eta: 0:02:46 lr: 0.000107 grad: 0.0802 (0.0826) loss: 0.8229 (0.8253) time: 0.1605 data: 0.0788 max mem: 9377 +Train: [28] [5200/6250] eta: 0:02:31 lr: 0.000107 grad: 0.0805 (0.0826) loss: 0.8180 (0.8253) time: 0.1756 data: 0.0965 max mem: 9377 +Train: [28] [5300/6250] eta: 0:02:17 lr: 0.000107 grad: 0.0736 (0.0826) loss: 0.8315 (0.8252) time: 0.1719 data: 0.0909 max mem: 9377 +Train: [28] [5400/6250] eta: 0:02:03 lr: 0.000107 grad: 0.0803 (0.0826) loss: 0.8136 (0.8251) time: 0.1608 data: 0.0697 max mem: 9377 +Train: [28] [5500/6250] eta: 0:01:49 lr: 0.000107 grad: 0.0828 (0.0826) loss: 0.8207 (0.8251) time: 0.1560 data: 0.0722 max mem: 9377 +Train: [28] [5600/6250] eta: 0:01:35 lr: 0.000106 grad: 0.0826 (0.0826) loss: 0.8183 (0.8250) time: 0.1432 data: 0.0579 max mem: 9377 +Train: [28] [5700/6250] eta: 0:01:20 lr: 0.000106 grad: 0.0817 (0.0828) loss: 0.8169 (0.8249) time: 0.1375 data: 0.0543 max mem: 9377 +Train: [28] [5800/6250] eta: 0:01:05 lr: 0.000106 grad: 0.0833 (0.0828) loss: 0.8233 (0.8249) time: 0.1554 data: 0.0739 max mem: 9377 +Train: [28] [5900/6250] eta: 0:00:51 lr: 0.000106 grad: 0.0829 (0.0828) loss: 0.8150 (0.8248) time: 0.1393 data: 0.0566 max mem: 9377 +Train: [28] [6000/6250] eta: 0:00:36 lr: 0.000106 grad: 0.0806 (0.0829) loss: 0.8266 (0.8248) time: 0.1445 data: 0.0629 max mem: 9377 +Train: [28] [6100/6250] eta: 0:00:21 lr: 0.000106 grad: 0.0813 (0.0828) loss: 0.8259 (0.8248) time: 0.1294 data: 0.0461 max mem: 9377 +Train: [28] [6200/6250] eta: 0:00:07 lr: 0.000106 grad: 0.0759 (0.0828) loss: 0.8281 (0.8248) time: 0.1550 data: 0.0727 max mem: 9377 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.0806 (0.0828) loss: 0.8219 (0.8248) time: 0.1449 data: 0.0654 max mem: 9377 +Train: [28] Total time: 0:15:18 (0.1469 s / it) +Averaged stats: lr: 0.000106 grad: 0.0806 (0.0828) loss: 0.8219 (0.8248) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:05:21 loss: 0.8428 (0.8428) time: 5.1815 data: 5.1516 max mem: 9377 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8433 (0.8443) time: 0.1327 data: 0.0973 max mem: 9377 +Eval (hcp-train-subset): [28] Total time: 0:00:13 (0.2120 s / it) +Averaged stats (hcp-train-subset): loss: 0.8433 (0.8443) +Eval (hcp-val): [28] [ 0/62] eta: 0:04:05 loss: 0.8391 (0.8391) time: 3.9578 data: 3.8853 max mem: 9377 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8416 (0.8423) time: 0.1333 data: 0.1080 max mem: 9377 +Eval (hcp-val): [28] Total time: 0:00:13 (0.2113 s / it) +Averaged stats (hcp-val): loss: 0.8416 (0.8423) +Eval (nsd-val): [28] [ 0/62] eta: 0:03:08 loss: 0.8121 (0.8121) time: 3.0419 data: 2.9585 max mem: 9377 +Eval (nsd-val): [28] [61/62] eta: 0:00:00 loss: 0.8181 (0.8192) time: 0.1345 data: 0.1062 max mem: 9377 +Eval (nsd-val): [28] Total time: 0:00:13 (0.2129 s / it) +Averaged stats (nsd-val): loss: 0.8181 (0.8192) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 9:28:29 lr: 0.000106 grad: 0.1333 (0.1333) loss: 0.8717 (0.8717) time: 5.4576 data: 5.3338 max mem: 9377 +Train: [29] [ 100/6250] eta: 0:20:01 lr: 0.000106 grad: 0.0881 (0.0994) loss: 0.8152 (0.8282) time: 0.1584 data: 0.0698 max mem: 9377 +Train: [29] [ 200/6250] eta: 0:17:11 lr: 0.000106 grad: 0.0895 (0.0960) loss: 0.8120 (0.8229) time: 0.1437 data: 0.0548 max mem: 9377 +Train: [29] [ 300/6250] eta: 0:16:05 lr: 0.000106 grad: 0.0873 (0.0942) loss: 0.8183 (0.8204) time: 0.1548 data: 0.0667 max mem: 9377 +Train: [29] [ 400/6250] eta: 0:15:24 lr: 0.000106 grad: 0.0819 (0.0929) loss: 0.8207 (0.8197) time: 0.1492 data: 0.0519 max mem: 9377 +Train: [29] [ 500/6250] eta: 0:14:45 lr: 0.000106 grad: 0.0841 (0.0917) loss: 0.8172 (0.8190) time: 0.1213 data: 0.0221 max mem: 9377 +Train: [29] [ 600/6250] eta: 0:14:13 lr: 0.000106 grad: 0.0763 (0.0913) loss: 0.8231 (0.8186) time: 0.1020 data: 0.0100 max mem: 9377 +Train: [29] [ 700/6250] eta: 0:13:51 lr: 0.000106 grad: 0.0791 (0.0903) loss: 0.8248 (0.8184) time: 0.1445 data: 0.0543 max mem: 9377 +Train: [29] [ 800/6250] eta: 0:13:34 lr: 0.000106 grad: 0.0792 (0.0894) loss: 0.8261 (0.8190) time: 0.1353 data: 0.0507 max mem: 9377 +Train: [29] [ 900/6250] eta: 0:13:14 lr: 0.000106 grad: 0.0771 (0.0887) loss: 0.8312 (0.8196) time: 0.1497 data: 0.0701 max mem: 9377 +Train: [29] [1000/6250] eta: 0:12:55 lr: 0.000106 grad: 0.0786 (0.0881) loss: 0.8259 (0.8202) time: 0.1530 data: 0.0724 max mem: 9377 +Train: [29] [1100/6250] eta: 0:12:37 lr: 0.000106 grad: 0.0790 (0.0875) loss: 0.8355 (0.8207) time: 0.1552 data: 0.0722 max mem: 9377 +Train: [29] [1200/6250] eta: 0:12:19 lr: 0.000106 grad: 0.0822 (0.0870) loss: 0.8196 (0.8208) time: 0.1579 data: 0.0789 max mem: 9377 +Train: [29] [1300/6250] eta: 0:12:03 lr: 0.000106 grad: 0.0810 (0.0868) loss: 0.8169 (0.8208) time: 0.1537 data: 0.0741 max mem: 9377 +Train: [29] [1400/6250] eta: 0:11:44 lr: 0.000106 grad: 0.0801 (0.0867) loss: 0.8187 (0.8209) time: 0.1307 data: 0.0515 max mem: 9377 +Train: [29] [1500/6250] eta: 0:11:26 lr: 0.000106 grad: 0.0814 (0.0866) loss: 0.8265 (0.8208) time: 0.1323 data: 0.0528 max mem: 9377 +Train: [29] [1600/6250] eta: 0:11:10 lr: 0.000106 grad: 0.0741 (0.0866) loss: 0.8228 (0.8208) time: 0.1391 data: 0.0587 max mem: 9377 +Train: [29] [1700/6250] eta: 0:10:51 lr: 0.000106 grad: 0.0806 (0.0866) loss: 0.8233 (0.8207) time: 0.1289 data: 0.0436 max mem: 9377 +Train: [29] [1800/6250] eta: 0:10:35 lr: 0.000106 grad: 0.0813 (0.0865) loss: 0.8256 (0.8207) time: 0.1428 data: 0.0655 max mem: 9377 +Train: [29] [1900/6250] eta: 0:10:18 lr: 0.000106 grad: 0.0817 (0.0863) loss: 0.8184 (0.8206) time: 0.1390 data: 0.0623 max mem: 9377 +Train: [29] [2000/6250] eta: 0:10:03 lr: 0.000106 grad: 0.0799 (0.0862) loss: 0.8216 (0.8206) time: 0.1222 data: 0.0374 max mem: 9377 +Train: [29] [2100/6250] eta: 0:09:47 lr: 0.000106 grad: 0.0820 (0.0861) loss: 0.8203 (0.8206) time: 0.1381 data: 0.0556 max mem: 9377 +Train: [29] [2200/6250] eta: 0:09:33 lr: 0.000106 grad: 0.0785 (0.0860) loss: 0.8267 (0.8207) time: 0.1448 data: 0.0607 max mem: 9377 +Train: [29] [2300/6250] eta: 0:09:18 lr: 0.000106 grad: 0.0759 (0.0858) loss: 0.8202 (0.8209) time: 0.1399 data: 0.0560 max mem: 9377 +Train: [29] [2400/6250] eta: 0:09:03 lr: 0.000106 grad: 0.0745 (0.0856) loss: 0.8295 (0.8211) time: 0.1343 data: 0.0484 max mem: 9377 +Train: [29] [2500/6250] eta: 0:08:48 lr: 0.000106 grad: 0.0788 (0.0858) loss: 0.8307 (0.8212) time: 0.1325 data: 0.0544 max mem: 9377 +Train: [29] [2600/6250] eta: 0:08:33 lr: 0.000106 grad: 0.0819 (0.0858) loss: 0.8243 (0.8213) time: 0.1330 data: 0.0475 max mem: 9377 +Train: [29] [2700/6250] eta: 0:08:18 lr: 0.000106 grad: 0.0817 (0.0857) loss: 0.8222 (0.8213) time: 0.1252 data: 0.0475 max mem: 9377 +Train: [29] [2800/6250] eta: 0:08:05 lr: 0.000106 grad: 0.0854 (0.0858) loss: 0.8181 (0.8214) time: 0.1666 data: 0.0850 max mem: 9377 +Train: [29] [2900/6250] eta: 0:07:51 lr: 0.000106 grad: 0.0806 (0.0858) loss: 0.8248 (0.8214) time: 0.1494 data: 0.0700 max mem: 9377 +Train: [29] [3000/6250] eta: 0:07:39 lr: 0.000106 grad: 0.0824 (0.0857) loss: 0.8249 (0.8214) time: 0.2134 data: 0.1329 max mem: 9377 +Train: [29] [3100/6250] eta: 0:07:25 lr: 0.000106 grad: 0.0901 (0.0858) loss: 0.8258 (0.8214) time: 0.1459 data: 0.0620 max mem: 9377 +Train: [29] [3200/6250] eta: 0:07:12 lr: 0.000106 grad: 0.0821 (0.0858) loss: 0.8212 (0.8214) time: 0.1420 data: 0.0612 max mem: 9377 +Train: [29] [3300/6250] eta: 0:06:58 lr: 0.000106 grad: 0.0850 (0.0859) loss: 0.8230 (0.8215) time: 0.1394 data: 0.0577 max mem: 9377 +Train: [29] [3400/6250] eta: 0:06:44 lr: 0.000106 grad: 0.0813 (0.0859) loss: 0.8230 (0.8215) time: 0.1477 data: 0.0736 max mem: 9377 +Train: [29] [3500/6250] eta: 0:06:31 lr: 0.000105 grad: 0.0806 (0.0859) loss: 0.8276 (0.8215) time: 0.1492 data: 0.0700 max mem: 9377 +Train: [29] [3600/6250] eta: 0:06:17 lr: 0.000105 grad: 0.0840 (0.0859) loss: 0.8242 (0.8215) time: 0.1395 data: 0.0598 max mem: 9377 +Train: [29] [3700/6250] eta: 0:06:02 lr: 0.000105 grad: 0.0779 (0.0859) loss: 0.8291 (0.8216) time: 0.1487 data: 0.0672 max mem: 9377 +Train: [29] [3800/6250] eta: 0:05:48 lr: 0.000105 grad: 0.0745 (0.0858) loss: 0.8278 (0.8218) time: 0.1345 data: 0.0478 max mem: 9377 +Train: [29] [3900/6250] eta: 0:05:34 lr: 0.000105 grad: 0.0828 (0.0857) loss: 0.8230 (0.8219) time: 0.1346 data: 0.0561 max mem: 9377 +Train: [29] [4000/6250] eta: 0:05:19 lr: 0.000105 grad: 0.0858 (0.0858) loss: 0.8246 (0.8219) time: 0.1570 data: 0.0719 max mem: 9377 +Train: [29] [4100/6250] eta: 0:05:05 lr: 0.000105 grad: 0.0797 (0.0858) loss: 0.8258 (0.8220) time: 0.1338 data: 0.0500 max mem: 9377 +Train: [29] [4200/6250] eta: 0:04:50 lr: 0.000105 grad: 0.0795 (0.0857) loss: 0.8235 (0.8220) time: 0.1379 data: 0.0520 max mem: 9377 +Train: [29] [4300/6250] eta: 0:04:36 lr: 0.000105 grad: 0.0856 (0.0858) loss: 0.8266 (0.8221) time: 0.1434 data: 0.0592 max mem: 9377 +Train: [29] [4400/6250] eta: 0:04:21 lr: 0.000105 grad: 0.0841 (0.0858) loss: 0.8190 (0.8221) time: 0.1147 data: 0.0359 max mem: 9377 +Train: [29] [4500/6250] eta: 0:04:07 lr: 0.000105 grad: 0.0858 (0.0858) loss: 0.8220 (0.8221) time: 0.1508 data: 0.0689 max mem: 9377 +Train: [29] [4600/6250] eta: 0:03:53 lr: 0.000105 grad: 0.0816 (0.0858) loss: 0.8223 (0.8221) time: 0.1404 data: 0.0606 max mem: 9377 +Train: [29] [4700/6250] eta: 0:03:39 lr: 0.000105 grad: 0.0884 (0.0858) loss: 0.8224 (0.8221) time: 0.1500 data: 0.0653 max mem: 9377 +Train: [29] [4800/6250] eta: 0:03:24 lr: 0.000105 grad: 0.0801 (0.0858) loss: 0.8269 (0.8221) time: 0.1480 data: 0.0697 max mem: 9377 +Train: [29] [4900/6250] eta: 0:03:10 lr: 0.000105 grad: 0.0815 (0.0858) loss: 0.8274 (0.8222) time: 0.1848 data: 0.1084 max mem: 9377 +Train: [29] [5000/6250] eta: 0:02:56 lr: 0.000105 grad: 0.0830 (0.0858) loss: 0.8188 (0.8222) time: 0.1338 data: 0.0506 max mem: 9377 +Train: [29] [5100/6250] eta: 0:02:43 lr: 0.000105 grad: 0.0813 (0.0858) loss: 0.8221 (0.8222) time: 0.1526 data: 0.0634 max mem: 9377 +Train: [29] [5200/6250] eta: 0:02:29 lr: 0.000105 grad: 0.0816 (0.0858) loss: 0.8207 (0.8222) time: 0.1468 data: 0.0650 max mem: 9377 +Train: [29] [5300/6250] eta: 0:02:15 lr: 0.000105 grad: 0.0770 (0.0858) loss: 0.8192 (0.8222) time: 0.1390 data: 0.0603 max mem: 9377 +Train: [29] [5400/6250] eta: 0:02:00 lr: 0.000105 grad: 0.0830 (0.0858) loss: 0.8216 (0.8223) time: 0.1191 data: 0.0429 max mem: 9377 +Train: [29] [5500/6250] eta: 0:01:46 lr: 0.000105 grad: 0.0816 (0.0858) loss: 0.8193 (0.8223) time: 0.1410 data: 0.0630 max mem: 9377 +Train: [29] [5600/6250] eta: 0:01:32 lr: 0.000105 grad: 0.0844 (0.0858) loss: 0.8190 (0.8223) time: 0.1319 data: 0.0468 max mem: 9377 +Train: [29] [5700/6250] eta: 0:01:18 lr: 0.000105 grad: 0.0820 (0.0859) loss: 0.8238 (0.8223) time: 0.1150 data: 0.0274 max mem: 9377 +Train: [29] [5800/6250] eta: 0:01:03 lr: 0.000105 grad: 0.0841 (0.0859) loss: 0.8218 (0.8222) time: 0.1384 data: 0.0561 max mem: 9377 +Train: [29] [5900/6250] eta: 0:00:49 lr: 0.000105 grad: 0.0828 (0.0859) loss: 0.8164 (0.8222) time: 0.1339 data: 0.0418 max mem: 9377 +Train: [29] [6000/6250] eta: 0:00:35 lr: 0.000105 grad: 0.0804 (0.0859) loss: 0.8227 (0.8222) time: 0.1147 data: 0.0288 max mem: 9377 +Train: [29] [6100/6250] eta: 0:00:21 lr: 0.000105 grad: 0.0834 (0.0859) loss: 0.8203 (0.8222) time: 0.1356 data: 0.0492 max mem: 9377 +Train: [29] [6200/6250] eta: 0:00:07 lr: 0.000105 grad: 0.0812 (0.0858) loss: 0.8167 (0.8222) time: 0.1418 data: 0.0560 max mem: 9377 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.0833 (0.0859) loss: 0.8188 (0.8222) time: 0.1324 data: 0.0513 max mem: 9377 +Train: [29] Total time: 0:14:50 (0.1425 s / it) +Averaged stats: lr: 0.000105 grad: 0.0833 (0.0859) loss: 0.8188 (0.8222) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:03:11 loss: 0.8411 (0.8411) time: 3.0830 data: 2.9807 max mem: 9377 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8457 (0.8447) time: 0.1311 data: 0.1044 max mem: 9377 +Eval (hcp-train-subset): [29] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (hcp-train-subset): loss: 0.8457 (0.8447) +Making plots (hcp-train-subset): example=1 +Eval (hcp-val): [29] [ 0/62] eta: 0:05:57 loss: 0.8408 (0.8408) time: 5.7617 data: 5.7283 max mem: 9377 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8425 (0.8427) time: 0.1409 data: 0.1161 max mem: 9377 +Eval (hcp-val): [29] Total time: 0:00:14 (0.2308 s / it) +Averaged stats (hcp-val): loss: 0.8425 (0.8427) +Making plots (hcp-val): example=28 +Eval (nsd-val): [29] [ 0/62] eta: 0:05:43 loss: 0.8111 (0.8111) time: 5.5373 data: 5.4822 max mem: 9377 +Eval (nsd-val): [29] [61/62] eta: 0:00:00 loss: 0.8245 (0.8249) time: 0.1495 data: 0.1237 max mem: 9377 +Eval (nsd-val): [29] Total time: 0:00:14 (0.2358 s / it) +Averaged stats (nsd-val): loss: 0.8245 (0.8249) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00029.pth +Train: [30] [ 0/6250] eta: 11:46:59 lr: 0.000105 grad: 0.1595 (0.1595) loss: 0.8282 (0.8282) time: 6.7872 data: 6.6861 max mem: 9377 +Train: [30] [ 100/6250] eta: 0:22:23 lr: 0.000105 grad: 0.0814 (0.1013) loss: 0.8330 (0.8336) time: 0.1604 data: 0.0560 max mem: 9377 +Train: [30] [ 200/6250] eta: 0:19:21 lr: 0.000105 grad: 0.0749 (0.0969) loss: 0.8289 (0.8299) time: 0.1721 data: 0.0728 max mem: 9377 +Train: [30] [ 300/6250] eta: 0:18:05 lr: 0.000105 grad: 0.0884 (0.0929) loss: 0.8224 (0.8278) time: 0.1665 data: 0.0752 max mem: 9377 +Train: [30] [ 400/6250] eta: 0:17:07 lr: 0.000105 grad: 0.0827 (0.0900) loss: 0.8202 (0.8264) time: 0.1392 data: 0.0499 max mem: 9377 +Train: [30] [ 500/6250] eta: 0:16:19 lr: 0.000105 grad: 0.0810 (0.0880) loss: 0.8226 (0.8262) time: 0.1441 data: 0.0514 max mem: 9377 +Train: [30] [ 600/6250] eta: 0:15:34 lr: 0.000105 grad: 0.0746 (0.0865) loss: 0.8243 (0.8257) time: 0.1236 data: 0.0388 max mem: 9377 +Train: [30] [ 700/6250] eta: 0:15:01 lr: 0.000105 grad: 0.0820 (0.0866) loss: 0.8233 (0.8252) time: 0.1513 data: 0.0713 max mem: 9377 +Train: [30] [ 800/6250] eta: 0:14:36 lr: 0.000105 grad: 0.0894 (0.0861) loss: 0.8179 (0.8253) time: 0.1402 data: 0.0537 max mem: 9377 +Train: [30] [ 900/6250] eta: 0:14:13 lr: 0.000105 grad: 0.0767 (0.0856) loss: 0.8271 (0.8250) time: 0.1485 data: 0.0663 max mem: 9377 +Train: [30] [1000/6250] eta: 0:13:45 lr: 0.000105 grad: 0.0804 (0.0851) loss: 0.8271 (0.8249) time: 0.1281 data: 0.0430 max mem: 9377 +Train: [30] [1100/6250] eta: 0:13:23 lr: 0.000105 grad: 0.0844 (0.0850) loss: 0.8268 (0.8247) time: 0.1598 data: 0.0832 max mem: 9377 +Train: [30] [1200/6250] eta: 0:12:59 lr: 0.000105 grad: 0.0829 (0.0849) loss: 0.8226 (0.8246) time: 0.1402 data: 0.0615 max mem: 9377 +Train: [30] [1300/6250] eta: 0:12:36 lr: 0.000105 grad: 0.0796 (0.0846) loss: 0.8247 (0.8245) time: 0.1307 data: 0.0508 max mem: 9377 +Train: [30] [1400/6250] eta: 0:12:16 lr: 0.000104 grad: 0.0824 (0.0844) loss: 0.8166 (0.8243) time: 0.1290 data: 0.0426 max mem: 9377 +Train: [30] [1500/6250] eta: 0:11:56 lr: 0.000104 grad: 0.0814 (0.0845) loss: 0.8197 (0.8240) time: 0.1356 data: 0.0572 max mem: 9377 +Train: [30] [1600/6250] eta: 0:11:37 lr: 0.000104 grad: 0.0826 (0.0845) loss: 0.8191 (0.8238) time: 0.1468 data: 0.0615 max mem: 9377 +Train: [30] [1700/6250] eta: 0:11:20 lr: 0.000104 grad: 0.0783 (0.0846) loss: 0.8242 (0.8236) time: 0.1499 data: 0.0683 max mem: 9377 +Train: [30] [1800/6250] eta: 0:11:00 lr: 0.000104 grad: 0.0832 (0.0848) loss: 0.8206 (0.8233) time: 0.1342 data: 0.0526 max mem: 9377 +Train: [30] [1900/6250] eta: 0:10:45 lr: 0.000104 grad: 0.0878 (0.0848) loss: 0.8187 (0.8232) time: 0.1709 data: 0.0897 max mem: 9377 +Train: [30] [2000/6250] eta: 0:10:27 lr: 0.000104 grad: 0.0844 (0.0848) loss: 0.8222 (0.8229) time: 0.1313 data: 0.0513 max mem: 9377 +Train: [30] [2100/6250] eta: 0:10:11 lr: 0.000104 grad: 0.0837 (0.0850) loss: 0.8221 (0.8228) time: 0.1328 data: 0.0539 max mem: 9377 +Train: [30] [2200/6250] eta: 0:09:55 lr: 0.000104 grad: 0.0862 (0.0853) loss: 0.8181 (0.8226) time: 0.1474 data: 0.0658 max mem: 9377 +Train: [30] [2300/6250] eta: 0:09:38 lr: 0.000104 grad: 0.0838 (0.0854) loss: 0.8184 (0.8225) time: 0.1251 data: 0.0377 max mem: 9377 +Train: [30] [2400/6250] eta: 0:09:22 lr: 0.000104 grad: 0.0798 (0.0854) loss: 0.8217 (0.8225) time: 0.1409 data: 0.0618 max mem: 9377 +Train: [30] [2500/6250] eta: 0:09:06 lr: 0.000104 grad: 0.0762 (0.0855) loss: 0.8229 (0.8224) time: 0.1304 data: 0.0476 max mem: 9377 +Train: [30] [2600/6250] eta: 0:08:51 lr: 0.000104 grad: 0.0818 (0.0855) loss: 0.8261 (0.8224) time: 0.1360 data: 0.0584 max mem: 9377 +Train: [30] [2700/6250] eta: 0:08:36 lr: 0.000104 grad: 0.0846 (0.0855) loss: 0.8214 (0.8225) time: 0.1560 data: 0.0728 max mem: 9377 +Train: [30] [2800/6250] eta: 0:08:21 lr: 0.000104 grad: 0.0789 (0.0855) loss: 0.8226 (0.8226) time: 0.1394 data: 0.0620 max mem: 9377 +Train: [30] [2900/6250] eta: 0:08:07 lr: 0.000104 grad: 0.0858 (0.0854) loss: 0.8276 (0.8226) time: 0.1672 data: 0.0860 max mem: 9377 +Train: [30] [3000/6250] eta: 0:07:51 lr: 0.000104 grad: 0.0832 (0.0855) loss: 0.8172 (0.8226) time: 0.1076 data: 0.0203 max mem: 9377 +Train: [30] [3100/6250] eta: 0:07:36 lr: 0.000104 grad: 0.0834 (0.0856) loss: 0.8213 (0.8225) time: 0.1418 data: 0.0674 max mem: 9377 +Train: [30] [3200/6250] eta: 0:07:21 lr: 0.000104 grad: 0.0781 (0.0856) loss: 0.8232 (0.8227) time: 0.1500 data: 0.0727 max mem: 9377 +Train: [30] [3300/6250] eta: 0:07:05 lr: 0.000104 grad: 0.0811 (0.0855) loss: 0.8267 (0.8228) time: 0.1459 data: 0.0664 max mem: 9377 +Train: [30] [3400/6250] eta: 0:06:51 lr: 0.000104 grad: 0.0879 (0.0855) loss: 0.8219 (0.8228) time: 0.1482 data: 0.0699 max mem: 9377 +Train: [30] [3500/6250] eta: 0:06:36 lr: 0.000104 grad: 0.0766 (0.0853) loss: 0.8251 (0.8230) time: 0.1281 data: 0.0437 max mem: 9377 +Train: [30] [3600/6250] eta: 0:06:22 lr: 0.000104 grad: 0.0755 (0.0852) loss: 0.8349 (0.8231) time: 0.1304 data: 0.0460 max mem: 9377 +Train: [30] [3700/6250] eta: 0:06:08 lr: 0.000104 grad: 0.0832 (0.0854) loss: 0.8281 (0.8231) time: 0.1640 data: 0.0861 max mem: 9377 +Train: [30] [3800/6250] eta: 0:05:53 lr: 0.000104 grad: 0.0813 (0.0854) loss: 0.8192 (0.8232) time: 0.1601 data: 0.0809 max mem: 9377 +Train: [30] [3900/6250] eta: 0:05:39 lr: 0.000104 grad: 0.0824 (0.0854) loss: 0.8213 (0.8233) time: 0.1637 data: 0.0836 max mem: 9377 +Train: [30] [4000/6250] eta: 0:05:25 lr: 0.000104 grad: 0.0795 (0.0853) loss: 0.8305 (0.8234) time: 0.1574 data: 0.0816 max mem: 9377 +Train: [30] [4100/6250] eta: 0:05:10 lr: 0.000104 grad: 0.0821 (0.0853) loss: 0.8273 (0.8234) time: 0.1416 data: 0.0550 max mem: 9377 +Train: [30] [4200/6250] eta: 0:04:56 lr: 0.000104 grad: 0.0812 (0.0852) loss: 0.8276 (0.8235) time: 0.1624 data: 0.0834 max mem: 9377 +Train: [30] [4300/6250] eta: 0:04:42 lr: 0.000104 grad: 0.0851 (0.0853) loss: 0.8248 (0.8236) time: 0.1500 data: 0.0714 max mem: 9377 +Train: [30] [4400/6250] eta: 0:04:27 lr: 0.000104 grad: 0.0820 (0.0854) loss: 0.8268 (0.8236) time: 0.1296 data: 0.0494 max mem: 9377 +Train: [30] [4500/6250] eta: 0:04:13 lr: 0.000104 grad: 0.0875 (0.0854) loss: 0.8208 (0.8237) time: 0.1204 data: 0.0431 max mem: 9377 +Train: [30] [4600/6250] eta: 0:03:58 lr: 0.000104 grad: 0.0882 (0.0855) loss: 0.8194 (0.8236) time: 0.1457 data: 0.0650 max mem: 9377 +Train: [30] [4700/6250] eta: 0:03:43 lr: 0.000104 grad: 0.0887 (0.0856) loss: 0.8136 (0.8235) time: 0.1466 data: 0.0623 max mem: 9377 +Train: [30] [4800/6250] eta: 0:03:29 lr: 0.000104 grad: 0.0835 (0.0856) loss: 0.8272 (0.8235) time: 0.1392 data: 0.0576 max mem: 9377 +Train: [30] [4900/6250] eta: 0:03:14 lr: 0.000104 grad: 0.0887 (0.0859) loss: 0.8269 (0.8234) time: 0.1427 data: 0.0602 max mem: 9377 +Train: [30] [5000/6250] eta: 0:03:00 lr: 0.000104 grad: 0.0872 (0.0860) loss: 0.8196 (0.8233) time: 0.1444 data: 0.0614 max mem: 9377 +Train: [30] [5100/6250] eta: 0:02:46 lr: 0.000104 grad: 0.0947 (0.0862) loss: 0.8112 (0.8232) time: 0.1661 data: 0.0839 max mem: 9377 +Train: [30] [5200/6250] eta: 0:02:32 lr: 0.000104 grad: 0.0923 (0.0863) loss: 0.8168 (0.8232) time: 0.1538 data: 0.0682 max mem: 9377 +Train: [30] [5300/6250] eta: 0:02:17 lr: 0.000104 grad: 0.0873 (0.0864) loss: 0.8111 (0.8231) time: 0.1383 data: 0.0590 max mem: 9377 +Train: [30] [5400/6250] eta: 0:02:03 lr: 0.000103 grad: 0.0859 (0.0864) loss: 0.8234 (0.8231) time: 0.1114 data: 0.0326 max mem: 9377 +Train: [30] [5500/6250] eta: 0:01:48 lr: 0.000103 grad: 0.0874 (0.0864) loss: 0.8187 (0.8230) time: 0.1609 data: 0.0780 max mem: 9377 +Train: [30] [5600/6250] eta: 0:01:33 lr: 0.000103 grad: 0.0806 (0.0864) loss: 0.8286 (0.8230) time: 0.1357 data: 0.0486 max mem: 9377 +Train: [30] [5700/6250] eta: 0:01:19 lr: 0.000103 grad: 0.0834 (0.0864) loss: 0.8304 (0.8230) time: 0.1426 data: 0.0615 max mem: 9377 +Train: [30] [5800/6250] eta: 0:01:04 lr: 0.000103 grad: 0.0824 (0.0864) loss: 0.8258 (0.8230) time: 0.1402 data: 0.0630 max mem: 9377 +Train: [30] [5900/6250] eta: 0:00:50 lr: 0.000103 grad: 0.0913 (0.0865) loss: 0.8155 (0.8230) time: 0.1204 data: 0.0309 max mem: 9377 +Train: [30] [6000/6250] eta: 0:00:35 lr: 0.000103 grad: 0.0835 (0.0865) loss: 0.8260 (0.8230) time: 0.1229 data: 0.0351 max mem: 9377 +Train: [30] [6100/6250] eta: 0:00:21 lr: 0.000103 grad: 0.0857 (0.0865) loss: 0.8250 (0.8230) time: 0.1264 data: 0.0429 max mem: 9377 +Train: [30] [6200/6250] eta: 0:00:07 lr: 0.000103 grad: 0.0856 (0.0866) loss: 0.8174 (0.8230) time: 0.1782 data: 0.0980 max mem: 9377 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.0852 (0.0866) loss: 0.8245 (0.8230) time: 0.1369 data: 0.0391 max mem: 9377 +Train: [30] Total time: 0:14:58 (0.1438 s / it) +Averaged stats: lr: 0.000103 grad: 0.0852 (0.0866) loss: 0.8245 (0.8230) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:04:41 loss: 0.8443 (0.8443) time: 4.5415 data: 4.5114 max mem: 9377 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8431 (0.8439) time: 0.0982 data: 0.0735 max mem: 9377 +Eval (hcp-train-subset): [30] Total time: 0:00:12 (0.1971 s / it) +Averaged stats (hcp-train-subset): loss: 0.8431 (0.8439) +Eval (hcp-val): [30] [ 0/62] eta: 0:05:35 loss: 0.8408 (0.8408) time: 5.4161 data: 5.3854 max mem: 9377 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8411 (0.8420) time: 0.1049 data: 0.0802 max mem: 9377 +Eval (hcp-val): [30] Total time: 0:00:12 (0.2009 s / it) +Averaged stats (hcp-val): loss: 0.8411 (0.8420) +Eval (nsd-val): [30] [ 0/62] eta: 0:04:27 loss: 0.8034 (0.8034) time: 4.3187 data: 4.2857 max mem: 9377 +Eval (nsd-val): [30] [61/62] eta: 0:00:00 loss: 0.8149 (0.8151) time: 0.1133 data: 0.0885 max mem: 9377 +Eval (nsd-val): [30] Total time: 0:00:11 (0.1918 s / it) +Averaged stats (nsd-val): loss: 0.8149 (0.8151) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [31] [ 0/6250] eta: 6:50:07 lr: 0.000103 grad: 0.0512 (0.0512) loss: 0.8645 (0.8645) time: 3.9372 data: 3.6195 max mem: 9377 +Train: [31] [ 100/6250] eta: 0:21:03 lr: 0.000103 grad: 0.0862 (0.0973) loss: 0.8236 (0.8358) time: 0.1487 data: 0.0469 max mem: 9377 +Train: [31] [ 200/6250] eta: 0:18:29 lr: 0.000103 grad: 0.0832 (0.0971) loss: 0.8237 (0.8294) time: 0.1747 data: 0.0778 max mem: 9377 +Train: [31] [ 300/6250] eta: 0:16:48 lr: 0.000103 grad: 0.0845 (0.0957) loss: 0.8268 (0.8270) time: 0.1395 data: 0.0464 max mem: 9377 +Train: [31] [ 400/6250] eta: 0:15:52 lr: 0.000103 grad: 0.0805 (0.0938) loss: 0.8215 (0.8247) time: 0.1408 data: 0.0550 max mem: 9377 +Train: [31] [ 500/6250] eta: 0:15:19 lr: 0.000103 grad: 0.0900 (0.0937) loss: 0.8138 (0.8228) time: 0.1560 data: 0.0707 max mem: 9377 +Train: [31] [ 600/6250] eta: 0:14:46 lr: 0.000103 grad: 0.0892 (0.0944) loss: 0.8137 (0.8220) time: 0.1582 data: 0.0745 max mem: 9377 +Train: [31] [ 700/6250] eta: 0:14:25 lr: 0.000103 grad: 0.0878 (0.0938) loss: 0.8162 (0.8210) time: 0.1636 data: 0.0799 max mem: 9377 +Train: [31] [ 800/6250] eta: 0:14:02 lr: 0.000103 grad: 0.0896 (0.0931) loss: 0.8177 (0.8204) time: 0.1545 data: 0.0666 max mem: 9377 +Train: [31] [ 900/6250] eta: 0:13:40 lr: 0.000103 grad: 0.0869 (0.0922) loss: 0.8220 (0.8204) time: 0.1281 data: 0.0374 max mem: 9377 +Train: [31] [1000/6250] eta: 0:13:18 lr: 0.000103 grad: 0.0877 (0.0921) loss: 0.8158 (0.8199) time: 0.1346 data: 0.0541 max mem: 9377 +Train: [31] [1100/6250] eta: 0:12:58 lr: 0.000103 grad: 0.0806 (0.0916) loss: 0.8178 (0.8196) time: 0.1615 data: 0.0802 max mem: 9377 +Train: [31] [1200/6250] eta: 0:12:36 lr: 0.000103 grad: 0.0833 (0.0915) loss: 0.8182 (0.8190) time: 0.1356 data: 0.0529 max mem: 9377 +Train: [31] [1300/6250] eta: 0:12:17 lr: 0.000103 grad: 0.0818 (0.0912) loss: 0.8135 (0.8189) time: 0.1399 data: 0.0485 max mem: 9377 +Train: [31] [1400/6250] eta: 0:11:58 lr: 0.000103 grad: 0.0832 (0.0911) loss: 0.8164 (0.8187) time: 0.1525 data: 0.0699 max mem: 9377 +Train: [31] [1500/6250] eta: 0:11:39 lr: 0.000103 grad: 0.0851 (0.0909) loss: 0.8144 (0.8184) time: 0.1367 data: 0.0542 max mem: 9377 +Train: [31] [1600/6250] eta: 0:11:20 lr: 0.000103 grad: 0.0864 (0.0907) loss: 0.8219 (0.8184) time: 0.1296 data: 0.0491 max mem: 9377 +Train: [31] [1700/6250] eta: 0:11:04 lr: 0.000103 grad: 0.0798 (0.0904) loss: 0.8238 (0.8183) time: 0.1496 data: 0.0717 max mem: 9377 +Train: [31] [1800/6250] eta: 0:10:47 lr: 0.000103 grad: 0.0813 (0.0901) loss: 0.8261 (0.8184) time: 0.1262 data: 0.0421 max mem: 9377 +Train: [31] [1900/6250] eta: 0:10:31 lr: 0.000103 grad: 0.0869 (0.0901) loss: 0.8199 (0.8184) time: 0.1260 data: 0.0443 max mem: 9377 +Train: [31] [2000/6250] eta: 0:10:14 lr: 0.000103 grad: 0.0856 (0.0901) loss: 0.8137 (0.8183) time: 0.1308 data: 0.0451 max mem: 9377 +Train: [31] [2100/6250] eta: 0:09:58 lr: 0.000103 grad: 0.0862 (0.0900) loss: 0.8202 (0.8184) time: 0.1293 data: 0.0498 max mem: 9377 +Train: [31] [2200/6250] eta: 0:09:43 lr: 0.000103 grad: 0.0890 (0.0902) loss: 0.8193 (0.8186) time: 0.1412 data: 0.0594 max mem: 9377 +Train: [31] [2300/6250] eta: 0:09:27 lr: 0.000103 grad: 0.0880 (0.0901) loss: 0.8156 (0.8187) time: 0.1617 data: 0.0851 max mem: 9377 +Train: [31] [2400/6250] eta: 0:09:12 lr: 0.000103 grad: 0.0882 (0.0900) loss: 0.8233 (0.8189) time: 0.1435 data: 0.0707 max mem: 9377 +Train: [31] [2500/6250] eta: 0:08:56 lr: 0.000103 grad: 0.0815 (0.0899) loss: 0.8224 (0.8191) time: 0.1354 data: 0.0550 max mem: 9377 +Train: [31] [2600/6250] eta: 0:08:40 lr: 0.000103 grad: 0.0885 (0.0897) loss: 0.8229 (0.8192) time: 0.1315 data: 0.0471 max mem: 9377 +Train: [31] [2700/6250] eta: 0:08:25 lr: 0.000103 grad: 0.0791 (0.0896) loss: 0.8273 (0.8194) time: 0.1340 data: 0.0532 max mem: 9377 +Train: [31] [2800/6250] eta: 0:08:10 lr: 0.000103 grad: 0.0830 (0.0895) loss: 0.8248 (0.8194) time: 0.1255 data: 0.0437 max mem: 9377 +Train: [31] [2900/6250] eta: 0:07:55 lr: 0.000103 grad: 0.0842 (0.0894) loss: 0.8199 (0.8194) time: 0.1394 data: 0.0623 max mem: 9377 +Train: [31] [3000/6250] eta: 0:07:41 lr: 0.000103 grad: 0.0872 (0.0894) loss: 0.8205 (0.8195) time: 0.1501 data: 0.0707 max mem: 9377 +Train: [31] [3100/6250] eta: 0:07:26 lr: 0.000103 grad: 0.0829 (0.0893) loss: 0.8112 (0.8194) time: 0.1405 data: 0.0578 max mem: 9377 +Train: [31] [3200/6250] eta: 0:07:12 lr: 0.000102 grad: 0.0902 (0.0894) loss: 0.8150 (0.8193) time: 0.1471 data: 0.0688 max mem: 9377 +Train: [31] [3300/6250] eta: 0:06:57 lr: 0.000102 grad: 0.0832 (0.0893) loss: 0.8222 (0.8193) time: 0.1503 data: 0.0623 max mem: 9377 +Train: [31] [3400/6250] eta: 0:06:43 lr: 0.000102 grad: 0.0853 (0.0893) loss: 0.8199 (0.8193) time: 0.1241 data: 0.0436 max mem: 9377 +Train: [31] [3500/6250] eta: 0:06:28 lr: 0.000102 grad: 0.0809 (0.0893) loss: 0.8267 (0.8193) time: 0.1210 data: 0.0369 max mem: 9377 +Train: [31] [3600/6250] eta: 0:06:13 lr: 0.000102 grad: 0.0841 (0.0896) loss: 0.8179 (0.8192) time: 0.1247 data: 0.0428 max mem: 9377 +Train: [31] [3700/6250] eta: 0:05:59 lr: 0.000102 grad: 0.0832 (0.0895) loss: 0.8228 (0.8193) time: 0.1476 data: 0.0672 max mem: 9377 +Train: [31] [3800/6250] eta: 0:05:44 lr: 0.000102 grad: 0.0849 (0.0895) loss: 0.8206 (0.8193) time: 0.1411 data: 0.0622 max mem: 9377 +Train: [31] [3900/6250] eta: 0:05:30 lr: 0.000102 grad: 0.0814 (0.0894) loss: 0.8201 (0.8194) time: 0.1504 data: 0.0720 max mem: 9377 +Train: [31] [4000/6250] eta: 0:05:15 lr: 0.000102 grad: 0.0871 (0.0893) loss: 0.8196 (0.8194) time: 0.1265 data: 0.0424 max mem: 9377 +Train: [31] [4100/6250] eta: 0:05:01 lr: 0.000102 grad: 0.0811 (0.0892) loss: 0.8230 (0.8195) time: 0.1355 data: 0.0520 max mem: 9377 +Train: [31] [4200/6250] eta: 0:04:46 lr: 0.000102 grad: 0.0927 (0.0893) loss: 0.8204 (0.8195) time: 0.1412 data: 0.0650 max mem: 9377 +Train: [31] [4300/6250] eta: 0:04:32 lr: 0.000102 grad: 0.0820 (0.0892) loss: 0.8189 (0.8196) time: 0.1334 data: 0.0506 max mem: 9377 +Train: [31] [4400/6250] eta: 0:04:18 lr: 0.000102 grad: 0.0851 (0.0892) loss: 0.8207 (0.8196) time: 0.1423 data: 0.0577 max mem: 9377 +Train: [31] [4500/6250] eta: 0:04:04 lr: 0.000102 grad: 0.0797 (0.0892) loss: 0.8251 (0.8197) time: 0.1158 data: 0.0290 max mem: 9377 +Train: [31] [4600/6250] eta: 0:03:50 lr: 0.000102 grad: 0.0795 (0.0891) loss: 0.8268 (0.8197) time: 0.1855 data: 0.1055 max mem: 9377 +Train: [31] [4700/6250] eta: 0:03:37 lr: 0.000102 grad: 0.0812 (0.0891) loss: 0.8277 (0.8198) time: 0.1705 data: 0.0908 max mem: 9377 +Train: [31] [4800/6250] eta: 0:03:23 lr: 0.000102 grad: 0.0890 (0.0892) loss: 0.8241 (0.8199) time: 0.1409 data: 0.0600 max mem: 9377 +Train: [31] [4900/6250] eta: 0:03:09 lr: 0.000102 grad: 0.0823 (0.0892) loss: 0.8293 (0.8199) time: 0.1389 data: 0.0592 max mem: 9377 +Train: [31] [5000/6250] eta: 0:02:55 lr: 0.000102 grad: 0.0856 (0.0892) loss: 0.8236 (0.8200) time: 0.1488 data: 0.0673 max mem: 9377 +Train: [31] [5100/6250] eta: 0:02:41 lr: 0.000102 grad: 0.0907 (0.0892) loss: 0.8259 (0.8202) time: 0.1417 data: 0.0642 max mem: 9377 +Train: [31] [5200/6250] eta: 0:02:27 lr: 0.000102 grad: 0.0832 (0.0892) loss: 0.8243 (0.8203) time: 0.1581 data: 0.0763 max mem: 9377 +Train: [31] [5300/6250] eta: 0:02:13 lr: 0.000102 grad: 0.0869 (0.0892) loss: 0.8269 (0.8204) time: 0.1593 data: 0.0740 max mem: 9377 +Train: [31] [5400/6250] eta: 0:01:59 lr: 0.000102 grad: 0.0857 (0.0891) loss: 0.8276 (0.8205) time: 0.1503 data: 0.0710 max mem: 9377 +Train: [31] [5500/6250] eta: 0:01:45 lr: 0.000102 grad: 0.0841 (0.0890) loss: 0.8226 (0.8206) time: 0.1514 data: 0.0621 max mem: 9377 +Train: [31] [5600/6250] eta: 0:01:31 lr: 0.000102 grad: 0.0878 (0.0889) loss: 0.8224 (0.8207) time: 0.1235 data: 0.0404 max mem: 9377 +Train: [31] [5700/6250] eta: 0:01:17 lr: 0.000102 grad: 0.0775 (0.0888) loss: 0.8306 (0.8209) time: 0.1454 data: 0.0626 max mem: 9377 +Train: [31] [5800/6250] eta: 0:01:03 lr: 0.000102 grad: 0.0778 (0.0887) loss: 0.8317 (0.8210) time: 0.1531 data: 0.0691 max mem: 9377 +Train: [31] [5900/6250] eta: 0:00:49 lr: 0.000102 grad: 0.0811 (0.0886) loss: 0.8274 (0.8211) time: 0.1311 data: 0.0469 max mem: 9377 +Train: [31] [6000/6250] eta: 0:00:35 lr: 0.000102 grad: 0.0806 (0.0885) loss: 0.8258 (0.8212) time: 0.1309 data: 0.0389 max mem: 9377 +Train: [31] [6100/6250] eta: 0:00:21 lr: 0.000102 grad: 0.0780 (0.0884) loss: 0.8305 (0.8214) time: 0.1152 data: 0.0359 max mem: 9377 +Train: [31] [6200/6250] eta: 0:00:07 lr: 0.000102 grad: 0.0843 (0.0883) loss: 0.8240 (0.8215) time: 0.1149 data: 0.0308 max mem: 9377 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.0829 (0.0883) loss: 0.8272 (0.8216) time: 0.1251 data: 0.0414 max mem: 9377 +Train: [31] Total time: 0:14:45 (0.1416 s / it) +Averaged stats: lr: 0.000102 grad: 0.0829 (0.0883) loss: 0.8272 (0.8216) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:04:43 loss: 0.8403 (0.8403) time: 4.5806 data: 4.5503 max mem: 9377 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8420 (0.8433) time: 0.1188 data: 0.0942 max mem: 9377 +Eval (hcp-train-subset): [31] Total time: 0:00:12 (0.1952 s / it) +Averaged stats (hcp-train-subset): loss: 0.8420 (0.8433) +Eval (hcp-val): [31] [ 0/62] eta: 0:03:20 loss: 0.8383 (0.8383) time: 3.2319 data: 3.1519 max mem: 9377 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8402 (0.8412) time: 0.1121 data: 0.0858 max mem: 9377 +Eval (hcp-val): [31] Total time: 0:00:12 (0.2020 s / it) +Averaged stats (hcp-val): loss: 0.8402 (0.8412) +Eval (nsd-val): [31] [ 0/62] eta: 0:04:07 loss: 0.8054 (0.8054) time: 3.9896 data: 3.9245 max mem: 9377 +Eval (nsd-val): [31] [61/62] eta: 0:00:00 loss: 0.8153 (0.8170) time: 0.1321 data: 0.1071 max mem: 9377 +Eval (nsd-val): [31] Total time: 0:00:12 (0.2072 s / it) +Averaged stats (nsd-val): loss: 0.8153 (0.8170) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [32] [ 0/6250] eta: 8:41:50 lr: 0.000102 grad: 0.0564 (0.0564) loss: 0.8670 (0.8670) time: 5.0096 data: 4.7751 max mem: 9377 +Train: [32] [ 100/6250] eta: 0:19:18 lr: 0.000102 grad: 0.0954 (0.1127) loss: 0.8229 (0.8333) time: 0.1386 data: 0.0434 max mem: 9377 +Train: [32] [ 200/6250] eta: 0:16:42 lr: 0.000102 grad: 0.0784 (0.1007) loss: 0.8253 (0.8282) time: 0.1274 data: 0.0403 max mem: 9377 +Train: [32] [ 300/6250] eta: 0:15:27 lr: 0.000102 grad: 0.0795 (0.0953) loss: 0.8200 (0.8256) time: 0.1391 data: 0.0440 max mem: 9377 +Train: [32] [ 400/6250] eta: 0:14:47 lr: 0.000102 grad: 0.0857 (0.0931) loss: 0.8178 (0.8234) time: 0.1542 data: 0.0676 max mem: 9377 +Train: [32] [ 500/6250] eta: 0:14:06 lr: 0.000102 grad: 0.0809 (0.0911) loss: 0.8138 (0.8230) time: 0.1271 data: 0.0339 max mem: 9377 +Train: [32] [ 600/6250] eta: 0:13:49 lr: 0.000102 grad: 0.0758 (0.0897) loss: 0.8271 (0.8231) time: 0.1168 data: 0.0266 max mem: 9377 +Train: [32] [ 700/6250] eta: 0:13:38 lr: 0.000102 grad: 0.0866 (0.0891) loss: 0.8188 (0.8225) time: 0.1480 data: 0.0705 max mem: 9377 +Train: [32] [ 800/6250] eta: 0:13:23 lr: 0.000101 grad: 0.0858 (0.0888) loss: 0.8124 (0.8218) time: 0.1579 data: 0.0752 max mem: 9377 +Train: [32] [ 900/6250] eta: 0:13:03 lr: 0.000101 grad: 0.0863 (0.0888) loss: 0.8228 (0.8213) time: 0.1498 data: 0.0584 max mem: 9377 +Train: [32] [1000/6250] eta: 0:12:42 lr: 0.000101 grad: 0.0827 (0.0888) loss: 0.8265 (0.8210) time: 0.1243 data: 0.0378 max mem: 9377 +Train: [32] [1100/6250] eta: 0:12:26 lr: 0.000101 grad: 0.0822 (0.0886) loss: 0.8192 (0.8208) time: 0.1435 data: 0.0587 max mem: 9377 +Train: [32] [1200/6250] eta: 0:12:10 lr: 0.000101 grad: 0.0806 (0.0886) loss: 0.8142 (0.8205) time: 0.1640 data: 0.0811 max mem: 9377 +Train: [32] [1300/6250] eta: 0:11:52 lr: 0.000101 grad: 0.0826 (0.0885) loss: 0.8203 (0.8203) time: 0.1450 data: 0.0613 max mem: 9377 +Train: [32] [1400/6250] eta: 0:11:36 lr: 0.000101 grad: 0.0778 (0.0882) loss: 0.8287 (0.8202) time: 0.1422 data: 0.0593 max mem: 9377 +Train: [32] [1500/6250] eta: 0:11:21 lr: 0.000101 grad: 0.0844 (0.0882) loss: 0.8152 (0.8200) time: 0.1422 data: 0.0559 max mem: 9377 +Train: [32] [1600/6250] eta: 0:11:05 lr: 0.000101 grad: 0.0842 (0.0882) loss: 0.8191 (0.8199) time: 0.1519 data: 0.0704 max mem: 9377 +Train: [32] [1700/6250] eta: 0:10:51 lr: 0.000101 grad: 0.0870 (0.0884) loss: 0.8176 (0.8196) time: 0.1493 data: 0.0661 max mem: 9377 +Train: [32] [1800/6250] eta: 0:10:36 lr: 0.000101 grad: 0.0830 (0.0885) loss: 0.8232 (0.8195) time: 0.1467 data: 0.0632 max mem: 9377 +Train: [32] [1900/6250] eta: 0:10:21 lr: 0.000101 grad: 0.0809 (0.0886) loss: 0.8174 (0.8193) time: 0.1351 data: 0.0502 max mem: 9377 +Train: [32] [2000/6250] eta: 0:10:06 lr: 0.000101 grad: 0.0862 (0.0887) loss: 0.8145 (0.8192) time: 0.1354 data: 0.0524 max mem: 9377 +Train: [32] [2100/6250] eta: 0:09:49 lr: 0.000101 grad: 0.0838 (0.0890) loss: 0.8159 (0.8191) time: 0.1203 data: 0.0390 max mem: 9377 +Train: [32] [2200/6250] eta: 0:09:34 lr: 0.000101 grad: 0.0867 (0.0890) loss: 0.8174 (0.8190) time: 0.1401 data: 0.0562 max mem: 9377 +Train: [32] [2300/6250] eta: 0:09:19 lr: 0.000101 grad: 0.0918 (0.0890) loss: 0.8141 (0.8189) time: 0.1454 data: 0.0649 max mem: 9377 +Train: [32] [2400/6250] eta: 0:09:03 lr: 0.000101 grad: 0.0797 (0.0890) loss: 0.8184 (0.8188) time: 0.1349 data: 0.0555 max mem: 9377 +Train: [32] [2500/6250] eta: 0:08:48 lr: 0.000101 grad: 0.0790 (0.0890) loss: 0.8249 (0.8188) time: 0.1350 data: 0.0524 max mem: 9377 +Train: [32] [2600/6250] eta: 0:08:34 lr: 0.000101 grad: 0.0844 (0.0889) loss: 0.8243 (0.8189) time: 0.1382 data: 0.0564 max mem: 9377 +Train: [32] [2700/6250] eta: 0:08:20 lr: 0.000101 grad: 0.0848 (0.0889) loss: 0.8195 (0.8189) time: 0.1324 data: 0.0551 max mem: 9377 +Train: [32] [2800/6250] eta: 0:08:06 lr: 0.000101 grad: 0.0871 (0.0888) loss: 0.8216 (0.8190) time: 0.1172 data: 0.0339 max mem: 9377 +Train: [32] [2900/6250] eta: 0:07:51 lr: 0.000101 grad: 0.0821 (0.0888) loss: 0.8171 (0.8191) time: 0.1174 data: 0.0317 max mem: 9377 +Train: [32] [3000/6250] eta: 0:07:36 lr: 0.000101 grad: 0.0829 (0.0890) loss: 0.8179 (0.8191) time: 0.1444 data: 0.0665 max mem: 9377 +Train: [32] [3100/6250] eta: 0:07:22 lr: 0.000101 grad: 0.0896 (0.0889) loss: 0.8204 (0.8191) time: 0.1317 data: 0.0466 max mem: 9377 +Train: [32] [3200/6250] eta: 0:07:07 lr: 0.000101 grad: 0.0859 (0.0888) loss: 0.8170 (0.8191) time: 0.1293 data: 0.0507 max mem: 9377 +Train: [32] [3300/6250] eta: 0:06:53 lr: 0.000101 grad: 0.0851 (0.0889) loss: 0.8174 (0.8191) time: 0.1356 data: 0.0548 max mem: 9377 +Train: [32] [3400/6250] eta: 0:06:39 lr: 0.000101 grad: 0.0910 (0.0889) loss: 0.8209 (0.8192) time: 0.1447 data: 0.0669 max mem: 9377 +Train: [32] [3500/6250] eta: 0:06:25 lr: 0.000101 grad: 0.0810 (0.0888) loss: 0.8201 (0.8193) time: 0.1449 data: 0.0625 max mem: 9377 +Train: [32] [3600/6250] eta: 0:06:11 lr: 0.000101 grad: 0.0911 (0.0889) loss: 0.8213 (0.8193) time: 0.1454 data: 0.0664 max mem: 9377 +Train: [32] [3700/6250] eta: 0:05:57 lr: 0.000101 grad: 0.0819 (0.0890) loss: 0.8257 (0.8193) time: 0.1318 data: 0.0504 max mem: 9377 +Train: [32] [3800/6250] eta: 0:05:42 lr: 0.000101 grad: 0.0833 (0.0891) loss: 0.8194 (0.8193) time: 0.1276 data: 0.0469 max mem: 9377 +Train: [32] [3900/6250] eta: 0:05:28 lr: 0.000101 grad: 0.0804 (0.0891) loss: 0.8221 (0.8193) time: 0.1334 data: 0.0512 max mem: 9377 +Train: [32] [4000/6250] eta: 0:05:14 lr: 0.000101 grad: 0.0894 (0.0891) loss: 0.8181 (0.8194) time: 0.1315 data: 0.0491 max mem: 9377 +Train: [32] [4100/6250] eta: 0:05:00 lr: 0.000101 grad: 0.0873 (0.0891) loss: 0.8192 (0.8194) time: 0.1478 data: 0.0749 max mem: 9377 +Train: [32] [4200/6250] eta: 0:04:45 lr: 0.000101 grad: 0.0888 (0.0890) loss: 0.8166 (0.8194) time: 0.1234 data: 0.0423 max mem: 9377 +Train: [32] [4300/6250] eta: 0:04:31 lr: 0.000101 grad: 0.0807 (0.0890) loss: 0.8241 (0.8194) time: 0.1202 data: 0.0342 max mem: 9377 +Train: [32] [4400/6250] eta: 0:04:17 lr: 0.000101 grad: 0.0890 (0.0890) loss: 0.8291 (0.8196) time: 0.1445 data: 0.0545 max mem: 9377 +Train: [32] [4500/6250] eta: 0:04:03 lr: 0.000101 grad: 0.0879 (0.0890) loss: 0.8245 (0.8196) time: 0.1315 data: 0.0503 max mem: 9377 +Train: [32] [4600/6250] eta: 0:03:49 lr: 0.000101 grad: 0.0924 (0.0890) loss: 0.8119 (0.8197) time: 0.1244 data: 0.0434 max mem: 9377 +Train: [32] [4700/6250] eta: 0:03:35 lr: 0.000100 grad: 0.0844 (0.0890) loss: 0.8265 (0.8197) time: 0.1328 data: 0.0498 max mem: 9377 +Train: [32] [4800/6250] eta: 0:03:21 lr: 0.000100 grad: 0.0891 (0.0891) loss: 0.8164 (0.8197) time: 0.1599 data: 0.0801 max mem: 9377 +Train: [32] [4900/6250] eta: 0:03:08 lr: 0.000100 grad: 0.0888 (0.0891) loss: 0.8231 (0.8197) time: 0.1839 data: 0.1114 max mem: 9377 +Train: [32] [5000/6250] eta: 0:02:54 lr: 0.000100 grad: 0.0821 (0.0890) loss: 0.8225 (0.8198) time: 0.1633 data: 0.0805 max mem: 9377 +Train: [32] [5100/6250] eta: 0:02:40 lr: 0.000100 grad: 0.0859 (0.0890) loss: 0.8275 (0.8198) time: 0.1678 data: 0.0885 max mem: 9377 +Train: [32] [5200/6250] eta: 0:02:27 lr: 0.000100 grad: 0.0857 (0.0890) loss: 0.8171 (0.8198) time: 0.1458 data: 0.0678 max mem: 9377 +Train: [32] [5300/6250] eta: 0:02:13 lr: 0.000100 grad: 0.0951 (0.0891) loss: 0.8178 (0.8198) time: 0.1359 data: 0.0536 max mem: 9377 +Train: [32] [5400/6250] eta: 0:01:59 lr: 0.000100 grad: 0.0834 (0.0891) loss: 0.8274 (0.8198) time: 0.1489 data: 0.0679 max mem: 9377 +Train: [32] [5500/6250] eta: 0:01:45 lr: 0.000100 grad: 0.0807 (0.0892) loss: 0.8273 (0.8199) time: 0.1341 data: 0.0531 max mem: 9377 +Train: [32] [5600/6250] eta: 0:01:30 lr: 0.000100 grad: 0.0901 (0.0893) loss: 0.8204 (0.8199) time: 0.1375 data: 0.0533 max mem: 9377 +Train: [32] [5700/6250] eta: 0:01:16 lr: 0.000100 grad: 0.0859 (0.0893) loss: 0.8206 (0.8199) time: 0.1778 data: 0.1040 max mem: 9377 +Train: [32] [5800/6250] eta: 0:01:02 lr: 0.000100 grad: 0.0827 (0.0892) loss: 0.8203 (0.8199) time: 0.1213 data: 0.0463 max mem: 9377 +Train: [32] [5900/6250] eta: 0:00:48 lr: 0.000100 grad: 0.0868 (0.0892) loss: 0.8232 (0.8199) time: 0.1279 data: 0.0538 max mem: 9377 +Train: [32] [6000/6250] eta: 0:00:34 lr: 0.000100 grad: 0.0894 (0.0892) loss: 0.8107 (0.8198) time: 0.1488 data: 0.0713 max mem: 9377 +Train: [32] [6100/6250] eta: 0:00:20 lr: 0.000100 grad: 0.0903 (0.0892) loss: 0.8101 (0.8197) time: 0.1448 data: 0.0677 max mem: 9377 +Train: [32] [6200/6250] eta: 0:00:06 lr: 0.000100 grad: 0.0836 (0.0893) loss: 0.8192 (0.8197) time: 0.1395 data: 0.0583 max mem: 9377 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.0846 (0.0893) loss: 0.8176 (0.8197) time: 0.1265 data: 0.0440 max mem: 9377 +Train: [32] Total time: 0:14:38 (0.1406 s / it) +Averaged stats: lr: 0.000100 grad: 0.0846 (0.0893) loss: 0.8176 (0.8197) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:03:58 loss: 0.8424 (0.8424) time: 3.8471 data: 3.7883 max mem: 9377 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8417 (0.8437) time: 0.1165 data: 0.0902 max mem: 9377 +Eval (hcp-train-subset): [32] Total time: 0:00:12 (0.1984 s / it) +Averaged stats (hcp-train-subset): loss: 0.8417 (0.8437) +Eval (hcp-val): [32] [ 0/62] eta: 0:03:14 loss: 0.8396 (0.8396) time: 3.1356 data: 3.0361 max mem: 9377 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8389 (0.8414) time: 0.1084 data: 0.0836 max mem: 9377 +Eval (hcp-val): [32] Total time: 0:00:11 (0.1903 s / it) +Averaged stats (hcp-val): loss: 0.8389 (0.8414) +Eval (nsd-val): [32] [ 0/62] eta: 0:04:47 loss: 0.8108 (0.8108) time: 4.6328 data: 4.6019 max mem: 9377 +Eval (nsd-val): [32] [61/62] eta: 0:00:00 loss: 0.8234 (0.8227) time: 0.1124 data: 0.0874 max mem: 9377 +Eval (nsd-val): [32] Total time: 0:00:11 (0.1928 s / it) +Averaged stats (nsd-val): loss: 0.8234 (0.8227) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 9:52:16 lr: 0.000100 grad: 0.2595 (0.2595) loss: 0.7772 (0.7772) time: 5.6858 data: 5.5915 max mem: 9377 +Train: [33] [ 100/6250] eta: 0:18:45 lr: 0.000100 grad: 0.0786 (0.1039) loss: 0.8364 (0.8414) time: 0.1273 data: 0.0236 max mem: 9377 +Train: [33] [ 200/6250] eta: 0:16:17 lr: 0.000100 grad: 0.0842 (0.0977) loss: 0.8233 (0.8323) time: 0.1459 data: 0.0592 max mem: 9377 +Train: [33] [ 300/6250] eta: 0:15:09 lr: 0.000100 grad: 0.0816 (0.0941) loss: 0.8240 (0.8286) time: 0.1399 data: 0.0543 max mem: 9377 +Train: [33] [ 400/6250] eta: 0:14:23 lr: 0.000100 grad: 0.0821 (0.0920) loss: 0.8229 (0.8274) time: 0.1246 data: 0.0332 max mem: 9377 +Train: [33] [ 500/6250] eta: 0:13:50 lr: 0.000100 grad: 0.0859 (0.0906) loss: 0.8296 (0.8265) time: 0.1412 data: 0.0490 max mem: 9377 +Train: [33] [ 600/6250] eta: 0:13:27 lr: 0.000100 grad: 0.0861 (0.0900) loss: 0.8215 (0.8258) time: 0.1419 data: 0.0417 max mem: 9377 +Train: [33] [ 700/6250] eta: 0:13:19 lr: 0.000100 grad: 0.0877 (0.0894) loss: 0.8191 (0.8251) time: 0.1410 data: 0.0570 max mem: 9377 +Train: [33] [ 800/6250] eta: 0:13:03 lr: 0.000100 grad: 0.0815 (0.0889) loss: 0.8226 (0.8244) time: 0.1452 data: 0.0612 max mem: 9377 +Train: [33] [ 900/6250] eta: 0:12:47 lr: 0.000100 grad: 0.0829 (0.0887) loss: 0.8250 (0.8237) time: 0.1497 data: 0.0627 max mem: 9377 +Train: [33] [1000/6250] eta: 0:12:40 lr: 0.000100 grad: 0.0876 (0.0884) loss: 0.8122 (0.8232) time: 0.1524 data: 0.0749 max mem: 9377 +Train: [33] [1100/6250] eta: 0:12:29 lr: 0.000100 grad: 0.0840 (0.0883) loss: 0.8206 (0.8228) time: 0.1760 data: 0.0975 max mem: 9377 +Train: [33] [1200/6250] eta: 0:12:16 lr: 0.000100 grad: 0.0889 (0.0886) loss: 0.8160 (0.8224) time: 0.1456 data: 0.0681 max mem: 9377 +Train: [33] [1300/6250] eta: 0:11:59 lr: 0.000100 grad: 0.0840 (0.0886) loss: 0.8107 (0.8219) time: 0.1384 data: 0.0602 max mem: 9377 +Train: [33] [1400/6250] eta: 0:11:44 lr: 0.000100 grad: 0.0930 (0.0888) loss: 0.8191 (0.8215) time: 0.1289 data: 0.0470 max mem: 9377 +Train: [33] [1500/6250] eta: 0:11:32 lr: 0.000100 grad: 0.0833 (0.0886) loss: 0.8152 (0.8212) time: 0.1744 data: 0.0946 max mem: 9377 +Train: [33] [1600/6250] eta: 0:11:17 lr: 0.000100 grad: 0.0839 (0.0886) loss: 0.8234 (0.8211) time: 0.1184 data: 0.0373 max mem: 9377 +Train: [33] [1700/6250] eta: 0:11:02 lr: 0.000100 grad: 0.0809 (0.0886) loss: 0.8157 (0.8209) time: 0.1656 data: 0.0865 max mem: 9377 +Train: [33] [1800/6250] eta: 0:10:44 lr: 0.000100 grad: 0.0925 (0.0887) loss: 0.8138 (0.8206) time: 0.1317 data: 0.0453 max mem: 9377 +Train: [33] [1900/6250] eta: 0:10:28 lr: 0.000100 grad: 0.0866 (0.0889) loss: 0.8145 (0.8203) time: 0.1250 data: 0.0417 max mem: 9377 +Train: [33] [2000/6250] eta: 0:10:13 lr: 0.000100 grad: 0.0877 (0.0891) loss: 0.8197 (0.8200) time: 0.1243 data: 0.0395 max mem: 9377 +Train: [33] [2100/6250] eta: 0:09:59 lr: 0.000100 grad: 0.0825 (0.0891) loss: 0.8153 (0.8198) time: 0.1710 data: 0.0911 max mem: 9377 +Train: [33] [2200/6250] eta: 0:09:44 lr: 0.000099 grad: 0.0842 (0.0892) loss: 0.8195 (0.8197) time: 0.1402 data: 0.0573 max mem: 9377 +Train: [33] [2300/6250] eta: 0:09:30 lr: 0.000099 grad: 0.0941 (0.0892) loss: 0.8144 (0.8194) time: 0.1559 data: 0.0758 max mem: 9377 +Train: [33] [2400/6250] eta: 0:09:14 lr: 0.000099 grad: 0.0900 (0.0893) loss: 0.8134 (0.8193) time: 0.1178 data: 0.0431 max mem: 9377 +Train: [33] [2500/6250] eta: 0:08:59 lr: 0.000099 grad: 0.0850 (0.0893) loss: 0.8117 (0.8191) time: 0.1535 data: 0.0715 max mem: 9377 +Train: [33] [2600/6250] eta: 0:08:44 lr: 0.000099 grad: 0.0841 (0.0893) loss: 0.8151 (0.8190) time: 0.1354 data: 0.0577 max mem: 9377 +Train: [33] [2700/6250] eta: 0:08:29 lr: 0.000099 grad: 0.0893 (0.0894) loss: 0.8142 (0.8188) time: 0.1452 data: 0.0635 max mem: 9377 +Train: [33] [2800/6250] eta: 0:08:15 lr: 0.000099 grad: 0.0878 (0.0894) loss: 0.8177 (0.8187) time: 0.1475 data: 0.0725 max mem: 9377 +Train: [33] [2900/6250] eta: 0:08:00 lr: 0.000099 grad: 0.0898 (0.0894) loss: 0.8129 (0.8186) time: 0.1551 data: 0.0752 max mem: 9377 +Train: [33] [3000/6250] eta: 0:07:45 lr: 0.000099 grad: 0.0866 (0.0894) loss: 0.8140 (0.8186) time: 0.1443 data: 0.0638 max mem: 9377 +Train: [33] [3100/6250] eta: 0:07:30 lr: 0.000099 grad: 0.0892 (0.0894) loss: 0.8178 (0.8185) time: 0.1602 data: 0.0800 max mem: 9377 +Train: [33] [3200/6250] eta: 0:07:15 lr: 0.000099 grad: 0.0827 (0.0895) loss: 0.8216 (0.8184) time: 0.1452 data: 0.0691 max mem: 9377 +Train: [33] [3300/6250] eta: 0:07:01 lr: 0.000099 grad: 0.0874 (0.0895) loss: 0.8191 (0.8184) time: 0.1548 data: 0.0755 max mem: 9377 +Train: [33] [3400/6250] eta: 0:06:46 lr: 0.000099 grad: 0.0889 (0.0895) loss: 0.8197 (0.8184) time: 0.1549 data: 0.0691 max mem: 9377 +Train: [33] [3500/6250] eta: 0:06:31 lr: 0.000099 grad: 0.0862 (0.0895) loss: 0.8150 (0.8184) time: 0.1402 data: 0.0598 max mem: 9377 +Train: [33] [3600/6250] eta: 0:06:16 lr: 0.000099 grad: 0.0888 (0.0896) loss: 0.8104 (0.8184) time: 0.1260 data: 0.0423 max mem: 9377 +Train: [33] [3700/6250] eta: 0:06:01 lr: 0.000099 grad: 0.0862 (0.0896) loss: 0.8174 (0.8183) time: 0.1354 data: 0.0517 max mem: 9377 +Train: [33] [3800/6250] eta: 0:05:47 lr: 0.000099 grad: 0.0932 (0.0898) loss: 0.8115 (0.8183) time: 0.1369 data: 0.0560 max mem: 9377 +Train: [33] [3900/6250] eta: 0:05:32 lr: 0.000099 grad: 0.0871 (0.0898) loss: 0.8206 (0.8182) time: 0.1377 data: 0.0580 max mem: 9377 +Train: [33] [4000/6250] eta: 0:05:18 lr: 0.000099 grad: 0.0908 (0.0898) loss: 0.8177 (0.8183) time: 0.1555 data: 0.0773 max mem: 9377 +Train: [33] [4100/6250] eta: 0:05:03 lr: 0.000099 grad: 0.0962 (0.0899) loss: 0.8193 (0.8183) time: 0.1321 data: 0.0506 max mem: 9377 +Train: [33] [4200/6250] eta: 0:04:49 lr: 0.000099 grad: 0.0916 (0.0900) loss: 0.8212 (0.8183) time: 0.1341 data: 0.0521 max mem: 9377 +Train: [33] [4300/6250] eta: 0:04:35 lr: 0.000099 grad: 0.0824 (0.0899) loss: 0.8204 (0.8183) time: 0.1603 data: 0.0804 max mem: 9377 +Train: [33] [4400/6250] eta: 0:04:21 lr: 0.000099 grad: 0.0914 (0.0900) loss: 0.8153 (0.8183) time: 0.1558 data: 0.0766 max mem: 9377 +Train: [33] [4500/6250] eta: 0:04:07 lr: 0.000099 grad: 0.0864 (0.0900) loss: 0.8207 (0.8183) time: 0.1549 data: 0.0741 max mem: 9377 +Train: [33] [4600/6250] eta: 0:03:52 lr: 0.000099 grad: 0.0859 (0.0900) loss: 0.8216 (0.8183) time: 0.1269 data: 0.0448 max mem: 9377 +Train: [33] [4700/6250] eta: 0:03:38 lr: 0.000099 grad: 0.0853 (0.0900) loss: 0.8319 (0.8184) time: 0.1404 data: 0.0575 max mem: 9377 +Train: [33] [4800/6250] eta: 0:03:24 lr: 0.000099 grad: 0.0862 (0.0899) loss: 0.8181 (0.8185) time: 0.1217 data: 0.0358 max mem: 9377 +Train: [33] [4900/6250] eta: 0:03:11 lr: 0.000099 grad: 0.0865 (0.0899) loss: 0.8249 (0.8186) time: 0.1586 data: 0.0724 max mem: 9377 +Train: [33] [5000/6250] eta: 0:02:57 lr: 0.000099 grad: 0.0823 (0.0898) loss: 0.8300 (0.8187) time: 0.1458 data: 0.0662 max mem: 9377 +Train: [33] [5100/6250] eta: 0:02:43 lr: 0.000099 grad: 0.0810 (0.0897) loss: 0.8297 (0.8188) time: 0.1297 data: 0.0510 max mem: 9377 +Train: [33] [5200/6250] eta: 0:02:29 lr: 0.000099 grad: 0.0824 (0.0896) loss: 0.8221 (0.8189) time: 0.1288 data: 0.0521 max mem: 9377 +Train: [33] [5300/6250] eta: 0:02:15 lr: 0.000099 grad: 0.0873 (0.0896) loss: 0.8260 (0.8190) time: 0.1415 data: 0.0535 max mem: 9377 +Train: [33] [5400/6250] eta: 0:02:00 lr: 0.000099 grad: 0.0799 (0.0896) loss: 0.8272 (0.8191) time: 0.1402 data: 0.0577 max mem: 9377 +Train: [33] [5500/6250] eta: 0:01:46 lr: 0.000099 grad: 0.0872 (0.0896) loss: 0.8228 (0.8192) time: 0.1620 data: 0.0821 max mem: 9377 +Train: [33] [5600/6250] eta: 0:01:32 lr: 0.000099 grad: 0.0895 (0.0896) loss: 0.8255 (0.8192) time: 0.1421 data: 0.0607 max mem: 9377 +Train: [33] [5700/6250] eta: 0:01:18 lr: 0.000099 grad: 0.0865 (0.0896) loss: 0.8192 (0.8193) time: 0.1366 data: 0.0502 max mem: 9377 +Train: [33] [5800/6250] eta: 0:01:03 lr: 0.000099 grad: 0.0836 (0.0896) loss: 0.8236 (0.8193) time: 0.1356 data: 0.0510 max mem: 9377 +Train: [33] [5900/6250] eta: 0:00:49 lr: 0.000098 grad: 0.0860 (0.0896) loss: 0.8252 (0.8194) time: 0.1453 data: 0.0628 max mem: 9377 +Train: [33] [6000/6250] eta: 0:00:35 lr: 0.000098 grad: 0.0834 (0.0896) loss: 0.8232 (0.8194) time: 0.1472 data: 0.0679 max mem: 9377 +Train: [33] [6100/6250] eta: 0:00:21 lr: 0.000098 grad: 0.0852 (0.0895) loss: 0.8207 (0.8195) time: 0.1402 data: 0.0597 max mem: 9377 +Train: [33] [6200/6250] eta: 0:00:07 lr: 0.000098 grad: 0.0817 (0.0895) loss: 0.8234 (0.8195) time: 0.1846 data: 0.0954 max mem: 9377 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.0884 (0.0895) loss: 0.8192 (0.8195) time: 0.1387 data: 0.0536 max mem: 9377 +Train: [33] Total time: 0:14:53 (0.1430 s / it) +Averaged stats: lr: 0.000098 grad: 0.0884 (0.0895) loss: 0.8192 (0.8195) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:04:45 loss: 0.8454 (0.8454) time: 4.6083 data: 4.5785 max mem: 9377 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8441 (0.8456) time: 0.0931 data: 0.0686 max mem: 9377 +Eval (hcp-train-subset): [33] Total time: 0:00:13 (0.2119 s / it) +Averaged stats (hcp-train-subset): loss: 0.8441 (0.8456) +Eval (hcp-val): [33] [ 0/62] eta: 0:03:10 loss: 0.8414 (0.8414) time: 3.0715 data: 2.9935 max mem: 9377 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8396 (0.8427) time: 0.0968 data: 0.0722 max mem: 9377 +Eval (hcp-val): [33] Total time: 0:00:12 (0.1947 s / it) +Averaged stats (hcp-val): loss: 0.8396 (0.8427) +Eval (nsd-val): [33] [ 0/62] eta: 0:03:54 loss: 0.8107 (0.8107) time: 3.7865 data: 3.7219 max mem: 9377 +Eval (nsd-val): [33] [61/62] eta: 0:00:00 loss: 0.8222 (0.8235) time: 0.0950 data: 0.0686 max mem: 9377 +Eval (nsd-val): [33] Total time: 0:00:11 (0.1933 s / it) +Averaged stats (nsd-val): loss: 0.8222 (0.8235) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [34] [ 0/6250] eta: 9:40:13 lr: 0.000098 grad: 0.0588 (0.0588) loss: 0.8479 (0.8479) time: 5.5702 data: 5.4763 max mem: 9377 +Train: [34] [ 100/6250] eta: 0:19:18 lr: 0.000098 grad: 0.0862 (0.1004) loss: 0.8236 (0.8325) time: 0.1313 data: 0.0367 max mem: 9377 +Train: [34] [ 200/6250] eta: 0:16:43 lr: 0.000098 grad: 0.0831 (0.0972) loss: 0.8249 (0.8287) time: 0.1555 data: 0.0651 max mem: 9377 +Train: [34] [ 300/6250] eta: 0:15:32 lr: 0.000098 grad: 0.0965 (0.0971) loss: 0.8182 (0.8260) time: 0.1543 data: 0.0552 max mem: 9377 +Train: [34] [ 400/6250] eta: 0:14:35 lr: 0.000098 grad: 0.0833 (0.0956) loss: 0.8274 (0.8246) time: 0.1194 data: 0.0225 max mem: 9377 +Train: [34] [ 500/6250] eta: 0:13:58 lr: 0.000098 grad: 0.0877 (0.0957) loss: 0.8217 (0.8240) time: 0.1129 data: 0.0221 max mem: 9377 +Train: [34] [ 600/6250] eta: 0:13:36 lr: 0.000098 grad: 0.0845 (0.0951) loss: 0.8256 (0.8236) time: 0.1364 data: 0.0532 max mem: 9377 +Train: [34] [ 700/6250] eta: 0:13:20 lr: 0.000098 grad: 0.0878 (0.0941) loss: 0.8228 (0.8237) time: 0.1537 data: 0.0677 max mem: 9377 +Train: [34] [ 800/6250] eta: 0:13:06 lr: 0.000098 grad: 0.0844 (0.0936) loss: 0.8253 (0.8234) time: 0.1399 data: 0.0486 max mem: 9377 +Train: [34] [ 900/6250] eta: 0:12:53 lr: 0.000098 grad: 0.0877 (0.0934) loss: 0.8226 (0.8231) time: 0.1564 data: 0.0712 max mem: 9377 +Train: [34] [1000/6250] eta: 0:12:38 lr: 0.000098 grad: 0.0925 (0.0940) loss: 0.8144 (0.8225) time: 0.1528 data: 0.0693 max mem: 9377 +Train: [34] [1100/6250] eta: 0:12:25 lr: 0.000098 grad: 0.0907 (0.0942) loss: 0.8225 (0.8221) time: 0.1368 data: 0.0564 max mem: 9377 +Train: [34] [1200/6250] eta: 0:12:10 lr: 0.000098 grad: 0.0891 (0.0940) loss: 0.8182 (0.8215) time: 0.1357 data: 0.0529 max mem: 9377 +Train: [34] [1300/6250] eta: 0:11:59 lr: 0.000098 grad: 0.0941 (0.0938) loss: 0.8171 (0.8212) time: 0.1704 data: 0.0901 max mem: 9377 +Train: [34] [1400/6250] eta: 0:11:47 lr: 0.000098 grad: 0.0865 (0.0937) loss: 0.8129 (0.8209) time: 0.1827 data: 0.1069 max mem: 9377 +Train: [34] [1500/6250] eta: 0:11:32 lr: 0.000098 grad: 0.0919 (0.0933) loss: 0.8152 (0.8207) time: 0.1552 data: 0.0726 max mem: 9377 +Train: [34] [1600/6250] eta: 0:11:18 lr: 0.000098 grad: 0.0909 (0.0931) loss: 0.8183 (0.8206) time: 0.1432 data: 0.0598 max mem: 9377 +Train: [34] [1700/6250] eta: 0:11:02 lr: 0.000098 grad: 0.0847 (0.0928) loss: 0.8187 (0.8204) time: 0.1450 data: 0.0631 max mem: 9377 +Train: [34] [1800/6250] eta: 0:10:45 lr: 0.000098 grad: 0.0865 (0.0927) loss: 0.8105 (0.8202) time: 0.1280 data: 0.0475 max mem: 9377 +Train: [34] [1900/6250] eta: 0:10:30 lr: 0.000098 grad: 0.0884 (0.0925) loss: 0.8089 (0.8201) time: 0.1519 data: 0.0730 max mem: 9377 +Train: [34] [2000/6250] eta: 0:10:15 lr: 0.000098 grad: 0.0845 (0.0925) loss: 0.8255 (0.8200) time: 0.1381 data: 0.0563 max mem: 9377 +Train: [34] [2100/6250] eta: 0:09:58 lr: 0.000098 grad: 0.0794 (0.0922) loss: 0.8267 (0.8201) time: 0.1309 data: 0.0500 max mem: 9377 +Train: [34] [2200/6250] eta: 0:09:44 lr: 0.000098 grad: 0.0832 (0.0920) loss: 0.8205 (0.8201) time: 0.1484 data: 0.0695 max mem: 9377 +Train: [34] [2300/6250] eta: 0:09:29 lr: 0.000098 grad: 0.0770 (0.0918) loss: 0.8294 (0.8202) time: 0.1162 data: 0.0381 max mem: 9377 +Train: [34] [2400/6250] eta: 0:09:14 lr: 0.000098 grad: 0.0865 (0.0917) loss: 0.8222 (0.8202) time: 0.1269 data: 0.0436 max mem: 9377 +Train: [34] [2500/6250] eta: 0:09:00 lr: 0.000098 grad: 0.0853 (0.0916) loss: 0.8176 (0.8203) time: 0.1725 data: 0.0889 max mem: 9377 +Train: [34] [2600/6250] eta: 0:08:45 lr: 0.000098 grad: 0.0831 (0.0914) loss: 0.8277 (0.8204) time: 0.1635 data: 0.0806 max mem: 9377 +Train: [34] [2700/6250] eta: 0:08:30 lr: 0.000098 grad: 0.0825 (0.0912) loss: 0.8256 (0.8206) time: 0.1466 data: 0.0630 max mem: 9377 +Train: [34] [2800/6250] eta: 0:08:15 lr: 0.000098 grad: 0.0875 (0.0912) loss: 0.8199 (0.8207) time: 0.1413 data: 0.0588 max mem: 9377 +Train: [34] [2900/6250] eta: 0:08:00 lr: 0.000098 grad: 0.0871 (0.0910) loss: 0.8208 (0.8207) time: 0.1383 data: 0.0586 max mem: 9377 +Train: [34] [3000/6250] eta: 0:07:46 lr: 0.000098 grad: 0.0850 (0.0910) loss: 0.8169 (0.8208) time: 0.1309 data: 0.0518 max mem: 9377 +Train: [34] [3100/6250] eta: 0:07:32 lr: 0.000098 grad: 0.0878 (0.0910) loss: 0.8194 (0.8207) time: 0.1372 data: 0.0563 max mem: 9377 +Train: [34] [3200/6250] eta: 0:07:18 lr: 0.000098 grad: 0.0891 (0.0909) loss: 0.8127 (0.8206) time: 0.1505 data: 0.0600 max mem: 9377 +Train: [34] [3300/6250] eta: 0:07:03 lr: 0.000097 grad: 0.0884 (0.0909) loss: 0.8123 (0.8205) time: 0.1461 data: 0.0694 max mem: 9377 +Train: [34] [3400/6250] eta: 0:06:49 lr: 0.000097 grad: 0.0864 (0.0908) loss: 0.8234 (0.8205) time: 0.1390 data: 0.0539 max mem: 9377 +Train: [34] [3500/6250] eta: 0:06:34 lr: 0.000097 grad: 0.0888 (0.0908) loss: 0.8211 (0.8205) time: 0.1330 data: 0.0514 max mem: 9377 +Train: [34] [3600/6250] eta: 0:06:20 lr: 0.000097 grad: 0.0882 (0.0908) loss: 0.8192 (0.8204) time: 0.1469 data: 0.0676 max mem: 9377 +Train: [34] [3700/6250] eta: 0:06:05 lr: 0.000097 grad: 0.0855 (0.0908) loss: 0.8131 (0.8204) time: 0.1343 data: 0.0553 max mem: 9377 +Train: [34] [3800/6250] eta: 0:05:51 lr: 0.000097 grad: 0.0819 (0.0908) loss: 0.8217 (0.8205) time: 0.1513 data: 0.0677 max mem: 9377 +Train: [34] [3900/6250] eta: 0:05:36 lr: 0.000097 grad: 0.0837 (0.0909) loss: 0.8258 (0.8205) time: 0.1225 data: 0.0324 max mem: 9377 +Train: [34] [4000/6250] eta: 0:05:22 lr: 0.000097 grad: 0.0882 (0.0909) loss: 0.8201 (0.8205) time: 0.1538 data: 0.0764 max mem: 9377 +Train: [34] [4100/6250] eta: 0:05:07 lr: 0.000097 grad: 0.0843 (0.0909) loss: 0.8204 (0.8206) time: 0.1451 data: 0.0643 max mem: 9377 +Train: [34] [4200/6250] eta: 0:04:52 lr: 0.000097 grad: 0.0851 (0.0908) loss: 0.8189 (0.8206) time: 0.1327 data: 0.0505 max mem: 9377 +Train: [34] [4300/6250] eta: 0:04:38 lr: 0.000097 grad: 0.0876 (0.0909) loss: 0.8191 (0.8206) time: 0.1254 data: 0.0398 max mem: 9377 +Train: [34] [4400/6250] eta: 0:04:24 lr: 0.000097 grad: 0.0816 (0.0908) loss: 0.8265 (0.8206) time: 0.1392 data: 0.0577 max mem: 9377 +Train: [34] [4500/6250] eta: 0:04:09 lr: 0.000097 grad: 0.0827 (0.0907) loss: 0.8274 (0.8207) time: 0.1287 data: 0.0469 max mem: 9377 +Train: [34] [4600/6250] eta: 0:03:55 lr: 0.000097 grad: 0.0848 (0.0907) loss: 0.8228 (0.8207) time: 0.1513 data: 0.0663 max mem: 9377 +Train: [34] [4700/6250] eta: 0:03:40 lr: 0.000097 grad: 0.0798 (0.0906) loss: 0.8202 (0.8207) time: 0.1308 data: 0.0479 max mem: 9377 +Train: [34] [4800/6250] eta: 0:03:26 lr: 0.000097 grad: 0.0834 (0.0907) loss: 0.8273 (0.8208) time: 0.1458 data: 0.0637 max mem: 9377 +Train: [34] [4900/6250] eta: 0:03:12 lr: 0.000097 grad: 0.0850 (0.0907) loss: 0.8266 (0.8208) time: 0.1396 data: 0.0572 max mem: 9377 +Train: [34] [5000/6250] eta: 0:02:58 lr: 0.000097 grad: 0.0925 (0.0907) loss: 0.8272 (0.8208) time: 0.1637 data: 0.0827 max mem: 9377 +Train: [34] [5100/6250] eta: 0:02:44 lr: 0.000097 grad: 0.0934 (0.0908) loss: 0.8201 (0.8208) time: 0.1888 data: 0.1086 max mem: 9377 +Train: [34] [5200/6250] eta: 0:02:30 lr: 0.000097 grad: 0.0905 (0.0908) loss: 0.8234 (0.8208) time: 0.1705 data: 0.0884 max mem: 9377 +Train: [34] [5300/6250] eta: 0:02:15 lr: 0.000097 grad: 0.0866 (0.0908) loss: 0.8256 (0.8208) time: 0.1434 data: 0.0709 max mem: 9377 +Train: [34] [5400/6250] eta: 0:02:01 lr: 0.000097 grad: 0.0903 (0.0909) loss: 0.8215 (0.8208) time: 0.1365 data: 0.0543 max mem: 9377 +Train: [34] [5500/6250] eta: 0:01:47 lr: 0.000097 grad: 0.0912 (0.0909) loss: 0.8237 (0.8209) time: 0.1483 data: 0.0621 max mem: 9377 +Train: [34] [5600/6250] eta: 0:01:33 lr: 0.000097 grad: 0.0843 (0.0908) loss: 0.8287 (0.8209) time: 0.1311 data: 0.0439 max mem: 9377 +Train: [34] [5700/6250] eta: 0:01:18 lr: 0.000097 grad: 0.0869 (0.0908) loss: 0.8263 (0.8210) time: 0.1377 data: 0.0559 max mem: 9377 +Train: [34] [5800/6250] eta: 0:01:04 lr: 0.000097 grad: 0.0869 (0.0909) loss: 0.8201 (0.8209) time: 0.1169 data: 0.0297 max mem: 9377 +Train: [34] [5900/6250] eta: 0:00:50 lr: 0.000097 grad: 0.0895 (0.0909) loss: 0.8168 (0.8209) time: 0.1356 data: 0.0506 max mem: 9377 +Train: [34] [6000/6250] eta: 0:00:35 lr: 0.000097 grad: 0.0942 (0.0909) loss: 0.8211 (0.8209) time: 0.1509 data: 0.0636 max mem: 9377 +Train: [34] [6100/6250] eta: 0:00:21 lr: 0.000097 grad: 0.0899 (0.0909) loss: 0.8187 (0.8208) time: 0.1412 data: 0.0629 max mem: 9377 +Train: [34] [6200/6250] eta: 0:00:07 lr: 0.000097 grad: 0.0925 (0.0909) loss: 0.8109 (0.8208) time: 0.1330 data: 0.0536 max mem: 9377 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.0863 (0.0909) loss: 0.8157 (0.8208) time: 0.1289 data: 0.0440 max mem: 9377 +Train: [34] Total time: 0:14:59 (0.1440 s / it) +Averaged stats: lr: 0.000097 grad: 0.0863 (0.0909) loss: 0.8157 (0.8208) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:04:12 loss: 0.8413 (0.8413) time: 4.0805 data: 4.0113 max mem: 9377 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8421 (0.8435) time: 0.1470 data: 0.1203 max mem: 9377 +Eval (hcp-train-subset): [34] Total time: 0:00:12 (0.2087 s / it) +Averaged stats (hcp-train-subset): loss: 0.8421 (0.8435) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [34] [ 0/62] eta: 0:04:54 loss: 0.8386 (0.8386) time: 4.7526 data: 4.7230 max mem: 9377 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8408 (0.8422) time: 0.1107 data: 0.0840 max mem: 9377 +Eval (hcp-val): [34] Total time: 0:00:13 (0.2126 s / it) +Averaged stats (hcp-val): loss: 0.8408 (0.8422) +Making plots (hcp-val): example=45 +Eval (nsd-val): [34] [ 0/62] eta: 0:04:48 loss: 0.8047 (0.8047) time: 4.6555 data: 4.6034 max mem: 9377 +Eval (nsd-val): [34] [61/62] eta: 0:00:00 loss: 0.8170 (0.8171) time: 0.1305 data: 0.1053 max mem: 9377 +Eval (nsd-val): [34] Total time: 0:00:13 (0.2198 s / it) +Averaged stats (nsd-val): loss: 0.8170 (0.8171) +Making plots (nsd-val): example=30 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00034.pth +Train: [35] [ 0/6250] eta: 7:08:25 lr: 0.000097 grad: 0.2864 (0.2864) loss: 0.8740 (0.8740) time: 4.1128 data: 3.8500 max mem: 9377 +Train: [35] [ 100/6250] eta: 0:19:18 lr: 0.000097 grad: 0.0924 (0.1282) loss: 0.8259 (0.8304) time: 0.1517 data: 0.0557 max mem: 9377 +Train: [35] [ 200/6250] eta: 0:16:19 lr: 0.000097 grad: 0.0798 (0.1085) loss: 0.8323 (0.8294) time: 0.1428 data: 0.0535 max mem: 9377 +Train: [35] [ 300/6250] eta: 0:15:07 lr: 0.000097 grad: 0.0813 (0.1002) loss: 0.8249 (0.8288) time: 0.1374 data: 0.0443 max mem: 9377 +Train: [35] [ 400/6250] eta: 0:14:28 lr: 0.000097 grad: 0.0839 (0.0968) loss: 0.8243 (0.8278) time: 0.1402 data: 0.0539 max mem: 9377 +Train: [35] [ 500/6250] eta: 0:13:53 lr: 0.000097 grad: 0.0908 (0.0956) loss: 0.8125 (0.8257) time: 0.1351 data: 0.0434 max mem: 9377 +Train: [35] [ 600/6250] eta: 0:13:24 lr: 0.000097 grad: 0.0874 (0.0944) loss: 0.8219 (0.8241) time: 0.1207 data: 0.0281 max mem: 9377 +Train: [35] [ 700/6250] eta: 0:13:01 lr: 0.000096 grad: 0.0855 (0.0936) loss: 0.8174 (0.8230) time: 0.1253 data: 0.0312 max mem: 9377 +Train: [35] [ 800/6250] eta: 0:12:40 lr: 0.000096 grad: 0.0850 (0.0930) loss: 0.8186 (0.8223) time: 0.1304 data: 0.0393 max mem: 9377 +Train: [35] [ 900/6250] eta: 0:12:32 lr: 0.000096 grad: 0.0846 (0.0924) loss: 0.8197 (0.8219) time: 0.1512 data: 0.0655 max mem: 9377 +Train: [35] [1000/6250] eta: 0:12:19 lr: 0.000096 grad: 0.0822 (0.0917) loss: 0.8152 (0.8215) time: 0.1381 data: 0.0522 max mem: 9377 +Train: [35] [1100/6250] eta: 0:12:02 lr: 0.000096 grad: 0.0837 (0.0912) loss: 0.8167 (0.8211) time: 0.1199 data: 0.0370 max mem: 9377 +Train: [35] [1200/6250] eta: 0:11:53 lr: 0.000096 grad: 0.0861 (0.0909) loss: 0.8188 (0.8207) time: 0.1617 data: 0.0830 max mem: 9377 +Train: [35] [1300/6250] eta: 0:11:37 lr: 0.000096 grad: 0.0843 (0.0904) loss: 0.8182 (0.8206) time: 0.1345 data: 0.0464 max mem: 9377 +Train: [35] [1400/6250] eta: 0:11:19 lr: 0.000096 grad: 0.0817 (0.0903) loss: 0.8216 (0.8205) time: 0.1372 data: 0.0552 max mem: 9377 +Train: [35] [1500/6250] eta: 0:11:03 lr: 0.000096 grad: 0.0910 (0.0902) loss: 0.8226 (0.8205) time: 0.1357 data: 0.0539 max mem: 9377 +Train: [35] [1600/6250] eta: 0:10:47 lr: 0.000096 grad: 0.0845 (0.0902) loss: 0.8209 (0.8204) time: 0.1312 data: 0.0489 max mem: 9377 +Train: [35] [1700/6250] eta: 0:10:33 lr: 0.000096 grad: 0.0843 (0.0901) loss: 0.8203 (0.8204) time: 0.1309 data: 0.0473 max mem: 9377 +Train: [35] [1800/6250] eta: 0:10:17 lr: 0.000096 grad: 0.0860 (0.0900) loss: 0.8117 (0.8202) time: 0.1336 data: 0.0549 max mem: 9377 +Train: [35] [1900/6250] eta: 0:10:01 lr: 0.000096 grad: 0.0862 (0.0900) loss: 0.8292 (0.8202) time: 0.1101 data: 0.0247 max mem: 9377 +Train: [35] [2000/6250] eta: 0:09:46 lr: 0.000096 grad: 0.0859 (0.0899) loss: 0.8157 (0.8201) time: 0.1376 data: 0.0559 max mem: 9377 +Train: [35] [2100/6250] eta: 0:09:33 lr: 0.000096 grad: 0.0896 (0.0898) loss: 0.8121 (0.8201) time: 0.1468 data: 0.0685 max mem: 9377 +Train: [35] [2200/6250] eta: 0:09:19 lr: 0.000096 grad: 0.0855 (0.0897) loss: 0.8145 (0.8200) time: 0.1460 data: 0.0641 max mem: 9377 +Train: [35] [2300/6250] eta: 0:09:07 lr: 0.000096 grad: 0.0865 (0.0896) loss: 0.8171 (0.8201) time: 0.1423 data: 0.0599 max mem: 9377 +Train: [35] [2400/6250] eta: 0:08:54 lr: 0.000096 grad: 0.0921 (0.0896) loss: 0.8179 (0.8200) time: 0.1424 data: 0.0649 max mem: 9377 +Train: [35] [2500/6250] eta: 0:08:40 lr: 0.000096 grad: 0.0803 (0.0896) loss: 0.8184 (0.8199) time: 0.1362 data: 0.0521 max mem: 9377 +Train: [35] [2600/6250] eta: 0:08:27 lr: 0.000096 grad: 0.0888 (0.0896) loss: 0.8174 (0.8197) time: 0.1304 data: 0.0568 max mem: 9377 +Train: [35] [2700/6250] eta: 0:08:13 lr: 0.000096 grad: 0.0846 (0.0896) loss: 0.8124 (0.8196) time: 0.1424 data: 0.0607 max mem: 9377 +Train: [35] [2800/6250] eta: 0:07:59 lr: 0.000096 grad: 0.0888 (0.0897) loss: 0.8103 (0.8195) time: 0.1539 data: 0.0756 max mem: 9377 +Train: [35] [2900/6250] eta: 0:07:45 lr: 0.000096 grad: 0.0881 (0.0898) loss: 0.8187 (0.8193) time: 0.1481 data: 0.0694 max mem: 9377 +Train: [35] [3000/6250] eta: 0:07:31 lr: 0.000096 grad: 0.0907 (0.0900) loss: 0.8168 (0.8190) time: 0.1300 data: 0.0471 max mem: 9377 +Train: [35] [3100/6250] eta: 0:07:17 lr: 0.000096 grad: 0.0829 (0.0901) loss: 0.8145 (0.8189) time: 0.1341 data: 0.0520 max mem: 9377 +Train: [35] [3200/6250] eta: 0:07:03 lr: 0.000096 grad: 0.0938 (0.0902) loss: 0.8144 (0.8188) time: 0.1428 data: 0.0624 max mem: 9377 +Train: [35] [3300/6250] eta: 0:06:49 lr: 0.000096 grad: 0.0837 (0.0903) loss: 0.8182 (0.8187) time: 0.1315 data: 0.0486 max mem: 9377 +Train: [35] [3400/6250] eta: 0:06:35 lr: 0.000096 grad: 0.0926 (0.0904) loss: 0.8124 (0.8186) time: 0.1290 data: 0.0456 max mem: 9377 +Train: [35] [3500/6250] eta: 0:06:22 lr: 0.000096 grad: 0.0930 (0.0905) loss: 0.8098 (0.8185) time: 0.1160 data: 0.0316 max mem: 9377 +Train: [35] [3600/6250] eta: 0:06:08 lr: 0.000096 grad: 0.0855 (0.0906) loss: 0.8183 (0.8184) time: 0.1267 data: 0.0435 max mem: 9377 +Train: [35] [3700/6250] eta: 0:05:54 lr: 0.000096 grad: 0.0829 (0.0907) loss: 0.8185 (0.8183) time: 0.1408 data: 0.0609 max mem: 9377 +Train: [35] [3800/6250] eta: 0:05:40 lr: 0.000096 grad: 0.0865 (0.0907) loss: 0.8116 (0.8182) time: 0.1245 data: 0.0411 max mem: 9377 +Train: [35] [3900/6250] eta: 0:05:27 lr: 0.000096 grad: 0.0900 (0.0907) loss: 0.8192 (0.8181) time: 0.1237 data: 0.0422 max mem: 9377 +Train: [35] [4000/6250] eta: 0:05:13 lr: 0.000096 grad: 0.0878 (0.0906) loss: 0.8111 (0.8180) time: 0.1446 data: 0.0634 max mem: 9377 +Train: [35] [4100/6250] eta: 0:04:59 lr: 0.000096 grad: 0.0886 (0.0907) loss: 0.8113 (0.8179) time: 0.1322 data: 0.0491 max mem: 9377 +Train: [35] [4200/6250] eta: 0:04:46 lr: 0.000096 grad: 0.0859 (0.0907) loss: 0.8127 (0.8178) time: 0.1543 data: 0.0763 max mem: 9377 +Train: [35] [4300/6250] eta: 0:04:32 lr: 0.000095 grad: 0.0908 (0.0907) loss: 0.8194 (0.8177) time: 0.1230 data: 0.0434 max mem: 9377 +Train: [35] [4400/6250] eta: 0:04:18 lr: 0.000095 grad: 0.0869 (0.0907) loss: 0.8131 (0.8177) time: 0.1523 data: 0.0717 max mem: 9377 +Train: [35] [4500/6250] eta: 0:04:04 lr: 0.000095 grad: 0.0783 (0.0907) loss: 0.8155 (0.8176) time: 0.1116 data: 0.0250 max mem: 9377 +Train: [35] [4600/6250] eta: 0:03:50 lr: 0.000095 grad: 0.0843 (0.0907) loss: 0.8187 (0.8176) time: 0.1392 data: 0.0568 max mem: 9377 +Train: [35] [4700/6250] eta: 0:03:36 lr: 0.000095 grad: 0.0871 (0.0907) loss: 0.8183 (0.8177) time: 0.1505 data: 0.0641 max mem: 9377 +Train: [35] [4800/6250] eta: 0:03:22 lr: 0.000095 grad: 0.0867 (0.0907) loss: 0.8107 (0.8177) time: 0.1436 data: 0.0611 max mem: 9377 +Train: [35] [4900/6250] eta: 0:03:09 lr: 0.000095 grad: 0.0941 (0.0907) loss: 0.8150 (0.8177) time: 0.1734 data: 0.0927 max mem: 9377 +Train: [35] [5000/6250] eta: 0:02:55 lr: 0.000095 grad: 0.0891 (0.0907) loss: 0.8145 (0.8177) time: 0.1588 data: 0.0796 max mem: 9377 +Train: [35] [5100/6250] eta: 0:02:41 lr: 0.000095 grad: 0.0929 (0.0908) loss: 0.8184 (0.8177) time: 0.1384 data: 0.0638 max mem: 9377 +Train: [35] [5200/6250] eta: 0:02:27 lr: 0.000095 grad: 0.0921 (0.0908) loss: 0.8137 (0.8177) time: 0.1412 data: 0.0573 max mem: 9377 +Train: [35] [5300/6250] eta: 0:02:13 lr: 0.000095 grad: 0.0928 (0.0908) loss: 0.8155 (0.8177) time: 0.1439 data: 0.0624 max mem: 9377 +Train: [35] [5400/6250] eta: 0:01:59 lr: 0.000095 grad: 0.0922 (0.0909) loss: 0.8157 (0.8177) time: 0.1386 data: 0.0589 max mem: 9377 +Train: [35] [5500/6250] eta: 0:01:45 lr: 0.000095 grad: 0.0866 (0.0909) loss: 0.8159 (0.8177) time: 0.1424 data: 0.0565 max mem: 9377 +Train: [35] [5600/6250] eta: 0:01:31 lr: 0.000095 grad: 0.0853 (0.0909) loss: 0.8157 (0.8176) time: 0.1407 data: 0.0535 max mem: 9377 +Train: [35] [5700/6250] eta: 0:01:17 lr: 0.000095 grad: 0.0940 (0.0909) loss: 0.8071 (0.8176) time: 0.1321 data: 0.0437 max mem: 9377 +Train: [35] [5800/6250] eta: 0:01:03 lr: 0.000095 grad: 0.0923 (0.0910) loss: 0.8070 (0.8175) time: 0.1135 data: 0.0283 max mem: 9377 +Train: [35] [5900/6250] eta: 0:00:49 lr: 0.000095 grad: 0.0844 (0.0910) loss: 0.8164 (0.8175) time: 0.1303 data: 0.0549 max mem: 9377 +Train: [35] [6000/6250] eta: 0:00:35 lr: 0.000095 grad: 0.0938 (0.0910) loss: 0.8152 (0.8175) time: 0.1477 data: 0.0639 max mem: 9377 +Train: [35] [6100/6250] eta: 0:00:21 lr: 0.000095 grad: 0.0898 (0.0910) loss: 0.8183 (0.8175) time: 0.1544 data: 0.0740 max mem: 9377 +Train: [35] [6200/6250] eta: 0:00:07 lr: 0.000095 grad: 0.0912 (0.0910) loss: 0.8203 (0.8175) time: 0.1406 data: 0.0601 max mem: 9377 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.0916 (0.0910) loss: 0.8178 (0.8175) time: 0.1108 data: 0.0272 max mem: 9377 +Train: [35] Total time: 0:14:40 (0.1409 s / it) +Averaged stats: lr: 0.000095 grad: 0.0916 (0.0910) loss: 0.8178 (0.8175) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:04:09 loss: 0.8479 (0.8479) time: 4.0304 data: 3.9376 max mem: 9377 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8437 (0.8446) time: 0.0970 data: 0.0725 max mem: 9377 +Eval (hcp-train-subset): [35] Total time: 0:00:12 (0.2029 s / it) +Averaged stats (hcp-train-subset): loss: 0.8437 (0.8446) +Eval (hcp-val): [35] [ 0/62] eta: 0:03:50 loss: 0.8380 (0.8380) time: 3.7230 data: 3.6363 max mem: 9377 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8423 (0.8426) time: 0.1354 data: 0.1086 max mem: 9377 +Eval (hcp-val): [35] Total time: 0:00:14 (0.2379 s / it) +Averaged stats (hcp-val): loss: 0.8423 (0.8426) +Eval (nsd-val): [35] [ 0/62] eta: 0:05:16 loss: 0.8064 (0.8064) time: 5.1020 data: 5.0683 max mem: 9377 +Eval (nsd-val): [35] [61/62] eta: 0:00:00 loss: 0.8170 (0.8186) time: 0.1251 data: 0.0984 max mem: 9377 +Eval (nsd-val): [35] Total time: 0:00:12 (0.2045 s / it) +Averaged stats (nsd-val): loss: 0.8170 (0.8186) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [36] [ 0/6250] eta: 9:03:42 lr: 0.000095 grad: 0.0716 (0.0716) loss: 0.8665 (0.8665) time: 5.2196 data: 4.9122 max mem: 9377 +Train: [36] [ 100/6250] eta: 0:21:17 lr: 0.000095 grad: 0.1002 (0.1266) loss: 0.8119 (0.8152) time: 0.1611 data: 0.0641 max mem: 9377 +Train: [36] [ 200/6250] eta: 0:17:42 lr: 0.000095 grad: 0.0913 (0.1167) loss: 0.8115 (0.8112) time: 0.1383 data: 0.0488 max mem: 9377 +Train: [36] [ 300/6250] eta: 0:16:29 lr: 0.000095 grad: 0.0856 (0.1101) loss: 0.8123 (0.8112) time: 0.1437 data: 0.0505 max mem: 9377 +Train: [36] [ 400/6250] eta: 0:15:50 lr: 0.000095 grad: 0.0863 (0.1056) loss: 0.8121 (0.8120) time: 0.1423 data: 0.0456 max mem: 9377 +Train: [36] [ 500/6250] eta: 0:15:11 lr: 0.000095 grad: 0.0778 (0.1019) loss: 0.8237 (0.8136) time: 0.1496 data: 0.0660 max mem: 9377 +Train: [36] [ 600/6250] eta: 0:14:46 lr: 0.000095 grad: 0.0824 (0.0995) loss: 0.8229 (0.8151) time: 0.1275 data: 0.0349 max mem: 9377 +Train: [36] [ 700/6250] eta: 0:14:22 lr: 0.000095 grad: 0.0809 (0.0974) loss: 0.8255 (0.8163) time: 0.1414 data: 0.0636 max mem: 9377 +Train: [36] [ 800/6250] eta: 0:13:51 lr: 0.000095 grad: 0.0824 (0.0962) loss: 0.8224 (0.8169) time: 0.1438 data: 0.0569 max mem: 9377 +Train: [36] [ 900/6250] eta: 0:13:24 lr: 0.000095 grad: 0.0873 (0.0955) loss: 0.8209 (0.8174) time: 0.1266 data: 0.0356 max mem: 9377 +Train: [36] [1000/6250] eta: 0:12:58 lr: 0.000095 grad: 0.0830 (0.0948) loss: 0.8339 (0.8176) time: 0.1210 data: 0.0360 max mem: 9377 +Train: [36] [1100/6250] eta: 0:12:37 lr: 0.000095 grad: 0.0846 (0.0942) loss: 0.8227 (0.8176) time: 0.1452 data: 0.0637 max mem: 9377 +Train: [36] [1200/6250] eta: 0:12:24 lr: 0.000095 grad: 0.0786 (0.0935) loss: 0.8203 (0.8178) time: 0.1610 data: 0.0806 max mem: 9377 +Train: [36] [1300/6250] eta: 0:12:10 lr: 0.000095 grad: 0.0890 (0.0930) loss: 0.8091 (0.8178) time: 0.1440 data: 0.0672 max mem: 9377 +Train: [36] [1400/6250] eta: 0:11:59 lr: 0.000095 grad: 0.0790 (0.0926) loss: 0.8248 (0.8177) time: 0.1782 data: 0.0973 max mem: 9377 +Train: [36] [1500/6250] eta: 0:11:47 lr: 0.000095 grad: 0.0844 (0.0922) loss: 0.8169 (0.8179) time: 0.1593 data: 0.0827 max mem: 9377 +Train: [36] [1600/6250] eta: 0:11:34 lr: 0.000094 grad: 0.0865 (0.0921) loss: 0.8204 (0.8180) time: 0.1317 data: 0.0538 max mem: 9377 +Train: [36] [1700/6250] eta: 0:11:20 lr: 0.000094 grad: 0.0867 (0.0918) loss: 0.8172 (0.8181) time: 0.1334 data: 0.0535 max mem: 9377 +Train: [36] [1800/6250] eta: 0:11:07 lr: 0.000094 grad: 0.0862 (0.0915) loss: 0.8068 (0.8180) time: 0.1811 data: 0.1039 max mem: 9377 +Train: [36] [1900/6250] eta: 0:10:55 lr: 0.000094 grad: 0.0868 (0.0912) loss: 0.8158 (0.8180) time: 0.1655 data: 0.0883 max mem: 9377 +Train: [36] [2000/6250] eta: 0:10:38 lr: 0.000094 grad: 0.0847 (0.0910) loss: 0.8190 (0.8180) time: 0.1653 data: 0.0827 max mem: 9377 +Train: [36] [2100/6250] eta: 0:10:21 lr: 0.000094 grad: 0.0852 (0.0909) loss: 0.8198 (0.8181) time: 0.1523 data: 0.0705 max mem: 9377 +Train: [36] [2200/6250] eta: 0:10:04 lr: 0.000094 grad: 0.0841 (0.0907) loss: 0.8153 (0.8181) time: 0.1209 data: 0.0381 max mem: 9377 +Train: [36] [2300/6250] eta: 0:09:46 lr: 0.000094 grad: 0.0835 (0.0905) loss: 0.8179 (0.8182) time: 0.1435 data: 0.0605 max mem: 9377 +Train: [36] [2400/6250] eta: 0:09:28 lr: 0.000094 grad: 0.0833 (0.0904) loss: 0.8250 (0.8183) time: 0.1323 data: 0.0492 max mem: 9377 +Train: [36] [2500/6250] eta: 0:09:12 lr: 0.000094 grad: 0.0835 (0.0903) loss: 0.8136 (0.8183) time: 0.1253 data: 0.0389 max mem: 9377 +Train: [36] [2600/6250] eta: 0:08:56 lr: 0.000094 grad: 0.0926 (0.0902) loss: 0.8131 (0.8182) time: 0.1488 data: 0.0682 max mem: 9377 +Train: [36] [2700/6250] eta: 0:08:40 lr: 0.000094 grad: 0.0815 (0.0903) loss: 0.8218 (0.8182) time: 0.1514 data: 0.0702 max mem: 9377 +Train: [36] [2800/6250] eta: 0:08:24 lr: 0.000094 grad: 0.0869 (0.0902) loss: 0.8189 (0.8181) time: 0.1307 data: 0.0423 max mem: 9377 +Train: [36] [2900/6250] eta: 0:08:07 lr: 0.000094 grad: 0.0888 (0.0903) loss: 0.8166 (0.8181) time: 0.1323 data: 0.0508 max mem: 9377 +Train: [36] [3000/6250] eta: 0:07:52 lr: 0.000094 grad: 0.0835 (0.0902) loss: 0.8181 (0.8180) time: 0.1325 data: 0.0508 max mem: 9377 +Train: [36] [3100/6250] eta: 0:07:36 lr: 0.000094 grad: 0.0878 (0.0902) loss: 0.8221 (0.8180) time: 0.1007 data: 0.0186 max mem: 9377 +Train: [36] [3200/6250] eta: 0:07:21 lr: 0.000094 grad: 0.0890 (0.0902) loss: 0.8196 (0.8180) time: 0.1345 data: 0.0441 max mem: 9377 +Train: [36] [3300/6250] eta: 0:07:06 lr: 0.000094 grad: 0.0924 (0.0902) loss: 0.8188 (0.8180) time: 0.1534 data: 0.0741 max mem: 9377 +Train: [36] [3400/6250] eta: 0:06:51 lr: 0.000094 grad: 0.0877 (0.0902) loss: 0.8200 (0.8180) time: 0.1331 data: 0.0584 max mem: 9377 +Train: [36] [3500/6250] eta: 0:06:36 lr: 0.000094 grad: 0.0917 (0.0901) loss: 0.8173 (0.8180) time: 0.1233 data: 0.0469 max mem: 9377 +Train: [36] [3600/6250] eta: 0:06:21 lr: 0.000094 grad: 0.0889 (0.0901) loss: 0.8121 (0.8179) time: 0.1018 data: 0.0153 max mem: 9377 +Train: [36] [3700/6250] eta: 0:06:06 lr: 0.000094 grad: 0.0864 (0.0902) loss: 0.8179 (0.8179) time: 0.1304 data: 0.0484 max mem: 9377 +Train: [36] [3800/6250] eta: 0:05:51 lr: 0.000094 grad: 0.0907 (0.0903) loss: 0.8170 (0.8178) time: 0.1334 data: 0.0509 max mem: 9377 +Train: [36] [3900/6250] eta: 0:05:36 lr: 0.000094 grad: 0.0930 (0.0904) loss: 0.8143 (0.8177) time: 0.1113 data: 0.0231 max mem: 9377 +Train: [36] [4000/6250] eta: 0:05:22 lr: 0.000094 grad: 0.0894 (0.0905) loss: 0.8111 (0.8176) time: 0.1297 data: 0.0532 max mem: 9377 +Train: [36] [4100/6250] eta: 0:05:07 lr: 0.000094 grad: 0.0958 (0.0906) loss: 0.8139 (0.8175) time: 0.1275 data: 0.0350 max mem: 9377 +Train: [36] [4200/6250] eta: 0:04:52 lr: 0.000094 grad: 0.0929 (0.0907) loss: 0.8148 (0.8175) time: 0.1419 data: 0.0621 max mem: 9377 +Train: [36] [4300/6250] eta: 0:04:38 lr: 0.000094 grad: 0.0886 (0.0907) loss: 0.8195 (0.8175) time: 0.1194 data: 0.0336 max mem: 9377 +Train: [36] [4400/6250] eta: 0:04:23 lr: 0.000094 grad: 0.0899 (0.0908) loss: 0.8172 (0.8174) time: 0.1291 data: 0.0489 max mem: 9377 +Train: [36] [4500/6250] eta: 0:04:09 lr: 0.000094 grad: 0.0904 (0.0910) loss: 0.8100 (0.8172) time: 0.1254 data: 0.0426 max mem: 9377 +Train: [36] [4600/6250] eta: 0:03:55 lr: 0.000094 grad: 0.0913 (0.0911) loss: 0.8133 (0.8171) time: 0.1364 data: 0.0536 max mem: 9377 +Train: [36] [4700/6250] eta: 0:03:40 lr: 0.000094 grad: 0.0929 (0.0912) loss: 0.8196 (0.8170) time: 0.1323 data: 0.0474 max mem: 9377 +Train: [36] [4800/6250] eta: 0:03:27 lr: 0.000094 grad: 0.0890 (0.0913) loss: 0.8122 (0.8169) time: 0.1295 data: 0.0439 max mem: 9377 +Train: [36] [4900/6250] eta: 0:03:13 lr: 0.000094 grad: 0.0907 (0.0913) loss: 0.8120 (0.8168) time: 0.1602 data: 0.0689 max mem: 9377 +Train: [36] [5000/6250] eta: 0:03:00 lr: 0.000094 grad: 0.0914 (0.0914) loss: 0.8157 (0.8168) time: 0.1426 data: 0.0512 max mem: 9377 +Train: [36] [5100/6250] eta: 0:02:46 lr: 0.000093 grad: 0.0885 (0.0915) loss: 0.8199 (0.8167) time: 0.1837 data: 0.1039 max mem: 9377 +Train: [36] [5200/6250] eta: 0:02:32 lr: 0.000093 grad: 0.0894 (0.0916) loss: 0.8217 (0.8167) time: 0.1878 data: 0.1065 max mem: 9377 +Train: [36] [5300/6250] eta: 0:02:18 lr: 0.000093 grad: 0.0863 (0.0916) loss: 0.8161 (0.8167) time: 0.1617 data: 0.0712 max mem: 9377 +Train: [36] [5400/6250] eta: 0:02:03 lr: 0.000093 grad: 0.1004 (0.0917) loss: 0.8091 (0.8166) time: 0.1446 data: 0.0551 max mem: 9377 +Train: [36] [5500/6250] eta: 0:01:49 lr: 0.000093 grad: 0.0908 (0.0918) loss: 0.8127 (0.8166) time: 0.1457 data: 0.0685 max mem: 9377 +Train: [36] [5600/6250] eta: 0:01:34 lr: 0.000093 grad: 0.0993 (0.0919) loss: 0.8094 (0.8165) time: 0.1451 data: 0.0644 max mem: 9377 +Train: [36] [5700/6250] eta: 0:01:19 lr: 0.000093 grad: 0.1027 (0.0920) loss: 0.8145 (0.8164) time: 0.1390 data: 0.0588 max mem: 9377 +Train: [36] [5800/6250] eta: 0:01:05 lr: 0.000093 grad: 0.0870 (0.0921) loss: 0.8090 (0.8164) time: 0.1388 data: 0.0517 max mem: 9377 +Train: [36] [5900/6250] eta: 0:00:50 lr: 0.000093 grad: 0.0968 (0.0922) loss: 0.8144 (0.8163) time: 0.1414 data: 0.0572 max mem: 9377 +Train: [36] [6000/6250] eta: 0:00:36 lr: 0.000093 grad: 0.1001 (0.0923) loss: 0.8121 (0.8162) time: 0.1715 data: 0.0938 max mem: 9377 +Train: [36] [6100/6250] eta: 0:00:21 lr: 0.000093 grad: 0.0957 (0.0924) loss: 0.8099 (0.8161) time: 0.1320 data: 0.0523 max mem: 9377 +Train: [36] [6200/6250] eta: 0:00:07 lr: 0.000093 grad: 0.0975 (0.0925) loss: 0.8158 (0.8160) time: 0.1266 data: 0.0457 max mem: 9377 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.1018 (0.0926) loss: 0.8071 (0.8160) time: 0.1272 data: 0.0439 max mem: 9377 +Train: [36] Total time: 0:15:08 (0.1454 s / it) +Averaged stats: lr: 0.000093 grad: 0.1018 (0.0926) loss: 0.8071 (0.8160) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:04:43 loss: 0.8418 (0.8418) time: 4.5762 data: 4.5325 max mem: 9377 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8409 (0.8429) time: 0.1109 data: 0.0861 max mem: 9377 +Eval (hcp-train-subset): [36] Total time: 0:00:13 (0.2120 s / it) +Averaged stats (hcp-train-subset): loss: 0.8409 (0.8429) +Eval (hcp-val): [36] [ 0/62] eta: 0:04:47 loss: 0.8356 (0.8356) time: 4.6447 data: 4.6137 max mem: 9377 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8400 (0.8414) time: 0.1350 data: 0.1084 max mem: 9377 +Eval (hcp-val): [36] Total time: 0:00:13 (0.2112 s / it) +Averaged stats (hcp-val): loss: 0.8400 (0.8414) +Eval (nsd-val): [36] [ 0/62] eta: 0:05:36 loss: 0.8016 (0.8016) time: 5.4287 data: 5.3951 max mem: 9377 +Eval (nsd-val): [36] [61/62] eta: 0:00:00 loss: 0.8120 (0.8133) time: 0.1449 data: 0.1195 max mem: 9377 +Eval (nsd-val): [36] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (nsd-val): loss: 0.8120 (0.8133) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 10:41:43 lr: 0.000093 grad: 0.1650 (0.1650) loss: 0.8392 (0.8392) time: 6.1606 data: 6.0504 max mem: 9377 +Train: [37] [ 100/6250] eta: 0:20:14 lr: 0.000093 grad: 0.1011 (0.1159) loss: 0.8170 (0.8248) time: 0.1486 data: 0.0509 max mem: 9377 +Train: [37] [ 200/6250] eta: 0:17:12 lr: 0.000093 grad: 0.0898 (0.1046) loss: 0.8106 (0.8221) time: 0.1493 data: 0.0572 max mem: 9377 +Train: [37] [ 300/6250] eta: 0:16:33 lr: 0.000093 grad: 0.0855 (0.0988) loss: 0.8316 (0.8226) time: 0.1543 data: 0.0628 max mem: 9377 +Train: [37] [ 400/6250] eta: 0:15:47 lr: 0.000093 grad: 0.0866 (0.0951) loss: 0.8241 (0.8232) time: 0.1297 data: 0.0454 max mem: 9377 +Train: [37] [ 500/6250] eta: 0:15:11 lr: 0.000093 grad: 0.0875 (0.0935) loss: 0.8135 (0.8233) time: 0.1236 data: 0.0360 max mem: 9377 +Train: [37] [ 600/6250] eta: 0:14:45 lr: 0.000093 grad: 0.0823 (0.0930) loss: 0.8201 (0.8231) time: 0.1637 data: 0.0694 max mem: 9377 +Train: [37] [ 700/6250] eta: 0:14:23 lr: 0.000093 grad: 0.0796 (0.0917) loss: 0.8290 (0.8237) time: 0.1526 data: 0.0610 max mem: 9377 +Train: [37] [ 800/6250] eta: 0:13:59 lr: 0.000093 grad: 0.0846 (0.0910) loss: 0.8228 (0.8241) time: 0.1457 data: 0.0535 max mem: 9377 +Train: [37] [ 900/6250] eta: 0:13:37 lr: 0.000093 grad: 0.0823 (0.0908) loss: 0.8245 (0.8242) time: 0.1394 data: 0.0507 max mem: 9377 +Train: [37] [1000/6250] eta: 0:13:14 lr: 0.000093 grad: 0.0833 (0.0902) loss: 0.8271 (0.8244) time: 0.1248 data: 0.0358 max mem: 9377 +Train: [37] [1100/6250] eta: 0:12:49 lr: 0.000093 grad: 0.0858 (0.0900) loss: 0.8231 (0.8244) time: 0.1270 data: 0.0413 max mem: 9377 +Train: [37] [1200/6250] eta: 0:12:27 lr: 0.000093 grad: 0.0857 (0.0899) loss: 0.8180 (0.8240) time: 0.1301 data: 0.0493 max mem: 9377 +Train: [37] [1300/6250] eta: 0:12:08 lr: 0.000093 grad: 0.0868 (0.0899) loss: 0.8229 (0.8238) time: 0.1335 data: 0.0539 max mem: 9377 +Train: [37] [1400/6250] eta: 0:11:49 lr: 0.000093 grad: 0.0803 (0.0900) loss: 0.8182 (0.8235) time: 0.1363 data: 0.0549 max mem: 9377 +Train: [37] [1500/6250] eta: 0:11:32 lr: 0.000093 grad: 0.0937 (0.0902) loss: 0.8187 (0.8233) time: 0.1359 data: 0.0504 max mem: 9377 +Train: [37] [1600/6250] eta: 0:11:22 lr: 0.000093 grad: 0.0873 (0.0904) loss: 0.8217 (0.8230) time: 0.1628 data: 0.0878 max mem: 9377 +Train: [37] [1700/6250] eta: 0:11:10 lr: 0.000093 grad: 0.0865 (0.0905) loss: 0.8207 (0.8227) time: 0.1425 data: 0.0584 max mem: 9377 +Train: [37] [1800/6250] eta: 0:10:54 lr: 0.000093 grad: 0.0924 (0.0906) loss: 0.8192 (0.8224) time: 0.1413 data: 0.0607 max mem: 9377 +Train: [37] [1900/6250] eta: 0:10:41 lr: 0.000093 grad: 0.0899 (0.0908) loss: 0.8128 (0.8221) time: 0.1521 data: 0.0760 max mem: 9377 +Train: [37] [2000/6250] eta: 0:10:26 lr: 0.000093 grad: 0.0952 (0.0911) loss: 0.8153 (0.8218) time: 0.1678 data: 0.0827 max mem: 9377 +Train: [37] [2100/6250] eta: 0:10:12 lr: 0.000093 grad: 0.0928 (0.0911) loss: 0.8200 (0.8216) time: 0.1750 data: 0.0947 max mem: 9377 +Train: [37] [2200/6250] eta: 0:09:58 lr: 0.000093 grad: 0.0887 (0.0911) loss: 0.8199 (0.8215) time: 0.1620 data: 0.0832 max mem: 9377 +Train: [37] [2300/6250] eta: 0:09:42 lr: 0.000092 grad: 0.0895 (0.0912) loss: 0.8142 (0.8212) time: 0.1218 data: 0.0371 max mem: 9377 +Train: [37] [2400/6250] eta: 0:09:30 lr: 0.000092 grad: 0.0952 (0.0912) loss: 0.8130 (0.8209) time: 0.1634 data: 0.0875 max mem: 9377 +Train: [37] [2500/6250] eta: 0:09:16 lr: 0.000092 grad: 0.0922 (0.0915) loss: 0.8162 (0.8208) time: 0.1775 data: 0.0990 max mem: 9377 +Train: [37] [2600/6250] eta: 0:09:01 lr: 0.000092 grad: 0.0888 (0.0916) loss: 0.8172 (0.8205) time: 0.1464 data: 0.0646 max mem: 9377 +Train: [37] [2700/6250] eta: 0:08:47 lr: 0.000092 grad: 0.0927 (0.0918) loss: 0.8099 (0.8204) time: 0.1421 data: 0.0585 max mem: 9377 +Train: [37] [2800/6250] eta: 0:08:33 lr: 0.000092 grad: 0.0935 (0.0920) loss: 0.8188 (0.8201) time: 0.1634 data: 0.0841 max mem: 9377 +Train: [37] [2900/6250] eta: 0:08:18 lr: 0.000092 grad: 0.0899 (0.0920) loss: 0.8165 (0.8199) time: 0.1560 data: 0.0773 max mem: 9377 +Train: [37] [3000/6250] eta: 0:08:02 lr: 0.000092 grad: 0.0864 (0.0921) loss: 0.8194 (0.8197) time: 0.1189 data: 0.0354 max mem: 9377 +Train: [37] [3100/6250] eta: 0:07:46 lr: 0.000092 grad: 0.0866 (0.0921) loss: 0.8183 (0.8196) time: 0.1367 data: 0.0565 max mem: 9377 +Train: [37] [3200/6250] eta: 0:07:31 lr: 0.000092 grad: 0.0897 (0.0922) loss: 0.8212 (0.8195) time: 0.1414 data: 0.0583 max mem: 9377 +Train: [37] [3300/6250] eta: 0:07:15 lr: 0.000092 grad: 0.0861 (0.0921) loss: 0.8215 (0.8194) time: 0.1308 data: 0.0496 max mem: 9377 +Train: [37] [3400/6250] eta: 0:06:59 lr: 0.000092 grad: 0.0863 (0.0921) loss: 0.8183 (0.8194) time: 0.1487 data: 0.0623 max mem: 9377 +Train: [37] [3500/6250] eta: 0:06:44 lr: 0.000092 grad: 0.0945 (0.0921) loss: 0.8152 (0.8193) time: 0.1414 data: 0.0579 max mem: 9377 +Train: [37] [3600/6250] eta: 0:06:29 lr: 0.000092 grad: 0.0903 (0.0921) loss: 0.8121 (0.8192) time: 0.1350 data: 0.0569 max mem: 9377 +Train: [37] [3700/6250] eta: 0:06:14 lr: 0.000092 grad: 0.0879 (0.0921) loss: 0.8147 (0.8191) time: 0.1361 data: 0.0564 max mem: 9377 +Train: [37] [3800/6250] eta: 0:05:58 lr: 0.000092 grad: 0.0932 (0.0921) loss: 0.8176 (0.8190) time: 0.1284 data: 0.0479 max mem: 9377 +Train: [37] [3900/6250] eta: 0:05:44 lr: 0.000092 grad: 0.0815 (0.0920) loss: 0.8207 (0.8190) time: 0.1825 data: 0.0986 max mem: 9377 +Train: [37] [4000/6250] eta: 0:05:28 lr: 0.000092 grad: 0.0862 (0.0920) loss: 0.8145 (0.8189) time: 0.1397 data: 0.0589 max mem: 9377 +Train: [37] [4100/6250] eta: 0:05:13 lr: 0.000092 grad: 0.0925 (0.0920) loss: 0.8221 (0.8189) time: 0.1419 data: 0.0578 max mem: 9377 +Train: [37] [4200/6250] eta: 0:04:58 lr: 0.000092 grad: 0.1027 (0.0921) loss: 0.8109 (0.8188) time: 0.1487 data: 0.0685 max mem: 9377 +Train: [37] [4300/6250] eta: 0:04:43 lr: 0.000092 grad: 0.0962 (0.0922) loss: 0.8185 (0.8187) time: 0.1479 data: 0.0619 max mem: 9377 +Train: [37] [4400/6250] eta: 0:04:28 lr: 0.000092 grad: 0.0931 (0.0924) loss: 0.8111 (0.8185) time: 0.1470 data: 0.0611 max mem: 9377 +Train: [37] [4500/6250] eta: 0:04:13 lr: 0.000092 grad: 0.0954 (0.0925) loss: 0.8076 (0.8183) time: 0.1354 data: 0.0528 max mem: 9377 +Train: [37] [4600/6250] eta: 0:03:59 lr: 0.000092 grad: 0.0951 (0.0927) loss: 0.8141 (0.8182) time: 0.1290 data: 0.0468 max mem: 9377 +Train: [37] [4700/6250] eta: 0:03:44 lr: 0.000092 grad: 0.0997 (0.0929) loss: 0.8101 (0.8180) time: 0.1340 data: 0.0500 max mem: 9377 +Train: [37] [4800/6250] eta: 0:03:30 lr: 0.000092 grad: 0.1003 (0.0930) loss: 0.8074 (0.8178) time: 0.1693 data: 0.0805 max mem: 9377 +Train: [37] [4900/6250] eta: 0:03:15 lr: 0.000092 grad: 0.0992 (0.0932) loss: 0.8063 (0.8176) time: 0.1612 data: 0.0727 max mem: 9377 +Train: [37] [5000/6250] eta: 0:03:01 lr: 0.000092 grad: 0.1023 (0.0933) loss: 0.8092 (0.8174) time: 0.1580 data: 0.0716 max mem: 9377 +Train: [37] [5100/6250] eta: 0:02:47 lr: 0.000092 grad: 0.0962 (0.0934) loss: 0.8115 (0.8173) time: 0.1594 data: 0.0755 max mem: 9377 +Train: [37] [5200/6250] eta: 0:02:32 lr: 0.000092 grad: 0.0982 (0.0935) loss: 0.8071 (0.8171) time: 0.1372 data: 0.0487 max mem: 9377 +Train: [37] [5300/6250] eta: 0:02:17 lr: 0.000092 grad: 0.0938 (0.0936) loss: 0.8093 (0.8170) time: 0.1620 data: 0.0769 max mem: 9377 +Train: [37] [5400/6250] eta: 0:02:03 lr: 0.000092 grad: 0.0920 (0.0937) loss: 0.8180 (0.8170) time: 0.1224 data: 0.0339 max mem: 9377 +Train: [37] [5500/6250] eta: 0:01:48 lr: 0.000092 grad: 0.0910 (0.0939) loss: 0.8152 (0.8169) time: 0.1311 data: 0.0461 max mem: 9377 +Train: [37] [5600/6250] eta: 0:01:34 lr: 0.000092 grad: 0.0967 (0.0940) loss: 0.8164 (0.8169) time: 0.1490 data: 0.0587 max mem: 9377 +Train: [37] [5700/6250] eta: 0:01:19 lr: 0.000091 grad: 0.0965 (0.0941) loss: 0.8119 (0.8168) time: 0.1585 data: 0.0773 max mem: 9377 +Train: [37] [5800/6250] eta: 0:01:05 lr: 0.000091 grad: 0.0935 (0.0942) loss: 0.8157 (0.8167) time: 0.1325 data: 0.0479 max mem: 9377 +Train: [37] [5900/6250] eta: 0:00:50 lr: 0.000091 grad: 0.0914 (0.0943) loss: 0.8130 (0.8166) time: 0.1386 data: 0.0518 max mem: 9377 +Train: [37] [6000/6250] eta: 0:00:36 lr: 0.000091 grad: 0.0902 (0.0943) loss: 0.8182 (0.8165) time: 0.1470 data: 0.0605 max mem: 9377 +Train: [37] [6100/6250] eta: 0:00:21 lr: 0.000091 grad: 0.0935 (0.0943) loss: 0.8115 (0.8165) time: 0.1365 data: 0.0467 max mem: 9377 +Train: [37] [6200/6250] eta: 0:00:07 lr: 0.000091 grad: 0.0883 (0.0943) loss: 0.8102 (0.8165) time: 0.1249 data: 0.0379 max mem: 9377 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.0925 (0.0943) loss: 0.8165 (0.8164) time: 0.1250 data: 0.0407 max mem: 9377 +Train: [37] Total time: 0:15:04 (0.1447 s / it) +Averaged stats: lr: 0.000091 grad: 0.0925 (0.0943) loss: 0.8165 (0.8164) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:04:35 loss: 0.8440 (0.8440) time: 4.4359 data: 4.3958 max mem: 9377 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8440 (0.8445) time: 0.1135 data: 0.0883 max mem: 9377 +Eval (hcp-train-subset): [37] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (hcp-train-subset): loss: 0.8440 (0.8445) +Eval (hcp-val): [37] [ 0/62] eta: 0:03:31 loss: 0.8417 (0.8417) time: 3.4054 data: 3.3566 max mem: 9377 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8406 (0.8419) time: 0.1447 data: 0.1195 max mem: 9377 +Eval (hcp-val): [37] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (hcp-val): loss: 0.8406 (0.8419) +Eval (nsd-val): [37] [ 0/62] eta: 0:03:35 loss: 0.8110 (0.8110) time: 3.4777 data: 3.4158 max mem: 9377 +Eval (nsd-val): [37] [61/62] eta: 0:00:00 loss: 0.8195 (0.8222) time: 0.1234 data: 0.0984 max mem: 9377 +Eval (nsd-val): [37] Total time: 0:00:13 (0.2188 s / it) +Averaged stats (nsd-val): loss: 0.8195 (0.8222) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 10:16:23 lr: 0.000091 grad: 0.1164 (0.1164) loss: 0.8357 (0.8357) time: 5.9173 data: 5.7982 max mem: 9377 +Train: [38] [ 100/6250] eta: 0:20:50 lr: 0.000091 grad: 0.1027 (0.1156) loss: 0.8231 (0.8289) time: 0.1486 data: 0.0466 max mem: 9377 +Train: [38] [ 200/6250] eta: 0:17:57 lr: 0.000091 grad: 0.0927 (0.1091) loss: 0.8191 (0.8248) time: 0.1516 data: 0.0551 max mem: 9377 +Train: [38] [ 300/6250] eta: 0:16:38 lr: 0.000091 grad: 0.0903 (0.1059) loss: 0.8163 (0.8209) time: 0.1452 data: 0.0500 max mem: 9377 +Train: [38] [ 400/6250] eta: 0:15:50 lr: 0.000091 grad: 0.0905 (0.1034) loss: 0.8204 (0.8197) time: 0.1386 data: 0.0459 max mem: 9377 +Train: [38] [ 500/6250] eta: 0:15:28 lr: 0.000091 grad: 0.0841 (0.1007) loss: 0.8238 (0.8203) time: 0.1579 data: 0.0652 max mem: 9377 +Train: [38] [ 600/6250] eta: 0:15:07 lr: 0.000091 grad: 0.0850 (0.0990) loss: 0.8233 (0.8203) time: 0.1602 data: 0.0745 max mem: 9377 +Train: [38] [ 700/6250] eta: 0:14:42 lr: 0.000091 grad: 0.0850 (0.0980) loss: 0.8271 (0.8204) time: 0.1444 data: 0.0566 max mem: 9377 +Train: [38] [ 800/6250] eta: 0:14:18 lr: 0.000091 grad: 0.0881 (0.0972) loss: 0.8206 (0.8207) time: 0.1662 data: 0.0719 max mem: 9377 +Train: [38] [ 900/6250] eta: 0:14:01 lr: 0.000091 grad: 0.0855 (0.0970) loss: 0.8205 (0.8205) time: 0.1621 data: 0.0713 max mem: 9377 +Train: [38] [1000/6250] eta: 0:13:38 lr: 0.000091 grad: 0.0936 (0.0970) loss: 0.8096 (0.8197) time: 0.1474 data: 0.0591 max mem: 9377 +Train: [38] [1100/6250] eta: 0:13:19 lr: 0.000091 grad: 0.1028 (0.0972) loss: 0.8103 (0.8187) time: 0.1318 data: 0.0533 max mem: 9377 +Train: [38] [1200/6250] eta: 0:12:56 lr: 0.000091 grad: 0.0945 (0.0977) loss: 0.8098 (0.8177) time: 0.1279 data: 0.0460 max mem: 9377 +Train: [38] [1300/6250] eta: 0:12:35 lr: 0.000091 grad: 0.0891 (0.0976) loss: 0.8131 (0.8169) time: 0.1331 data: 0.0491 max mem: 9377 +Train: [38] [1400/6250] eta: 0:12:16 lr: 0.000091 grad: 0.0917 (0.0977) loss: 0.8112 (0.8162) time: 0.1496 data: 0.0733 max mem: 9377 +Train: [38] [1500/6250] eta: 0:11:59 lr: 0.000091 grad: 0.0991 (0.0977) loss: 0.8048 (0.8156) time: 0.1504 data: 0.0738 max mem: 9377 +Train: [38] [1600/6250] eta: 0:11:42 lr: 0.000091 grad: 0.0969 (0.0979) loss: 0.8071 (0.8151) time: 0.1447 data: 0.0563 max mem: 9377 +Train: [38] [1700/6250] eta: 0:11:24 lr: 0.000091 grad: 0.0955 (0.0979) loss: 0.8097 (0.8148) time: 0.1214 data: 0.0358 max mem: 9377 +Train: [38] [1800/6250] eta: 0:11:06 lr: 0.000091 grad: 0.0970 (0.0980) loss: 0.8144 (0.8145) time: 0.1318 data: 0.0490 max mem: 9377 +Train: [38] [1900/6250] eta: 0:10:48 lr: 0.000091 grad: 0.1022 (0.0980) loss: 0.8111 (0.8143) time: 0.1315 data: 0.0479 max mem: 9377 +Train: [38] [2000/6250] eta: 0:10:29 lr: 0.000091 grad: 0.0920 (0.0979) loss: 0.8116 (0.8141) time: 0.1230 data: 0.0376 max mem: 9377 +Train: [38] [2100/6250] eta: 0:10:12 lr: 0.000091 grad: 0.0962 (0.0979) loss: 0.8159 (0.8140) time: 0.1368 data: 0.0545 max mem: 9377 +Train: [38] [2200/6250] eta: 0:09:56 lr: 0.000091 grad: 0.0969 (0.0980) loss: 0.8133 (0.8138) time: 0.1330 data: 0.0454 max mem: 9377 +Train: [38] [2300/6250] eta: 0:09:39 lr: 0.000091 grad: 0.1006 (0.0979) loss: 0.8100 (0.8137) time: 0.1446 data: 0.0677 max mem: 9377 +Train: [38] [2400/6250] eta: 0:09:24 lr: 0.000091 grad: 0.0983 (0.0980) loss: 0.8081 (0.8135) time: 0.1510 data: 0.0674 max mem: 9377 +Train: [38] [2500/6250] eta: 0:09:07 lr: 0.000091 grad: 0.0942 (0.0980) loss: 0.8130 (0.8135) time: 0.1556 data: 0.0814 max mem: 9377 +Train: [38] [2600/6250] eta: 0:08:51 lr: 0.000091 grad: 0.0875 (0.0979) loss: 0.8174 (0.8136) time: 0.1419 data: 0.0595 max mem: 9377 +Train: [38] [2700/6250] eta: 0:08:36 lr: 0.000091 grad: 0.0953 (0.0978) loss: 0.8151 (0.8136) time: 0.1406 data: 0.0522 max mem: 9377 +Train: [38] [2800/6250] eta: 0:08:22 lr: 0.000091 grad: 0.0953 (0.0977) loss: 0.8129 (0.8136) time: 0.1469 data: 0.0697 max mem: 9377 +Train: [38] [2900/6250] eta: 0:08:09 lr: 0.000090 grad: 0.0902 (0.0977) loss: 0.8174 (0.8137) time: 0.1797 data: 0.1028 max mem: 9377 +Train: [38] [3000/6250] eta: 0:07:54 lr: 0.000090 grad: 0.0965 (0.0976) loss: 0.8140 (0.8138) time: 0.1549 data: 0.0717 max mem: 9377 +Train: [38] [3100/6250] eta: 0:07:40 lr: 0.000090 grad: 0.0932 (0.0976) loss: 0.8083 (0.8137) time: 0.2170 data: 0.1398 max mem: 9377 +Train: [38] [3200/6250] eta: 0:07:26 lr: 0.000090 grad: 0.0937 (0.0977) loss: 0.8127 (0.8138) time: 0.1624 data: 0.0806 max mem: 9377 +Train: [38] [3300/6250] eta: 0:07:13 lr: 0.000090 grad: 0.0927 (0.0978) loss: 0.8181 (0.8138) time: 0.1585 data: 0.0819 max mem: 9377 +Train: [38] [3400/6250] eta: 0:06:59 lr: 0.000090 grad: 0.1005 (0.0979) loss: 0.8169 (0.8138) time: 0.1614 data: 0.0794 max mem: 9377 +Train: [38] [3500/6250] eta: 0:06:45 lr: 0.000090 grad: 0.0973 (0.0980) loss: 0.8171 (0.8138) time: 0.1401 data: 0.0639 max mem: 9377 +Train: [38] [3600/6250] eta: 0:06:31 lr: 0.000090 grad: 0.0932 (0.0980) loss: 0.8167 (0.8138) time: 0.1678 data: 0.0870 max mem: 9377 +Train: [38] [3700/6250] eta: 0:06:16 lr: 0.000090 grad: 0.0996 (0.0979) loss: 0.8146 (0.8138) time: 0.1536 data: 0.0784 max mem: 9377 +Train: [38] [3800/6250] eta: 0:06:02 lr: 0.000090 grad: 0.0915 (0.0980) loss: 0.8166 (0.8139) time: 0.1659 data: 0.0867 max mem: 9377 +Train: [38] [3900/6250] eta: 0:05:47 lr: 0.000090 grad: 0.0876 (0.0979) loss: 0.8229 (0.8139) time: 0.1476 data: 0.0644 max mem: 9377 +Train: [38] [4000/6250] eta: 0:05:32 lr: 0.000090 grad: 0.0936 (0.0980) loss: 0.8101 (0.8140) time: 0.1412 data: 0.0587 max mem: 9377 +Train: [38] [4100/6250] eta: 0:05:17 lr: 0.000090 grad: 0.0930 (0.0979) loss: 0.8137 (0.8141) time: 0.1309 data: 0.0491 max mem: 9377 +Train: [38] [4200/6250] eta: 0:05:02 lr: 0.000090 grad: 0.0935 (0.0979) loss: 0.8187 (0.8141) time: 0.1292 data: 0.0471 max mem: 9377 +Train: [38] [4300/6250] eta: 0:04:47 lr: 0.000090 grad: 0.0998 (0.0982) loss: 0.8220 (0.8141) time: 0.1276 data: 0.0471 max mem: 9377 +Train: [38] [4400/6250] eta: 0:04:32 lr: 0.000090 grad: 0.0930 (0.0984) loss: 0.8147 (0.8142) time: 0.1764 data: 0.0970 max mem: 9377 +Train: [38] [4500/6250] eta: 0:04:18 lr: 0.000090 grad: 0.0903 (0.0985) loss: 0.8090 (0.8142) time: 0.1903 data: 0.1131 max mem: 9377 +Train: [38] [4600/6250] eta: 0:04:03 lr: 0.000090 grad: 0.0905 (0.0984) loss: 0.8174 (0.8143) time: 0.1478 data: 0.0701 max mem: 9377 +Train: [38] [4700/6250] eta: 0:03:48 lr: 0.000090 grad: 0.0909 (0.0983) loss: 0.8205 (0.8144) time: 0.1876 data: 0.1113 max mem: 9377 +Train: [38] [4800/6250] eta: 0:03:33 lr: 0.000090 grad: 0.0946 (0.0983) loss: 0.8167 (0.8144) time: 0.1458 data: 0.0620 max mem: 9377 +Train: [38] [4900/6250] eta: 0:03:19 lr: 0.000090 grad: 0.0913 (0.0982) loss: 0.8126 (0.8144) time: 0.1565 data: 0.0753 max mem: 9377 +Train: [38] [5000/6250] eta: 0:03:04 lr: 0.000090 grad: 0.0934 (0.0982) loss: 0.8175 (0.8144) time: 0.1523 data: 0.0708 max mem: 9377 +Train: [38] [5100/6250] eta: 0:02:49 lr: 0.000090 grad: 0.0865 (0.0982) loss: 0.8153 (0.8144) time: 0.1651 data: 0.0843 max mem: 9377 +Train: [38] [5200/6250] eta: 0:02:34 lr: 0.000090 grad: 0.0981 (0.0981) loss: 0.8173 (0.8145) time: 0.1412 data: 0.0546 max mem: 9377 +Train: [38] [5300/6250] eta: 0:02:19 lr: 0.000090 grad: 0.1019 (0.0981) loss: 0.8164 (0.8145) time: 0.1364 data: 0.0515 max mem: 9377 +Train: [38] [5400/6250] eta: 0:02:04 lr: 0.000090 grad: 0.0986 (0.0981) loss: 0.8118 (0.8145) time: 0.1262 data: 0.0425 max mem: 9377 +Train: [38] [5500/6250] eta: 0:01:50 lr: 0.000090 grad: 0.0956 (0.0981) loss: 0.8152 (0.8145) time: 0.1361 data: 0.0518 max mem: 9377 +Train: [38] [5600/6250] eta: 0:01:35 lr: 0.000090 grad: 0.0941 (0.0981) loss: 0.8171 (0.8146) time: 0.1498 data: 0.0636 max mem: 9377 +Train: [38] [5700/6250] eta: 0:01:20 lr: 0.000090 grad: 0.0900 (0.0981) loss: 0.8182 (0.8146) time: 0.1248 data: 0.0302 max mem: 9377 +Train: [38] [5800/6250] eta: 0:01:05 lr: 0.000090 grad: 0.0945 (0.0980) loss: 0.8164 (0.8146) time: 0.1396 data: 0.0650 max mem: 9377 +Train: [38] [5900/6250] eta: 0:00:51 lr: 0.000090 grad: 0.0908 (0.0980) loss: 0.8180 (0.8147) time: 0.1446 data: 0.0643 max mem: 9377 +Train: [38] [6000/6250] eta: 0:00:36 lr: 0.000090 grad: 0.0983 (0.0979) loss: 0.8204 (0.8147) time: 0.1877 data: 0.1132 max mem: 9377 +Train: [38] [6100/6250] eta: 0:00:21 lr: 0.000090 grad: 0.1009 (0.0979) loss: 0.8160 (0.8148) time: 0.1372 data: 0.0552 max mem: 9377 +Train: [38] [6200/6250] eta: 0:00:07 lr: 0.000089 grad: 0.0949 (0.0979) loss: 0.8173 (0.8148) time: 0.1465 data: 0.0708 max mem: 9377 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.0949 (0.0980) loss: 0.8100 (0.8148) time: 0.1292 data: 0.0514 max mem: 9377 +Train: [38] Total time: 0:15:18 (0.1469 s / it) +Averaged stats: lr: 0.000089 grad: 0.0949 (0.0980) loss: 0.8100 (0.8148) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:04:19 loss: 0.8410 (0.8410) time: 4.1838 data: 4.1146 max mem: 9377 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8428 (0.8436) time: 0.1212 data: 0.0946 max mem: 9377 +Eval (hcp-train-subset): [38] Total time: 0:00:13 (0.2231 s / it) +Averaged stats (hcp-train-subset): loss: 0.8428 (0.8436) +Eval (hcp-val): [38] [ 0/62] eta: 0:05:38 loss: 0.8390 (0.8390) time: 5.4571 data: 5.4261 max mem: 9377 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8392 (0.8414) time: 0.1173 data: 0.0895 max mem: 9377 +Eval (hcp-val): [38] Total time: 0:00:14 (0.2377 s / it) +Averaged stats (hcp-val): loss: 0.8392 (0.8414) +Eval (nsd-val): [38] [ 0/62] eta: 0:05:58 loss: 0.8167 (0.8167) time: 5.7750 data: 5.7406 max mem: 9377 +Eval (nsd-val): [38] [61/62] eta: 0:00:00 loss: 0.8274 (0.8275) time: 0.1600 data: 0.1323 max mem: 9377 +Eval (nsd-val): [38] Total time: 0:00:15 (0.2511 s / it) +Averaged stats (nsd-val): loss: 0.8274 (0.8275) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [39] [ 0/6250] eta: 10:46:22 lr: 0.000089 grad: 0.0833 (0.0833) loss: 0.8131 (0.8131) time: 6.2051 data: 6.0775 max mem: 9377 +Train: [39] [ 100/6250] eta: 0:22:51 lr: 0.000089 grad: 0.0918 (0.1076) loss: 0.8302 (0.8387) time: 0.1820 data: 0.0823 max mem: 9377 +Train: [39] [ 200/6250] eta: 0:19:33 lr: 0.000089 grad: 0.0818 (0.0993) loss: 0.8251 (0.8347) time: 0.1741 data: 0.0773 max mem: 9377 +Train: [39] [ 300/6250] eta: 0:18:03 lr: 0.000089 grad: 0.0881 (0.0987) loss: 0.8298 (0.8307) time: 0.1357 data: 0.0450 max mem: 9377 +Train: [39] [ 400/6250] eta: 0:16:55 lr: 0.000089 grad: 0.0899 (0.0973) loss: 0.8218 (0.8282) time: 0.1345 data: 0.0428 max mem: 9377 +Train: [39] [ 500/6250] eta: 0:16:06 lr: 0.000089 grad: 0.0905 (0.0965) loss: 0.8237 (0.8268) time: 0.1705 data: 0.0885 max mem: 9377 +Train: [39] [ 600/6250] eta: 0:15:32 lr: 0.000089 grad: 0.0898 (0.0954) loss: 0.8200 (0.8261) time: 0.1647 data: 0.0759 max mem: 9377 +Train: [39] [ 700/6250] eta: 0:15:04 lr: 0.000089 grad: 0.0906 (0.0949) loss: 0.8153 (0.8249) time: 0.1776 data: 0.0967 max mem: 9377 +Train: [39] [ 800/6250] eta: 0:14:44 lr: 0.000089 grad: 0.0921 (0.0946) loss: 0.8222 (0.8242) time: 0.1666 data: 0.0786 max mem: 9377 +Train: [39] [ 900/6250] eta: 0:14:20 lr: 0.000089 grad: 0.0882 (0.0951) loss: 0.8283 (0.8239) time: 0.1453 data: 0.0598 max mem: 9377 +Train: [39] [1000/6250] eta: 0:13:54 lr: 0.000089 grad: 0.0919 (0.0956) loss: 0.8129 (0.8231) time: 0.1359 data: 0.0515 max mem: 9377 +Train: [39] [1100/6250] eta: 0:13:30 lr: 0.000089 grad: 0.0938 (0.0956) loss: 0.8108 (0.8223) time: 0.1386 data: 0.0554 max mem: 9377 +Train: [39] [1200/6250] eta: 0:13:08 lr: 0.000089 grad: 0.0969 (0.0958) loss: 0.8124 (0.8215) time: 0.1322 data: 0.0529 max mem: 9377 +Train: [39] [1300/6250] eta: 0:12:47 lr: 0.000089 grad: 0.0940 (0.0958) loss: 0.8138 (0.8210) time: 0.1417 data: 0.0600 max mem: 9377 +Train: [39] [1400/6250] eta: 0:12:24 lr: 0.000089 grad: 0.0882 (0.0959) loss: 0.8197 (0.8202) time: 0.1417 data: 0.0578 max mem: 9377 +Train: [39] [1500/6250] eta: 0:12:02 lr: 0.000089 grad: 0.0920 (0.0958) loss: 0.8098 (0.8198) time: 0.1214 data: 0.0359 max mem: 9377 +Train: [39] [1600/6250] eta: 0:11:40 lr: 0.000089 grad: 0.0936 (0.0960) loss: 0.8167 (0.8194) time: 0.1303 data: 0.0499 max mem: 9377 +Train: [39] [1700/6250] eta: 0:11:21 lr: 0.000089 grad: 0.0923 (0.0959) loss: 0.8185 (0.8190) time: 0.1428 data: 0.0602 max mem: 9377 +Train: [39] [1800/6250] eta: 0:11:04 lr: 0.000089 grad: 0.0875 (0.0959) loss: 0.8171 (0.8187) time: 0.1721 data: 0.0907 max mem: 9377 +Train: [39] [1900/6250] eta: 0:10:46 lr: 0.000089 grad: 0.0850 (0.0959) loss: 0.8179 (0.8185) time: 0.1226 data: 0.0386 max mem: 9377 +Train: [39] [2000/6250] eta: 0:10:29 lr: 0.000089 grad: 0.0932 (0.0958) loss: 0.8227 (0.8184) time: 0.1276 data: 0.0408 max mem: 9377 +Train: [39] [2100/6250] eta: 0:10:12 lr: 0.000089 grad: 0.0872 (0.0958) loss: 0.8248 (0.8183) time: 0.1500 data: 0.0689 max mem: 9377 +Train: [39] [2200/6250] eta: 0:09:56 lr: 0.000089 grad: 0.0909 (0.0957) loss: 0.8213 (0.8183) time: 0.1405 data: 0.0572 max mem: 9377 +Train: [39] [2300/6250] eta: 0:09:39 lr: 0.000089 grad: 0.0972 (0.0957) loss: 0.8155 (0.8182) time: 0.1341 data: 0.0552 max mem: 9377 +Train: [39] [2400/6250] eta: 0:09:23 lr: 0.000089 grad: 0.0939 (0.0958) loss: 0.8038 (0.8181) time: 0.1373 data: 0.0534 max mem: 9377 +Train: [39] [2500/6250] eta: 0:09:07 lr: 0.000089 grad: 0.0875 (0.0958) loss: 0.8160 (0.8179) time: 0.1396 data: 0.0610 max mem: 9377 +Train: [39] [2600/6250] eta: 0:08:50 lr: 0.000089 grad: 0.1024 (0.0961) loss: 0.8150 (0.8177) time: 0.1355 data: 0.0518 max mem: 9377 +Train: [39] [2700/6250] eta: 0:08:34 lr: 0.000089 grad: 0.1000 (0.0962) loss: 0.8143 (0.8175) time: 0.1460 data: 0.0670 max mem: 9377 +Train: [39] [2800/6250] eta: 0:08:18 lr: 0.000089 grad: 0.0995 (0.0963) loss: 0.8081 (0.8172) time: 0.1148 data: 0.0239 max mem: 9377 +Train: [39] [2900/6250] eta: 0:08:03 lr: 0.000089 grad: 0.0897 (0.0964) loss: 0.8188 (0.8171) time: 0.1327 data: 0.0518 max mem: 9377 +Train: [39] [3000/6250] eta: 0:07:48 lr: 0.000089 grad: 0.0998 (0.0965) loss: 0.8035 (0.8169) time: 0.1269 data: 0.0433 max mem: 9377 +Train: [39] [3100/6250] eta: 0:07:32 lr: 0.000089 grad: 0.0924 (0.0966) loss: 0.8178 (0.8167) time: 0.1394 data: 0.0561 max mem: 9377 +Train: [39] [3200/6250] eta: 0:07:17 lr: 0.000089 grad: 0.0939 (0.0967) loss: 0.8187 (0.8166) time: 0.1061 data: 0.0195 max mem: 9377 +Train: [39] [3300/6250] eta: 0:07:02 lr: 0.000088 grad: 0.0982 (0.0969) loss: 0.8133 (0.8166) time: 0.1369 data: 0.0552 max mem: 9377 +Train: [39] [3400/6250] eta: 0:06:47 lr: 0.000088 grad: 0.0961 (0.0969) loss: 0.8192 (0.8166) time: 0.1298 data: 0.0494 max mem: 9377 +Train: [39] [3500/6250] eta: 0:06:31 lr: 0.000088 grad: 0.0917 (0.0969) loss: 0.8218 (0.8166) time: 0.1183 data: 0.0389 max mem: 9377 +Train: [39] [3600/6250] eta: 0:06:17 lr: 0.000088 grad: 0.0914 (0.0969) loss: 0.8192 (0.8165) time: 0.1423 data: 0.0618 max mem: 9377 +Train: [39] [3700/6250] eta: 0:06:03 lr: 0.000088 grad: 0.0950 (0.0969) loss: 0.8096 (0.8164) time: 0.1775 data: 0.0982 max mem: 9377 +Train: [39] [3800/6250] eta: 0:05:48 lr: 0.000088 grad: 0.0961 (0.0970) loss: 0.8100 (0.8163) time: 0.1292 data: 0.0502 max mem: 9377 +Train: [39] [3900/6250] eta: 0:05:34 lr: 0.000088 grad: 0.0927 (0.0970) loss: 0.8143 (0.8163) time: 0.1465 data: 0.0634 max mem: 9377 +Train: [39] [4000/6250] eta: 0:05:20 lr: 0.000088 grad: 0.0932 (0.0970) loss: 0.8118 (0.8162) time: 0.1270 data: 0.0472 max mem: 9377 +Train: [39] [4100/6250] eta: 0:05:06 lr: 0.000088 grad: 0.0939 (0.0971) loss: 0.8132 (0.8161) time: 0.1511 data: 0.0685 max mem: 9377 +Train: [39] [4200/6250] eta: 0:04:52 lr: 0.000088 grad: 0.0919 (0.0971) loss: 0.8162 (0.8161) time: 0.1501 data: 0.0689 max mem: 9377 +Train: [39] [4300/6250] eta: 0:04:38 lr: 0.000088 grad: 0.0947 (0.0971) loss: 0.8127 (0.8160) time: 0.1135 data: 0.0356 max mem: 9377 +Train: [39] [4400/6250] eta: 0:04:24 lr: 0.000088 grad: 0.0941 (0.0971) loss: 0.8138 (0.8160) time: 0.1543 data: 0.0776 max mem: 9377 +Train: [39] [4500/6250] eta: 0:04:09 lr: 0.000088 grad: 0.0979 (0.0971) loss: 0.8194 (0.8160) time: 0.1469 data: 0.0681 max mem: 9377 +Train: [39] [4600/6250] eta: 0:03:55 lr: 0.000088 grad: 0.0929 (0.0970) loss: 0.8133 (0.8160) time: 0.1591 data: 0.0789 max mem: 9377 +Train: [39] [4700/6250] eta: 0:03:41 lr: 0.000088 grad: 0.0879 (0.0970) loss: 0.8218 (0.8160) time: 0.1364 data: 0.0574 max mem: 9377 +Train: [39] [4800/6250] eta: 0:03:27 lr: 0.000088 grad: 0.0878 (0.0969) loss: 0.8258 (0.8162) time: 0.1501 data: 0.0661 max mem: 9377 +Train: [39] [4900/6250] eta: 0:03:13 lr: 0.000088 grad: 0.0907 (0.0968) loss: 0.8215 (0.8163) time: 0.1581 data: 0.0676 max mem: 9377 +Train: [39] [5000/6250] eta: 0:02:59 lr: 0.000088 grad: 0.0947 (0.0968) loss: 0.8167 (0.8163) time: 0.1935 data: 0.1063 max mem: 9377 +Train: [39] [5100/6250] eta: 0:02:45 lr: 0.000088 grad: 0.0949 (0.0968) loss: 0.8087 (0.8163) time: 0.1485 data: 0.0683 max mem: 9377 +Train: [39] [5200/6250] eta: 0:02:31 lr: 0.000088 grad: 0.0983 (0.0968) loss: 0.8084 (0.8162) time: 0.1571 data: 0.0749 max mem: 9377 +Train: [39] [5300/6250] eta: 0:02:17 lr: 0.000088 grad: 0.0890 (0.0968) loss: 0.8189 (0.8162) time: 0.1362 data: 0.0540 max mem: 9377 +Train: [39] [5400/6250] eta: 0:02:02 lr: 0.000088 grad: 0.0886 (0.0969) loss: 0.8200 (0.8162) time: 0.1308 data: 0.0449 max mem: 9377 +Train: [39] [5500/6250] eta: 0:01:48 lr: 0.000088 grad: 0.0910 (0.0968) loss: 0.8040 (0.8161) time: 0.1272 data: 0.0349 max mem: 9377 +Train: [39] [5600/6250] eta: 0:01:33 lr: 0.000088 grad: 0.0966 (0.0968) loss: 0.8113 (0.8160) time: 0.1123 data: 0.0122 max mem: 9377 +Train: [39] [5700/6250] eta: 0:01:19 lr: 0.000088 grad: 0.0957 (0.0969) loss: 0.8077 (0.8160) time: 0.1448 data: 0.0648 max mem: 9377 +Train: [39] [5800/6250] eta: 0:01:04 lr: 0.000088 grad: 0.0852 (0.0969) loss: 0.8242 (0.8160) time: 0.1368 data: 0.0562 max mem: 9377 +Train: [39] [5900/6250] eta: 0:00:50 lr: 0.000088 grad: 0.0951 (0.0969) loss: 0.8082 (0.8159) time: 0.1667 data: 0.0841 max mem: 9377 +Train: [39] [6000/6250] eta: 0:00:35 lr: 0.000088 grad: 0.0952 (0.0969) loss: 0.8102 (0.8158) time: 0.1233 data: 0.0388 max mem: 9377 +Train: [39] [6100/6250] eta: 0:00:21 lr: 0.000088 grad: 0.1001 (0.0969) loss: 0.8070 (0.8158) time: 0.1401 data: 0.0624 max mem: 9377 +Train: [39] [6200/6250] eta: 0:00:07 lr: 0.000088 grad: 0.0977 (0.0969) loss: 0.8077 (0.8157) time: 0.1358 data: 0.0504 max mem: 9377 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.1058 (0.0969) loss: 0.8109 (0.8156) time: 0.1325 data: 0.0521 max mem: 9377 +Train: [39] Total time: 0:15:03 (0.1446 s / it) +Averaged stats: lr: 0.000088 grad: 0.1058 (0.0969) loss: 0.8109 (0.8156) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:04:36 loss: 0.8471 (0.8471) time: 4.4557 data: 4.4262 max mem: 9377 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8435 (0.8436) time: 0.1521 data: 0.1273 max mem: 9377 +Eval (hcp-train-subset): [39] Total time: 0:00:14 (0.2316 s / it) +Averaged stats (hcp-train-subset): loss: 0.8435 (0.8436) +Making plots (hcp-train-subset): example=3 +Eval (hcp-val): [39] [ 0/62] eta: 0:03:49 loss: 0.8395 (0.8395) time: 3.7091 data: 3.6142 max mem: 9377 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8399 (0.8411) time: 0.1395 data: 0.1143 max mem: 9377 +Eval (hcp-val): [39] Total time: 0:00:14 (0.2396 s / it) +Averaged stats (hcp-val): loss: 0.8399 (0.8411) +Making plots (hcp-val): example=24 +Eval (nsd-val): [39] [ 0/62] eta: 0:04:42 loss: 0.8205 (0.8205) time: 4.5548 data: 4.4717 max mem: 9377 +Eval (nsd-val): [39] [61/62] eta: 0:00:00 loss: 0.8298 (0.8296) time: 0.1280 data: 0.1028 max mem: 9377 +Eval (nsd-val): [39] Total time: 0:00:15 (0.2573 s / it) +Averaged stats (nsd-val): loss: 0.8298 (0.8296) +Making plots (nsd-val): example=33 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 8:17:37 lr: 0.000088 grad: 0.3077 (0.3077) loss: 0.8051 (0.8051) time: 4.7773 data: 4.6035 max mem: 9377 +Train: [40] [ 100/6250] eta: 0:20:05 lr: 0.000088 grad: 0.1030 (0.1363) loss: 0.8117 (0.8156) time: 0.1608 data: 0.0715 max mem: 9377 +Train: [40] [ 200/6250] eta: 0:17:09 lr: 0.000088 grad: 0.1017 (0.1219) loss: 0.8110 (0.8175) time: 0.1192 data: 0.0320 max mem: 9377 +Train: [40] [ 300/6250] eta: 0:15:58 lr: 0.000088 grad: 0.1034 (0.1170) loss: 0.8100 (0.8150) time: 0.1343 data: 0.0502 max mem: 9377 +Train: [40] [ 400/6250] eta: 0:15:13 lr: 0.000087 grad: 0.0922 (0.1138) loss: 0.8173 (0.8132) time: 0.1524 data: 0.0690 max mem: 9377 +Train: [40] [ 500/6250] eta: 0:14:33 lr: 0.000087 grad: 0.1015 (0.1110) loss: 0.8070 (0.8128) time: 0.1420 data: 0.0556 max mem: 9377 +Train: [40] [ 600/6250] eta: 0:13:56 lr: 0.000087 grad: 0.0955 (0.1088) loss: 0.8211 (0.8127) time: 0.1208 data: 0.0294 max mem: 9377 +Train: [40] [ 700/6250] eta: 0:13:34 lr: 0.000087 grad: 0.0941 (0.1068) loss: 0.8137 (0.8126) time: 0.1416 data: 0.0529 max mem: 9377 +Train: [40] [ 800/6250] eta: 0:13:19 lr: 0.000087 grad: 0.0980 (0.1060) loss: 0.8120 (0.8124) time: 0.1552 data: 0.0656 max mem: 9377 +Train: [40] [ 900/6250] eta: 0:13:10 lr: 0.000087 grad: 0.0934 (0.1050) loss: 0.8159 (0.8126) time: 0.1684 data: 0.0923 max mem: 9377 +Train: [40] [1000/6250] eta: 0:12:58 lr: 0.000087 grad: 0.0905 (0.1040) loss: 0.8121 (0.8127) time: 0.1529 data: 0.0669 max mem: 9377 +Train: [40] [1100/6250] eta: 0:12:52 lr: 0.000087 grad: 0.0990 (0.1035) loss: 0.8096 (0.8126) time: 0.2015 data: 0.1050 max mem: 9377 +Train: [40] [1200/6250] eta: 0:12:39 lr: 0.000087 grad: 0.0950 (0.1028) loss: 0.8143 (0.8127) time: 0.1476 data: 0.0683 max mem: 9377 +Train: [40] [1300/6250] eta: 0:12:28 lr: 0.000087 grad: 0.0858 (0.1021) loss: 0.8198 (0.8127) time: 0.1726 data: 0.0886 max mem: 9377 +Train: [40] [1400/6250] eta: 0:12:16 lr: 0.000087 grad: 0.0918 (0.1017) loss: 0.8156 (0.8126) time: 0.1446 data: 0.0588 max mem: 9377 +Train: [40] [1500/6250] eta: 0:12:00 lr: 0.000087 grad: 0.0932 (0.1014) loss: 0.8096 (0.8125) time: 0.1439 data: 0.0527 max mem: 9377 +Train: [40] [1600/6250] eta: 0:11:46 lr: 0.000087 grad: 0.0960 (0.1011) loss: 0.8089 (0.8128) time: 0.1406 data: 0.0454 max mem: 9377 +Train: [40] [1700/6250] eta: 0:11:29 lr: 0.000087 grad: 0.0900 (0.1010) loss: 0.8178 (0.8128) time: 0.1455 data: 0.0614 max mem: 9377 +Train: [40] [1800/6250] eta: 0:11:10 lr: 0.000087 grad: 0.1007 (0.1009) loss: 0.8095 (0.8130) time: 0.1310 data: 0.0435 max mem: 9377 +Train: [40] [1900/6250] eta: 0:10:54 lr: 0.000087 grad: 0.0943 (0.1006) loss: 0.8181 (0.8132) time: 0.1669 data: 0.0847 max mem: 9377 +Train: [40] [2000/6250] eta: 0:10:37 lr: 0.000087 grad: 0.0956 (0.1004) loss: 0.8075 (0.8132) time: 0.1659 data: 0.0861 max mem: 9377 +Train: [40] [2100/6250] eta: 0:10:21 lr: 0.000087 grad: 0.0907 (0.1002) loss: 0.8175 (0.8133) time: 0.1491 data: 0.0680 max mem: 9377 +Train: [40] [2200/6250] eta: 0:10:05 lr: 0.000087 grad: 0.0957 (0.1000) loss: 0.8121 (0.8134) time: 0.1425 data: 0.0576 max mem: 9377 +Train: [40] [2300/6250] eta: 0:09:48 lr: 0.000087 grad: 0.0894 (0.0998) loss: 0.8189 (0.8134) time: 0.1249 data: 0.0485 max mem: 9377 +Train: [40] [2400/6250] eta: 0:09:31 lr: 0.000087 grad: 0.0953 (0.0998) loss: 0.8159 (0.8135) time: 0.1253 data: 0.0455 max mem: 9377 +Train: [40] [2500/6250] eta: 0:09:14 lr: 0.000087 grad: 0.0948 (0.0997) loss: 0.8183 (0.8135) time: 0.1377 data: 0.0530 max mem: 9377 +Train: [40] [2600/6250] eta: 0:08:58 lr: 0.000087 grad: 0.0992 (0.0997) loss: 0.8063 (0.8133) time: 0.1485 data: 0.0700 max mem: 9377 +Train: [40] [2700/6250] eta: 0:08:41 lr: 0.000087 grad: 0.1016 (0.0998) loss: 0.8154 (0.8132) time: 0.1316 data: 0.0464 max mem: 9377 +Train: [40] [2800/6250] eta: 0:08:25 lr: 0.000087 grad: 0.0936 (0.0996) loss: 0.8154 (0.8133) time: 0.1326 data: 0.0542 max mem: 9377 +Train: [40] [2900/6250] eta: 0:08:09 lr: 0.000087 grad: 0.0990 (0.0996) loss: 0.8126 (0.8134) time: 0.1350 data: 0.0525 max mem: 9377 +Train: [40] [3000/6250] eta: 0:07:54 lr: 0.000087 grad: 0.0906 (0.0998) loss: 0.8158 (0.8134) time: 0.1314 data: 0.0501 max mem: 9377 +Train: [40] [3100/6250] eta: 0:07:38 lr: 0.000087 grad: 0.0969 (0.0997) loss: 0.8116 (0.8134) time: 0.1501 data: 0.0712 max mem: 9377 +Train: [40] [3200/6250] eta: 0:07:23 lr: 0.000087 grad: 0.0934 (0.0997) loss: 0.8186 (0.8136) time: 0.1296 data: 0.0495 max mem: 9377 +Train: [40] [3300/6250] eta: 0:07:08 lr: 0.000087 grad: 0.0949 (0.0996) loss: 0.8114 (0.8136) time: 0.1283 data: 0.0444 max mem: 9377 +Train: [40] [3400/6250] eta: 0:06:53 lr: 0.000087 grad: 0.0965 (0.0996) loss: 0.8176 (0.8137) time: 0.1318 data: 0.0498 max mem: 9377 +Train: [40] [3500/6250] eta: 0:06:38 lr: 0.000087 grad: 0.0927 (0.0995) loss: 0.8199 (0.8138) time: 0.1213 data: 0.0409 max mem: 9377 +Train: [40] [3600/6250] eta: 0:06:23 lr: 0.000087 grad: 0.0995 (0.0994) loss: 0.8141 (0.8138) time: 0.1365 data: 0.0470 max mem: 9377 +Train: [40] [3700/6250] eta: 0:06:08 lr: 0.000086 grad: 0.0935 (0.0995) loss: 0.8140 (0.8138) time: 0.1322 data: 0.0545 max mem: 9377 +Train: [40] [3800/6250] eta: 0:05:54 lr: 0.000086 grad: 0.0971 (0.0995) loss: 0.8049 (0.8138) time: 0.1116 data: 0.0273 max mem: 9377 +Train: [40] [3900/6250] eta: 0:05:39 lr: 0.000086 grad: 0.0891 (0.0994) loss: 0.8184 (0.8138) time: 0.1385 data: 0.0603 max mem: 9377 +Train: [40] [4000/6250] eta: 0:05:25 lr: 0.000086 grad: 0.1011 (0.0994) loss: 0.8168 (0.8138) time: 0.1425 data: 0.0596 max mem: 9377 +Train: [40] [4100/6250] eta: 0:05:10 lr: 0.000086 grad: 0.0952 (0.0994) loss: 0.8146 (0.8138) time: 0.1288 data: 0.0463 max mem: 9377 +Train: [40] [4200/6250] eta: 0:04:56 lr: 0.000086 grad: 0.0974 (0.0995) loss: 0.8149 (0.8138) time: 0.1436 data: 0.0632 max mem: 9377 +Train: [40] [4300/6250] eta: 0:04:41 lr: 0.000086 grad: 0.1004 (0.0995) loss: 0.8113 (0.8138) time: 0.1486 data: 0.0739 max mem: 9377 +Train: [40] [4400/6250] eta: 0:04:27 lr: 0.000086 grad: 0.0980 (0.0995) loss: 0.8115 (0.8137) time: 0.1358 data: 0.0575 max mem: 9377 +Train: [40] [4500/6250] eta: 0:04:12 lr: 0.000086 grad: 0.0950 (0.0996) loss: 0.8092 (0.8136) time: 0.1102 data: 0.0284 max mem: 9377 +Train: [40] [4600/6250] eta: 0:03:58 lr: 0.000086 grad: 0.1017 (0.0997) loss: 0.8073 (0.8134) time: 0.1283 data: 0.0409 max mem: 9377 +Train: [40] [4700/6250] eta: 0:03:43 lr: 0.000086 grad: 0.1022 (0.0997) loss: 0.8064 (0.8133) time: 0.1581 data: 0.0814 max mem: 9377 +Train: [40] [4800/6250] eta: 0:03:29 lr: 0.000086 grad: 0.0960 (0.0997) loss: 0.8137 (0.8132) time: 0.1400 data: 0.0521 max mem: 9377 +Train: [40] [4900/6250] eta: 0:03:15 lr: 0.000086 grad: 0.0958 (0.0997) loss: 0.8142 (0.8131) time: 0.1352 data: 0.0478 max mem: 9377 +Train: [40] [5000/6250] eta: 0:03:00 lr: 0.000086 grad: 0.0970 (0.0996) loss: 0.8127 (0.8131) time: 0.1414 data: 0.0580 max mem: 9377 +Train: [40] [5100/6250] eta: 0:02:46 lr: 0.000086 grad: 0.1010 (0.0996) loss: 0.8104 (0.8130) time: 0.1351 data: 0.0566 max mem: 9377 +Train: [40] [5200/6250] eta: 0:02:31 lr: 0.000086 grad: 0.1017 (0.0997) loss: 0.8115 (0.8129) time: 0.1101 data: 0.0250 max mem: 9377 +Train: [40] [5300/6250] eta: 0:02:17 lr: 0.000086 grad: 0.0975 (0.0997) loss: 0.8063 (0.8128) time: 0.1322 data: 0.0442 max mem: 9377 +Train: [40] [5400/6250] eta: 0:02:02 lr: 0.000086 grad: 0.0988 (0.0998) loss: 0.8125 (0.8127) time: 0.1272 data: 0.0379 max mem: 9377 +Train: [40] [5500/6250] eta: 0:01:48 lr: 0.000086 grad: 0.0991 (0.0998) loss: 0.8137 (0.8127) time: 0.1323 data: 0.0474 max mem: 9377 +Train: [40] [5600/6250] eta: 0:01:33 lr: 0.000086 grad: 0.0928 (0.0998) loss: 0.8199 (0.8127) time: 0.1306 data: 0.0438 max mem: 9377 +Train: [40] [5700/6250] eta: 0:01:19 lr: 0.000086 grad: 0.0984 (0.0997) loss: 0.8176 (0.8127) time: 0.1467 data: 0.0615 max mem: 9377 +Train: [40] [5800/6250] eta: 0:01:04 lr: 0.000086 grad: 0.0926 (0.0997) loss: 0.8162 (0.8127) time: 0.1353 data: 0.0523 max mem: 9377 +Train: [40] [5900/6250] eta: 0:00:50 lr: 0.000086 grad: 0.0996 (0.0997) loss: 0.8155 (0.8127) time: 0.1285 data: 0.0444 max mem: 9377 +Train: [40] [6000/6250] eta: 0:00:35 lr: 0.000086 grad: 0.0960 (0.0997) loss: 0.8221 (0.8128) time: 0.1466 data: 0.0607 max mem: 9377 +Train: [40] [6100/6250] eta: 0:00:21 lr: 0.000086 grad: 0.0968 (0.0997) loss: 0.8195 (0.8128) time: 0.1379 data: 0.0559 max mem: 9377 +Train: [40] [6200/6250] eta: 0:00:07 lr: 0.000086 grad: 0.0982 (0.0997) loss: 0.8167 (0.8128) time: 0.1305 data: 0.0453 max mem: 9377 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.0975 (0.0998) loss: 0.8037 (0.8128) time: 0.1308 data: 0.0508 max mem: 9377 +Train: [40] Total time: 0:14:56 (0.1435 s / it) +Averaged stats: lr: 0.000086 grad: 0.0975 (0.0998) loss: 0.8037 (0.8128) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:05:30 loss: 0.8387 (0.8387) time: 5.3365 data: 5.3068 max mem: 9377 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8436 (0.8435) time: 0.1279 data: 0.1025 max mem: 9377 +Eval (hcp-train-subset): [40] Total time: 0:00:12 (0.2095 s / it) +Averaged stats (hcp-train-subset): loss: 0.8436 (0.8435) +Eval (hcp-val): [40] [ 0/62] eta: 0:04:44 loss: 0.8381 (0.8381) time: 4.5950 data: 4.5628 max mem: 9377 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8401 (0.8414) time: 0.1352 data: 0.1098 max mem: 9377 +Eval (hcp-val): [40] Total time: 0:00:13 (0.2163 s / it) +Averaged stats (hcp-val): loss: 0.8401 (0.8414) +Eval (nsd-val): [40] [ 0/62] eta: 0:04:25 loss: 0.8195 (0.8195) time: 4.2809 data: 4.1989 max mem: 9377 +Eval (nsd-val): [40] [61/62] eta: 0:00:00 loss: 0.8321 (0.8307) time: 0.1109 data: 0.0860 max mem: 9377 +Eval (nsd-val): [40] Total time: 0:00:13 (0.2214 s / it) +Averaged stats (nsd-val): loss: 0.8321 (0.8307) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 8:00:25 lr: 0.000086 grad: 0.0651 (0.0651) loss: 0.8728 (0.8728) time: 4.6120 data: 4.3665 max mem: 9377 +Train: [41] [ 100/6250] eta: 0:18:59 lr: 0.000086 grad: 0.1139 (0.1275) loss: 0.8180 (0.8227) time: 0.1481 data: 0.0534 max mem: 9377 +Train: [41] [ 200/6250] eta: 0:16:44 lr: 0.000086 grad: 0.0978 (0.1209) loss: 0.8082 (0.8182) time: 0.1471 data: 0.0656 max mem: 9377 +Train: [41] [ 300/6250] eta: 0:15:30 lr: 0.000086 grad: 0.1009 (0.1165) loss: 0.8085 (0.8155) time: 0.1542 data: 0.0666 max mem: 9377 +Train: [41] [ 400/6250] eta: 0:14:43 lr: 0.000086 grad: 0.0918 (0.1113) loss: 0.8188 (0.8157) time: 0.1203 data: 0.0298 max mem: 9377 +Train: [41] [ 500/6250] eta: 0:14:13 lr: 0.000086 grad: 0.0947 (0.1087) loss: 0.8130 (0.8154) time: 0.1532 data: 0.0683 max mem: 9377 +Train: [41] [ 600/6250] eta: 0:13:44 lr: 0.000086 grad: 0.0903 (0.1071) loss: 0.8246 (0.8163) time: 0.1241 data: 0.0277 max mem: 9377 +Train: [41] [ 700/6250] eta: 0:13:21 lr: 0.000085 grad: 0.1001 (0.1052) loss: 0.8200 (0.8168) time: 0.1317 data: 0.0464 max mem: 9377 +Train: [41] [ 800/6250] eta: 0:13:16 lr: 0.000085 grad: 0.0864 (0.1040) loss: 0.8105 (0.8170) time: 0.1748 data: 0.0842 max mem: 9377 +Train: [41] [ 900/6250] eta: 0:13:06 lr: 0.000085 grad: 0.0937 (0.1028) loss: 0.8235 (0.8172) time: 0.1738 data: 0.0890 max mem: 9377 +Train: [41] [1000/6250] eta: 0:12:56 lr: 0.000085 grad: 0.0947 (0.1023) loss: 0.8146 (0.8169) time: 0.1490 data: 0.0682 max mem: 9377 +Train: [41] [1100/6250] eta: 0:12:54 lr: 0.000085 grad: 0.0876 (0.1015) loss: 0.8210 (0.8171) time: 0.1750 data: 0.0849 max mem: 9377 +Train: [41] [1200/6250] eta: 0:12:38 lr: 0.000085 grad: 0.0929 (0.1010) loss: 0.8142 (0.8168) time: 0.1590 data: 0.0717 max mem: 9377 +Train: [41] [1300/6250] eta: 0:12:23 lr: 0.000085 grad: 0.0911 (0.1008) loss: 0.8084 (0.8166) time: 0.1411 data: 0.0531 max mem: 9377 +Train: [41] [1400/6250] eta: 0:12:11 lr: 0.000085 grad: 0.0921 (0.1004) loss: 0.8129 (0.8164) time: 0.1431 data: 0.0617 max mem: 9377 +Train: [41] [1500/6250] eta: 0:11:52 lr: 0.000085 grad: 0.0961 (0.1001) loss: 0.8218 (0.8162) time: 0.1445 data: 0.0603 max mem: 9377 +Train: [41] [1600/6250] eta: 0:11:39 lr: 0.000085 grad: 0.0930 (0.1000) loss: 0.8146 (0.8158) time: 0.1708 data: 0.0904 max mem: 9377 +Train: [41] [1700/6250] eta: 0:11:25 lr: 0.000085 grad: 0.1061 (0.1000) loss: 0.8040 (0.8154) time: 0.1721 data: 0.0852 max mem: 9377 +Train: [41] [1800/6250] eta: 0:11:10 lr: 0.000085 grad: 0.0976 (0.1000) loss: 0.8030 (0.8150) time: 0.1540 data: 0.0644 max mem: 9377 +Train: [41] [1900/6250] eta: 0:10:55 lr: 0.000085 grad: 0.0960 (0.1002) loss: 0.8089 (0.8146) time: 0.1298 data: 0.0387 max mem: 9377 +Train: [41] [2000/6250] eta: 0:10:37 lr: 0.000085 grad: 0.0995 (0.1002) loss: 0.8115 (0.8144) time: 0.1367 data: 0.0415 max mem: 9377 +Train: [41] [2100/6250] eta: 0:10:20 lr: 0.000085 grad: 0.0941 (0.1002) loss: 0.8119 (0.8141) time: 0.1422 data: 0.0514 max mem: 9377 +Train: [41] [2200/6250] eta: 0:10:03 lr: 0.000085 grad: 0.0995 (0.1002) loss: 0.8059 (0.8138) time: 0.1612 data: 0.0768 max mem: 9377 +Train: [41] [2300/6250] eta: 0:09:47 lr: 0.000085 grad: 0.0938 (0.1001) loss: 0.8133 (0.8137) time: 0.1402 data: 0.0605 max mem: 9377 +Train: [41] [2400/6250] eta: 0:09:31 lr: 0.000085 grad: 0.1027 (0.1003) loss: 0.8050 (0.8132) time: 0.1373 data: 0.0516 max mem: 9377 +Train: [41] [2500/6250] eta: 0:09:15 lr: 0.000085 grad: 0.0969 (0.1003) loss: 0.8092 (0.8130) time: 0.1459 data: 0.0631 max mem: 9377 +Train: [41] [2600/6250] eta: 0:08:59 lr: 0.000085 grad: 0.0985 (0.1003) loss: 0.8103 (0.8129) time: 0.1290 data: 0.0466 max mem: 9377 +Train: [41] [2700/6250] eta: 0:08:43 lr: 0.000085 grad: 0.1040 (0.1006) loss: 0.8096 (0.8128) time: 0.1548 data: 0.0748 max mem: 9377 +Train: [41] [2800/6250] eta: 0:08:27 lr: 0.000085 grad: 0.0924 (0.1006) loss: 0.8138 (0.8127) time: 0.1423 data: 0.0581 max mem: 9377 +Train: [41] [2900/6250] eta: 0:08:11 lr: 0.000085 grad: 0.0980 (0.1007) loss: 0.8156 (0.8126) time: 0.1159 data: 0.0348 max mem: 9377 +Train: [41] [3000/6250] eta: 0:07:57 lr: 0.000085 grad: 0.0919 (0.1008) loss: 0.8155 (0.8124) time: 0.1611 data: 0.0805 max mem: 9377 +Train: [41] [3100/6250] eta: 0:07:41 lr: 0.000085 grad: 0.0994 (0.1008) loss: 0.8134 (0.8124) time: 0.1447 data: 0.0601 max mem: 9377 +Train: [41] [3200/6250] eta: 0:07:25 lr: 0.000085 grad: 0.1023 (0.1008) loss: 0.8119 (0.8123) time: 0.1367 data: 0.0561 max mem: 9377 +Train: [41] [3300/6250] eta: 0:07:10 lr: 0.000085 grad: 0.0982 (0.1008) loss: 0.8174 (0.8122) time: 0.1433 data: 0.0613 max mem: 9377 +Train: [41] [3400/6250] eta: 0:06:55 lr: 0.000085 grad: 0.0925 (0.1009) loss: 0.8095 (0.8122) time: 0.1342 data: 0.0477 max mem: 9377 +Train: [41] [3500/6250] eta: 0:06:41 lr: 0.000085 grad: 0.1052 (0.1009) loss: 0.8102 (0.8122) time: 0.1731 data: 0.0900 max mem: 9377 +Train: [41] [3600/6250] eta: 0:06:25 lr: 0.000085 grad: 0.0919 (0.1009) loss: 0.8144 (0.8122) time: 0.1316 data: 0.0461 max mem: 9377 +Train: [41] [3700/6250] eta: 0:06:10 lr: 0.000085 grad: 0.0974 (0.1009) loss: 0.8154 (0.8122) time: 0.1449 data: 0.0573 max mem: 9377 +Train: [41] [3800/6250] eta: 0:05:56 lr: 0.000085 grad: 0.0980 (0.1010) loss: 0.8118 (0.8122) time: 0.1410 data: 0.0633 max mem: 9377 +Train: [41] [3900/6250] eta: 0:05:40 lr: 0.000084 grad: 0.0964 (0.1010) loss: 0.8069 (0.8122) time: 0.1220 data: 0.0448 max mem: 9377 +Train: [41] [4000/6250] eta: 0:05:25 lr: 0.000084 grad: 0.0995 (0.1010) loss: 0.8142 (0.8121) time: 0.1270 data: 0.0351 max mem: 9377 +Train: [41] [4100/6250] eta: 0:05:10 lr: 0.000084 grad: 0.0982 (0.1011) loss: 0.8114 (0.8122) time: 0.1184 data: 0.0317 max mem: 9377 +Train: [41] [4200/6250] eta: 0:04:56 lr: 0.000084 grad: 0.0980 (0.1011) loss: 0.8093 (0.8122) time: 0.1355 data: 0.0561 max mem: 9377 +Train: [41] [4300/6250] eta: 0:04:41 lr: 0.000084 grad: 0.0989 (0.1011) loss: 0.8086 (0.8122) time: 0.1568 data: 0.0692 max mem: 9377 +Train: [41] [4400/6250] eta: 0:04:26 lr: 0.000084 grad: 0.1014 (0.1011) loss: 0.8114 (0.8122) time: 0.1325 data: 0.0519 max mem: 9377 +Train: [41] [4500/6250] eta: 0:04:12 lr: 0.000084 grad: 0.0979 (0.1011) loss: 0.8167 (0.8122) time: 0.1547 data: 0.0756 max mem: 9377 +Train: [41] [4600/6250] eta: 0:03:57 lr: 0.000084 grad: 0.0946 (0.1011) loss: 0.8177 (0.8122) time: 0.1218 data: 0.0345 max mem: 9377 +Train: [41] [4700/6250] eta: 0:03:43 lr: 0.000084 grad: 0.0972 (0.1011) loss: 0.8040 (0.8122) time: 0.1357 data: 0.0461 max mem: 9377 +Train: [41] [4800/6250] eta: 0:03:29 lr: 0.000084 grad: 0.0984 (0.1011) loss: 0.8128 (0.8122) time: 0.1562 data: 0.0756 max mem: 9377 +Train: [41] [4900/6250] eta: 0:03:15 lr: 0.000084 grad: 0.0973 (0.1011) loss: 0.8132 (0.8123) time: 0.1496 data: 0.0619 max mem: 9377 +Train: [41] [5000/6250] eta: 0:03:00 lr: 0.000084 grad: 0.0986 (0.1011) loss: 0.8129 (0.8123) time: 0.1426 data: 0.0606 max mem: 9377 +Train: [41] [5100/6250] eta: 0:02:46 lr: 0.000084 grad: 0.0996 (0.1012) loss: 0.8071 (0.8124) time: 0.1386 data: 0.0523 max mem: 9377 +Train: [41] [5200/6250] eta: 0:02:31 lr: 0.000084 grad: 0.0946 (0.1012) loss: 0.8171 (0.8124) time: 0.1315 data: 0.0515 max mem: 9377 +Train: [41] [5300/6250] eta: 0:02:16 lr: 0.000084 grad: 0.0882 (0.1011) loss: 0.8210 (0.8125) time: 0.1406 data: 0.0570 max mem: 9377 +Train: [41] [5400/6250] eta: 0:02:02 lr: 0.000084 grad: 0.1002 (0.1012) loss: 0.8217 (0.8126) time: 0.1430 data: 0.0530 max mem: 9377 +Train: [41] [5500/6250] eta: 0:01:47 lr: 0.000084 grad: 0.0903 (0.1013) loss: 0.8253 (0.8127) time: 0.1374 data: 0.0486 max mem: 9377 +Train: [41] [5600/6250] eta: 0:01:33 lr: 0.000084 grad: 0.0986 (0.1012) loss: 0.8121 (0.8127) time: 0.1254 data: 0.0410 max mem: 9377 +Train: [41] [5700/6250] eta: 0:01:18 lr: 0.000084 grad: 0.0981 (0.1012) loss: 0.8122 (0.8128) time: 0.1335 data: 0.0557 max mem: 9377 +Train: [41] [5800/6250] eta: 0:01:04 lr: 0.000084 grad: 0.0973 (0.1012) loss: 0.8125 (0.8128) time: 0.1276 data: 0.0363 max mem: 9377 +Train: [41] [5900/6250] eta: 0:00:49 lr: 0.000084 grad: 0.0958 (0.1012) loss: 0.8116 (0.8129) time: 0.1365 data: 0.0539 max mem: 9377 +Train: [41] [6000/6250] eta: 0:00:35 lr: 0.000084 grad: 0.1005 (0.1011) loss: 0.8121 (0.8129) time: 0.1509 data: 0.0712 max mem: 9377 +Train: [41] [6100/6250] eta: 0:00:21 lr: 0.000084 grad: 0.0929 (0.1011) loss: 0.8143 (0.8129) time: 0.1145 data: 0.0282 max mem: 9377 +Train: [41] [6200/6250] eta: 0:00:07 lr: 0.000084 grad: 0.0984 (0.1011) loss: 0.8154 (0.8129) time: 0.1395 data: 0.0556 max mem: 9377 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.0917 (0.1011) loss: 0.8189 (0.8129) time: 0.1452 data: 0.0613 max mem: 9377 +Train: [41] Total time: 0:14:56 (0.1434 s / it) +Averaged stats: lr: 0.000084 grad: 0.0917 (0.1011) loss: 0.8189 (0.8129) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:03:15 loss: 0.8400 (0.8400) time: 3.1519 data: 3.0581 max mem: 9377 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8427 (0.8431) time: 0.1088 data: 0.0839 max mem: 9377 +Eval (hcp-train-subset): [41] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (hcp-train-subset): loss: 0.8427 (0.8431) +Eval (hcp-val): [41] [ 0/62] eta: 0:05:34 loss: 0.8346 (0.8346) time: 5.3986 data: 5.3686 max mem: 9377 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8408 (0.8403) time: 0.1189 data: 0.0937 max mem: 9377 +Eval (hcp-val): [41] Total time: 0:00:13 (0.2194 s / it) +Averaged stats (hcp-val): loss: 0.8408 (0.8403) +Eval (nsd-val): [41] [ 0/62] eta: 0:04:04 loss: 0.8087 (0.8087) time: 3.9475 data: 3.8769 max mem: 9377 +Eval (nsd-val): [41] [61/62] eta: 0:00:00 loss: 0.8157 (0.8172) time: 0.1244 data: 0.0994 max mem: 9377 +Eval (nsd-val): [41] Total time: 0:00:13 (0.2105 s / it) +Averaged stats (nsd-val): loss: 0.8157 (0.8172) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [42] [ 0/6250] eta: 11:03:45 lr: 0.000084 grad: 0.2215 (0.2215) loss: 0.8653 (0.8653) time: 6.3720 data: 6.2766 max mem: 9377 +Train: [42] [ 100/6250] eta: 0:21:08 lr: 0.000084 grad: 0.1075 (0.1340) loss: 0.8292 (0.8332) time: 0.1540 data: 0.0528 max mem: 9377 +Train: [42] [ 200/6250] eta: 0:18:36 lr: 0.000084 grad: 0.1183 (0.1354) loss: 0.8093 (0.8218) time: 0.1678 data: 0.0690 max mem: 9377 +Train: [42] [ 300/6250] eta: 0:16:55 lr: 0.000084 grad: 0.1104 (0.1299) loss: 0.8005 (0.8154) time: 0.1312 data: 0.0372 max mem: 9377 +Train: [42] [ 400/6250] eta: 0:15:47 lr: 0.000084 grad: 0.0954 (0.1259) loss: 0.8066 (0.8118) time: 0.1302 data: 0.0420 max mem: 9377 +Train: [42] [ 500/6250] eta: 0:15:03 lr: 0.000084 grad: 0.0958 (0.1213) loss: 0.8189 (0.8105) time: 0.1450 data: 0.0573 max mem: 9377 +Train: [42] [ 600/6250] eta: 0:14:39 lr: 0.000084 grad: 0.1071 (0.1176) loss: 0.8058 (0.8096) time: 0.1344 data: 0.0505 max mem: 9377 +Train: [42] [ 700/6250] eta: 0:14:26 lr: 0.000084 grad: 0.0931 (0.1146) loss: 0.8138 (0.8099) time: 0.1673 data: 0.0780 max mem: 9377 +Train: [42] [ 800/6250] eta: 0:14:08 lr: 0.000084 grad: 0.0899 (0.1126) loss: 0.8159 (0.8099) time: 0.1635 data: 0.0842 max mem: 9377 +Train: [42] [ 900/6250] eta: 0:13:54 lr: 0.000083 grad: 0.1000 (0.1110) loss: 0.8187 (0.8102) time: 0.1584 data: 0.0766 max mem: 9377 +Train: [42] [1000/6250] eta: 0:13:29 lr: 0.000083 grad: 0.0989 (0.1097) loss: 0.8020 (0.8103) time: 0.1369 data: 0.0570 max mem: 9377 +Train: [42] [1100/6250] eta: 0:13:08 lr: 0.000083 grad: 0.0980 (0.1085) loss: 0.8155 (0.8104) time: 0.1403 data: 0.0644 max mem: 9377 +Train: [42] [1200/6250] eta: 0:12:50 lr: 0.000083 grad: 0.0944 (0.1075) loss: 0.8114 (0.8105) time: 0.1507 data: 0.0708 max mem: 9377 +Train: [42] [1300/6250] eta: 0:12:41 lr: 0.000083 grad: 0.0981 (0.1068) loss: 0.8086 (0.8103) time: 0.1778 data: 0.0894 max mem: 9377 +Train: [42] [1400/6250] eta: 0:12:28 lr: 0.000083 grad: 0.0991 (0.1060) loss: 0.8102 (0.8104) time: 0.1830 data: 0.0970 max mem: 9377 +Train: [42] [1500/6250] eta: 0:12:14 lr: 0.000083 grad: 0.1047 (0.1054) loss: 0.8033 (0.8104) time: 0.1569 data: 0.0680 max mem: 9377 +Train: [42] [1600/6250] eta: 0:11:58 lr: 0.000083 grad: 0.0950 (0.1057) loss: 0.8168 (0.8106) time: 0.1788 data: 0.0964 max mem: 9377 +Train: [42] [1700/6250] eta: 0:11:39 lr: 0.000083 grad: 0.0880 (0.1052) loss: 0.8073 (0.8106) time: 0.1498 data: 0.0678 max mem: 9377 +Train: [42] [1800/6250] eta: 0:11:20 lr: 0.000083 grad: 0.0980 (0.1046) loss: 0.8069 (0.8107) time: 0.1342 data: 0.0492 max mem: 9377 +Train: [42] [1900/6250] eta: 0:11:00 lr: 0.000083 grad: 0.0948 (0.1043) loss: 0.8105 (0.8108) time: 0.1235 data: 0.0413 max mem: 9377 +Train: [42] [2000/6250] eta: 0:10:41 lr: 0.000083 grad: 0.0999 (0.1039) loss: 0.8108 (0.8109) time: 0.1246 data: 0.0334 max mem: 9377 +Train: [42] [2100/6250] eta: 0:10:22 lr: 0.000083 grad: 0.0982 (0.1036) loss: 0.8177 (0.8111) time: 0.1235 data: 0.0430 max mem: 9377 +Train: [42] [2200/6250] eta: 0:10:05 lr: 0.000083 grad: 0.0937 (0.1032) loss: 0.8136 (0.8112) time: 0.1428 data: 0.0636 max mem: 9377 +Train: [42] [2300/6250] eta: 0:09:46 lr: 0.000083 grad: 0.0954 (0.1029) loss: 0.8152 (0.8114) time: 0.1259 data: 0.0425 max mem: 9377 +Train: [42] [2400/6250] eta: 0:09:31 lr: 0.000083 grad: 0.0932 (0.1028) loss: 0.8106 (0.8115) time: 0.1628 data: 0.0804 max mem: 9377 +Train: [42] [2500/6250] eta: 0:09:15 lr: 0.000083 grad: 0.0938 (0.1025) loss: 0.8120 (0.8115) time: 0.1463 data: 0.0642 max mem: 9377 +Train: [42] [2600/6250] eta: 0:08:58 lr: 0.000083 grad: 0.0984 (0.1024) loss: 0.8126 (0.8115) time: 0.1314 data: 0.0455 max mem: 9377 +Train: [42] [2700/6250] eta: 0:08:43 lr: 0.000083 grad: 0.0926 (0.1024) loss: 0.8086 (0.8114) time: 0.1249 data: 0.0437 max mem: 9377 +Train: [42] [2800/6250] eta: 0:08:28 lr: 0.000083 grad: 0.0972 (0.1025) loss: 0.8097 (0.8113) time: 0.1487 data: 0.0673 max mem: 9377 +Train: [42] [2900/6250] eta: 0:08:12 lr: 0.000083 grad: 0.1049 (0.1024) loss: 0.8068 (0.8113) time: 0.1558 data: 0.0741 max mem: 9377 +Train: [42] [3000/6250] eta: 0:07:57 lr: 0.000083 grad: 0.0938 (0.1024) loss: 0.8161 (0.8113) time: 0.1371 data: 0.0531 max mem: 9377 +Train: [42] [3100/6250] eta: 0:07:41 lr: 0.000083 grad: 0.1032 (0.1024) loss: 0.8115 (0.8113) time: 0.1252 data: 0.0403 max mem: 9377 +Train: [42] [3200/6250] eta: 0:07:26 lr: 0.000083 grad: 0.1004 (0.1024) loss: 0.8104 (0.8112) time: 0.1245 data: 0.0381 max mem: 9377 +Train: [42] [3300/6250] eta: 0:07:11 lr: 0.000083 grad: 0.0937 (0.1025) loss: 0.8123 (0.8112) time: 0.1393 data: 0.0505 max mem: 9377 +Train: [42] [3400/6250] eta: 0:06:56 lr: 0.000083 grad: 0.1006 (0.1025) loss: 0.8130 (0.8112) time: 0.1343 data: 0.0534 max mem: 9377 +Train: [42] [3500/6250] eta: 0:06:40 lr: 0.000083 grad: 0.1017 (0.1026) loss: 0.8009 (0.8111) time: 0.1249 data: 0.0386 max mem: 9377 +Train: [42] [3600/6250] eta: 0:06:25 lr: 0.000083 grad: 0.1004 (0.1025) loss: 0.8207 (0.8112) time: 0.1424 data: 0.0593 max mem: 9377 +Train: [42] [3700/6250] eta: 0:06:10 lr: 0.000083 grad: 0.0882 (0.1025) loss: 0.8186 (0.8113) time: 0.1512 data: 0.0620 max mem: 9377 +Train: [42] [3800/6250] eta: 0:05:55 lr: 0.000083 grad: 0.0969 (0.1024) loss: 0.8163 (0.8113) time: 0.1335 data: 0.0476 max mem: 9377 +Train: [42] [3900/6250] eta: 0:05:40 lr: 0.000083 grad: 0.0983 (0.1024) loss: 0.8153 (0.8115) time: 0.1523 data: 0.0686 max mem: 9377 +Train: [42] [4000/6250] eta: 0:05:25 lr: 0.000083 grad: 0.0960 (0.1023) loss: 0.8151 (0.8115) time: 0.1120 data: 0.0223 max mem: 9377 +Train: [42] [4100/6250] eta: 0:05:10 lr: 0.000082 grad: 0.0994 (0.1024) loss: 0.8149 (0.8115) time: 0.1391 data: 0.0541 max mem: 9377 +Train: [42] [4200/6250] eta: 0:04:56 lr: 0.000082 grad: 0.0932 (0.1023) loss: 0.8218 (0.8116) time: 0.1341 data: 0.0488 max mem: 9377 +Train: [42] [4300/6250] eta: 0:04:41 lr: 0.000082 grad: 0.0916 (0.1023) loss: 0.8209 (0.8117) time: 0.1569 data: 0.0754 max mem: 9377 +Train: [42] [4400/6250] eta: 0:04:26 lr: 0.000082 grad: 0.0976 (0.1022) loss: 0.8159 (0.8119) time: 0.1313 data: 0.0358 max mem: 9377 +Train: [42] [4500/6250] eta: 0:04:12 lr: 0.000082 grad: 0.0923 (0.1021) loss: 0.8190 (0.8119) time: 0.1388 data: 0.0585 max mem: 9377 +Train: [42] [4600/6250] eta: 0:03:57 lr: 0.000082 grad: 0.1055 (0.1021) loss: 0.8118 (0.8120) time: 0.1270 data: 0.0457 max mem: 9377 +Train: [42] [4700/6250] eta: 0:03:43 lr: 0.000082 grad: 0.1013 (0.1020) loss: 0.8048 (0.8120) time: 0.1565 data: 0.0738 max mem: 9377 +Train: [42] [4800/6250] eta: 0:03:29 lr: 0.000082 grad: 0.0983 (0.1020) loss: 0.8095 (0.8121) time: 0.1578 data: 0.0696 max mem: 9377 +Train: [42] [4900/6250] eta: 0:03:15 lr: 0.000082 grad: 0.1019 (0.1019) loss: 0.8139 (0.8122) time: 0.1490 data: 0.0694 max mem: 9377 +Train: [42] [5000/6250] eta: 0:03:01 lr: 0.000082 grad: 0.1047 (0.1020) loss: 0.8084 (0.8122) time: 0.1960 data: 0.1114 max mem: 9377 +Train: [42] [5100/6250] eta: 0:02:46 lr: 0.000082 grad: 0.0994 (0.1019) loss: 0.8153 (0.8122) time: 0.1619 data: 0.0781 max mem: 9377 +Train: [42] [5200/6250] eta: 0:02:32 lr: 0.000082 grad: 0.0995 (0.1019) loss: 0.8105 (0.8122) time: 0.1640 data: 0.0784 max mem: 9377 +Train: [42] [5300/6250] eta: 0:02:17 lr: 0.000082 grad: 0.0979 (0.1019) loss: 0.8177 (0.8122) time: 0.1393 data: 0.0586 max mem: 9377 +Train: [42] [5400/6250] eta: 0:02:03 lr: 0.000082 grad: 0.0987 (0.1020) loss: 0.8031 (0.8122) time: 0.1306 data: 0.0435 max mem: 9377 +Train: [42] [5500/6250] eta: 0:01:48 lr: 0.000082 grad: 0.0989 (0.1020) loss: 0.8033 (0.8122) time: 0.1295 data: 0.0382 max mem: 9377 +Train: [42] [5600/6250] eta: 0:01:33 lr: 0.000082 grad: 0.0970 (0.1019) loss: 0.8187 (0.8123) time: 0.1140 data: 0.0268 max mem: 9377 +Train: [42] [5700/6250] eta: 0:01:19 lr: 0.000082 grad: 0.0939 (0.1019) loss: 0.8175 (0.8123) time: 0.1106 data: 0.0209 max mem: 9377 +Train: [42] [5800/6250] eta: 0:01:04 lr: 0.000082 grad: 0.1008 (0.1018) loss: 0.8147 (0.8124) time: 0.1325 data: 0.0512 max mem: 9377 +Train: [42] [5900/6250] eta: 0:00:50 lr: 0.000082 grad: 0.0942 (0.1018) loss: 0.8206 (0.8125) time: 0.1314 data: 0.0482 max mem: 9377 +Train: [42] [6000/6250] eta: 0:00:36 lr: 0.000082 grad: 0.0924 (0.1018) loss: 0.8117 (0.8124) time: 0.1387 data: 0.0586 max mem: 9377 +Train: [42] [6100/6250] eta: 0:00:21 lr: 0.000082 grad: 0.1007 (0.1018) loss: 0.8109 (0.8124) time: 0.1568 data: 0.0772 max mem: 9377 +Train: [42] [6200/6250] eta: 0:00:07 lr: 0.000082 grad: 0.1044 (0.1019) loss: 0.8127 (0.8124) time: 0.1512 data: 0.0700 max mem: 9377 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.1068 (0.1019) loss: 0.8177 (0.8124) time: 0.1616 data: 0.0815 max mem: 9377 +Train: [42] Total time: 0:15:11 (0.1458 s / it) +Averaged stats: lr: 0.000082 grad: 0.1068 (0.1019) loss: 0.8177 (0.8124) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:04:37 loss: 0.8406 (0.8406) time: 4.4750 data: 4.4458 max mem: 9377 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8419 (0.8425) time: 0.1269 data: 0.1019 max mem: 9377 +Eval (hcp-train-subset): [42] Total time: 0:00:12 (0.2062 s / it) +Averaged stats (hcp-train-subset): loss: 0.8419 (0.8425) +Eval (hcp-val): [42] [ 0/62] eta: 0:03:58 loss: 0.8384 (0.8384) time: 3.8395 data: 3.7621 max mem: 9377 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8383 (0.8405) time: 0.1373 data: 0.1103 max mem: 9377 +Eval (hcp-val): [42] Total time: 0:00:13 (0.2139 s / it) +Averaged stats (hcp-val): loss: 0.8383 (0.8405) +Eval (nsd-val): [42] [ 0/62] eta: 0:04:49 loss: 0.8165 (0.8165) time: 4.6649 data: 4.6348 max mem: 9377 +Eval (nsd-val): [42] [61/62] eta: 0:00:00 loss: 0.8209 (0.8239) time: 0.1252 data: 0.1003 max mem: 9377 +Eval (nsd-val): [42] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8239) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [43] [ 0/6250] eta: 8:49:36 lr: 0.000082 grad: 0.1183 (0.1183) loss: 0.8586 (0.8586) time: 5.0843 data: 4.9653 max mem: 9377 +Train: [43] [ 100/6250] eta: 0:18:53 lr: 0.000082 grad: 0.1060 (0.1240) loss: 0.8244 (0.8273) time: 0.1361 data: 0.0483 max mem: 9377 +Train: [43] [ 200/6250] eta: 0:16:28 lr: 0.000082 grad: 0.0902 (0.1162) loss: 0.8185 (0.8222) time: 0.1432 data: 0.0525 max mem: 9377 +Train: [43] [ 300/6250] eta: 0:15:16 lr: 0.000082 grad: 0.0988 (0.1160) loss: 0.8162 (0.8184) time: 0.1160 data: 0.0324 max mem: 9377 +Train: [43] [ 400/6250] eta: 0:14:32 lr: 0.000082 grad: 0.0978 (0.1130) loss: 0.8102 (0.8157) time: 0.1272 data: 0.0333 max mem: 9377 +Train: [43] [ 500/6250] eta: 0:13:56 lr: 0.000082 grad: 0.0933 (0.1110) loss: 0.8105 (0.8143) time: 0.1238 data: 0.0330 max mem: 9377 +Train: [43] [ 600/6250] eta: 0:13:28 lr: 0.000082 grad: 0.0931 (0.1093) loss: 0.8121 (0.8137) time: 0.1397 data: 0.0527 max mem: 9377 +Train: [43] [ 700/6250] eta: 0:13:07 lr: 0.000082 grad: 0.0945 (0.1080) loss: 0.8098 (0.8132) time: 0.1375 data: 0.0492 max mem: 9377 +Train: [43] [ 800/6250] eta: 0:12:46 lr: 0.000082 grad: 0.0888 (0.1068) loss: 0.8130 (0.8132) time: 0.1427 data: 0.0562 max mem: 9377 +Train: [43] [ 900/6250] eta: 0:12:33 lr: 0.000082 grad: 0.0965 (0.1057) loss: 0.8074 (0.8132) time: 0.1223 data: 0.0333 max mem: 9377 +Train: [43] [1000/6250] eta: 0:12:19 lr: 0.000081 grad: 0.0997 (0.1053) loss: 0.8181 (0.8132) time: 0.1503 data: 0.0693 max mem: 9377 +Train: [43] [1100/6250] eta: 0:12:04 lr: 0.000081 grad: 0.0998 (0.1050) loss: 0.8053 (0.8128) time: 0.1496 data: 0.0717 max mem: 9377 +Train: [43] [1200/6250] eta: 0:11:49 lr: 0.000081 grad: 0.1029 (0.1048) loss: 0.8065 (0.8125) time: 0.1273 data: 0.0489 max mem: 9377 +Train: [43] [1300/6250] eta: 0:11:33 lr: 0.000081 grad: 0.0908 (0.1046) loss: 0.8144 (0.8122) time: 0.1441 data: 0.0599 max mem: 9377 +Train: [43] [1400/6250] eta: 0:11:17 lr: 0.000081 grad: 0.0980 (0.1045) loss: 0.8034 (0.8119) time: 0.1393 data: 0.0588 max mem: 9377 +Train: [43] [1500/6250] eta: 0:11:01 lr: 0.000081 grad: 0.0990 (0.1043) loss: 0.8112 (0.8118) time: 0.1202 data: 0.0394 max mem: 9377 +Train: [43] [1600/6250] eta: 0:10:48 lr: 0.000081 grad: 0.1000 (0.1042) loss: 0.8078 (0.8117) time: 0.1681 data: 0.0914 max mem: 9377 +Train: [43] [1700/6250] eta: 0:10:36 lr: 0.000081 grad: 0.0960 (0.1041) loss: 0.8095 (0.8116) time: 0.1350 data: 0.0541 max mem: 9377 +Train: [43] [1800/6250] eta: 0:10:25 lr: 0.000081 grad: 0.1072 (0.1041) loss: 0.8088 (0.8114) time: 0.1562 data: 0.0758 max mem: 9377 +Train: [43] [1900/6250] eta: 0:10:14 lr: 0.000081 grad: 0.1003 (0.1041) loss: 0.8118 (0.8113) time: 0.1230 data: 0.0435 max mem: 9377 +Train: [43] [2000/6250] eta: 0:10:01 lr: 0.000081 grad: 0.1089 (0.1042) loss: 0.8098 (0.8110) time: 0.1581 data: 0.0827 max mem: 9377 +Train: [43] [2100/6250] eta: 0:09:47 lr: 0.000081 grad: 0.1057 (0.1044) loss: 0.8112 (0.8109) time: 0.1250 data: 0.0456 max mem: 9377 +Train: [43] [2200/6250] eta: 0:09:34 lr: 0.000081 grad: 0.1069 (0.1044) loss: 0.8097 (0.8109) time: 0.1577 data: 0.0776 max mem: 9377 +Train: [43] [2300/6250] eta: 0:09:19 lr: 0.000081 grad: 0.1034 (0.1045) loss: 0.8078 (0.8109) time: 0.1339 data: 0.0458 max mem: 9377 +Train: [43] [2400/6250] eta: 0:09:04 lr: 0.000081 grad: 0.1009 (0.1046) loss: 0.8108 (0.8108) time: 0.1248 data: 0.0432 max mem: 9377 +Train: [43] [2500/6250] eta: 0:08:49 lr: 0.000081 grad: 0.1052 (0.1046) loss: 0.8023 (0.8108) time: 0.1192 data: 0.0371 max mem: 9377 +Train: [43] [2600/6250] eta: 0:08:33 lr: 0.000081 grad: 0.1006 (0.1046) loss: 0.8092 (0.8108) time: 0.1335 data: 0.0506 max mem: 9377 +Train: [43] [2700/6250] eta: 0:08:18 lr: 0.000081 grad: 0.1035 (0.1046) loss: 0.8157 (0.8109) time: 0.1317 data: 0.0496 max mem: 9377 +Train: [43] [2800/6250] eta: 0:08:03 lr: 0.000081 grad: 0.1004 (0.1046) loss: 0.8068 (0.8109) time: 0.1312 data: 0.0494 max mem: 9377 +Train: [43] [2900/6250] eta: 0:07:51 lr: 0.000081 grad: 0.1053 (0.1045) loss: 0.8068 (0.8110) time: 0.1333 data: 0.0554 max mem: 9377 +Train: [43] [3000/6250] eta: 0:07:37 lr: 0.000081 grad: 0.1027 (0.1045) loss: 0.8168 (0.8111) time: 0.1358 data: 0.0535 max mem: 9377 +Train: [43] [3100/6250] eta: 0:07:23 lr: 0.000081 grad: 0.0976 (0.1045) loss: 0.8139 (0.8111) time: 0.1249 data: 0.0375 max mem: 9377 +Train: [43] [3200/6250] eta: 0:07:08 lr: 0.000081 grad: 0.0963 (0.1044) loss: 0.8146 (0.8110) time: 0.1349 data: 0.0503 max mem: 9377 +Train: [43] [3300/6250] eta: 0:06:54 lr: 0.000081 grad: 0.0905 (0.1044) loss: 0.8117 (0.8109) time: 0.1286 data: 0.0387 max mem: 9377 +Train: [43] [3400/6250] eta: 0:06:40 lr: 0.000081 grad: 0.0968 (0.1045) loss: 0.8116 (0.8108) time: 0.1324 data: 0.0511 max mem: 9377 +Train: [43] [3500/6250] eta: 0:06:26 lr: 0.000081 grad: 0.1024 (0.1046) loss: 0.8090 (0.8108) time: 0.1493 data: 0.0667 max mem: 9377 +Train: [43] [3600/6250] eta: 0:06:12 lr: 0.000081 grad: 0.1059 (0.1047) loss: 0.8096 (0.8108) time: 0.1584 data: 0.0784 max mem: 9377 +Train: [43] [3700/6250] eta: 0:05:58 lr: 0.000081 grad: 0.0933 (0.1046) loss: 0.8127 (0.8107) time: 0.1370 data: 0.0492 max mem: 9377 +Train: [43] [3800/6250] eta: 0:05:43 lr: 0.000081 grad: 0.1038 (0.1046) loss: 0.8182 (0.8107) time: 0.1386 data: 0.0578 max mem: 9377 +Train: [43] [3900/6250] eta: 0:05:29 lr: 0.000081 grad: 0.0971 (0.1045) loss: 0.8099 (0.8107) time: 0.1429 data: 0.0633 max mem: 9377 +Train: [43] [4000/6250] eta: 0:05:15 lr: 0.000081 grad: 0.0985 (0.1045) loss: 0.8194 (0.8107) time: 0.1302 data: 0.0489 max mem: 9377 +Train: [43] [4100/6250] eta: 0:05:01 lr: 0.000081 grad: 0.1015 (0.1045) loss: 0.8093 (0.8107) time: 0.1286 data: 0.0478 max mem: 9377 +Train: [43] [4200/6250] eta: 0:04:47 lr: 0.000080 grad: 0.1011 (0.1045) loss: 0.8148 (0.8107) time: 0.1459 data: 0.0707 max mem: 9377 +Train: [43] [4300/6250] eta: 0:04:33 lr: 0.000080 grad: 0.1049 (0.1046) loss: 0.8162 (0.8107) time: 0.1482 data: 0.0666 max mem: 9377 +Train: [43] [4400/6250] eta: 0:04:19 lr: 0.000080 grad: 0.0993 (0.1047) loss: 0.8134 (0.8107) time: 0.1284 data: 0.0471 max mem: 9377 +Train: [43] [4500/6250] eta: 0:04:05 lr: 0.000080 grad: 0.1088 (0.1048) loss: 0.8035 (0.8107) time: 0.1233 data: 0.0349 max mem: 9377 +Train: [43] [4600/6250] eta: 0:03:51 lr: 0.000080 grad: 0.1061 (0.1047) loss: 0.8108 (0.8107) time: 0.1312 data: 0.0487 max mem: 9377 +Train: [43] [4700/6250] eta: 0:03:37 lr: 0.000080 grad: 0.1041 (0.1048) loss: 0.8102 (0.8107) time: 0.1491 data: 0.0670 max mem: 9377 +Train: [43] [4800/6250] eta: 0:03:24 lr: 0.000080 grad: 0.1050 (0.1048) loss: 0.8080 (0.8106) time: 0.1488 data: 0.0663 max mem: 9377 +Train: [43] [4900/6250] eta: 0:03:10 lr: 0.000080 grad: 0.1016 (0.1048) loss: 0.8127 (0.8106) time: 0.1512 data: 0.0665 max mem: 9377 +Train: [43] [5000/6250] eta: 0:02:56 lr: 0.000080 grad: 0.1011 (0.1047) loss: 0.8139 (0.8106) time: 0.1523 data: 0.0662 max mem: 9377 +Train: [43] [5100/6250] eta: 0:02:42 lr: 0.000080 grad: 0.1051 (0.1047) loss: 0.8104 (0.8106) time: 0.1150 data: 0.0355 max mem: 9377 +Train: [43] [5200/6250] eta: 0:02:28 lr: 0.000080 grad: 0.1032 (0.1046) loss: 0.8169 (0.8106) time: 0.1532 data: 0.0750 max mem: 9377 +Train: [43] [5300/6250] eta: 0:02:14 lr: 0.000080 grad: 0.1048 (0.1047) loss: 0.8131 (0.8107) time: 0.1479 data: 0.0600 max mem: 9377 +Train: [43] [5400/6250] eta: 0:02:00 lr: 0.000080 grad: 0.1079 (0.1046) loss: 0.8095 (0.8107) time: 0.1398 data: 0.0570 max mem: 9377 +Train: [43] [5500/6250] eta: 0:01:45 lr: 0.000080 grad: 0.1001 (0.1046) loss: 0.8180 (0.8107) time: 0.1478 data: 0.0593 max mem: 9377 +Train: [43] [5600/6250] eta: 0:01:31 lr: 0.000080 grad: 0.1006 (0.1047) loss: 0.8169 (0.8108) time: 0.1418 data: 0.0536 max mem: 9377 +Train: [43] [5700/6250] eta: 0:01:17 lr: 0.000080 grad: 0.1051 (0.1047) loss: 0.8178 (0.8108) time: 0.1367 data: 0.0524 max mem: 9377 +Train: [43] [5800/6250] eta: 0:01:03 lr: 0.000080 grad: 0.1076 (0.1046) loss: 0.8169 (0.8109) time: 0.0988 data: 0.0180 max mem: 9377 +Train: [43] [5900/6250] eta: 0:00:49 lr: 0.000080 grad: 0.1003 (0.1046) loss: 0.8183 (0.8110) time: 0.1363 data: 0.0513 max mem: 9377 +Train: [43] [6000/6250] eta: 0:00:35 lr: 0.000080 grad: 0.0969 (0.1046) loss: 0.8130 (0.8110) time: 0.1196 data: 0.0279 max mem: 9377 +Train: [43] [6100/6250] eta: 0:00:21 lr: 0.000080 grad: 0.1009 (0.1047) loss: 0.8177 (0.8110) time: 0.1552 data: 0.0714 max mem: 9377 +Train: [43] [6200/6250] eta: 0:00:07 lr: 0.000080 grad: 0.1032 (0.1047) loss: 0.8070 (0.8111) time: 0.1349 data: 0.0526 max mem: 9377 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.1036 (0.1047) loss: 0.8024 (0.8111) time: 0.1502 data: 0.0660 max mem: 9377 +Train: [43] Total time: 0:14:42 (0.1412 s / it) +Averaged stats: lr: 0.000080 grad: 0.1036 (0.1047) loss: 0.8024 (0.8111) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:04:10 loss: 0.8421 (0.8421) time: 4.0428 data: 3.9626 max mem: 9377 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8421 (0.8429) time: 0.1226 data: 0.0974 max mem: 9377 +Eval (hcp-train-subset): [43] Total time: 0:00:13 (0.2156 s / it) +Averaged stats (hcp-train-subset): loss: 0.8421 (0.8429) +Eval (hcp-val): [43] [ 0/62] eta: 0:04:19 loss: 0.8395 (0.8395) time: 4.1787 data: 4.0848 max mem: 9377 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8395 (0.8405) time: 0.1279 data: 0.1013 max mem: 9377 +Eval (hcp-val): [43] Total time: 0:00:12 (0.2074 s / it) +Averaged stats (hcp-val): loss: 0.8395 (0.8405) +Eval (nsd-val): [43] [ 0/62] eta: 0:05:22 loss: 0.8074 (0.8074) time: 5.2059 data: 5.1755 max mem: 9377 +Eval (nsd-val): [43] [61/62] eta: 0:00:00 loss: 0.8214 (0.8215) time: 0.1297 data: 0.1046 max mem: 9377 +Eval (nsd-val): [43] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (nsd-val): loss: 0.8214 (0.8215) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [44] [ 0/6250] eta: 10:11:33 lr: 0.000080 grad: 0.1321 (0.1321) loss: 0.8551 (0.8551) time: 5.8710 data: 5.7718 max mem: 9377 +Train: [44] [ 100/6250] eta: 0:19:24 lr: 0.000080 grad: 0.1065 (0.1097) loss: 0.8124 (0.8364) time: 0.1376 data: 0.0475 max mem: 9377 +Train: [44] [ 200/6250] eta: 0:16:41 lr: 0.000080 grad: 0.1056 (0.1122) loss: 0.8087 (0.8243) time: 0.1304 data: 0.0461 max mem: 9377 +Train: [44] [ 300/6250] eta: 0:15:36 lr: 0.000080 grad: 0.0987 (0.1091) loss: 0.8103 (0.8217) time: 0.1065 data: 0.0143 max mem: 9377 +Train: [44] [ 400/6250] eta: 0:14:49 lr: 0.000080 grad: 0.0924 (0.1070) loss: 0.8143 (0.8203) time: 0.1274 data: 0.0401 max mem: 9377 +Train: [44] [ 500/6250] eta: 0:14:15 lr: 0.000080 grad: 0.1000 (0.1059) loss: 0.8054 (0.8190) time: 0.1386 data: 0.0562 max mem: 9377 +Train: [44] [ 600/6250] eta: 0:13:47 lr: 0.000080 grad: 0.0991 (0.1050) loss: 0.8107 (0.8174) time: 0.1339 data: 0.0500 max mem: 9377 +Train: [44] [ 700/6250] eta: 0:13:21 lr: 0.000080 grad: 0.1017 (0.1046) loss: 0.8086 (0.8162) time: 0.1382 data: 0.0523 max mem: 9377 +Train: [44] [ 800/6250] eta: 0:13:07 lr: 0.000080 grad: 0.0961 (0.1042) loss: 0.8118 (0.8157) time: 0.1509 data: 0.0658 max mem: 9377 +Train: [44] [ 900/6250] eta: 0:12:58 lr: 0.000080 grad: 0.0962 (0.1037) loss: 0.8097 (0.8152) time: 0.1639 data: 0.0847 max mem: 9377 +Train: [44] [1000/6250] eta: 0:12:49 lr: 0.000080 grad: 0.0925 (0.1030) loss: 0.8161 (0.8149) time: 0.1346 data: 0.0589 max mem: 9377 +Train: [44] [1100/6250] eta: 0:12:42 lr: 0.000079 grad: 0.1023 (0.1029) loss: 0.8143 (0.8146) time: 0.1824 data: 0.0944 max mem: 9377 +Train: [44] [1200/6250] eta: 0:12:28 lr: 0.000079 grad: 0.0972 (0.1028) loss: 0.8114 (0.8141) time: 0.1529 data: 0.0691 max mem: 9377 +Train: [44] [1300/6250] eta: 0:12:20 lr: 0.000079 grad: 0.0942 (0.1028) loss: 0.8085 (0.8138) time: 0.1944 data: 0.1140 max mem: 9377 +Train: [44] [1400/6250] eta: 0:12:06 lr: 0.000079 grad: 0.0986 (0.1025) loss: 0.8041 (0.8134) time: 0.1745 data: 0.0927 max mem: 9377 +Train: [44] [1500/6250] eta: 0:11:50 lr: 0.000079 grad: 0.1006 (0.1024) loss: 0.8103 (0.8130) time: 0.1706 data: 0.0907 max mem: 9377 +Train: [44] [1600/6250] eta: 0:11:33 lr: 0.000079 grad: 0.1015 (0.1024) loss: 0.8024 (0.8125) time: 0.1401 data: 0.0595 max mem: 9377 +Train: [44] [1700/6250] eta: 0:11:17 lr: 0.000079 grad: 0.1059 (0.1026) loss: 0.8016 (0.8121) time: 0.1214 data: 0.0350 max mem: 9377 +Train: [44] [1800/6250] eta: 0:10:59 lr: 0.000079 grad: 0.0968 (0.1025) loss: 0.8090 (0.8118) time: 0.1260 data: 0.0489 max mem: 9377 +Train: [44] [1900/6250] eta: 0:10:42 lr: 0.000079 grad: 0.1036 (0.1026) loss: 0.8049 (0.8114) time: 0.1445 data: 0.0639 max mem: 9377 +Train: [44] [2000/6250] eta: 0:10:25 lr: 0.000079 grad: 0.1043 (0.1027) loss: 0.8009 (0.8110) time: 0.1409 data: 0.0592 max mem: 9377 +Train: [44] [2100/6250] eta: 0:10:14 lr: 0.000079 grad: 0.1035 (0.1027) loss: 0.8068 (0.8108) time: 0.1458 data: 0.0639 max mem: 9377 +Train: [44] [2200/6250] eta: 0:10:00 lr: 0.000079 grad: 0.1006 (0.1030) loss: 0.8098 (0.8104) time: 0.1479 data: 0.0634 max mem: 9377 +Train: [44] [2300/6250] eta: 0:09:47 lr: 0.000079 grad: 0.1010 (0.1032) loss: 0.8079 (0.8102) time: 0.1523 data: 0.0714 max mem: 9377 +Train: [44] [2400/6250] eta: 0:09:31 lr: 0.000079 grad: 0.1012 (0.1032) loss: 0.8098 (0.8102) time: 0.1391 data: 0.0534 max mem: 9377 +Train: [44] [2500/6250] eta: 0:09:15 lr: 0.000079 grad: 0.1066 (0.1032) loss: 0.8098 (0.8102) time: 0.1433 data: 0.0588 max mem: 9377 +Train: [44] [2600/6250] eta: 0:08:59 lr: 0.000079 grad: 0.1005 (0.1034) loss: 0.8143 (0.8101) time: 0.1365 data: 0.0542 max mem: 9377 +Train: [44] [2700/6250] eta: 0:08:42 lr: 0.000079 grad: 0.1101 (0.1035) loss: 0.8042 (0.8100) time: 0.1433 data: 0.0638 max mem: 9377 +Train: [44] [2800/6250] eta: 0:08:26 lr: 0.000079 grad: 0.1012 (0.1037) loss: 0.8163 (0.8099) time: 0.1292 data: 0.0472 max mem: 9377 +Train: [44] [2900/6250] eta: 0:08:10 lr: 0.000079 grad: 0.0999 (0.1038) loss: 0.8120 (0.8099) time: 0.1171 data: 0.0291 max mem: 9377 +Train: [44] [3000/6250] eta: 0:07:54 lr: 0.000079 grad: 0.1016 (0.1038) loss: 0.8039 (0.8098) time: 0.1390 data: 0.0501 max mem: 9377 +Train: [44] [3100/6250] eta: 0:07:38 lr: 0.000079 grad: 0.1066 (0.1039) loss: 0.8003 (0.8097) time: 0.1475 data: 0.0671 max mem: 9377 +Train: [44] [3200/6250] eta: 0:07:22 lr: 0.000079 grad: 0.1080 (0.1041) loss: 0.8038 (0.8096) time: 0.1265 data: 0.0422 max mem: 9377 +Train: [44] [3300/6250] eta: 0:07:07 lr: 0.000079 grad: 0.1008 (0.1042) loss: 0.8094 (0.8096) time: 0.1310 data: 0.0435 max mem: 9377 +Train: [44] [3400/6250] eta: 0:06:51 lr: 0.000079 grad: 0.1022 (0.1043) loss: 0.8114 (0.8095) time: 0.1377 data: 0.0531 max mem: 9377 +Train: [44] [3500/6250] eta: 0:06:36 lr: 0.000079 grad: 0.1009 (0.1043) loss: 0.8127 (0.8095) time: 0.1423 data: 0.0636 max mem: 9377 +Train: [44] [3600/6250] eta: 0:06:21 lr: 0.000079 grad: 0.1004 (0.1045) loss: 0.8124 (0.8094) time: 0.1535 data: 0.0670 max mem: 9377 +Train: [44] [3700/6250] eta: 0:06:07 lr: 0.000079 grad: 0.0995 (0.1046) loss: 0.8095 (0.8093) time: 0.1282 data: 0.0430 max mem: 9377 +Train: [44] [3800/6250] eta: 0:05:52 lr: 0.000079 grad: 0.1052 (0.1046) loss: 0.8037 (0.8092) time: 0.1514 data: 0.0701 max mem: 9377 +Train: [44] [3900/6250] eta: 0:05:37 lr: 0.000079 grad: 0.1080 (0.1047) loss: 0.8098 (0.8091) time: 0.1196 data: 0.0343 max mem: 9377 +Train: [44] [4000/6250] eta: 0:05:23 lr: 0.000079 grad: 0.1042 (0.1048) loss: 0.8057 (0.8090) time: 0.1403 data: 0.0602 max mem: 9377 +Train: [44] [4100/6250] eta: 0:05:08 lr: 0.000079 grad: 0.1083 (0.1049) loss: 0.8050 (0.8088) time: 0.1345 data: 0.0491 max mem: 9377 +Train: [44] [4200/6250] eta: 0:04:54 lr: 0.000078 grad: 0.1125 (0.1051) loss: 0.7951 (0.8086) time: 0.1437 data: 0.0604 max mem: 9377 +Train: [44] [4300/6250] eta: 0:04:40 lr: 0.000078 grad: 0.1075 (0.1053) loss: 0.8106 (0.8085) time: 0.1579 data: 0.0792 max mem: 9377 +Train: [44] [4400/6250] eta: 0:04:25 lr: 0.000078 grad: 0.1077 (0.1055) loss: 0.8042 (0.8084) time: 0.1549 data: 0.0755 max mem: 9377 +Train: [44] [4500/6250] eta: 0:04:11 lr: 0.000078 grad: 0.1083 (0.1056) loss: 0.8082 (0.8082) time: 0.1417 data: 0.0560 max mem: 9377 +Train: [44] [4600/6250] eta: 0:03:57 lr: 0.000078 grad: 0.1084 (0.1057) loss: 0.8022 (0.8081) time: 0.1373 data: 0.0438 max mem: 9377 +Train: [44] [4700/6250] eta: 0:03:43 lr: 0.000078 grad: 0.1088 (0.1058) loss: 0.8100 (0.8081) time: 0.1590 data: 0.0742 max mem: 9377 +Train: [44] [4800/6250] eta: 0:03:29 lr: 0.000078 grad: 0.1221 (0.1060) loss: 0.8044 (0.8080) time: 0.1742 data: 0.0821 max mem: 9377 +Train: [44] [4900/6250] eta: 0:03:15 lr: 0.000078 grad: 0.1140 (0.1061) loss: 0.8047 (0.8079) time: 0.1574 data: 0.0657 max mem: 9377 +Train: [44] [5000/6250] eta: 0:03:01 lr: 0.000078 grad: 0.1164 (0.1063) loss: 0.7939 (0.8079) time: 0.1599 data: 0.0741 max mem: 9377 +Train: [44] [5100/6250] eta: 0:02:47 lr: 0.000078 grad: 0.1164 (0.1064) loss: 0.8026 (0.8078) time: 0.1502 data: 0.0676 max mem: 9377 +Train: [44] [5200/6250] eta: 0:02:32 lr: 0.000078 grad: 0.1108 (0.1066) loss: 0.8075 (0.8077) time: 0.1656 data: 0.0730 max mem: 9377 +Train: [44] [5300/6250] eta: 0:02:18 lr: 0.000078 grad: 0.1124 (0.1067) loss: 0.8008 (0.8076) time: 0.1571 data: 0.0665 max mem: 9377 +Train: [44] [5400/6250] eta: 0:02:04 lr: 0.000078 grad: 0.1130 (0.1068) loss: 0.8019 (0.8075) time: 0.1299 data: 0.0449 max mem: 9377 +Train: [44] [5500/6250] eta: 0:01:49 lr: 0.000078 grad: 0.1084 (0.1069) loss: 0.8091 (0.8074) time: 0.1535 data: 0.0724 max mem: 9377 +Train: [44] [5600/6250] eta: 0:01:34 lr: 0.000078 grad: 0.0992 (0.1068) loss: 0.8099 (0.8074) time: 0.1358 data: 0.0502 max mem: 9377 +Train: [44] [5700/6250] eta: 0:01:20 lr: 0.000078 grad: 0.1091 (0.1069) loss: 0.7966 (0.8073) time: 0.1306 data: 0.0433 max mem: 9377 +Train: [44] [5800/6250] eta: 0:01:05 lr: 0.000078 grad: 0.1030 (0.1069) loss: 0.8081 (0.8073) time: 0.1618 data: 0.0806 max mem: 9377 +Train: [44] [5900/6250] eta: 0:00:50 lr: 0.000078 grad: 0.0991 (0.1068) loss: 0.8129 (0.8073) time: 0.1529 data: 0.0718 max mem: 9377 +Train: [44] [6000/6250] eta: 0:00:36 lr: 0.000078 grad: 0.1031 (0.1068) loss: 0.8097 (0.8074) time: 0.1430 data: 0.0613 max mem: 9377 +Train: [44] [6100/6250] eta: 0:00:21 lr: 0.000078 grad: 0.1000 (0.1068) loss: 0.8135 (0.8074) time: 0.1515 data: 0.0703 max mem: 9377 +Train: [44] [6200/6250] eta: 0:00:07 lr: 0.000078 grad: 0.1064 (0.1068) loss: 0.8081 (0.8075) time: 0.1447 data: 0.0600 max mem: 9377 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.1063 (0.1068) loss: 0.8052 (0.8075) time: 0.1595 data: 0.0793 max mem: 9377 +Train: [44] Total time: 0:15:13 (0.1462 s / it) +Averaged stats: lr: 0.000078 grad: 0.1063 (0.1068) loss: 0.8052 (0.8075) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:03:39 loss: 0.8424 (0.8424) time: 3.5343 data: 3.4554 max mem: 9377 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8429 (0.8444) time: 0.1324 data: 0.1072 max mem: 9377 +Eval (hcp-train-subset): [44] Total time: 0:00:13 (0.2190 s / it) +Averaged stats (hcp-train-subset): loss: 0.8429 (0.8444) +Making plots (hcp-train-subset): example=37 +Eval (hcp-val): [44] [ 0/62] eta: 0:04:49 loss: 0.8370 (0.8370) time: 4.6737 data: 4.6417 max mem: 9377 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8406 (0.8423) time: 0.1188 data: 0.0932 max mem: 9377 +Eval (hcp-val): [44] Total time: 0:00:13 (0.2152 s / it) +Averaged stats (hcp-val): loss: 0.8406 (0.8423) +Making plots (hcp-val): example=5 +Eval (nsd-val): [44] [ 0/62] eta: 0:05:14 loss: 0.8121 (0.8121) time: 5.0764 data: 5.0458 max mem: 9377 +Eval (nsd-val): [44] [61/62] eta: 0:00:00 loss: 0.8233 (0.8240) time: 0.1283 data: 0.1035 max mem: 9377 +Eval (nsd-val): [44] Total time: 0:00:13 (0.2153 s / it) +Averaged stats (nsd-val): loss: 0.8233 (0.8240) +Making plots (nsd-val): example=7 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00044.pth +Train: [45] [ 0/6250] eta: 9:45:34 lr: 0.000078 grad: 0.0626 (0.0626) loss: 0.8508 (0.8508) time: 5.6214 data: 5.4748 max mem: 9377 +Train: [45] [ 100/6250] eta: 0:20:35 lr: 0.000078 grad: 0.0953 (0.1170) loss: 0.8289 (0.8331) time: 0.1405 data: 0.0538 max mem: 9377 +Train: [45] [ 200/6250] eta: 0:17:42 lr: 0.000078 grad: 0.1131 (0.1163) loss: 0.8076 (0.8268) time: 0.1406 data: 0.0457 max mem: 9377 +Train: [45] [ 300/6250] eta: 0:16:40 lr: 0.000078 grad: 0.1119 (0.1151) loss: 0.8110 (0.8217) time: 0.1368 data: 0.0436 max mem: 9377 +Train: [45] [ 400/6250] eta: 0:15:57 lr: 0.000078 grad: 0.1014 (0.1128) loss: 0.8090 (0.8185) time: 0.1517 data: 0.0612 max mem: 9377 +Train: [45] [ 500/6250] eta: 0:15:23 lr: 0.000078 grad: 0.0995 (0.1124) loss: 0.8099 (0.8164) time: 0.1497 data: 0.0516 max mem: 9377 +Train: [45] [ 600/6250] eta: 0:14:48 lr: 0.000078 grad: 0.1024 (0.1123) loss: 0.8069 (0.8146) time: 0.1476 data: 0.0576 max mem: 9377 +Train: [45] [ 700/6250] eta: 0:14:18 lr: 0.000078 grad: 0.0976 (0.1107) loss: 0.8086 (0.8141) time: 0.1216 data: 0.0323 max mem: 9377 +Train: [45] [ 800/6250] eta: 0:13:53 lr: 0.000078 grad: 0.0943 (0.1093) loss: 0.8214 (0.8141) time: 0.1477 data: 0.0453 max mem: 9377 +Train: [45] [ 900/6250] eta: 0:13:36 lr: 0.000078 grad: 0.0988 (0.1086) loss: 0.8078 (0.8141) time: 0.1164 data: 0.0227 max mem: 9377 +Train: [45] [1000/6250] eta: 0:13:18 lr: 0.000078 grad: 0.0973 (0.1081) loss: 0.8061 (0.8139) time: 0.1367 data: 0.0541 max mem: 9377 +Train: [45] [1100/6250] eta: 0:13:07 lr: 0.000077 grad: 0.0959 (0.1076) loss: 0.8176 (0.8137) time: 0.1703 data: 0.0937 max mem: 9377 +Train: [45] [1200/6250] eta: 0:12:53 lr: 0.000077 grad: 0.0961 (0.1070) loss: 0.8143 (0.8136) time: 0.1632 data: 0.0776 max mem: 9377 +Train: [45] [1300/6250] eta: 0:12:38 lr: 0.000077 grad: 0.0943 (0.1067) loss: 0.8053 (0.8132) time: 0.1477 data: 0.0588 max mem: 9377 +Train: [45] [1400/6250] eta: 0:12:21 lr: 0.000077 grad: 0.1040 (0.1066) loss: 0.8101 (0.8128) time: 0.1549 data: 0.0730 max mem: 9377 +Train: [45] [1500/6250] eta: 0:12:04 lr: 0.000077 grad: 0.0997 (0.1065) loss: 0.8152 (0.8125) time: 0.1603 data: 0.0804 max mem: 9377 +Train: [45] [1600/6250] eta: 0:11:49 lr: 0.000077 grad: 0.1033 (0.1063) loss: 0.8074 (0.8121) time: 0.1254 data: 0.0480 max mem: 9377 +Train: [45] [1700/6250] eta: 0:11:33 lr: 0.000077 grad: 0.1027 (0.1063) loss: 0.8088 (0.8116) time: 0.1471 data: 0.0670 max mem: 9377 +Train: [45] [1800/6250] eta: 0:11:15 lr: 0.000077 grad: 0.1007 (0.1063) loss: 0.8037 (0.8113) time: 0.1383 data: 0.0610 max mem: 9377 +Train: [45] [1900/6250] eta: 0:10:58 lr: 0.000077 grad: 0.1009 (0.1064) loss: 0.8041 (0.8110) time: 0.1454 data: 0.0619 max mem: 9377 +Train: [45] [2000/6250] eta: 0:10:42 lr: 0.000077 grad: 0.1041 (0.1065) loss: 0.8033 (0.8107) time: 0.1650 data: 0.0878 max mem: 9377 +Train: [45] [2100/6250] eta: 0:10:26 lr: 0.000077 grad: 0.1015 (0.1065) loss: 0.8032 (0.8104) time: 0.1657 data: 0.0839 max mem: 9377 +Train: [45] [2200/6250] eta: 0:10:11 lr: 0.000077 grad: 0.1043 (0.1064) loss: 0.8027 (0.8102) time: 0.1293 data: 0.0498 max mem: 9377 +Train: [45] [2300/6250] eta: 0:09:57 lr: 0.000077 grad: 0.1036 (0.1065) loss: 0.8084 (0.8101) time: 0.1627 data: 0.0842 max mem: 9377 +Train: [45] [2400/6250] eta: 0:09:43 lr: 0.000077 grad: 0.1058 (0.1066) loss: 0.8086 (0.8100) time: 0.1474 data: 0.0651 max mem: 9377 +Train: [45] [2500/6250] eta: 0:09:28 lr: 0.000077 grad: 0.1071 (0.1066) loss: 0.7951 (0.8098) time: 0.1525 data: 0.0700 max mem: 9377 +Train: [45] [2600/6250] eta: 0:09:15 lr: 0.000077 grad: 0.1025 (0.1066) loss: 0.8056 (0.8096) time: 0.1745 data: 0.0931 max mem: 9377 +Train: [45] [2700/6250] eta: 0:09:00 lr: 0.000077 grad: 0.1048 (0.1066) loss: 0.8141 (0.8095) time: 0.1674 data: 0.0805 max mem: 9377 +Train: [45] [2800/6250] eta: 0:08:43 lr: 0.000077 grad: 0.0994 (0.1065) loss: 0.8091 (0.8095) time: 0.1364 data: 0.0519 max mem: 9377 +Train: [45] [2900/6250] eta: 0:08:27 lr: 0.000077 grad: 0.1102 (0.1065) loss: 0.8037 (0.8094) time: 0.1375 data: 0.0520 max mem: 9377 +Train: [45] [3000/6250] eta: 0:08:10 lr: 0.000077 grad: 0.1086 (0.1066) loss: 0.8090 (0.8093) time: 0.1680 data: 0.0857 max mem: 9377 +Train: [45] [3100/6250] eta: 0:07:55 lr: 0.000077 grad: 0.1037 (0.1066) loss: 0.8066 (0.8092) time: 0.1768 data: 0.1000 max mem: 9377 +Train: [45] [3200/6250] eta: 0:07:41 lr: 0.000077 grad: 0.1056 (0.1067) loss: 0.8099 (0.8091) time: 0.1806 data: 0.1005 max mem: 9377 +Train: [45] [3300/6250] eta: 0:07:26 lr: 0.000077 grad: 0.0946 (0.1066) loss: 0.8152 (0.8091) time: 0.1675 data: 0.0828 max mem: 9377 +Train: [45] [3400/6250] eta: 0:07:12 lr: 0.000077 grad: 0.1017 (0.1065) loss: 0.8071 (0.8092) time: 0.1542 data: 0.0798 max mem: 9377 +Train: [45] [3500/6250] eta: 0:06:57 lr: 0.000077 grad: 0.1031 (0.1065) loss: 0.8110 (0.8091) time: 0.1652 data: 0.0896 max mem: 9377 +Train: [45] [3600/6250] eta: 0:06:41 lr: 0.000077 grad: 0.1023 (0.1064) loss: 0.8089 (0.8091) time: 0.1386 data: 0.0560 max mem: 9377 +Train: [45] [3700/6250] eta: 0:06:25 lr: 0.000077 grad: 0.0996 (0.1064) loss: 0.8119 (0.8092) time: 0.1389 data: 0.0558 max mem: 9377 +Train: [45] [3800/6250] eta: 0:06:09 lr: 0.000077 grad: 0.0962 (0.1063) loss: 0.8140 (0.8092) time: 0.1415 data: 0.0618 max mem: 9377 +Train: [45] [3900/6250] eta: 0:05:53 lr: 0.000077 grad: 0.1041 (0.1063) loss: 0.8079 (0.8092) time: 0.1418 data: 0.0614 max mem: 9377 +Train: [45] [4000/6250] eta: 0:05:37 lr: 0.000077 grad: 0.1049 (0.1064) loss: 0.8073 (0.8091) time: 0.1361 data: 0.0528 max mem: 9377 +Train: [45] [4100/6250] eta: 0:05:22 lr: 0.000077 grad: 0.1017 (0.1064) loss: 0.8186 (0.8091) time: 0.1487 data: 0.0708 max mem: 9377 +Train: [45] [4200/6250] eta: 0:05:06 lr: 0.000076 grad: 0.1021 (0.1064) loss: 0.8020 (0.8090) time: 0.1418 data: 0.0628 max mem: 9377 +Train: [45] [4300/6250] eta: 0:04:51 lr: 0.000076 grad: 0.1023 (0.1065) loss: 0.8070 (0.8090) time: 0.1233 data: 0.0401 max mem: 9377 +Train: [45] [4400/6250] eta: 0:04:35 lr: 0.000076 grad: 0.1091 (0.1065) loss: 0.8112 (0.8090) time: 0.1051 data: 0.0225 max mem: 9377 +Train: [45] [4500/6250] eta: 0:04:20 lr: 0.000076 grad: 0.1022 (0.1065) loss: 0.8080 (0.8090) time: 0.1317 data: 0.0487 max mem: 9377 +Train: [45] [4600/6250] eta: 0:04:05 lr: 0.000076 grad: 0.0977 (0.1066) loss: 0.8096 (0.8089) time: 0.1549 data: 0.0730 max mem: 9377 +Train: [45] [4700/6250] eta: 0:03:50 lr: 0.000076 grad: 0.1107 (0.1067) loss: 0.8144 (0.8089) time: 0.1502 data: 0.0621 max mem: 9377 +Train: [45] [4800/6250] eta: 0:03:35 lr: 0.000076 grad: 0.1054 (0.1066) loss: 0.8051 (0.8088) time: 0.1147 data: 0.0288 max mem: 9377 +Train: [45] [4900/6250] eta: 0:03:20 lr: 0.000076 grad: 0.0985 (0.1068) loss: 0.8074 (0.8088) time: 0.1630 data: 0.0852 max mem: 9377 +Train: [45] [5000/6250] eta: 0:03:05 lr: 0.000076 grad: 0.1074 (0.1068) loss: 0.8047 (0.8088) time: 0.1389 data: 0.0605 max mem: 9377 +Train: [45] [5100/6250] eta: 0:02:50 lr: 0.000076 grad: 0.1037 (0.1068) loss: 0.8101 (0.8087) time: 0.1424 data: 0.0642 max mem: 9377 +Train: [45] [5200/6250] eta: 0:02:35 lr: 0.000076 grad: 0.1002 (0.1069) loss: 0.8067 (0.8087) time: 0.1357 data: 0.0501 max mem: 9377 +Train: [45] [5300/6250] eta: 0:02:20 lr: 0.000076 grad: 0.0977 (0.1069) loss: 0.8186 (0.8087) time: 0.1548 data: 0.0684 max mem: 9377 +Train: [45] [5400/6250] eta: 0:02:05 lr: 0.000076 grad: 0.1039 (0.1070) loss: 0.8078 (0.8086) time: 0.1362 data: 0.0496 max mem: 9377 +Train: [45] [5500/6250] eta: 0:01:50 lr: 0.000076 grad: 0.1091 (0.1071) loss: 0.8103 (0.8087) time: 0.1352 data: 0.0475 max mem: 9377 +Train: [45] [5600/6250] eta: 0:01:35 lr: 0.000076 grad: 0.1034 (0.1071) loss: 0.8075 (0.8086) time: 0.1233 data: 0.0440 max mem: 9377 +Train: [45] [5700/6250] eta: 0:01:21 lr: 0.000076 grad: 0.1008 (0.1072) loss: 0.8070 (0.8086) time: 0.1487 data: 0.0629 max mem: 9377 +Train: [45] [5800/6250] eta: 0:01:06 lr: 0.000076 grad: 0.1025 (0.1072) loss: 0.8125 (0.8087) time: 0.1450 data: 0.0595 max mem: 9377 +Train: [45] [5900/6250] eta: 0:00:51 lr: 0.000076 grad: 0.1116 (0.1073) loss: 0.8013 (0.8086) time: 0.1228 data: 0.0386 max mem: 9377 +Train: [45] [6000/6250] eta: 0:00:36 lr: 0.000076 grad: 0.1069 (0.1073) loss: 0.8062 (0.8087) time: 0.1148 data: 0.0337 max mem: 9377 +Train: [45] [6100/6250] eta: 0:00:22 lr: 0.000076 grad: 0.1061 (0.1073) loss: 0.8058 (0.8087) time: 0.1417 data: 0.0595 max mem: 9377 +Train: [45] [6200/6250] eta: 0:00:07 lr: 0.000076 grad: 0.1024 (0.1074) loss: 0.8070 (0.8087) time: 0.1618 data: 0.0824 max mem: 9377 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.1093 (0.1074) loss: 0.8052 (0.8087) time: 0.1378 data: 0.0554 max mem: 9377 +Train: [45] Total time: 0:15:20 (0.1473 s / it) +Averaged stats: lr: 0.000076 grad: 0.1093 (0.1074) loss: 0.8052 (0.8087) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:05:14 loss: 0.8390 (0.8390) time: 5.0793 data: 5.0471 max mem: 9377 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8432 (0.8426) time: 0.1350 data: 0.1100 max mem: 9377 +Eval (hcp-train-subset): [45] Total time: 0:00:13 (0.2163 s / it) +Averaged stats (hcp-train-subset): loss: 0.8432 (0.8426) +Eval (hcp-val): [45] [ 0/62] eta: 0:05:42 loss: 0.8335 (0.8335) time: 5.5225 data: 5.4923 max mem: 9377 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8388 (0.8406) time: 0.1299 data: 0.1048 max mem: 9377 +Eval (hcp-val): [45] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (hcp-val): loss: 0.8388 (0.8406) +Eval (nsd-val): [45] [ 0/62] eta: 0:03:35 loss: 0.8150 (0.8150) time: 3.4782 data: 3.3966 max mem: 9377 +Eval (nsd-val): [45] [61/62] eta: 0:00:00 loss: 0.8244 (0.8266) time: 0.1309 data: 0.1060 max mem: 9377 +Eval (nsd-val): [45] Total time: 0:00:13 (0.2186 s / it) +Averaged stats (nsd-val): loss: 0.8244 (0.8266) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 7:28:54 lr: 0.000076 grad: 0.1859 (0.1859) loss: 0.8518 (0.8518) time: 4.3095 data: 4.1060 max mem: 9377 +Train: [46] [ 100/6250] eta: 0:21:06 lr: 0.000076 grad: 0.1274 (0.1867) loss: 0.8012 (0.8092) time: 0.1532 data: 0.0561 max mem: 9377 +Train: [46] [ 200/6250] eta: 0:18:03 lr: 0.000076 grad: 0.1252 (0.1594) loss: 0.8109 (0.8065) time: 0.1599 data: 0.0570 max mem: 9377 +Train: [46] [ 300/6250] eta: 0:16:20 lr: 0.000076 grad: 0.1166 (0.1467) loss: 0.8072 (0.8048) time: 0.1408 data: 0.0501 max mem: 9377 +Train: [46] [ 400/6250] eta: 0:15:41 lr: 0.000076 grad: 0.1067 (0.1390) loss: 0.8068 (0.8047) time: 0.1593 data: 0.0680 max mem: 9377 +Train: [46] [ 500/6250] eta: 0:15:00 lr: 0.000076 grad: 0.1098 (0.1340) loss: 0.8112 (0.8045) time: 0.1335 data: 0.0446 max mem: 9377 +Train: [46] [ 600/6250] eta: 0:14:34 lr: 0.000076 grad: 0.1063 (0.1299) loss: 0.7990 (0.8047) time: 0.1469 data: 0.0594 max mem: 9377 +Train: [46] [ 700/6250] eta: 0:14:05 lr: 0.000076 grad: 0.1170 (0.1274) loss: 0.7937 (0.8047) time: 0.1427 data: 0.0550 max mem: 9377 +Train: [46] [ 800/6250] eta: 0:13:41 lr: 0.000076 grad: 0.1010 (0.1246) loss: 0.8061 (0.8053) time: 0.1461 data: 0.0622 max mem: 9377 +Train: [46] [ 900/6250] eta: 0:13:21 lr: 0.000076 grad: 0.1079 (0.1228) loss: 0.8060 (0.8055) time: 0.1343 data: 0.0539 max mem: 9377 +Train: [46] [1000/6250] eta: 0:13:01 lr: 0.000076 grad: 0.1042 (0.1212) loss: 0.8003 (0.8056) time: 0.1378 data: 0.0541 max mem: 9377 +Train: [46] [1100/6250] eta: 0:12:44 lr: 0.000075 grad: 0.1082 (0.1199) loss: 0.8050 (0.8057) time: 0.1452 data: 0.0617 max mem: 9377 +Train: [46] [1200/6250] eta: 0:12:23 lr: 0.000075 grad: 0.1043 (0.1191) loss: 0.7999 (0.8056) time: 0.1394 data: 0.0561 max mem: 9377 +Train: [46] [1300/6250] eta: 0:12:03 lr: 0.000075 grad: 0.1019 (0.1183) loss: 0.8084 (0.8057) time: 0.1453 data: 0.0705 max mem: 9377 +Train: [46] [1400/6250] eta: 0:11:45 lr: 0.000075 grad: 0.1057 (0.1177) loss: 0.8087 (0.8056) time: 0.1268 data: 0.0533 max mem: 9377 +Train: [46] [1500/6250] eta: 0:11:28 lr: 0.000075 grad: 0.1075 (0.1172) loss: 0.8088 (0.8055) time: 0.1472 data: 0.0636 max mem: 9377 +Train: [46] [1600/6250] eta: 0:11:12 lr: 0.000075 grad: 0.1107 (0.1167) loss: 0.8070 (0.8055) time: 0.1570 data: 0.0816 max mem: 9377 +Train: [46] [1700/6250] eta: 0:10:54 lr: 0.000075 grad: 0.1088 (0.1164) loss: 0.7971 (0.8053) time: 0.1403 data: 0.0568 max mem: 9377 +Train: [46] [1800/6250] eta: 0:10:39 lr: 0.000075 grad: 0.1088 (0.1163) loss: 0.8073 (0.8052) time: 0.1252 data: 0.0449 max mem: 9377 +Train: [46] [1900/6250] eta: 0:10:24 lr: 0.000075 grad: 0.1058 (0.1158) loss: 0.8013 (0.8052) time: 0.1561 data: 0.0755 max mem: 9377 +Train: [46] [2000/6250] eta: 0:10:09 lr: 0.000075 grad: 0.1124 (0.1153) loss: 0.7956 (0.8050) time: 0.1524 data: 0.0716 max mem: 9377 +Train: [46] [2100/6250] eta: 0:09:55 lr: 0.000075 grad: 0.1067 (0.1149) loss: 0.8052 (0.8049) time: 0.1554 data: 0.0782 max mem: 9377 +Train: [46] [2200/6250] eta: 0:09:40 lr: 0.000075 grad: 0.0986 (0.1145) loss: 0.8061 (0.8049) time: 0.1396 data: 0.0600 max mem: 9377 +Train: [46] [2300/6250] eta: 0:09:26 lr: 0.000075 grad: 0.1021 (0.1142) loss: 0.8119 (0.8050) time: 0.1467 data: 0.0640 max mem: 9377 +Train: [46] [2400/6250] eta: 0:09:12 lr: 0.000075 grad: 0.1069 (0.1138) loss: 0.8038 (0.8050) time: 0.1413 data: 0.0708 max mem: 9377 +Train: [46] [2500/6250] eta: 0:09:01 lr: 0.000075 grad: 0.1078 (0.1137) loss: 0.8062 (0.8049) time: 0.1505 data: 0.0676 max mem: 9377 +Train: [46] [2600/6250] eta: 0:08:47 lr: 0.000075 grad: 0.0997 (0.1134) loss: 0.8090 (0.8049) time: 0.1317 data: 0.0518 max mem: 9377 +Train: [46] [2700/6250] eta: 0:08:35 lr: 0.000075 grad: 0.1071 (0.1133) loss: 0.8037 (0.8049) time: 0.1555 data: 0.0762 max mem: 9377 +Train: [46] [2800/6250] eta: 0:08:19 lr: 0.000075 grad: 0.1123 (0.1130) loss: 0.8015 (0.8049) time: 0.1421 data: 0.0668 max mem: 9377 +Train: [46] [2900/6250] eta: 0:08:04 lr: 0.000075 grad: 0.1031 (0.1128) loss: 0.8038 (0.8050) time: 0.1470 data: 0.0646 max mem: 9377 +Train: [46] [3000/6250] eta: 0:07:48 lr: 0.000075 grad: 0.1016 (0.1127) loss: 0.8087 (0.8051) time: 0.1401 data: 0.0585 max mem: 9377 +Train: [46] [3100/6250] eta: 0:07:33 lr: 0.000075 grad: 0.1104 (0.1126) loss: 0.8061 (0.8052) time: 0.1338 data: 0.0546 max mem: 9377 +Train: [46] [3200/6250] eta: 0:07:18 lr: 0.000075 grad: 0.1049 (0.1126) loss: 0.8124 (0.8052) time: 0.1489 data: 0.0709 max mem: 9377 +Train: [46] [3300/6250] eta: 0:07:03 lr: 0.000075 grad: 0.1023 (0.1125) loss: 0.8101 (0.8053) time: 0.1058 data: 0.0200 max mem: 9377 +Train: [46] [3400/6250] eta: 0:06:47 lr: 0.000075 grad: 0.1000 (0.1124) loss: 0.8109 (0.8054) time: 0.1227 data: 0.0340 max mem: 9377 +Train: [46] [3500/6250] eta: 0:06:32 lr: 0.000075 grad: 0.1067 (0.1122) loss: 0.8061 (0.8055) time: 0.1161 data: 0.0283 max mem: 9377 +Train: [46] [3600/6250] eta: 0:06:16 lr: 0.000075 grad: 0.1067 (0.1121) loss: 0.8027 (0.8056) time: 0.1192 data: 0.0295 max mem: 9377 +Train: [46] [3700/6250] eta: 0:06:01 lr: 0.000075 grad: 0.1089 (0.1122) loss: 0.8110 (0.8056) time: 0.1233 data: 0.0398 max mem: 9377 +Train: [46] [3800/6250] eta: 0:05:47 lr: 0.000075 grad: 0.1064 (0.1122) loss: 0.8020 (0.8056) time: 0.1479 data: 0.0701 max mem: 9377 +Train: [46] [3900/6250] eta: 0:05:33 lr: 0.000075 grad: 0.1095 (0.1123) loss: 0.8004 (0.8056) time: 0.1582 data: 0.0770 max mem: 9377 +Train: [46] [4000/6250] eta: 0:05:19 lr: 0.000075 grad: 0.1083 (0.1123) loss: 0.8129 (0.8057) time: 0.1614 data: 0.0866 max mem: 9377 +Train: [46] [4100/6250] eta: 0:05:05 lr: 0.000075 grad: 0.1113 (0.1123) loss: 0.8121 (0.8057) time: 0.1448 data: 0.0671 max mem: 9377 +Train: [46] [4200/6250] eta: 0:04:52 lr: 0.000074 grad: 0.1110 (0.1123) loss: 0.8178 (0.8057) time: 0.1391 data: 0.0622 max mem: 9377 +Train: [46] [4300/6250] eta: 0:04:38 lr: 0.000074 grad: 0.1035 (0.1124) loss: 0.8067 (0.8058) time: 0.1525 data: 0.0733 max mem: 9377 +Train: [46] [4400/6250] eta: 0:04:24 lr: 0.000074 grad: 0.1077 (0.1124) loss: 0.7938 (0.8058) time: 0.1190 data: 0.0347 max mem: 9377 +Train: [46] [4500/6250] eta: 0:04:10 lr: 0.000074 grad: 0.1120 (0.1124) loss: 0.8132 (0.8058) time: 0.1920 data: 0.1183 max mem: 9377 +Train: [46] [4600/6250] eta: 0:03:56 lr: 0.000074 grad: 0.1024 (0.1124) loss: 0.8066 (0.8059) time: 0.1443 data: 0.0615 max mem: 9377 +Train: [46] [4700/6250] eta: 0:03:42 lr: 0.000074 grad: 0.1133 (0.1124) loss: 0.8026 (0.8058) time: 0.1428 data: 0.0538 max mem: 9377 +Train: [46] [4800/6250] eta: 0:03:28 lr: 0.000074 grad: 0.1082 (0.1124) loss: 0.8029 (0.8059) time: 0.1346 data: 0.0531 max mem: 9377 +Train: [46] [4900/6250] eta: 0:03:13 lr: 0.000074 grad: 0.1125 (0.1124) loss: 0.8069 (0.8059) time: 0.1584 data: 0.0796 max mem: 9377 +Train: [46] [5000/6250] eta: 0:02:59 lr: 0.000074 grad: 0.1096 (0.1124) loss: 0.8087 (0.8059) time: 0.1405 data: 0.0511 max mem: 9377 +Train: [46] [5100/6250] eta: 0:02:45 lr: 0.000074 grad: 0.1037 (0.1124) loss: 0.8067 (0.8059) time: 0.1663 data: 0.0784 max mem: 9377 +Train: [46] [5200/6250] eta: 0:02:30 lr: 0.000074 grad: 0.1117 (0.1124) loss: 0.8106 (0.8059) time: 0.1195 data: 0.0386 max mem: 9377 +Train: [46] [5300/6250] eta: 0:02:16 lr: 0.000074 grad: 0.1125 (0.1124) loss: 0.8115 (0.8060) time: 0.1340 data: 0.0522 max mem: 9377 +Train: [46] [5400/6250] eta: 0:02:01 lr: 0.000074 grad: 0.1020 (0.1124) loss: 0.8120 (0.8060) time: 0.1254 data: 0.0368 max mem: 9377 +Train: [46] [5500/6250] eta: 0:01:47 lr: 0.000074 grad: 0.1089 (0.1124) loss: 0.8014 (0.8060) time: 0.1411 data: 0.0623 max mem: 9377 +Train: [46] [5600/6250] eta: 0:01:32 lr: 0.000074 grad: 0.1087 (0.1124) loss: 0.8035 (0.8060) time: 0.1625 data: 0.0795 max mem: 9377 +Train: [46] [5700/6250] eta: 0:01:18 lr: 0.000074 grad: 0.1079 (0.1125) loss: 0.7974 (0.8059) time: 0.1546 data: 0.0771 max mem: 9377 +Train: [46] [5800/6250] eta: 0:01:04 lr: 0.000074 grad: 0.1058 (0.1125) loss: 0.8047 (0.8059) time: 0.1396 data: 0.0598 max mem: 9377 +Train: [46] [5900/6250] eta: 0:00:50 lr: 0.000074 grad: 0.1126 (0.1125) loss: 0.8087 (0.8059) time: 0.1362 data: 0.0575 max mem: 9377 +Train: [46] [6000/6250] eta: 0:00:35 lr: 0.000074 grad: 0.1094 (0.1127) loss: 0.8033 (0.8059) time: 0.1364 data: 0.0573 max mem: 9377 +Train: [46] [6100/6250] eta: 0:00:21 lr: 0.000074 grad: 0.1170 (0.1128) loss: 0.7960 (0.8058) time: 0.1450 data: 0.0682 max mem: 9377 +Train: [46] [6200/6250] eta: 0:00:07 lr: 0.000074 grad: 0.1058 (0.1128) loss: 0.8105 (0.8058) time: 0.1360 data: 0.0534 max mem: 9377 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.1115 (0.1129) loss: 0.8087 (0.8058) time: 0.1475 data: 0.0624 max mem: 9377 +Train: [46] Total time: 0:14:59 (0.1440 s / it) +Averaged stats: lr: 0.000074 grad: 0.1115 (0.1129) loss: 0.8087 (0.8058) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:04:56 loss: 0.8428 (0.8428) time: 4.7822 data: 4.7447 max mem: 9377 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8419 (0.8433) time: 0.1580 data: 0.1316 max mem: 9377 +Eval (hcp-train-subset): [46] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (hcp-train-subset): loss: 0.8419 (0.8433) +Eval (hcp-val): [46] [ 0/62] eta: 0:05:39 loss: 0.8381 (0.8381) time: 5.4745 data: 5.4405 max mem: 9377 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8394 (0.8411) time: 0.1442 data: 0.1176 max mem: 9377 +Eval (hcp-val): [46] Total time: 0:00:15 (0.2435 s / it) +Averaged stats (hcp-val): loss: 0.8394 (0.8411) +Eval (nsd-val): [46] [ 0/62] eta: 0:03:35 loss: 0.8358 (0.8358) time: 3.4706 data: 3.3981 max mem: 9377 +Eval (nsd-val): [46] [61/62] eta: 0:00:00 loss: 0.8456 (0.8469) time: 0.1036 data: 0.0768 max mem: 9377 +Eval (nsd-val): [46] Total time: 0:00:15 (0.2492 s / it) +Averaged stats (nsd-val): loss: 0.8456 (0.8469) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 8:27:57 lr: 0.000074 grad: 0.0674 (0.0674) loss: 0.8704 (0.8704) time: 4.8764 data: 4.5615 max mem: 9377 +Train: [47] [ 100/6250] eta: 0:23:23 lr: 0.000074 grad: 0.1175 (0.1428) loss: 0.8184 (0.8243) time: 0.1681 data: 0.0718 max mem: 9377 +Train: [47] [ 200/6250] eta: 0:19:23 lr: 0.000074 grad: 0.1029 (0.1275) loss: 0.8100 (0.8201) time: 0.1581 data: 0.0696 max mem: 9377 +Train: [47] [ 300/6250] eta: 0:17:45 lr: 0.000074 grad: 0.0966 (0.1223) loss: 0.8207 (0.8195) time: 0.1435 data: 0.0507 max mem: 9377 +Train: [47] [ 400/6250] eta: 0:16:52 lr: 0.000074 grad: 0.1016 (0.1176) loss: 0.8093 (0.8193) time: 0.1586 data: 0.0654 max mem: 9377 +Train: [47] [ 500/6250] eta: 0:16:02 lr: 0.000074 grad: 0.0938 (0.1150) loss: 0.8224 (0.8193) time: 0.1409 data: 0.0570 max mem: 9377 +Train: [47] [ 600/6250] eta: 0:15:21 lr: 0.000074 grad: 0.0963 (0.1133) loss: 0.8172 (0.8189) time: 0.1429 data: 0.0511 max mem: 9377 +Train: [47] [ 700/6250] eta: 0:14:56 lr: 0.000074 grad: 0.0964 (0.1129) loss: 0.8140 (0.8178) time: 0.1657 data: 0.0756 max mem: 9377 +Train: [47] [ 800/6250] eta: 0:14:29 lr: 0.000074 grad: 0.1118 (0.1129) loss: 0.8040 (0.8168) time: 0.1458 data: 0.0607 max mem: 9377 +Train: [47] [ 900/6250] eta: 0:14:05 lr: 0.000074 grad: 0.0992 (0.1125) loss: 0.8143 (0.8159) time: 0.1362 data: 0.0479 max mem: 9377 +Train: [47] [1000/6250] eta: 0:13:42 lr: 0.000073 grad: 0.1030 (0.1123) loss: 0.8167 (0.8147) time: 0.1370 data: 0.0520 max mem: 9377 +Train: [47] [1100/6250] eta: 0:13:18 lr: 0.000073 grad: 0.1060 (0.1119) loss: 0.8038 (0.8138) time: 0.1428 data: 0.0570 max mem: 9377 +Train: [47] [1200/6250] eta: 0:12:56 lr: 0.000073 grad: 0.1118 (0.1118) loss: 0.7995 (0.8130) time: 0.1443 data: 0.0611 max mem: 9377 +Train: [47] [1300/6250] eta: 0:12:38 lr: 0.000073 grad: 0.1141 (0.1117) loss: 0.7902 (0.8125) time: 0.1501 data: 0.0700 max mem: 9377 +Train: [47] [1400/6250] eta: 0:12:15 lr: 0.000073 grad: 0.1061 (0.1114) loss: 0.7952 (0.8119) time: 0.1420 data: 0.0618 max mem: 9377 +Train: [47] [1500/6250] eta: 0:11:55 lr: 0.000073 grad: 0.1095 (0.1111) loss: 0.8097 (0.8117) time: 0.1354 data: 0.0517 max mem: 9377 +Train: [47] [1600/6250] eta: 0:11:37 lr: 0.000073 grad: 0.1100 (0.1109) loss: 0.8041 (0.8115) time: 0.1181 data: 0.0325 max mem: 9377 +Train: [47] [1700/6250] eta: 0:11:18 lr: 0.000073 grad: 0.1017 (0.1107) loss: 0.8182 (0.8113) time: 0.1448 data: 0.0616 max mem: 9377 +Train: [47] [1800/6250] eta: 0:11:02 lr: 0.000073 grad: 0.1112 (0.1107) loss: 0.8064 (0.8111) time: 0.1371 data: 0.0588 max mem: 9377 +Train: [47] [1900/6250] eta: 0:10:45 lr: 0.000073 grad: 0.1051 (0.1109) loss: 0.8114 (0.8109) time: 0.1561 data: 0.0747 max mem: 9377 +Train: [47] [2000/6250] eta: 0:10:27 lr: 0.000073 grad: 0.1105 (0.1111) loss: 0.8080 (0.8106) time: 0.1314 data: 0.0480 max mem: 9377 +Train: [47] [2100/6250] eta: 0:10:11 lr: 0.000073 grad: 0.1116 (0.1112) loss: 0.8081 (0.8103) time: 0.1422 data: 0.0598 max mem: 9377 +Train: [47] [2200/6250] eta: 0:09:54 lr: 0.000073 grad: 0.1179 (0.1114) loss: 0.8014 (0.8100) time: 0.1604 data: 0.0794 max mem: 9377 +Train: [47] [2300/6250] eta: 0:09:37 lr: 0.000073 grad: 0.1086 (0.1117) loss: 0.7983 (0.8096) time: 0.1395 data: 0.0610 max mem: 9377 +Train: [47] [2400/6250] eta: 0:09:22 lr: 0.000073 grad: 0.1063 (0.1117) loss: 0.8066 (0.8094) time: 0.1503 data: 0.0712 max mem: 9377 +Train: [47] [2500/6250] eta: 0:09:06 lr: 0.000073 grad: 0.1087 (0.1119) loss: 0.8091 (0.8091) time: 0.1237 data: 0.0400 max mem: 9377 +Train: [47] [2600/6250] eta: 0:08:50 lr: 0.000073 grad: 0.1084 (0.1120) loss: 0.7948 (0.8089) time: 0.1380 data: 0.0529 max mem: 9377 +Train: [47] [2700/6250] eta: 0:08:35 lr: 0.000073 grad: 0.1087 (0.1120) loss: 0.8089 (0.8088) time: 0.1490 data: 0.0671 max mem: 9377 +Train: [47] [2800/6250] eta: 0:08:22 lr: 0.000073 grad: 0.1042 (0.1121) loss: 0.8013 (0.8087) time: 0.1997 data: 0.1267 max mem: 9377 +Train: [47] [2900/6250] eta: 0:08:08 lr: 0.000073 grad: 0.1096 (0.1121) loss: 0.7994 (0.8086) time: 0.1566 data: 0.0724 max mem: 9377 +Train: [47] [3000/6250] eta: 0:07:54 lr: 0.000073 grad: 0.1099 (0.1120) loss: 0.8064 (0.8086) time: 0.1531 data: 0.0631 max mem: 9377 +Train: [47] [3100/6250] eta: 0:07:39 lr: 0.000073 grad: 0.1074 (0.1120) loss: 0.8070 (0.8085) time: 0.1276 data: 0.0469 max mem: 9377 +Train: [47] [3200/6250] eta: 0:07:25 lr: 0.000073 grad: 0.1054 (0.1120) loss: 0.8067 (0.8084) time: 0.1600 data: 0.0782 max mem: 9377 +Train: [47] [3300/6250] eta: 0:07:09 lr: 0.000073 grad: 0.1052 (0.1120) loss: 0.8073 (0.8083) time: 0.1425 data: 0.0566 max mem: 9377 +Train: [47] [3400/6250] eta: 0:06:54 lr: 0.000073 grad: 0.1023 (0.1119) loss: 0.8158 (0.8083) time: 0.1389 data: 0.0585 max mem: 9377 +Train: [47] [3500/6250] eta: 0:06:39 lr: 0.000073 grad: 0.1135 (0.1118) loss: 0.8040 (0.8083) time: 0.1514 data: 0.0693 max mem: 9377 +Train: [47] [3600/6250] eta: 0:06:23 lr: 0.000073 grad: 0.1079 (0.1118) loss: 0.8103 (0.8083) time: 0.1245 data: 0.0415 max mem: 9377 +Train: [47] [3700/6250] eta: 0:06:08 lr: 0.000073 grad: 0.1130 (0.1118) loss: 0.8054 (0.8082) time: 0.1142 data: 0.0261 max mem: 9377 +Train: [47] [3800/6250] eta: 0:05:52 lr: 0.000073 grad: 0.1127 (0.1119) loss: 0.8072 (0.8081) time: 0.1249 data: 0.0406 max mem: 9377 +Train: [47] [3900/6250] eta: 0:05:37 lr: 0.000073 grad: 0.1082 (0.1119) loss: 0.8065 (0.8081) time: 0.1084 data: 0.0231 max mem: 9377 +Train: [47] [4000/6250] eta: 0:05:22 lr: 0.000073 grad: 0.1059 (0.1120) loss: 0.8117 (0.8080) time: 0.1433 data: 0.0566 max mem: 9377 +Train: [47] [4100/6250] eta: 0:05:07 lr: 0.000072 grad: 0.1134 (0.1120) loss: 0.7945 (0.8080) time: 0.1524 data: 0.0675 max mem: 9377 +Train: [47] [4200/6250] eta: 0:04:53 lr: 0.000072 grad: 0.1124 (0.1121) loss: 0.8038 (0.8079) time: 0.1353 data: 0.0513 max mem: 9377 +Train: [47] [4300/6250] eta: 0:04:38 lr: 0.000072 grad: 0.1185 (0.1122) loss: 0.7998 (0.8078) time: 0.1425 data: 0.0592 max mem: 9377 +Train: [47] [4400/6250] eta: 0:04:24 lr: 0.000072 grad: 0.1103 (0.1123) loss: 0.8076 (0.8076) time: 0.1524 data: 0.0731 max mem: 9377 +Train: [47] [4500/6250] eta: 0:04:10 lr: 0.000072 grad: 0.1099 (0.1124) loss: 0.8032 (0.8075) time: 0.1781 data: 0.0973 max mem: 9377 +Train: [47] [4600/6250] eta: 0:03:56 lr: 0.000072 grad: 0.1084 (0.1126) loss: 0.8029 (0.8074) time: 0.1439 data: 0.0603 max mem: 9377 +Train: [47] [4700/6250] eta: 0:03:42 lr: 0.000072 grad: 0.1175 (0.1127) loss: 0.8059 (0.8073) time: 0.1534 data: 0.0615 max mem: 9377 +Train: [47] [4800/6250] eta: 0:03:28 lr: 0.000072 grad: 0.1119 (0.1128) loss: 0.8001 (0.8072) time: 0.1511 data: 0.0723 max mem: 9377 +Train: [47] [4900/6250] eta: 0:03:14 lr: 0.000072 grad: 0.1191 (0.1129) loss: 0.7880 (0.8070) time: 0.1352 data: 0.0454 max mem: 9377 +Train: [47] [5000/6250] eta: 0:02:59 lr: 0.000072 grad: 0.1118 (0.1130) loss: 0.7982 (0.8068) time: 0.1361 data: 0.0574 max mem: 9377 +Train: [47] [5100/6250] eta: 0:02:45 lr: 0.000072 grad: 0.1171 (0.1133) loss: 0.7947 (0.8066) time: 0.1230 data: 0.0419 max mem: 9377 +Train: [47] [5200/6250] eta: 0:02:30 lr: 0.000072 grad: 0.1182 (0.1133) loss: 0.7933 (0.8064) time: 0.1166 data: 0.0318 max mem: 9377 +Train: [47] [5300/6250] eta: 0:02:16 lr: 0.000072 grad: 0.1085 (0.1134) loss: 0.8110 (0.8064) time: 0.1367 data: 0.0545 max mem: 9377 +Train: [47] [5400/6250] eta: 0:02:01 lr: 0.000072 grad: 0.1168 (0.1135) loss: 0.7937 (0.8063) time: 0.1390 data: 0.0509 max mem: 9377 +Train: [47] [5500/6250] eta: 0:01:47 lr: 0.000072 grad: 0.1075 (0.1135) loss: 0.8067 (0.8062) time: 0.1079 data: 0.0189 max mem: 9377 +Train: [47] [5600/6250] eta: 0:01:32 lr: 0.000072 grad: 0.1132 (0.1136) loss: 0.8019 (0.8061) time: 0.1296 data: 0.0424 max mem: 9377 +Train: [47] [5700/6250] eta: 0:01:18 lr: 0.000072 grad: 0.1110 (0.1136) loss: 0.8068 (0.8060) time: 0.1195 data: 0.0264 max mem: 9377 +Train: [47] [5800/6250] eta: 0:01:03 lr: 0.000072 grad: 0.1189 (0.1139) loss: 0.8009 (0.8060) time: 0.1209 data: 0.0363 max mem: 9377 +Train: [47] [5900/6250] eta: 0:00:49 lr: 0.000072 grad: 0.1156 (0.1140) loss: 0.8011 (0.8059) time: 0.1306 data: 0.0454 max mem: 9377 +Train: [47] [6000/6250] eta: 0:00:35 lr: 0.000072 grad: 0.1115 (0.1141) loss: 0.8030 (0.8059) time: 0.1313 data: 0.0463 max mem: 9377 +Train: [47] [6100/6250] eta: 0:00:21 lr: 0.000072 grad: 0.1118 (0.1142) loss: 0.8029 (0.8058) time: 0.1106 data: 0.0252 max mem: 9377 +Train: [47] [6200/6250] eta: 0:00:07 lr: 0.000072 grad: 0.1050 (0.1142) loss: 0.8048 (0.8057) time: 0.1513 data: 0.0731 max mem: 9377 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.1119 (0.1142) loss: 0.8094 (0.8058) time: 0.1364 data: 0.0596 max mem: 9377 +Train: [47] Total time: 0:14:50 (0.1425 s / it) +Averaged stats: lr: 0.000072 grad: 0.1119 (0.1142) loss: 0.8094 (0.8058) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:04:49 loss: 0.8400 (0.8400) time: 4.6637 data: 4.6315 max mem: 9377 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8431 (0.8441) time: 0.1366 data: 0.1118 max mem: 9377 +Eval (hcp-train-subset): [47] Total time: 0:00:13 (0.2138 s / it) +Averaged stats (hcp-train-subset): loss: 0.8431 (0.8441) +Eval (hcp-val): [47] [ 0/62] eta: 0:03:58 loss: 0.8391 (0.8391) time: 3.8524 data: 3.7789 max mem: 9377 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8383 (0.8416) time: 0.1287 data: 0.1033 max mem: 9377 +Eval (hcp-val): [47] Total time: 0:00:12 (0.2083 s / it) +Averaged stats (hcp-val): loss: 0.8383 (0.8416) +Eval (nsd-val): [47] [ 0/62] eta: 0:03:58 loss: 0.8091 (0.8091) time: 3.8480 data: 3.7319 max mem: 9377 +Eval (nsd-val): [47] [61/62] eta: 0:00:00 loss: 0.8198 (0.8211) time: 0.1308 data: 0.1056 max mem: 9377 +Eval (nsd-val): [47] Total time: 0:00:13 (0.2107 s / it) +Averaged stats (nsd-val): loss: 0.8198 (0.8211) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [48] [ 0/6250] eta: 13:09:21 lr: 0.000072 grad: 0.5650 (0.5650) loss: 0.8446 (0.8446) time: 7.5778 data: 7.4360 max mem: 9377 +Train: [48] [ 100/6250] eta: 0:21:27 lr: 0.000072 grad: 0.1262 (0.1373) loss: 0.8061 (0.8314) time: 0.1588 data: 0.0707 max mem: 9377 +Train: [48] [ 200/6250] eta: 0:17:41 lr: 0.000072 grad: 0.1105 (0.1325) loss: 0.8096 (0.8215) time: 0.1340 data: 0.0490 max mem: 9377 +Train: [48] [ 300/6250] eta: 0:16:10 lr: 0.000072 grad: 0.1046 (0.1267) loss: 0.8110 (0.8177) time: 0.1550 data: 0.0651 max mem: 9377 +Train: [48] [ 400/6250] eta: 0:15:09 lr: 0.000072 grad: 0.1033 (0.1218) loss: 0.8065 (0.8158) time: 0.1352 data: 0.0454 max mem: 9377 +Train: [48] [ 500/6250] eta: 0:14:36 lr: 0.000072 grad: 0.1090 (0.1185) loss: 0.8120 (0.8150) time: 0.1399 data: 0.0529 max mem: 9377 +Train: [48] [ 600/6250] eta: 0:14:04 lr: 0.000072 grad: 0.1042 (0.1163) loss: 0.8120 (0.8146) time: 0.1466 data: 0.0584 max mem: 9377 +Train: [48] [ 700/6250] eta: 0:13:43 lr: 0.000072 grad: 0.1009 (0.1146) loss: 0.8129 (0.8142) time: 0.1292 data: 0.0396 max mem: 9377 +Train: [48] [ 800/6250] eta: 0:13:15 lr: 0.000072 grad: 0.0996 (0.1130) loss: 0.8191 (0.8140) time: 0.1066 data: 0.0264 max mem: 9377 +Train: [48] [ 900/6250] eta: 0:12:55 lr: 0.000071 grad: 0.1057 (0.1117) loss: 0.8092 (0.8140) time: 0.1402 data: 0.0577 max mem: 9377 +Train: [48] [1000/6250] eta: 0:12:36 lr: 0.000071 grad: 0.1025 (0.1109) loss: 0.8106 (0.8139) time: 0.1438 data: 0.0602 max mem: 9377 +Train: [48] [1100/6250] eta: 0:12:16 lr: 0.000071 grad: 0.0978 (0.1099) loss: 0.8135 (0.8138) time: 0.1369 data: 0.0541 max mem: 9377 +Train: [48] [1200/6250] eta: 0:11:58 lr: 0.000071 grad: 0.0994 (0.1090) loss: 0.8174 (0.8138) time: 0.1360 data: 0.0496 max mem: 9377 +Train: [48] [1300/6250] eta: 0:11:39 lr: 0.000071 grad: 0.1015 (0.1085) loss: 0.8168 (0.8139) time: 0.1254 data: 0.0471 max mem: 9377 +Train: [48] [1400/6250] eta: 0:11:21 lr: 0.000071 grad: 0.1090 (0.1082) loss: 0.8169 (0.8138) time: 0.1301 data: 0.0478 max mem: 9377 +Train: [48] [1500/6250] eta: 0:11:03 lr: 0.000071 grad: 0.1050 (0.1080) loss: 0.8142 (0.8138) time: 0.1346 data: 0.0526 max mem: 9377 +Train: [48] [1600/6250] eta: 0:10:48 lr: 0.000071 grad: 0.0995 (0.1075) loss: 0.8090 (0.8137) time: 0.1415 data: 0.0602 max mem: 9377 +Train: [48] [1700/6250] eta: 0:10:31 lr: 0.000071 grad: 0.1005 (0.1076) loss: 0.8153 (0.8136) time: 0.1333 data: 0.0537 max mem: 9377 +Train: [48] [1800/6250] eta: 0:10:17 lr: 0.000071 grad: 0.1130 (0.1076) loss: 0.8154 (0.8134) time: 0.1459 data: 0.0690 max mem: 9377 +Train: [48] [1900/6250] eta: 0:10:02 lr: 0.000071 grad: 0.1001 (0.1077) loss: 0.8151 (0.8133) time: 0.1175 data: 0.0402 max mem: 9377 +Train: [48] [2000/6250] eta: 0:09:47 lr: 0.000071 grad: 0.1097 (0.1077) loss: 0.8096 (0.8131) time: 0.1292 data: 0.0481 max mem: 9377 +Train: [48] [2100/6250] eta: 0:09:33 lr: 0.000071 grad: 0.1044 (0.1078) loss: 0.8190 (0.8129) time: 0.1468 data: 0.0679 max mem: 9377 +Train: [48] [2200/6250] eta: 0:09:19 lr: 0.000071 grad: 0.1033 (0.1079) loss: 0.8067 (0.8127) time: 0.1431 data: 0.0635 max mem: 9377 +Train: [48] [2300/6250] eta: 0:09:05 lr: 0.000071 grad: 0.1061 (0.1079) loss: 0.8129 (0.8126) time: 0.1690 data: 0.0859 max mem: 9377 +Train: [48] [2400/6250] eta: 0:08:50 lr: 0.000071 grad: 0.0976 (0.1080) loss: 0.8155 (0.8123) time: 0.1298 data: 0.0499 max mem: 9377 +Train: [48] [2500/6250] eta: 0:08:36 lr: 0.000071 grad: 0.1040 (0.1082) loss: 0.8121 (0.8122) time: 0.1503 data: 0.0680 max mem: 9377 +Train: [48] [2600/6250] eta: 0:08:22 lr: 0.000071 grad: 0.1006 (0.1082) loss: 0.8077 (0.8120) time: 0.1439 data: 0.0614 max mem: 9377 +Train: [48] [2700/6250] eta: 0:08:08 lr: 0.000071 grad: 0.1104 (0.1081) loss: 0.8077 (0.8120) time: 0.1328 data: 0.0532 max mem: 9377 +Train: [48] [2800/6250] eta: 0:07:55 lr: 0.000071 grad: 0.1030 (0.1081) loss: 0.8095 (0.8119) time: 0.1335 data: 0.0504 max mem: 9377 +Train: [48] [2900/6250] eta: 0:07:40 lr: 0.000071 grad: 0.1057 (0.1081) loss: 0.8145 (0.8118) time: 0.1403 data: 0.0611 max mem: 9377 +Train: [48] [3000/6250] eta: 0:07:26 lr: 0.000071 grad: 0.1062 (0.1082) loss: 0.8153 (0.8116) time: 0.1257 data: 0.0367 max mem: 9377 +Train: [48] [3100/6250] eta: 0:07:12 lr: 0.000071 grad: 0.1115 (0.1083) loss: 0.8106 (0.8115) time: 0.1690 data: 0.0821 max mem: 9377 +Train: [48] [3200/6250] eta: 0:07:00 lr: 0.000071 grad: 0.1047 (0.1084) loss: 0.8000 (0.8114) time: 0.1465 data: 0.0682 max mem: 9377 +Train: [48] [3300/6250] eta: 0:06:48 lr: 0.000071 grad: 0.1022 (0.1085) loss: 0.8127 (0.8114) time: 0.1510 data: 0.0739 max mem: 9377 +Train: [48] [3400/6250] eta: 0:06:35 lr: 0.000071 grad: 0.1068 (0.1085) loss: 0.8106 (0.8113) time: 0.1384 data: 0.0595 max mem: 9377 +Train: [48] [3500/6250] eta: 0:06:22 lr: 0.000071 grad: 0.1073 (0.1086) loss: 0.8091 (0.8112) time: 0.1616 data: 0.0853 max mem: 9377 +Train: [48] [3600/6250] eta: 0:06:08 lr: 0.000071 grad: 0.1076 (0.1086) loss: 0.8085 (0.8111) time: 0.1648 data: 0.0849 max mem: 9377 +Train: [48] [3700/6250] eta: 0:05:54 lr: 0.000071 grad: 0.1049 (0.1087) loss: 0.8070 (0.8110) time: 0.1201 data: 0.0349 max mem: 9377 +Train: [48] [3800/6250] eta: 0:05:40 lr: 0.000071 grad: 0.1058 (0.1087) loss: 0.8066 (0.8109) time: 0.1297 data: 0.0458 max mem: 9377 +Train: [48] [3900/6250] eta: 0:05:25 lr: 0.000070 grad: 0.1096 (0.1089) loss: 0.8035 (0.8107) time: 0.1576 data: 0.0796 max mem: 9377 +Train: [48] [4000/6250] eta: 0:05:11 lr: 0.000070 grad: 0.1124 (0.1090) loss: 0.8080 (0.8106) time: 0.1334 data: 0.0502 max mem: 9377 +Train: [48] [4100/6250] eta: 0:04:56 lr: 0.000070 grad: 0.1076 (0.1091) loss: 0.8103 (0.8105) time: 0.1153 data: 0.0287 max mem: 9377 +Train: [48] [4200/6250] eta: 0:04:42 lr: 0.000070 grad: 0.1077 (0.1092) loss: 0.8074 (0.8105) time: 0.1265 data: 0.0429 max mem: 9377 +Train: [48] [4300/6250] eta: 0:04:29 lr: 0.000070 grad: 0.1106 (0.1092) loss: 0.8064 (0.8104) time: 0.1327 data: 0.0555 max mem: 9377 +Train: [48] [4400/6250] eta: 0:04:15 lr: 0.000070 grad: 0.1051 (0.1094) loss: 0.8072 (0.8103) time: 0.1324 data: 0.0529 max mem: 9377 +Train: [48] [4500/6250] eta: 0:04:02 lr: 0.000070 grad: 0.1075 (0.1094) loss: 0.8105 (0.8102) time: 0.1662 data: 0.0928 max mem: 9377 +Train: [48] [4600/6250] eta: 0:03:48 lr: 0.000070 grad: 0.1054 (0.1094) loss: 0.8138 (0.8102) time: 0.1342 data: 0.0473 max mem: 9377 +Train: [48] [4700/6250] eta: 0:03:35 lr: 0.000070 grad: 0.1124 (0.1097) loss: 0.8100 (0.8102) time: 0.1462 data: 0.0577 max mem: 9377 +Train: [48] [4800/6250] eta: 0:03:21 lr: 0.000070 grad: 0.1151 (0.1098) loss: 0.8103 (0.8101) time: 0.1397 data: 0.0593 max mem: 9377 +Train: [48] [4900/6250] eta: 0:03:07 lr: 0.000070 grad: 0.1070 (0.1098) loss: 0.8061 (0.8100) time: 0.1290 data: 0.0482 max mem: 9377 +Train: [48] [5000/6250] eta: 0:02:53 lr: 0.000070 grad: 0.1078 (0.1099) loss: 0.8087 (0.8100) time: 0.1398 data: 0.0613 max mem: 9377 +Train: [48] [5100/6250] eta: 0:02:39 lr: 0.000070 grad: 0.1134 (0.1100) loss: 0.8021 (0.8099) time: 0.1475 data: 0.0669 max mem: 9377 +Train: [48] [5200/6250] eta: 0:02:26 lr: 0.000070 grad: 0.1087 (0.1101) loss: 0.8123 (0.8099) time: 0.1439 data: 0.0577 max mem: 9377 +Train: [48] [5300/6250] eta: 0:02:12 lr: 0.000070 grad: 0.1093 (0.1101) loss: 0.8081 (0.8099) time: 0.1342 data: 0.0476 max mem: 9377 +Train: [48] [5400/6250] eta: 0:01:58 lr: 0.000070 grad: 0.1147 (0.1102) loss: 0.7998 (0.8098) time: 0.1325 data: 0.0505 max mem: 9377 +Train: [48] [5500/6250] eta: 0:01:44 lr: 0.000070 grad: 0.1088 (0.1103) loss: 0.8098 (0.8097) time: 0.1273 data: 0.0439 max mem: 9377 +Train: [48] [5600/6250] eta: 0:01:30 lr: 0.000070 grad: 0.1100 (0.1104) loss: 0.8060 (0.8097) time: 0.1585 data: 0.0783 max mem: 9377 +Train: [48] [5700/6250] eta: 0:01:16 lr: 0.000070 grad: 0.1029 (0.1104) loss: 0.8121 (0.8096) time: 0.1387 data: 0.0587 max mem: 9377 +Train: [48] [5800/6250] eta: 0:01:02 lr: 0.000070 grad: 0.1157 (0.1106) loss: 0.8086 (0.8095) time: 0.1351 data: 0.0533 max mem: 9377 +Train: [48] [5900/6250] eta: 0:00:48 lr: 0.000070 grad: 0.1163 (0.1107) loss: 0.7994 (0.8094) time: 0.1465 data: 0.0668 max mem: 9377 +Train: [48] [6000/6250] eta: 0:00:34 lr: 0.000070 grad: 0.1136 (0.1107) loss: 0.8053 (0.8093) time: 0.1661 data: 0.0848 max mem: 9377 +Train: [48] [6100/6250] eta: 0:00:20 lr: 0.000070 grad: 0.1036 (0.1108) loss: 0.8159 (0.8092) time: 0.1480 data: 0.0696 max mem: 9377 +Train: [48] [6200/6250] eta: 0:00:07 lr: 0.000070 grad: 0.1153 (0.1109) loss: 0.7977 (0.8091) time: 0.1464 data: 0.0682 max mem: 9377 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.1166 (0.1110) loss: 0.8024 (0.8090) time: 0.1699 data: 0.0937 max mem: 9377 +Train: [48] Total time: 0:14:41 (0.1411 s / it) +Averaged stats: lr: 0.000070 grad: 0.1166 (0.1110) loss: 0.8024 (0.8090) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:05:03 loss: 0.8392 (0.8392) time: 4.8924 data: 4.8616 max mem: 9377 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8425 (0.8436) time: 0.1225 data: 0.0975 max mem: 9377 +Eval (hcp-train-subset): [48] Total time: 0:00:12 (0.2053 s / it) +Averaged stats (hcp-train-subset): loss: 0.8425 (0.8436) +Eval (hcp-val): [48] [ 0/62] eta: 0:03:49 loss: 0.8409 (0.8409) time: 3.7072 data: 3.6165 max mem: 9377 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8415 (0.8420) time: 0.1254 data: 0.1003 max mem: 9377 +Eval (hcp-val): [48] Total time: 0:00:12 (0.2074 s / it) +Averaged stats (hcp-val): loss: 0.8415 (0.8420) +Eval (nsd-val): [48] [ 0/62] eta: 0:04:46 loss: 0.8090 (0.8090) time: 4.6237 data: 4.5886 max mem: 9377 +Eval (nsd-val): [48] [61/62] eta: 0:00:00 loss: 0.8221 (0.8230) time: 0.1118 data: 0.0871 max mem: 9377 +Eval (nsd-val): [48] Total time: 0:00:12 (0.2094 s / it) +Averaged stats (nsd-val): loss: 0.8221 (0.8230) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [49] [ 0/6250] eta: 10:40:33 lr: 0.000070 grad: nan (nan) loss: 0.8106 (0.8106) time: 6.1494 data: 6.0625 max mem: 9377 +Train: [49] [ 100/6250] eta: 0:19:15 lr: 0.000070 grad: 0.1041 (0.1250) loss: 0.8288 (0.8368) time: 0.1330 data: 0.0313 max mem: 9377 +Train: [49] [ 200/6250] eta: 0:16:45 lr: 0.000070 grad: 0.0981 (0.1197) loss: 0.8206 (0.8277) time: 0.1782 data: 0.0901 max mem: 9377 +Train: [49] [ 300/6250] eta: 0:15:33 lr: 0.000070 grad: 0.1083 (0.1190) loss: 0.8118 (0.8229) time: 0.1251 data: 0.0346 max mem: 9377 +Train: [49] [ 400/6250] eta: 0:15:08 lr: 0.000070 grad: 0.1063 (0.1177) loss: 0.8157 (0.8196) time: 0.1671 data: 0.0761 max mem: 9377 +Train: [49] [ 500/6250] eta: 0:14:39 lr: 0.000070 grad: 0.1032 (0.1158) loss: 0.8082 (0.8179) time: 0.1371 data: 0.0500 max mem: 9377 +Train: [49] [ 600/6250] eta: 0:14:05 lr: 0.000070 grad: 0.1065 (0.1142) loss: 0.8206 (0.8170) time: 0.1397 data: 0.0568 max mem: 9377 +Train: [49] [ 700/6250] eta: 0:13:51 lr: 0.000069 grad: 0.1125 (0.1135) loss: 0.8101 (0.8163) time: 0.1430 data: 0.0546 max mem: 9377 +Train: [49] [ 800/6250] eta: 0:13:55 lr: 0.000069 grad: 0.1052 (0.1131) loss: 0.8107 (0.8158) time: 0.1976 data: 0.1188 max mem: 9377 +Train: [49] [ 900/6250] eta: 0:13:48 lr: 0.000069 grad: 0.1000 (0.1127) loss: 0.8112 (0.8154) time: 0.2008 data: 0.1185 max mem: 9377 +Train: [49] [1000/6250] eta: 0:13:43 lr: 0.000069 grad: 0.1086 (0.1124) loss: 0.8046 (0.8149) time: 0.1612 data: 0.0734 max mem: 9377 +Train: [49] [1100/6250] eta: 0:13:30 lr: 0.000069 grad: 0.1143 (0.1120) loss: 0.8047 (0.8144) time: 0.1577 data: 0.0778 max mem: 9377 +Train: [49] [1200/6250] eta: 0:13:14 lr: 0.000069 grad: 0.1108 (0.1122) loss: 0.8042 (0.8140) time: 0.1483 data: 0.0586 max mem: 9377 +Train: [49] [1300/6250] eta: 0:12:55 lr: 0.000069 grad: 0.1105 (0.1122) loss: 0.8070 (0.8136) time: 0.1441 data: 0.0608 max mem: 9377 +Train: [49] [1400/6250] eta: 0:12:34 lr: 0.000069 grad: 0.1083 (0.1119) loss: 0.8110 (0.8133) time: 0.1293 data: 0.0479 max mem: 9377 +Train: [49] [1500/6250] eta: 0:12:12 lr: 0.000069 grad: 0.1044 (0.1116) loss: 0.8101 (0.8130) time: 0.1567 data: 0.0713 max mem: 9377 +Train: [49] [1600/6250] eta: 0:11:51 lr: 0.000069 grad: 0.1118 (0.1115) loss: 0.8029 (0.8127) time: 0.1332 data: 0.0536 max mem: 9377 +Train: [49] [1700/6250] eta: 0:11:32 lr: 0.000069 grad: 0.1157 (0.1117) loss: 0.8073 (0.8124) time: 0.1392 data: 0.0555 max mem: 9377 +Train: [49] [1800/6250] eta: 0:11:15 lr: 0.000069 grad: 0.1133 (0.1119) loss: 0.8028 (0.8121) time: 0.1413 data: 0.0605 max mem: 9377 +Train: [49] [1900/6250] eta: 0:10:57 lr: 0.000069 grad: 0.1105 (0.1122) loss: 0.8044 (0.8116) time: 0.1402 data: 0.0511 max mem: 9377 +Train: [49] [2000/6250] eta: 0:10:39 lr: 0.000069 grad: 0.1146 (0.1127) loss: 0.8063 (0.8112) time: 0.1335 data: 0.0432 max mem: 9377 +Train: [49] [2100/6250] eta: 0:10:22 lr: 0.000069 grad: 0.1133 (0.1128) loss: 0.8087 (0.8109) time: 0.1392 data: 0.0585 max mem: 9377 +Train: [49] [2200/6250] eta: 0:10:05 lr: 0.000069 grad: 0.1056 (0.1131) loss: 0.8122 (0.8106) time: 0.1457 data: 0.0628 max mem: 9377 +Train: [49] [2300/6250] eta: 0:09:48 lr: 0.000069 grad: 0.1146 (0.1133) loss: 0.8042 (0.8103) time: 0.1380 data: 0.0512 max mem: 9377 +Train: [49] [2400/6250] eta: 0:09:32 lr: 0.000069 grad: 0.1132 (0.1133) loss: 0.7991 (0.8100) time: 0.1362 data: 0.0513 max mem: 9377 +Train: [49] [2500/6250] eta: 0:09:16 lr: 0.000069 grad: 0.1180 (0.1135) loss: 0.7988 (0.8096) time: 0.1420 data: 0.0633 max mem: 9377 +Train: [49] [2600/6250] eta: 0:08:58 lr: 0.000069 grad: 0.1131 (0.1136) loss: 0.8055 (0.8094) time: 0.1222 data: 0.0453 max mem: 9377 +Train: [49] [2700/6250] eta: 0:08:42 lr: 0.000069 grad: 0.1076 (0.1137) loss: 0.8062 (0.8092) time: 0.1207 data: 0.0362 max mem: 9377 +Train: [49] [2800/6250] eta: 0:08:26 lr: 0.000069 grad: 0.1103 (0.1138) loss: 0.8029 (0.8089) time: 0.1358 data: 0.0572 max mem: 9377 +Train: [49] [2900/6250] eta: 0:08:11 lr: 0.000069 grad: 0.1132 (0.1138) loss: 0.8023 (0.8088) time: 0.1568 data: 0.0773 max mem: 9377 +Train: [49] [3000/6250] eta: 0:07:55 lr: 0.000069 grad: 0.1184 (0.1140) loss: 0.8037 (0.8086) time: 0.1310 data: 0.0449 max mem: 9377 +Train: [49] [3100/6250] eta: 0:07:39 lr: 0.000069 grad: 0.1086 (0.1141) loss: 0.8027 (0.8085) time: 0.1315 data: 0.0507 max mem: 9377 +Train: [49] [3200/6250] eta: 0:07:24 lr: 0.000069 grad: 0.1079 (0.1141) loss: 0.8114 (0.8084) time: 0.1486 data: 0.0681 max mem: 9377 +Train: [49] [3300/6250] eta: 0:07:08 lr: 0.000069 grad: 0.1099 (0.1141) loss: 0.8072 (0.8083) time: 0.1478 data: 0.0659 max mem: 9377 +Train: [49] [3400/6250] eta: 0:06:52 lr: 0.000069 grad: 0.1160 (0.1141) loss: 0.8115 (0.8082) time: 0.1170 data: 0.0287 max mem: 9377 +Train: [49] [3500/6250] eta: 0:06:37 lr: 0.000069 grad: 0.1087 (0.1140) loss: 0.8069 (0.8082) time: 0.1238 data: 0.0394 max mem: 9377 +Train: [49] [3600/6250] eta: 0:06:24 lr: 0.000069 grad: 0.1180 (0.1140) loss: 0.8008 (0.8081) time: 0.1525 data: 0.0726 max mem: 9377 +Train: [49] [3700/6250] eta: 0:06:10 lr: 0.000069 grad: 0.1080 (0.1140) loss: 0.8048 (0.8081) time: 0.1154 data: 0.0326 max mem: 9377 +Train: [49] [3800/6250] eta: 0:05:55 lr: 0.000068 grad: 0.1078 (0.1141) loss: 0.8059 (0.8080) time: 0.1421 data: 0.0599 max mem: 9377 +Train: [49] [3900/6250] eta: 0:05:41 lr: 0.000068 grad: 0.1073 (0.1141) loss: 0.8099 (0.8080) time: 0.1418 data: 0.0582 max mem: 9377 +Train: [49] [4000/6250] eta: 0:05:26 lr: 0.000068 grad: 0.1140 (0.1141) loss: 0.8070 (0.8079) time: 0.1472 data: 0.0650 max mem: 9377 +Train: [49] [4100/6250] eta: 0:05:11 lr: 0.000068 grad: 0.1146 (0.1142) loss: 0.8006 (0.8078) time: 0.1383 data: 0.0578 max mem: 9377 +Train: [49] [4200/6250] eta: 0:04:57 lr: 0.000068 grad: 0.1135 (0.1141) loss: 0.8065 (0.8077) time: 0.1407 data: 0.0613 max mem: 9377 +Train: [49] [4300/6250] eta: 0:04:42 lr: 0.000068 grad: 0.1123 (0.1141) loss: 0.8025 (0.8077) time: 0.1610 data: 0.0839 max mem: 9377 +Train: [49] [4400/6250] eta: 0:04:27 lr: 0.000068 grad: 0.1096 (0.1141) loss: 0.8055 (0.8077) time: 0.1481 data: 0.0585 max mem: 9377 +Train: [49] [4500/6250] eta: 0:04:12 lr: 0.000068 grad: 0.1126 (0.1141) loss: 0.8040 (0.8077) time: 0.1593 data: 0.0816 max mem: 9377 +Train: [49] [4600/6250] eta: 0:03:58 lr: 0.000068 grad: 0.1112 (0.1141) loss: 0.8129 (0.8076) time: 0.1194 data: 0.0414 max mem: 9377 +Train: [49] [4700/6250] eta: 0:03:43 lr: 0.000068 grad: 0.1127 (0.1141) loss: 0.8087 (0.8076) time: 0.1226 data: 0.0481 max mem: 9377 +Train: [49] [4800/6250] eta: 0:03:28 lr: 0.000068 grad: 0.1094 (0.1141) loss: 0.8104 (0.8076) time: 0.1456 data: 0.0635 max mem: 9377 +Train: [49] [4900/6250] eta: 0:03:13 lr: 0.000068 grad: 0.1173 (0.1141) loss: 0.7961 (0.8075) time: 0.1364 data: 0.0628 max mem: 9377 +Train: [49] [5000/6250] eta: 0:02:59 lr: 0.000068 grad: 0.1124 (0.1141) loss: 0.8114 (0.8074) time: 0.1488 data: 0.0684 max mem: 9377 +Train: [49] [5100/6250] eta: 0:02:45 lr: 0.000068 grad: 0.1117 (0.1143) loss: 0.8118 (0.8074) time: 0.1249 data: 0.0410 max mem: 9377 +Train: [49] [5200/6250] eta: 0:02:30 lr: 0.000068 grad: 0.1128 (0.1144) loss: 0.8097 (0.8074) time: 0.1417 data: 0.0611 max mem: 9377 +Train: [49] [5300/6250] eta: 0:02:15 lr: 0.000068 grad: 0.1106 (0.1144) loss: 0.8028 (0.8073) time: 0.1338 data: 0.0526 max mem: 9377 +Train: [49] [5400/6250] eta: 0:02:01 lr: 0.000068 grad: 0.1056 (0.1144) loss: 0.7991 (0.8073) time: 0.1160 data: 0.0283 max mem: 9377 +Train: [49] [5500/6250] eta: 0:01:46 lr: 0.000068 grad: 0.1089 (0.1144) loss: 0.8004 (0.8072) time: 0.1455 data: 0.0652 max mem: 9377 +Train: [49] [5600/6250] eta: 0:01:32 lr: 0.000068 grad: 0.1189 (0.1145) loss: 0.8065 (0.8072) time: 0.1561 data: 0.0757 max mem: 9377 +Train: [49] [5700/6250] eta: 0:01:18 lr: 0.000068 grad: 0.1107 (0.1145) loss: 0.8035 (0.8071) time: 0.1464 data: 0.0669 max mem: 9377 +Train: [49] [5800/6250] eta: 0:01:04 lr: 0.000068 grad: 0.1092 (0.1145) loss: 0.8042 (0.8070) time: 0.1466 data: 0.0665 max mem: 9377 +Train: [49] [5900/6250] eta: 0:00:50 lr: 0.000068 grad: 0.1200 (0.1146) loss: 0.7984 (0.8069) time: 0.1467 data: 0.0738 max mem: 9377 +Train: [49] [6000/6250] eta: 0:00:35 lr: 0.000068 grad: 0.1128 (0.1147) loss: 0.8079 (0.8068) time: 0.1695 data: 0.0893 max mem: 9377 +Train: [49] [6100/6250] eta: 0:00:21 lr: 0.000068 grad: 0.1151 (0.1147) loss: 0.8053 (0.8067) time: 0.1324 data: 0.0465 max mem: 9377 +Train: [49] [6200/6250] eta: 0:00:07 lr: 0.000068 grad: 0.1095 (0.1147) loss: 0.7917 (0.8065) time: 0.1240 data: 0.0436 max mem: 9377 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.1132 (0.1147) loss: 0.7916 (0.8065) time: 0.1322 data: 0.0523 max mem: 9377 +Train: [49] Total time: 0:15:01 (0.1442 s / it) +Averaged stats: lr: 0.000068 grad: 0.1132 (0.1147) loss: 0.7916 (0.8065) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:03:23 loss: 0.8418 (0.8418) time: 3.2892 data: 3.1905 max mem: 9377 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8455 (0.8444) time: 0.1260 data: 0.1010 max mem: 9377 +Eval (hcp-train-subset): [49] Total time: 0:00:13 (0.2098 s / it) +Averaged stats (hcp-train-subset): loss: 0.8455 (0.8444) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [49] [ 0/62] eta: 0:04:02 loss: 0.8385 (0.8385) time: 3.9081 data: 3.8521 max mem: 9377 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8395 (0.8420) time: 0.1278 data: 0.1025 max mem: 9377 +Eval (hcp-val): [49] Total time: 0:00:13 (0.2144 s / it) +Averaged stats (hcp-val): loss: 0.8395 (0.8420) +Making plots (hcp-val): example=14 +Eval (nsd-val): [49] [ 0/62] eta: 0:03:24 loss: 0.8096 (0.8096) time: 3.3020 data: 3.2155 max mem: 9377 +Eval (nsd-val): [49] [61/62] eta: 0:00:00 loss: 0.8216 (0.8231) time: 0.1450 data: 0.1199 max mem: 9377 +Eval (nsd-val): [49] Total time: 0:00:13 (0.2220 s / it) +Averaged stats (nsd-val): loss: 0.8216 (0.8231) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00049.pth +Train: [50] [ 0/6250] eta: 8:41:27 lr: 0.000068 grad: 0.2858 (0.2858) loss: 0.6953 (0.6953) time: 5.0060 data: 4.7451 max mem: 9377 +Train: [50] [ 100/6250] eta: 0:20:16 lr: 0.000068 grad: 0.1407 (0.1632) loss: 0.8048 (0.8198) time: 0.1444 data: 0.0378 max mem: 9377 +Train: [50] [ 200/6250] eta: 0:16:55 lr: 0.000068 grad: 0.1036 (0.1456) loss: 0.8229 (0.8150) time: 0.1462 data: 0.0544 max mem: 9377 +Train: [50] [ 300/6250] eta: 0:15:32 lr: 0.000068 grad: 0.1055 (0.1361) loss: 0.8078 (0.8149) time: 0.1240 data: 0.0378 max mem: 9377 +Train: [50] [ 400/6250] eta: 0:14:44 lr: 0.000068 grad: 0.0935 (0.1276) loss: 0.8156 (0.8154) time: 0.1258 data: 0.0371 max mem: 9377 +Train: [50] [ 500/6250] eta: 0:14:04 lr: 0.000067 grad: 0.0974 (0.1239) loss: 0.8176 (0.8154) time: 0.1267 data: 0.0331 max mem: 9377 +Train: [50] [ 600/6250] eta: 0:13:42 lr: 0.000067 grad: 0.0956 (0.1200) loss: 0.8196 (0.8162) time: 0.1297 data: 0.0466 max mem: 9377 +Train: [50] [ 700/6250] eta: 0:13:34 lr: 0.000067 grad: 0.0963 (0.1177) loss: 0.8234 (0.8167) time: 0.1649 data: 0.0817 max mem: 9377 +Train: [50] [ 800/6250] eta: 0:13:20 lr: 0.000067 grad: 0.1036 (0.1166) loss: 0.8143 (0.8168) time: 0.1588 data: 0.0780 max mem: 9377 +Train: [50] [ 900/6250] eta: 0:13:03 lr: 0.000067 grad: 0.1086 (0.1155) loss: 0.8068 (0.8165) time: 0.1523 data: 0.0688 max mem: 9377 +Train: [50] [1000/6250] eta: 0:12:48 lr: 0.000067 grad: 0.0997 (0.1145) loss: 0.8136 (0.8161) time: 0.1778 data: 0.0982 max mem: 9377 +Train: [50] [1100/6250] eta: 0:12:25 lr: 0.000067 grad: 0.1003 (0.1137) loss: 0.8153 (0.8157) time: 0.1501 data: 0.0697 max mem: 9377 +Train: [50] [1200/6250] eta: 0:12:06 lr: 0.000067 grad: 0.1007 (0.1131) loss: 0.8150 (0.8153) time: 0.1234 data: 0.0394 max mem: 9377 +Train: [50] [1300/6250] eta: 0:11:49 lr: 0.000067 grad: 0.1047 (0.1128) loss: 0.8151 (0.8148) time: 0.1220 data: 0.0336 max mem: 9377 +Train: [50] [1400/6250] eta: 0:11:32 lr: 0.000067 grad: 0.1088 (0.1124) loss: 0.8043 (0.8143) time: 0.1307 data: 0.0464 max mem: 9377 +Train: [50] [1500/6250] eta: 0:11:16 lr: 0.000067 grad: 0.1035 (0.1121) loss: 0.8022 (0.8139) time: 0.1344 data: 0.0546 max mem: 9377 +Train: [50] [1600/6250] eta: 0:11:00 lr: 0.000067 grad: 0.1110 (0.1119) loss: 0.8008 (0.8135) time: 0.1410 data: 0.0642 max mem: 9377 +Train: [50] [1700/6250] eta: 0:10:43 lr: 0.000067 grad: 0.1191 (0.1118) loss: 0.8067 (0.8131) time: 0.1318 data: 0.0515 max mem: 9377 +Train: [50] [1800/6250] eta: 0:10:27 lr: 0.000067 grad: 0.1153 (0.1119) loss: 0.8098 (0.8128) time: 0.1256 data: 0.0439 max mem: 9377 +Train: [50] [1900/6250] eta: 0:10:11 lr: 0.000067 grad: 0.1086 (0.1121) loss: 0.8047 (0.8123) time: 0.1395 data: 0.0568 max mem: 9377 +Train: [50] [2000/6250] eta: 0:09:58 lr: 0.000067 grad: 0.0999 (0.1119) loss: 0.8081 (0.8120) time: 0.1583 data: 0.0792 max mem: 9377 +Train: [50] [2100/6250] eta: 0:09:45 lr: 0.000067 grad: 0.1121 (0.1121) loss: 0.8064 (0.8116) time: 0.1391 data: 0.0578 max mem: 9377 +Train: [50] [2200/6250] eta: 0:09:30 lr: 0.000067 grad: 0.1165 (0.1123) loss: 0.8012 (0.8113) time: 0.1594 data: 0.0757 max mem: 9377 +Train: [50] [2300/6250] eta: 0:09:16 lr: 0.000067 grad: 0.1080 (0.1123) loss: 0.7985 (0.8110) time: 0.1364 data: 0.0515 max mem: 9377 +Train: [50] [2400/6250] eta: 0:09:03 lr: 0.000067 grad: 0.1064 (0.1124) loss: 0.7989 (0.8105) time: 0.1615 data: 0.0808 max mem: 9377 +Train: [50] [2500/6250] eta: 0:08:48 lr: 0.000067 grad: 0.1166 (0.1124) loss: 0.8038 (0.8103) time: 0.1220 data: 0.0388 max mem: 9377 +Train: [50] [2600/6250] eta: 0:08:34 lr: 0.000067 grad: 0.1231 (0.1125) loss: 0.7949 (0.8099) time: 0.1192 data: 0.0385 max mem: 9377 +Train: [50] [2700/6250] eta: 0:08:19 lr: 0.000067 grad: 0.1120 (0.1127) loss: 0.7993 (0.8096) time: 0.1419 data: 0.0647 max mem: 9377 +Train: [50] [2800/6250] eta: 0:08:03 lr: 0.000067 grad: 0.1163 (0.1128) loss: 0.7986 (0.8093) time: 0.1287 data: 0.0458 max mem: 9377 +Train: [50] [2900/6250] eta: 0:07:49 lr: 0.000067 grad: 0.1039 (0.1130) loss: 0.8084 (0.8090) time: 0.1353 data: 0.0593 max mem: 9377 +Train: [50] [3000/6250] eta: 0:07:35 lr: 0.000067 grad: 0.1199 (0.1132) loss: 0.7970 (0.8086) time: 0.1354 data: 0.0566 max mem: 9377 +Train: [50] [3100/6250] eta: 0:07:21 lr: 0.000067 grad: 0.1170 (0.1133) loss: 0.7979 (0.8083) time: 0.1343 data: 0.0570 max mem: 9377 +Train: [50] [3200/6250] eta: 0:07:07 lr: 0.000067 grad: 0.1173 (0.1134) loss: 0.8000 (0.8080) time: 0.1359 data: 0.0566 max mem: 9377 +Train: [50] [3300/6250] eta: 0:06:53 lr: 0.000067 grad: 0.1176 (0.1135) loss: 0.7965 (0.8077) time: 0.1283 data: 0.0462 max mem: 9377 +Train: [50] [3400/6250] eta: 0:06:39 lr: 0.000067 grad: 0.1134 (0.1137) loss: 0.7940 (0.8074) time: 0.1443 data: 0.0670 max mem: 9377 +Train: [50] [3500/6250] eta: 0:06:24 lr: 0.000067 grad: 0.1203 (0.1139) loss: 0.7994 (0.8072) time: 0.1277 data: 0.0453 max mem: 9377 +Train: [50] [3600/6250] eta: 0:06:10 lr: 0.000066 grad: 0.1118 (0.1141) loss: 0.8055 (0.8071) time: 0.1202 data: 0.0362 max mem: 9377 +Train: [50] [3700/6250] eta: 0:05:57 lr: 0.000066 grad: 0.1143 (0.1141) loss: 0.7985 (0.8069) time: 0.1391 data: 0.0593 max mem: 9377 +Train: [50] [3800/6250] eta: 0:05:44 lr: 0.000066 grad: 0.1146 (0.1142) loss: 0.8042 (0.8069) time: 0.1456 data: 0.0649 max mem: 9377 +Train: [50] [3900/6250] eta: 0:05:30 lr: 0.000066 grad: 0.1108 (0.1141) loss: 0.7983 (0.8068) time: 0.1527 data: 0.0695 max mem: 9377 +Train: [50] [4000/6250] eta: 0:05:17 lr: 0.000066 grad: 0.1088 (0.1141) loss: 0.8118 (0.8069) time: 0.1456 data: 0.0654 max mem: 9377 +Train: [50] [4100/6250] eta: 0:05:03 lr: 0.000066 grad: 0.1193 (0.1142) loss: 0.7912 (0.8068) time: 0.1304 data: 0.0497 max mem: 9377 +Train: [50] [4200/6250] eta: 0:04:49 lr: 0.000066 grad: 0.1151 (0.1143) loss: 0.8022 (0.8066) time: 0.1467 data: 0.0723 max mem: 9377 +Train: [50] [4300/6250] eta: 0:04:36 lr: 0.000066 grad: 0.1193 (0.1146) loss: 0.7947 (0.8066) time: 0.1430 data: 0.0610 max mem: 9377 +Train: [50] [4400/6250] eta: 0:04:21 lr: 0.000066 grad: 0.1176 (0.1147) loss: 0.8050 (0.8065) time: 0.1449 data: 0.0592 max mem: 9377 +Train: [50] [4500/6250] eta: 0:04:07 lr: 0.000066 grad: 0.1119 (0.1148) loss: 0.8016 (0.8063) time: 0.1362 data: 0.0503 max mem: 9377 +Train: [50] [4600/6250] eta: 0:03:53 lr: 0.000066 grad: 0.1193 (0.1150) loss: 0.7951 (0.8063) time: 0.1530 data: 0.0721 max mem: 9377 +Train: [50] [4700/6250] eta: 0:03:39 lr: 0.000066 grad: 0.1070 (0.1151) loss: 0.8036 (0.8062) time: 0.1493 data: 0.0695 max mem: 9377 +Train: [50] [4800/6250] eta: 0:03:25 lr: 0.000066 grad: 0.1176 (0.1152) loss: 0.8009 (0.8062) time: 0.1359 data: 0.0539 max mem: 9377 +Train: [50] [4900/6250] eta: 0:03:10 lr: 0.000066 grad: 0.1163 (0.1153) loss: 0.7986 (0.8061) time: 0.1288 data: 0.0501 max mem: 9377 +Train: [50] [5000/6250] eta: 0:02:56 lr: 0.000066 grad: 0.1140 (0.1153) loss: 0.8061 (0.8062) time: 0.1439 data: 0.0560 max mem: 9377 +Train: [50] [5100/6250] eta: 0:02:42 lr: 0.000066 grad: 0.1110 (0.1154) loss: 0.7997 (0.8062) time: 0.1288 data: 0.0473 max mem: 9377 +Train: [50] [5200/6250] eta: 0:02:28 lr: 0.000066 grad: 0.1163 (0.1154) loss: 0.8034 (0.8062) time: 0.1342 data: 0.0486 max mem: 9377 +Train: [50] [5300/6250] eta: 0:02:14 lr: 0.000066 grad: 0.1092 (0.1154) loss: 0.8154 (0.8062) time: 0.1372 data: 0.0582 max mem: 9377 +Train: [50] [5400/6250] eta: 0:01:59 lr: 0.000066 grad: 0.1205 (0.1155) loss: 0.8086 (0.8062) time: 0.1101 data: 0.0238 max mem: 9377 +Train: [50] [5500/6250] eta: 0:01:45 lr: 0.000066 grad: 0.1048 (0.1155) loss: 0.8111 (0.8062) time: 0.1597 data: 0.0784 max mem: 9377 +Train: [50] [5600/6250] eta: 0:01:31 lr: 0.000066 grad: 0.1127 (0.1155) loss: 0.8075 (0.8062) time: 0.1644 data: 0.0838 max mem: 9377 +Train: [50] [5700/6250] eta: 0:01:17 lr: 0.000066 grad: 0.1085 (0.1155) loss: 0.8097 (0.8062) time: 0.1577 data: 0.0805 max mem: 9377 +Train: [50] [5800/6250] eta: 0:01:03 lr: 0.000066 grad: 0.1143 (0.1156) loss: 0.8015 (0.8062) time: 0.1548 data: 0.0749 max mem: 9377 +Train: [50] [5900/6250] eta: 0:00:49 lr: 0.000066 grad: 0.1062 (0.1156) loss: 0.8150 (0.8062) time: 0.1544 data: 0.0722 max mem: 9377 +Train: [50] [6000/6250] eta: 0:00:35 lr: 0.000066 grad: 0.1160 (0.1156) loss: 0.8027 (0.8062) time: 0.1531 data: 0.0668 max mem: 9377 +Train: [50] [6100/6250] eta: 0:00:21 lr: 0.000066 grad: 0.1196 (0.1156) loss: 0.8036 (0.8062) time: 0.1353 data: 0.0537 max mem: 9377 +Train: [50] [6200/6250] eta: 0:00:07 lr: 0.000066 grad: 0.1179 (0.1157) loss: 0.8126 (0.8062) time: 0.1247 data: 0.0421 max mem: 9377 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.1108 (0.1156) loss: 0.8127 (0.8063) time: 0.1417 data: 0.0597 max mem: 9377 +Train: [50] Total time: 0:14:52 (0.1428 s / it) +Averaged stats: lr: 0.000066 grad: 0.1108 (0.1156) loss: 0.8127 (0.8063) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:04:48 loss: 0.8398 (0.8398) time: 4.6469 data: 4.6167 max mem: 9377 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8418 (0.8433) time: 0.1068 data: 0.0818 max mem: 9377 +Eval (hcp-train-subset): [50] Total time: 0:00:13 (0.2187 s / it) +Averaged stats (hcp-train-subset): loss: 0.8418 (0.8433) +Eval (hcp-val): [50] [ 0/62] eta: 0:04:54 loss: 0.8369 (0.8369) time: 4.7482 data: 4.7154 max mem: 9377 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8406 (0.8409) time: 0.1279 data: 0.1027 max mem: 9377 +Eval (hcp-val): [50] Total time: 0:00:12 (0.2094 s / it) +Averaged stats (hcp-val): loss: 0.8406 (0.8409) +Eval (nsd-val): [50] [ 0/62] eta: 0:05:09 loss: 0.8048 (0.8048) time: 4.9839 data: 4.9505 max mem: 9377 +Eval (nsd-val): [50] [61/62] eta: 0:00:00 loss: 0.8192 (0.8198) time: 0.1318 data: 0.1068 max mem: 9377 +Eval (nsd-val): [50] Total time: 0:00:13 (0.2128 s / it) +Averaged stats (nsd-val): loss: 0.8192 (0.8198) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 10:21:03 lr: 0.000066 grad: 0.4076 (0.4076) loss: 0.8640 (0.8640) time: 5.9622 data: 5.8676 max mem: 9377 +Train: [51] [ 100/6250] eta: 0:19:02 lr: 0.000066 grad: 0.1082 (0.1334) loss: 0.8185 (0.8224) time: 0.1463 data: 0.0518 max mem: 9377 +Train: [51] [ 200/6250] eta: 0:16:28 lr: 0.000066 grad: 0.1052 (0.1261) loss: 0.8101 (0.8166) time: 0.1328 data: 0.0516 max mem: 9377 +Train: [51] [ 300/6250] eta: 0:15:17 lr: 0.000065 grad: 0.0983 (0.1218) loss: 0.8091 (0.8146) time: 0.1427 data: 0.0541 max mem: 9377 +Train: [51] [ 400/6250] eta: 0:14:27 lr: 0.000065 grad: 0.1008 (0.1189) loss: 0.8115 (0.8129) time: 0.1200 data: 0.0252 max mem: 9377 +Train: [51] [ 500/6250] eta: 0:13:55 lr: 0.000065 grad: 0.1040 (0.1165) loss: 0.8074 (0.8121) time: 0.1394 data: 0.0435 max mem: 9377 +Train: [51] [ 600/6250] eta: 0:13:35 lr: 0.000065 grad: 0.1019 (0.1152) loss: 0.8142 (0.8121) time: 0.1333 data: 0.0445 max mem: 9377 +Train: [51] [ 700/6250] eta: 0:13:19 lr: 0.000065 grad: 0.1064 (0.1147) loss: 0.8077 (0.8119) time: 0.1366 data: 0.0540 max mem: 9377 +Train: [51] [ 800/6250] eta: 0:13:01 lr: 0.000065 grad: 0.1054 (0.1141) loss: 0.8167 (0.8118) time: 0.1407 data: 0.0563 max mem: 9377 +Train: [51] [ 900/6250] eta: 0:12:47 lr: 0.000065 grad: 0.1068 (0.1140) loss: 0.8043 (0.8115) time: 0.1627 data: 0.0835 max mem: 9377 +Train: [51] [1000/6250] eta: 0:12:29 lr: 0.000065 grad: 0.1130 (0.1140) loss: 0.7935 (0.8109) time: 0.1183 data: 0.0446 max mem: 9377 +Train: [51] [1100/6250] eta: 0:12:12 lr: 0.000065 grad: 0.1140 (0.1137) loss: 0.8034 (0.8104) time: 0.1435 data: 0.0569 max mem: 9377 +Train: [51] [1200/6250] eta: 0:11:56 lr: 0.000065 grad: 0.1131 (0.1138) loss: 0.8011 (0.8102) time: 0.1534 data: 0.0737 max mem: 9377 +Train: [51] [1300/6250] eta: 0:11:40 lr: 0.000065 grad: 0.1112 (0.1140) loss: 0.8075 (0.8099) time: 0.1307 data: 0.0494 max mem: 9377 +Train: [51] [1400/6250] eta: 0:11:24 lr: 0.000065 grad: 0.1152 (0.1139) loss: 0.7993 (0.8097) time: 0.1547 data: 0.0746 max mem: 9377 +Train: [51] [1500/6250] eta: 0:11:10 lr: 0.000065 grad: 0.1083 (0.1145) loss: 0.8056 (0.8093) time: 0.1494 data: 0.0754 max mem: 9377 +Train: [51] [1600/6250] eta: 0:10:54 lr: 0.000065 grad: 0.1178 (0.1145) loss: 0.8002 (0.8090) time: 0.1486 data: 0.0725 max mem: 9377 +Train: [51] [1700/6250] eta: 0:10:40 lr: 0.000065 grad: 0.1153 (0.1147) loss: 0.7917 (0.8085) time: 0.1267 data: 0.0454 max mem: 9377 +Train: [51] [1800/6250] eta: 0:10:27 lr: 0.000065 grad: 0.1194 (0.1152) loss: 0.7894 (0.8079) time: 0.1414 data: 0.0540 max mem: 9377 +Train: [51] [1900/6250] eta: 0:10:14 lr: 0.000065 grad: 0.1106 (0.1153) loss: 0.8002 (0.8075) time: 0.1444 data: 0.0673 max mem: 9377 +Train: [51] [2000/6250] eta: 0:09:59 lr: 0.000065 grad: 0.1204 (0.1156) loss: 0.8026 (0.8073) time: 0.1379 data: 0.0604 max mem: 9377 +Train: [51] [2100/6250] eta: 0:09:45 lr: 0.000065 grad: 0.1156 (0.1157) loss: 0.8009 (0.8070) time: 0.1341 data: 0.0533 max mem: 9377 +Train: [51] [2200/6250] eta: 0:09:30 lr: 0.000065 grad: 0.1147 (0.1158) loss: 0.8022 (0.8069) time: 0.1311 data: 0.0525 max mem: 9377 +Train: [51] [2300/6250] eta: 0:09:15 lr: 0.000065 grad: 0.1145 (0.1158) loss: 0.8111 (0.8067) time: 0.1227 data: 0.0380 max mem: 9377 +Train: [51] [2400/6250] eta: 0:09:01 lr: 0.000065 grad: 0.1142 (0.1160) loss: 0.8041 (0.8065) time: 0.1520 data: 0.0729 max mem: 9377 +Train: [51] [2500/6250] eta: 0:08:46 lr: 0.000065 grad: 0.1157 (0.1161) loss: 0.8096 (0.8064) time: 0.1271 data: 0.0407 max mem: 9377 +Train: [51] [2600/6250] eta: 0:08:33 lr: 0.000065 grad: 0.1134 (0.1161) loss: 0.7926 (0.8062) time: 0.1390 data: 0.0575 max mem: 9377 +Train: [51] [2700/6250] eta: 0:08:18 lr: 0.000065 grad: 0.1136 (0.1164) loss: 0.8017 (0.8060) time: 0.1382 data: 0.0582 max mem: 9377 +Train: [51] [2800/6250] eta: 0:08:03 lr: 0.000065 grad: 0.1094 (0.1164) loss: 0.8016 (0.8058) time: 0.1593 data: 0.0828 max mem: 9377 +Train: [51] [2900/6250] eta: 0:07:48 lr: 0.000065 grad: 0.1128 (0.1164) loss: 0.8064 (0.8057) time: 0.1293 data: 0.0378 max mem: 9377 +Train: [51] [3000/6250] eta: 0:07:34 lr: 0.000065 grad: 0.1168 (0.1164) loss: 0.8021 (0.8057) time: 0.1291 data: 0.0509 max mem: 9377 +Train: [51] [3100/6250] eta: 0:07:20 lr: 0.000065 grad: 0.1118 (0.1164) loss: 0.8105 (0.8057) time: 0.1396 data: 0.0607 max mem: 9377 +Train: [51] [3200/6250] eta: 0:07:06 lr: 0.000065 grad: 0.1186 (0.1164) loss: 0.8066 (0.8056) time: 0.1365 data: 0.0564 max mem: 9377 +Train: [51] [3300/6250] eta: 0:06:52 lr: 0.000065 grad: 0.1135 (0.1164) loss: 0.7994 (0.8056) time: 0.1265 data: 0.0464 max mem: 9377 +Train: [51] [3400/6250] eta: 0:06:38 lr: 0.000064 grad: 0.1150 (0.1165) loss: 0.8021 (0.8057) time: 0.1475 data: 0.0678 max mem: 9377 +Train: [51] [3500/6250] eta: 0:06:25 lr: 0.000064 grad: 0.1074 (0.1164) loss: 0.8081 (0.8057) time: 0.1174 data: 0.0365 max mem: 9377 +Train: [51] [3600/6250] eta: 0:06:11 lr: 0.000064 grad: 0.1110 (0.1164) loss: 0.8086 (0.8057) time: 0.1184 data: 0.0445 max mem: 9377 +Train: [51] [3700/6250] eta: 0:05:57 lr: 0.000064 grad: 0.1195 (0.1165) loss: 0.8031 (0.8057) time: 0.1429 data: 0.0630 max mem: 9377 +Train: [51] [3800/6250] eta: 0:05:42 lr: 0.000064 grad: 0.1127 (0.1165) loss: 0.8078 (0.8057) time: 0.1119 data: 0.0309 max mem: 9377 +Train: [51] [3900/6250] eta: 0:05:28 lr: 0.000064 grad: 0.1146 (0.1166) loss: 0.8072 (0.8057) time: 0.1346 data: 0.0536 max mem: 9377 +Train: [51] [4000/6250] eta: 0:05:14 lr: 0.000064 grad: 0.1149 (0.1167) loss: 0.8026 (0.8057) time: 0.1469 data: 0.0615 max mem: 9377 +Train: [51] [4100/6250] eta: 0:05:01 lr: 0.000064 grad: 0.1109 (0.1168) loss: 0.8056 (0.8057) time: 0.2305 data: 0.1562 max mem: 9377 +Train: [51] [4200/6250] eta: 0:04:48 lr: 0.000064 grad: 0.1180 (0.1170) loss: 0.8045 (0.8056) time: 0.1742 data: 0.0922 max mem: 9377 +Train: [51] [4300/6250] eta: 0:04:36 lr: 0.000064 grad: 0.1179 (0.1171) loss: 0.8062 (0.8055) time: 0.1402 data: 0.0602 max mem: 9377 +Train: [51] [4400/6250] eta: 0:04:23 lr: 0.000064 grad: 0.1106 (0.1171) loss: 0.8025 (0.8055) time: 0.1762 data: 0.0985 max mem: 9377 +Train: [51] [4500/6250] eta: 0:04:09 lr: 0.000064 grad: 0.1113 (0.1172) loss: 0.8107 (0.8055) time: 0.1352 data: 0.0581 max mem: 9377 +Train: [51] [4600/6250] eta: 0:03:55 lr: 0.000064 grad: 0.1141 (0.1173) loss: 0.8092 (0.8055) time: 0.1587 data: 0.0779 max mem: 9377 +Train: [51] [4700/6250] eta: 0:03:42 lr: 0.000064 grad: 0.1174 (0.1173) loss: 0.8048 (0.8055) time: 0.1674 data: 0.0860 max mem: 9377 +Train: [51] [4800/6250] eta: 0:03:28 lr: 0.000064 grad: 0.1160 (0.1173) loss: 0.8072 (0.8055) time: 0.1327 data: 0.0470 max mem: 9377 +Train: [51] [4900/6250] eta: 0:03:14 lr: 0.000064 grad: 0.1150 (0.1174) loss: 0.8015 (0.8054) time: 0.1687 data: 0.0899 max mem: 9377 +Train: [51] [5000/6250] eta: 0:02:59 lr: 0.000064 grad: 0.1093 (0.1174) loss: 0.7979 (0.8054) time: 0.1399 data: 0.0556 max mem: 9377 +Train: [51] [5100/6250] eta: 0:02:45 lr: 0.000064 grad: 0.1175 (0.1175) loss: 0.8032 (0.8053) time: 0.1619 data: 0.0743 max mem: 9377 +Train: [51] [5200/6250] eta: 0:02:31 lr: 0.000064 grad: 0.1206 (0.1176) loss: 0.7926 (0.8052) time: 0.1612 data: 0.0751 max mem: 9377 +Train: [51] [5300/6250] eta: 0:02:17 lr: 0.000064 grad: 0.1186 (0.1176) loss: 0.8081 (0.8051) time: 0.1576 data: 0.0637 max mem: 9377 +Train: [51] [5400/6250] eta: 0:02:02 lr: 0.000064 grad: 0.1069 (0.1176) loss: 0.8086 (0.8051) time: 0.1519 data: 0.0678 max mem: 9377 +Train: [51] [5500/6250] eta: 0:01:48 lr: 0.000064 grad: 0.1147 (0.1177) loss: 0.8018 (0.8051) time: 0.1411 data: 0.0629 max mem: 9377 +Train: [51] [5600/6250] eta: 0:01:33 lr: 0.000064 grad: 0.1110 (0.1177) loss: 0.7959 (0.8050) time: 0.1501 data: 0.0647 max mem: 9377 +Train: [51] [5700/6250] eta: 0:01:19 lr: 0.000064 grad: 0.1124 (0.1177) loss: 0.8039 (0.8050) time: 0.1275 data: 0.0457 max mem: 9377 +Train: [51] [5800/6250] eta: 0:01:04 lr: 0.000064 grad: 0.1094 (0.1178) loss: 0.8129 (0.8050) time: 0.1369 data: 0.0535 max mem: 9377 +Train: [51] [5900/6250] eta: 0:00:50 lr: 0.000064 grad: 0.1179 (0.1177) loss: 0.8037 (0.8050) time: 0.1630 data: 0.0908 max mem: 9377 +Train: [51] [6000/6250] eta: 0:00:35 lr: 0.000064 grad: 0.1138 (0.1177) loss: 0.7998 (0.8050) time: 0.1437 data: 0.0616 max mem: 9377 +Train: [51] [6100/6250] eta: 0:00:21 lr: 0.000064 grad: 0.1055 (0.1177) loss: 0.8075 (0.8050) time: 0.1314 data: 0.0474 max mem: 9377 +Train: [51] [6200/6250] eta: 0:00:07 lr: 0.000064 grad: 0.1129 (0.1177) loss: 0.8048 (0.8050) time: 0.1478 data: 0.0668 max mem: 9377 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.1103 (0.1177) loss: 0.8026 (0.8050) time: 0.1295 data: 0.0425 max mem: 9377 +Train: [51] Total time: 0:15:03 (0.1446 s / it) +Averaged stats: lr: 0.000064 grad: 0.1103 (0.1177) loss: 0.8026 (0.8050) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:04:45 loss: 0.8404 (0.8404) time: 4.6015 data: 4.5699 max mem: 9377 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8409 (0.8431) time: 0.1291 data: 0.1039 max mem: 9377 +Eval (hcp-train-subset): [51] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (hcp-train-subset): loss: 0.8409 (0.8431) +Eval (hcp-val): [51] [ 0/62] eta: 0:05:16 loss: 0.8418 (0.8418) time: 5.1054 data: 5.0428 max mem: 9377 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8395 (0.8409) time: 0.1434 data: 0.1179 max mem: 9377 +Eval (hcp-val): [51] Total time: 0:00:13 (0.2155 s / it) +Averaged stats (hcp-val): loss: 0.8395 (0.8409) +Eval (nsd-val): [51] [ 0/62] eta: 0:04:32 loss: 0.8048 (0.8048) time: 4.3939 data: 4.3569 max mem: 9377 +Eval (nsd-val): [51] [61/62] eta: 0:00:00 loss: 0.8200 (0.8202) time: 0.1381 data: 0.1116 max mem: 9377 +Eval (nsd-val): [51] Total time: 0:00:13 (0.2149 s / it) +Averaged stats (nsd-val): loss: 0.8200 (0.8202) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 8:07:04 lr: 0.000064 grad: 0.2753 (0.2753) loss: 0.7722 (0.7722) time: 4.6759 data: 4.4857 max mem: 9377 +Train: [52] [ 100/6250] eta: 0:19:25 lr: 0.000063 grad: 0.1239 (0.1510) loss: 0.8200 (0.8324) time: 0.1563 data: 0.0572 max mem: 9377 +Train: [52] [ 200/6250] eta: 0:16:36 lr: 0.000063 grad: 0.1189 (0.1451) loss: 0.8153 (0.8230) time: 0.1343 data: 0.0442 max mem: 9377 +Train: [52] [ 300/6250] eta: 0:15:47 lr: 0.000063 grad: 0.1172 (0.1386) loss: 0.8121 (0.8188) time: 0.1409 data: 0.0480 max mem: 9377 +Train: [52] [ 400/6250] eta: 0:14:57 lr: 0.000063 grad: 0.1164 (0.1350) loss: 0.8077 (0.8160) time: 0.1451 data: 0.0490 max mem: 9377 +Train: [52] [ 500/6250] eta: 0:14:20 lr: 0.000063 grad: 0.1184 (0.1321) loss: 0.7939 (0.8141) time: 0.1372 data: 0.0474 max mem: 9377 +Train: [52] [ 600/6250] eta: 0:13:49 lr: 0.000063 grad: 0.1136 (0.1298) loss: 0.8101 (0.8128) time: 0.1225 data: 0.0381 max mem: 9377 +Train: [52] [ 700/6250] eta: 0:13:30 lr: 0.000063 grad: 0.1166 (0.1289) loss: 0.8087 (0.8115) time: 0.1665 data: 0.0809 max mem: 9377 +Train: [52] [ 800/6250] eta: 0:13:25 lr: 0.000063 grad: 0.1069 (0.1276) loss: 0.8030 (0.8107) time: 0.1522 data: 0.0686 max mem: 9377 +Train: [52] [ 900/6250] eta: 0:13:21 lr: 0.000063 grad: 0.1164 (0.1268) loss: 0.8013 (0.8100) time: 0.1618 data: 0.0776 max mem: 9377 +Train: [52] [1000/6250] eta: 0:13:14 lr: 0.000063 grad: 0.1119 (0.1260) loss: 0.8045 (0.8094) time: 0.1564 data: 0.0683 max mem: 9377 +Train: [52] [1100/6250] eta: 0:13:12 lr: 0.000063 grad: 0.1131 (0.1252) loss: 0.8023 (0.8087) time: 0.1691 data: 0.0849 max mem: 9377 +Train: [52] [1200/6250] eta: 0:12:58 lr: 0.000063 grad: 0.1137 (0.1242) loss: 0.7943 (0.8083) time: 0.1634 data: 0.0788 max mem: 9377 +Train: [52] [1300/6250] eta: 0:12:43 lr: 0.000063 grad: 0.1158 (0.1236) loss: 0.8020 (0.8081) time: 0.1548 data: 0.0726 max mem: 9377 +Train: [52] [1400/6250] eta: 0:12:23 lr: 0.000063 grad: 0.1158 (0.1233) loss: 0.8016 (0.8076) time: 0.1435 data: 0.0581 max mem: 9377 +Train: [52] [1500/6250] eta: 0:12:04 lr: 0.000063 grad: 0.1229 (0.1234) loss: 0.7925 (0.8068) time: 0.1645 data: 0.0791 max mem: 9377 +Train: [52] [1600/6250] eta: 0:11:45 lr: 0.000063 grad: 0.1207 (0.1230) loss: 0.7998 (0.8063) time: 0.1527 data: 0.0686 max mem: 9377 +Train: [52] [1700/6250] eta: 0:11:28 lr: 0.000063 grad: 0.1184 (0.1231) loss: 0.7976 (0.8058) time: 0.1456 data: 0.0647 max mem: 9377 +Train: [52] [1800/6250] eta: 0:11:11 lr: 0.000063 grad: 0.1142 (0.1230) loss: 0.7974 (0.8054) time: 0.1375 data: 0.0545 max mem: 9377 +Train: [52] [1900/6250] eta: 0:10:54 lr: 0.000063 grad: 0.1208 (0.1229) loss: 0.7994 (0.8051) time: 0.1412 data: 0.0613 max mem: 9377 +Train: [52] [2000/6250] eta: 0:10:37 lr: 0.000063 grad: 0.1217 (0.1229) loss: 0.8031 (0.8049) time: 0.1319 data: 0.0434 max mem: 9377 +Train: [52] [2100/6250] eta: 0:10:20 lr: 0.000063 grad: 0.1269 (0.1228) loss: 0.8067 (0.8047) time: 0.1529 data: 0.0743 max mem: 9377 +Train: [52] [2200/6250] eta: 0:10:03 lr: 0.000063 grad: 0.1181 (0.1228) loss: 0.8030 (0.8044) time: 0.1350 data: 0.0525 max mem: 9377 +Train: [52] [2300/6250] eta: 0:09:46 lr: 0.000063 grad: 0.1160 (0.1228) loss: 0.7966 (0.8042) time: 0.1439 data: 0.0697 max mem: 9377 +Train: [52] [2400/6250] eta: 0:09:29 lr: 0.000063 grad: 0.1183 (0.1228) loss: 0.7951 (0.8040) time: 0.1408 data: 0.0621 max mem: 9377 +Train: [52] [2500/6250] eta: 0:09:14 lr: 0.000063 grad: 0.1135 (0.1227) loss: 0.8024 (0.8038) time: 0.1355 data: 0.0540 max mem: 9377 +Train: [52] [2600/6250] eta: 0:08:57 lr: 0.000063 grad: 0.1206 (0.1228) loss: 0.7985 (0.8037) time: 0.1259 data: 0.0425 max mem: 9377 +Train: [52] [2700/6250] eta: 0:08:42 lr: 0.000063 grad: 0.1163 (0.1229) loss: 0.7952 (0.8035) time: 0.1514 data: 0.0704 max mem: 9377 +Train: [52] [2800/6250] eta: 0:08:26 lr: 0.000063 grad: 0.1204 (0.1229) loss: 0.7956 (0.8034) time: 0.1448 data: 0.0621 max mem: 9377 +Train: [52] [2900/6250] eta: 0:08:11 lr: 0.000063 grad: 0.1227 (0.1232) loss: 0.7989 (0.8031) time: 0.1437 data: 0.0605 max mem: 9377 +Train: [52] [3000/6250] eta: 0:07:55 lr: 0.000063 grad: 0.1208 (0.1231) loss: 0.8009 (0.8030) time: 0.1433 data: 0.0615 max mem: 9377 +Train: [52] [3100/6250] eta: 0:07:40 lr: 0.000063 grad: 0.1185 (0.1230) loss: 0.7994 (0.8029) time: 0.1409 data: 0.0525 max mem: 9377 +Train: [52] [3200/6250] eta: 0:07:25 lr: 0.000062 grad: 0.1222 (0.1230) loss: 0.8040 (0.8028) time: 0.1443 data: 0.0607 max mem: 9377 +Train: [52] [3300/6250] eta: 0:07:10 lr: 0.000062 grad: 0.1131 (0.1229) loss: 0.8005 (0.8027) time: 0.1514 data: 0.0688 max mem: 9377 +Train: [52] [3400/6250] eta: 0:06:56 lr: 0.000062 grad: 0.1106 (0.1230) loss: 0.8008 (0.8025) time: 0.1750 data: 0.0923 max mem: 9377 +Train: [52] [3500/6250] eta: 0:06:40 lr: 0.000062 grad: 0.1202 (0.1231) loss: 0.8002 (0.8024) time: 0.1441 data: 0.0644 max mem: 9377 +Train: [52] [3600/6250] eta: 0:06:25 lr: 0.000062 grad: 0.1177 (0.1231) loss: 0.7947 (0.8022) time: 0.1557 data: 0.0760 max mem: 9377 +Train: [52] [3700/6250] eta: 0:06:10 lr: 0.000062 grad: 0.1197 (0.1231) loss: 0.7975 (0.8021) time: 0.1096 data: 0.0203 max mem: 9377 +Train: [52] [3800/6250] eta: 0:05:55 lr: 0.000062 grad: 0.1203 (0.1232) loss: 0.8018 (0.8020) time: 0.1451 data: 0.0667 max mem: 9377 +Train: [52] [3900/6250] eta: 0:05:40 lr: 0.000062 grad: 0.1248 (0.1231) loss: 0.7952 (0.8019) time: 0.1650 data: 0.0806 max mem: 9377 +Train: [52] [4000/6250] eta: 0:05:26 lr: 0.000062 grad: 0.1129 (0.1230) loss: 0.7995 (0.8019) time: 0.1234 data: 0.0406 max mem: 9377 +Train: [52] [4100/6250] eta: 0:05:11 lr: 0.000062 grad: 0.1171 (0.1230) loss: 0.8017 (0.8018) time: 0.1306 data: 0.0517 max mem: 9377 +Train: [52] [4200/6250] eta: 0:04:57 lr: 0.000062 grad: 0.1168 (0.1229) loss: 0.8064 (0.8019) time: 0.1633 data: 0.0803 max mem: 9377 +Train: [52] [4300/6250] eta: 0:04:43 lr: 0.000062 grad: 0.1209 (0.1229) loss: 0.7926 (0.8018) time: 0.1612 data: 0.0756 max mem: 9377 +Train: [52] [4400/6250] eta: 0:04:28 lr: 0.000062 grad: 0.1167 (0.1228) loss: 0.8022 (0.8018) time: 0.1596 data: 0.0760 max mem: 9377 +Train: [52] [4500/6250] eta: 0:04:14 lr: 0.000062 grad: 0.1196 (0.1228) loss: 0.8007 (0.8018) time: 0.1622 data: 0.0770 max mem: 9377 +Train: [52] [4600/6250] eta: 0:04:00 lr: 0.000062 grad: 0.1184 (0.1227) loss: 0.8017 (0.8018) time: 0.2097 data: 0.1378 max mem: 9377 +Train: [52] [4700/6250] eta: 0:03:46 lr: 0.000062 grad: 0.1205 (0.1227) loss: 0.8023 (0.8018) time: 0.1436 data: 0.0670 max mem: 9377 +Train: [52] [4800/6250] eta: 0:03:31 lr: 0.000062 grad: 0.1141 (0.1227) loss: 0.8060 (0.8018) time: 0.1328 data: 0.0438 max mem: 9377 +Train: [52] [4900/6250] eta: 0:03:17 lr: 0.000062 grad: 0.1118 (0.1226) loss: 0.8000 (0.8018) time: 0.1591 data: 0.0758 max mem: 9377 +Train: [52] [5000/6250] eta: 0:03:03 lr: 0.000062 grad: 0.1200 (0.1227) loss: 0.8023 (0.8018) time: 0.2210 data: 0.1316 max mem: 9377 +Train: [52] [5100/6250] eta: 0:02:49 lr: 0.000062 grad: 0.1212 (0.1227) loss: 0.8100 (0.8019) time: 0.1807 data: 0.0931 max mem: 9377 +Train: [52] [5200/6250] eta: 0:02:34 lr: 0.000062 grad: 0.1142 (0.1228) loss: 0.8023 (0.8019) time: 0.1535 data: 0.0563 max mem: 9377 +Train: [52] [5300/6250] eta: 0:02:20 lr: 0.000062 grad: 0.1106 (0.1227) loss: 0.8105 (0.8020) time: 0.1467 data: 0.0668 max mem: 9377 +Train: [52] [5400/6250] eta: 0:02:05 lr: 0.000062 grad: 0.1152 (0.1225) loss: 0.7999 (0.8020) time: 0.1292 data: 0.0386 max mem: 9377 +Train: [52] [5500/6250] eta: 0:01:50 lr: 0.000062 grad: 0.1162 (0.1224) loss: 0.8033 (0.8021) time: 0.1593 data: 0.0726 max mem: 9377 +Train: [52] [5600/6250] eta: 0:01:35 lr: 0.000062 grad: 0.1136 (0.1223) loss: 0.8091 (0.8022) time: 0.1404 data: 0.0594 max mem: 9377 +Train: [52] [5700/6250] eta: 0:01:20 lr: 0.000062 grad: 0.1092 (0.1222) loss: 0.8103 (0.8023) time: 0.1294 data: 0.0497 max mem: 9377 +Train: [52] [5800/6250] eta: 0:01:06 lr: 0.000062 grad: 0.1153 (0.1221) loss: 0.8077 (0.8024) time: 0.1515 data: 0.0647 max mem: 9377 +Train: [52] [5900/6250] eta: 0:00:51 lr: 0.000062 grad: 0.1143 (0.1220) loss: 0.8067 (0.8024) time: 0.1347 data: 0.0595 max mem: 9377 +Train: [52] [6000/6250] eta: 0:00:36 lr: 0.000062 grad: 0.1114 (0.1218) loss: 0.8048 (0.8026) time: 0.1539 data: 0.0715 max mem: 9377 +Train: [52] [6100/6250] eta: 0:00:21 lr: 0.000062 grad: 0.1046 (0.1217) loss: 0.8089 (0.8027) time: 0.1459 data: 0.0646 max mem: 9377 +Train: [52] [6200/6250] eta: 0:00:07 lr: 0.000061 grad: 0.1182 (0.1216) loss: 0.8036 (0.8028) time: 0.1435 data: 0.0610 max mem: 9377 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.1106 (0.1215) loss: 0.8062 (0.8028) time: 0.1400 data: 0.0559 max mem: 9377 +Train: [52] Total time: 0:15:20 (0.1473 s / it) +Averaged stats: lr: 0.000061 grad: 0.1106 (0.1215) loss: 0.8062 (0.8028) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:05:09 loss: 0.8381 (0.8381) time: 4.9938 data: 4.9310 max mem: 9377 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8387 (0.8428) time: 0.1414 data: 0.1165 max mem: 9377 +Eval (hcp-train-subset): [52] Total time: 0:00:13 (0.2159 s / it) +Averaged stats (hcp-train-subset): loss: 0.8387 (0.8428) +Eval (hcp-val): [52] [ 0/62] eta: 0:03:58 loss: 0.8407 (0.8407) time: 3.8544 data: 3.8008 max mem: 9377 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8384 (0.8410) time: 0.0891 data: 0.0625 max mem: 9377 +Eval (hcp-val): [52] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (hcp-val): loss: 0.8384 (0.8410) +Eval (nsd-val): [52] [ 0/62] eta: 0:04:37 loss: 0.8188 (0.8188) time: 4.4831 data: 4.4461 max mem: 9377 +Eval (nsd-val): [52] [61/62] eta: 0:00:00 loss: 0.8302 (0.8307) time: 0.0803 data: 0.0555 max mem: 9377 +Eval (nsd-val): [52] Total time: 0:00:13 (0.2252 s / it) +Averaged stats (nsd-val): loss: 0.8302 (0.8307) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [53] [ 0/6250] eta: 9:05:06 lr: 0.000061 grad: 0.1356 (0.1356) loss: 0.7823 (0.7823) time: 5.2330 data: 4.9824 max mem: 9377 +Train: [53] [ 100/6250] eta: 0:20:23 lr: 0.000061 grad: 0.1085 (0.1317) loss: 0.8305 (0.8267) time: 0.1489 data: 0.0372 max mem: 9377 +Train: [53] [ 200/6250] eta: 0:17:35 lr: 0.000061 grad: 0.1591 (0.1358) loss: 0.7897 (0.8175) time: 0.1573 data: 0.0677 max mem: 9377 +Train: [53] [ 300/6250] eta: 0:16:26 lr: 0.000061 grad: 0.1279 (0.1352) loss: 0.7973 (0.8115) time: 0.1524 data: 0.0588 max mem: 9377 +Train: [53] [ 400/6250] eta: 0:15:42 lr: 0.000061 grad: 0.1138 (0.1325) loss: 0.7994 (0.8087) time: 0.1478 data: 0.0575 max mem: 9377 +Train: [53] [ 500/6250] eta: 0:15:01 lr: 0.000061 grad: 0.1203 (0.1304) loss: 0.7976 (0.8069) time: 0.1311 data: 0.0448 max mem: 9377 +Train: [53] [ 600/6250] eta: 0:14:24 lr: 0.000061 grad: 0.1124 (0.1284) loss: 0.8163 (0.8062) time: 0.1408 data: 0.0525 max mem: 9377 +Train: [53] [ 700/6250] eta: 0:13:56 lr: 0.000061 grad: 0.1180 (0.1271) loss: 0.8131 (0.8056) time: 0.1437 data: 0.0559 max mem: 9377 +Train: [53] [ 800/6250] eta: 0:13:25 lr: 0.000061 grad: 0.1105 (0.1263) loss: 0.8026 (0.8054) time: 0.1094 data: 0.0017 max mem: 9377 +Train: [53] [ 900/6250] eta: 0:13:08 lr: 0.000061 grad: 0.1138 (0.1253) loss: 0.8100 (0.8054) time: 0.1471 data: 0.0572 max mem: 9377 +Train: [53] [1000/6250] eta: 0:12:57 lr: 0.000061 grad: 0.1154 (0.1248) loss: 0.8060 (0.8052) time: 0.1372 data: 0.0533 max mem: 9377 +Train: [53] [1100/6250] eta: 0:12:38 lr: 0.000061 grad: 0.1230 (0.1246) loss: 0.7933 (0.8049) time: 0.1330 data: 0.0516 max mem: 9377 +Train: [53] [1200/6250] eta: 0:12:28 lr: 0.000061 grad: 0.1195 (0.1245) loss: 0.8150 (0.8048) time: 0.1377 data: 0.0472 max mem: 9377 +Train: [53] [1300/6250] eta: 0:12:12 lr: 0.000061 grad: 0.1189 (0.1244) loss: 0.8021 (0.8046) time: 0.1405 data: 0.0541 max mem: 9377 +Train: [53] [1400/6250] eta: 0:11:53 lr: 0.000061 grad: 0.1159 (0.1242) loss: 0.8021 (0.8043) time: 0.1446 data: 0.0545 max mem: 9377 +Train: [53] [1500/6250] eta: 0:11:34 lr: 0.000061 grad: 0.1177 (0.1240) loss: 0.8022 (0.8042) time: 0.1351 data: 0.0493 max mem: 9377 +Train: [53] [1600/6250] eta: 0:11:17 lr: 0.000061 grad: 0.1134 (0.1239) loss: 0.8024 (0.8041) time: 0.1554 data: 0.0749 max mem: 9377 +Train: [53] [1700/6250] eta: 0:10:58 lr: 0.000061 grad: 0.1208 (0.1237) loss: 0.8037 (0.8041) time: 0.1309 data: 0.0401 max mem: 9377 +Train: [53] [1800/6250] eta: 0:10:42 lr: 0.000061 grad: 0.1243 (0.1235) loss: 0.8046 (0.8041) time: 0.1349 data: 0.0469 max mem: 9377 +Train: [53] [1900/6250] eta: 0:10:26 lr: 0.000061 grad: 0.1219 (0.1235) loss: 0.8000 (0.8041) time: 0.1331 data: 0.0569 max mem: 9377 +Train: [53] [2000/6250] eta: 0:10:11 lr: 0.000061 grad: 0.1154 (0.1233) loss: 0.8047 (0.8041) time: 0.1660 data: 0.0751 max mem: 9377 +Train: [53] [2100/6250] eta: 0:09:55 lr: 0.000061 grad: 0.1170 (0.1232) loss: 0.7944 (0.8041) time: 0.1418 data: 0.0604 max mem: 9377 +Train: [53] [2200/6250] eta: 0:09:40 lr: 0.000061 grad: 0.1192 (0.1232) loss: 0.8034 (0.8042) time: 0.1555 data: 0.0692 max mem: 9377 +Train: [53] [2300/6250] eta: 0:09:24 lr: 0.000061 grad: 0.1101 (0.1229) loss: 0.8076 (0.8043) time: 0.1220 data: 0.0368 max mem: 9377 +Train: [53] [2400/6250] eta: 0:09:09 lr: 0.000061 grad: 0.1147 (0.1227) loss: 0.8031 (0.8043) time: 0.1234 data: 0.0339 max mem: 9377 +Train: [53] [2500/6250] eta: 0:08:53 lr: 0.000061 grad: 0.1122 (0.1226) loss: 0.8079 (0.8044) time: 0.1501 data: 0.0685 max mem: 9377 +Train: [53] [2600/6250] eta: 0:08:39 lr: 0.000061 grad: 0.1184 (0.1224) loss: 0.8072 (0.8045) time: 0.1556 data: 0.0769 max mem: 9377 +Train: [53] [2700/6250] eta: 0:08:25 lr: 0.000061 grad: 0.1121 (0.1222) loss: 0.8117 (0.8046) time: 0.1403 data: 0.0572 max mem: 9377 +Train: [53] [2800/6250] eta: 0:08:11 lr: 0.000061 grad: 0.1088 (0.1219) loss: 0.8134 (0.8048) time: 0.1371 data: 0.0554 max mem: 9377 +Train: [53] [2900/6250] eta: 0:07:57 lr: 0.000061 grad: 0.1163 (0.1217) loss: 0.8084 (0.8049) time: 0.1480 data: 0.0660 max mem: 9377 +Train: [53] [3000/6250] eta: 0:07:43 lr: 0.000060 grad: 0.1138 (0.1216) loss: 0.8064 (0.8050) time: 0.1584 data: 0.0788 max mem: 9377 +Train: [53] [3100/6250] eta: 0:07:28 lr: 0.000060 grad: 0.1161 (0.1215) loss: 0.8105 (0.8050) time: 0.1512 data: 0.0677 max mem: 9377 +Train: [53] [3200/6250] eta: 0:07:12 lr: 0.000060 grad: 0.1193 (0.1215) loss: 0.8020 (0.8049) time: 0.1410 data: 0.0612 max mem: 9377 +Train: [53] [3300/6250] eta: 0:06:58 lr: 0.000060 grad: 0.1152 (0.1215) loss: 0.8009 (0.8048) time: 0.1478 data: 0.0624 max mem: 9377 +Train: [53] [3400/6250] eta: 0:06:43 lr: 0.000060 grad: 0.1158 (0.1216) loss: 0.8044 (0.8048) time: 0.1217 data: 0.0413 max mem: 9377 +Train: [53] [3500/6250] eta: 0:06:29 lr: 0.000060 grad: 0.1167 (0.1216) loss: 0.8019 (0.8048) time: 0.1429 data: 0.0634 max mem: 9377 +Train: [53] [3600/6250] eta: 0:06:15 lr: 0.000060 grad: 0.1209 (0.1216) loss: 0.8016 (0.8048) time: 0.1145 data: 0.0378 max mem: 9377 +Train: [53] [3700/6250] eta: 0:06:01 lr: 0.000060 grad: 0.1106 (0.1216) loss: 0.8092 (0.8048) time: 0.1236 data: 0.0458 max mem: 9377 +Train: [53] [3800/6250] eta: 0:05:47 lr: 0.000060 grad: 0.1134 (0.1217) loss: 0.8042 (0.8048) time: 0.1413 data: 0.0602 max mem: 9377 +Train: [53] [3900/6250] eta: 0:05:32 lr: 0.000060 grad: 0.1160 (0.1216) loss: 0.8034 (0.8048) time: 0.1425 data: 0.0634 max mem: 9377 +Train: [53] [4000/6250] eta: 0:05:18 lr: 0.000060 grad: 0.1201 (0.1215) loss: 0.7970 (0.8048) time: 0.1231 data: 0.0422 max mem: 9377 +Train: [53] [4100/6250] eta: 0:05:04 lr: 0.000060 grad: 0.1105 (0.1215) loss: 0.8085 (0.8047) time: 0.1345 data: 0.0521 max mem: 9377 +Train: [53] [4200/6250] eta: 0:04:50 lr: 0.000060 grad: 0.1234 (0.1215) loss: 0.7993 (0.8047) time: 0.1539 data: 0.0720 max mem: 9377 +Train: [53] [4300/6250] eta: 0:04:35 lr: 0.000060 grad: 0.1146 (0.1216) loss: 0.8038 (0.8047) time: 0.1265 data: 0.0368 max mem: 9377 +Train: [53] [4400/6250] eta: 0:04:21 lr: 0.000060 grad: 0.1180 (0.1216) loss: 0.8071 (0.8047) time: 0.1407 data: 0.0516 max mem: 9377 +Train: [53] [4500/6250] eta: 0:04:07 lr: 0.000060 grad: 0.1154 (0.1216) loss: 0.8034 (0.8046) time: 0.1488 data: 0.0694 max mem: 9377 +Train: [53] [4600/6250] eta: 0:03:54 lr: 0.000060 grad: 0.1125 (0.1216) loss: 0.7999 (0.8046) time: 0.1568 data: 0.0721 max mem: 9377 +Train: [53] [4700/6250] eta: 0:03:40 lr: 0.000060 grad: 0.1189 (0.1217) loss: 0.7991 (0.8045) time: 0.1590 data: 0.0761 max mem: 9377 +Train: [53] [4800/6250] eta: 0:03:26 lr: 0.000060 grad: 0.1162 (0.1217) loss: 0.8043 (0.8044) time: 0.1623 data: 0.0841 max mem: 9377 +Train: [53] [4900/6250] eta: 0:03:13 lr: 0.000060 grad: 0.1207 (0.1218) loss: 0.7978 (0.8043) time: 0.1533 data: 0.0680 max mem: 9377 +Train: [53] [5000/6250] eta: 0:02:58 lr: 0.000060 grad: 0.1186 (0.1218) loss: 0.7995 (0.8042) time: 0.1373 data: 0.0524 max mem: 9377 +Train: [53] [5100/6250] eta: 0:02:44 lr: 0.000060 grad: 0.1251 (0.1219) loss: 0.7967 (0.8040) time: 0.1340 data: 0.0557 max mem: 9377 +Train: [53] [5200/6250] eta: 0:02:30 lr: 0.000060 grad: 0.1182 (0.1219) loss: 0.8011 (0.8040) time: 0.1468 data: 0.0630 max mem: 9377 +Train: [53] [5300/6250] eta: 0:02:15 lr: 0.000060 grad: 0.1194 (0.1219) loss: 0.7953 (0.8039) time: 0.1474 data: 0.0658 max mem: 9377 +Train: [53] [5400/6250] eta: 0:02:01 lr: 0.000060 grad: 0.1216 (0.1219) loss: 0.7884 (0.8038) time: 0.1305 data: 0.0471 max mem: 9377 +Train: [53] [5500/6250] eta: 0:01:46 lr: 0.000060 grad: 0.1205 (0.1219) loss: 0.7989 (0.8038) time: 0.1274 data: 0.0353 max mem: 9377 +Train: [53] [5600/6250] eta: 0:01:32 lr: 0.000060 grad: 0.1171 (0.1219) loss: 0.7993 (0.8038) time: 0.1249 data: 0.0274 max mem: 9377 +Train: [53] [5700/6250] eta: 0:01:17 lr: 0.000060 grad: 0.1143 (0.1219) loss: 0.8059 (0.8038) time: 0.1109 data: 0.0141 max mem: 9377 +Train: [53] [5800/6250] eta: 0:01:03 lr: 0.000060 grad: 0.1163 (0.1219) loss: 0.8028 (0.8038) time: 0.1339 data: 0.0513 max mem: 9377 +Train: [53] [5900/6250] eta: 0:00:49 lr: 0.000060 grad: 0.1137 (0.1219) loss: 0.8059 (0.8037) time: 0.1249 data: 0.0422 max mem: 9377 +Train: [53] [6000/6250] eta: 0:00:35 lr: 0.000059 grad: 0.1121 (0.1219) loss: 0.8100 (0.8037) time: 0.1249 data: 0.0368 max mem: 9377 +Train: [53] [6100/6250] eta: 0:00:21 lr: 0.000059 grad: 0.1174 (0.1219) loss: 0.8028 (0.8037) time: 0.1320 data: 0.0499 max mem: 9377 +Train: [53] [6200/6250] eta: 0:00:07 lr: 0.000059 grad: 0.1183 (0.1218) loss: 0.8107 (0.8038) time: 0.1484 data: 0.0682 max mem: 9377 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.1169 (0.1218) loss: 0.8041 (0.8038) time: 0.1413 data: 0.0587 max mem: 9377 +Train: [53] Total time: 0:14:47 (0.1419 s / it) +Averaged stats: lr: 0.000059 grad: 0.1169 (0.1218) loss: 0.8041 (0.8038) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:05:14 loss: 0.8369 (0.8369) time: 5.0702 data: 5.0375 max mem: 9377 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8405 (0.8421) time: 0.0931 data: 0.0682 max mem: 9377 +Eval (hcp-train-subset): [53] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (hcp-train-subset): loss: 0.8405 (0.8421) +Eval (hcp-val): [53] [ 0/62] eta: 0:03:41 loss: 0.8416 (0.8416) time: 3.5667 data: 3.4958 max mem: 9377 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8396 (0.8404) time: 0.1260 data: 0.1007 max mem: 9377 +Eval (hcp-val): [53] Total time: 0:00:13 (0.2120 s / it) +Averaged stats (hcp-val): loss: 0.8396 (0.8404) +Eval (nsd-val): [53] [ 0/62] eta: 0:05:29 loss: 0.8245 (0.8245) time: 5.3095 data: 5.2791 max mem: 9377 +Eval (nsd-val): [53] [61/62] eta: 0:00:00 loss: 0.8307 (0.8331) time: 0.1240 data: 0.0988 max mem: 9377 +Eval (nsd-val): [53] Total time: 0:00:13 (0.2169 s / it) +Averaged stats (nsd-val): loss: 0.8307 (0.8331) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 10:57:35 lr: 0.000059 grad: 0.3093 (0.3093) loss: 0.8513 (0.8513) time: 6.3129 data: 6.2179 max mem: 9377 +Train: [54] [ 100/6250] eta: 0:19:59 lr: 0.000059 grad: 0.1316 (0.1382) loss: 0.8269 (0.8275) time: 0.1503 data: 0.0536 max mem: 9377 +Train: [54] [ 200/6250] eta: 0:17:42 lr: 0.000059 grad: 0.1397 (0.1429) loss: 0.8039 (0.8131) time: 0.1623 data: 0.0730 max mem: 9377 +Train: [54] [ 300/6250] eta: 0:16:34 lr: 0.000059 grad: 0.1163 (0.1421) loss: 0.8023 (0.8078) time: 0.1525 data: 0.0648 max mem: 9377 +Train: [54] [ 400/6250] eta: 0:15:46 lr: 0.000059 grad: 0.1114 (0.1378) loss: 0.8028 (0.8063) time: 0.1337 data: 0.0420 max mem: 9377 +Train: [54] [ 500/6250] eta: 0:15:11 lr: 0.000059 grad: 0.1160 (0.1337) loss: 0.8025 (0.8063) time: 0.1610 data: 0.0670 max mem: 9377 +Train: [54] [ 600/6250] eta: 0:14:39 lr: 0.000059 grad: 0.1129 (0.1305) loss: 0.8023 (0.8066) time: 0.1215 data: 0.0327 max mem: 9377 +Train: [54] [ 700/6250] eta: 0:14:13 lr: 0.000059 grad: 0.1075 (0.1282) loss: 0.8063 (0.8068) time: 0.1441 data: 0.0396 max mem: 9377 +Train: [54] [ 800/6250] eta: 0:13:48 lr: 0.000059 grad: 0.1110 (0.1265) loss: 0.8058 (0.8068) time: 0.1456 data: 0.0573 max mem: 9377 +Train: [54] [ 900/6250] eta: 0:13:26 lr: 0.000059 grad: 0.1170 (0.1259) loss: 0.8008 (0.8068) time: 0.1430 data: 0.0564 max mem: 9377 +Train: [54] [1000/6250] eta: 0:13:01 lr: 0.000059 grad: 0.1062 (0.1251) loss: 0.8048 (0.8068) time: 0.1117 data: 0.0273 max mem: 9377 +Train: [54] [1100/6250] eta: 0:12:40 lr: 0.000059 grad: 0.1105 (0.1245) loss: 0.8077 (0.8066) time: 0.1338 data: 0.0434 max mem: 9377 +Train: [54] [1200/6250] eta: 0:12:17 lr: 0.000059 grad: 0.1172 (0.1240) loss: 0.8025 (0.8063) time: 0.1391 data: 0.0591 max mem: 9377 +Train: [54] [1300/6250] eta: 0:11:58 lr: 0.000059 grad: 0.1210 (0.1237) loss: 0.8060 (0.8060) time: 0.1453 data: 0.0664 max mem: 9377 +Train: [54] [1400/6250] eta: 0:11:44 lr: 0.000059 grad: 0.1161 (0.1235) loss: 0.8011 (0.8056) time: 0.1471 data: 0.0694 max mem: 9377 +Train: [54] [1500/6250] eta: 0:11:27 lr: 0.000059 grad: 0.1219 (0.1234) loss: 0.8018 (0.8052) time: 0.1316 data: 0.0435 max mem: 9377 +Train: [54] [1600/6250] eta: 0:11:12 lr: 0.000059 grad: 0.1226 (0.1233) loss: 0.7922 (0.8050) time: 0.1550 data: 0.0759 max mem: 9377 +Train: [54] [1700/6250] eta: 0:10:58 lr: 0.000059 grad: 0.1225 (0.1234) loss: 0.8024 (0.8045) time: 0.1598 data: 0.0787 max mem: 9377 +Train: [54] [1800/6250] eta: 0:10:43 lr: 0.000059 grad: 0.1267 (0.1235) loss: 0.8002 (0.8041) time: 0.1329 data: 0.0512 max mem: 9377 +Train: [54] [1900/6250] eta: 0:10:28 lr: 0.000059 grad: 0.1218 (0.1235) loss: 0.7929 (0.8037) time: 0.1264 data: 0.0441 max mem: 9377 +Train: [54] [2000/6250] eta: 0:10:13 lr: 0.000059 grad: 0.1226 (0.1235) loss: 0.7993 (0.8034) time: 0.1174 data: 0.0271 max mem: 9377 +Train: [54] [2100/6250] eta: 0:09:57 lr: 0.000059 grad: 0.1143 (0.1236) loss: 0.8052 (0.8032) time: 0.1357 data: 0.0546 max mem: 9377 +Train: [54] [2200/6250] eta: 0:09:42 lr: 0.000059 grad: 0.1112 (0.1234) loss: 0.8062 (0.8031) time: 0.1301 data: 0.0520 max mem: 9377 +Train: [54] [2300/6250] eta: 0:09:27 lr: 0.000059 grad: 0.1235 (0.1234) loss: 0.7987 (0.8030) time: 0.1443 data: 0.0626 max mem: 9377 +Train: [54] [2400/6250] eta: 0:09:12 lr: 0.000059 grad: 0.1206 (0.1233) loss: 0.8021 (0.8030) time: 0.1267 data: 0.0374 max mem: 9377 +Train: [54] [2500/6250] eta: 0:08:57 lr: 0.000059 grad: 0.1215 (0.1233) loss: 0.7998 (0.8030) time: 0.1573 data: 0.0784 max mem: 9377 +Train: [54] [2600/6250] eta: 0:08:42 lr: 0.000059 grad: 0.1142 (0.1232) loss: 0.7991 (0.8030) time: 0.1543 data: 0.0770 max mem: 9377 +Train: [54] [2700/6250] eta: 0:08:27 lr: 0.000059 grad: 0.1235 (0.1231) loss: 0.7960 (0.8029) time: 0.1408 data: 0.0568 max mem: 9377 +Train: [54] [2800/6250] eta: 0:08:12 lr: 0.000058 grad: 0.1209 (0.1233) loss: 0.8008 (0.8026) time: 0.1381 data: 0.0547 max mem: 9377 +Train: [54] [2900/6250] eta: 0:07:58 lr: 0.000058 grad: 0.1209 (0.1233) loss: 0.7974 (0.8025) time: 0.1405 data: 0.0569 max mem: 9377 +Train: [54] [3000/6250] eta: 0:07:44 lr: 0.000058 grad: 0.1131 (0.1232) loss: 0.8032 (0.8024) time: 0.1241 data: 0.0393 max mem: 9377 +Train: [54] [3100/6250] eta: 0:07:29 lr: 0.000058 grad: 0.1147 (0.1231) loss: 0.8025 (0.8023) time: 0.1379 data: 0.0533 max mem: 9377 +Train: [54] [3200/6250] eta: 0:07:16 lr: 0.000058 grad: 0.1149 (0.1231) loss: 0.8008 (0.8022) time: 0.2008 data: 0.1255 max mem: 9377 +Train: [54] [3300/6250] eta: 0:07:00 lr: 0.000058 grad: 0.1208 (0.1232) loss: 0.7915 (0.8021) time: 0.1380 data: 0.0515 max mem: 9377 +Train: [54] [3400/6250] eta: 0:06:46 lr: 0.000058 grad: 0.1170 (0.1231) loss: 0.8078 (0.8022) time: 0.1394 data: 0.0554 max mem: 9377 +Train: [54] [3500/6250] eta: 0:06:31 lr: 0.000058 grad: 0.1169 (0.1231) loss: 0.8031 (0.8022) time: 0.1210 data: 0.0338 max mem: 9377 +Train: [54] [3600/6250] eta: 0:06:16 lr: 0.000058 grad: 0.1134 (0.1229) loss: 0.8109 (0.8023) time: 0.1493 data: 0.0707 max mem: 9377 +Train: [54] [3700/6250] eta: 0:06:01 lr: 0.000058 grad: 0.1148 (0.1228) loss: 0.8106 (0.8024) time: 0.1300 data: 0.0470 max mem: 9377 +Train: [54] [3800/6250] eta: 0:05:47 lr: 0.000058 grad: 0.1146 (0.1228) loss: 0.8037 (0.8025) time: 0.1439 data: 0.0576 max mem: 9377 +Train: [54] [3900/6250] eta: 0:05:33 lr: 0.000058 grad: 0.1202 (0.1229) loss: 0.8076 (0.8026) time: 0.1373 data: 0.0536 max mem: 9377 +Train: [54] [4000/6250] eta: 0:05:19 lr: 0.000058 grad: 0.1251 (0.1230) loss: 0.8030 (0.8025) time: 0.1389 data: 0.0537 max mem: 9377 +Train: [54] [4100/6250] eta: 0:05:04 lr: 0.000058 grad: 0.1210 (0.1232) loss: 0.7978 (0.8024) time: 0.1294 data: 0.0483 max mem: 9377 +Train: [54] [4200/6250] eta: 0:04:50 lr: 0.000058 grad: 0.1229 (0.1233) loss: 0.7998 (0.8024) time: 0.1497 data: 0.0687 max mem: 9377 +Train: [54] [4300/6250] eta: 0:04:36 lr: 0.000058 grad: 0.1194 (0.1234) loss: 0.8080 (0.8024) time: 0.1571 data: 0.0719 max mem: 9377 +Train: [54] [4400/6250] eta: 0:04:22 lr: 0.000058 grad: 0.1303 (0.1234) loss: 0.8008 (0.8024) time: 0.1514 data: 0.0695 max mem: 9377 +Train: [54] [4500/6250] eta: 0:04:07 lr: 0.000058 grad: 0.1193 (0.1235) loss: 0.7947 (0.8023) time: 0.1374 data: 0.0536 max mem: 9377 +Train: [54] [4600/6250] eta: 0:03:53 lr: 0.000058 grad: 0.1146 (0.1236) loss: 0.7998 (0.8022) time: 0.1507 data: 0.0725 max mem: 9377 +Train: [54] [4700/6250] eta: 0:03:39 lr: 0.000058 grad: 0.1268 (0.1237) loss: 0.7970 (0.8022) time: 0.1575 data: 0.0789 max mem: 9377 +Train: [54] [4800/6250] eta: 0:03:25 lr: 0.000058 grad: 0.1194 (0.1238) loss: 0.8061 (0.8021) time: 0.1737 data: 0.0971 max mem: 9377 +Train: [54] [4900/6250] eta: 0:03:12 lr: 0.000058 grad: 0.1160 (0.1237) loss: 0.7970 (0.8020) time: 0.1393 data: 0.0606 max mem: 9377 +Train: [54] [5000/6250] eta: 0:02:57 lr: 0.000058 grad: 0.1210 (0.1238) loss: 0.7919 (0.8020) time: 0.1533 data: 0.0722 max mem: 9377 +Train: [54] [5100/6250] eta: 0:02:43 lr: 0.000058 grad: 0.1179 (0.1238) loss: 0.8008 (0.8019) time: 0.1435 data: 0.0641 max mem: 9377 +Train: [54] [5200/6250] eta: 0:02:29 lr: 0.000058 grad: 0.1197 (0.1239) loss: 0.7942 (0.8018) time: 0.1647 data: 0.0808 max mem: 9377 +Train: [54] [5300/6250] eta: 0:02:15 lr: 0.000058 grad: 0.1204 (0.1239) loss: 0.7981 (0.8017) time: 0.1678 data: 0.0722 max mem: 9377 +Train: [54] [5400/6250] eta: 0:02:01 lr: 0.000058 grad: 0.1266 (0.1240) loss: 0.7882 (0.8015) time: 0.1549 data: 0.0707 max mem: 9377 +Train: [54] [5500/6250] eta: 0:01:47 lr: 0.000058 grad: 0.1226 (0.1241) loss: 0.7954 (0.8013) time: 0.1463 data: 0.0561 max mem: 9377 +Train: [54] [5600/6250] eta: 0:01:33 lr: 0.000058 grad: 0.1237 (0.1242) loss: 0.7943 (0.8012) time: 0.1247 data: 0.0278 max mem: 9377 +Train: [54] [5700/6250] eta: 0:01:18 lr: 0.000058 grad: 0.1253 (0.1243) loss: 0.7913 (0.8010) time: 0.1308 data: 0.0419 max mem: 9377 +Train: [54] [5800/6250] eta: 0:01:04 lr: 0.000057 grad: 0.1356 (0.1246) loss: 0.7821 (0.8009) time: 0.1328 data: 0.0454 max mem: 9377 +Train: [54] [5900/6250] eta: 0:00:49 lr: 0.000057 grad: 0.1303 (0.1247) loss: 0.7851 (0.8007) time: 0.1129 data: 0.0193 max mem: 9377 +Train: [54] [6000/6250] eta: 0:00:35 lr: 0.000057 grad: 0.1256 (0.1249) loss: 0.7873 (0.8006) time: 0.1581 data: 0.0778 max mem: 9377 +Train: [54] [6100/6250] eta: 0:00:21 lr: 0.000057 grad: 0.1376 (0.1250) loss: 0.7914 (0.8004) time: 0.1390 data: 0.0551 max mem: 9377 +Train: [54] [6200/6250] eta: 0:00:07 lr: 0.000057 grad: 0.1322 (0.1250) loss: 0.7839 (0.8002) time: 0.1430 data: 0.0660 max mem: 9377 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1260 (0.1251) loss: 0.7912 (0.8002) time: 0.1808 data: 0.1020 max mem: 9377 +Train: [54] Total time: 0:14:55 (0.1433 s / it) +Averaged stats: lr: 0.000057 grad: 0.1260 (0.1251) loss: 0.7912 (0.8002) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:05:11 loss: 0.8402 (0.8402) time: 5.0248 data: 4.9947 max mem: 9377 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8418 (0.8430) time: 0.1386 data: 0.1136 max mem: 9377 +Eval (hcp-train-subset): [54] Total time: 0:00:13 (0.2184 s / it) +Averaged stats (hcp-train-subset): loss: 0.8418 (0.8430) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [54] [ 0/62] eta: 0:03:53 loss: 0.8393 (0.8393) time: 3.7601 data: 3.6749 max mem: 9377 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8398 (0.8414) time: 0.1298 data: 0.1042 max mem: 9377 +Eval (hcp-val): [54] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-val): loss: 0.8398 (0.8414) +Making plots (hcp-val): example=7 +Eval (nsd-val): [54] [ 0/62] eta: 0:04:59 loss: 0.8150 (0.8150) time: 4.8235 data: 4.7417 max mem: 9377 +Eval (nsd-val): [54] [61/62] eta: 0:00:00 loss: 0.8235 (0.8250) time: 0.1272 data: 0.1008 max mem: 9377 +Eval (nsd-val): [54] Total time: 0:00:14 (0.2311 s / it) +Averaged stats (nsd-val): loss: 0.8235 (0.8250) +Making plots (nsd-val): example=45 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00054.pth +Train: [55] [ 0/6250] eta: 11:29:20 lr: 0.000057 grad: 0.1640 (0.1640) loss: 0.8274 (0.8274) time: 6.6177 data: 6.5111 max mem: 9377 +Train: [55] [ 100/6250] eta: 0:23:05 lr: 0.000057 grad: 0.1388 (0.1477) loss: 0.8171 (0.8274) time: 0.1650 data: 0.0547 max mem: 9377 +Train: [55] [ 200/6250] eta: 0:20:13 lr: 0.000057 grad: 0.1231 (0.1481) loss: 0.8071 (0.8176) time: 0.1739 data: 0.0854 max mem: 9377 +Train: [55] [ 300/6250] eta: 0:18:26 lr: 0.000057 grad: 0.1331 (0.1437) loss: 0.8085 (0.8141) time: 0.1350 data: 0.0431 max mem: 9377 +Train: [55] [ 400/6250] eta: 0:17:13 lr: 0.000057 grad: 0.1284 (0.1406) loss: 0.8020 (0.8111) time: 0.1551 data: 0.0729 max mem: 9377 +Train: [55] [ 500/6250] eta: 0:16:23 lr: 0.000057 grad: 0.1145 (0.1380) loss: 0.8011 (0.8087) time: 0.1398 data: 0.0538 max mem: 9377 +Train: [55] [ 600/6250] eta: 0:15:47 lr: 0.000057 grad: 0.1228 (0.1350) loss: 0.7975 (0.8077) time: 0.1661 data: 0.0774 max mem: 9377 +Train: [55] [ 700/6250] eta: 0:15:12 lr: 0.000057 grad: 0.1123 (0.1332) loss: 0.7988 (0.8072) time: 0.1699 data: 0.0900 max mem: 9377 +Train: [55] [ 800/6250] eta: 0:14:44 lr: 0.000057 grad: 0.1130 (0.1323) loss: 0.8053 (0.8071) time: 0.1566 data: 0.0702 max mem: 9377 +Train: [55] [ 900/6250] eta: 0:14:18 lr: 0.000057 grad: 0.1131 (0.1317) loss: 0.8016 (0.8065) time: 0.1601 data: 0.0754 max mem: 9377 +Train: [55] [1000/6250] eta: 0:13:52 lr: 0.000057 grad: 0.1132 (0.1309) loss: 0.8058 (0.8061) time: 0.1238 data: 0.0437 max mem: 9377 +Train: [55] [1100/6250] eta: 0:13:27 lr: 0.000057 grad: 0.1149 (0.1302) loss: 0.7947 (0.8057) time: 0.1345 data: 0.0578 max mem: 9377 +Train: [55] [1200/6250] eta: 0:13:04 lr: 0.000057 grad: 0.1140 (0.1296) loss: 0.8042 (0.8055) time: 0.1527 data: 0.0702 max mem: 9377 +Train: [55] [1300/6250] eta: 0:12:40 lr: 0.000057 grad: 0.1168 (0.1289) loss: 0.8043 (0.8052) time: 0.1481 data: 0.0671 max mem: 9377 +Train: [55] [1400/6250] eta: 0:12:19 lr: 0.000057 grad: 0.1173 (0.1285) loss: 0.8100 (0.8049) time: 0.1406 data: 0.0616 max mem: 9377 +Train: [55] [1500/6250] eta: 0:12:00 lr: 0.000057 grad: 0.1176 (0.1283) loss: 0.8024 (0.8046) time: 0.1515 data: 0.0720 max mem: 9377 +Train: [55] [1600/6250] eta: 0:11:40 lr: 0.000057 grad: 0.1118 (0.1280) loss: 0.8044 (0.8044) time: 0.1263 data: 0.0402 max mem: 9377 +Train: [55] [1700/6250] eta: 0:11:21 lr: 0.000057 grad: 0.1226 (0.1276) loss: 0.7957 (0.8042) time: 0.1412 data: 0.0587 max mem: 9377 +Train: [55] [1800/6250] eta: 0:11:03 lr: 0.000057 grad: 0.1197 (0.1273) loss: 0.8000 (0.8040) time: 0.1225 data: 0.0357 max mem: 9377 +Train: [55] [1900/6250] eta: 0:10:45 lr: 0.000057 grad: 0.1280 (0.1276) loss: 0.8016 (0.8037) time: 0.1269 data: 0.0448 max mem: 9377 +Train: [55] [2000/6250] eta: 0:10:28 lr: 0.000057 grad: 0.1207 (0.1277) loss: 0.8016 (0.8035) time: 0.1241 data: 0.0430 max mem: 9377 +Train: [55] [2100/6250] eta: 0:10:11 lr: 0.000057 grad: 0.1180 (0.1283) loss: 0.7943 (0.8032) time: 0.1369 data: 0.0551 max mem: 9377 +Train: [55] [2200/6250] eta: 0:09:54 lr: 0.000057 grad: 0.1157 (0.1281) loss: 0.8004 (0.8030) time: 0.1568 data: 0.0729 max mem: 9377 +Train: [55] [2300/6250] eta: 0:09:38 lr: 0.000057 grad: 0.1240 (0.1280) loss: 0.7920 (0.8028) time: 0.1320 data: 0.0441 max mem: 9377 +Train: [55] [2400/6250] eta: 0:09:21 lr: 0.000057 grad: 0.1218 (0.1278) loss: 0.7860 (0.8026) time: 0.1272 data: 0.0369 max mem: 9377 +Train: [55] [2500/6250] eta: 0:09:06 lr: 0.000057 grad: 0.1215 (0.1275) loss: 0.7972 (0.8024) time: 0.1462 data: 0.0659 max mem: 9377 +Train: [55] [2600/6250] eta: 0:08:50 lr: 0.000056 grad: 0.1125 (0.1273) loss: 0.8050 (0.8024) time: 0.1337 data: 0.0537 max mem: 9377 +Train: [55] [2700/6250] eta: 0:08:35 lr: 0.000056 grad: 0.1228 (0.1271) loss: 0.8011 (0.8024) time: 0.1259 data: 0.0481 max mem: 9377 +Train: [55] [2800/6250] eta: 0:08:20 lr: 0.000056 grad: 0.1144 (0.1269) loss: 0.8031 (0.8024) time: 0.1325 data: 0.0524 max mem: 9377 +Train: [55] [2900/6250] eta: 0:08:04 lr: 0.000056 grad: 0.1209 (0.1266) loss: 0.8054 (0.8024) time: 0.1334 data: 0.0520 max mem: 9377 +Train: [55] [3000/6250] eta: 0:07:50 lr: 0.000056 grad: 0.1142 (0.1263) loss: 0.8054 (0.8025) time: 0.1353 data: 0.0522 max mem: 9377 +Train: [55] [3100/6250] eta: 0:07:34 lr: 0.000056 grad: 0.1201 (0.1261) loss: 0.8069 (0.8025) time: 0.1410 data: 0.0617 max mem: 9377 +Train: [55] [3200/6250] eta: 0:07:19 lr: 0.000056 grad: 0.1115 (0.1258) loss: 0.8028 (0.8026) time: 0.1204 data: 0.0371 max mem: 9377 +Train: [55] [3300/6250] eta: 0:07:06 lr: 0.000056 grad: 0.1225 (0.1257) loss: 0.7969 (0.8026) time: 0.1533 data: 0.0730 max mem: 9377 +Train: [55] [3400/6250] eta: 0:06:51 lr: 0.000056 grad: 0.1205 (0.1255) loss: 0.8031 (0.8026) time: 0.1156 data: 0.0357 max mem: 9377 +Train: [55] [3500/6250] eta: 0:06:36 lr: 0.000056 grad: 0.1076 (0.1252) loss: 0.8094 (0.8027) time: 0.1394 data: 0.0605 max mem: 9377 +Train: [55] [3600/6250] eta: 0:06:22 lr: 0.000056 grad: 0.1143 (0.1250) loss: 0.8055 (0.8028) time: 0.1264 data: 0.0448 max mem: 9377 +Train: [55] [3700/6250] eta: 0:06:08 lr: 0.000056 grad: 0.1190 (0.1249) loss: 0.8020 (0.8027) time: 0.1359 data: 0.0556 max mem: 9377 +Train: [55] [3800/6250] eta: 0:05:53 lr: 0.000056 grad: 0.1177 (0.1248) loss: 0.8028 (0.8027) time: 0.1491 data: 0.0716 max mem: 9377 +Train: [55] [3900/6250] eta: 0:05:38 lr: 0.000056 grad: 0.1122 (0.1247) loss: 0.8022 (0.8026) time: 0.1503 data: 0.0670 max mem: 9377 +Train: [55] [4000/6250] eta: 0:05:24 lr: 0.000056 grad: 0.1147 (0.1246) loss: 0.7994 (0.8026) time: 0.1463 data: 0.0697 max mem: 9377 +Train: [55] [4100/6250] eta: 0:05:09 lr: 0.000056 grad: 0.1111 (0.1245) loss: 0.7999 (0.8025) time: 0.1674 data: 0.0897 max mem: 9377 +Train: [55] [4200/6250] eta: 0:04:55 lr: 0.000056 grad: 0.1160 (0.1245) loss: 0.8017 (0.8025) time: 0.1457 data: 0.0650 max mem: 9377 +Train: [55] [4300/6250] eta: 0:04:40 lr: 0.000056 grad: 0.1313 (0.1245) loss: 0.8001 (0.8024) time: 0.1360 data: 0.0525 max mem: 9377 +Train: [55] [4400/6250] eta: 0:04:26 lr: 0.000056 grad: 0.1116 (0.1244) loss: 0.7947 (0.8023) time: 0.1413 data: 0.0577 max mem: 9377 +Train: [55] [4500/6250] eta: 0:04:11 lr: 0.000056 grad: 0.1182 (0.1244) loss: 0.7923 (0.8022) time: 0.1406 data: 0.0582 max mem: 9377 +Train: [55] [4600/6250] eta: 0:03:57 lr: 0.000056 grad: 0.1192 (0.1243) loss: 0.7948 (0.8021) time: 0.2530 data: 0.1776 max mem: 9377 +Train: [55] [4700/6250] eta: 0:03:43 lr: 0.000056 grad: 0.1335 (0.1244) loss: 0.7877 (0.8020) time: 0.1558 data: 0.0786 max mem: 9377 +Train: [55] [4800/6250] eta: 0:03:28 lr: 0.000056 grad: 0.1183 (0.1244) loss: 0.7929 (0.8020) time: 0.1523 data: 0.0775 max mem: 9377 +Train: [55] [4900/6250] eta: 0:03:14 lr: 0.000056 grad: 0.1178 (0.1245) loss: 0.8020 (0.8018) time: 0.1403 data: 0.0628 max mem: 9377 +Train: [55] [5000/6250] eta: 0:03:00 lr: 0.000056 grad: 0.1167 (0.1244) loss: 0.8053 (0.8019) time: 0.1705 data: 0.0882 max mem: 9377 +Train: [55] [5100/6250] eta: 0:02:46 lr: 0.000056 grad: 0.1192 (0.1245) loss: 0.7934 (0.8018) time: 0.1363 data: 0.0494 max mem: 9377 +Train: [55] [5200/6250] eta: 0:02:31 lr: 0.000056 grad: 0.1131 (0.1245) loss: 0.8089 (0.8017) time: 0.1457 data: 0.0555 max mem: 9377 +Train: [55] [5300/6250] eta: 0:02:17 lr: 0.000056 grad: 0.1241 (0.1245) loss: 0.7970 (0.8016) time: 0.1478 data: 0.0669 max mem: 9377 +Train: [55] [5400/6250] eta: 0:02:02 lr: 0.000056 grad: 0.1223 (0.1246) loss: 0.7956 (0.8015) time: 0.1325 data: 0.0435 max mem: 9377 +Train: [55] [5500/6250] eta: 0:01:47 lr: 0.000056 grad: 0.1168 (0.1246) loss: 0.7949 (0.8014) time: 0.1268 data: 0.0403 max mem: 9377 +Train: [55] [5600/6250] eta: 0:01:33 lr: 0.000055 grad: 0.1130 (0.1245) loss: 0.8021 (0.8013) time: 0.1332 data: 0.0490 max mem: 9377 +Train: [55] [5700/6250] eta: 0:01:18 lr: 0.000055 grad: 0.1197 (0.1245) loss: 0.7940 (0.8012) time: 0.1523 data: 0.0698 max mem: 9377 +Train: [55] [5800/6250] eta: 0:01:04 lr: 0.000055 grad: 0.1215 (0.1245) loss: 0.8054 (0.8012) time: 0.1319 data: 0.0341 max mem: 9377 +Train: [55] [5900/6250] eta: 0:00:50 lr: 0.000055 grad: 0.1210 (0.1245) loss: 0.7979 (0.8011) time: 0.1410 data: 0.0548 max mem: 9377 +Train: [55] [6000/6250] eta: 0:00:35 lr: 0.000055 grad: 0.1225 (0.1245) loss: 0.8024 (0.8011) time: 0.1317 data: 0.0412 max mem: 9377 +Train: [55] [6100/6250] eta: 0:00:21 lr: 0.000055 grad: 0.1250 (0.1246) loss: 0.7983 (0.8011) time: 0.1490 data: 0.0671 max mem: 9377 +Train: [55] [6200/6250] eta: 0:00:07 lr: 0.000055 grad: 0.1199 (0.1248) loss: 0.8054 (0.8011) time: 0.1268 data: 0.0433 max mem: 9377 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.1196 (0.1249) loss: 0.8010 (0.8011) time: 0.1445 data: 0.0590 max mem: 9377 +Train: [55] Total time: 0:14:55 (0.1433 s / it) +Averaged stats: lr: 0.000055 grad: 0.1196 (0.1249) loss: 0.8010 (0.8011) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:03:35 loss: 0.8412 (0.8412) time: 3.4689 data: 3.4018 max mem: 9377 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8437 (0.8437) time: 0.1272 data: 0.1005 max mem: 9377 +Eval (hcp-train-subset): [55] Total time: 0:00:12 (0.2066 s / it) +Averaged stats (hcp-train-subset): loss: 0.8437 (0.8437) +Eval (hcp-val): [55] [ 0/62] eta: 0:04:43 loss: 0.8404 (0.8404) time: 4.5716 data: 4.5254 max mem: 9377 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8413 (0.8418) time: 0.1238 data: 0.0988 max mem: 9377 +Eval (hcp-val): [55] Total time: 0:00:13 (0.2151 s / it) +Averaged stats (hcp-val): loss: 0.8413 (0.8418) +Eval (nsd-val): [55] [ 0/62] eta: 0:04:00 loss: 0.8095 (0.8095) time: 3.8799 data: 3.8213 max mem: 9377 +Eval (nsd-val): [55] [61/62] eta: 0:00:00 loss: 0.8253 (0.8254) time: 0.1175 data: 0.0923 max mem: 9377 +Eval (nsd-val): [55] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (nsd-val): loss: 0.8253 (0.8254) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [56] [ 0/6250] eta: 10:49:29 lr: 0.000055 grad: 0.3760 (0.3760) loss: 0.7359 (0.7359) time: 6.2352 data: 6.1440 max mem: 9377 +Train: [56] [ 100/6250] eta: 0:19:44 lr: 0.000055 grad: 0.1259 (0.1483) loss: 0.7940 (0.8163) time: 0.1197 data: 0.0185 max mem: 9377 +Train: [56] [ 200/6250] eta: 0:16:53 lr: 0.000055 grad: 0.1286 (0.1424) loss: 0.7998 (0.8110) time: 0.1478 data: 0.0522 max mem: 9377 +Train: [56] [ 300/6250] eta: 0:15:37 lr: 0.000055 grad: 0.1225 (0.1373) loss: 0.7943 (0.8081) time: 0.1263 data: 0.0297 max mem: 9377 +Train: [56] [ 400/6250] eta: 0:14:48 lr: 0.000055 grad: 0.1177 (0.1338) loss: 0.8011 (0.8068) time: 0.1328 data: 0.0265 max mem: 9377 +Train: [56] [ 500/6250] eta: 0:14:15 lr: 0.000055 grad: 0.1243 (0.1320) loss: 0.7983 (0.8064) time: 0.1416 data: 0.0601 max mem: 9377 +Train: [56] [ 600/6250] eta: 0:13:46 lr: 0.000055 grad: 0.1218 (0.1310) loss: 0.8029 (0.8060) time: 0.1197 data: 0.0406 max mem: 9377 +Train: [56] [ 700/6250] eta: 0:13:21 lr: 0.000055 grad: 0.1177 (0.1293) loss: 0.7993 (0.8055) time: 0.1182 data: 0.0378 max mem: 9377 +Train: [56] [ 800/6250] eta: 0:13:04 lr: 0.000055 grad: 0.1130 (0.1283) loss: 0.8040 (0.8056) time: 0.1543 data: 0.0676 max mem: 9377 +Train: [56] [ 900/6250] eta: 0:12:45 lr: 0.000055 grad: 0.1171 (0.1274) loss: 0.8022 (0.8055) time: 0.1302 data: 0.0430 max mem: 9377 +Train: [56] [1000/6250] eta: 0:12:28 lr: 0.000055 grad: 0.1080 (0.1264) loss: 0.8020 (0.8053) time: 0.1468 data: 0.0678 max mem: 9377 +Train: [56] [1100/6250] eta: 0:12:12 lr: 0.000055 grad: 0.1164 (0.1258) loss: 0.8071 (0.8051) time: 0.1285 data: 0.0466 max mem: 9377 +Train: [56] [1200/6250] eta: 0:11:58 lr: 0.000055 grad: 0.1209 (0.1254) loss: 0.8013 (0.8048) time: 0.1382 data: 0.0588 max mem: 9377 +Train: [56] [1300/6250] eta: 0:11:41 lr: 0.000055 grad: 0.1208 (0.1248) loss: 0.8042 (0.8047) time: 0.1298 data: 0.0464 max mem: 9377 +Train: [56] [1400/6250] eta: 0:11:27 lr: 0.000055 grad: 0.1152 (0.1248) loss: 0.8089 (0.8045) time: 0.1465 data: 0.0705 max mem: 9377 +Train: [56] [1500/6250] eta: 0:11:13 lr: 0.000055 grad: 0.1249 (0.1247) loss: 0.7918 (0.8043) time: 0.1511 data: 0.0653 max mem: 9377 +Train: [56] [1600/6250] eta: 0:10:57 lr: 0.000055 grad: 0.1158 (0.1244) loss: 0.7995 (0.8043) time: 0.1503 data: 0.0709 max mem: 9377 +Train: [56] [1700/6250] eta: 0:10:41 lr: 0.000055 grad: 0.1126 (0.1242) loss: 0.8120 (0.8041) time: 0.1333 data: 0.0529 max mem: 9377 +Train: [56] [1800/6250] eta: 0:10:26 lr: 0.000055 grad: 0.1246 (0.1242) loss: 0.7988 (0.8037) time: 0.1497 data: 0.0699 max mem: 9377 +Train: [56] [1900/6250] eta: 0:10:11 lr: 0.000055 grad: 0.1223 (0.1241) loss: 0.7999 (0.8035) time: 0.1361 data: 0.0556 max mem: 9377 +Train: [56] [2000/6250] eta: 0:09:57 lr: 0.000055 grad: 0.1225 (0.1241) loss: 0.7943 (0.8032) time: 0.1602 data: 0.0794 max mem: 9377 +Train: [56] [2100/6250] eta: 0:09:41 lr: 0.000055 grad: 0.1235 (0.1244) loss: 0.7982 (0.8028) time: 0.1271 data: 0.0464 max mem: 9377 +Train: [56] [2200/6250] eta: 0:09:28 lr: 0.000055 grad: 0.1224 (0.1247) loss: 0.7912 (0.8025) time: 0.1551 data: 0.0758 max mem: 9377 +Train: [56] [2300/6250] eta: 0:09:13 lr: 0.000055 grad: 0.1196 (0.1248) loss: 0.8019 (0.8021) time: 0.1412 data: 0.0602 max mem: 9377 +Train: [56] [2400/6250] eta: 0:08:58 lr: 0.000054 grad: 0.1235 (0.1249) loss: 0.7887 (0.8018) time: 0.1194 data: 0.0372 max mem: 9377 +Train: [56] [2500/6250] eta: 0:08:44 lr: 0.000054 grad: 0.1182 (0.1250) loss: 0.8049 (0.8016) time: 0.1262 data: 0.0437 max mem: 9377 +Train: [56] [2600/6250] eta: 0:08:29 lr: 0.000054 grad: 0.1295 (0.1251) loss: 0.7961 (0.8014) time: 0.1433 data: 0.0611 max mem: 9377 +Train: [56] [2700/6250] eta: 0:08:15 lr: 0.000054 grad: 0.1178 (0.1250) loss: 0.7958 (0.8013) time: 0.1396 data: 0.0627 max mem: 9377 +Train: [56] [2800/6250] eta: 0:08:00 lr: 0.000054 grad: 0.1214 (0.1251) loss: 0.7958 (0.8011) time: 0.1031 data: 0.0154 max mem: 9377 +Train: [56] [2900/6250] eta: 0:07:45 lr: 0.000054 grad: 0.1234 (0.1252) loss: 0.7966 (0.8009) time: 0.1210 data: 0.0389 max mem: 9377 +Train: [56] [3000/6250] eta: 0:07:31 lr: 0.000054 grad: 0.1253 (0.1255) loss: 0.7928 (0.8008) time: 0.1206 data: 0.0390 max mem: 9377 +Train: [56] [3100/6250] eta: 0:07:17 lr: 0.000054 grad: 0.1232 (0.1254) loss: 0.8007 (0.8007) time: 0.1301 data: 0.0512 max mem: 9377 +Train: [56] [3200/6250] eta: 0:07:03 lr: 0.000054 grad: 0.1278 (0.1254) loss: 0.7993 (0.8006) time: 0.1445 data: 0.0613 max mem: 9377 +Train: [56] [3300/6250] eta: 0:06:49 lr: 0.000054 grad: 0.1260 (0.1255) loss: 0.7953 (0.8006) time: 0.1543 data: 0.0707 max mem: 9377 +Train: [56] [3400/6250] eta: 0:06:34 lr: 0.000054 grad: 0.1241 (0.1256) loss: 0.8057 (0.8005) time: 0.1236 data: 0.0425 max mem: 9377 +Train: [56] [3500/6250] eta: 0:06:21 lr: 0.000054 grad: 0.1283 (0.1257) loss: 0.8075 (0.8005) time: 0.1467 data: 0.0670 max mem: 9377 +Train: [56] [3600/6250] eta: 0:06:07 lr: 0.000054 grad: 0.1269 (0.1257) loss: 0.8010 (0.8005) time: 0.1318 data: 0.0449 max mem: 9377 +Train: [56] [3700/6250] eta: 0:05:53 lr: 0.000054 grad: 0.1260 (0.1259) loss: 0.8031 (0.8004) time: 0.1575 data: 0.0751 max mem: 9377 +Train: [56] [3800/6250] eta: 0:05:39 lr: 0.000054 grad: 0.1215 (0.1259) loss: 0.7996 (0.8004) time: 0.1456 data: 0.0637 max mem: 9377 +Train: [56] [3900/6250] eta: 0:05:25 lr: 0.000054 grad: 0.1135 (0.1259) loss: 0.8026 (0.8004) time: 0.1197 data: 0.0350 max mem: 9377 +Train: [56] [4000/6250] eta: 0:05:11 lr: 0.000054 grad: 0.1242 (0.1260) loss: 0.8013 (0.8003) time: 0.1619 data: 0.0862 max mem: 9377 +Train: [56] [4100/6250] eta: 0:04:57 lr: 0.000054 grad: 0.1242 (0.1260) loss: 0.7984 (0.8002) time: 0.1296 data: 0.0442 max mem: 9377 +Train: [56] [4200/6250] eta: 0:04:43 lr: 0.000054 grad: 0.1202 (0.1260) loss: 0.8033 (0.8002) time: 0.1358 data: 0.0578 max mem: 9377 +Train: [56] [4300/6250] eta: 0:04:29 lr: 0.000054 grad: 0.1173 (0.1259) loss: 0.7933 (0.8002) time: 0.1603 data: 0.0795 max mem: 9377 +Train: [56] [4400/6250] eta: 0:04:16 lr: 0.000054 grad: 0.1266 (0.1260) loss: 0.8003 (0.8002) time: 0.1474 data: 0.0626 max mem: 9377 +Train: [56] [4500/6250] eta: 0:04:02 lr: 0.000054 grad: 0.1263 (0.1260) loss: 0.7932 (0.8001) time: 0.1346 data: 0.0526 max mem: 9377 +Train: [56] [4600/6250] eta: 0:03:49 lr: 0.000054 grad: 0.1314 (0.1260) loss: 0.7879 (0.8000) time: 0.1732 data: 0.1001 max mem: 9377 +Train: [56] [4700/6250] eta: 0:03:35 lr: 0.000054 grad: 0.1237 (0.1260) loss: 0.7955 (0.7999) time: 0.1453 data: 0.0573 max mem: 9377 +Train: [56] [4800/6250] eta: 0:03:22 lr: 0.000054 grad: 0.1237 (0.1261) loss: 0.8038 (0.7999) time: 0.1369 data: 0.0514 max mem: 9377 +Train: [56] [4900/6250] eta: 0:03:08 lr: 0.000054 grad: 0.1281 (0.1261) loss: 0.7999 (0.7999) time: 0.1412 data: 0.0660 max mem: 9377 +Train: [56] [5000/6250] eta: 0:02:54 lr: 0.000054 grad: 0.1288 (0.1262) loss: 0.8021 (0.7998) time: 0.1743 data: 0.0986 max mem: 9377 +Train: [56] [5100/6250] eta: 0:02:41 lr: 0.000054 grad: 0.1223 (0.1263) loss: 0.7988 (0.7998) time: 0.1948 data: 0.1201 max mem: 9377 +Train: [56] [5200/6250] eta: 0:02:27 lr: 0.000054 grad: 0.1268 (0.1264) loss: 0.7923 (0.7998) time: 0.1432 data: 0.0602 max mem: 9377 +Train: [56] [5300/6250] eta: 0:02:13 lr: 0.000054 grad: 0.1282 (0.1264) loss: 0.7985 (0.7998) time: 0.1530 data: 0.0694 max mem: 9377 +Train: [56] [5400/6250] eta: 0:01:59 lr: 0.000054 grad: 0.1237 (0.1264) loss: 0.7975 (0.7998) time: 0.1406 data: 0.0567 max mem: 9377 +Train: [56] [5500/6250] eta: 0:01:45 lr: 0.000053 grad: 0.1242 (0.1265) loss: 0.7982 (0.7998) time: 0.1444 data: 0.0678 max mem: 9377 +Train: [56] [5600/6250] eta: 0:01:31 lr: 0.000053 grad: 0.1163 (0.1265) loss: 0.8033 (0.7998) time: 0.1326 data: 0.0443 max mem: 9377 +Train: [56] [5700/6250] eta: 0:01:17 lr: 0.000053 grad: 0.1274 (0.1265) loss: 0.7967 (0.7998) time: 0.1430 data: 0.0548 max mem: 9377 +Train: [56] [5800/6250] eta: 0:01:03 lr: 0.000053 grad: 0.1301 (0.1266) loss: 0.7956 (0.7998) time: 0.1337 data: 0.0467 max mem: 9377 +Train: [56] [5900/6250] eta: 0:00:49 lr: 0.000053 grad: 0.1354 (0.1267) loss: 0.7906 (0.7998) time: 0.1395 data: 0.0591 max mem: 9377 +Train: [56] [6000/6250] eta: 0:00:35 lr: 0.000053 grad: 0.1274 (0.1267) loss: 0.7986 (0.7997) time: 0.1552 data: 0.0732 max mem: 9377 +Train: [56] [6100/6250] eta: 0:00:21 lr: 0.000053 grad: 0.1243 (0.1267) loss: 0.8018 (0.7997) time: 0.1480 data: 0.0696 max mem: 9377 +Train: [56] [6200/6250] eta: 0:00:07 lr: 0.000053 grad: 0.1311 (0.1268) loss: 0.7956 (0.7997) time: 0.1618 data: 0.0840 max mem: 9377 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.1245 (0.1269) loss: 0.8025 (0.7998) time: 0.1371 data: 0.0536 max mem: 9377 +Train: [56] Total time: 0:14:45 (0.1416 s / it) +Averaged stats: lr: 0.000053 grad: 0.1245 (0.1269) loss: 0.8025 (0.7998) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:05:35 loss: 0.8411 (0.8411) time: 5.4074 data: 5.3778 max mem: 9377 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8406 (0.8422) time: 0.1321 data: 0.1051 max mem: 9377 +Eval (hcp-train-subset): [56] Total time: 0:00:13 (0.2116 s / it) +Averaged stats (hcp-train-subset): loss: 0.8406 (0.8422) +Eval (hcp-val): [56] [ 0/62] eta: 0:04:01 loss: 0.8394 (0.8394) time: 3.8908 data: 3.8113 max mem: 9377 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8407 (0.8409) time: 0.1592 data: 0.1340 max mem: 9377 +Eval (hcp-val): [56] Total time: 0:00:14 (0.2340 s / it) +Averaged stats (hcp-val): loss: 0.8407 (0.8409) +Eval (nsd-val): [56] [ 0/62] eta: 0:04:20 loss: 0.8123 (0.8123) time: 4.2043 data: 4.1403 max mem: 9377 +Eval (nsd-val): [56] [61/62] eta: 0:00:00 loss: 0.8233 (0.8251) time: 0.1126 data: 0.0876 max mem: 9377 +Eval (nsd-val): [56] Total time: 0:00:15 (0.2443 s / it) +Averaged stats (nsd-val): loss: 0.8233 (0.8251) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [57] [ 0/6250] eta: 12:27:19 lr: 0.000053 grad: 0.5707 (0.5707) loss: 0.7473 (0.7473) time: 7.1743 data: 7.0654 max mem: 9377 +Train: [57] [ 100/6250] eta: 0:23:05 lr: 0.000053 grad: 0.1251 (0.1703) loss: 0.8091 (0.8143) time: 0.1740 data: 0.0655 max mem: 9377 +Train: [57] [ 200/6250] eta: 0:19:24 lr: 0.000053 grad: 0.1364 (0.1570) loss: 0.8060 (0.8117) time: 0.1624 data: 0.0618 max mem: 9377 +Train: [57] [ 300/6250] eta: 0:17:41 lr: 0.000053 grad: 0.1460 (0.1511) loss: 0.7975 (0.8072) time: 0.1542 data: 0.0607 max mem: 9377 +Train: [57] [ 400/6250] eta: 0:16:32 lr: 0.000053 grad: 0.1193 (0.1471) loss: 0.8011 (0.8055) time: 0.1534 data: 0.0632 max mem: 9377 +Train: [57] [ 500/6250] eta: 0:15:58 lr: 0.000053 grad: 0.1405 (0.1449) loss: 0.7929 (0.8042) time: 0.1682 data: 0.0853 max mem: 9377 +Train: [57] [ 600/6250] eta: 0:15:33 lr: 0.000053 grad: 0.1280 (0.1427) loss: 0.8012 (0.8032) time: 0.1688 data: 0.0879 max mem: 9377 +Train: [57] [ 700/6250] eta: 0:15:11 lr: 0.000053 grad: 0.1267 (0.1417) loss: 0.8040 (0.8024) time: 0.1675 data: 0.0818 max mem: 9377 +Train: [57] [ 800/6250] eta: 0:14:55 lr: 0.000053 grad: 0.1252 (0.1395) loss: 0.8060 (0.8025) time: 0.1758 data: 0.0875 max mem: 9377 +Train: [57] [ 900/6250] eta: 0:14:43 lr: 0.000053 grad: 0.1167 (0.1378) loss: 0.8132 (0.8025) time: 0.1488 data: 0.0650 max mem: 9377 +Train: [57] [1000/6250] eta: 0:14:31 lr: 0.000053 grad: 0.1113 (0.1361) loss: 0.8137 (0.8029) time: 0.1677 data: 0.0853 max mem: 9377 +Train: [57] [1100/6250] eta: 0:14:12 lr: 0.000053 grad: 0.1210 (0.1347) loss: 0.8047 (0.8033) time: 0.1626 data: 0.0784 max mem: 9377 +Train: [57] [1200/6250] eta: 0:13:54 lr: 0.000053 grad: 0.1128 (0.1336) loss: 0.8087 (0.8036) time: 0.1573 data: 0.0723 max mem: 9377 +Train: [57] [1300/6250] eta: 0:13:33 lr: 0.000053 grad: 0.1107 (0.1325) loss: 0.8089 (0.8039) time: 0.1471 data: 0.0594 max mem: 9377 +Train: [57] [1400/6250] eta: 0:13:08 lr: 0.000053 grad: 0.1212 (0.1318) loss: 0.8100 (0.8040) time: 0.1272 data: 0.0416 max mem: 9377 +Train: [57] [1500/6250] eta: 0:12:46 lr: 0.000053 grad: 0.1259 (0.1314) loss: 0.7996 (0.8040) time: 0.1565 data: 0.0710 max mem: 9377 +Train: [57] [1600/6250] eta: 0:12:24 lr: 0.000053 grad: 0.1195 (0.1309) loss: 0.8010 (0.8040) time: 0.1560 data: 0.0637 max mem: 9377 +Train: [57] [1700/6250] eta: 0:12:02 lr: 0.000053 grad: 0.1181 (0.1303) loss: 0.8005 (0.8040) time: 0.1391 data: 0.0533 max mem: 9377 +Train: [57] [1800/6250] eta: 0:11:44 lr: 0.000053 grad: 0.1149 (0.1304) loss: 0.8039 (0.8038) time: 0.1354 data: 0.0541 max mem: 9377 +Train: [57] [1900/6250] eta: 0:11:25 lr: 0.000053 grad: 0.1216 (0.1300) loss: 0.8059 (0.8039) time: 0.1630 data: 0.0758 max mem: 9377 +Train: [57] [2000/6250] eta: 0:11:06 lr: 0.000053 grad: 0.1171 (0.1298) loss: 0.8088 (0.8040) time: 0.1186 data: 0.0205 max mem: 9377 +Train: [57] [2100/6250] eta: 0:10:46 lr: 0.000053 grad: 0.1232 (0.1298) loss: 0.8115 (0.8040) time: 0.1478 data: 0.0582 max mem: 9377 +Train: [57] [2200/6250] eta: 0:10:27 lr: 0.000053 grad: 0.1214 (0.1299) loss: 0.8041 (0.8041) time: 0.1454 data: 0.0654 max mem: 9377 +Train: [57] [2300/6250] eta: 0:10:10 lr: 0.000052 grad: 0.1198 (0.1297) loss: 0.7996 (0.8040) time: 0.1712 data: 0.0844 max mem: 9377 +Train: [57] [2400/6250] eta: 0:09:52 lr: 0.000052 grad: 0.1274 (0.1296) loss: 0.8046 (0.8039) time: 0.1317 data: 0.0484 max mem: 9377 +Train: [57] [2500/6250] eta: 0:09:33 lr: 0.000052 grad: 0.1266 (0.1297) loss: 0.8018 (0.8038) time: 0.1349 data: 0.0526 max mem: 9377 +Train: [57] [2600/6250] eta: 0:09:15 lr: 0.000052 grad: 0.1220 (0.1298) loss: 0.8037 (0.8039) time: 0.1179 data: 0.0333 max mem: 9377 +Train: [57] [2700/6250] eta: 0:08:58 lr: 0.000052 grad: 0.1182 (0.1298) loss: 0.8059 (0.8039) time: 0.1325 data: 0.0526 max mem: 9377 +Train: [57] [2800/6250] eta: 0:08:41 lr: 0.000052 grad: 0.1153 (0.1297) loss: 0.8064 (0.8039) time: 0.1388 data: 0.0630 max mem: 9377 +Train: [57] [2900/6250] eta: 0:08:25 lr: 0.000052 grad: 0.1260 (0.1295) loss: 0.7886 (0.8038) time: 0.1497 data: 0.0663 max mem: 9377 +Train: [57] [3000/6250] eta: 0:08:09 lr: 0.000052 grad: 0.1208 (0.1293) loss: 0.8004 (0.8037) time: 0.1485 data: 0.0654 max mem: 9377 +Train: [57] [3100/6250] eta: 0:07:52 lr: 0.000052 grad: 0.1234 (0.1292) loss: 0.8078 (0.8037) time: 0.1340 data: 0.0501 max mem: 9377 +Train: [57] [3200/6250] eta: 0:07:35 lr: 0.000052 grad: 0.1263 (0.1292) loss: 0.7886 (0.8036) time: 0.1428 data: 0.0606 max mem: 9377 +Train: [57] [3300/6250] eta: 0:07:19 lr: 0.000052 grad: 0.1189 (0.1292) loss: 0.7988 (0.8035) time: 0.1388 data: 0.0562 max mem: 9377 +Train: [57] [3400/6250] eta: 0:07:04 lr: 0.000052 grad: 0.1227 (0.1292) loss: 0.7965 (0.8034) time: 0.1313 data: 0.0476 max mem: 9377 +Train: [57] [3500/6250] eta: 0:06:48 lr: 0.000052 grad: 0.1212 (0.1292) loss: 0.7980 (0.8034) time: 0.1384 data: 0.0541 max mem: 9377 +Train: [57] [3600/6250] eta: 0:06:32 lr: 0.000052 grad: 0.1248 (0.1292) loss: 0.8051 (0.8033) time: 0.1195 data: 0.0338 max mem: 9377 +Train: [57] [3700/6250] eta: 0:06:17 lr: 0.000052 grad: 0.1335 (0.1292) loss: 0.8061 (0.8032) time: 0.1453 data: 0.0607 max mem: 9377 +Train: [57] [3800/6250] eta: 0:06:01 lr: 0.000052 grad: 0.1263 (0.1293) loss: 0.8047 (0.8031) time: 0.1072 data: 0.0240 max mem: 9377 +Train: [57] [3900/6250] eta: 0:05:46 lr: 0.000052 grad: 0.1250 (0.1294) loss: 0.7976 (0.8030) time: 0.1186 data: 0.0314 max mem: 9377 +Train: [57] [4000/6250] eta: 0:05:30 lr: 0.000052 grad: 0.1266 (0.1293) loss: 0.8022 (0.8030) time: 0.1429 data: 0.0643 max mem: 9377 +Train: [57] [4100/6250] eta: 0:05:15 lr: 0.000052 grad: 0.1298 (0.1294) loss: 0.7938 (0.8029) time: 0.1418 data: 0.0617 max mem: 9377 +Train: [57] [4200/6250] eta: 0:05:00 lr: 0.000052 grad: 0.1346 (0.1294) loss: 0.7971 (0.8028) time: 0.1399 data: 0.0580 max mem: 9377 +Train: [57] [4300/6250] eta: 0:04:46 lr: 0.000052 grad: 0.1199 (0.1295) loss: 0.8030 (0.8027) time: 0.1415 data: 0.0596 max mem: 9377 +Train: [57] [4400/6250] eta: 0:04:31 lr: 0.000052 grad: 0.1230 (0.1297) loss: 0.8033 (0.8027) time: 0.1537 data: 0.0721 max mem: 9377 +Train: [57] [4500/6250] eta: 0:04:16 lr: 0.000052 grad: 0.1209 (0.1296) loss: 0.8008 (0.8026) time: 0.1407 data: 0.0567 max mem: 9377 +Train: [57] [4600/6250] eta: 0:04:01 lr: 0.000052 grad: 0.1191 (0.1296) loss: 0.8051 (0.8025) time: 0.1611 data: 0.0865 max mem: 9377 +Train: [57] [4700/6250] eta: 0:03:47 lr: 0.000052 grad: 0.1344 (0.1295) loss: 0.8018 (0.8025) time: 0.1461 data: 0.0663 max mem: 9377 +Train: [57] [4800/6250] eta: 0:03:32 lr: 0.000052 grad: 0.1159 (0.1295) loss: 0.8108 (0.8025) time: 0.1448 data: 0.0599 max mem: 9377 +Train: [57] [4900/6250] eta: 0:03:17 lr: 0.000052 grad: 0.1241 (0.1294) loss: 0.8000 (0.8025) time: 0.1136 data: 0.0270 max mem: 9377 +Train: [57] [5000/6250] eta: 0:03:03 lr: 0.000052 grad: 0.1259 (0.1294) loss: 0.8031 (0.8025) time: 0.1526 data: 0.0698 max mem: 9377 +Train: [57] [5100/6250] eta: 0:02:48 lr: 0.000052 grad: 0.1251 (0.1293) loss: 0.8041 (0.8025) time: 0.1565 data: 0.0753 max mem: 9377 +Train: [57] [5200/6250] eta: 0:02:33 lr: 0.000052 grad: 0.1284 (0.1294) loss: 0.8005 (0.8025) time: 0.1140 data: 0.0283 max mem: 9377 +Train: [57] [5300/6250] eta: 0:02:19 lr: 0.000052 grad: 0.1214 (0.1293) loss: 0.7987 (0.8025) time: 0.1891 data: 0.1149 max mem: 9377 +Train: [57] [5400/6250] eta: 0:02:04 lr: 0.000051 grad: 0.1282 (0.1294) loss: 0.8011 (0.8024) time: 0.1370 data: 0.0597 max mem: 9377 +Train: [57] [5500/6250] eta: 0:01:49 lr: 0.000051 grad: 0.1128 (0.1296) loss: 0.8107 (0.8024) time: 0.1508 data: 0.0711 max mem: 9377 +Train: [57] [5600/6250] eta: 0:01:34 lr: 0.000051 grad: 0.1295 (0.1296) loss: 0.7984 (0.8023) time: 0.1172 data: 0.0371 max mem: 9377 +Train: [57] [5700/6250] eta: 0:01:20 lr: 0.000051 grad: 0.1329 (0.1297) loss: 0.8037 (0.8023) time: 0.1467 data: 0.0594 max mem: 9377 +Train: [57] [5800/6250] eta: 0:01:05 lr: 0.000051 grad: 0.1298 (0.1298) loss: 0.8041 (0.8022) time: 0.1575 data: 0.0703 max mem: 9377 +Train: [57] [5900/6250] eta: 0:00:50 lr: 0.000051 grad: 0.1230 (0.1298) loss: 0.8033 (0.8022) time: 0.1313 data: 0.0488 max mem: 9377 +Train: [57] [6000/6250] eta: 0:00:36 lr: 0.000051 grad: 0.1237 (0.1298) loss: 0.8028 (0.8021) time: 0.1157 data: 0.0218 max mem: 9377 +Train: [57] [6100/6250] eta: 0:00:21 lr: 0.000051 grad: 0.1225 (0.1298) loss: 0.8053 (0.8021) time: 0.1180 data: 0.0289 max mem: 9377 +Train: [57] [6200/6250] eta: 0:00:07 lr: 0.000051 grad: 0.1344 (0.1298) loss: 0.7956 (0.8020) time: 0.1163 data: 0.0328 max mem: 9377 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1321 (0.1299) loss: 0.7948 (0.8020) time: 0.1414 data: 0.0560 max mem: 9377 +Train: [57] Total time: 0:15:07 (0.1453 s / it) +Averaged stats: lr: 0.000051 grad: 0.1321 (0.1299) loss: 0.7948 (0.8020) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:04:22 loss: 0.8411 (0.8411) time: 4.2271 data: 4.1638 max mem: 9377 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8435 (0.8432) time: 0.1035 data: 0.0788 max mem: 9377 +Eval (hcp-train-subset): [57] Total time: 0:00:13 (0.2142 s / it) +Averaged stats (hcp-train-subset): loss: 0.8435 (0.8432) +Eval (hcp-val): [57] [ 0/62] eta: 0:04:07 loss: 0.8369 (0.8369) time: 3.9866 data: 3.9250 max mem: 9377 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8397 (0.8417) time: 0.1332 data: 0.1084 max mem: 9377 +Eval (hcp-val): [57] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (hcp-val): loss: 0.8397 (0.8417) +Eval (nsd-val): [57] [ 0/62] eta: 0:03:44 loss: 0.8251 (0.8251) time: 3.6240 data: 3.5583 max mem: 9377 +Eval (nsd-val): [57] [61/62] eta: 0:00:00 loss: 0.8366 (0.8373) time: 0.1164 data: 0.0916 max mem: 9377 +Eval (nsd-val): [57] Total time: 0:00:13 (0.2147 s / it) +Averaged stats (nsd-val): loss: 0.8366 (0.8373) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [58] [ 0/6250] eta: 11:08:31 lr: 0.000051 grad: 0.1442 (0.1442) loss: 0.8150 (0.8150) time: 6.4178 data: 6.3241 max mem: 9377 +Train: [58] [ 100/6250] eta: 0:19:51 lr: 0.000051 grad: 0.1234 (0.1741) loss: 0.8215 (0.8136) time: 0.1449 data: 0.0473 max mem: 9377 +Train: [58] [ 200/6250] eta: 0:17:12 lr: 0.000051 grad: 0.1245 (0.1658) loss: 0.8177 (0.8058) time: 0.1494 data: 0.0633 max mem: 9377 +Train: [58] [ 300/6250] eta: 0:15:53 lr: 0.000051 grad: 0.1162 (0.1569) loss: 0.8235 (0.8037) time: 0.1573 data: 0.0572 max mem: 9377 +Train: [58] [ 400/6250] eta: 0:15:02 lr: 0.000051 grad: 0.1309 (0.1506) loss: 0.7942 (0.8035) time: 0.1475 data: 0.0623 max mem: 9377 +Train: [58] [ 500/6250] eta: 0:14:24 lr: 0.000051 grad: 0.1220 (0.1459) loss: 0.7952 (0.8031) time: 0.1370 data: 0.0438 max mem: 9377 +Train: [58] [ 600/6250] eta: 0:14:12 lr: 0.000051 grad: 0.1254 (0.1426) loss: 0.7974 (0.8026) time: 0.1357 data: 0.0434 max mem: 9377 +Train: [58] [ 700/6250] eta: 0:13:53 lr: 0.000051 grad: 0.1256 (0.1406) loss: 0.7992 (0.8025) time: 0.1437 data: 0.0567 max mem: 9377 +Train: [58] [ 800/6250] eta: 0:13:38 lr: 0.000051 grad: 0.1228 (0.1390) loss: 0.7981 (0.8025) time: 0.1317 data: 0.0459 max mem: 9377 +Train: [58] [ 900/6250] eta: 0:13:20 lr: 0.000051 grad: 0.1251 (0.1383) loss: 0.8003 (0.8024) time: 0.1352 data: 0.0490 max mem: 9377 +Train: [58] [1000/6250] eta: 0:13:01 lr: 0.000051 grad: 0.1193 (0.1372) loss: 0.8025 (0.8023) time: 0.1323 data: 0.0491 max mem: 9377 +Train: [58] [1100/6250] eta: 0:12:40 lr: 0.000051 grad: 0.1194 (0.1361) loss: 0.7991 (0.8024) time: 0.1474 data: 0.0580 max mem: 9377 +Train: [58] [1200/6250] eta: 0:12:23 lr: 0.000051 grad: 0.1272 (0.1354) loss: 0.8024 (0.8023) time: 0.1442 data: 0.0541 max mem: 9377 +Train: [58] [1300/6250] eta: 0:12:07 lr: 0.000051 grad: 0.1222 (0.1348) loss: 0.7919 (0.8022) time: 0.1400 data: 0.0571 max mem: 9377 +Train: [58] [1400/6250] eta: 0:11:50 lr: 0.000051 grad: 0.1191 (0.1343) loss: 0.8087 (0.8022) time: 0.1421 data: 0.0610 max mem: 9377 +Train: [58] [1500/6250] eta: 0:11:33 lr: 0.000051 grad: 0.1248 (0.1339) loss: 0.8111 (0.8021) time: 0.1336 data: 0.0479 max mem: 9377 +Train: [58] [1600/6250] eta: 0:11:15 lr: 0.000051 grad: 0.1113 (0.1331) loss: 0.8088 (0.8022) time: 0.1115 data: 0.0284 max mem: 9377 +Train: [58] [1700/6250] eta: 0:10:59 lr: 0.000051 grad: 0.1176 (0.1328) loss: 0.8017 (0.8021) time: 0.1517 data: 0.0765 max mem: 9377 +Train: [58] [1800/6250] eta: 0:10:43 lr: 0.000051 grad: 0.1176 (0.1322) loss: 0.8021 (0.8021) time: 0.1462 data: 0.0595 max mem: 9377 +Train: [58] [1900/6250] eta: 0:10:27 lr: 0.000051 grad: 0.1210 (0.1317) loss: 0.7965 (0.8021) time: 0.1373 data: 0.0588 max mem: 9377 +Train: [58] [2000/6250] eta: 0:10:11 lr: 0.000051 grad: 0.1278 (0.1315) loss: 0.8022 (0.8020) time: 0.1384 data: 0.0576 max mem: 9377 +Train: [58] [2100/6250] eta: 0:09:57 lr: 0.000051 grad: 0.1202 (0.1312) loss: 0.7991 (0.8018) time: 0.1501 data: 0.0647 max mem: 9377 +Train: [58] [2200/6250] eta: 0:09:44 lr: 0.000050 grad: 0.1247 (0.1311) loss: 0.8032 (0.8015) time: 0.1466 data: 0.0701 max mem: 9377 +Train: [58] [2300/6250] eta: 0:09:28 lr: 0.000050 grad: 0.1242 (0.1309) loss: 0.7982 (0.8013) time: 0.1301 data: 0.0494 max mem: 9377 +Train: [58] [2400/6250] eta: 0:09:13 lr: 0.000050 grad: 0.1262 (0.1309) loss: 0.7972 (0.8011) time: 0.1343 data: 0.0513 max mem: 9377 +Train: [58] [2500/6250] eta: 0:08:58 lr: 0.000050 grad: 0.1204 (0.1306) loss: 0.8022 (0.8010) time: 0.1286 data: 0.0438 max mem: 9377 +Train: [58] [2600/6250] eta: 0:08:43 lr: 0.000050 grad: 0.1243 (0.1304) loss: 0.8027 (0.8008) time: 0.1183 data: 0.0309 max mem: 9377 +Train: [58] [2700/6250] eta: 0:08:28 lr: 0.000050 grad: 0.1238 (0.1302) loss: 0.8003 (0.8007) time: 0.1256 data: 0.0367 max mem: 9377 +Train: [58] [2800/6250] eta: 0:08:13 lr: 0.000050 grad: 0.1188 (0.1303) loss: 0.8067 (0.8006) time: 0.1265 data: 0.0416 max mem: 9377 +Train: [58] [2900/6250] eta: 0:07:59 lr: 0.000050 grad: 0.1172 (0.1301) loss: 0.8005 (0.8006) time: 0.1442 data: 0.0676 max mem: 9377 +Train: [58] [3000/6250] eta: 0:07:44 lr: 0.000050 grad: 0.1327 (0.1300) loss: 0.7895 (0.8005) time: 0.1372 data: 0.0539 max mem: 9377 +Train: [58] [3100/6250] eta: 0:07:30 lr: 0.000050 grad: 0.1236 (0.1300) loss: 0.8002 (0.8005) time: 0.1338 data: 0.0468 max mem: 9377 +Train: [58] [3200/6250] eta: 0:07:15 lr: 0.000050 grad: 0.1264 (0.1298) loss: 0.7966 (0.8006) time: 0.1242 data: 0.0429 max mem: 9377 +Train: [58] [3300/6250] eta: 0:07:00 lr: 0.000050 grad: 0.1240 (0.1297) loss: 0.7982 (0.8005) time: 0.1339 data: 0.0466 max mem: 9377 +Train: [58] [3400/6250] eta: 0:06:46 lr: 0.000050 grad: 0.1221 (0.1297) loss: 0.7942 (0.8005) time: 0.1365 data: 0.0549 max mem: 9377 +Train: [58] [3500/6250] eta: 0:06:31 lr: 0.000050 grad: 0.1287 (0.1297) loss: 0.8010 (0.8005) time: 0.1358 data: 0.0485 max mem: 9377 +Train: [58] [3600/6250] eta: 0:06:17 lr: 0.000050 grad: 0.1327 (0.1298) loss: 0.7977 (0.8004) time: 0.1457 data: 0.0670 max mem: 9377 +Train: [58] [3700/6250] eta: 0:06:02 lr: 0.000050 grad: 0.1334 (0.1299) loss: 0.7959 (0.8002) time: 0.1303 data: 0.0504 max mem: 9377 +Train: [58] [3800/6250] eta: 0:05:47 lr: 0.000050 grad: 0.1326 (0.1301) loss: 0.7920 (0.8001) time: 0.1163 data: 0.0347 max mem: 9377 +Train: [58] [3900/6250] eta: 0:05:33 lr: 0.000050 grad: 0.1344 (0.1302) loss: 0.7977 (0.8000) time: 0.1347 data: 0.0556 max mem: 9377 +Train: [58] [4000/6250] eta: 0:05:18 lr: 0.000050 grad: 0.1236 (0.1304) loss: 0.7970 (0.7998) time: 0.1497 data: 0.0688 max mem: 9377 +Train: [58] [4100/6250] eta: 0:05:04 lr: 0.000050 grad: 0.1273 (0.1304) loss: 0.7885 (0.7997) time: 0.1488 data: 0.0630 max mem: 9377 +Train: [58] [4200/6250] eta: 0:04:49 lr: 0.000050 grad: 0.1263 (0.1304) loss: 0.7946 (0.7996) time: 0.1267 data: 0.0463 max mem: 9377 +Train: [58] [4300/6250] eta: 0:04:35 lr: 0.000050 grad: 0.1292 (0.1304) loss: 0.7926 (0.7995) time: 0.1278 data: 0.0452 max mem: 9377 +Train: [58] [4400/6250] eta: 0:04:21 lr: 0.000050 grad: 0.1266 (0.1304) loss: 0.7905 (0.7994) time: 0.1418 data: 0.0548 max mem: 9377 +Train: [58] [4500/6250] eta: 0:04:06 lr: 0.000050 grad: 0.1347 (0.1305) loss: 0.7931 (0.7993) time: 0.1420 data: 0.0610 max mem: 9377 +Train: [58] [4600/6250] eta: 0:03:53 lr: 0.000050 grad: 0.1236 (0.1306) loss: 0.8039 (0.7992) time: 0.1453 data: 0.0626 max mem: 9377 +Train: [58] [4700/6250] eta: 0:03:39 lr: 0.000050 grad: 0.1272 (0.1306) loss: 0.7939 (0.7992) time: 0.1599 data: 0.0648 max mem: 9377 +Train: [58] [4800/6250] eta: 0:03:25 lr: 0.000050 grad: 0.1307 (0.1307) loss: 0.7987 (0.7991) time: 0.1535 data: 0.0688 max mem: 9377 +Train: [58] [4900/6250] eta: 0:03:11 lr: 0.000050 grad: 0.1286 (0.1307) loss: 0.7906 (0.7990) time: 0.1531 data: 0.0695 max mem: 9377 +Train: [58] [5000/6250] eta: 0:02:57 lr: 0.000050 grad: 0.1247 (0.1308) loss: 0.7992 (0.7989) time: 0.1314 data: 0.0465 max mem: 9377 +Train: [58] [5100/6250] eta: 0:02:43 lr: 0.000050 grad: 0.1231 (0.1307) loss: 0.7983 (0.7989) time: 0.1275 data: 0.0399 max mem: 9377 +Train: [58] [5200/6250] eta: 0:02:29 lr: 0.000050 grad: 0.1246 (0.1308) loss: 0.7994 (0.7988) time: 0.1105 data: 0.0268 max mem: 9377 +Train: [58] [5300/6250] eta: 0:02:14 lr: 0.000049 grad: 0.1262 (0.1308) loss: 0.7958 (0.7988) time: 0.1138 data: 0.0201 max mem: 9377 +Train: [58] [5400/6250] eta: 0:02:00 lr: 0.000049 grad: 0.1147 (0.1308) loss: 0.8036 (0.7988) time: 0.1260 data: 0.0405 max mem: 9377 +Train: [58] [5500/6250] eta: 0:01:46 lr: 0.000049 grad: 0.1214 (0.1308) loss: 0.8051 (0.7987) time: 0.1203 data: 0.0351 max mem: 9377 +Train: [58] [5600/6250] eta: 0:01:31 lr: 0.000049 grad: 0.1215 (0.1310) loss: 0.7925 (0.7986) time: 0.1446 data: 0.0664 max mem: 9377 +Train: [58] [5700/6250] eta: 0:01:17 lr: 0.000049 grad: 0.1335 (0.1310) loss: 0.7933 (0.7985) time: 0.1151 data: 0.0309 max mem: 9377 +Train: [58] [5800/6250] eta: 0:01:03 lr: 0.000049 grad: 0.1268 (0.1311) loss: 0.7864 (0.7984) time: 0.1366 data: 0.0571 max mem: 9377 +Train: [58] [5900/6250] eta: 0:00:49 lr: 0.000049 grad: 0.1211 (0.1311) loss: 0.7989 (0.7983) time: 0.1287 data: 0.0454 max mem: 9377 +Train: [58] [6000/6250] eta: 0:00:35 lr: 0.000049 grad: 0.1243 (0.1311) loss: 0.7972 (0.7983) time: 0.1312 data: 0.0471 max mem: 9377 +Train: [58] [6100/6250] eta: 0:00:21 lr: 0.000049 grad: 0.1239 (0.1311) loss: 0.8004 (0.7983) time: 0.1164 data: 0.0307 max mem: 9377 +Train: [58] [6200/6250] eta: 0:00:07 lr: 0.000049 grad: 0.1258 (0.1312) loss: 0.7899 (0.7982) time: 0.1316 data: 0.0517 max mem: 9377 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.1261 (0.1312) loss: 0.7965 (0.7981) time: 0.1285 data: 0.0437 max mem: 9377 +Train: [58] Total time: 0:14:44 (0.1415 s / it) +Averaged stats: lr: 0.000049 grad: 0.1261 (0.1312) loss: 0.7965 (0.7981) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:04:48 loss: 0.8366 (0.8366) time: 4.6613 data: 4.6189 max mem: 9377 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8434 (0.8429) time: 0.1100 data: 0.0854 max mem: 9377 +Eval (hcp-train-subset): [58] Total time: 0:00:12 (0.2056 s / it) +Averaged stats (hcp-train-subset): loss: 0.8434 (0.8429) +Eval (hcp-val): [58] [ 0/62] eta: 0:05:39 loss: 0.8392 (0.8392) time: 5.4774 data: 5.4479 max mem: 9377 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8392 (0.8409) time: 0.1251 data: 0.0988 max mem: 9377 +Eval (hcp-val): [58] Total time: 0:00:13 (0.2102 s / it) +Averaged stats (hcp-val): loss: 0.8392 (0.8409) +Eval (nsd-val): [58] [ 0/62] eta: 0:04:26 loss: 0.8153 (0.8153) time: 4.3037 data: 4.2443 max mem: 9377 +Eval (nsd-val): [58] [61/62] eta: 0:00:00 loss: 0.8281 (0.8293) time: 0.1100 data: 0.0851 max mem: 9377 +Eval (nsd-val): [58] Total time: 0:00:12 (0.1986 s / it) +Averaged stats (nsd-val): loss: 0.8281 (0.8293) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [59] [ 0/6250] eta: 9:48:26 lr: 0.000049 grad: 0.0915 (0.0915) loss: 0.8661 (0.8661) time: 5.6491 data: 5.5154 max mem: 9377 +Train: [59] [ 100/6250] eta: 0:19:48 lr: 0.000049 grad: 0.1273 (0.1891) loss: 0.8139 (0.8072) time: 0.1461 data: 0.0512 max mem: 9377 +Train: [59] [ 200/6250] eta: 0:16:59 lr: 0.000049 grad: 0.1630 (0.1766) loss: 0.7911 (0.8035) time: 0.1656 data: 0.0763 max mem: 9377 +Train: [59] [ 300/6250] eta: 0:15:36 lr: 0.000049 grad: 0.1452 (0.1677) loss: 0.7987 (0.7999) time: 0.1349 data: 0.0424 max mem: 9377 +Train: [59] [ 400/6250] eta: 0:14:51 lr: 0.000049 grad: 0.1330 (0.1624) loss: 0.8003 (0.7988) time: 0.1230 data: 0.0264 max mem: 9377 +Train: [59] [ 500/6250] eta: 0:14:18 lr: 0.000049 grad: 0.1400 (0.1583) loss: 0.7885 (0.7985) time: 0.1429 data: 0.0514 max mem: 9377 +Train: [59] [ 600/6250] eta: 0:13:53 lr: 0.000049 grad: 0.1246 (0.1547) loss: 0.8022 (0.7986) time: 0.1466 data: 0.0623 max mem: 9377 +Train: [59] [ 700/6250] eta: 0:13:34 lr: 0.000049 grad: 0.1353 (0.1522) loss: 0.7934 (0.7991) time: 0.1340 data: 0.0464 max mem: 9377 +Train: [59] [ 800/6250] eta: 0:13:15 lr: 0.000049 grad: 0.1320 (0.1506) loss: 0.8018 (0.7991) time: 0.1459 data: 0.0594 max mem: 9377 +Train: [59] [ 900/6250] eta: 0:13:00 lr: 0.000049 grad: 0.1364 (0.1498) loss: 0.7954 (0.7990) time: 0.1407 data: 0.0610 max mem: 9377 +Train: [59] [1000/6250] eta: 0:12:48 lr: 0.000049 grad: 0.1305 (0.1488) loss: 0.8006 (0.7988) time: 0.1524 data: 0.0721 max mem: 9377 +Train: [59] [1100/6250] eta: 0:12:38 lr: 0.000049 grad: 0.1284 (0.1479) loss: 0.7965 (0.7986) time: 0.1519 data: 0.0770 max mem: 9377 +Train: [59] [1200/6250] eta: 0:12:26 lr: 0.000049 grad: 0.1284 (0.1469) loss: 0.7989 (0.7980) time: 0.1784 data: 0.1034 max mem: 9377 +Train: [59] [1300/6250] eta: 0:12:14 lr: 0.000049 grad: 0.1388 (0.1462) loss: 0.7941 (0.7975) time: 0.1590 data: 0.0783 max mem: 9377 +Train: [59] [1400/6250] eta: 0:12:00 lr: 0.000049 grad: 0.1323 (0.1456) loss: 0.7977 (0.7972) time: 0.1430 data: 0.0587 max mem: 9377 +Train: [59] [1500/6250] eta: 0:11:45 lr: 0.000049 grad: 0.1381 (0.1453) loss: 0.7852 (0.7966) time: 0.1458 data: 0.0632 max mem: 9377 +Train: [59] [1600/6250] eta: 0:11:30 lr: 0.000049 grad: 0.1357 (0.1446) loss: 0.7841 (0.7962) time: 0.1542 data: 0.0765 max mem: 9377 +Train: [59] [1700/6250] eta: 0:11:17 lr: 0.000049 grad: 0.1370 (0.1444) loss: 0.7846 (0.7956) time: 0.1803 data: 0.0994 max mem: 9377 +Train: [59] [1800/6250] eta: 0:11:01 lr: 0.000049 grad: 0.1291 (0.1440) loss: 0.7870 (0.7951) time: 0.1410 data: 0.0624 max mem: 9377 +Train: [59] [1900/6250] eta: 0:10:44 lr: 0.000049 grad: 0.1310 (0.1440) loss: 0.7858 (0.7946) time: 0.1466 data: 0.0631 max mem: 9377 +Train: [59] [2000/6250] eta: 0:10:26 lr: 0.000049 grad: 0.1299 (0.1439) loss: 0.7921 (0.7942) time: 0.1284 data: 0.0446 max mem: 9377 +Train: [59] [2100/6250] eta: 0:10:08 lr: 0.000048 grad: 0.1341 (0.1436) loss: 0.7902 (0.7939) time: 0.1346 data: 0.0509 max mem: 9377 +Train: [59] [2200/6250] eta: 0:09:52 lr: 0.000048 grad: 0.1424 (0.1434) loss: 0.7947 (0.7937) time: 0.1356 data: 0.0517 max mem: 9377 +Train: [59] [2300/6250] eta: 0:09:36 lr: 0.000048 grad: 0.1349 (0.1432) loss: 0.7870 (0.7936) time: 0.1176 data: 0.0322 max mem: 9377 +Train: [59] [2400/6250] eta: 0:09:20 lr: 0.000048 grad: 0.1258 (0.1429) loss: 0.7954 (0.7935) time: 0.1442 data: 0.0649 max mem: 9377 +Train: [59] [2500/6250] eta: 0:09:04 lr: 0.000048 grad: 0.1356 (0.1426) loss: 0.7914 (0.7934) time: 0.1473 data: 0.0678 max mem: 9377 +Train: [59] [2600/6250] eta: 0:08:49 lr: 0.000048 grad: 0.1246 (0.1422) loss: 0.7906 (0.7933) time: 0.1423 data: 0.0586 max mem: 9377 +Train: [59] [2700/6250] eta: 0:08:33 lr: 0.000048 grad: 0.1319 (0.1420) loss: 0.7794 (0.7932) time: 0.0950 data: 0.0002 max mem: 9377 +Train: [59] [2800/6250] eta: 0:08:19 lr: 0.000048 grad: 0.1239 (0.1418) loss: 0.7971 (0.7932) time: 0.1497 data: 0.0682 max mem: 9377 +Train: [59] [2900/6250] eta: 0:08:04 lr: 0.000048 grad: 0.1277 (0.1416) loss: 0.7894 (0.7932) time: 0.1440 data: 0.0638 max mem: 9377 +Train: [59] [3000/6250] eta: 0:07:50 lr: 0.000048 grad: 0.1168 (0.1414) loss: 0.7963 (0.7933) time: 0.1671 data: 0.0892 max mem: 9377 +Train: [59] [3100/6250] eta: 0:07:35 lr: 0.000048 grad: 0.1342 (0.1412) loss: 0.7898 (0.7932) time: 0.1478 data: 0.0706 max mem: 9377 +Train: [59] [3200/6250] eta: 0:07:20 lr: 0.000048 grad: 0.1326 (0.1410) loss: 0.7873 (0.7932) time: 0.1326 data: 0.0531 max mem: 9377 +Train: [59] [3300/6250] eta: 0:07:05 lr: 0.000048 grad: 0.1296 (0.1411) loss: 0.7895 (0.7931) time: 0.1394 data: 0.0527 max mem: 9377 +Train: [59] [3400/6250] eta: 0:06:51 lr: 0.000048 grad: 0.1292 (0.1409) loss: 0.7923 (0.7930) time: 0.1373 data: 0.0584 max mem: 9377 +Train: [59] [3500/6250] eta: 0:06:36 lr: 0.000048 grad: 0.1323 (0.1408) loss: 0.7876 (0.7929) time: 0.1502 data: 0.0691 max mem: 9377 +Train: [59] [3600/6250] eta: 0:06:21 lr: 0.000048 grad: 0.1295 (0.1407) loss: 0.7918 (0.7928) time: 0.1351 data: 0.0491 max mem: 9377 +Train: [59] [3700/6250] eta: 0:06:06 lr: 0.000048 grad: 0.1309 (0.1406) loss: 0.7932 (0.7927) time: 0.1272 data: 0.0395 max mem: 9377 +Train: [59] [3800/6250] eta: 0:05:51 lr: 0.000048 grad: 0.1323 (0.1405) loss: 0.7823 (0.7926) time: 0.1250 data: 0.0438 max mem: 9377 +Train: [59] [3900/6250] eta: 0:05:37 lr: 0.000048 grad: 0.1329 (0.1405) loss: 0.7789 (0.7924) time: 0.1421 data: 0.0599 max mem: 9377 +Train: [59] [4000/6250] eta: 0:05:22 lr: 0.000048 grad: 0.1366 (0.1405) loss: 0.7883 (0.7924) time: 0.1197 data: 0.0312 max mem: 9377 +Train: [59] [4100/6250] eta: 0:05:08 lr: 0.000048 grad: 0.1409 (0.1406) loss: 0.7925 (0.7923) time: 0.1479 data: 0.0682 max mem: 9377 +Train: [59] [4200/6250] eta: 0:04:53 lr: 0.000048 grad: 0.1250 (0.1405) loss: 0.7874 (0.7924) time: 0.1352 data: 0.0520 max mem: 9377 +Train: [59] [4300/6250] eta: 0:04:38 lr: 0.000048 grad: 0.1345 (0.1405) loss: 0.7914 (0.7923) time: 0.1393 data: 0.0544 max mem: 9377 +Train: [59] [4400/6250] eta: 0:04:24 lr: 0.000048 grad: 0.1423 (0.1404) loss: 0.7934 (0.7924) time: 0.1114 data: 0.0236 max mem: 9377 +Train: [59] [4500/6250] eta: 0:04:10 lr: 0.000048 grad: 0.1322 (0.1403) loss: 0.8002 (0.7924) time: 0.1953 data: 0.1156 max mem: 9377 +Train: [59] [4600/6250] eta: 0:03:56 lr: 0.000048 grad: 0.1317 (0.1402) loss: 0.7966 (0.7925) time: 0.1401 data: 0.0558 max mem: 9377 +Train: [59] [4700/6250] eta: 0:03:42 lr: 0.000048 grad: 0.1290 (0.1401) loss: 0.7918 (0.7925) time: 0.1726 data: 0.0931 max mem: 9377 +Train: [59] [4800/6250] eta: 0:03:28 lr: 0.000048 grad: 0.1393 (0.1400) loss: 0.7896 (0.7925) time: 0.1410 data: 0.0636 max mem: 9377 +Train: [59] [4900/6250] eta: 0:03:14 lr: 0.000048 grad: 0.1293 (0.1398) loss: 0.8035 (0.7926) time: 0.1556 data: 0.0848 max mem: 9377 +Train: [59] [5000/6250] eta: 0:03:00 lr: 0.000048 grad: 0.1330 (0.1398) loss: 0.7910 (0.7927) time: 0.1652 data: 0.0838 max mem: 9377 +Train: [59] [5100/6250] eta: 0:02:45 lr: 0.000048 grad: 0.1303 (0.1396) loss: 0.7888 (0.7927) time: 0.1679 data: 0.0854 max mem: 9377 +Train: [59] [5200/6250] eta: 0:02:31 lr: 0.000047 grad: 0.1328 (0.1395) loss: 0.7904 (0.7927) time: 0.1300 data: 0.0398 max mem: 9377 +Train: [59] [5300/6250] eta: 0:02:17 lr: 0.000047 grad: 0.1221 (0.1395) loss: 0.7947 (0.7927) time: 0.1353 data: 0.0510 max mem: 9377 +Train: [59] [5400/6250] eta: 0:02:02 lr: 0.000047 grad: 0.1297 (0.1394) loss: 0.7969 (0.7927) time: 0.1290 data: 0.0339 max mem: 9377 +Train: [59] [5500/6250] eta: 0:01:48 lr: 0.000047 grad: 0.1364 (0.1393) loss: 0.7923 (0.7927) time: 0.1753 data: 0.0970 max mem: 9377 +Train: [59] [5600/6250] eta: 0:01:34 lr: 0.000047 grad: 0.1388 (0.1395) loss: 0.7912 (0.7926) time: 0.1187 data: 0.0434 max mem: 9377 +Train: [59] [5700/6250] eta: 0:01:19 lr: 0.000047 grad: 0.1389 (0.1396) loss: 0.7913 (0.7926) time: 0.1190 data: 0.0369 max mem: 9377 +Train: [59] [5800/6250] eta: 0:01:05 lr: 0.000047 grad: 0.1311 (0.1396) loss: 0.7922 (0.7926) time: 0.1288 data: 0.0474 max mem: 9377 +Train: [59] [5900/6250] eta: 0:00:50 lr: 0.000047 grad: 0.1237 (0.1396) loss: 0.7921 (0.7925) time: 0.1455 data: 0.0544 max mem: 9377 +Train: [59] [6000/6250] eta: 0:00:36 lr: 0.000047 grad: 0.1298 (0.1395) loss: 0.7878 (0.7925) time: 0.1422 data: 0.0580 max mem: 9377 +Train: [59] [6100/6250] eta: 0:00:21 lr: 0.000047 grad: 0.1321 (0.1395) loss: 0.7897 (0.7925) time: 0.1322 data: 0.0462 max mem: 9377 +Train: [59] [6200/6250] eta: 0:00:07 lr: 0.000047 grad: 0.1342 (0.1395) loss: 0.7906 (0.7925) time: 0.1361 data: 0.0465 max mem: 9377 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.1344 (0.1395) loss: 0.7880 (0.7925) time: 0.1344 data: 0.0524 max mem: 9377 +Train: [59] Total time: 0:15:06 (0.1450 s / it) +Averaged stats: lr: 0.000047 grad: 0.1344 (0.1395) loss: 0.7880 (0.7925) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:04:26 loss: 0.8398 (0.8398) time: 4.2923 data: 4.2059 max mem: 9377 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8447 (0.8441) time: 0.1103 data: 0.0840 max mem: 9377 +Eval (hcp-train-subset): [59] Total time: 0:00:12 (0.1949 s / it) +Averaged stats (hcp-train-subset): loss: 0.8447 (0.8441) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [59] [ 0/62] eta: 0:05:04 loss: 0.8406 (0.8406) time: 4.9116 data: 4.8813 max mem: 9377 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8391 (0.8415) time: 0.1227 data: 0.0980 max mem: 9377 +Eval (hcp-val): [59] Total time: 0:00:12 (0.1996 s / it) +Averaged stats (hcp-val): loss: 0.8391 (0.8415) +Making plots (hcp-val): example=2 +Eval (nsd-val): [59] [ 0/62] eta: 0:05:33 loss: 0.8245 (0.8245) time: 5.3750 data: 5.3447 max mem: 9377 +Eval (nsd-val): [59] [61/62] eta: 0:00:00 loss: 0.8329 (0.8357) time: 0.1067 data: 0.0819 max mem: 9377 +Eval (nsd-val): [59] Total time: 0:00:12 (0.2026 s / it) +Averaged stats (nsd-val): loss: 0.8329 (0.8357) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 9:25:17 lr: 0.000047 grad: 0.2297 (0.2297) loss: 0.7966 (0.7966) time: 5.4268 data: 5.2287 max mem: 9377 +Train: [60] [ 100/6250] eta: 0:20:16 lr: 0.000047 grad: 0.1484 (0.1811) loss: 0.8095 (0.8126) time: 0.1426 data: 0.0517 max mem: 9377 +Train: [60] [ 200/6250] eta: 0:17:30 lr: 0.000047 grad: 0.1652 (0.1764) loss: 0.8002 (0.8042) time: 0.1384 data: 0.0430 max mem: 9377 +Train: [60] [ 300/6250] eta: 0:15:47 lr: 0.000047 grad: 0.1418 (0.1700) loss: 0.7911 (0.8024) time: 0.1233 data: 0.0247 max mem: 9377 +Train: [60] [ 400/6250] eta: 0:14:55 lr: 0.000047 grad: 0.1336 (0.1653) loss: 0.7850 (0.7999) time: 0.1489 data: 0.0611 max mem: 9377 +Train: [60] [ 500/6250] eta: 0:14:14 lr: 0.000047 grad: 0.1313 (0.1613) loss: 0.7932 (0.7986) time: 0.1265 data: 0.0236 max mem: 9377 +Train: [60] [ 600/6250] eta: 0:13:51 lr: 0.000047 grad: 0.1306 (0.1584) loss: 0.7845 (0.7972) time: 0.1384 data: 0.0491 max mem: 9377 +Train: [60] [ 700/6250] eta: 0:13:38 lr: 0.000047 grad: 0.1311 (0.1558) loss: 0.8007 (0.7964) time: 0.1436 data: 0.0541 max mem: 9377 +Train: [60] [ 800/6250] eta: 0:13:20 lr: 0.000047 grad: 0.1369 (0.1535) loss: 0.8017 (0.7964) time: 0.1467 data: 0.0665 max mem: 9377 +Train: [60] [ 900/6250] eta: 0:13:08 lr: 0.000047 grad: 0.1315 (0.1518) loss: 0.7997 (0.7962) time: 0.1594 data: 0.0747 max mem: 9377 +Train: [60] [1000/6250] eta: 0:12:50 lr: 0.000047 grad: 0.1343 (0.1502) loss: 0.7994 (0.7961) time: 0.1446 data: 0.0613 max mem: 9377 +Train: [60] [1100/6250] eta: 0:12:32 lr: 0.000047 grad: 0.1310 (0.1493) loss: 0.8035 (0.7959) time: 0.1459 data: 0.0645 max mem: 9377 +Train: [60] [1200/6250] eta: 0:12:15 lr: 0.000047 grad: 0.1356 (0.1485) loss: 0.7898 (0.7955) time: 0.1379 data: 0.0514 max mem: 9377 +Train: [60] [1300/6250] eta: 0:11:59 lr: 0.000047 grad: 0.1377 (0.1473) loss: 0.7895 (0.7953) time: 0.1470 data: 0.0605 max mem: 9377 +Train: [60] [1400/6250] eta: 0:11:43 lr: 0.000047 grad: 0.1337 (0.1464) loss: 0.7963 (0.7951) time: 0.1341 data: 0.0529 max mem: 9377 +Train: [60] [1500/6250] eta: 0:11:28 lr: 0.000047 grad: 0.1283 (0.1457) loss: 0.7992 (0.7949) time: 0.1503 data: 0.0695 max mem: 9377 +Train: [60] [1600/6250] eta: 0:11:11 lr: 0.000047 grad: 0.1271 (0.1452) loss: 0.7966 (0.7945) time: 0.1092 data: 0.0298 max mem: 9377 +Train: [60] [1700/6250] eta: 0:10:56 lr: 0.000047 grad: 0.1343 (0.1447) loss: 0.7853 (0.7942) time: 0.1299 data: 0.0466 max mem: 9377 +Train: [60] [1800/6250] eta: 0:10:41 lr: 0.000047 grad: 0.1374 (0.1445) loss: 0.7921 (0.7941) time: 0.1391 data: 0.0562 max mem: 9377 +Train: [60] [1900/6250] eta: 0:10:25 lr: 0.000047 grad: 0.1307 (0.1444) loss: 0.7933 (0.7940) time: 0.1304 data: 0.0413 max mem: 9377 +Train: [60] [2000/6250] eta: 0:10:08 lr: 0.000047 grad: 0.1383 (0.1443) loss: 0.7937 (0.7939) time: 0.1375 data: 0.0583 max mem: 9377 +Train: [60] [2100/6250] eta: 0:09:53 lr: 0.000046 grad: 0.1278 (0.1440) loss: 0.7875 (0.7939) time: 0.1209 data: 0.0361 max mem: 9377 +Train: [60] [2200/6250] eta: 0:09:37 lr: 0.000046 grad: 0.1323 (0.1436) loss: 0.7932 (0.7940) time: 0.1421 data: 0.0563 max mem: 9377 +Train: [60] [2300/6250] eta: 0:09:22 lr: 0.000046 grad: 0.1411 (0.1435) loss: 0.7939 (0.7940) time: 0.1487 data: 0.0665 max mem: 9377 +Train: [60] [2400/6250] eta: 0:09:08 lr: 0.000046 grad: 0.1352 (0.1432) loss: 0.7905 (0.7939) time: 0.1300 data: 0.0413 max mem: 9377 +Train: [60] [2500/6250] eta: 0:08:53 lr: 0.000046 grad: 0.1319 (0.1430) loss: 0.7876 (0.7938) time: 0.1545 data: 0.0770 max mem: 9377 +Train: [60] [2600/6250] eta: 0:08:38 lr: 0.000046 grad: 0.1422 (0.1430) loss: 0.7839 (0.7936) time: 0.1482 data: 0.0630 max mem: 9377 +Train: [60] [2700/6250] eta: 0:08:23 lr: 0.000046 grad: 0.1357 (0.1428) loss: 0.7907 (0.7935) time: 0.1308 data: 0.0538 max mem: 9377 +Train: [60] [2800/6250] eta: 0:08:08 lr: 0.000046 grad: 0.1352 (0.1429) loss: 0.7889 (0.7934) time: 0.1394 data: 0.0506 max mem: 9377 +Train: [60] [2900/6250] eta: 0:07:53 lr: 0.000046 grad: 0.1357 (0.1429) loss: 0.7883 (0.7932) time: 0.1321 data: 0.0471 max mem: 9377 +Train: [60] [3000/6250] eta: 0:07:38 lr: 0.000046 grad: 0.1351 (0.1429) loss: 0.7865 (0.7930) time: 0.1266 data: 0.0444 max mem: 9377 +Train: [60] [3100/6250] eta: 0:07:24 lr: 0.000046 grad: 0.1357 (0.1428) loss: 0.7891 (0.7929) time: 0.1454 data: 0.0646 max mem: 9377 +Train: [60] [3200/6250] eta: 0:07:09 lr: 0.000046 grad: 0.1275 (0.1426) loss: 0.8029 (0.7927) time: 0.1496 data: 0.0673 max mem: 9377 +Train: [60] [3300/6250] eta: 0:06:55 lr: 0.000046 grad: 0.1376 (0.1425) loss: 0.7858 (0.7926) time: 0.1149 data: 0.0318 max mem: 9377 +Train: [60] [3400/6250] eta: 0:06:40 lr: 0.000046 grad: 0.1425 (0.1425) loss: 0.7924 (0.7925) time: 0.1423 data: 0.0595 max mem: 9377 +Train: [60] [3500/6250] eta: 0:06:26 lr: 0.000046 grad: 0.1372 (0.1424) loss: 0.7877 (0.7924) time: 0.1529 data: 0.0734 max mem: 9377 +Train: [60] [3600/6250] eta: 0:06:12 lr: 0.000046 grad: 0.1359 (0.1423) loss: 0.7956 (0.7923) time: 0.1523 data: 0.0695 max mem: 9377 +Train: [60] [3700/6250] eta: 0:05:58 lr: 0.000046 grad: 0.1255 (0.1422) loss: 0.7928 (0.7923) time: 0.1295 data: 0.0557 max mem: 9377 +Train: [60] [3800/6250] eta: 0:05:44 lr: 0.000046 grad: 0.1343 (0.1421) loss: 0.8030 (0.7924) time: 0.1316 data: 0.0511 max mem: 9377 +Train: [60] [3900/6250] eta: 0:05:30 lr: 0.000046 grad: 0.1422 (0.1420) loss: 0.7912 (0.7924) time: 0.1310 data: 0.0505 max mem: 9377 +Train: [60] [4000/6250] eta: 0:05:17 lr: 0.000046 grad: 0.1336 (0.1419) loss: 0.7920 (0.7924) time: 0.1606 data: 0.0808 max mem: 9377 +Train: [60] [4100/6250] eta: 0:05:02 lr: 0.000046 grad: 0.1290 (0.1418) loss: 0.7969 (0.7924) time: 0.1168 data: 0.0302 max mem: 9377 +Train: [60] [4200/6250] eta: 0:04:48 lr: 0.000046 grad: 0.1407 (0.1417) loss: 0.7931 (0.7924) time: 0.1456 data: 0.0604 max mem: 9377 +Train: [60] [4300/6250] eta: 0:04:34 lr: 0.000046 grad: 0.1217 (0.1417) loss: 0.7993 (0.7924) time: 0.1365 data: 0.0519 max mem: 9377 +Train: [60] [4400/6250] eta: 0:04:20 lr: 0.000046 grad: 0.1250 (0.1415) loss: 0.7966 (0.7924) time: 0.1513 data: 0.0676 max mem: 9377 +Train: [60] [4500/6250] eta: 0:04:07 lr: 0.000046 grad: 0.1398 (0.1414) loss: 0.7921 (0.7924) time: 0.1666 data: 0.0854 max mem: 9377 +Train: [60] [4600/6250] eta: 0:03:54 lr: 0.000046 grad: 0.1283 (0.1413) loss: 0.7997 (0.7924) time: 0.1606 data: 0.0700 max mem: 9377 +Train: [60] [4700/6250] eta: 0:03:41 lr: 0.000046 grad: 0.1305 (0.1412) loss: 0.7940 (0.7924) time: 0.1819 data: 0.0961 max mem: 9377 +Train: [60] [4800/6250] eta: 0:03:27 lr: 0.000046 grad: 0.1252 (0.1411) loss: 0.7971 (0.7924) time: 0.1732 data: 0.0876 max mem: 9377 +Train: [60] [4900/6250] eta: 0:03:13 lr: 0.000046 grad: 0.1395 (0.1410) loss: 0.7863 (0.7924) time: 0.1684 data: 0.0852 max mem: 9377 +Train: [60] [5000/6250] eta: 0:02:59 lr: 0.000046 grad: 0.1383 (0.1410) loss: 0.7934 (0.7925) time: 0.1437 data: 0.0536 max mem: 9377 +Train: [60] [5100/6250] eta: 0:02:45 lr: 0.000046 grad: 0.1317 (0.1409) loss: 0.7995 (0.7925) time: 0.1447 data: 0.0519 max mem: 9377 +Train: [60] [5200/6250] eta: 0:02:31 lr: 0.000045 grad: 0.1275 (0.1408) loss: 0.8013 (0.7925) time: 0.1226 data: 0.0366 max mem: 9377 +Train: [60] [5300/6250] eta: 0:02:17 lr: 0.000045 grad: 0.1344 (0.1406) loss: 0.7923 (0.7926) time: 0.1318 data: 0.0437 max mem: 9377 +Train: [60] [5400/6250] eta: 0:02:02 lr: 0.000045 grad: 0.1333 (0.1406) loss: 0.7947 (0.7926) time: 0.1267 data: 0.0422 max mem: 9377 +Train: [60] [5500/6250] eta: 0:01:48 lr: 0.000045 grad: 0.1282 (0.1406) loss: 0.7902 (0.7926) time: 0.1298 data: 0.0447 max mem: 9377 +Train: [60] [5600/6250] eta: 0:01:33 lr: 0.000045 grad: 0.1330 (0.1405) loss: 0.7959 (0.7926) time: 0.1482 data: 0.0628 max mem: 9377 +Train: [60] [5700/6250] eta: 0:01:19 lr: 0.000045 grad: 0.1345 (0.1405) loss: 0.7971 (0.7925) time: 0.1393 data: 0.0614 max mem: 9377 +Train: [60] [5800/6250] eta: 0:01:05 lr: 0.000045 grad: 0.1297 (0.1406) loss: 0.7960 (0.7925) time: 0.1628 data: 0.0715 max mem: 9377 +Train: [60] [5900/6250] eta: 0:00:50 lr: 0.000045 grad: 0.1305 (0.1406) loss: 0.7942 (0.7925) time: 0.1188 data: 0.0385 max mem: 9377 +Train: [60] [6000/6250] eta: 0:00:36 lr: 0.000045 grad: 0.1366 (0.1405) loss: 0.7952 (0.7925) time: 0.1354 data: 0.0506 max mem: 9377 +Train: [60] [6100/6250] eta: 0:00:21 lr: 0.000045 grad: 0.1375 (0.1405) loss: 0.7919 (0.7925) time: 0.1422 data: 0.0640 max mem: 9377 +Train: [60] [6200/6250] eta: 0:00:07 lr: 0.000045 grad: 0.1423 (0.1404) loss: 0.7900 (0.7925) time: 0.1543 data: 0.0655 max mem: 9377 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1291 (0.1404) loss: 0.7917 (0.7925) time: 0.1653 data: 0.0785 max mem: 9377 +Train: [60] Total time: 0:15:10 (0.1457 s / it) +Averaged stats: lr: 0.000045 grad: 0.1291 (0.1404) loss: 0.7917 (0.7925) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:03:34 loss: 0.8376 (0.8376) time: 3.4668 data: 3.4017 max mem: 9377 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8449 (0.8433) time: 0.1120 data: 0.0857 max mem: 9377 +Eval (hcp-train-subset): [60] Total time: 0:00:12 (0.2033 s / it) +Averaged stats (hcp-train-subset): loss: 0.8449 (0.8433) +Eval (hcp-val): [60] [ 0/62] eta: 0:04:00 loss: 0.8413 (0.8413) time: 3.8826 data: 3.8178 max mem: 9377 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.8390 (0.8415) time: 0.0854 data: 0.0607 max mem: 9377 +Eval (hcp-val): [60] Total time: 0:00:12 (0.2000 s / it) +Averaged stats (hcp-val): loss: 0.8390 (0.8415) +Eval (nsd-val): [60] [ 0/62] eta: 0:04:31 loss: 0.8316 (0.8316) time: 4.3748 data: 4.3439 max mem: 9377 +Eval (nsd-val): [60] [61/62] eta: 0:00:00 loss: 0.8356 (0.8362) time: 0.1138 data: 0.0891 max mem: 9377 +Eval (nsd-val): [60] Total time: 0:00:12 (0.1945 s / it) +Averaged stats (nsd-val): loss: 0.8356 (0.8362) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [61] [ 0/6250] eta: 11:08:02 lr: 0.000045 grad: 0.2642 (0.2642) loss: 0.7921 (0.7921) time: 6.4133 data: 6.2955 max mem: 9377 +Train: [61] [ 100/6250] eta: 0:20:03 lr: 0.000045 grad: 0.1772 (0.2100) loss: 0.7954 (0.8016) time: 0.1438 data: 0.0336 max mem: 9377 +Train: [61] [ 200/6250] eta: 0:16:47 lr: 0.000045 grad: 0.1852 (0.2011) loss: 0.7944 (0.7959) time: 0.1363 data: 0.0471 max mem: 9377 +Train: [61] [ 300/6250] eta: 0:15:25 lr: 0.000045 grad: 0.1431 (0.1860) loss: 0.8052 (0.7972) time: 0.1278 data: 0.0375 max mem: 9377 +Train: [61] [ 400/6250] eta: 0:14:37 lr: 0.000045 grad: 0.1420 (0.1760) loss: 0.7944 (0.7977) time: 0.1412 data: 0.0448 max mem: 9377 +Train: [61] [ 500/6250] eta: 0:14:02 lr: 0.000045 grad: 0.1399 (0.1698) loss: 0.7915 (0.7974) time: 0.1231 data: 0.0310 max mem: 9377 +Train: [61] [ 600/6250] eta: 0:13:37 lr: 0.000045 grad: 0.1330 (0.1641) loss: 0.7964 (0.7979) time: 0.1491 data: 0.0621 max mem: 9377 +Train: [61] [ 700/6250] eta: 0:13:30 lr: 0.000045 grad: 0.1351 (0.1603) loss: 0.7973 (0.7974) time: 0.1665 data: 0.0860 max mem: 9377 +Train: [61] [ 800/6250] eta: 0:13:21 lr: 0.000045 grad: 0.1347 (0.1574) loss: 0.7947 (0.7971) time: 0.1498 data: 0.0599 max mem: 9377 +Train: [61] [ 900/6250] eta: 0:13:03 lr: 0.000045 grad: 0.1323 (0.1551) loss: 0.7935 (0.7969) time: 0.1467 data: 0.0565 max mem: 9377 +Train: [61] [1000/6250] eta: 0:12:53 lr: 0.000045 grad: 0.1283 (0.1533) loss: 0.7945 (0.7969) time: 0.1494 data: 0.0723 max mem: 9377 +Train: [61] [1100/6250] eta: 0:12:38 lr: 0.000045 grad: 0.1331 (0.1520) loss: 0.7935 (0.7965) time: 0.1328 data: 0.0493 max mem: 9377 +Train: [61] [1200/6250] eta: 0:12:25 lr: 0.000045 grad: 0.1288 (0.1506) loss: 0.7931 (0.7962) time: 0.1347 data: 0.0544 max mem: 9377 +Train: [61] [1300/6250] eta: 0:12:14 lr: 0.000045 grad: 0.1288 (0.1493) loss: 0.7954 (0.7961) time: 0.1581 data: 0.0757 max mem: 9377 +Train: [61] [1400/6250] eta: 0:11:57 lr: 0.000045 grad: 0.1315 (0.1484) loss: 0.7880 (0.7958) time: 0.1396 data: 0.0559 max mem: 9377 +Train: [61] [1500/6250] eta: 0:11:44 lr: 0.000045 grad: 0.1418 (0.1479) loss: 0.7900 (0.7955) time: 0.1495 data: 0.0697 max mem: 9377 +Train: [61] [1600/6250] eta: 0:11:29 lr: 0.000045 grad: 0.1403 (0.1474) loss: 0.7936 (0.7953) time: 0.1348 data: 0.0508 max mem: 9377 +Train: [61] [1700/6250] eta: 0:11:15 lr: 0.000045 grad: 0.1239 (0.1468) loss: 0.7966 (0.7952) time: 0.1603 data: 0.0813 max mem: 9377 +Train: [61] [1800/6250] eta: 0:10:58 lr: 0.000045 grad: 0.1328 (0.1462) loss: 0.7944 (0.7951) time: 0.1372 data: 0.0535 max mem: 9377 +Train: [61] [1900/6250] eta: 0:10:42 lr: 0.000045 grad: 0.1298 (0.1459) loss: 0.8000 (0.7950) time: 0.1589 data: 0.0815 max mem: 9377 +Train: [61] [2000/6250] eta: 0:10:25 lr: 0.000045 grad: 0.1275 (0.1453) loss: 0.7962 (0.7950) time: 0.1325 data: 0.0472 max mem: 9377 +Train: [61] [2100/6250] eta: 0:10:09 lr: 0.000044 grad: 0.1348 (0.1449) loss: 0.7926 (0.7950) time: 0.1132 data: 0.0203 max mem: 9377 +Train: [61] [2200/6250] eta: 0:09:52 lr: 0.000044 grad: 0.1228 (0.1442) loss: 0.7992 (0.7952) time: 0.1378 data: 0.0578 max mem: 9377 +Train: [61] [2300/6250] eta: 0:09:36 lr: 0.000044 grad: 0.1324 (0.1436) loss: 0.7982 (0.7953) time: 0.1344 data: 0.0487 max mem: 9377 +Train: [61] [2400/6250] eta: 0:09:19 lr: 0.000044 grad: 0.1229 (0.1432) loss: 0.7926 (0.7954) time: 0.1377 data: 0.0503 max mem: 9377 +Train: [61] [2500/6250] eta: 0:09:04 lr: 0.000044 grad: 0.1337 (0.1430) loss: 0.7947 (0.7954) time: 0.1276 data: 0.0440 max mem: 9377 +Train: [61] [2600/6250] eta: 0:08:47 lr: 0.000044 grad: 0.1329 (0.1427) loss: 0.7995 (0.7955) time: 0.1240 data: 0.0416 max mem: 9377 +Train: [61] [2700/6250] eta: 0:08:32 lr: 0.000044 grad: 0.1295 (0.1425) loss: 0.7918 (0.7954) time: 0.1466 data: 0.0635 max mem: 9377 +Train: [61] [2800/6250] eta: 0:08:17 lr: 0.000044 grad: 0.1368 (0.1423) loss: 0.7903 (0.7954) time: 0.1313 data: 0.0506 max mem: 9377 +Train: [61] [2900/6250] eta: 0:08:02 lr: 0.000044 grad: 0.1259 (0.1421) loss: 0.7923 (0.7953) time: 0.1438 data: 0.0635 max mem: 9377 +Train: [61] [3000/6250] eta: 0:07:48 lr: 0.000044 grad: 0.1376 (0.1421) loss: 0.7915 (0.7951) time: 0.1535 data: 0.0720 max mem: 9377 +Train: [61] [3100/6250] eta: 0:07:33 lr: 0.000044 grad: 0.1352 (0.1420) loss: 0.7910 (0.7950) time: 0.1587 data: 0.0761 max mem: 9377 +Train: [61] [3200/6250] eta: 0:07:19 lr: 0.000044 grad: 0.1299 (0.1418) loss: 0.8008 (0.7949) time: 0.1676 data: 0.0938 max mem: 9377 +Train: [61] [3300/6250] eta: 0:07:05 lr: 0.000044 grad: 0.1218 (0.1417) loss: 0.8010 (0.7949) time: 0.1523 data: 0.0713 max mem: 9377 +Train: [61] [3400/6250] eta: 0:06:50 lr: 0.000044 grad: 0.1362 (0.1417) loss: 0.7919 (0.7948) time: 0.1436 data: 0.0627 max mem: 9377 +Train: [61] [3500/6250] eta: 0:06:37 lr: 0.000044 grad: 0.1329 (0.1415) loss: 0.7962 (0.7948) time: 0.1799 data: 0.1010 max mem: 9377 +Train: [61] [3600/6250] eta: 0:06:21 lr: 0.000044 grad: 0.1311 (0.1414) loss: 0.7914 (0.7948) time: 0.1480 data: 0.0635 max mem: 9377 +Train: [61] [3700/6250] eta: 0:06:06 lr: 0.000044 grad: 0.1368 (0.1414) loss: 0.7957 (0.7948) time: 0.1544 data: 0.0687 max mem: 9377 +Train: [61] [3800/6250] eta: 0:05:51 lr: 0.000044 grad: 0.1329 (0.1413) loss: 0.7945 (0.7948) time: 0.1414 data: 0.0580 max mem: 9377 +Train: [61] [3900/6250] eta: 0:05:37 lr: 0.000044 grad: 0.1314 (0.1412) loss: 0.7968 (0.7948) time: 0.1374 data: 0.0475 max mem: 9377 +Train: [61] [4000/6250] eta: 0:05:22 lr: 0.000044 grad: 0.1388 (0.1411) loss: 0.7857 (0.7948) time: 0.1423 data: 0.0633 max mem: 9377 +Train: [61] [4100/6250] eta: 0:05:07 lr: 0.000044 grad: 0.1425 (0.1411) loss: 0.7873 (0.7947) time: 0.1396 data: 0.0481 max mem: 9377 +Train: [61] [4200/6250] eta: 0:04:53 lr: 0.000044 grad: 0.1325 (0.1411) loss: 0.7981 (0.7947) time: 0.1377 data: 0.0532 max mem: 9377 +Train: [61] [4300/6250] eta: 0:04:38 lr: 0.000044 grad: 0.1378 (0.1412) loss: 0.7815 (0.7946) time: 0.1299 data: 0.0451 max mem: 9377 +Train: [61] [4400/6250] eta: 0:04:25 lr: 0.000044 grad: 0.1411 (0.1413) loss: 0.7898 (0.7945) time: 0.1874 data: 0.1079 max mem: 9377 +Train: [61] [4500/6250] eta: 0:04:10 lr: 0.000044 grad: 0.1424 (0.1415) loss: 0.7812 (0.7943) time: 0.1362 data: 0.0501 max mem: 9377 +Train: [61] [4600/6250] eta: 0:03:56 lr: 0.000044 grad: 0.1410 (0.1416) loss: 0.7851 (0.7942) time: 0.1593 data: 0.0681 max mem: 9377 +Train: [61] [4700/6250] eta: 0:03:42 lr: 0.000044 grad: 0.1351 (0.1416) loss: 0.7918 (0.7940) time: 0.1501 data: 0.0681 max mem: 9377 +Train: [61] [4800/6250] eta: 0:03:28 lr: 0.000044 grad: 0.1305 (0.1415) loss: 0.7951 (0.7940) time: 0.1721 data: 0.0899 max mem: 9377 +Train: [61] [4900/6250] eta: 0:03:13 lr: 0.000044 grad: 0.1316 (0.1414) loss: 0.7914 (0.7939) time: 0.1412 data: 0.0583 max mem: 9377 +Train: [61] [5000/6250] eta: 0:02:59 lr: 0.000044 grad: 0.1380 (0.1414) loss: 0.8013 (0.7939) time: 0.1331 data: 0.0529 max mem: 9377 +Train: [61] [5100/6250] eta: 0:02:44 lr: 0.000044 grad: 0.1307 (0.1413) loss: 0.8016 (0.7939) time: 0.1420 data: 0.0595 max mem: 9377 +Train: [61] [5200/6250] eta: 0:02:30 lr: 0.000044 grad: 0.1367 (0.1413) loss: 0.7971 (0.7938) time: 0.1223 data: 0.0320 max mem: 9377 +Train: [61] [5300/6250] eta: 0:02:15 lr: 0.000043 grad: 0.1356 (0.1413) loss: 0.7971 (0.7938) time: 0.1241 data: 0.0462 max mem: 9377 +Train: [61] [5400/6250] eta: 0:02:01 lr: 0.000043 grad: 0.1358 (0.1412) loss: 0.7971 (0.7938) time: 0.1150 data: 0.0240 max mem: 9377 +Train: [61] [5500/6250] eta: 0:01:46 lr: 0.000043 grad: 0.1393 (0.1412) loss: 0.7836 (0.7937) time: 0.1334 data: 0.0528 max mem: 9377 +Train: [61] [5600/6250] eta: 0:01:32 lr: 0.000043 grad: 0.1381 (0.1411) loss: 0.7937 (0.7937) time: 0.1339 data: 0.0500 max mem: 9377 +Train: [61] [5700/6250] eta: 0:01:18 lr: 0.000043 grad: 0.1414 (0.1411) loss: 0.7831 (0.7936) time: 0.1382 data: 0.0555 max mem: 9377 +Train: [61] [5800/6250] eta: 0:01:04 lr: 0.000043 grad: 0.1431 (0.1411) loss: 0.7936 (0.7936) time: 0.1460 data: 0.0641 max mem: 9377 +Train: [61] [5900/6250] eta: 0:00:49 lr: 0.000043 grad: 0.1415 (0.1411) loss: 0.7905 (0.7935) time: 0.1419 data: 0.0411 max mem: 9377 +Train: [61] [6000/6250] eta: 0:00:35 lr: 0.000043 grad: 0.1322 (0.1410) loss: 0.7914 (0.7935) time: 0.1474 data: 0.0589 max mem: 9377 +Train: [61] [6100/6250] eta: 0:00:21 lr: 0.000043 grad: 0.1316 (0.1410) loss: 0.8013 (0.7935) time: 0.1340 data: 0.0530 max mem: 9377 +Train: [61] [6200/6250] eta: 0:00:07 lr: 0.000043 grad: 0.1430 (0.1410) loss: 0.7916 (0.7935) time: 0.1473 data: 0.0619 max mem: 9377 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1346 (0.1410) loss: 0.7852 (0.7935) time: 0.1658 data: 0.0916 max mem: 9377 +Train: [61] Total time: 0:14:58 (0.1437 s / it) +Averaged stats: lr: 0.000043 grad: 0.1346 (0.1410) loss: 0.7852 (0.7935) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:05:30 loss: 0.8392 (0.8392) time: 5.3338 data: 5.3041 max mem: 9377 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8408 (0.8428) time: 0.1229 data: 0.0982 max mem: 9377 +Eval (hcp-train-subset): [61] Total time: 0:00:12 (0.2068 s / it) +Averaged stats (hcp-train-subset): loss: 0.8408 (0.8428) +Eval (hcp-val): [61] [ 0/62] eta: 0:04:44 loss: 0.8370 (0.8370) time: 4.5878 data: 4.5572 max mem: 9377 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8386 (0.8409) time: 0.1265 data: 0.1002 max mem: 9377 +Eval (hcp-val): [61] Total time: 0:00:12 (0.2068 s / it) +Averaged stats (hcp-val): loss: 0.8386 (0.8409) +Eval (nsd-val): [61] [ 0/62] eta: 0:05:21 loss: 0.8189 (0.8189) time: 5.1787 data: 5.1490 max mem: 9377 +Eval (nsd-val): [61] [61/62] eta: 0:00:00 loss: 0.8282 (0.8301) time: 0.0909 data: 0.0644 max mem: 9377 +Eval (nsd-val): [61] Total time: 0:00:12 (0.2065 s / it) +Averaged stats (nsd-val): loss: 0.8282 (0.8301) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [62] [ 0/6250] eta: 10:05:12 lr: 0.000043 grad: 0.2087 (0.2087) loss: 0.7433 (0.7433) time: 5.8101 data: 5.6781 max mem: 9377 +Train: [62] [ 100/6250] eta: 0:20:48 lr: 0.000043 grad: 0.1326 (0.1689) loss: 0.8216 (0.8119) time: 0.1671 data: 0.0789 max mem: 9377 +Train: [62] [ 200/6250] eta: 0:17:42 lr: 0.000043 grad: 0.1546 (0.1618) loss: 0.8014 (0.8086) time: 0.1370 data: 0.0360 max mem: 9377 +Train: [62] [ 300/6250] eta: 0:16:19 lr: 0.000043 grad: 0.1335 (0.1566) loss: 0.7953 (0.8066) time: 0.1328 data: 0.0277 max mem: 9377 +Train: [62] [ 400/6250] eta: 0:15:24 lr: 0.000043 grad: 0.1323 (0.1528) loss: 0.8110 (0.8055) time: 0.1354 data: 0.0364 max mem: 9377 +Train: [62] [ 500/6250] eta: 0:14:50 lr: 0.000043 grad: 0.1372 (0.1498) loss: 0.7976 (0.8050) time: 0.1402 data: 0.0522 max mem: 9377 +Train: [62] [ 600/6250] eta: 0:14:19 lr: 0.000043 grad: 0.1314 (0.1480) loss: 0.7994 (0.8037) time: 0.1451 data: 0.0582 max mem: 9377 +Train: [62] [ 700/6250] eta: 0:13:56 lr: 0.000043 grad: 0.1442 (0.1464) loss: 0.7843 (0.8028) time: 0.1277 data: 0.0382 max mem: 9377 +Train: [62] [ 800/6250] eta: 0:13:42 lr: 0.000043 grad: 0.1280 (0.1455) loss: 0.8015 (0.8022) time: 0.1215 data: 0.0287 max mem: 9377 +Train: [62] [ 900/6250] eta: 0:13:38 lr: 0.000043 grad: 0.1454 (0.1444) loss: 0.7892 (0.8016) time: 0.1742 data: 0.0916 max mem: 9377 +Train: [62] [1000/6250] eta: 0:13:28 lr: 0.000043 grad: 0.1423 (0.1438) loss: 0.7919 (0.8012) time: 0.1713 data: 0.0918 max mem: 9377 +Train: [62] [1100/6250] eta: 0:13:19 lr: 0.000043 grad: 0.1363 (0.1428) loss: 0.7970 (0.8010) time: 0.1808 data: 0.1056 max mem: 9377 +Train: [62] [1200/6250] eta: 0:13:09 lr: 0.000043 grad: 0.1326 (0.1428) loss: 0.7917 (0.8004) time: 0.1832 data: 0.1081 max mem: 9377 +Train: [62] [1300/6250] eta: 0:12:57 lr: 0.000043 grad: 0.1342 (0.1424) loss: 0.7982 (0.7999) time: 0.1609 data: 0.0796 max mem: 9377 +Train: [62] [1400/6250] eta: 0:12:46 lr: 0.000043 grad: 0.1266 (0.1419) loss: 0.7967 (0.7995) time: 0.1661 data: 0.0812 max mem: 9377 +Train: [62] [1500/6250] eta: 0:12:29 lr: 0.000043 grad: 0.1354 (0.1415) loss: 0.7958 (0.7992) time: 0.1579 data: 0.0783 max mem: 9377 +Train: [62] [1600/6250] eta: 0:12:16 lr: 0.000043 grad: 0.1300 (0.1412) loss: 0.7940 (0.7988) time: 0.1883 data: 0.1095 max mem: 9377 +Train: [62] [1700/6250] eta: 0:12:00 lr: 0.000043 grad: 0.1342 (0.1409) loss: 0.8028 (0.7985) time: 0.1635 data: 0.0844 max mem: 9377 +Train: [62] [1800/6250] eta: 0:11:38 lr: 0.000043 grad: 0.1366 (0.1407) loss: 0.7959 (0.7983) time: 0.1301 data: 0.0489 max mem: 9377 +Train: [62] [1900/6250] eta: 0:11:18 lr: 0.000043 grad: 0.1284 (0.1404) loss: 0.7964 (0.7982) time: 0.1560 data: 0.0760 max mem: 9377 +Train: [62] [2000/6250] eta: 0:10:59 lr: 0.000043 grad: 0.1304 (0.1402) loss: 0.8056 (0.7981) time: 0.1507 data: 0.0705 max mem: 9377 +Train: [62] [2100/6250] eta: 0:10:40 lr: 0.000043 grad: 0.1255 (0.1398) loss: 0.8049 (0.7981) time: 0.1409 data: 0.0587 max mem: 9377 +Train: [62] [2200/6250] eta: 0:10:22 lr: 0.000042 grad: 0.1356 (0.1397) loss: 0.8029 (0.7981) time: 0.1379 data: 0.0621 max mem: 9377 +Train: [62] [2300/6250] eta: 0:10:05 lr: 0.000042 grad: 0.1350 (0.1398) loss: 0.7937 (0.7980) time: 0.1179 data: 0.0382 max mem: 9377 +Train: [62] [2400/6250] eta: 0:09:48 lr: 0.000042 grad: 0.1322 (0.1398) loss: 0.8001 (0.7978) time: 0.1416 data: 0.0561 max mem: 9377 +Train: [62] [2500/6250] eta: 0:09:33 lr: 0.000042 grad: 0.1284 (0.1398) loss: 0.8047 (0.7978) time: 0.1605 data: 0.0754 max mem: 9377 +Train: [62] [2600/6250] eta: 0:09:16 lr: 0.000042 grad: 0.1350 (0.1401) loss: 0.7924 (0.7977) time: 0.1364 data: 0.0578 max mem: 9377 +Train: [62] [2700/6250] eta: 0:09:00 lr: 0.000042 grad: 0.1329 (0.1400) loss: 0.7992 (0.7977) time: 0.1651 data: 0.0851 max mem: 9377 +Train: [62] [2800/6250] eta: 0:08:44 lr: 0.000042 grad: 0.1391 (0.1399) loss: 0.8002 (0.7978) time: 0.1524 data: 0.0678 max mem: 9377 +Train: [62] [2900/6250] eta: 0:08:28 lr: 0.000042 grad: 0.1309 (0.1398) loss: 0.7960 (0.7978) time: 0.1521 data: 0.0683 max mem: 9377 +Train: [62] [3000/6250] eta: 0:08:11 lr: 0.000042 grad: 0.1422 (0.1399) loss: 0.7913 (0.7977) time: 0.1425 data: 0.0596 max mem: 9377 +Train: [62] [3100/6250] eta: 0:07:55 lr: 0.000042 grad: 0.1327 (0.1399) loss: 0.7909 (0.7976) time: 0.1508 data: 0.0617 max mem: 9377 +Train: [62] [3200/6250] eta: 0:07:39 lr: 0.000042 grad: 0.1498 (0.1401) loss: 0.7896 (0.7974) time: 0.1544 data: 0.0704 max mem: 9377 +Train: [62] [3300/6250] eta: 0:07:23 lr: 0.000042 grad: 0.1389 (0.1402) loss: 0.7937 (0.7973) time: 0.1343 data: 0.0502 max mem: 9377 +Train: [62] [3400/6250] eta: 0:07:07 lr: 0.000042 grad: 0.1365 (0.1404) loss: 0.7915 (0.7970) time: 0.1394 data: 0.0534 max mem: 9377 +Train: [62] [3500/6250] eta: 0:06:51 lr: 0.000042 grad: 0.1381 (0.1405) loss: 0.7825 (0.7968) time: 0.1120 data: 0.0319 max mem: 9377 +Train: [62] [3600/6250] eta: 0:06:35 lr: 0.000042 grad: 0.1404 (0.1406) loss: 0.7810 (0.7964) time: 0.1290 data: 0.0494 max mem: 9377 +Train: [62] [3700/6250] eta: 0:06:19 lr: 0.000042 grad: 0.1420 (0.1409) loss: 0.7898 (0.7961) time: 0.1331 data: 0.0546 max mem: 9377 +Train: [62] [3800/6250] eta: 0:06:04 lr: 0.000042 grad: 0.1402 (0.1411) loss: 0.7883 (0.7957) time: 0.1396 data: 0.0519 max mem: 9377 +Train: [62] [3900/6250] eta: 0:05:49 lr: 0.000042 grad: 0.1384 (0.1413) loss: 0.7871 (0.7955) time: 0.1402 data: 0.0624 max mem: 9377 +Train: [62] [4000/6250] eta: 0:05:33 lr: 0.000042 grad: 0.1445 (0.1415) loss: 0.7849 (0.7951) time: 0.1549 data: 0.0771 max mem: 9377 +Train: [62] [4100/6250] eta: 0:05:18 lr: 0.000042 grad: 0.1439 (0.1415) loss: 0.7844 (0.7949) time: 0.1334 data: 0.0501 max mem: 9377 +Train: [62] [4200/6250] eta: 0:05:03 lr: 0.000042 grad: 0.1337 (0.1416) loss: 0.7957 (0.7947) time: 0.1258 data: 0.0424 max mem: 9377 +Train: [62] [4300/6250] eta: 0:04:48 lr: 0.000042 grad: 0.1370 (0.1415) loss: 0.7816 (0.7944) time: 0.1771 data: 0.0986 max mem: 9377 +Train: [62] [4400/6250] eta: 0:04:33 lr: 0.000042 grad: 0.1383 (0.1416) loss: 0.7815 (0.7943) time: 0.1237 data: 0.0406 max mem: 9377 +Train: [62] [4500/6250] eta: 0:04:18 lr: 0.000042 grad: 0.1409 (0.1417) loss: 0.7805 (0.7940) time: 0.1408 data: 0.0580 max mem: 9377 +Train: [62] [4600/6250] eta: 0:04:04 lr: 0.000042 grad: 0.1309 (0.1416) loss: 0.7909 (0.7938) time: 0.1439 data: 0.0632 max mem: 9377 +Train: [62] [4700/6250] eta: 0:03:49 lr: 0.000042 grad: 0.1405 (0.1417) loss: 0.7872 (0.7937) time: 0.1612 data: 0.0772 max mem: 9377 +Train: [62] [4800/6250] eta: 0:03:34 lr: 0.000042 grad: 0.1327 (0.1417) loss: 0.7925 (0.7936) time: 0.1589 data: 0.0736 max mem: 9377 +Train: [62] [4900/6250] eta: 0:03:20 lr: 0.000042 grad: 0.1420 (0.1418) loss: 0.7844 (0.7934) time: 0.1537 data: 0.0662 max mem: 9377 +Train: [62] [5000/6250] eta: 0:03:05 lr: 0.000042 grad: 0.1418 (0.1418) loss: 0.7823 (0.7932) time: 0.1612 data: 0.0746 max mem: 9377 +Train: [62] [5100/6250] eta: 0:02:50 lr: 0.000042 grad: 0.1371 (0.1418) loss: 0.7860 (0.7930) time: 0.1477 data: 0.0592 max mem: 9377 +Train: [62] [5200/6250] eta: 0:02:35 lr: 0.000042 grad: 0.1423 (0.1420) loss: 0.7821 (0.7928) time: 0.1179 data: 0.0308 max mem: 9377 +Train: [62] [5300/6250] eta: 0:02:20 lr: 0.000042 grad: 0.1442 (0.1421) loss: 0.7852 (0.7927) time: 0.1514 data: 0.0708 max mem: 9377 +Train: [62] [5400/6250] eta: 0:02:05 lr: 0.000041 grad: 0.1387 (0.1423) loss: 0.7794 (0.7925) time: 0.1580 data: 0.0791 max mem: 9377 +Train: [62] [5500/6250] eta: 0:01:51 lr: 0.000041 grad: 0.1375 (0.1424) loss: 0.7810 (0.7923) time: 0.1603 data: 0.0807 max mem: 9377 +Train: [62] [5600/6250] eta: 0:01:36 lr: 0.000041 grad: 0.1409 (0.1425) loss: 0.7879 (0.7921) time: 0.1536 data: 0.0772 max mem: 9377 +Train: [62] [5700/6250] eta: 0:01:21 lr: 0.000041 grad: 0.1390 (0.1425) loss: 0.7890 (0.7919) time: 0.1591 data: 0.0821 max mem: 9377 +Train: [62] [5800/6250] eta: 0:01:06 lr: 0.000041 grad: 0.1354 (0.1424) loss: 0.7911 (0.7918) time: 0.1615 data: 0.0839 max mem: 9377 +Train: [62] [5900/6250] eta: 0:00:51 lr: 0.000041 grad: 0.1290 (0.1424) loss: 0.7860 (0.7918) time: 0.1297 data: 0.0466 max mem: 9377 +Train: [62] [6000/6250] eta: 0:00:37 lr: 0.000041 grad: 0.1371 (0.1424) loss: 0.7953 (0.7917) time: 0.1616 data: 0.0780 max mem: 9377 +Train: [62] [6100/6250] eta: 0:00:22 lr: 0.000041 grad: 0.1383 (0.1423) loss: 0.7852 (0.7917) time: 0.1727 data: 0.0884 max mem: 9377 +Train: [62] [6200/6250] eta: 0:00:07 lr: 0.000041 grad: 0.1443 (0.1423) loss: 0.7901 (0.7917) time: 0.1449 data: 0.0580 max mem: 9377 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1367 (0.1423) loss: 0.7927 (0.7917) time: 0.1736 data: 0.0904 max mem: 9377 +Train: [62] Total time: 0:15:40 (0.1505 s / it) +Averaged stats: lr: 0.000041 grad: 0.1367 (0.1423) loss: 0.7927 (0.7917) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:03:40 loss: 0.8362 (0.8362) time: 3.5548 data: 3.4863 max mem: 9377 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.8417 (0.8427) time: 0.1121 data: 0.0870 max mem: 9377 +Eval (hcp-train-subset): [62] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (hcp-train-subset): loss: 0.8417 (0.8427) +Eval (hcp-val): [62] [ 0/62] eta: 0:04:44 loss: 0.8396 (0.8396) time: 4.5872 data: 4.5156 max mem: 9377 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8399 (0.8408) time: 0.0941 data: 0.0691 max mem: 9377 +Eval (hcp-val): [62] Total time: 0:00:14 (0.2397 s / it) +Averaged stats (hcp-val): loss: 0.8399 (0.8408) +Eval (nsd-val): [62] [ 0/62] eta: 0:04:58 loss: 0.8311 (0.8311) time: 4.8191 data: 4.7528 max mem: 9377 +Eval (nsd-val): [62] [61/62] eta: 0:00:00 loss: 0.8388 (0.8393) time: 0.1398 data: 0.1146 max mem: 9377 +Eval (nsd-val): [62] Total time: 0:00:15 (0.2488 s / it) +Averaged stats (nsd-val): loss: 0.8388 (0.8393) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 9:56:03 lr: 0.000041 grad: 0.1840 (0.1840) loss: 0.8712 (0.8712) time: 5.7221 data: 5.4523 max mem: 9377 +Train: [63] [ 100/6250] eta: 0:23:05 lr: 0.000041 grad: 0.1503 (0.1872) loss: 0.7982 (0.8117) time: 0.1544 data: 0.0409 max mem: 9377 +Train: [63] [ 200/6250] eta: 0:19:59 lr: 0.000041 grad: 0.1314 (0.1684) loss: 0.8132 (0.8101) time: 0.1795 data: 0.0646 max mem: 9377 +Train: [63] [ 300/6250] eta: 0:18:50 lr: 0.000041 grad: 0.1364 (0.1615) loss: 0.7919 (0.8078) time: 0.1830 data: 0.0780 max mem: 9377 +Train: [63] [ 400/6250] eta: 0:17:52 lr: 0.000041 grad: 0.1569 (0.1600) loss: 0.7893 (0.8039) time: 0.1643 data: 0.0727 max mem: 9377 +Train: [63] [ 500/6250] eta: 0:16:53 lr: 0.000041 grad: 0.1279 (0.1575) loss: 0.8013 (0.8020) time: 0.1495 data: 0.0615 max mem: 9377 +Train: [63] [ 600/6250] eta: 0:16:10 lr: 0.000041 grad: 0.1412 (0.1540) loss: 0.7856 (0.8014) time: 0.1506 data: 0.0622 max mem: 9377 +Train: [63] [ 700/6250] eta: 0:15:43 lr: 0.000041 grad: 0.1291 (0.1510) loss: 0.7965 (0.8018) time: 0.1698 data: 0.0819 max mem: 9377 +Train: [63] [ 800/6250] eta: 0:15:16 lr: 0.000041 grad: 0.1230 (0.1489) loss: 0.8125 (0.8022) time: 0.1540 data: 0.0666 max mem: 9377 +Train: [63] [ 900/6250] eta: 0:14:44 lr: 0.000041 grad: 0.1308 (0.1475) loss: 0.7961 (0.8021) time: 0.1488 data: 0.0593 max mem: 9377 +Train: [63] [1000/6250] eta: 0:14:19 lr: 0.000041 grad: 0.1323 (0.1465) loss: 0.8021 (0.8023) time: 0.1525 data: 0.0690 max mem: 9377 +Train: [63] [1100/6250] eta: 0:13:51 lr: 0.000041 grad: 0.1370 (0.1457) loss: 0.8037 (0.8019) time: 0.1373 data: 0.0565 max mem: 9377 +Train: [63] [1200/6250] eta: 0:13:29 lr: 0.000041 grad: 0.1380 (0.1457) loss: 0.7978 (0.8016) time: 0.1651 data: 0.0785 max mem: 9377 +Train: [63] [1300/6250] eta: 0:13:05 lr: 0.000041 grad: 0.1348 (0.1450) loss: 0.8036 (0.8014) time: 0.1262 data: 0.0450 max mem: 9377 +Train: [63] [1400/6250] eta: 0:12:42 lr: 0.000041 grad: 0.1324 (0.1446) loss: 0.7966 (0.8010) time: 0.1505 data: 0.0685 max mem: 9377 +Train: [63] [1500/6250] eta: 0:12:20 lr: 0.000041 grad: 0.1335 (0.1442) loss: 0.8026 (0.8006) time: 0.1449 data: 0.0660 max mem: 9377 +Train: [63] [1600/6250] eta: 0:11:58 lr: 0.000041 grad: 0.1439 (0.1444) loss: 0.7848 (0.8000) time: 0.1365 data: 0.0561 max mem: 9377 +Train: [63] [1700/6250] eta: 0:11:40 lr: 0.000041 grad: 0.1347 (0.1442) loss: 0.7951 (0.7996) time: 0.1379 data: 0.0612 max mem: 9377 +Train: [63] [1800/6250] eta: 0:11:19 lr: 0.000041 grad: 0.1393 (0.1441) loss: 0.7931 (0.7992) time: 0.1165 data: 0.0405 max mem: 9377 +Train: [63] [1900/6250] eta: 0:11:01 lr: 0.000041 grad: 0.1326 (0.1438) loss: 0.7973 (0.7989) time: 0.1425 data: 0.0568 max mem: 9377 +Train: [63] [2000/6250] eta: 0:10:43 lr: 0.000041 grad: 0.1310 (0.1435) loss: 0.7937 (0.7987) time: 0.1481 data: 0.0679 max mem: 9377 +Train: [63] [2100/6250] eta: 0:10:26 lr: 0.000041 grad: 0.1342 (0.1434) loss: 0.8004 (0.7985) time: 0.1352 data: 0.0506 max mem: 9377 +Train: [63] [2200/6250] eta: 0:10:10 lr: 0.000041 grad: 0.1263 (0.1432) loss: 0.8042 (0.7984) time: 0.1364 data: 0.0551 max mem: 9377 +Train: [63] [2300/6250] eta: 0:09:53 lr: 0.000041 grad: 0.1276 (0.1432) loss: 0.8090 (0.7984) time: 0.1106 data: 0.0252 max mem: 9377 +Train: [63] [2400/6250] eta: 0:09:37 lr: 0.000040 grad: 0.1380 (0.1429) loss: 0.7998 (0.7984) time: 0.1488 data: 0.0707 max mem: 9377 +Train: [63] [2500/6250] eta: 0:09:21 lr: 0.000040 grad: 0.1264 (0.1430) loss: 0.7948 (0.7983) time: 0.1378 data: 0.0628 max mem: 9377 +Train: [63] [2600/6250] eta: 0:09:04 lr: 0.000040 grad: 0.1364 (0.1428) loss: 0.7920 (0.7982) time: 0.1525 data: 0.0697 max mem: 9377 +Train: [63] [2700/6250] eta: 0:08:48 lr: 0.000040 grad: 0.1357 (0.1426) loss: 0.7986 (0.7982) time: 0.1259 data: 0.0455 max mem: 9377 +Train: [63] [2800/6250] eta: 0:08:31 lr: 0.000040 grad: 0.1349 (0.1425) loss: 0.7931 (0.7982) time: 0.1449 data: 0.0612 max mem: 9377 +Train: [63] [2900/6250] eta: 0:08:16 lr: 0.000040 grad: 0.1474 (0.1425) loss: 0.7958 (0.7980) time: 0.1428 data: 0.0644 max mem: 9377 +Train: [63] [3000/6250] eta: 0:08:00 lr: 0.000040 grad: 0.1349 (0.1424) loss: 0.7939 (0.7980) time: 0.1240 data: 0.0407 max mem: 9377 +Train: [63] [3100/6250] eta: 0:07:44 lr: 0.000040 grad: 0.1314 (0.1423) loss: 0.7900 (0.7978) time: 0.1461 data: 0.0671 max mem: 9377 +Train: [63] [3200/6250] eta: 0:07:28 lr: 0.000040 grad: 0.1377 (0.1422) loss: 0.7919 (0.7977) time: 0.1547 data: 0.0764 max mem: 9377 +Train: [63] [3300/6250] eta: 0:07:13 lr: 0.000040 grad: 0.1305 (0.1421) loss: 0.7950 (0.7976) time: 0.1592 data: 0.0782 max mem: 9377 +Train: [63] [3400/6250] eta: 0:06:58 lr: 0.000040 grad: 0.1291 (0.1420) loss: 0.7971 (0.7976) time: 0.1383 data: 0.0548 max mem: 9377 +Train: [63] [3500/6250] eta: 0:06:42 lr: 0.000040 grad: 0.1297 (0.1420) loss: 0.7949 (0.7975) time: 0.1547 data: 0.0743 max mem: 9377 +Train: [63] [3600/6250] eta: 0:06:27 lr: 0.000040 grad: 0.1367 (0.1421) loss: 0.7964 (0.7974) time: 0.1393 data: 0.0603 max mem: 9377 +Train: [63] [3700/6250] eta: 0:06:12 lr: 0.000040 grad: 0.1355 (0.1420) loss: 0.7937 (0.7973) time: 0.1278 data: 0.0439 max mem: 9377 +Train: [63] [3800/6250] eta: 0:05:57 lr: 0.000040 grad: 0.1347 (0.1420) loss: 0.7900 (0.7972) time: 0.1329 data: 0.0506 max mem: 9377 +Train: [63] [3900/6250] eta: 0:05:42 lr: 0.000040 grad: 0.1350 (0.1420) loss: 0.7918 (0.7971) time: 0.1329 data: 0.0479 max mem: 9377 +Train: [63] [4000/6250] eta: 0:05:27 lr: 0.000040 grad: 0.1420 (0.1421) loss: 0.7872 (0.7970) time: 0.1357 data: 0.0533 max mem: 9377 +Train: [63] [4100/6250] eta: 0:05:12 lr: 0.000040 grad: 0.1350 (0.1421) loss: 0.8009 (0.7969) time: 0.1330 data: 0.0515 max mem: 9377 +Train: [63] [4200/6250] eta: 0:04:57 lr: 0.000040 grad: 0.1378 (0.1421) loss: 0.7886 (0.7968) time: 0.1355 data: 0.0482 max mem: 9377 +Train: [63] [4300/6250] eta: 0:04:44 lr: 0.000040 grad: 0.1294 (0.1421) loss: 0.7922 (0.7966) time: 0.1526 data: 0.0667 max mem: 9377 +Train: [63] [4400/6250] eta: 0:04:29 lr: 0.000040 grad: 0.1448 (0.1421) loss: 0.7876 (0.7965) time: 0.1542 data: 0.0668 max mem: 9377 +Train: [63] [4500/6250] eta: 0:04:15 lr: 0.000040 grad: 0.1421 (0.1421) loss: 0.7900 (0.7963) time: 0.1590 data: 0.0698 max mem: 9377 +Train: [63] [4600/6250] eta: 0:04:00 lr: 0.000040 grad: 0.1392 (0.1422) loss: 0.7949 (0.7962) time: 0.1269 data: 0.0459 max mem: 9377 +Train: [63] [4700/6250] eta: 0:03:45 lr: 0.000040 grad: 0.1422 (0.1423) loss: 0.7884 (0.7961) time: 0.1417 data: 0.0588 max mem: 9377 +Train: [63] [4800/6250] eta: 0:03:30 lr: 0.000040 grad: 0.1486 (0.1425) loss: 0.7896 (0.7959) time: 0.1573 data: 0.0743 max mem: 9377 +Train: [63] [4900/6250] eta: 0:03:15 lr: 0.000040 grad: 0.1454 (0.1426) loss: 0.7958 (0.7958) time: 0.1287 data: 0.0411 max mem: 9377 +Train: [63] [5000/6250] eta: 0:03:01 lr: 0.000040 grad: 0.1413 (0.1427) loss: 0.7852 (0.7958) time: 0.1284 data: 0.0449 max mem: 9377 +Train: [63] [5100/6250] eta: 0:02:46 lr: 0.000040 grad: 0.1421 (0.1428) loss: 0.7922 (0.7957) time: 0.1334 data: 0.0478 max mem: 9377 +Train: [63] [5200/6250] eta: 0:02:31 lr: 0.000040 grad: 0.1400 (0.1427) loss: 0.7931 (0.7956) time: 0.1418 data: 0.0570 max mem: 9377 +Train: [63] [5300/6250] eta: 0:02:16 lr: 0.000040 grad: 0.1384 (0.1429) loss: 0.7914 (0.7956) time: 0.1450 data: 0.0643 max mem: 9377 +Train: [63] [5400/6250] eta: 0:02:02 lr: 0.000040 grad: 0.1293 (0.1430) loss: 0.8007 (0.7955) time: 0.1339 data: 0.0437 max mem: 9377 +Train: [63] [5500/6250] eta: 0:01:47 lr: 0.000040 grad: 0.1366 (0.1430) loss: 0.7906 (0.7955) time: 0.1323 data: 0.0421 max mem: 9377 +Train: [63] [5600/6250] eta: 0:01:33 lr: 0.000039 grad: 0.1392 (0.1429) loss: 0.7866 (0.7954) time: 0.1171 data: 0.0370 max mem: 9377 +Train: [63] [5700/6250] eta: 0:01:18 lr: 0.000039 grad: 0.1373 (0.1429) loss: 0.7939 (0.7953) time: 0.1192 data: 0.0312 max mem: 9377 +Train: [63] [5800/6250] eta: 0:01:04 lr: 0.000039 grad: 0.1344 (0.1429) loss: 0.8006 (0.7953) time: 0.1493 data: 0.0661 max mem: 9377 +Train: [63] [5900/6250] eta: 0:00:49 lr: 0.000039 grad: 0.1351 (0.1429) loss: 0.7888 (0.7953) time: 0.1271 data: 0.0436 max mem: 9377 +Train: [63] [6000/6250] eta: 0:00:35 lr: 0.000039 grad: 0.1457 (0.1430) loss: 0.7880 (0.7952) time: 0.1592 data: 0.0782 max mem: 9377 +Train: [63] [6100/6250] eta: 0:00:21 lr: 0.000039 grad: 0.1363 (0.1430) loss: 0.7880 (0.7951) time: 0.1444 data: 0.0599 max mem: 9377 +Train: [63] [6200/6250] eta: 0:00:07 lr: 0.000039 grad: 0.1473 (0.1429) loss: 0.7923 (0.7951) time: 0.1732 data: 0.0953 max mem: 9377 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1392 (0.1430) loss: 0.7903 (0.7950) time: 0.1631 data: 0.0810 max mem: 9377 +Train: [63] Total time: 0:14:56 (0.1435 s / it) +Averaged stats: lr: 0.000039 grad: 0.1392 (0.1430) loss: 0.7903 (0.7950) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:04:29 loss: 0.8402 (0.8402) time: 4.3392 data: 4.3059 max mem: 9377 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.8434 (0.8429) time: 0.1224 data: 0.0971 max mem: 9377 +Eval (hcp-train-subset): [63] Total time: 0:00:13 (0.2227 s / it) +Averaged stats (hcp-train-subset): loss: 0.8434 (0.8429) +Eval (hcp-val): [63] [ 0/62] eta: 0:03:56 loss: 0.8407 (0.8407) time: 3.8125 data: 3.6855 max mem: 9377 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8399 (0.8406) time: 0.1382 data: 0.1133 max mem: 9377 +Eval (hcp-val): [63] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (hcp-val): loss: 0.8399 (0.8406) +Eval (nsd-val): [63] [ 0/62] eta: 0:03:22 loss: 0.8140 (0.8140) time: 3.2631 data: 3.1793 max mem: 9377 +Eval (nsd-val): [63] [61/62] eta: 0:00:00 loss: 0.8258 (0.8273) time: 0.1357 data: 0.1104 max mem: 9377 +Eval (nsd-val): [63] Total time: 0:00:12 (0.2091 s / it) +Averaged stats (nsd-val): loss: 0.8258 (0.8273) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 9:32:34 lr: 0.000039 grad: 0.0827 (0.0827) loss: 0.8628 (0.8628) time: 5.4968 data: 5.3311 max mem: 9377 +Train: [64] [ 100/6250] eta: 0:20:07 lr: 0.000039 grad: 0.1824 (0.1972) loss: 0.8017 (0.8163) time: 0.1576 data: 0.0616 max mem: 9377 +Train: [64] [ 200/6250] eta: 0:16:56 lr: 0.000039 grad: 0.1544 (0.1848) loss: 0.7907 (0.8072) time: 0.1510 data: 0.0567 max mem: 9377 +Train: [64] [ 300/6250] eta: 0:15:40 lr: 0.000039 grad: 0.1570 (0.1762) loss: 0.7888 (0.8030) time: 0.1254 data: 0.0261 max mem: 9377 +Train: [64] [ 400/6250] eta: 0:14:57 lr: 0.000039 grad: 0.1438 (0.1693) loss: 0.7937 (0.8015) time: 0.1446 data: 0.0561 max mem: 9377 +Train: [64] [ 500/6250] eta: 0:14:28 lr: 0.000039 grad: 0.1331 (0.1641) loss: 0.8091 (0.8012) time: 0.1318 data: 0.0415 max mem: 9377 +Train: [64] [ 600/6250] eta: 0:14:10 lr: 0.000039 grad: 0.1355 (0.1609) loss: 0.8138 (0.8011) time: 0.1709 data: 0.0785 max mem: 9377 +Train: [64] [ 700/6250] eta: 0:13:54 lr: 0.000039 grad: 0.1403 (0.1580) loss: 0.7919 (0.8002) time: 0.1285 data: 0.0413 max mem: 9377 +Train: [64] [ 800/6250] eta: 0:13:32 lr: 0.000039 grad: 0.1463 (0.1560) loss: 0.7969 (0.7999) time: 0.1513 data: 0.0605 max mem: 9377 +Train: [64] [ 900/6250] eta: 0:13:15 lr: 0.000039 grad: 0.1308 (0.1549) loss: 0.7948 (0.7994) time: 0.1232 data: 0.0298 max mem: 9377 +Train: [64] [1000/6250] eta: 0:12:52 lr: 0.000039 grad: 0.1367 (0.1534) loss: 0.7979 (0.7992) time: 0.1458 data: 0.0657 max mem: 9377 +Train: [64] [1100/6250] eta: 0:12:32 lr: 0.000039 grad: 0.1382 (0.1519) loss: 0.7861 (0.7989) time: 0.1422 data: 0.0642 max mem: 9377 +Train: [64] [1200/6250] eta: 0:12:13 lr: 0.000039 grad: 0.1430 (0.1508) loss: 0.7960 (0.7987) time: 0.1479 data: 0.0709 max mem: 9377 +Train: [64] [1300/6250] eta: 0:11:56 lr: 0.000039 grad: 0.1348 (0.1497) loss: 0.7935 (0.7986) time: 0.1234 data: 0.0434 max mem: 9377 +Train: [64] [1400/6250] eta: 0:11:39 lr: 0.000039 grad: 0.1304 (0.1486) loss: 0.8021 (0.7985) time: 0.1137 data: 0.0271 max mem: 9377 +Train: [64] [1500/6250] eta: 0:11:22 lr: 0.000039 grad: 0.1363 (0.1477) loss: 0.7912 (0.7984) time: 0.1438 data: 0.0666 max mem: 9377 +Train: [64] [1600/6250] eta: 0:11:06 lr: 0.000039 grad: 0.1284 (0.1472) loss: 0.7996 (0.7982) time: 0.1481 data: 0.0752 max mem: 9377 +Train: [64] [1700/6250] eta: 0:10:50 lr: 0.000039 grad: 0.1259 (0.1467) loss: 0.7951 (0.7981) time: 0.1450 data: 0.0589 max mem: 9377 +Train: [64] [1800/6250] eta: 0:10:35 lr: 0.000039 grad: 0.1308 (0.1460) loss: 0.7964 (0.7980) time: 0.1609 data: 0.0773 max mem: 9377 +Train: [64] [1900/6250] eta: 0:10:20 lr: 0.000039 grad: 0.1477 (0.1457) loss: 0.7848 (0.7977) time: 0.1503 data: 0.0654 max mem: 9377 +Train: [64] [2000/6250] eta: 0:10:04 lr: 0.000039 grad: 0.1389 (0.1461) loss: 0.7899 (0.7973) time: 0.1388 data: 0.0611 max mem: 9377 +Train: [64] [2100/6250] eta: 0:09:49 lr: 0.000039 grad: 0.1416 (0.1460) loss: 0.7884 (0.7969) time: 0.1380 data: 0.0577 max mem: 9377 +Train: [64] [2200/6250] eta: 0:09:34 lr: 0.000039 grad: 0.1469 (0.1460) loss: 0.7932 (0.7965) time: 0.1420 data: 0.0614 max mem: 9377 +Train: [64] [2300/6250] eta: 0:09:19 lr: 0.000039 grad: 0.1411 (0.1461) loss: 0.7876 (0.7961) time: 0.1235 data: 0.0383 max mem: 9377 +Train: [64] [2400/6250] eta: 0:09:04 lr: 0.000039 grad: 0.1388 (0.1464) loss: 0.7862 (0.7956) time: 0.1385 data: 0.0556 max mem: 9377 +Train: [64] [2500/6250] eta: 0:08:50 lr: 0.000039 grad: 0.1426 (0.1467) loss: 0.7874 (0.7952) time: 0.1456 data: 0.0623 max mem: 9377 +Train: [64] [2600/6250] eta: 0:08:35 lr: 0.000039 grad: 0.1445 (0.1469) loss: 0.7889 (0.7948) time: 0.1316 data: 0.0435 max mem: 9377 +Train: [64] [2700/6250] eta: 0:08:20 lr: 0.000038 grad: 0.1523 (0.1471) loss: 0.7865 (0.7944) time: 0.1349 data: 0.0513 max mem: 9377 +Train: [64] [2800/6250] eta: 0:08:06 lr: 0.000038 grad: 0.1425 (0.1471) loss: 0.7935 (0.7940) time: 0.1385 data: 0.0587 max mem: 9377 +Train: [64] [2900/6250] eta: 0:07:51 lr: 0.000038 grad: 0.1497 (0.1471) loss: 0.7834 (0.7938) time: 0.1435 data: 0.0644 max mem: 9377 +Train: [64] [3000/6250] eta: 0:07:37 lr: 0.000038 grad: 0.1387 (0.1472) loss: 0.7885 (0.7934) time: 0.1423 data: 0.0574 max mem: 9377 +Train: [64] [3100/6250] eta: 0:07:23 lr: 0.000038 grad: 0.1688 (0.1475) loss: 0.7769 (0.7931) time: 0.1420 data: 0.0630 max mem: 9377 +Train: [64] [3200/6250] eta: 0:07:08 lr: 0.000038 grad: 0.1544 (0.1478) loss: 0.7837 (0.7928) time: 0.1065 data: 0.0198 max mem: 9377 +Train: [64] [3300/6250] eta: 0:06:54 lr: 0.000038 grad: 0.1467 (0.1480) loss: 0.7867 (0.7925) time: 0.1117 data: 0.0302 max mem: 9377 +Train: [64] [3400/6250] eta: 0:06:39 lr: 0.000038 grad: 0.1498 (0.1482) loss: 0.7821 (0.7922) time: 0.1286 data: 0.0435 max mem: 9377 +Train: [64] [3500/6250] eta: 0:06:25 lr: 0.000038 grad: 0.1391 (0.1482) loss: 0.7903 (0.7919) time: 0.1454 data: 0.0664 max mem: 9377 +Train: [64] [3600/6250] eta: 0:06:12 lr: 0.000038 grad: 0.1429 (0.1481) loss: 0.7854 (0.7918) time: 0.1682 data: 0.0852 max mem: 9377 +Train: [64] [3700/6250] eta: 0:05:57 lr: 0.000038 grad: 0.1453 (0.1482) loss: 0.7886 (0.7916) time: 0.1274 data: 0.0453 max mem: 9377 +Train: [64] [3800/6250] eta: 0:05:42 lr: 0.000038 grad: 0.1440 (0.1482) loss: 0.7859 (0.7915) time: 0.1294 data: 0.0512 max mem: 9377 +Train: [64] [3900/6250] eta: 0:05:28 lr: 0.000038 grad: 0.1467 (0.1482) loss: 0.7872 (0.7913) time: 0.1281 data: 0.0434 max mem: 9377 +Train: [64] [4000/6250] eta: 0:05:14 lr: 0.000038 grad: 0.1494 (0.1483) loss: 0.7813 (0.7912) time: 0.1374 data: 0.0533 max mem: 9377 +Train: [64] [4100/6250] eta: 0:04:59 lr: 0.000038 grad: 0.1539 (0.1484) loss: 0.7839 (0.7911) time: 0.1257 data: 0.0436 max mem: 9377 +Train: [64] [4200/6250] eta: 0:04:45 lr: 0.000038 grad: 0.1443 (0.1485) loss: 0.7918 (0.7910) time: 0.1370 data: 0.0549 max mem: 9377 +Train: [64] [4300/6250] eta: 0:04:32 lr: 0.000038 grad: 0.1479 (0.1486) loss: 0.7840 (0.7909) time: 0.1575 data: 0.0815 max mem: 9377 +Train: [64] [4400/6250] eta: 0:04:19 lr: 0.000038 grad: 0.1511 (0.1487) loss: 0.7858 (0.7909) time: 0.1644 data: 0.0750 max mem: 9377 +Train: [64] [4500/6250] eta: 0:04:05 lr: 0.000038 grad: 0.1510 (0.1487) loss: 0.7859 (0.7908) time: 0.1371 data: 0.0488 max mem: 9377 +Train: [64] [4600/6250] eta: 0:03:51 lr: 0.000038 grad: 0.1455 (0.1487) loss: 0.7916 (0.7908) time: 0.1228 data: 0.0440 max mem: 9377 +Train: [64] [4700/6250] eta: 0:03:37 lr: 0.000038 grad: 0.1444 (0.1487) loss: 0.7870 (0.7907) time: 0.1422 data: 0.0597 max mem: 9377 +Train: [64] [4800/6250] eta: 0:03:23 lr: 0.000038 grad: 0.1476 (0.1487) loss: 0.7896 (0.7907) time: 0.1499 data: 0.0579 max mem: 9377 +Train: [64] [4900/6250] eta: 0:03:10 lr: 0.000038 grad: 0.1416 (0.1487) loss: 0.7898 (0.7906) time: 0.1473 data: 0.0539 max mem: 9377 +Train: [64] [5000/6250] eta: 0:02:56 lr: 0.000038 grad: 0.1474 (0.1487) loss: 0.7908 (0.7906) time: 0.1612 data: 0.0811 max mem: 9377 +Train: [64] [5100/6250] eta: 0:02:42 lr: 0.000038 grad: 0.1449 (0.1488) loss: 0.7867 (0.7905) time: 0.1417 data: 0.0587 max mem: 9377 +Train: [64] [5200/6250] eta: 0:02:28 lr: 0.000038 grad: 0.1394 (0.1487) loss: 0.7918 (0.7905) time: 0.1274 data: 0.0418 max mem: 9377 +Train: [64] [5300/6250] eta: 0:02:13 lr: 0.000038 grad: 0.1413 (0.1487) loss: 0.7958 (0.7906) time: 0.1353 data: 0.0556 max mem: 9377 +Train: [64] [5400/6250] eta: 0:02:00 lr: 0.000038 grad: 0.1391 (0.1486) loss: 0.7897 (0.7906) time: 0.1717 data: 0.0939 max mem: 9377 +Train: [64] [5500/6250] eta: 0:01:46 lr: 0.000038 grad: 0.1522 (0.1487) loss: 0.7894 (0.7906) time: 0.1617 data: 0.0797 max mem: 9377 +Train: [64] [5600/6250] eta: 0:01:32 lr: 0.000038 grad: 0.1381 (0.1486) loss: 0.7885 (0.7907) time: 0.1679 data: 0.0848 max mem: 9377 +Train: [64] [5700/6250] eta: 0:01:18 lr: 0.000038 grad: 0.1358 (0.1485) loss: 0.7915 (0.7907) time: 0.1414 data: 0.0615 max mem: 9377 +Train: [64] [5800/6250] eta: 0:01:03 lr: 0.000038 grad: 0.1409 (0.1484) loss: 0.7988 (0.7908) time: 0.1384 data: 0.0601 max mem: 9377 +Train: [64] [5900/6250] eta: 0:00:49 lr: 0.000037 grad: 0.1378 (0.1483) loss: 0.7956 (0.7908) time: 0.1510 data: 0.0688 max mem: 9377 +Train: [64] [6000/6250] eta: 0:00:35 lr: 0.000037 grad: 0.1341 (0.1483) loss: 0.7990 (0.7908) time: 0.1428 data: 0.0656 max mem: 9377 +Train: [64] [6100/6250] eta: 0:00:21 lr: 0.000037 grad: 0.1509 (0.1482) loss: 0.7907 (0.7908) time: 0.1401 data: 0.0491 max mem: 9377 +Train: [64] [6200/6250] eta: 0:00:07 lr: 0.000037 grad: 0.1444 (0.1481) loss: 0.7933 (0.7908) time: 0.1456 data: 0.0603 max mem: 9377 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1442 (0.1482) loss: 0.7983 (0.7908) time: 0.1698 data: 0.0891 max mem: 9377 +Train: [64] Total time: 0:14:52 (0.1428 s / it) +Averaged stats: lr: 0.000037 grad: 0.1442 (0.1482) loss: 0.7983 (0.7908) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:05:09 loss: 0.8405 (0.8405) time: 4.9976 data: 4.9680 max mem: 9377 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.8419 (0.8430) time: 0.1245 data: 0.0979 max mem: 9377 +Eval (hcp-train-subset): [64] Total time: 0:00:13 (0.2172 s / it) +Averaged stats (hcp-train-subset): loss: 0.8419 (0.8430) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [64] [ 0/62] eta: 0:06:07 loss: 0.8390 (0.8390) time: 5.9289 data: 5.8981 max mem: 9377 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8406 (0.8414) time: 0.1444 data: 0.1174 max mem: 9377 +Eval (hcp-val): [64] Total time: 0:00:14 (0.2397 s / it) +Averaged stats (hcp-val): loss: 0.8406 (0.8414) +Making plots (hcp-val): example=60 +Eval (nsd-val): [64] [ 0/62] eta: 0:05:15 loss: 0.8161 (0.8161) time: 5.0885 data: 5.0302 max mem: 9377 +Eval (nsd-val): [64] [61/62] eta: 0:00:00 loss: 0.8317 (0.8329) time: 0.1344 data: 0.1096 max mem: 9377 +Eval (nsd-val): [64] Total time: 0:00:16 (0.2676 s / it) +Averaged stats (nsd-val): loss: 0.8317 (0.8329) +Making plots (nsd-val): example=24 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00064.pth +Train: [65] [ 0/6250] eta: 9:07:01 lr: 0.000037 grad: 0.1732 (0.1732) loss: 0.8283 (0.8283) time: 5.2514 data: 4.9891 max mem: 9377 +Train: [65] [ 100/6250] eta: 0:22:16 lr: 0.000037 grad: 0.1508 (0.1874) loss: 0.8013 (0.8094) time: 0.1384 data: 0.0353 max mem: 9377 +Train: [65] [ 200/6250] eta: 0:19:34 lr: 0.000037 grad: 0.1363 (0.1775) loss: 0.7940 (0.8006) time: 0.1503 data: 0.0569 max mem: 9377 +Train: [65] [ 300/6250] eta: 0:17:33 lr: 0.000037 grad: 0.1270 (0.1684) loss: 0.8018 (0.7985) time: 0.1292 data: 0.0437 max mem: 9377 +Train: [65] [ 400/6250] eta: 0:16:28 lr: 0.000037 grad: 0.1439 (0.1618) loss: 0.7982 (0.7981) time: 0.1490 data: 0.0672 max mem: 9377 +Train: [65] [ 500/6250] eta: 0:15:43 lr: 0.000037 grad: 0.1409 (0.1574) loss: 0.8021 (0.7988) time: 0.1510 data: 0.0660 max mem: 9377 +Train: [65] [ 600/6250] eta: 0:15:05 lr: 0.000037 grad: 0.1329 (0.1546) loss: 0.8045 (0.7987) time: 0.1586 data: 0.0620 max mem: 9377 +Train: [65] [ 700/6250] eta: 0:14:28 lr: 0.000037 grad: 0.1371 (0.1529) loss: 0.7948 (0.7985) time: 0.1119 data: 0.0219 max mem: 9377 +Train: [65] [ 800/6250] eta: 0:13:57 lr: 0.000037 grad: 0.1478 (0.1518) loss: 0.7986 (0.7983) time: 0.1246 data: 0.0276 max mem: 9377 +Train: [65] [ 900/6250] eta: 0:13:37 lr: 0.000037 grad: 0.1407 (0.1515) loss: 0.7968 (0.7977) time: 0.1466 data: 0.0635 max mem: 9377 +Train: [65] [1000/6250] eta: 0:13:16 lr: 0.000037 grad: 0.1307 (0.1507) loss: 0.7970 (0.7972) time: 0.1109 data: 0.0178 max mem: 9377 +Train: [65] [1100/6250] eta: 0:12:54 lr: 0.000037 grad: 0.1309 (0.1495) loss: 0.7965 (0.7970) time: 0.1318 data: 0.0515 max mem: 9377 +Train: [65] [1200/6250] eta: 0:12:36 lr: 0.000037 grad: 0.1370 (0.1485) loss: 0.7904 (0.7969) time: 0.1417 data: 0.0619 max mem: 9377 +Train: [65] [1300/6250] eta: 0:12:18 lr: 0.000037 grad: 0.1295 (0.1477) loss: 0.7958 (0.7967) time: 0.1240 data: 0.0363 max mem: 9377 +Train: [65] [1400/6250] eta: 0:12:00 lr: 0.000037 grad: 0.1327 (0.1471) loss: 0.7949 (0.7963) time: 0.1611 data: 0.0791 max mem: 9377 +Train: [65] [1500/6250] eta: 0:11:45 lr: 0.000037 grad: 0.1313 (0.1465) loss: 0.7935 (0.7962) time: 0.1368 data: 0.0543 max mem: 9377 +Train: [65] [1600/6250] eta: 0:11:26 lr: 0.000037 grad: 0.1343 (0.1463) loss: 0.7965 (0.7960) time: 0.1388 data: 0.0535 max mem: 9377 +Train: [65] [1700/6250] eta: 0:11:11 lr: 0.000037 grad: 0.1303 (0.1460) loss: 0.7977 (0.7958) time: 0.1193 data: 0.0333 max mem: 9377 +Train: [65] [1800/6250] eta: 0:10:55 lr: 0.000037 grad: 0.1339 (0.1457) loss: 0.7970 (0.7958) time: 0.1350 data: 0.0507 max mem: 9377 +Train: [65] [1900/6250] eta: 0:10:39 lr: 0.000037 grad: 0.1362 (0.1453) loss: 0.7940 (0.7959) time: 0.1392 data: 0.0569 max mem: 9377 +Train: [65] [2000/6250] eta: 0:10:23 lr: 0.000037 grad: 0.1337 (0.1450) loss: 0.7969 (0.7959) time: 0.1341 data: 0.0494 max mem: 9377 +Train: [65] [2100/6250] eta: 0:10:07 lr: 0.000037 grad: 0.1316 (0.1448) loss: 0.7988 (0.7959) time: 0.1583 data: 0.0752 max mem: 9377 +Train: [65] [2200/6250] eta: 0:09:50 lr: 0.000037 grad: 0.1393 (0.1446) loss: 0.7927 (0.7958) time: 0.1355 data: 0.0545 max mem: 9377 +Train: [65] [2300/6250] eta: 0:09:33 lr: 0.000037 grad: 0.1456 (0.1443) loss: 0.7948 (0.7958) time: 0.1129 data: 0.0320 max mem: 9377 +Train: [65] [2400/6250] eta: 0:09:17 lr: 0.000037 grad: 0.1315 (0.1442) loss: 0.7949 (0.7957) time: 0.1408 data: 0.0613 max mem: 9377 +Train: [65] [2500/6250] eta: 0:09:02 lr: 0.000037 grad: 0.1341 (0.1440) loss: 0.7971 (0.7957) time: 0.1497 data: 0.0647 max mem: 9377 +Train: [65] [2600/6250] eta: 0:08:46 lr: 0.000037 grad: 0.1414 (0.1438) loss: 0.7883 (0.7956) time: 0.1467 data: 0.0642 max mem: 9377 +Train: [65] [2700/6250] eta: 0:08:30 lr: 0.000037 grad: 0.1488 (0.1439) loss: 0.7866 (0.7953) time: 0.1332 data: 0.0470 max mem: 9377 +Train: [65] [2800/6250] eta: 0:08:16 lr: 0.000037 grad: 0.1369 (0.1439) loss: 0.7980 (0.7952) time: 0.1312 data: 0.0510 max mem: 9377 +Train: [65] [2900/6250] eta: 0:08:00 lr: 0.000037 grad: 0.1305 (0.1439) loss: 0.7923 (0.7950) time: 0.1247 data: 0.0437 max mem: 9377 +Train: [65] [3000/6250] eta: 0:07:46 lr: 0.000036 grad: 0.1420 (0.1439) loss: 0.7849 (0.7949) time: 0.1243 data: 0.0407 max mem: 9377 +Train: [65] [3100/6250] eta: 0:07:31 lr: 0.000036 grad: 0.1482 (0.1440) loss: 0.7816 (0.7947) time: 0.1260 data: 0.0463 max mem: 9377 +Train: [65] [3200/6250] eta: 0:07:16 lr: 0.000036 grad: 0.1425 (0.1440) loss: 0.7947 (0.7945) time: 0.1376 data: 0.0530 max mem: 9377 +Train: [65] [3300/6250] eta: 0:07:01 lr: 0.000036 grad: 0.1405 (0.1440) loss: 0.7891 (0.7944) time: 0.1346 data: 0.0581 max mem: 9377 +Train: [65] [3400/6250] eta: 0:06:47 lr: 0.000036 grad: 0.1393 (0.1441) loss: 0.7831 (0.7942) time: 0.1128 data: 0.0310 max mem: 9377 +Train: [65] [3500/6250] eta: 0:06:32 lr: 0.000036 grad: 0.1431 (0.1440) loss: 0.7962 (0.7942) time: 0.1346 data: 0.0528 max mem: 9377 +Train: [65] [3600/6250] eta: 0:06:17 lr: 0.000036 grad: 0.1482 (0.1441) loss: 0.7913 (0.7940) time: 0.1327 data: 0.0570 max mem: 9377 +Train: [65] [3700/6250] eta: 0:06:03 lr: 0.000036 grad: 0.1398 (0.1443) loss: 0.7984 (0.7940) time: 0.1187 data: 0.0395 max mem: 9377 +Train: [65] [3800/6250] eta: 0:05:48 lr: 0.000036 grad: 0.1425 (0.1444) loss: 0.7950 (0.7938) time: 0.1453 data: 0.0669 max mem: 9377 +Train: [65] [3900/6250] eta: 0:05:34 lr: 0.000036 grad: 0.1391 (0.1444) loss: 0.7974 (0.7938) time: 0.1350 data: 0.0506 max mem: 9377 +Train: [65] [4000/6250] eta: 0:05:20 lr: 0.000036 grad: 0.1497 (0.1444) loss: 0.7986 (0.7938) time: 0.1356 data: 0.0556 max mem: 9377 +Train: [65] [4100/6250] eta: 0:05:05 lr: 0.000036 grad: 0.1483 (0.1445) loss: 0.7884 (0.7937) time: 0.1204 data: 0.0346 max mem: 9377 +Train: [65] [4200/6250] eta: 0:04:51 lr: 0.000036 grad: 0.1392 (0.1447) loss: 0.7922 (0.7936) time: 0.1252 data: 0.0435 max mem: 9377 +Train: [65] [4300/6250] eta: 0:04:37 lr: 0.000036 grad: 0.1466 (0.1448) loss: 0.7878 (0.7935) time: 0.1363 data: 0.0556 max mem: 9377 +Train: [65] [4400/6250] eta: 0:04:23 lr: 0.000036 grad: 0.1498 (0.1450) loss: 0.7855 (0.7934) time: 0.1522 data: 0.0663 max mem: 9377 +Train: [65] [4500/6250] eta: 0:04:09 lr: 0.000036 grad: 0.1471 (0.1451) loss: 0.7919 (0.7933) time: 0.1368 data: 0.0486 max mem: 9377 +Train: [65] [4600/6250] eta: 0:03:55 lr: 0.000036 grad: 0.1405 (0.1452) loss: 0.7935 (0.7931) time: 0.1534 data: 0.0716 max mem: 9377 +Train: [65] [4700/6250] eta: 0:03:41 lr: 0.000036 grad: 0.1399 (0.1452) loss: 0.7879 (0.7932) time: 0.1386 data: 0.0599 max mem: 9377 +Train: [65] [4800/6250] eta: 0:03:26 lr: 0.000036 grad: 0.1474 (0.1455) loss: 0.7917 (0.7931) time: 0.1476 data: 0.0678 max mem: 9377 +Train: [65] [4900/6250] eta: 0:03:12 lr: 0.000036 grad: 0.1422 (0.1456) loss: 0.7873 (0.7930) time: 0.1234 data: 0.0418 max mem: 9377 +Train: [65] [5000/6250] eta: 0:02:57 lr: 0.000036 grad: 0.1479 (0.1457) loss: 0.7902 (0.7930) time: 0.1253 data: 0.0451 max mem: 9377 +Train: [65] [5100/6250] eta: 0:02:43 lr: 0.000036 grad: 0.1427 (0.1457) loss: 0.7890 (0.7929) time: 0.1343 data: 0.0539 max mem: 9377 +Train: [65] [5200/6250] eta: 0:02:28 lr: 0.000036 grad: 0.1442 (0.1458) loss: 0.7889 (0.7929) time: 0.1235 data: 0.0341 max mem: 9377 +Train: [65] [5300/6250] eta: 0:02:14 lr: 0.000036 grad: 0.1377 (0.1459) loss: 0.7911 (0.7928) time: 0.1195 data: 0.0409 max mem: 9377 +Train: [65] [5400/6250] eta: 0:02:00 lr: 0.000036 grad: 0.1392 (0.1459) loss: 0.7913 (0.7928) time: 0.1324 data: 0.0484 max mem: 9377 +Train: [65] [5500/6250] eta: 0:01:45 lr: 0.000036 grad: 0.1409 (0.1459) loss: 0.7895 (0.7928) time: 0.1279 data: 0.0481 max mem: 9377 +Train: [65] [5600/6250] eta: 0:01:31 lr: 0.000036 grad: 0.1475 (0.1460) loss: 0.7907 (0.7928) time: 0.1472 data: 0.0669 max mem: 9377 +Train: [65] [5700/6250] eta: 0:01:17 lr: 0.000036 grad: 0.1460 (0.1460) loss: 0.7855 (0.7927) time: 0.1206 data: 0.0411 max mem: 9377 +Train: [65] [5800/6250] eta: 0:01:03 lr: 0.000036 grad: 0.1433 (0.1460) loss: 0.8039 (0.7927) time: 0.1604 data: 0.0710 max mem: 9377 +Train: [65] [5900/6250] eta: 0:00:49 lr: 0.000036 grad: 0.1386 (0.1459) loss: 0.7989 (0.7928) time: 0.1118 data: 0.0270 max mem: 9377 +Train: [65] [6000/6250] eta: 0:00:35 lr: 0.000036 grad: 0.1443 (0.1459) loss: 0.7865 (0.7928) time: 0.1315 data: 0.0507 max mem: 9377 +Train: [65] [6100/6250] eta: 0:00:21 lr: 0.000036 grad: 0.1476 (0.1459) loss: 0.7897 (0.7929) time: 0.1379 data: 0.0598 max mem: 9377 +Train: [65] [6200/6250] eta: 0:00:07 lr: 0.000036 grad: 0.1387 (0.1460) loss: 0.7867 (0.7929) time: 0.1127 data: 0.0291 max mem: 9377 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1447 (0.1460) loss: 0.7882 (0.7928) time: 0.1405 data: 0.0554 max mem: 9377 +Train: [65] Total time: 0:14:44 (0.1416 s / it) +Averaged stats: lr: 0.000036 grad: 0.1447 (0.1460) loss: 0.7882 (0.7928) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:04:56 loss: 0.8401 (0.8401) time: 4.7852 data: 4.7520 max mem: 9377 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.8433 (0.8435) time: 0.1222 data: 0.0957 max mem: 9377 +Eval (hcp-train-subset): [65] Total time: 0:00:13 (0.2161 s / it) +Averaged stats (hcp-train-subset): loss: 0.8433 (0.8435) +Eval (hcp-val): [65] [ 0/62] eta: 0:04:34 loss: 0.8407 (0.8407) time: 4.4257 data: 4.3574 max mem: 9377 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8403 (0.8416) time: 0.1270 data: 0.1002 max mem: 9377 +Eval (hcp-val): [65] Total time: 0:00:13 (0.2219 s / it) +Averaged stats (hcp-val): loss: 0.8403 (0.8416) +Eval (nsd-val): [65] [ 0/62] eta: 0:04:37 loss: 0.8125 (0.8125) time: 4.4721 data: 4.4096 max mem: 9377 +Eval (nsd-val): [65] [61/62] eta: 0:00:00 loss: 0.8258 (0.8278) time: 0.1269 data: 0.1017 max mem: 9377 +Eval (nsd-val): [65] Total time: 0:00:13 (0.2172 s / it) +Averaged stats (nsd-val): loss: 0.8258 (0.8278) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [66] [ 0/6250] eta: 9:55:38 lr: 0.000036 grad: 0.0898 (0.0898) loss: 0.8538 (0.8538) time: 5.7182 data: 5.6239 max mem: 9377 +Train: [66] [ 100/6250] eta: 0:20:55 lr: 0.000035 grad: 0.1476 (0.1786) loss: 0.8053 (0.8163) time: 0.1596 data: 0.0510 max mem: 9377 +Train: [66] [ 200/6250] eta: 0:17:44 lr: 0.000035 grad: 0.1464 (0.1707) loss: 0.7944 (0.8097) time: 0.1313 data: 0.0468 max mem: 9377 +Train: [66] [ 300/6250] eta: 0:16:31 lr: 0.000035 grad: 0.1302 (0.1655) loss: 0.8107 (0.8072) time: 0.1537 data: 0.0655 max mem: 9377 +Train: [66] [ 400/6250] eta: 0:15:34 lr: 0.000035 grad: 0.1436 (0.1609) loss: 0.7903 (0.8049) time: 0.1540 data: 0.0653 max mem: 9377 +Train: [66] [ 500/6250] eta: 0:15:00 lr: 0.000035 grad: 0.1412 (0.1577) loss: 0.7987 (0.8044) time: 0.1308 data: 0.0408 max mem: 9377 +Train: [66] [ 600/6250] eta: 0:14:36 lr: 0.000035 grad: 0.1288 (0.1552) loss: 0.8056 (0.8038) time: 0.1311 data: 0.0338 max mem: 9377 +Train: [66] [ 700/6250] eta: 0:14:26 lr: 0.000035 grad: 0.1405 (0.1538) loss: 0.7929 (0.8027) time: 0.1715 data: 0.0785 max mem: 9377 +Train: [66] [ 800/6250] eta: 0:14:11 lr: 0.000035 grad: 0.1417 (0.1526) loss: 0.7944 (0.8021) time: 0.1717 data: 0.0779 max mem: 9377 +Train: [66] [ 900/6250] eta: 0:13:50 lr: 0.000035 grad: 0.1401 (0.1513) loss: 0.8012 (0.8016) time: 0.1477 data: 0.0576 max mem: 9377 +Train: [66] [1000/6250] eta: 0:13:27 lr: 0.000035 grad: 0.1445 (0.1503) loss: 0.7913 (0.8012) time: 0.1278 data: 0.0334 max mem: 9377 +Train: [66] [1100/6250] eta: 0:13:04 lr: 0.000035 grad: 0.1355 (0.1496) loss: 0.7982 (0.8007) time: 0.1169 data: 0.0372 max mem: 9377 +Train: [66] [1200/6250] eta: 0:12:50 lr: 0.000035 grad: 0.1339 (0.1495) loss: 0.7977 (0.8004) time: 0.1516 data: 0.0679 max mem: 9377 +Train: [66] [1300/6250] eta: 0:12:36 lr: 0.000035 grad: 0.1381 (0.1493) loss: 0.7975 (0.7999) time: 0.1887 data: 0.1085 max mem: 9377 +Train: [66] [1400/6250] eta: 0:12:24 lr: 0.000035 grad: 0.1250 (0.1487) loss: 0.8014 (0.7997) time: 0.1461 data: 0.0736 max mem: 9377 +Train: [66] [1500/6250] eta: 0:12:11 lr: 0.000035 grad: 0.1348 (0.1480) loss: 0.8022 (0.7997) time: 0.1616 data: 0.0829 max mem: 9377 +Train: [66] [1600/6250] eta: 0:11:58 lr: 0.000035 grad: 0.1461 (0.1475) loss: 0.7935 (0.7995) time: 0.1594 data: 0.0867 max mem: 9377 +Train: [66] [1700/6250] eta: 0:11:45 lr: 0.000035 grad: 0.1307 (0.1471) loss: 0.8011 (0.7994) time: 0.1532 data: 0.0790 max mem: 9377 +Train: [66] [1800/6250] eta: 0:11:31 lr: 0.000035 grad: 0.1380 (0.1467) loss: 0.8068 (0.7994) time: 0.1553 data: 0.0763 max mem: 9377 +Train: [66] [1900/6250] eta: 0:11:14 lr: 0.000035 grad: 0.1418 (0.1465) loss: 0.7930 (0.7992) time: 0.1644 data: 0.0872 max mem: 9377 +Train: [66] [2000/6250] eta: 0:10:59 lr: 0.000035 grad: 0.1403 (0.1463) loss: 0.8010 (0.7992) time: 0.2012 data: 0.1265 max mem: 9377 +Train: [66] [2100/6250] eta: 0:10:39 lr: 0.000035 grad: 0.1377 (0.1461) loss: 0.7951 (0.7990) time: 0.1314 data: 0.0449 max mem: 9377 +Train: [66] [2200/6250] eta: 0:10:21 lr: 0.000035 grad: 0.1555 (0.1461) loss: 0.7877 (0.7989) time: 0.1309 data: 0.0483 max mem: 9377 +Train: [66] [2300/6250] eta: 0:10:03 lr: 0.000035 grad: 0.1510 (0.1461) loss: 0.7907 (0.7985) time: 0.1356 data: 0.0519 max mem: 9377 +Train: [66] [2400/6250] eta: 0:09:46 lr: 0.000035 grad: 0.1414 (0.1459) loss: 0.7934 (0.7983) time: 0.1427 data: 0.0605 max mem: 9377 +Train: [66] [2500/6250] eta: 0:09:29 lr: 0.000035 grad: 0.1361 (0.1456) loss: 0.8012 (0.7983) time: 0.1426 data: 0.0545 max mem: 9377 +Train: [66] [2600/6250] eta: 0:09:11 lr: 0.000035 grad: 0.1427 (0.1456) loss: 0.7870 (0.7981) time: 0.1363 data: 0.0544 max mem: 9377 +Train: [66] [2700/6250] eta: 0:08:55 lr: 0.000035 grad: 0.1371 (0.1454) loss: 0.7976 (0.7981) time: 0.1225 data: 0.0323 max mem: 9377 +Train: [66] [2800/6250] eta: 0:08:37 lr: 0.000035 grad: 0.1382 (0.1452) loss: 0.7938 (0.7979) time: 0.1248 data: 0.0415 max mem: 9377 +Train: [66] [2900/6250] eta: 0:08:21 lr: 0.000035 grad: 0.1389 (0.1452) loss: 0.7836 (0.7978) time: 0.1293 data: 0.0544 max mem: 9377 +Train: [66] [3000/6250] eta: 0:08:05 lr: 0.000035 grad: 0.1353 (0.1450) loss: 0.7978 (0.7976) time: 0.1407 data: 0.0597 max mem: 9377 +Train: [66] [3100/6250] eta: 0:07:49 lr: 0.000035 grad: 0.1409 (0.1450) loss: 0.7895 (0.7973) time: 0.1637 data: 0.0906 max mem: 9377 +Train: [66] [3200/6250] eta: 0:07:33 lr: 0.000035 grad: 0.1463 (0.1450) loss: 0.7853 (0.7971) time: 0.1288 data: 0.0452 max mem: 9377 +Train: [66] [3300/6250] eta: 0:07:17 lr: 0.000035 grad: 0.1439 (0.1449) loss: 0.7895 (0.7970) time: 0.1204 data: 0.0380 max mem: 9377 +Train: [66] [3400/6250] eta: 0:07:02 lr: 0.000035 grad: 0.1512 (0.1450) loss: 0.7904 (0.7968) time: 0.1464 data: 0.0681 max mem: 9377 +Train: [66] [3500/6250] eta: 0:06:46 lr: 0.000034 grad: 0.1477 (0.1451) loss: 0.7803 (0.7967) time: 0.1400 data: 0.0584 max mem: 9377 +Train: [66] [3600/6250] eta: 0:06:30 lr: 0.000034 grad: 0.1369 (0.1451) loss: 0.7961 (0.7966) time: 0.1442 data: 0.0650 max mem: 9377 +Train: [66] [3700/6250] eta: 0:06:15 lr: 0.000034 grad: 0.1327 (0.1451) loss: 0.7887 (0.7965) time: 0.1358 data: 0.0583 max mem: 9377 +Train: [66] [3800/6250] eta: 0:05:59 lr: 0.000034 grad: 0.1426 (0.1451) loss: 0.7869 (0.7964) time: 0.1392 data: 0.0603 max mem: 9377 +Train: [66] [3900/6250] eta: 0:05:44 lr: 0.000034 grad: 0.1437 (0.1450) loss: 0.7988 (0.7964) time: 0.1338 data: 0.0459 max mem: 9377 +Train: [66] [4000/6250] eta: 0:05:28 lr: 0.000034 grad: 0.1403 (0.1451) loss: 0.7967 (0.7964) time: 0.1376 data: 0.0593 max mem: 9377 +Train: [66] [4100/6250] eta: 0:05:13 lr: 0.000034 grad: 0.1450 (0.1452) loss: 0.7988 (0.7964) time: 0.1363 data: 0.0579 max mem: 9377 +Train: [66] [4200/6250] eta: 0:04:58 lr: 0.000034 grad: 0.1383 (0.1452) loss: 0.7959 (0.7963) time: 0.1270 data: 0.0475 max mem: 9377 +Train: [66] [4300/6250] eta: 0:04:44 lr: 0.000034 grad: 0.1419 (0.1452) loss: 0.7926 (0.7963) time: 0.1527 data: 0.0751 max mem: 9377 +Train: [66] [4400/6250] eta: 0:04:29 lr: 0.000034 grad: 0.1435 (0.1452) loss: 0.7948 (0.7962) time: 0.1505 data: 0.0670 max mem: 9377 +Train: [66] [4500/6250] eta: 0:04:15 lr: 0.000034 grad: 0.1507 (0.1453) loss: 0.7842 (0.7962) time: 0.1531 data: 0.0622 max mem: 9377 +Train: [66] [4600/6250] eta: 0:04:00 lr: 0.000034 grad: 0.1441 (0.1454) loss: 0.7820 (0.7960) time: 0.1576 data: 0.0717 max mem: 9377 +Train: [66] [4700/6250] eta: 0:03:45 lr: 0.000034 grad: 0.1438 (0.1455) loss: 0.7885 (0.7959) time: 0.1388 data: 0.0556 max mem: 9377 +Train: [66] [4800/6250] eta: 0:03:30 lr: 0.000034 grad: 0.1467 (0.1456) loss: 0.7901 (0.7957) time: 0.1232 data: 0.0437 max mem: 9377 +Train: [66] [4900/6250] eta: 0:03:15 lr: 0.000034 grad: 0.1396 (0.1457) loss: 0.7895 (0.7955) time: 0.1368 data: 0.0493 max mem: 9377 +Train: [66] [5000/6250] eta: 0:03:01 lr: 0.000034 grad: 0.1475 (0.1458) loss: 0.7751 (0.7952) time: 0.1314 data: 0.0465 max mem: 9377 +Train: [66] [5100/6250] eta: 0:02:46 lr: 0.000034 grad: 0.1420 (0.1459) loss: 0.7880 (0.7950) time: 0.1382 data: 0.0540 max mem: 9377 +Train: [66] [5200/6250] eta: 0:02:31 lr: 0.000034 grad: 0.1505 (0.1459) loss: 0.7838 (0.7948) time: 0.1374 data: 0.0508 max mem: 9377 +Train: [66] [5300/6250] eta: 0:02:16 lr: 0.000034 grad: 0.1509 (0.1461) loss: 0.7866 (0.7945) time: 0.1283 data: 0.0423 max mem: 9377 +Train: [66] [5400/6250] eta: 0:02:02 lr: 0.000034 grad: 0.1458 (0.1462) loss: 0.7871 (0.7944) time: 0.1544 data: 0.0719 max mem: 9377 +Train: [66] [5500/6250] eta: 0:01:47 lr: 0.000034 grad: 0.1405 (0.1463) loss: 0.7831 (0.7942) time: 0.1624 data: 0.0829 max mem: 9377 +Train: [66] [5600/6250] eta: 0:01:33 lr: 0.000034 grad: 0.1516 (0.1465) loss: 0.7948 (0.7941) time: 0.1113 data: 0.0347 max mem: 9377 +Train: [66] [5700/6250] eta: 0:01:19 lr: 0.000034 grad: 0.1323 (0.1466) loss: 0.7940 (0.7939) time: 0.1477 data: 0.0714 max mem: 9377 +Train: [66] [5800/6250] eta: 0:01:04 lr: 0.000034 grad: 0.1479 (0.1467) loss: 0.7844 (0.7937) time: 0.1305 data: 0.0546 max mem: 9377 +Train: [66] [5900/6250] eta: 0:00:50 lr: 0.000034 grad: 0.1465 (0.1470) loss: 0.7805 (0.7935) time: 0.1480 data: 0.0731 max mem: 9377 +Train: [66] [6000/6250] eta: 0:00:36 lr: 0.000034 grad: 0.1404 (0.1471) loss: 0.7909 (0.7933) time: 0.1588 data: 0.0803 max mem: 9377 +Train: [66] [6100/6250] eta: 0:00:21 lr: 0.000034 grad: 0.1462 (0.1471) loss: 0.7852 (0.7932) time: 0.1129 data: 0.0300 max mem: 9377 +Train: [66] [6200/6250] eta: 0:00:07 lr: 0.000034 grad: 0.1561 (0.1473) loss: 0.7801 (0.7930) time: 0.1435 data: 0.0637 max mem: 9377 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1466 (0.1473) loss: 0.7857 (0.7930) time: 0.1124 data: 0.0209 max mem: 9377 +Train: [66] Total time: 0:15:04 (0.1448 s / it) +Averaged stats: lr: 0.000034 grad: 0.1466 (0.1473) loss: 0.7857 (0.7930) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:03:43 loss: 0.8392 (0.8392) time: 3.6070 data: 3.5099 max mem: 9377 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.8408 (0.8417) time: 0.1332 data: 0.1059 max mem: 9377 +Eval (hcp-train-subset): [66] Total time: 0:00:14 (0.2364 s / it) +Averaged stats (hcp-train-subset): loss: 0.8408 (0.8417) +Eval (hcp-val): [66] [ 0/62] eta: 0:06:04 loss: 0.8381 (0.8381) time: 5.8779 data: 5.8464 max mem: 9377 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.8388 (0.8405) time: 0.1489 data: 0.1216 max mem: 9377 +Eval (hcp-val): [66] Total time: 0:00:14 (0.2405 s / it) +Averaged stats (hcp-val): loss: 0.8388 (0.8405) +Eval (nsd-val): [66] [ 0/62] eta: 0:06:30 loss: 0.8295 (0.8295) time: 6.2988 data: 6.2678 max mem: 9377 +Eval (nsd-val): [66] [61/62] eta: 0:00:00 loss: 0.8433 (0.8449) time: 0.1338 data: 0.1071 max mem: 9377 +Eval (nsd-val): [66] Total time: 0:00:14 (0.2387 s / it) +Averaged stats (nsd-val): loss: 0.8433 (0.8449) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [67] [ 0/6250] eta: 9:00:25 lr: 0.000034 grad: 0.0777 (0.0777) loss: 0.8584 (0.8584) time: 5.1881 data: 4.9686 max mem: 9377 +Train: [67] [ 100/6250] eta: 0:21:58 lr: 0.000034 grad: 0.2086 (0.2169) loss: 0.7910 (0.8076) time: 0.1268 data: 0.0229 max mem: 9377 +Train: [67] [ 200/6250] eta: 0:19:07 lr: 0.000034 grad: 0.1634 (0.2047) loss: 0.7911 (0.8000) time: 0.1858 data: 0.0912 max mem: 9377 +Train: [67] [ 300/6250] eta: 0:17:29 lr: 0.000034 grad: 0.1584 (0.1950) loss: 0.7791 (0.7979) time: 0.1448 data: 0.0575 max mem: 9377 +Train: [67] [ 400/6250] eta: 0:16:31 lr: 0.000034 grad: 0.1634 (0.1872) loss: 0.7776 (0.7961) time: 0.1294 data: 0.0386 max mem: 9377 +Train: [67] [ 500/6250] eta: 0:15:43 lr: 0.000034 grad: 0.1294 (0.1804) loss: 0.8034 (0.7953) time: 0.1448 data: 0.0620 max mem: 9377 +Train: [67] [ 600/6250] eta: 0:15:06 lr: 0.000033 grad: 0.1294 (0.1770) loss: 0.8020 (0.7949) time: 0.1244 data: 0.0437 max mem: 9377 +Train: [67] [ 700/6250] eta: 0:14:31 lr: 0.000033 grad: 0.1450 (0.1736) loss: 0.7852 (0.7944) time: 0.1352 data: 0.0545 max mem: 9377 +Train: [67] [ 800/6250] eta: 0:13:56 lr: 0.000033 grad: 0.1352 (0.1709) loss: 0.7965 (0.7940) time: 0.1288 data: 0.0569 max mem: 9377 +Train: [67] [ 900/6250] eta: 0:13:35 lr: 0.000033 grad: 0.1467 (0.1689) loss: 0.7964 (0.7938) time: 0.1356 data: 0.0341 max mem: 9377 +Train: [67] [1000/6250] eta: 0:13:11 lr: 0.000033 grad: 0.1395 (0.1672) loss: 0.7991 (0.7937) time: 0.1406 data: 0.0593 max mem: 9377 +Train: [67] [1100/6250] eta: 0:12:48 lr: 0.000033 grad: 0.1502 (0.1654) loss: 0.7848 (0.7934) time: 0.1205 data: 0.0316 max mem: 9377 +Train: [67] [1200/6250] eta: 0:12:25 lr: 0.000033 grad: 0.1370 (0.1638) loss: 0.7995 (0.7935) time: 0.1393 data: 0.0541 max mem: 9377 +Train: [67] [1300/6250] eta: 0:12:05 lr: 0.000033 grad: 0.1468 (0.1622) loss: 0.7897 (0.7935) time: 0.1477 data: 0.0634 max mem: 9377 +Train: [67] [1400/6250] eta: 0:11:45 lr: 0.000033 grad: 0.1378 (0.1613) loss: 0.7849 (0.7932) time: 0.1213 data: 0.0380 max mem: 9377 +Train: [67] [1500/6250] eta: 0:11:27 lr: 0.000033 grad: 0.1435 (0.1602) loss: 0.7944 (0.7931) time: 0.1388 data: 0.0574 max mem: 9377 +Train: [67] [1600/6250] eta: 0:11:10 lr: 0.000033 grad: 0.1454 (0.1590) loss: 0.7916 (0.7932) time: 0.1344 data: 0.0475 max mem: 9377 +Train: [67] [1700/6250] eta: 0:10:54 lr: 0.000033 grad: 0.1503 (0.1585) loss: 0.7915 (0.7932) time: 0.1316 data: 0.0486 max mem: 9377 +Train: [67] [1800/6250] eta: 0:10:37 lr: 0.000033 grad: 0.1444 (0.1581) loss: 0.7895 (0.7931) time: 0.1130 data: 0.0237 max mem: 9377 +Train: [67] [1900/6250] eta: 0:10:22 lr: 0.000033 grad: 0.1487 (0.1574) loss: 0.7822 (0.7931) time: 0.1340 data: 0.0488 max mem: 9377 +Train: [67] [2000/6250] eta: 0:10:06 lr: 0.000033 grad: 0.1505 (0.1575) loss: 0.7865 (0.7929) time: 0.1271 data: 0.0439 max mem: 9377 +Train: [67] [2100/6250] eta: 0:09:51 lr: 0.000033 grad: 0.1452 (0.1573) loss: 0.7951 (0.7926) time: 0.1419 data: 0.0650 max mem: 9377 +Train: [67] [2200/6250] eta: 0:09:36 lr: 0.000033 grad: 0.1537 (0.1574) loss: 0.7860 (0.7922) time: 0.1322 data: 0.0515 max mem: 9377 +Train: [67] [2300/6250] eta: 0:09:21 lr: 0.000033 grad: 0.1571 (0.1573) loss: 0.7828 (0.7918) time: 0.1070 data: 0.0240 max mem: 9377 +Train: [67] [2400/6250] eta: 0:09:05 lr: 0.000033 grad: 0.1500 (0.1574) loss: 0.7876 (0.7915) time: 0.1346 data: 0.0612 max mem: 9377 +Train: [67] [2500/6250] eta: 0:08:51 lr: 0.000033 grad: 0.1465 (0.1576) loss: 0.7899 (0.7913) time: 0.1394 data: 0.0593 max mem: 9377 +Train: [67] [2600/6250] eta: 0:08:36 lr: 0.000033 grad: 0.1509 (0.1576) loss: 0.7915 (0.7911) time: 0.1418 data: 0.0562 max mem: 9377 +Train: [67] [2700/6250] eta: 0:08:22 lr: 0.000033 grad: 0.1500 (0.1574) loss: 0.7818 (0.7910) time: 0.1526 data: 0.0720 max mem: 9377 +Train: [67] [2800/6250] eta: 0:08:08 lr: 0.000033 grad: 0.1438 (0.1573) loss: 0.7946 (0.7910) time: 0.1369 data: 0.0540 max mem: 9377 +Train: [67] [2900/6250] eta: 0:07:54 lr: 0.000033 grad: 0.1516 (0.1578) loss: 0.7945 (0.7909) time: 0.1498 data: 0.0714 max mem: 9377 +Train: [67] [3000/6250] eta: 0:07:41 lr: 0.000033 grad: 0.1482 (0.1579) loss: 0.7950 (0.7909) time: 0.1483 data: 0.0675 max mem: 9377 +Train: [67] [3100/6250] eta: 0:07:26 lr: 0.000033 grad: 0.1508 (0.1580) loss: 0.7896 (0.7908) time: 0.1505 data: 0.0659 max mem: 9377 +Train: [67] [3200/6250] eta: 0:07:12 lr: 0.000033 grad: 0.1468 (0.1579) loss: 0.7865 (0.7907) time: 0.1014 data: 0.0020 max mem: 9377 +Train: [67] [3300/6250] eta: 0:06:58 lr: 0.000033 grad: 0.1562 (0.1577) loss: 0.7798 (0.7907) time: 0.1287 data: 0.0471 max mem: 9377 +Train: [67] [3400/6250] eta: 0:06:43 lr: 0.000033 grad: 0.1495 (0.1576) loss: 0.7826 (0.7906) time: 0.1220 data: 0.0388 max mem: 9377 +Train: [67] [3500/6250] eta: 0:06:28 lr: 0.000033 grad: 0.1483 (0.1575) loss: 0.7807 (0.7905) time: 0.1301 data: 0.0445 max mem: 9377 +Train: [67] [3600/6250] eta: 0:06:14 lr: 0.000033 grad: 0.1521 (0.1574) loss: 0.7858 (0.7904) time: 0.1366 data: 0.0558 max mem: 9377 +Train: [67] [3700/6250] eta: 0:05:59 lr: 0.000033 grad: 0.1521 (0.1572) loss: 0.7891 (0.7903) time: 0.1383 data: 0.0571 max mem: 9377 +Train: [67] [3800/6250] eta: 0:05:44 lr: 0.000033 grad: 0.1541 (0.1572) loss: 0.7882 (0.7901) time: 0.1280 data: 0.0471 max mem: 9377 +Train: [67] [3900/6250] eta: 0:05:30 lr: 0.000033 grad: 0.1477 (0.1571) loss: 0.7916 (0.7900) time: 0.1352 data: 0.0595 max mem: 9377 +Train: [67] [4000/6250] eta: 0:05:15 lr: 0.000032 grad: 0.1465 (0.1570) loss: 0.7902 (0.7900) time: 0.1296 data: 0.0452 max mem: 9377 +Train: [67] [4100/6250] eta: 0:05:01 lr: 0.000032 grad: 0.1523 (0.1569) loss: 0.7813 (0.7899) time: 0.1200 data: 0.0358 max mem: 9377 +Train: [67] [4200/6250] eta: 0:04:47 lr: 0.000032 grad: 0.1451 (0.1569) loss: 0.7813 (0.7899) time: 0.1424 data: 0.0653 max mem: 9377 +Train: [67] [4300/6250] eta: 0:04:33 lr: 0.000032 grad: 0.1422 (0.1567) loss: 0.7924 (0.7899) time: 0.1555 data: 0.0780 max mem: 9377 +Train: [67] [4400/6250] eta: 0:04:20 lr: 0.000032 grad: 0.1445 (0.1566) loss: 0.7940 (0.7899) time: 0.1729 data: 0.0897 max mem: 9377 +Train: [67] [4500/6250] eta: 0:04:06 lr: 0.000032 grad: 0.1459 (0.1565) loss: 0.7901 (0.7899) time: 0.1757 data: 0.0878 max mem: 9377 +Train: [67] [4600/6250] eta: 0:03:52 lr: 0.000032 grad: 0.1362 (0.1563) loss: 0.7929 (0.7898) time: 0.1239 data: 0.0352 max mem: 9377 +Train: [67] [4700/6250] eta: 0:03:38 lr: 0.000032 grad: 0.1572 (0.1563) loss: 0.7838 (0.7898) time: 0.1501 data: 0.0660 max mem: 9377 +Train: [67] [4800/6250] eta: 0:03:24 lr: 0.000032 grad: 0.1526 (0.1562) loss: 0.7885 (0.7898) time: 0.1371 data: 0.0549 max mem: 9377 +Train: [67] [4900/6250] eta: 0:03:10 lr: 0.000032 grad: 0.1498 (0.1562) loss: 0.7917 (0.7897) time: 0.1462 data: 0.0628 max mem: 9377 +Train: [67] [5000/6250] eta: 0:02:56 lr: 0.000032 grad: 0.1548 (0.1561) loss: 0.7911 (0.7897) time: 0.1243 data: 0.0359 max mem: 9377 +Train: [67] [5100/6250] eta: 0:02:42 lr: 0.000032 grad: 0.1590 (0.1560) loss: 0.7895 (0.7896) time: 0.1399 data: 0.0506 max mem: 9377 +Train: [67] [5200/6250] eta: 0:02:27 lr: 0.000032 grad: 0.1425 (0.1559) loss: 0.7982 (0.7896) time: 0.1298 data: 0.0459 max mem: 9377 +Train: [67] [5300/6250] eta: 0:02:13 lr: 0.000032 grad: 0.1438 (0.1560) loss: 0.7969 (0.7897) time: 0.1305 data: 0.0508 max mem: 9377 +Train: [67] [5400/6250] eta: 0:01:59 lr: 0.000032 grad: 0.1394 (0.1559) loss: 0.7957 (0.7898) time: 0.1216 data: 0.0302 max mem: 9377 +Train: [67] [5500/6250] eta: 0:01:45 lr: 0.000032 grad: 0.1447 (0.1558) loss: 0.7885 (0.7899) time: 0.1636 data: 0.0739 max mem: 9377 +Train: [67] [5600/6250] eta: 0:01:31 lr: 0.000032 grad: 0.1459 (0.1557) loss: 0.7900 (0.7899) time: 0.1547 data: 0.0780 max mem: 9377 +Train: [67] [5700/6250] eta: 0:01:17 lr: 0.000032 grad: 0.1426 (0.1556) loss: 0.7960 (0.7899) time: 0.1635 data: 0.0890 max mem: 9377 +Train: [67] [5800/6250] eta: 0:01:03 lr: 0.000032 grad: 0.1490 (0.1557) loss: 0.7903 (0.7898) time: 0.1521 data: 0.0718 max mem: 9377 +Train: [67] [5900/6250] eta: 0:00:49 lr: 0.000032 grad: 0.1523 (0.1556) loss: 0.7941 (0.7898) time: 0.1150 data: 0.0297 max mem: 9377 +Train: [67] [6000/6250] eta: 0:00:35 lr: 0.000032 grad: 0.1468 (0.1556) loss: 0.7940 (0.7898) time: 0.1551 data: 0.0732 max mem: 9377 +Train: [67] [6100/6250] eta: 0:00:21 lr: 0.000032 grad: 0.1518 (0.1556) loss: 0.7871 (0.7898) time: 0.1526 data: 0.0733 max mem: 9377 +Train: [67] [6200/6250] eta: 0:00:07 lr: 0.000032 grad: 0.1609 (0.1556) loss: 0.7858 (0.7898) time: 0.1358 data: 0.0521 max mem: 9377 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1577 (0.1557) loss: 0.7840 (0.7898) time: 0.1425 data: 0.0626 max mem: 9377 +Train: [67] Total time: 0:14:46 (0.1418 s / it) +Averaged stats: lr: 0.000032 grad: 0.1577 (0.1557) loss: 0.7840 (0.7898) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:04:50 loss: 0.8366 (0.8366) time: 4.6860 data: 4.6545 max mem: 9377 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.8409 (0.8417) time: 0.1172 data: 0.0923 max mem: 9377 +Eval (hcp-train-subset): [67] Total time: 0:00:13 (0.2140 s / it) +Averaged stats (hcp-train-subset): loss: 0.8409 (0.8417) +Eval (hcp-val): [67] [ 0/62] eta: 0:03:37 loss: 0.8358 (0.8358) time: 3.5082 data: 3.4338 max mem: 9377 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.8391 (0.8404) time: 0.1336 data: 0.1064 max mem: 9377 +Eval (hcp-val): [67] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (hcp-val): loss: 0.8391 (0.8404) +Eval (nsd-val): [67] [ 0/62] eta: 0:05:20 loss: 0.8168 (0.8168) time: 5.1758 data: 5.1457 max mem: 9377 +Eval (nsd-val): [67] [61/62] eta: 0:00:00 loss: 0.8266 (0.8291) time: 0.1184 data: 0.0934 max mem: 9377 +Eval (nsd-val): [67] Total time: 0:00:13 (0.2159 s / it) +Averaged stats (nsd-val): loss: 0.8266 (0.8291) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [68] [ 0/6250] eta: 7:32:18 lr: 0.000032 grad: 0.1070 (0.1070) loss: 0.8458 (0.8458) time: 4.3422 data: 4.0540 max mem: 9377 +Train: [68] [ 100/6250] eta: 0:19:28 lr: 0.000032 grad: 0.1739 (0.1862) loss: 0.8042 (0.8161) time: 0.1301 data: 0.0373 max mem: 9377 +Train: [68] [ 200/6250] eta: 0:16:40 lr: 0.000032 grad: 0.1528 (0.1840) loss: 0.7909 (0.8069) time: 0.1232 data: 0.0193 max mem: 9377 +Train: [68] [ 300/6250] eta: 0:15:36 lr: 0.000032 grad: 0.1617 (0.1788) loss: 0.7892 (0.8037) time: 0.1438 data: 0.0633 max mem: 9377 +Train: [68] [ 400/6250] eta: 0:14:44 lr: 0.000032 grad: 0.1463 (0.1748) loss: 0.7815 (0.7999) time: 0.1197 data: 0.0327 max mem: 9377 +Train: [68] [ 500/6250] eta: 0:14:07 lr: 0.000032 grad: 0.1460 (0.1718) loss: 0.7912 (0.7965) time: 0.1429 data: 0.0522 max mem: 9377 +Train: [68] [ 600/6250] eta: 0:13:38 lr: 0.000032 grad: 0.1515 (0.1696) loss: 0.7910 (0.7946) time: 0.1450 data: 0.0587 max mem: 9377 +Train: [68] [ 700/6250] eta: 0:13:11 lr: 0.000032 grad: 0.1517 (0.1675) loss: 0.7884 (0.7933) time: 0.1214 data: 0.0303 max mem: 9377 +Train: [68] [ 800/6250] eta: 0:12:53 lr: 0.000032 grad: 0.1442 (0.1656) loss: 0.7892 (0.7928) time: 0.0991 data: 0.0033 max mem: 9377 +Train: [68] [ 900/6250] eta: 0:12:38 lr: 0.000032 grad: 0.1534 (0.1642) loss: 0.7837 (0.7923) time: 0.1396 data: 0.0531 max mem: 9377 +Train: [68] [1000/6250] eta: 0:12:26 lr: 0.000032 grad: 0.1517 (0.1633) loss: 0.7827 (0.7919) time: 0.1447 data: 0.0614 max mem: 9377 +Train: [68] [1100/6250] eta: 0:12:21 lr: 0.000032 grad: 0.1585 (0.1637) loss: 0.7771 (0.7918) time: 0.1705 data: 0.0764 max mem: 9377 +Train: [68] [1200/6250] eta: 0:12:16 lr: 0.000032 grad: 0.1436 (0.1635) loss: 0.7868 (0.7915) time: 0.1686 data: 0.0806 max mem: 9377 +Train: [68] [1300/6250] eta: 0:12:07 lr: 0.000031 grad: 0.1513 (0.1628) loss: 0.7859 (0.7912) time: 0.1672 data: 0.0868 max mem: 9377 +Train: [68] [1400/6250] eta: 0:11:52 lr: 0.000031 grad: 0.1491 (0.1625) loss: 0.7739 (0.7908) time: 0.1486 data: 0.0683 max mem: 9377 +Train: [68] [1500/6250] eta: 0:11:34 lr: 0.000031 grad: 0.1498 (0.1618) loss: 0.7824 (0.7904) time: 0.1443 data: 0.0610 max mem: 9377 +Train: [68] [1600/6250] eta: 0:11:18 lr: 0.000031 grad: 0.1470 (0.1615) loss: 0.7932 (0.7902) time: 0.1345 data: 0.0516 max mem: 9377 +Train: [68] [1700/6250] eta: 0:11:03 lr: 0.000031 grad: 0.1396 (0.1611) loss: 0.7878 (0.7902) time: 0.1500 data: 0.0690 max mem: 9377 +Train: [68] [1800/6250] eta: 0:10:44 lr: 0.000031 grad: 0.1446 (0.1604) loss: 0.7874 (0.7902) time: 0.1275 data: 0.0425 max mem: 9377 +Train: [68] [1900/6250] eta: 0:10:26 lr: 0.000031 grad: 0.1438 (0.1600) loss: 0.7935 (0.7902) time: 0.1307 data: 0.0476 max mem: 9377 +Train: [68] [2000/6250] eta: 0:10:09 lr: 0.000031 grad: 0.1414 (0.1597) loss: 0.7946 (0.7903) time: 0.1340 data: 0.0515 max mem: 9377 +Train: [68] [2100/6250] eta: 0:09:53 lr: 0.000031 grad: 0.1519 (0.1594) loss: 0.7917 (0.7903) time: 0.1335 data: 0.0513 max mem: 9377 +Train: [68] [2200/6250] eta: 0:09:38 lr: 0.000031 grad: 0.1546 (0.1591) loss: 0.7831 (0.7904) time: 0.1303 data: 0.0523 max mem: 9377 +Train: [68] [2300/6250] eta: 0:09:24 lr: 0.000031 grad: 0.1535 (0.1594) loss: 0.7943 (0.7905) time: 0.1299 data: 0.0480 max mem: 9377 +Train: [68] [2400/6250] eta: 0:09:09 lr: 0.000031 grad: 0.1413 (0.1590) loss: 0.8020 (0.7907) time: 0.1227 data: 0.0352 max mem: 9377 +Train: [68] [2500/6250] eta: 0:08:55 lr: 0.000031 grad: 0.1537 (0.1588) loss: 0.7891 (0.7907) time: 0.1369 data: 0.0556 max mem: 9377 +Train: [68] [2600/6250] eta: 0:08:40 lr: 0.000031 grad: 0.1548 (0.1587) loss: 0.7861 (0.7907) time: 0.1404 data: 0.0562 max mem: 9377 +Train: [68] [2700/6250] eta: 0:08:25 lr: 0.000031 grad: 0.1474 (0.1584) loss: 0.7929 (0.7907) time: 0.1391 data: 0.0486 max mem: 9377 +Train: [68] [2800/6250] eta: 0:08:11 lr: 0.000031 grad: 0.1451 (0.1582) loss: 0.7932 (0.7907) time: 0.1441 data: 0.0647 max mem: 9377 +Train: [68] [2900/6250] eta: 0:07:57 lr: 0.000031 grad: 0.1433 (0.1578) loss: 0.7890 (0.7907) time: 0.1227 data: 0.0379 max mem: 9377 +Train: [68] [3000/6250] eta: 0:07:42 lr: 0.000031 grad: 0.1491 (0.1578) loss: 0.7810 (0.7906) time: 0.1458 data: 0.0645 max mem: 9377 +Train: [68] [3100/6250] eta: 0:07:27 lr: 0.000031 grad: 0.1543 (0.1577) loss: 0.7791 (0.7904) time: 0.1195 data: 0.0363 max mem: 9377 +Train: [68] [3200/6250] eta: 0:07:13 lr: 0.000031 grad: 0.1564 (0.1577) loss: 0.7865 (0.7903) time: 0.1654 data: 0.0853 max mem: 9377 +Train: [68] [3300/6250] eta: 0:06:59 lr: 0.000031 grad: 0.1514 (0.1577) loss: 0.7926 (0.7903) time: 0.1454 data: 0.0648 max mem: 9377 +Train: [68] [3400/6250] eta: 0:06:44 lr: 0.000031 grad: 0.1492 (0.1577) loss: 0.7903 (0.7902) time: 0.1165 data: 0.0282 max mem: 9377 +Train: [68] [3500/6250] eta: 0:06:29 lr: 0.000031 grad: 0.1561 (0.1576) loss: 0.7857 (0.7900) time: 0.1328 data: 0.0484 max mem: 9377 +Train: [68] [3600/6250] eta: 0:06:14 lr: 0.000031 grad: 0.1527 (0.1576) loss: 0.7873 (0.7900) time: 0.1384 data: 0.0523 max mem: 9377 +Train: [68] [3700/6250] eta: 0:06:00 lr: 0.000031 grad: 0.1537 (0.1576) loss: 0.7897 (0.7899) time: 0.1213 data: 0.0359 max mem: 9377 +Train: [68] [3800/6250] eta: 0:05:45 lr: 0.000031 grad: 0.1511 (0.1575) loss: 0.7886 (0.7898) time: 0.1238 data: 0.0448 max mem: 9377 +Train: [68] [3900/6250] eta: 0:05:31 lr: 0.000031 grad: 0.1472 (0.1574) loss: 0.7903 (0.7898) time: 0.1560 data: 0.0703 max mem: 9377 +Train: [68] [4000/6250] eta: 0:05:17 lr: 0.000031 grad: 0.1499 (0.1573) loss: 0.7856 (0.7897) time: 0.1116 data: 0.0235 max mem: 9377 +Train: [68] [4100/6250] eta: 0:05:03 lr: 0.000031 grad: 0.1533 (0.1573) loss: 0.7881 (0.7896) time: 0.1104 data: 0.0251 max mem: 9377 +Train: [68] [4200/6250] eta: 0:04:48 lr: 0.000031 grad: 0.1588 (0.1574) loss: 0.7805 (0.7894) time: 0.1265 data: 0.0403 max mem: 9377 +Train: [68] [4300/6250] eta: 0:04:35 lr: 0.000031 grad: 0.1485 (0.1575) loss: 0.7813 (0.7893) time: 0.1355 data: 0.0474 max mem: 9377 +Train: [68] [4400/6250] eta: 0:04:21 lr: 0.000031 grad: 0.1530 (0.1575) loss: 0.7852 (0.7892) time: 0.1672 data: 0.0775 max mem: 9377 +Train: [68] [4500/6250] eta: 0:04:08 lr: 0.000031 grad: 0.1593 (0.1575) loss: 0.7852 (0.7891) time: 0.1697 data: 0.0808 max mem: 9377 +Train: [68] [4600/6250] eta: 0:03:55 lr: 0.000031 grad: 0.1534 (0.1575) loss: 0.7720 (0.7890) time: 0.1609 data: 0.0728 max mem: 9377 +Train: [68] [4700/6250] eta: 0:03:41 lr: 0.000031 grad: 0.1573 (0.1575) loss: 0.7802 (0.7889) time: 0.1655 data: 0.0780 max mem: 9377 +Train: [68] [4800/6250] eta: 0:03:27 lr: 0.000030 grad: 0.1579 (0.1574) loss: 0.7910 (0.7889) time: 0.1594 data: 0.0661 max mem: 9377 +Train: [68] [4900/6250] eta: 0:03:13 lr: 0.000030 grad: 0.1649 (0.1575) loss: 0.7800 (0.7888) time: 0.1561 data: 0.0645 max mem: 9377 +Train: [68] [5000/6250] eta: 0:02:59 lr: 0.000030 grad: 0.1579 (0.1575) loss: 0.7832 (0.7886) time: 0.1721 data: 0.0932 max mem: 9377 +Train: [68] [5100/6250] eta: 0:02:45 lr: 0.000030 grad: 0.1586 (0.1575) loss: 0.7848 (0.7885) time: 0.1273 data: 0.0456 max mem: 9377 +Train: [68] [5200/6250] eta: 0:02:30 lr: 0.000030 grad: 0.1499 (0.1574) loss: 0.7774 (0.7885) time: 0.1338 data: 0.0572 max mem: 9377 +Train: [68] [5300/6250] eta: 0:02:16 lr: 0.000030 grad: 0.1482 (0.1574) loss: 0.7856 (0.7884) time: 0.1553 data: 0.0750 max mem: 9377 +Train: [68] [5400/6250] eta: 0:02:01 lr: 0.000030 grad: 0.1592 (0.1574) loss: 0.7833 (0.7883) time: 0.1305 data: 0.0510 max mem: 9377 +Train: [68] [5500/6250] eta: 0:01:47 lr: 0.000030 grad: 0.1485 (0.1574) loss: 0.7867 (0.7882) time: 0.1581 data: 0.0805 max mem: 9377 +Train: [68] [5600/6250] eta: 0:01:33 lr: 0.000030 grad: 0.1517 (0.1573) loss: 0.7930 (0.7883) time: 0.1313 data: 0.0462 max mem: 9377 +Train: [68] [5700/6250] eta: 0:01:18 lr: 0.000030 grad: 0.1508 (0.1572) loss: 0.7830 (0.7882) time: 0.1420 data: 0.0601 max mem: 9377 +Train: [68] [5800/6250] eta: 0:01:04 lr: 0.000030 grad: 0.1527 (0.1572) loss: 0.7796 (0.7881) time: 0.1409 data: 0.0591 max mem: 9377 +Train: [68] [5900/6250] eta: 0:00:50 lr: 0.000030 grad: 0.1598 (0.1571) loss: 0.7893 (0.7881) time: 0.1384 data: 0.0526 max mem: 9377 +Train: [68] [6000/6250] eta: 0:00:35 lr: 0.000030 grad: 0.1418 (0.1570) loss: 0.7790 (0.7880) time: 0.1495 data: 0.0656 max mem: 9377 +Train: [68] [6100/6250] eta: 0:00:21 lr: 0.000030 grad: 0.1406 (0.1570) loss: 0.7878 (0.7880) time: 0.1398 data: 0.0613 max mem: 9377 +Train: [68] [6200/6250] eta: 0:00:07 lr: 0.000030 grad: 0.1735 (0.1570) loss: 0.7772 (0.7880) time: 0.1175 data: 0.0361 max mem: 9377 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1544 (0.1570) loss: 0.7830 (0.7879) time: 0.1308 data: 0.0497 max mem: 9377 +Train: [68] Total time: 0:14:58 (0.1437 s / it) +Averaged stats: lr: 0.000030 grad: 0.1544 (0.1570) loss: 0.7830 (0.7879) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:03:43 loss: 0.8415 (0.8415) time: 3.6111 data: 3.5537 max mem: 9377 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.8412 (0.8420) time: 0.1176 data: 0.0909 max mem: 9377 +Eval (hcp-train-subset): [68] Total time: 0:00:13 (0.2195 s / it) +Averaged stats (hcp-train-subset): loss: 0.8412 (0.8420) +Eval (hcp-val): [68] [ 0/62] eta: 0:04:02 loss: 0.8339 (0.8339) time: 3.9063 data: 3.8376 max mem: 9377 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.8392 (0.8409) time: 0.1346 data: 0.1091 max mem: 9377 +Eval (hcp-val): [68] Total time: 0:00:13 (0.2159 s / it) +Averaged stats (hcp-val): loss: 0.8392 (0.8409) +Eval (nsd-val): [68] [ 0/62] eta: 0:05:33 loss: 0.8249 (0.8249) time: 5.3862 data: 5.3547 max mem: 9377 +Eval (nsd-val): [68] [61/62] eta: 0:00:00 loss: 0.8363 (0.8377) time: 0.1226 data: 0.0975 max mem: 9377 +Eval (nsd-val): [68] Total time: 0:00:13 (0.2177 s / it) +Averaged stats (nsd-val): loss: 0.8363 (0.8377) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [69] [ 0/6250] eta: 7:33:54 lr: 0.000030 grad: 0.2667 (0.2667) loss: 0.8537 (0.8537) time: 4.3574 data: 4.1424 max mem: 9377 +Train: [69] [ 100/6250] eta: 0:20:23 lr: 0.000030 grad: 0.1730 (0.2291) loss: 0.8053 (0.7947) time: 0.1614 data: 0.0651 max mem: 9377 +Train: [69] [ 200/6250] eta: 0:17:14 lr: 0.000030 grad: 0.1741 (0.2198) loss: 0.7953 (0.7920) time: 0.1241 data: 0.0286 max mem: 9377 +Train: [69] [ 300/6250] eta: 0:16:03 lr: 0.000030 grad: 0.1741 (0.2096) loss: 0.7893 (0.7883) time: 0.1287 data: 0.0351 max mem: 9377 +Train: [69] [ 400/6250] eta: 0:15:17 lr: 0.000030 grad: 0.1772 (0.2033) loss: 0.7635 (0.7850) time: 0.1364 data: 0.0459 max mem: 9377 +Train: [69] [ 500/6250] eta: 0:14:42 lr: 0.000030 grad: 0.1694 (0.1984) loss: 0.7720 (0.7825) time: 0.1673 data: 0.0801 max mem: 9377 +Train: [69] [ 600/6250] eta: 0:14:05 lr: 0.000030 grad: 0.1602 (0.1932) loss: 0.7818 (0.7815) time: 0.1346 data: 0.0420 max mem: 9377 +Train: [69] [ 700/6250] eta: 0:13:39 lr: 0.000030 grad: 0.1558 (0.1889) loss: 0.7904 (0.7813) time: 0.1311 data: 0.0347 max mem: 9377 +Train: [69] [ 800/6250] eta: 0:13:23 lr: 0.000030 grad: 0.1569 (0.1854) loss: 0.7761 (0.7815) time: 0.1425 data: 0.0557 max mem: 9377 +Train: [69] [ 900/6250] eta: 0:13:15 lr: 0.000030 grad: 0.1595 (0.1827) loss: 0.7836 (0.7814) time: 0.1630 data: 0.0695 max mem: 9377 +Train: [69] [1000/6250] eta: 0:13:04 lr: 0.000030 grad: 0.1536 (0.1798) loss: 0.7864 (0.7817) time: 0.1596 data: 0.0697 max mem: 9377 +Train: [69] [1100/6250] eta: 0:12:58 lr: 0.000030 grad: 0.1517 (0.1774) loss: 0.7913 (0.7820) time: 0.1365 data: 0.0563 max mem: 9377 +Train: [69] [1200/6250] eta: 0:12:52 lr: 0.000030 grad: 0.1531 (0.1754) loss: 0.7895 (0.7823) time: 0.1458 data: 0.0635 max mem: 9377 +Train: [69] [1300/6250] eta: 0:12:37 lr: 0.000030 grad: 0.1530 (0.1740) loss: 0.7890 (0.7824) time: 0.1501 data: 0.0539 max mem: 9377 +Train: [69] [1400/6250] eta: 0:12:22 lr: 0.000030 grad: 0.1584 (0.1731) loss: 0.7805 (0.7823) time: 0.1438 data: 0.0547 max mem: 9377 +Train: [69] [1500/6250] eta: 0:12:07 lr: 0.000030 grad: 0.1561 (0.1722) loss: 0.7788 (0.7824) time: 0.1604 data: 0.0749 max mem: 9377 +Train: [69] [1600/6250] eta: 0:11:54 lr: 0.000030 grad: 0.1539 (0.1713) loss: 0.7857 (0.7823) time: 0.1348 data: 0.0458 max mem: 9377 +Train: [69] [1700/6250] eta: 0:11:38 lr: 0.000030 grad: 0.1502 (0.1705) loss: 0.7824 (0.7823) time: 0.1650 data: 0.0730 max mem: 9377 +Train: [69] [1800/6250] eta: 0:11:24 lr: 0.000030 grad: 0.1515 (0.1697) loss: 0.7846 (0.7824) time: 0.1587 data: 0.0708 max mem: 9377 +Train: [69] [1900/6250] eta: 0:11:04 lr: 0.000030 grad: 0.1468 (0.1689) loss: 0.7831 (0.7826) time: 0.1364 data: 0.0502 max mem: 9377 +Train: [69] [2000/6250] eta: 0:10:45 lr: 0.000030 grad: 0.1531 (0.1680) loss: 0.7813 (0.7829) time: 0.1259 data: 0.0356 max mem: 9377 +Train: [69] [2100/6250] eta: 0:10:28 lr: 0.000029 grad: 0.1438 (0.1672) loss: 0.7979 (0.7832) time: 0.1365 data: 0.0453 max mem: 9377 +Train: [69] [2200/6250] eta: 0:10:12 lr: 0.000029 grad: 0.1578 (0.1668) loss: 0.7849 (0.7835) time: 0.1372 data: 0.0506 max mem: 9377 +Train: [69] [2300/6250] eta: 0:09:57 lr: 0.000029 grad: 0.1525 (0.1659) loss: 0.7902 (0.7838) time: 0.1851 data: 0.1020 max mem: 9377 +Train: [69] [2400/6250] eta: 0:09:44 lr: 0.000029 grad: 0.1551 (0.1652) loss: 0.7860 (0.7841) time: 0.1702 data: 0.0917 max mem: 9377 +Train: [69] [2500/6250] eta: 0:09:30 lr: 0.000029 grad: 0.1529 (0.1647) loss: 0.7987 (0.7844) time: 0.1448 data: 0.0687 max mem: 9377 +Train: [69] [2600/6250] eta: 0:09:14 lr: 0.000029 grad: 0.1511 (0.1640) loss: 0.7959 (0.7848) time: 0.1472 data: 0.0635 max mem: 9377 +Train: [69] [2700/6250] eta: 0:08:57 lr: 0.000029 grad: 0.1505 (0.1636) loss: 0.7944 (0.7851) time: 0.1267 data: 0.0386 max mem: 9377 +Train: [69] [2800/6250] eta: 0:08:41 lr: 0.000029 grad: 0.1389 (0.1630) loss: 0.7901 (0.7854) time: 0.1471 data: 0.0674 max mem: 9377 +Train: [69] [2900/6250] eta: 0:08:24 lr: 0.000029 grad: 0.1545 (0.1627) loss: 0.7827 (0.7855) time: 0.1239 data: 0.0393 max mem: 9377 +Train: [69] [3000/6250] eta: 0:08:08 lr: 0.000029 grad: 0.1455 (0.1623) loss: 0.7944 (0.7858) time: 0.1169 data: 0.0223 max mem: 9377 +Train: [69] [3100/6250] eta: 0:07:52 lr: 0.000029 grad: 0.1433 (0.1619) loss: 0.7881 (0.7860) time: 0.1289 data: 0.0494 max mem: 9377 +Train: [69] [3200/6250] eta: 0:07:36 lr: 0.000029 grad: 0.1486 (0.1617) loss: 0.7923 (0.7862) time: 0.1370 data: 0.0487 max mem: 9377 +Train: [69] [3300/6250] eta: 0:07:20 lr: 0.000029 grad: 0.1647 (0.1614) loss: 0.7852 (0.7863) time: 0.1448 data: 0.0639 max mem: 9377 +Train: [69] [3400/6250] eta: 0:07:04 lr: 0.000029 grad: 0.1504 (0.1611) loss: 0.7883 (0.7865) time: 0.1397 data: 0.0546 max mem: 9377 +Train: [69] [3500/6250] eta: 0:06:49 lr: 0.000029 grad: 0.1514 (0.1608) loss: 0.7909 (0.7867) time: 0.1570 data: 0.0780 max mem: 9377 +Train: [69] [3600/6250] eta: 0:06:33 lr: 0.000029 grad: 0.1641 (0.1607) loss: 0.7908 (0.7868) time: 0.1389 data: 0.0576 max mem: 9377 +Train: [69] [3700/6250] eta: 0:06:17 lr: 0.000029 grad: 0.1513 (0.1605) loss: 0.7900 (0.7869) time: 0.1574 data: 0.0791 max mem: 9377 +Train: [69] [3800/6250] eta: 0:06:02 lr: 0.000029 grad: 0.1495 (0.1603) loss: 0.7906 (0.7870) time: 0.1355 data: 0.0552 max mem: 9377 +Train: [69] [3900/6250] eta: 0:05:46 lr: 0.000029 grad: 0.1449 (0.1601) loss: 0.7950 (0.7872) time: 0.1316 data: 0.0418 max mem: 9377 +Train: [69] [4000/6250] eta: 0:05:31 lr: 0.000029 grad: 0.1523 (0.1598) loss: 0.7921 (0.7873) time: 0.1269 data: 0.0431 max mem: 9377 +Train: [69] [4100/6250] eta: 0:05:15 lr: 0.000029 grad: 0.1474 (0.1596) loss: 0.7979 (0.7874) time: 0.1251 data: 0.0449 max mem: 9377 +Train: [69] [4200/6250] eta: 0:05:00 lr: 0.000029 grad: 0.1463 (0.1595) loss: 0.7950 (0.7876) time: 0.1366 data: 0.0580 max mem: 9377 +Train: [69] [4300/6250] eta: 0:04:46 lr: 0.000029 grad: 0.1506 (0.1593) loss: 0.7963 (0.7878) time: 0.1494 data: 0.0666 max mem: 9377 +Train: [69] [4400/6250] eta: 0:04:32 lr: 0.000029 grad: 0.1569 (0.1593) loss: 0.7879 (0.7879) time: 0.1500 data: 0.0632 max mem: 9377 +Train: [69] [4500/6250] eta: 0:04:17 lr: 0.000029 grad: 0.1483 (0.1592) loss: 0.7949 (0.7881) time: 0.1587 data: 0.0695 max mem: 9377 +Train: [69] [4600/6250] eta: 0:04:02 lr: 0.000029 grad: 0.1399 (0.1589) loss: 0.8029 (0.7883) time: 0.1387 data: 0.0589 max mem: 9377 +Train: [69] [4700/6250] eta: 0:03:47 lr: 0.000029 grad: 0.1559 (0.1588) loss: 0.7887 (0.7884) time: 0.1446 data: 0.0683 max mem: 9377 +Train: [69] [4800/6250] eta: 0:03:32 lr: 0.000029 grad: 0.1561 (0.1587) loss: 0.7904 (0.7885) time: 0.1360 data: 0.0534 max mem: 9377 +Train: [69] [4900/6250] eta: 0:03:17 lr: 0.000029 grad: 0.1471 (0.1585) loss: 0.7971 (0.7886) time: 0.1429 data: 0.0576 max mem: 9377 +Train: [69] [5000/6250] eta: 0:03:02 lr: 0.000029 grad: 0.1486 (0.1583) loss: 0.7961 (0.7888) time: 0.1421 data: 0.0607 max mem: 9377 +Train: [69] [5100/6250] eta: 0:02:48 lr: 0.000029 grad: 0.1502 (0.1582) loss: 0.7941 (0.7889) time: 0.1254 data: 0.0417 max mem: 9377 +Train: [69] [5200/6250] eta: 0:02:33 lr: 0.000029 grad: 0.1452 (0.1581) loss: 0.7885 (0.7890) time: 0.1280 data: 0.0417 max mem: 9377 +Train: [69] [5300/6250] eta: 0:02:18 lr: 0.000029 grad: 0.1489 (0.1580) loss: 0.7951 (0.7890) time: 0.1316 data: 0.0450 max mem: 9377 +Train: [69] [5400/6250] eta: 0:02:03 lr: 0.000029 grad: 0.1549 (0.1580) loss: 0.7906 (0.7891) time: 0.1188 data: 0.0362 max mem: 9377 +Train: [69] [5500/6250] eta: 0:01:48 lr: 0.000029 grad: 0.1479 (0.1579) loss: 0.8002 (0.7892) time: 0.1271 data: 0.0332 max mem: 9377 +Train: [69] [5600/6250] eta: 0:01:34 lr: 0.000028 grad: 0.1627 (0.1578) loss: 0.7958 (0.7893) time: 0.1479 data: 0.0699 max mem: 9377 +Train: [69] [5700/6250] eta: 0:01:19 lr: 0.000028 grad: 0.1517 (0.1578) loss: 0.7953 (0.7893) time: 0.1394 data: 0.0489 max mem: 9377 +Train: [69] [5800/6250] eta: 0:01:05 lr: 0.000028 grad: 0.1484 (0.1578) loss: 0.7956 (0.7894) time: 0.1580 data: 0.0779 max mem: 9377 +Train: [69] [5900/6250] eta: 0:00:50 lr: 0.000028 grad: 0.1509 (0.1580) loss: 0.7900 (0.7894) time: 0.1340 data: 0.0503 max mem: 9377 +Train: [69] [6000/6250] eta: 0:00:36 lr: 0.000028 grad: 0.1569 (0.1580) loss: 0.7879 (0.7894) time: 0.1302 data: 0.0454 max mem: 9377 +Train: [69] [6100/6250] eta: 0:00:21 lr: 0.000028 grad: 0.1547 (0.1580) loss: 0.7883 (0.7894) time: 0.1355 data: 0.0480 max mem: 9377 +Train: [69] [6200/6250] eta: 0:00:07 lr: 0.000028 grad: 0.1479 (0.1580) loss: 0.7973 (0.7895) time: 0.1263 data: 0.0392 max mem: 9377 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.1462 (0.1580) loss: 0.7921 (0.7895) time: 0.1514 data: 0.0680 max mem: 9377 +Train: [69] Total time: 0:15:06 (0.1451 s / it) +Averaged stats: lr: 0.000028 grad: 0.1462 (0.1580) loss: 0.7921 (0.7895) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:04:11 loss: 0.8392 (0.8392) time: 4.0534 data: 3.9838 max mem: 9377 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.8398 (0.8421) time: 0.1292 data: 0.1043 max mem: 9377 +Eval (hcp-train-subset): [69] Total time: 0:00:14 (0.2273 s / it) +Averaged stats (hcp-train-subset): loss: 0.8398 (0.8421) +Making plots (hcp-train-subset): example=41 +Eval (hcp-val): [69] [ 0/62] eta: 0:06:35 loss: 0.8369 (0.8369) time: 6.3814 data: 6.3495 max mem: 9377 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.8399 (0.8408) time: 0.1100 data: 0.0847 max mem: 9377 +Eval (hcp-val): [69] Total time: 0:00:14 (0.2342 s / it) +Averaged stats (hcp-val): loss: 0.8399 (0.8408) +Making plots (hcp-val): example=18 +Eval (nsd-val): [69] [ 0/62] eta: 0:06:01 loss: 0.8145 (0.8145) time: 5.8327 data: 5.8001 max mem: 9377 +Eval (nsd-val): [69] [61/62] eta: 0:00:00 loss: 0.8278 (0.8289) time: 0.1116 data: 0.0867 max mem: 9377 +Eval (nsd-val): [69] Total time: 0:00:14 (0.2296 s / it) +Averaged stats (nsd-val): loss: 0.8278 (0.8289) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00069.pth +Train: [70] [ 0/6250] eta: 8:54:35 lr: 0.000028 grad: 0.4600 (0.4600) loss: 0.8074 (0.8074) time: 5.1321 data: 4.9544 max mem: 9377 +Train: [70] [ 100/6250] eta: 0:20:52 lr: 0.000028 grad: 0.1900 (0.2370) loss: 0.7782 (0.7939) time: 0.1605 data: 0.0581 max mem: 9377 +Train: [70] [ 200/6250] eta: 0:18:04 lr: 0.000028 grad: 0.1694 (0.2149) loss: 0.7718 (0.7851) time: 0.1811 data: 0.0951 max mem: 9377 +Train: [70] [ 300/6250] eta: 0:16:26 lr: 0.000028 grad: 0.1708 (0.2024) loss: 0.7771 (0.7825) time: 0.1448 data: 0.0586 max mem: 9377 +Train: [70] [ 400/6250] eta: 0:15:30 lr: 0.000028 grad: 0.1498 (0.1936) loss: 0.7750 (0.7815) time: 0.1394 data: 0.0522 max mem: 9377 +Train: [70] [ 500/6250] eta: 0:14:45 lr: 0.000028 grad: 0.1616 (0.1872) loss: 0.7907 (0.7821) time: 0.1292 data: 0.0361 max mem: 9377 +Train: [70] [ 600/6250] eta: 0:14:15 lr: 0.000028 grad: 0.1472 (0.1823) loss: 0.7756 (0.7824) time: 0.1280 data: 0.0376 max mem: 9377 +Train: [70] [ 700/6250] eta: 0:13:45 lr: 0.000028 grad: 0.1448 (0.1789) loss: 0.7889 (0.7834) time: 0.1295 data: 0.0415 max mem: 9377 +Train: [70] [ 800/6250] eta: 0:13:22 lr: 0.000028 grad: 0.1454 (0.1774) loss: 0.7885 (0.7839) time: 0.1433 data: 0.0533 max mem: 9377 +Train: [70] [ 900/6250] eta: 0:13:03 lr: 0.000028 grad: 0.1535 (0.1761) loss: 0.7899 (0.7845) time: 0.1474 data: 0.0581 max mem: 9377 +Train: [70] [1000/6250] eta: 0:12:49 lr: 0.000028 grad: 0.1588 (0.1747) loss: 0.7848 (0.7848) time: 0.1363 data: 0.0590 max mem: 9377 +Train: [70] [1100/6250] eta: 0:12:29 lr: 0.000028 grad: 0.1612 (0.1732) loss: 0.7743 (0.7849) time: 0.1170 data: 0.0266 max mem: 9377 +Train: [70] [1200/6250] eta: 0:12:11 lr: 0.000028 grad: 0.1476 (0.1716) loss: 0.7894 (0.7852) time: 0.1317 data: 0.0464 max mem: 9377 +Train: [70] [1300/6250] eta: 0:11:54 lr: 0.000028 grad: 0.1438 (0.1705) loss: 0.7856 (0.7855) time: 0.1655 data: 0.0876 max mem: 9377 +Train: [70] [1400/6250] eta: 0:11:48 lr: 0.000028 grad: 0.1512 (0.1693) loss: 0.7910 (0.7856) time: 0.1726 data: 0.0933 max mem: 9377 +Train: [70] [1500/6250] eta: 0:11:37 lr: 0.000028 grad: 0.1617 (0.1685) loss: 0.7823 (0.7858) time: 0.1590 data: 0.0786 max mem: 9377 +Train: [70] [1600/6250] eta: 0:11:24 lr: 0.000028 grad: 0.1544 (0.1676) loss: 0.7796 (0.7859) time: 0.1406 data: 0.0587 max mem: 9377 +Train: [70] [1700/6250] eta: 0:11:08 lr: 0.000028 grad: 0.1455 (0.1669) loss: 0.7787 (0.7859) time: 0.1282 data: 0.0482 max mem: 9377 +Train: [70] [1800/6250] eta: 0:10:51 lr: 0.000028 grad: 0.1495 (0.1664) loss: 0.7933 (0.7862) time: 0.1366 data: 0.0519 max mem: 9377 +Train: [70] [1900/6250] eta: 0:10:35 lr: 0.000028 grad: 0.1460 (0.1660) loss: 0.7951 (0.7864) time: 0.1306 data: 0.0449 max mem: 9377 +Train: [70] [2000/6250] eta: 0:10:19 lr: 0.000028 grad: 0.1680 (0.1660) loss: 0.7864 (0.7865) time: 0.1346 data: 0.0463 max mem: 9377 +Train: [70] [2100/6250] eta: 0:10:02 lr: 0.000028 grad: 0.1612 (0.1659) loss: 0.7983 (0.7866) time: 0.1362 data: 0.0509 max mem: 9377 +Train: [70] [2200/6250] eta: 0:09:45 lr: 0.000028 grad: 0.1549 (0.1655) loss: 0.7984 (0.7868) time: 0.1160 data: 0.0291 max mem: 9377 +Train: [70] [2300/6250] eta: 0:09:28 lr: 0.000028 grad: 0.1518 (0.1652) loss: 0.7827 (0.7868) time: 0.1244 data: 0.0409 max mem: 9377 +Train: [70] [2400/6250] eta: 0:09:11 lr: 0.000028 grad: 0.1616 (0.1650) loss: 0.7869 (0.7867) time: 0.1445 data: 0.0592 max mem: 9377 +Train: [70] [2500/6250] eta: 0:08:55 lr: 0.000028 grad: 0.1552 (0.1647) loss: 0.7863 (0.7868) time: 0.1425 data: 0.0636 max mem: 9377 +Train: [70] [2600/6250] eta: 0:08:39 lr: 0.000028 grad: 0.1663 (0.1646) loss: 0.7875 (0.7870) time: 0.1278 data: 0.0434 max mem: 9377 +Train: [70] [2700/6250] eta: 0:08:24 lr: 0.000028 grad: 0.1500 (0.1645) loss: 0.7975 (0.7871) time: 0.1469 data: 0.0678 max mem: 9377 +Train: [70] [2800/6250] eta: 0:08:10 lr: 0.000028 grad: 0.1525 (0.1642) loss: 0.7886 (0.7871) time: 0.1303 data: 0.0497 max mem: 9377 +Train: [70] [2900/6250] eta: 0:07:55 lr: 0.000028 grad: 0.1473 (0.1639) loss: 0.7890 (0.7872) time: 0.1430 data: 0.0601 max mem: 9377 +Train: [70] [3000/6250] eta: 0:07:41 lr: 0.000027 grad: 0.1581 (0.1638) loss: 0.7902 (0.7872) time: 0.1456 data: 0.0619 max mem: 9377 +Train: [70] [3100/6250] eta: 0:07:26 lr: 0.000027 grad: 0.1561 (0.1637) loss: 0.7901 (0.7872) time: 0.1421 data: 0.0611 max mem: 9377 +Train: [70] [3200/6250] eta: 0:07:10 lr: 0.000027 grad: 0.1531 (0.1635) loss: 0.7827 (0.7871) time: 0.1291 data: 0.0425 max mem: 9377 +Train: [70] [3300/6250] eta: 0:06:56 lr: 0.000027 grad: 0.1454 (0.1633) loss: 0.7887 (0.7872) time: 0.1295 data: 0.0497 max mem: 9377 +Train: [70] [3400/6250] eta: 0:06:41 lr: 0.000027 grad: 0.1500 (0.1630) loss: 0.7868 (0.7872) time: 0.1343 data: 0.0531 max mem: 9377 +Train: [70] [3500/6250] eta: 0:06:26 lr: 0.000027 grad: 0.1527 (0.1628) loss: 0.7801 (0.7871) time: 0.1238 data: 0.0363 max mem: 9377 +Train: [70] [3600/6250] eta: 0:06:12 lr: 0.000027 grad: 0.1479 (0.1627) loss: 0.7872 (0.7871) time: 0.1415 data: 0.0584 max mem: 9377 +Train: [70] [3700/6250] eta: 0:05:58 lr: 0.000027 grad: 0.1593 (0.1625) loss: 0.7868 (0.7871) time: 0.1464 data: 0.0667 max mem: 9377 +Train: [70] [3800/6250] eta: 0:05:44 lr: 0.000027 grad: 0.1631 (0.1623) loss: 0.7823 (0.7870) time: 0.1264 data: 0.0452 max mem: 9377 +Train: [70] [3900/6250] eta: 0:05:29 lr: 0.000027 grad: 0.1582 (0.1623) loss: 0.7924 (0.7871) time: 0.1348 data: 0.0510 max mem: 9377 +Train: [70] [4000/6250] eta: 0:05:15 lr: 0.000027 grad: 0.1517 (0.1622) loss: 0.7855 (0.7870) time: 0.1490 data: 0.0639 max mem: 9377 +Train: [70] [4100/6250] eta: 0:05:01 lr: 0.000027 grad: 0.1565 (0.1621) loss: 0.7779 (0.7870) time: 0.1602 data: 0.0843 max mem: 9377 +Train: [70] [4200/6250] eta: 0:04:47 lr: 0.000027 grad: 0.1613 (0.1621) loss: 0.7782 (0.7869) time: 0.1702 data: 0.0924 max mem: 9377 +Train: [70] [4300/6250] eta: 0:04:34 lr: 0.000027 grad: 0.1369 (0.1618) loss: 0.8007 (0.7870) time: 0.1364 data: 0.0546 max mem: 9377 +Train: [70] [4400/6250] eta: 0:04:20 lr: 0.000027 grad: 0.1433 (0.1617) loss: 0.7970 (0.7870) time: 0.1519 data: 0.0743 max mem: 9377 +Train: [70] [4500/6250] eta: 0:04:07 lr: 0.000027 grad: 0.1500 (0.1614) loss: 0.7893 (0.7870) time: 0.1533 data: 0.0680 max mem: 9377 +Train: [70] [4600/6250] eta: 0:03:54 lr: 0.000027 grad: 0.1468 (0.1612) loss: 0.7865 (0.7871) time: 0.1609 data: 0.0742 max mem: 9377 +Train: [70] [4700/6250] eta: 0:03:40 lr: 0.000027 grad: 0.1500 (0.1611) loss: 0.7880 (0.7872) time: 0.1616 data: 0.0774 max mem: 9377 +Train: [70] [4800/6250] eta: 0:03:26 lr: 0.000027 grad: 0.1541 (0.1610) loss: 0.7908 (0.7873) time: 0.1459 data: 0.0530 max mem: 9377 +Train: [70] [4900/6250] eta: 0:03:12 lr: 0.000027 grad: 0.1487 (0.1609) loss: 0.7940 (0.7874) time: 0.1632 data: 0.0801 max mem: 9377 +Train: [70] [5000/6250] eta: 0:02:58 lr: 0.000027 grad: 0.1526 (0.1608) loss: 0.7854 (0.7874) time: 0.1209 data: 0.0302 max mem: 9377 +Train: [70] [5100/6250] eta: 0:02:43 lr: 0.000027 grad: 0.1515 (0.1606) loss: 0.7855 (0.7874) time: 0.1451 data: 0.0620 max mem: 9377 +Train: [70] [5200/6250] eta: 0:02:29 lr: 0.000027 grad: 0.1477 (0.1605) loss: 0.7854 (0.7874) time: 0.1416 data: 0.0593 max mem: 9377 +Train: [70] [5300/6250] eta: 0:02:15 lr: 0.000027 grad: 0.1512 (0.1604) loss: 0.7871 (0.7874) time: 0.1612 data: 0.0871 max mem: 9377 +Train: [70] [5400/6250] eta: 0:02:01 lr: 0.000027 grad: 0.1555 (0.1604) loss: 0.7975 (0.7874) time: 0.1506 data: 0.0694 max mem: 9377 +Train: [70] [5500/6250] eta: 0:01:47 lr: 0.000027 grad: 0.1594 (0.1604) loss: 0.7782 (0.7874) time: 0.1696 data: 0.0901 max mem: 9377 +Train: [70] [5600/6250] eta: 0:01:33 lr: 0.000027 grad: 0.1479 (0.1603) loss: 0.7874 (0.7874) time: 0.1465 data: 0.0635 max mem: 9377 +Train: [70] [5700/6250] eta: 0:01:19 lr: 0.000027 grad: 0.1577 (0.1603) loss: 0.7782 (0.7874) time: 0.1598 data: 0.0796 max mem: 9377 +Train: [70] [5800/6250] eta: 0:01:04 lr: 0.000027 grad: 0.1532 (0.1603) loss: 0.7911 (0.7874) time: 0.1253 data: 0.0430 max mem: 9377 +Train: [70] [5900/6250] eta: 0:00:50 lr: 0.000027 grad: 0.1637 (0.1603) loss: 0.7793 (0.7874) time: 0.1232 data: 0.0361 max mem: 9377 +Train: [70] [6000/6250] eta: 0:00:35 lr: 0.000027 grad: 0.1576 (0.1604) loss: 0.7814 (0.7874) time: 0.1262 data: 0.0458 max mem: 9377 +Train: [70] [6100/6250] eta: 0:00:21 lr: 0.000027 grad: 0.1473 (0.1604) loss: 0.7922 (0.7873) time: 0.1533 data: 0.0708 max mem: 9377 +Train: [70] [6200/6250] eta: 0:00:07 lr: 0.000027 grad: 0.1527 (0.1604) loss: 0.7851 (0.7873) time: 0.1420 data: 0.0586 max mem: 9377 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.1615 (0.1604) loss: 0.7835 (0.7873) time: 0.1436 data: 0.0670 max mem: 9377 +Train: [70] Total time: 0:15:00 (0.1441 s / it) +Averaged stats: lr: 0.000027 grad: 0.1615 (0.1604) loss: 0.7835 (0.7873) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:04:23 loss: 0.8365 (0.8365) time: 4.2558 data: 4.1770 max mem: 9377 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.8430 (0.8423) time: 0.1433 data: 0.1181 max mem: 9377 +Eval (hcp-train-subset): [70] Total time: 0:00:14 (0.2315 s / it) +Averaged stats (hcp-train-subset): loss: 0.8430 (0.8423) +Eval (hcp-val): [70] [ 0/62] eta: 0:05:57 loss: 0.8395 (0.8395) time: 5.7599 data: 5.7296 max mem: 9377 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.8410 (0.8414) time: 0.1415 data: 0.1166 max mem: 9377 +Eval (hcp-val): [70] Total time: 0:00:15 (0.2543 s / it) +Averaged stats (hcp-val): loss: 0.8410 (0.8414) +Eval (nsd-val): [70] [ 0/62] eta: 0:08:10 loss: 0.8135 (0.8135) time: 7.9036 data: 7.8603 max mem: 9377 +Eval (nsd-val): [70] [61/62] eta: 0:00:00 loss: 0.8222 (0.8244) time: 0.1331 data: 0.1063 max mem: 9377 +Eval (nsd-val): [70] Total time: 0:00:15 (0.2450 s / it) +Averaged stats (nsd-val): loss: 0.8222 (0.8244) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 11:34:34 lr: 0.000027 grad: 0.3919 (0.3919) loss: 0.7815 (0.7815) time: 6.6680 data: 6.5691 max mem: 9377 +Train: [71] [ 100/6250] eta: 0:21:45 lr: 0.000027 grad: 0.1528 (0.2196) loss: 0.7927 (0.8008) time: 0.1388 data: 0.0290 max mem: 9377 +Train: [71] [ 200/6250] eta: 0:18:59 lr: 0.000027 grad: 0.1387 (0.1911) loss: 0.7958 (0.7998) time: 0.1595 data: 0.0577 max mem: 9377 +Train: [71] [ 300/6250] eta: 0:17:50 lr: 0.000027 grad: 0.1470 (0.1800) loss: 0.8016 (0.7980) time: 0.1672 data: 0.0770 max mem: 9377 +Train: [71] [ 400/6250] eta: 0:16:42 lr: 0.000026 grad: 0.1538 (0.1775) loss: 0.7943 (0.7963) time: 0.1639 data: 0.0587 max mem: 9377 +Train: [71] [ 500/6250] eta: 0:15:59 lr: 0.000026 grad: 0.1499 (0.1745) loss: 0.7946 (0.7948) time: 0.1541 data: 0.0649 max mem: 9377 +Train: [71] [ 600/6250] eta: 0:15:17 lr: 0.000026 grad: 0.1541 (0.1726) loss: 0.7901 (0.7941) time: 0.1275 data: 0.0391 max mem: 9377 +Train: [71] [ 700/6250] eta: 0:14:48 lr: 0.000026 grad: 0.1501 (0.1698) loss: 0.7913 (0.7944) time: 0.1543 data: 0.0664 max mem: 9377 +Train: [71] [ 800/6250] eta: 0:14:21 lr: 0.000026 grad: 0.1472 (0.1676) loss: 0.8051 (0.7945) time: 0.1531 data: 0.0712 max mem: 9377 +Train: [71] [ 900/6250] eta: 0:13:58 lr: 0.000026 grad: 0.1429 (0.1657) loss: 0.7988 (0.7949) time: 0.1564 data: 0.0734 max mem: 9377 +Train: [71] [1000/6250] eta: 0:13:35 lr: 0.000026 grad: 0.1419 (0.1637) loss: 0.8010 (0.7953) time: 0.1481 data: 0.0686 max mem: 9377 +Train: [71] [1100/6250] eta: 0:13:15 lr: 0.000026 grad: 0.1436 (0.1624) loss: 0.7948 (0.7953) time: 0.1326 data: 0.0502 max mem: 9377 +Train: [71] [1200/6250] eta: 0:12:55 lr: 0.000026 grad: 0.1417 (0.1612) loss: 0.7963 (0.7954) time: 0.1350 data: 0.0545 max mem: 9377 +Train: [71] [1300/6250] eta: 0:12:36 lr: 0.000026 grad: 0.1431 (0.1600) loss: 0.7949 (0.7955) time: 0.1501 data: 0.0757 max mem: 9377 +Train: [71] [1400/6250] eta: 0:12:16 lr: 0.000026 grad: 0.1335 (0.1591) loss: 0.8008 (0.7957) time: 0.1377 data: 0.0568 max mem: 9377 +Train: [71] [1500/6250] eta: 0:12:00 lr: 0.000026 grad: 0.1502 (0.1584) loss: 0.7968 (0.7957) time: 0.1413 data: 0.0584 max mem: 9377 +Train: [71] [1600/6250] eta: 0:11:45 lr: 0.000026 grad: 0.1526 (0.1579) loss: 0.7867 (0.7957) time: 0.1537 data: 0.0742 max mem: 9377 +Train: [71] [1700/6250] eta: 0:11:29 lr: 0.000026 grad: 0.1503 (0.1575) loss: 0.7878 (0.7956) time: 0.1448 data: 0.0661 max mem: 9377 +Train: [71] [1800/6250] eta: 0:11:14 lr: 0.000026 grad: 0.1380 (0.1570) loss: 0.7952 (0.7956) time: 0.1271 data: 0.0547 max mem: 9377 +Train: [71] [1900/6250] eta: 0:11:01 lr: 0.000026 grad: 0.1471 (0.1566) loss: 0.7972 (0.7956) time: 0.2124 data: 0.1313 max mem: 9377 +Train: [71] [2000/6250] eta: 0:10:44 lr: 0.000026 grad: 0.1440 (0.1563) loss: 0.7987 (0.7955) time: 0.1397 data: 0.0592 max mem: 9377 +Train: [71] [2100/6250] eta: 0:10:27 lr: 0.000026 grad: 0.1575 (0.1562) loss: 0.7905 (0.7954) time: 0.1446 data: 0.0576 max mem: 9377 +Train: [71] [2200/6250] eta: 0:10:10 lr: 0.000026 grad: 0.1526 (0.1562) loss: 0.7909 (0.7952) time: 0.1518 data: 0.0733 max mem: 9377 +Train: [71] [2300/6250] eta: 0:09:54 lr: 0.000026 grad: 0.1496 (0.1564) loss: 0.7867 (0.7951) time: 0.1408 data: 0.0552 max mem: 9377 +Train: [71] [2400/6250] eta: 0:09:36 lr: 0.000026 grad: 0.1447 (0.1563) loss: 0.7911 (0.7949) time: 0.1533 data: 0.0734 max mem: 9377 +Train: [71] [2500/6250] eta: 0:09:18 lr: 0.000026 grad: 0.1459 (0.1563) loss: 0.7900 (0.7948) time: 0.1263 data: 0.0442 max mem: 9377 +Train: [71] [2600/6250] eta: 0:09:00 lr: 0.000026 grad: 0.1504 (0.1563) loss: 0.7911 (0.7947) time: 0.1246 data: 0.0273 max mem: 9377 +Train: [71] [2700/6250] eta: 0:08:43 lr: 0.000026 grad: 0.1497 (0.1561) loss: 0.7955 (0.7946) time: 0.1379 data: 0.0561 max mem: 9377 +Train: [71] [2800/6250] eta: 0:08:25 lr: 0.000026 grad: 0.1494 (0.1559) loss: 0.7975 (0.7947) time: 0.1243 data: 0.0365 max mem: 9377 +Train: [71] [2900/6250] eta: 0:08:09 lr: 0.000026 grad: 0.1470 (0.1558) loss: 0.7943 (0.7947) time: 0.1199 data: 0.0356 max mem: 9377 +Train: [71] [3000/6250] eta: 0:07:53 lr: 0.000026 grad: 0.1517 (0.1557) loss: 0.7879 (0.7946) time: 0.1323 data: 0.0518 max mem: 9377 +Train: [71] [3100/6250] eta: 0:07:37 lr: 0.000026 grad: 0.1503 (0.1557) loss: 0.7913 (0.7945) time: 0.1365 data: 0.0547 max mem: 9377 +Train: [71] [3200/6250] eta: 0:07:22 lr: 0.000026 grad: 0.1473 (0.1557) loss: 0.7953 (0.7945) time: 0.1689 data: 0.0903 max mem: 9377 +Train: [71] [3300/6250] eta: 0:07:06 lr: 0.000026 grad: 0.1499 (0.1556) loss: 0.7963 (0.7946) time: 0.1244 data: 0.0369 max mem: 9377 +Train: [71] [3400/6250] eta: 0:06:52 lr: 0.000026 grad: 0.1547 (0.1557) loss: 0.7901 (0.7945) time: 0.1541 data: 0.0761 max mem: 9377 +Train: [71] [3500/6250] eta: 0:06:37 lr: 0.000026 grad: 0.1539 (0.1557) loss: 0.7846 (0.7944) time: 0.1496 data: 0.0707 max mem: 9377 +Train: [71] [3600/6250] eta: 0:06:22 lr: 0.000026 grad: 0.1551 (0.1558) loss: 0.7965 (0.7943) time: 0.1305 data: 0.0467 max mem: 9377 +Train: [71] [3700/6250] eta: 0:06:07 lr: 0.000026 grad: 0.1578 (0.1559) loss: 0.7891 (0.7941) time: 0.1300 data: 0.0494 max mem: 9377 +Train: [71] [3800/6250] eta: 0:05:53 lr: 0.000026 grad: 0.1564 (0.1560) loss: 0.7948 (0.7941) time: 0.1392 data: 0.0605 max mem: 9377 +Train: [71] [3900/6250] eta: 0:05:38 lr: 0.000026 grad: 0.1485 (0.1561) loss: 0.7931 (0.7940) time: 0.1434 data: 0.0611 max mem: 9377 +Train: [71] [4000/6250] eta: 0:05:23 lr: 0.000026 grad: 0.1532 (0.1562) loss: 0.7922 (0.7939) time: 0.1353 data: 0.0521 max mem: 9377 +Train: [71] [4100/6250] eta: 0:05:08 lr: 0.000026 grad: 0.1578 (0.1563) loss: 0.7833 (0.7938) time: 0.1246 data: 0.0382 max mem: 9377 +Train: [71] [4200/6250] eta: 0:04:54 lr: 0.000025 grad: 0.1559 (0.1564) loss: 0.7883 (0.7937) time: 0.1349 data: 0.0502 max mem: 9377 +Train: [71] [4300/6250] eta: 0:04:40 lr: 0.000025 grad: 0.1460 (0.1564) loss: 0.7955 (0.7937) time: 0.1938 data: 0.1202 max mem: 9377 +Train: [71] [4400/6250] eta: 0:04:25 lr: 0.000025 grad: 0.1540 (0.1565) loss: 0.7862 (0.7936) time: 0.1369 data: 0.0530 max mem: 9377 +Train: [71] [4500/6250] eta: 0:04:11 lr: 0.000025 grad: 0.1617 (0.1569) loss: 0.7813 (0.7935) time: 0.1574 data: 0.0767 max mem: 9377 +Train: [71] [4600/6250] eta: 0:03:57 lr: 0.000025 grad: 0.1502 (0.1571) loss: 0.7937 (0.7933) time: 0.1400 data: 0.0581 max mem: 9377 +Train: [71] [4700/6250] eta: 0:03:43 lr: 0.000025 grad: 0.1538 (0.1573) loss: 0.7844 (0.7932) time: 0.1350 data: 0.0552 max mem: 9377 +Train: [71] [4800/6250] eta: 0:03:28 lr: 0.000025 grad: 0.1597 (0.1574) loss: 0.7931 (0.7931) time: 0.1233 data: 0.0321 max mem: 9377 +Train: [71] [4900/6250] eta: 0:03:14 lr: 0.000025 grad: 0.1657 (0.1575) loss: 0.7857 (0.7929) time: 0.1398 data: 0.0539 max mem: 9377 +Train: [71] [5000/6250] eta: 0:02:59 lr: 0.000025 grad: 0.1561 (0.1577) loss: 0.7869 (0.7928) time: 0.1205 data: 0.0399 max mem: 9377 +Train: [71] [5100/6250] eta: 0:02:45 lr: 0.000025 grad: 0.1542 (0.1578) loss: 0.7926 (0.7927) time: 0.1346 data: 0.0440 max mem: 9377 +Train: [71] [5200/6250] eta: 0:02:30 lr: 0.000025 grad: 0.1509 (0.1578) loss: 0.7943 (0.7926) time: 0.1344 data: 0.0441 max mem: 9377 +Train: [71] [5300/6250] eta: 0:02:15 lr: 0.000025 grad: 0.1718 (0.1580) loss: 0.7806 (0.7925) time: 0.1317 data: 0.0436 max mem: 9377 +Train: [71] [5400/6250] eta: 0:02:01 lr: 0.000025 grad: 0.1549 (0.1580) loss: 0.7896 (0.7924) time: 0.1172 data: 0.0314 max mem: 9377 +Train: [71] [5500/6250] eta: 0:01:46 lr: 0.000025 grad: 0.1570 (0.1581) loss: 0.7937 (0.7923) time: 0.1184 data: 0.0293 max mem: 9377 +Train: [71] [5600/6250] eta: 0:01:32 lr: 0.000025 grad: 0.1586 (0.1581) loss: 0.7968 (0.7923) time: 0.1292 data: 0.0427 max mem: 9377 +Train: [71] [5700/6250] eta: 0:01:18 lr: 0.000025 grad: 0.1546 (0.1582) loss: 0.7826 (0.7922) time: 0.1325 data: 0.0523 max mem: 9377 +Train: [71] [5800/6250] eta: 0:01:03 lr: 0.000025 grad: 0.1559 (0.1583) loss: 0.7942 (0.7922) time: 0.1337 data: 0.0423 max mem: 9377 +Train: [71] [5900/6250] eta: 0:00:49 lr: 0.000025 grad: 0.1615 (0.1583) loss: 0.7846 (0.7921) time: 0.1512 data: 0.0682 max mem: 9377 +Train: [71] [6000/6250] eta: 0:00:35 lr: 0.000025 grad: 0.1620 (0.1586) loss: 0.7796 (0.7919) time: 0.1382 data: 0.0548 max mem: 9377 +Train: [71] [6100/6250] eta: 0:00:21 lr: 0.000025 grad: 0.1594 (0.1586) loss: 0.7814 (0.7918) time: 0.1336 data: 0.0513 max mem: 9377 +Train: [71] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.1614 (0.1587) loss: 0.7818 (0.7917) time: 0.1427 data: 0.0632 max mem: 9377 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1541 (0.1587) loss: 0.7983 (0.7917) time: 0.1450 data: 0.0664 max mem: 9377 +Train: [71] Total time: 0:14:51 (0.1427 s / it) +Averaged stats: lr: 0.000025 grad: 0.1541 (0.1587) loss: 0.7983 (0.7917) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:05:04 loss: 0.8381 (0.8381) time: 4.9132 data: 4.8800 max mem: 9377 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.8410 (0.8421) time: 0.1292 data: 0.1041 max mem: 9377 +Eval (hcp-train-subset): [71] Total time: 0:00:13 (0.2187 s / it) +Averaged stats (hcp-train-subset): loss: 0.8410 (0.8421) +Eval (hcp-val): [71] [ 0/62] eta: 0:05:09 loss: 0.8384 (0.8384) time: 4.9883 data: 4.9432 max mem: 9377 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.8367 (0.8395) time: 0.1069 data: 0.0814 max mem: 9377 +Eval (hcp-val): [71] Total time: 0:00:13 (0.2225 s / it) +Averaged stats (hcp-val): loss: 0.8367 (0.8395) +Eval (nsd-val): [71] [ 0/62] eta: 0:06:05 loss: 0.8143 (0.8143) time: 5.8951 data: 5.8625 max mem: 9377 +Eval (nsd-val): [71] [61/62] eta: 0:00:00 loss: 0.8223 (0.8234) time: 0.0986 data: 0.0723 max mem: 9377 +Eval (nsd-val): [71] Total time: 0:00:13 (0.2190 s / it) +Averaged stats (nsd-val): loss: 0.8223 (0.8234) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +Train: [72] [ 0/6250] eta: 11:13:26 lr: 0.000025 grad: 0.2820 (0.2820) loss: 0.7438 (0.7438) time: 6.4650 data: 6.3717 max mem: 9377 +Train: [72] [ 100/6250] eta: 0:19:59 lr: 0.000025 grad: 0.1557 (0.2046) loss: 0.7969 (0.8055) time: 0.1460 data: 0.0593 max mem: 9377 +Train: [72] [ 200/6250] eta: 0:17:02 lr: 0.000025 grad: 0.1622 (0.1876) loss: 0.8006 (0.8029) time: 0.1340 data: 0.0350 max mem: 9377 +Train: [72] [ 300/6250] eta: 0:15:42 lr: 0.000025 grad: 0.1725 (0.1822) loss: 0.7963 (0.8009) time: 0.1385 data: 0.0463 max mem: 9377 +Train: [72] [ 400/6250] eta: 0:14:58 lr: 0.000025 grad: 0.1617 (0.1790) loss: 0.7871 (0.7983) time: 0.1234 data: 0.0219 max mem: 9377 +Train: [72] [ 500/6250] eta: 0:14:33 lr: 0.000025 grad: 0.1466 (0.1752) loss: 0.7870 (0.7963) time: 0.1603 data: 0.0643 max mem: 9377 +Train: [72] [ 600/6250] eta: 0:14:07 lr: 0.000025 grad: 0.1542 (0.1734) loss: 0.7925 (0.7944) time: 0.1274 data: 0.0344 max mem: 9377 +Train: [72] [ 700/6250] eta: 0:13:42 lr: 0.000025 grad: 0.1550 (0.1721) loss: 0.7993 (0.7932) time: 0.1353 data: 0.0416 max mem: 9377 +Train: [72] [ 800/6250] eta: 0:13:22 lr: 0.000025 grad: 0.1504 (0.1710) loss: 0.7933 (0.7923) time: 0.1235 data: 0.0414 max mem: 9377 +Train: [72] [ 900/6250] eta: 0:13:03 lr: 0.000025 grad: 0.1564 (0.1699) loss: 0.7972 (0.7921) time: 0.1338 data: 0.0505 max mem: 9377 +Train: [72] [1000/6250] eta: 0:12:48 lr: 0.000025 grad: 0.1613 (0.1690) loss: 0.7865 (0.7919) time: 0.1442 data: 0.0646 max mem: 9377 +Train: [72] [1100/6250] eta: 0:12:28 lr: 0.000025 grad: 0.1452 (0.1680) loss: 0.7956 (0.7918) time: 0.1216 data: 0.0302 max mem: 9377 +Train: [72] [1200/6250] eta: 0:12:09 lr: 0.000025 grad: 0.1475 (0.1672) loss: 0.7953 (0.7919) time: 0.1347 data: 0.0454 max mem: 9377 +Train: [72] [1300/6250] eta: 0:11:51 lr: 0.000025 grad: 0.1434 (0.1664) loss: 0.7982 (0.7919) time: 0.1313 data: 0.0488 max mem: 9377 +Train: [72] [1400/6250] eta: 0:11:33 lr: 0.000025 grad: 0.1507 (0.1656) loss: 0.7970 (0.7918) time: 0.1347 data: 0.0547 max mem: 9377 +Train: [72] [1500/6250] eta: 0:11:18 lr: 0.000025 grad: 0.1528 (0.1648) loss: 0.7949 (0.7920) time: 0.1454 data: 0.0657 max mem: 9377 +Train: [72] [1600/6250] eta: 0:11:02 lr: 0.000025 grad: 0.1547 (0.1646) loss: 0.7977 (0.7920) time: 0.1417 data: 0.0589 max mem: 9377 +Train: [72] [1700/6250] eta: 0:10:45 lr: 0.000024 grad: 0.1492 (0.1640) loss: 0.7943 (0.7922) time: 0.1334 data: 0.0413 max mem: 9377 +Train: [72] [1800/6250] eta: 0:10:28 lr: 0.000024 grad: 0.1582 (0.1635) loss: 0.7872 (0.7923) time: 0.1192 data: 0.0394 max mem: 9377 +Train: [72] [1900/6250] eta: 0:10:12 lr: 0.000024 grad: 0.1517 (0.1630) loss: 0.7939 (0.7924) time: 0.1132 data: 0.0369 max mem: 9377 +Train: [72] [2000/6250] eta: 0:10:02 lr: 0.000024 grad: 0.1458 (0.1626) loss: 0.7972 (0.7925) time: 0.1578 data: 0.0771 max mem: 9377 +Train: [72] [2100/6250] eta: 0:09:48 lr: 0.000024 grad: 0.1600 (0.1623) loss: 0.7847 (0.7924) time: 0.1463 data: 0.0587 max mem: 9377 +Train: [72] [2200/6250] eta: 0:09:34 lr: 0.000024 grad: 0.1578 (0.1620) loss: 0.7946 (0.7924) time: 0.1705 data: 0.0793 max mem: 9377 +Train: [72] [2300/6250] eta: 0:09:20 lr: 0.000024 grad: 0.1534 (0.1617) loss: 0.7887 (0.7924) time: 0.1352 data: 0.0551 max mem: 9377 +Train: [72] [2400/6250] eta: 0:09:04 lr: 0.000024 grad: 0.1558 (0.1616) loss: 0.7947 (0.7923) time: 0.1219 data: 0.0473 max mem: 9377 +Train: [72] [2500/6250] eta: 0:08:51 lr: 0.000024 grad: 0.1523 (0.1615) loss: 0.7906 (0.7923) time: 0.1490 data: 0.0687 max mem: 9377 +Train: [72] [2600/6250] eta: 0:08:35 lr: 0.000024 grad: 0.1502 (0.1614) loss: 0.7845 (0.7921) time: 0.1405 data: 0.0616 max mem: 9377 +Train: [72] [2700/6250] eta: 0:08:21 lr: 0.000024 grad: 0.1593 (0.1615) loss: 0.7828 (0.7918) time: 0.1372 data: 0.0459 max mem: 9377 +Train: [72] [2800/6250] eta: 0:08:05 lr: 0.000024 grad: 0.1511 (0.1615) loss: 0.7906 (0.7916) time: 0.1268 data: 0.0414 max mem: 9377 +Train: [72] [2900/6250] eta: 0:07:50 lr: 0.000024 grad: 0.1566 (0.1614) loss: 0.7828 (0.7914) time: 0.1026 data: 0.0120 max mem: 9377 +Train: [72] [3000/6250] eta: 0:07:36 lr: 0.000024 grad: 0.1546 (0.1614) loss: 0.7841 (0.7912) time: 0.1548 data: 0.0685 max mem: 9377 +Train: [72] [3100/6250] eta: 0:07:20 lr: 0.000024 grad: 0.1559 (0.1613) loss: 0.7871 (0.7910) time: 0.1174 data: 0.0322 max mem: 9377 +Train: [72] [3200/6250] eta: 0:07:05 lr: 0.000024 grad: 0.1589 (0.1613) loss: 0.7823 (0.7908) time: 0.1187 data: 0.0313 max mem: 9377 +Train: [72] [3300/6250] eta: 0:06:50 lr: 0.000024 grad: 0.1506 (0.1612) loss: 0.7887 (0.7907) time: 0.1248 data: 0.0405 max mem: 9377 +Train: [72] [3400/6250] eta: 0:06:37 lr: 0.000024 grad: 0.1527 (0.1611) loss: 0.7910 (0.7906) time: 0.1583 data: 0.0821 max mem: 9377 +Train: [72] [3500/6250] eta: 0:06:24 lr: 0.000024 grad: 0.1509 (0.1610) loss: 0.7872 (0.7906) time: 0.1355 data: 0.0551 max mem: 9377 +Train: [72] [3600/6250] eta: 0:06:10 lr: 0.000024 grad: 0.1519 (0.1609) loss: 0.7896 (0.7906) time: 0.1506 data: 0.0714 max mem: 9377 +Train: [72] [3700/6250] eta: 0:05:57 lr: 0.000024 grad: 0.1491 (0.1607) loss: 0.7951 (0.7906) time: 0.1367 data: 0.0563 max mem: 9377 +Train: [72] [3800/6250] eta: 0:05:45 lr: 0.000024 grad: 0.1605 (0.1607) loss: 0.7859 (0.7905) time: 0.1579 data: 0.0730 max mem: 9377 +Train: [72] [3900/6250] eta: 0:05:31 lr: 0.000024 grad: 0.1620 (0.1608) loss: 0.7895 (0.7905) time: 0.1504 data: 0.0668 max mem: 9377 +Train: [72] [4000/6250] eta: 0:05:18 lr: 0.000024 grad: 0.1591 (0.1608) loss: 0.7900 (0.7905) time: 0.1512 data: 0.0718 max mem: 9377 +Train: [72] [4100/6250] eta: 0:05:04 lr: 0.000024 grad: 0.1522 (0.1608) loss: 0.7927 (0.7904) time: 0.1548 data: 0.0761 max mem: 9377 +Train: [72] [4200/6250] eta: 0:04:50 lr: 0.000024 grad: 0.1582 (0.1609) loss: 0.7855 (0.7903) time: 0.1603 data: 0.0750 max mem: 9377 +Train: [72] [4300/6250] eta: 0:04:36 lr: 0.000024 grad: 0.1602 (0.1610) loss: 0.7834 (0.7902) time: 0.1523 data: 0.0644 max mem: 9377 +Train: [72] [4400/6250] eta: 0:04:22 lr: 0.000024 grad: 0.1578 (0.1610) loss: 0.7913 (0.7901) time: 0.1521 data: 0.0629 max mem: 9377 +Train: [72] [4500/6250] eta: 0:04:09 lr: 0.000024 grad: 0.1598 (0.1611) loss: 0.7912 (0.7901) time: 0.1550 data: 0.0704 max mem: 9377 +Train: [72] [4600/6250] eta: 0:03:55 lr: 0.000024 grad: 0.1589 (0.1611) loss: 0.7837 (0.7901) time: 0.1623 data: 0.0749 max mem: 9377 +Train: [72] [4700/6250] eta: 0:03:41 lr: 0.000024 grad: 0.1599 (0.1611) loss: 0.7887 (0.7901) time: 0.1347 data: 0.0454 max mem: 9377 +Train: [72] [4800/6250] eta: 0:03:27 lr: 0.000024 grad: 0.1680 (0.1613) loss: 0.7806 (0.7900) time: 0.1233 data: 0.0344 max mem: 9377 +Train: [72] [4900/6250] eta: 0:03:13 lr: 0.000024 grad: 0.1530 (0.1613) loss: 0.7893 (0.7899) time: 0.1252 data: 0.0368 max mem: 9377 +Train: [72] [5000/6250] eta: 0:02:58 lr: 0.000024 grad: 0.1517 (0.1612) loss: 0.7928 (0.7898) time: 0.1206 data: 0.0392 max mem: 9377 +Train: [72] [5100/6250] eta: 0:02:44 lr: 0.000024 grad: 0.1571 (0.1612) loss: 0.7839 (0.7898) time: 0.1302 data: 0.0484 max mem: 9377 +Train: [72] [5200/6250] eta: 0:02:30 lr: 0.000024 grad: 0.1568 (0.1611) loss: 0.7807 (0.7897) time: 0.1324 data: 0.0475 max mem: 9377 +Train: [72] [5300/6250] eta: 0:02:15 lr: 0.000024 grad: 0.1594 (0.1611) loss: 0.7825 (0.7897) time: 0.1456 data: 0.0633 max mem: 9377 +Train: [72] [5400/6250] eta: 0:02:01 lr: 0.000024 grad: 0.1528 (0.1611) loss: 0.7902 (0.7896) time: 0.1647 data: 0.0848 max mem: 9377 +Train: [72] [5500/6250] eta: 0:01:47 lr: 0.000023 grad: 0.1543 (0.1610) loss: 0.7907 (0.7896) time: 0.1523 data: 0.0693 max mem: 9377 +Train: [72] [5600/6250] eta: 0:01:33 lr: 0.000023 grad: 0.1672 (0.1611) loss: 0.7947 (0.7896) time: 0.1582 data: 0.0766 max mem: 9377 +Train: [72] [5700/6250] eta: 0:01:19 lr: 0.000023 grad: 0.1552 (0.1611) loss: 0.7877 (0.7896) time: 0.1456 data: 0.0646 max mem: 9377 +Train: [72] [5800/6250] eta: 0:01:04 lr: 0.000023 grad: 0.1491 (0.1611) loss: 0.7873 (0.7895) time: 0.1475 data: 0.0650 max mem: 9377 +Train: [72] [5900/6250] eta: 0:00:50 lr: 0.000023 grad: 0.1541 (0.1611) loss: 0.7860 (0.7895) time: 0.1225 data: 0.0394 max mem: 9377 +Train: [72] [6000/6250] eta: 0:00:35 lr: 0.000023 grad: 0.1622 (0.1612) loss: 0.7798 (0.7894) time: 0.1296 data: 0.0517 max mem: 9377 +Train: [72] [6100/6250] eta: 0:00:21 lr: 0.000023 grad: 0.1562 (0.1612) loss: 0.7912 (0.7893) time: 0.1570 data: 0.0774 max mem: 9377 +Train: [72] [6200/6250] eta: 0:00:07 lr: 0.000023 grad: 0.1621 (0.1614) loss: 0.7815 (0.7893) time: 0.1289 data: 0.0446 max mem: 9377 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1733 (0.1614) loss: 0.7762 (0.7892) time: 0.1476 data: 0.0642 max mem: 9377 +Train: [72] Total time: 0:15:03 (0.1446 s / it) +Averaged stats: lr: 0.000023 grad: 0.1733 (0.1614) loss: 0.7762 (0.7892) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:05:28 loss: 0.8387 (0.8387) time: 5.3058 data: 5.2762 max mem: 9377 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.8420 (0.8430) time: 0.1476 data: 0.1228 max mem: 9377 +Eval (hcp-train-subset): [72] Total time: 0:00:14 (0.2298 s / it) +Averaged stats (hcp-train-subset): loss: 0.8420 (0.8430) +Eval (hcp-val): [72] [ 0/62] eta: 0:06:23 loss: 0.8412 (0.8412) time: 6.1873 data: 6.1572 max mem: 9377 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.8395 (0.8414) time: 0.1302 data: 0.1052 max mem: 9377 +Eval (hcp-val): [72] Total time: 0:00:14 (0.2373 s / it) +Averaged stats (hcp-val): loss: 0.8395 (0.8414) +Eval (nsd-val): [72] [ 0/62] eta: 0:04:26 loss: 0.8120 (0.8120) time: 4.2970 data: 4.1990 max mem: 9377 +Eval (nsd-val): [72] [61/62] eta: 0:00:00 loss: 0.8268 (0.8283) time: 0.1391 data: 0.1140 max mem: 9377 +Eval (nsd-val): [72] Total time: 0:00:14 (0.2419 s / it) +Averaged stats (nsd-val): loss: 0.8268 (0.8283) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 10:50:17 lr: 0.000023 grad: 0.1567 (0.1567) loss: 0.8247 (0.8247) time: 6.2428 data: 6.0960 max mem: 9377 +Train: [73] [ 100/6250] eta: 0:22:05 lr: 0.000023 grad: 0.1904 (0.2253) loss: 0.8099 (0.8032) time: 0.1499 data: 0.0460 max mem: 9377 +Train: [73] [ 200/6250] eta: 0:18:43 lr: 0.000023 grad: 0.1600 (0.2078) loss: 0.7929 (0.7981) time: 0.1636 data: 0.0782 max mem: 9377 +Train: [73] [ 300/6250] eta: 0:17:21 lr: 0.000023 grad: 0.1685 (0.1960) loss: 0.7890 (0.7952) time: 0.1728 data: 0.0812 max mem: 9377 +Train: [73] [ 400/6250] eta: 0:16:02 lr: 0.000023 grad: 0.1688 (0.1906) loss: 0.7900 (0.7938) time: 0.1379 data: 0.0445 max mem: 9377 +Train: [73] [ 500/6250] eta: 0:15:22 lr: 0.000023 grad: 0.1655 (0.1866) loss: 0.7865 (0.7920) time: 0.1455 data: 0.0594 max mem: 9377 +Train: [73] [ 600/6250] eta: 0:14:41 lr: 0.000023 grad: 0.1545 (0.1832) loss: 0.7878 (0.7911) time: 0.1384 data: 0.0468 max mem: 9377 +Train: [73] [ 700/6250] eta: 0:14:07 lr: 0.000023 grad: 0.1616 (0.1812) loss: 0.7811 (0.7903) time: 0.1255 data: 0.0327 max mem: 9377 +Train: [73] [ 800/6250] eta: 0:13:39 lr: 0.000023 grad: 0.1605 (0.1791) loss: 0.7855 (0.7895) time: 0.1401 data: 0.0437 max mem: 9377 +Train: [73] [ 900/6250] eta: 0:13:20 lr: 0.000023 grad: 0.1612 (0.1775) loss: 0.7850 (0.7893) time: 0.1386 data: 0.0487 max mem: 9377 +Train: [73] [1000/6250] eta: 0:13:04 lr: 0.000023 grad: 0.1585 (0.1760) loss: 0.7868 (0.7892) time: 0.1432 data: 0.0593 max mem: 9377 +Train: [73] [1100/6250] eta: 0:12:50 lr: 0.000023 grad: 0.1594 (0.1744) loss: 0.7876 (0.7889) time: 0.1451 data: 0.0631 max mem: 9377 +Train: [73] [1200/6250] eta: 0:12:32 lr: 0.000023 grad: 0.1580 (0.1734) loss: 0.7880 (0.7887) time: 0.1508 data: 0.0667 max mem: 9377 +Train: [73] [1300/6250] eta: 0:12:14 lr: 0.000023 grad: 0.1601 (0.1728) loss: 0.7804 (0.7882) time: 0.1574 data: 0.0803 max mem: 9377 +Train: [73] [1400/6250] eta: 0:11:54 lr: 0.000023 grad: 0.1687 (0.1721) loss: 0.7832 (0.7878) time: 0.1352 data: 0.0516 max mem: 9377 +Train: [73] [1500/6250] eta: 0:11:36 lr: 0.000023 grad: 0.1503 (0.1714) loss: 0.7805 (0.7872) time: 0.1290 data: 0.0480 max mem: 9377 +Train: [73] [1600/6250] eta: 0:11:19 lr: 0.000023 grad: 0.1614 (0.1709) loss: 0.7811 (0.7869) time: 0.1262 data: 0.0473 max mem: 9377 +Train: [73] [1700/6250] eta: 0:11:03 lr: 0.000023 grad: 0.1675 (0.1704) loss: 0.7731 (0.7865) time: 0.1443 data: 0.0596 max mem: 9377 +Train: [73] [1800/6250] eta: 0:10:47 lr: 0.000023 grad: 0.1611 (0.1700) loss: 0.7870 (0.7864) time: 0.1492 data: 0.0662 max mem: 9377 +Train: [73] [1900/6250] eta: 0:10:31 lr: 0.000023 grad: 0.1641 (0.1699) loss: 0.7859 (0.7863) time: 0.1418 data: 0.0593 max mem: 9377 +Train: [73] [2000/6250] eta: 0:10:15 lr: 0.000023 grad: 0.1711 (0.1697) loss: 0.7846 (0.7861) time: 0.1439 data: 0.0586 max mem: 9377 +Train: [73] [2100/6250] eta: 0:09:58 lr: 0.000023 grad: 0.1531 (0.1694) loss: 0.7879 (0.7861) time: 0.1430 data: 0.0630 max mem: 9377 +Train: [73] [2200/6250] eta: 0:09:43 lr: 0.000023 grad: 0.1553 (0.1692) loss: 0.7884 (0.7861) time: 0.1373 data: 0.0602 max mem: 9377 +Train: [73] [2300/6250] eta: 0:09:27 lr: 0.000023 grad: 0.1559 (0.1687) loss: 0.7842 (0.7862) time: 0.1472 data: 0.0606 max mem: 9377 +Train: [73] [2400/6250] eta: 0:09:14 lr: 0.000023 grad: 0.1614 (0.1684) loss: 0.7851 (0.7862) time: 0.1335 data: 0.0439 max mem: 9377 +Train: [73] [2500/6250] eta: 0:09:00 lr: 0.000023 grad: 0.1630 (0.1682) loss: 0.7891 (0.7861) time: 0.1528 data: 0.0721 max mem: 9377 +Train: [73] [2600/6250] eta: 0:08:47 lr: 0.000023 grad: 0.1689 (0.1681) loss: 0.7855 (0.7860) time: 0.1778 data: 0.0985 max mem: 9377 +Train: [73] [2700/6250] eta: 0:08:33 lr: 0.000023 grad: 0.1580 (0.1679) loss: 0.7860 (0.7860) time: 0.1562 data: 0.0753 max mem: 9377 +Train: [73] [2800/6250] eta: 0:08:19 lr: 0.000023 grad: 0.1603 (0.1677) loss: 0.7895 (0.7860) time: 0.1309 data: 0.0524 max mem: 9377 +Train: [73] [2900/6250] eta: 0:08:04 lr: 0.000023 grad: 0.1567 (0.1678) loss: 0.7852 (0.7859) time: 0.1469 data: 0.0658 max mem: 9377 +Train: [73] [3000/6250] eta: 0:07:49 lr: 0.000023 grad: 0.1589 (0.1677) loss: 0.7854 (0.7860) time: 0.1486 data: 0.0731 max mem: 9377 +Train: [73] [3100/6250] eta: 0:07:33 lr: 0.000023 grad: 0.1523 (0.1673) loss: 0.7889 (0.7861) time: 0.1297 data: 0.0420 max mem: 9377 +Train: [73] [3200/6250] eta: 0:07:18 lr: 0.000022 grad: 0.1639 (0.1671) loss: 0.7871 (0.7861) time: 0.1345 data: 0.0466 max mem: 9377 +Train: [73] [3300/6250] eta: 0:07:03 lr: 0.000022 grad: 0.1651 (0.1674) loss: 0.7772 (0.7860) time: 0.1350 data: 0.0469 max mem: 9377 +Train: [73] [3400/6250] eta: 0:06:48 lr: 0.000022 grad: 0.1608 (0.1673) loss: 0.7886 (0.7860) time: 0.1336 data: 0.0495 max mem: 9377 +Train: [73] [3500/6250] eta: 0:06:32 lr: 0.000022 grad: 0.1554 (0.1672) loss: 0.7858 (0.7860) time: 0.1222 data: 0.0419 max mem: 9377 +Train: [73] [3600/6250] eta: 0:06:18 lr: 0.000022 grad: 0.1707 (0.1672) loss: 0.7785 (0.7859) time: 0.1486 data: 0.0679 max mem: 9377 +Train: [73] [3700/6250] eta: 0:06:03 lr: 0.000022 grad: 0.1583 (0.1672) loss: 0.7767 (0.7858) time: 0.1511 data: 0.0692 max mem: 9377 +Train: [73] [3800/6250] eta: 0:05:49 lr: 0.000022 grad: 0.1588 (0.1671) loss: 0.7862 (0.7858) time: 0.1379 data: 0.0592 max mem: 9377 +Train: [73] [3900/6250] eta: 0:05:35 lr: 0.000022 grad: 0.1532 (0.1669) loss: 0.7908 (0.7858) time: 0.1310 data: 0.0501 max mem: 9377 +Train: [73] [4000/6250] eta: 0:05:21 lr: 0.000022 grad: 0.1534 (0.1668) loss: 0.7893 (0.7859) time: 0.1295 data: 0.0462 max mem: 9377 +Train: [73] [4100/6250] eta: 0:05:06 lr: 0.000022 grad: 0.1610 (0.1668) loss: 0.7759 (0.7859) time: 0.1472 data: 0.0634 max mem: 9377 +Train: [73] [4200/6250] eta: 0:04:51 lr: 0.000022 grad: 0.1646 (0.1668) loss: 0.7843 (0.7859) time: 0.1310 data: 0.0489 max mem: 9377 +Train: [73] [4300/6250] eta: 0:04:38 lr: 0.000022 grad: 0.1630 (0.1667) loss: 0.7807 (0.7860) time: 0.1205 data: 0.0365 max mem: 9377 +Train: [73] [4400/6250] eta: 0:04:24 lr: 0.000022 grad: 0.1671 (0.1666) loss: 0.7922 (0.7860) time: 0.1471 data: 0.0616 max mem: 9377 +Train: [73] [4500/6250] eta: 0:04:10 lr: 0.000022 grad: 0.1598 (0.1665) loss: 0.7814 (0.7860) time: 0.1424 data: 0.0587 max mem: 9377 +Train: [73] [4600/6250] eta: 0:03:55 lr: 0.000022 grad: 0.1568 (0.1665) loss: 0.7876 (0.7860) time: 0.1394 data: 0.0553 max mem: 9377 +Train: [73] [4700/6250] eta: 0:03:41 lr: 0.000022 grad: 0.1510 (0.1663) loss: 0.7943 (0.7861) time: 0.1386 data: 0.0535 max mem: 9377 +Train: [73] [4800/6250] eta: 0:03:27 lr: 0.000022 grad: 0.1525 (0.1662) loss: 0.7937 (0.7861) time: 0.1464 data: 0.0632 max mem: 9377 +Train: [73] [4900/6250] eta: 0:03:12 lr: 0.000022 grad: 0.1579 (0.1661) loss: 0.7811 (0.7861) time: 0.1334 data: 0.0500 max mem: 9377 +Train: [73] [5000/6250] eta: 0:02:58 lr: 0.000022 grad: 0.1648 (0.1660) loss: 0.7885 (0.7861) time: 0.1706 data: 0.0892 max mem: 9377 +Train: [73] [5100/6250] eta: 0:02:43 lr: 0.000022 grad: 0.1634 (0.1659) loss: 0.7929 (0.7861) time: 0.1333 data: 0.0558 max mem: 9377 +Train: [73] [5200/6250] eta: 0:02:29 lr: 0.000022 grad: 0.1542 (0.1657) loss: 0.7940 (0.7862) time: 0.1400 data: 0.0545 max mem: 9377 +Train: [73] [5300/6250] eta: 0:02:14 lr: 0.000022 grad: 0.1593 (0.1656) loss: 0.7868 (0.7863) time: 0.1377 data: 0.0488 max mem: 9377 +Train: [73] [5400/6250] eta: 0:02:00 lr: 0.000022 grad: 0.1604 (0.1654) loss: 0.7876 (0.7863) time: 0.1461 data: 0.0519 max mem: 9377 +Train: [73] [5500/6250] eta: 0:01:46 lr: 0.000022 grad: 0.1580 (0.1653) loss: 0.7916 (0.7863) time: 0.1409 data: 0.0552 max mem: 9377 +Train: [73] [5600/6250] eta: 0:01:31 lr: 0.000022 grad: 0.1625 (0.1653) loss: 0.7813 (0.7863) time: 0.1284 data: 0.0407 max mem: 9377 +Train: [73] [5700/6250] eta: 0:01:17 lr: 0.000022 grad: 0.1648 (0.1654) loss: 0.7821 (0.7863) time: 0.1348 data: 0.0490 max mem: 9377 +Train: [73] [5800/6250] eta: 0:01:03 lr: 0.000022 grad: 0.1572 (0.1654) loss: 0.7811 (0.7863) time: 0.1155 data: 0.0325 max mem: 9377 +Train: [73] [5900/6250] eta: 0:00:49 lr: 0.000022 grad: 0.1512 (0.1653) loss: 0.7897 (0.7863) time: 0.1436 data: 0.0541 max mem: 9377 +Train: [73] [6000/6250] eta: 0:00:35 lr: 0.000022 grad: 0.1595 (0.1652) loss: 0.7785 (0.7863) time: 0.1383 data: 0.0516 max mem: 9377 +Train: [73] [6100/6250] eta: 0:00:21 lr: 0.000022 grad: 0.1582 (0.1651) loss: 0.7815 (0.7863) time: 0.1331 data: 0.0474 max mem: 9377 +Train: [73] [6200/6250] eta: 0:00:07 lr: 0.000022 grad: 0.1618 (0.1650) loss: 0.7802 (0.7863) time: 0.1493 data: 0.0638 max mem: 9377 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.1586 (0.1651) loss: 0.7845 (0.7863) time: 0.1196 data: 0.0303 max mem: 9377 +Train: [73] Total time: 0:14:45 (0.1417 s / it) +Averaged stats: lr: 0.000022 grad: 0.1586 (0.1651) loss: 0.7845 (0.7863) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:06:08 loss: 0.8383 (0.8383) time: 5.9379 data: 5.9081 max mem: 9377 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.8428 (0.8429) time: 0.1325 data: 0.1075 max mem: 9377 +Eval (hcp-train-subset): [73] Total time: 0:00:13 (0.2146 s / it) +Averaged stats (hcp-train-subset): loss: 0.8428 (0.8429) +Eval (hcp-val): [73] [ 0/62] eta: 0:06:07 loss: 0.8383 (0.8383) time: 5.9312 data: 5.8994 max mem: 9377 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.8389 (0.8413) time: 0.1338 data: 0.1066 max mem: 9377 +Eval (hcp-val): [73] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (hcp-val): loss: 0.8389 (0.8413) +Eval (nsd-val): [73] [ 0/62] eta: 0:05:07 loss: 0.8167 (0.8167) time: 4.9572 data: 4.9264 max mem: 9377 +Eval (nsd-val): [73] [61/62] eta: 0:00:00 loss: 0.8278 (0.8288) time: 0.1220 data: 0.0969 max mem: 9377 +Eval (nsd-val): [73] Total time: 0:00:13 (0.2174 s / it) +Averaged stats (nsd-val): loss: 0.8278 (0.8288) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [74] [ 0/6250] eta: 9:39:01 lr: 0.000022 grad: 0.2478 (0.2478) loss: 0.8263 (0.8263) time: 5.5586 data: 5.4086 max mem: 9377 +Train: [74] [ 100/6250] eta: 0:20:06 lr: 0.000022 grad: 0.1921 (0.2084) loss: 0.8002 (0.8033) time: 0.1416 data: 0.0457 max mem: 9377 +Train: [74] [ 200/6250] eta: 0:17:08 lr: 0.000022 grad: 0.1675 (0.1978) loss: 0.7944 (0.7981) time: 0.1479 data: 0.0554 max mem: 9377 +Train: [74] [ 300/6250] eta: 0:15:56 lr: 0.000022 grad: 0.1619 (0.1863) loss: 0.7841 (0.7969) time: 0.1402 data: 0.0367 max mem: 9377 +Train: [74] [ 400/6250] eta: 0:15:06 lr: 0.000022 grad: 0.1442 (0.1797) loss: 0.7972 (0.7962) time: 0.1354 data: 0.0447 max mem: 9377 +Train: [74] [ 500/6250] eta: 0:14:30 lr: 0.000022 grad: 0.1572 (0.1760) loss: 0.7992 (0.7950) time: 0.1408 data: 0.0521 max mem: 9377 +Train: [74] [ 600/6250] eta: 0:14:03 lr: 0.000022 grad: 0.1741 (0.1744) loss: 0.7866 (0.7940) time: 0.1391 data: 0.0485 max mem: 9377 +Train: [74] [ 700/6250] eta: 0:13:43 lr: 0.000022 grad: 0.1595 (0.1735) loss: 0.7858 (0.7932) time: 0.1241 data: 0.0339 max mem: 9377 +Train: [74] [ 800/6250] eta: 0:13:20 lr: 0.000022 grad: 0.1560 (0.1728) loss: 0.7917 (0.7924) time: 0.1432 data: 0.0541 max mem: 9377 +Train: [74] [ 900/6250] eta: 0:13:03 lr: 0.000021 grad: 0.1581 (0.1710) loss: 0.7954 (0.7928) time: 0.1363 data: 0.0489 max mem: 9377 +Train: [74] [1000/6250] eta: 0:12:41 lr: 0.000021 grad: 0.1586 (0.1697) loss: 0.7910 (0.7927) time: 0.1243 data: 0.0374 max mem: 9377 +Train: [74] [1100/6250] eta: 0:12:23 lr: 0.000021 grad: 0.1560 (0.1686) loss: 0.7947 (0.7929) time: 0.1240 data: 0.0411 max mem: 9377 +Train: [74] [1200/6250] eta: 0:12:04 lr: 0.000021 grad: 0.1549 (0.1674) loss: 0.7910 (0.7931) time: 0.1224 data: 0.0310 max mem: 9377 +Train: [74] [1300/6250] eta: 0:11:46 lr: 0.000021 grad: 0.1554 (0.1668) loss: 0.7921 (0.7928) time: 0.1342 data: 0.0461 max mem: 9377 +Train: [74] [1400/6250] eta: 0:11:26 lr: 0.000021 grad: 0.1616 (0.1664) loss: 0.7777 (0.7926) time: 0.1150 data: 0.0251 max mem: 9377 +Train: [74] [1500/6250] eta: 0:11:10 lr: 0.000021 grad: 0.1696 (0.1661) loss: 0.7747 (0.7924) time: 0.1448 data: 0.0639 max mem: 9377 +Train: [74] [1600/6250] eta: 0:10:56 lr: 0.000021 grad: 0.1529 (0.1663) loss: 0.7896 (0.7921) time: 0.1551 data: 0.0812 max mem: 9377 +Train: [74] [1700/6250] eta: 0:10:41 lr: 0.000021 grad: 0.1612 (0.1661) loss: 0.7866 (0.7918) time: 0.1287 data: 0.0458 max mem: 9377 +Train: [74] [1800/6250] eta: 0:10:26 lr: 0.000021 grad: 0.1643 (0.1663) loss: 0.7748 (0.7912) time: 0.1386 data: 0.0551 max mem: 9377 +Train: [74] [1900/6250] eta: 0:10:10 lr: 0.000021 grad: 0.1618 (0.1662) loss: 0.7875 (0.7907) time: 0.1358 data: 0.0502 max mem: 9377 +Train: [74] [2000/6250] eta: 0:09:57 lr: 0.000021 grad: 0.1656 (0.1663) loss: 0.7730 (0.7902) time: 0.1611 data: 0.0782 max mem: 9377 +Train: [74] [2100/6250] eta: 0:09:43 lr: 0.000021 grad: 0.1656 (0.1665) loss: 0.7837 (0.7897) time: 0.1575 data: 0.0723 max mem: 9377 +Train: [74] [2200/6250] eta: 0:09:28 lr: 0.000021 grad: 0.1676 (0.1666) loss: 0.7710 (0.7893) time: 0.1428 data: 0.0544 max mem: 9377 +Train: [74] [2300/6250] eta: 0:09:14 lr: 0.000021 grad: 0.1766 (0.1667) loss: 0.7755 (0.7888) time: 0.1508 data: 0.0690 max mem: 9377 +Train: [74] [2400/6250] eta: 0:08:59 lr: 0.000021 grad: 0.1525 (0.1667) loss: 0.7777 (0.7884) time: 0.1077 data: 0.0227 max mem: 9377 +Train: [74] [2500/6250] eta: 0:08:44 lr: 0.000021 grad: 0.1696 (0.1667) loss: 0.7747 (0.7881) time: 0.1454 data: 0.0612 max mem: 9377 +Train: [74] [2600/6250] eta: 0:08:33 lr: 0.000021 grad: 0.1747 (0.1668) loss: 0.7667 (0.7878) time: 0.1328 data: 0.0462 max mem: 9377 +Train: [74] [2700/6250] eta: 0:08:21 lr: 0.000021 grad: 0.1649 (0.1671) loss: 0.7834 (0.7876) time: 0.1586 data: 0.0752 max mem: 9377 +Train: [74] [2800/6250] eta: 0:08:10 lr: 0.000021 grad: 0.1603 (0.1671) loss: 0.7837 (0.7873) time: 0.1680 data: 0.0727 max mem: 9377 +Train: [74] [2900/6250] eta: 0:07:59 lr: 0.000021 grad: 0.1532 (0.1671) loss: 0.7882 (0.7873) time: 0.1889 data: 0.1061 max mem: 9377 +Train: [74] [3000/6250] eta: 0:07:47 lr: 0.000021 grad: 0.1655 (0.1670) loss: 0.7794 (0.7872) time: 0.1526 data: 0.0614 max mem: 9377 +Train: [74] [3100/6250] eta: 0:07:35 lr: 0.000021 grad: 0.1600 (0.1669) loss: 0.7807 (0.7870) time: 0.1687 data: 0.0827 max mem: 9377 +Train: [74] [3200/6250] eta: 0:07:21 lr: 0.000021 grad: 0.1574 (0.1667) loss: 0.7840 (0.7871) time: 0.1583 data: 0.0650 max mem: 9377 +Train: [74] [3300/6250] eta: 0:07:08 lr: 0.000021 grad: 0.1681 (0.1666) loss: 0.7856 (0.7872) time: 0.1636 data: 0.0796 max mem: 9377 +Train: [74] [3400/6250] eta: 0:06:53 lr: 0.000021 grad: 0.1563 (0.1663) loss: 0.7872 (0.7873) time: 0.1332 data: 0.0467 max mem: 9377 +Train: [74] [3500/6250] eta: 0:06:38 lr: 0.000021 grad: 0.1586 (0.1661) loss: 0.7943 (0.7875) time: 0.1311 data: 0.0498 max mem: 9377 +Train: [74] [3600/6250] eta: 0:06:23 lr: 0.000021 grad: 0.1481 (0.1659) loss: 0.7914 (0.7876) time: 0.1467 data: 0.0619 max mem: 9377 +Train: [74] [3700/6250] eta: 0:06:08 lr: 0.000021 grad: 0.1518 (0.1659) loss: 0.7959 (0.7877) time: 0.1535 data: 0.0699 max mem: 9377 +Train: [74] [3800/6250] eta: 0:05:54 lr: 0.000021 grad: 0.1600 (0.1658) loss: 0.7969 (0.7878) time: 0.1392 data: 0.0542 max mem: 9377 +Train: [74] [3900/6250] eta: 0:05:40 lr: 0.000021 grad: 0.1585 (0.1659) loss: 0.7864 (0.7877) time: 0.1499 data: 0.0723 max mem: 9377 +Train: [74] [4000/6250] eta: 0:05:26 lr: 0.000021 grad: 0.1568 (0.1660) loss: 0.7914 (0.7878) time: 0.1650 data: 0.0861 max mem: 9377 +Train: [74] [4100/6250] eta: 0:05:11 lr: 0.000021 grad: 0.1688 (0.1660) loss: 0.7884 (0.7877) time: 0.1566 data: 0.0774 max mem: 9377 +Train: [74] [4200/6250] eta: 0:04:57 lr: 0.000021 grad: 0.1666 (0.1660) loss: 0.7858 (0.7877) time: 0.1388 data: 0.0571 max mem: 9377 +Train: [74] [4300/6250] eta: 0:04:43 lr: 0.000021 grad: 0.1675 (0.1660) loss: 0.7844 (0.7878) time: 0.1210 data: 0.0294 max mem: 9377 +Train: [74] [4400/6250] eta: 0:04:29 lr: 0.000021 grad: 0.1619 (0.1659) loss: 0.7897 (0.7877) time: 0.1084 data: 0.0156 max mem: 9377 +Train: [74] [4500/6250] eta: 0:04:14 lr: 0.000021 grad: 0.1587 (0.1659) loss: 0.7881 (0.7875) time: 0.1625 data: 0.0740 max mem: 9377 +Train: [74] [4600/6250] eta: 0:04:00 lr: 0.000021 grad: 0.1596 (0.1659) loss: 0.7840 (0.7875) time: 0.1499 data: 0.0651 max mem: 9377 +Train: [74] [4700/6250] eta: 0:03:45 lr: 0.000021 grad: 0.1590 (0.1659) loss: 0.7857 (0.7874) time: 0.1513 data: 0.0640 max mem: 9377 +Train: [74] [4800/6250] eta: 0:03:30 lr: 0.000021 grad: 0.1638 (0.1660) loss: 0.7828 (0.7872) time: 0.1488 data: 0.0676 max mem: 9377 +Train: [74] [4900/6250] eta: 0:03:16 lr: 0.000020 grad: 0.1758 (0.1660) loss: 0.7733 (0.7871) time: 0.1547 data: 0.0699 max mem: 9377 +Train: [74] [5000/6250] eta: 0:03:01 lr: 0.000020 grad: 0.1446 (0.1661) loss: 0.7824 (0.7870) time: 0.1519 data: 0.0725 max mem: 9377 +Train: [74] [5100/6250] eta: 0:02:46 lr: 0.000020 grad: 0.1598 (0.1661) loss: 0.7815 (0.7869) time: 0.1254 data: 0.0383 max mem: 9377 +Train: [74] [5200/6250] eta: 0:02:31 lr: 0.000020 grad: 0.1548 (0.1660) loss: 0.7873 (0.7869) time: 0.1087 data: 0.0251 max mem: 9377 +Train: [74] [5300/6250] eta: 0:02:17 lr: 0.000020 grad: 0.1599 (0.1659) loss: 0.7773 (0.7869) time: 0.1252 data: 0.0368 max mem: 9377 +Train: [74] [5400/6250] eta: 0:02:02 lr: 0.000020 grad: 0.1544 (0.1659) loss: 0.7836 (0.7868) time: 0.1465 data: 0.0622 max mem: 9377 +Train: [74] [5500/6250] eta: 0:01:47 lr: 0.000020 grad: 0.1714 (0.1659) loss: 0.7772 (0.7867) time: 0.1236 data: 0.0391 max mem: 9377 +Train: [74] [5600/6250] eta: 0:01:33 lr: 0.000020 grad: 0.1628 (0.1660) loss: 0.7732 (0.7867) time: 0.1457 data: 0.0566 max mem: 9377 +Train: [74] [5700/6250] eta: 0:01:18 lr: 0.000020 grad: 0.1670 (0.1660) loss: 0.7834 (0.7866) time: 0.1476 data: 0.0656 max mem: 9377 +Train: [74] [5800/6250] eta: 0:01:04 lr: 0.000020 grad: 0.1524 (0.1659) loss: 0.7990 (0.7866) time: 0.1232 data: 0.0446 max mem: 9377 +Train: [74] [5900/6250] eta: 0:00:50 lr: 0.000020 grad: 0.1520 (0.1658) loss: 0.7842 (0.7866) time: 0.1399 data: 0.0513 max mem: 9377 +Train: [74] [6000/6250] eta: 0:00:35 lr: 0.000020 grad: 0.1581 (0.1657) loss: 0.7812 (0.7866) time: 0.1208 data: 0.0438 max mem: 9377 +Train: [74] [6100/6250] eta: 0:00:21 lr: 0.000020 grad: 0.1581 (0.1657) loss: 0.7882 (0.7866) time: 0.1086 data: 0.0202 max mem: 9377 +Train: [74] [6200/6250] eta: 0:00:07 lr: 0.000020 grad: 0.1495 (0.1656) loss: 0.7925 (0.7867) time: 0.1303 data: 0.0428 max mem: 9377 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1626 (0.1656) loss: 0.7874 (0.7867) time: 0.1450 data: 0.0584 max mem: 9377 +Train: [74] Total time: 0:14:59 (0.1440 s / it) +Averaged stats: lr: 0.000020 grad: 0.1626 (0.1656) loss: 0.7874 (0.7867) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:06:09 loss: 0.8397 (0.8397) time: 5.9582 data: 5.9283 max mem: 9377 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.8412 (0.8424) time: 0.1242 data: 0.0990 max mem: 9377 +Eval (hcp-train-subset): [74] Total time: 0:00:13 (0.2240 s / it) +Averaged stats (hcp-train-subset): loss: 0.8412 (0.8424) +Making plots (hcp-train-subset): example=19 +Eval (hcp-val): [74] [ 0/62] eta: 0:06:36 loss: 0.8419 (0.8419) time: 6.3955 data: 6.3628 max mem: 9377 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.8394 (0.8410) time: 0.1470 data: 0.1215 max mem: 9377 +Eval (hcp-val): [74] Total time: 0:00:15 (0.2443 s / it) +Averaged stats (hcp-val): loss: 0.8394 (0.8410) +Making plots (hcp-val): example=5 +Eval (nsd-val): [74] [ 0/62] eta: 0:06:56 loss: 0.8250 (0.8250) time: 6.7194 data: 6.6853 max mem: 9377 +Eval (nsd-val): [74] [61/62] eta: 0:00:00 loss: 0.8362 (0.8375) time: 0.1335 data: 0.1066 max mem: 9377 +Eval (nsd-val): [74] Total time: 0:00:15 (0.2465 s / it) +Averaged stats (nsd-val): loss: 0.8362 (0.8375) +Making plots (nsd-val): example=52 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00074.pth +Train: [75] [ 0/6250] eta: 9:49:44 lr: 0.000020 grad: 0.1116 (0.1116) loss: 0.8496 (0.8496) time: 5.6616 data: 5.4290 max mem: 9377 +Train: [75] [ 100/6250] eta: 0:21:35 lr: 0.000020 grad: 0.1631 (0.2017) loss: 0.8120 (0.8153) time: 0.1324 data: 0.0302 max mem: 9377 +Train: [75] [ 200/6250] eta: 0:18:05 lr: 0.000020 grad: 0.1646 (0.1883) loss: 0.8004 (0.8086) time: 0.1517 data: 0.0616 max mem: 9377 +Train: [75] [ 300/6250] eta: 0:16:34 lr: 0.000020 grad: 0.1615 (0.1801) loss: 0.8083 (0.8080) time: 0.1344 data: 0.0413 max mem: 9377 +Train: [75] [ 400/6250] eta: 0:15:44 lr: 0.000020 grad: 0.1699 (0.1790) loss: 0.7930 (0.8050) time: 0.1485 data: 0.0575 max mem: 9377 +Train: [75] [ 500/6250] eta: 0:14:52 lr: 0.000020 grad: 0.1649 (0.1773) loss: 0.8023 (0.8032) time: 0.1255 data: 0.0317 max mem: 9377 +Train: [75] [ 600/6250] eta: 0:14:26 lr: 0.000020 grad: 0.1569 (0.1754) loss: 0.7950 (0.8019) time: 0.1700 data: 0.0853 max mem: 9377 +Train: [75] [ 700/6250] eta: 0:13:54 lr: 0.000020 grad: 0.1582 (0.1758) loss: 0.7904 (0.7999) time: 0.1523 data: 0.0612 max mem: 9377 +Train: [75] [ 800/6250] eta: 0:13:38 lr: 0.000020 grad: 0.1522 (0.1745) loss: 0.7915 (0.7991) time: 0.1523 data: 0.0625 max mem: 9377 +Train: [75] [ 900/6250] eta: 0:13:19 lr: 0.000020 grad: 0.1547 (0.1733) loss: 0.7920 (0.7983) time: 0.1387 data: 0.0552 max mem: 9377 +Train: [75] [1000/6250] eta: 0:13:00 lr: 0.000020 grad: 0.1531 (0.1717) loss: 0.7959 (0.7979) time: 0.1386 data: 0.0542 max mem: 9377 +Train: [75] [1100/6250] eta: 0:12:40 lr: 0.000020 grad: 0.1469 (0.1703) loss: 0.8039 (0.7977) time: 0.1398 data: 0.0575 max mem: 9377 +Train: [75] [1200/6250] eta: 0:12:21 lr: 0.000020 grad: 0.1471 (0.1694) loss: 0.7991 (0.7972) time: 0.1526 data: 0.0664 max mem: 9377 +Train: [75] [1300/6250] eta: 0:12:03 lr: 0.000020 grad: 0.1558 (0.1687) loss: 0.7917 (0.7968) time: 0.1461 data: 0.0690 max mem: 9377 +Train: [75] [1400/6250] eta: 0:11:45 lr: 0.000020 grad: 0.1566 (0.1684) loss: 0.7976 (0.7963) time: 0.1412 data: 0.0548 max mem: 9377 +Train: [75] [1500/6250] eta: 0:11:27 lr: 0.000020 grad: 0.1525 (0.1680) loss: 0.7969 (0.7958) time: 0.1344 data: 0.0508 max mem: 9377 +Train: [75] [1600/6250] eta: 0:11:12 lr: 0.000020 grad: 0.1499 (0.1675) loss: 0.7937 (0.7954) time: 0.1295 data: 0.0444 max mem: 9377 +Train: [75] [1700/6250] eta: 0:10:58 lr: 0.000020 grad: 0.1586 (0.1672) loss: 0.7889 (0.7950) time: 0.1496 data: 0.0654 max mem: 9377 +Train: [75] [1800/6250] eta: 0:10:42 lr: 0.000020 grad: 0.1653 (0.1671) loss: 0.7891 (0.7948) time: 0.1550 data: 0.0724 max mem: 9377 +Train: [75] [1900/6250] eta: 0:10:25 lr: 0.000020 grad: 0.1606 (0.1669) loss: 0.7914 (0.7944) time: 0.1281 data: 0.0381 max mem: 9377 +Train: [75] [2000/6250] eta: 0:10:09 lr: 0.000020 grad: 0.1667 (0.1673) loss: 0.7816 (0.7939) time: 0.1503 data: 0.0689 max mem: 9377 +Train: [75] [2100/6250] eta: 0:09:53 lr: 0.000020 grad: 0.1711 (0.1673) loss: 0.7761 (0.7935) time: 0.1239 data: 0.0432 max mem: 9377 +Train: [75] [2200/6250] eta: 0:09:38 lr: 0.000020 grad: 0.1610 (0.1672) loss: 0.7888 (0.7931) time: 0.1314 data: 0.0477 max mem: 9377 +Train: [75] [2300/6250] eta: 0:09:23 lr: 0.000020 grad: 0.1621 (0.1673) loss: 0.7862 (0.7926) time: 0.1488 data: 0.0682 max mem: 9377 +Train: [75] [2400/6250] eta: 0:09:07 lr: 0.000020 grad: 0.1652 (0.1673) loss: 0.7797 (0.7922) time: 0.1242 data: 0.0409 max mem: 9377 +Train: [75] [2500/6250] eta: 0:08:51 lr: 0.000020 grad: 0.1603 (0.1672) loss: 0.7850 (0.7920) time: 0.1271 data: 0.0459 max mem: 9377 +Train: [75] [2600/6250] eta: 0:08:37 lr: 0.000020 grad: 0.1718 (0.1675) loss: 0.7790 (0.7915) time: 0.1614 data: 0.0780 max mem: 9377 +Train: [75] [2700/6250] eta: 0:08:22 lr: 0.000020 grad: 0.1661 (0.1678) loss: 0.7763 (0.7911) time: 0.1646 data: 0.0884 max mem: 9377 +Train: [75] [2800/6250] eta: 0:08:08 lr: 0.000019 grad: 0.1649 (0.1680) loss: 0.7860 (0.7908) time: 0.1361 data: 0.0461 max mem: 9377 +Train: [75] [2900/6250] eta: 0:07:56 lr: 0.000019 grad: 0.1744 (0.1680) loss: 0.7709 (0.7905) time: 0.1491 data: 0.0633 max mem: 9377 +Train: [75] [3000/6250] eta: 0:07:42 lr: 0.000019 grad: 0.1728 (0.1681) loss: 0.7791 (0.7902) time: 0.1271 data: 0.0454 max mem: 9377 +Train: [75] [3100/6250] eta: 0:07:29 lr: 0.000019 grad: 0.1591 (0.1680) loss: 0.7759 (0.7899) time: 0.1510 data: 0.0692 max mem: 9377 +Train: [75] [3200/6250] eta: 0:07:15 lr: 0.000019 grad: 0.1602 (0.1681) loss: 0.7848 (0.7897) time: 0.1458 data: 0.0647 max mem: 9377 +Train: [75] [3300/6250] eta: 0:07:01 lr: 0.000019 grad: 0.1652 (0.1680) loss: 0.7818 (0.7894) time: 0.1396 data: 0.0540 max mem: 9377 +Train: [75] [3400/6250] eta: 0:06:48 lr: 0.000019 grad: 0.1534 (0.1680) loss: 0.7823 (0.7891) time: 0.1607 data: 0.0670 max mem: 9377 +Train: [75] [3500/6250] eta: 0:06:34 lr: 0.000019 grad: 0.1666 (0.1681) loss: 0.7916 (0.7889) time: 0.1550 data: 0.0729 max mem: 9377 +Train: [75] [3600/6250] eta: 0:06:19 lr: 0.000019 grad: 0.1552 (0.1682) loss: 0.7872 (0.7887) time: 0.1404 data: 0.0567 max mem: 9377 +Train: [75] [3700/6250] eta: 0:06:05 lr: 0.000019 grad: 0.1578 (0.1682) loss: 0.7837 (0.7886) time: 0.1421 data: 0.0544 max mem: 9377 +Train: [75] [3800/6250] eta: 0:05:50 lr: 0.000019 grad: 0.1730 (0.1682) loss: 0.7761 (0.7886) time: 0.1258 data: 0.0432 max mem: 9377 +Train: [75] [3900/6250] eta: 0:05:35 lr: 0.000019 grad: 0.1629 (0.1681) loss: 0.7928 (0.7885) time: 0.1259 data: 0.0448 max mem: 9377 +Train: [75] [4000/6250] eta: 0:05:21 lr: 0.000019 grad: 0.1530 (0.1680) loss: 0.7916 (0.7885) time: 0.1199 data: 0.0337 max mem: 9377 +Train: [75] [4100/6250] eta: 0:05:06 lr: 0.000019 grad: 0.1561 (0.1682) loss: 0.7939 (0.7885) time: 0.1336 data: 0.0488 max mem: 9377 +Train: [75] [4200/6250] eta: 0:04:52 lr: 0.000019 grad: 0.1660 (0.1682) loss: 0.7844 (0.7885) time: 0.1374 data: 0.0583 max mem: 9377 +Train: [75] [4300/6250] eta: 0:04:38 lr: 0.000019 grad: 0.1626 (0.1681) loss: 0.7876 (0.7885) time: 0.1495 data: 0.0710 max mem: 9377 +Train: [75] [4400/6250] eta: 0:04:24 lr: 0.000019 grad: 0.1680 (0.1680) loss: 0.7881 (0.7885) time: 0.1565 data: 0.0771 max mem: 9377 +Train: [75] [4500/6250] eta: 0:04:10 lr: 0.000019 grad: 0.1549 (0.1681) loss: 0.7931 (0.7885) time: 0.1564 data: 0.0715 max mem: 9377 +Train: [75] [4600/6250] eta: 0:03:57 lr: 0.000019 grad: 0.1586 (0.1681) loss: 0.7803 (0.7885) time: 0.1664 data: 0.0846 max mem: 9377 +Train: [75] [4700/6250] eta: 0:03:42 lr: 0.000019 grad: 0.1557 (0.1680) loss: 0.7932 (0.7885) time: 0.1371 data: 0.0521 max mem: 9377 +Train: [75] [4800/6250] eta: 0:03:28 lr: 0.000019 grad: 0.1527 (0.1678) loss: 0.7968 (0.7886) time: 0.1393 data: 0.0594 max mem: 9377 +Train: [75] [4900/6250] eta: 0:03:13 lr: 0.000019 grad: 0.1607 (0.1677) loss: 0.7892 (0.7886) time: 0.1320 data: 0.0526 max mem: 9377 +Train: [75] [5000/6250] eta: 0:02:58 lr: 0.000019 grad: 0.1578 (0.1676) loss: 0.7887 (0.7886) time: 0.1265 data: 0.0480 max mem: 9377 +Train: [75] [5100/6250] eta: 0:02:44 lr: 0.000019 grad: 0.1495 (0.1674) loss: 0.7957 (0.7886) time: 0.1218 data: 0.0360 max mem: 9377 +Train: [75] [5200/6250] eta: 0:02:29 lr: 0.000019 grad: 0.1591 (0.1673) loss: 0.7946 (0.7886) time: 0.1325 data: 0.0467 max mem: 9377 +Train: [75] [5300/6250] eta: 0:02:15 lr: 0.000019 grad: 0.1560 (0.1671) loss: 0.7969 (0.7887) time: 0.1295 data: 0.0458 max mem: 9377 +Train: [75] [5400/6250] eta: 0:02:00 lr: 0.000019 grad: 0.1603 (0.1670) loss: 0.7801 (0.7888) time: 0.1380 data: 0.0572 max mem: 9377 +Train: [75] [5500/6250] eta: 0:01:46 lr: 0.000019 grad: 0.1611 (0.1670) loss: 0.7811 (0.7887) time: 0.1508 data: 0.0696 max mem: 9377 +Train: [75] [5600/6250] eta: 0:01:32 lr: 0.000019 grad: 0.1688 (0.1670) loss: 0.7886 (0.7887) time: 0.1361 data: 0.0587 max mem: 9377 +Train: [75] [5700/6250] eta: 0:01:18 lr: 0.000019 grad: 0.1554 (0.1670) loss: 0.7935 (0.7887) time: 0.1250 data: 0.0374 max mem: 9377 +Train: [75] [5800/6250] eta: 0:01:03 lr: 0.000019 grad: 0.1653 (0.1670) loss: 0.7875 (0.7887) time: 0.1405 data: 0.0602 max mem: 9377 +Train: [75] [5900/6250] eta: 0:00:49 lr: 0.000019 grad: 0.1632 (0.1669) loss: 0.7848 (0.7887) time: 0.1565 data: 0.0763 max mem: 9377 +Train: [75] [6000/6250] eta: 0:00:35 lr: 0.000019 grad: 0.1681 (0.1670) loss: 0.7832 (0.7886) time: 0.1385 data: 0.0563 max mem: 9377 +Train: [75] [6100/6250] eta: 0:00:21 lr: 0.000019 grad: 0.1727 (0.1671) loss: 0.7796 (0.7885) time: 0.1356 data: 0.0582 max mem: 9377 +Train: [75] [6200/6250] eta: 0:00:07 lr: 0.000019 grad: 0.1659 (0.1671) loss: 0.7828 (0.7885) time: 0.1448 data: 0.0638 max mem: 9377 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.1698 (0.1672) loss: 0.7832 (0.7884) time: 0.1258 data: 0.0424 max mem: 9377 +Train: [75] Total time: 0:14:50 (0.1425 s / it) +Averaged stats: lr: 0.000019 grad: 0.1698 (0.1672) loss: 0.7832 (0.7884) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:05:54 loss: 0.8395 (0.8395) time: 5.7202 data: 5.6893 max mem: 9377 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.8428 (0.8424) time: 0.1273 data: 0.1004 max mem: 9377 +Eval (hcp-train-subset): [75] Total time: 0:00:13 (0.2136 s / it) +Averaged stats (hcp-train-subset): loss: 0.8428 (0.8424) +Eval (hcp-val): [75] [ 0/62] eta: 0:05:45 loss: 0.8382 (0.8382) time: 5.5789 data: 5.5473 max mem: 9377 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.8391 (0.8410) time: 0.1245 data: 0.0989 max mem: 9377 +Eval (hcp-val): [75] Total time: 0:00:13 (0.2174 s / it) +Averaged stats (hcp-val): loss: 0.8391 (0.8410) +Eval (nsd-val): [75] [ 0/62] eta: 0:04:49 loss: 0.8186 (0.8186) time: 4.6667 data: 4.6192 max mem: 9377 +Eval (nsd-val): [75] [61/62] eta: 0:00:00 loss: 0.8284 (0.8297) time: 0.1250 data: 0.1001 max mem: 9377 +Eval (nsd-val): [75] Total time: 0:00:13 (0.2143 s / it) +Averaged stats (nsd-val): loss: 0.8284 (0.8297) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [76] [ 0/6250] eta: 11:13:58 lr: 0.000019 grad: 0.1690 (0.1690) loss: 0.7972 (0.7972) time: 6.4702 data: 6.3742 max mem: 9377 +Train: [76] [ 100/6250] eta: 0:19:34 lr: 0.000019 grad: 0.1649 (0.2005) loss: 0.8107 (0.8115) time: 0.1253 data: 0.0256 max mem: 9377 +Train: [76] [ 200/6250] eta: 0:16:54 lr: 0.000019 grad: 0.1717 (0.1933) loss: 0.8071 (0.8119) time: 0.1391 data: 0.0598 max mem: 9377 +Train: [76] [ 300/6250] eta: 0:15:45 lr: 0.000019 grad: 0.1701 (0.1902) loss: 0.8072 (0.8087) time: 0.1538 data: 0.0600 max mem: 9377 +Train: [76] [ 400/6250] eta: 0:15:15 lr: 0.000019 grad: 0.1643 (0.1866) loss: 0.7915 (0.8054) time: 0.1677 data: 0.0775 max mem: 9377 +Train: [76] [ 500/6250] eta: 0:14:44 lr: 0.000019 grad: 0.1591 (0.1842) loss: 0.7913 (0.8027) time: 0.1394 data: 0.0445 max mem: 9377 +Train: [76] [ 600/6250] eta: 0:14:16 lr: 0.000019 grad: 0.1630 (0.1822) loss: 0.7918 (0.8002) time: 0.1394 data: 0.0487 max mem: 9377 +Train: [76] [ 700/6250] eta: 0:13:47 lr: 0.000019 grad: 0.1653 (0.1804) loss: 0.7957 (0.7988) time: 0.1279 data: 0.0334 max mem: 9377 +Train: [76] [ 800/6250] eta: 0:13:31 lr: 0.000018 grad: 0.1734 (0.1788) loss: 0.7830 (0.7975) time: 0.1523 data: 0.0742 max mem: 9377 +Train: [76] [ 900/6250] eta: 0:13:14 lr: 0.000018 grad: 0.1686 (0.1779) loss: 0.7892 (0.7964) time: 0.1323 data: 0.0500 max mem: 9377 +Train: [76] [1000/6250] eta: 0:12:59 lr: 0.000018 grad: 0.1630 (0.1765) loss: 0.7848 (0.7958) time: 0.1504 data: 0.0672 max mem: 9377 +Train: [76] [1100/6250] eta: 0:12:48 lr: 0.000018 grad: 0.1673 (0.1756) loss: 0.7838 (0.7950) time: 0.1549 data: 0.0708 max mem: 9377 +Train: [76] [1200/6250] eta: 0:12:33 lr: 0.000018 grad: 0.1703 (0.1748) loss: 0.7835 (0.7943) time: 0.1402 data: 0.0603 max mem: 9377 +Train: [76] [1300/6250] eta: 0:12:21 lr: 0.000018 grad: 0.1657 (0.1742) loss: 0.7883 (0.7939) time: 0.1858 data: 0.1073 max mem: 9377 +Train: [76] [1400/6250] eta: 0:12:06 lr: 0.000018 grad: 0.1562 (0.1736) loss: 0.7951 (0.7934) time: 0.1569 data: 0.0766 max mem: 9377 +Train: [76] [1500/6250] eta: 0:11:51 lr: 0.000018 grad: 0.1621 (0.1733) loss: 0.7835 (0.7931) time: 0.1545 data: 0.0713 max mem: 9377 +Train: [76] [1600/6250] eta: 0:11:37 lr: 0.000018 grad: 0.1622 (0.1726) loss: 0.7898 (0.7929) time: 0.1667 data: 0.0881 max mem: 9377 +Train: [76] [1700/6250] eta: 0:11:22 lr: 0.000018 grad: 0.1593 (0.1721) loss: 0.7832 (0.7926) time: 0.1480 data: 0.0646 max mem: 9377 +Train: [76] [1800/6250] eta: 0:11:06 lr: 0.000018 grad: 0.1487 (0.1715) loss: 0.7966 (0.7927) time: 0.1513 data: 0.0706 max mem: 9377 +Train: [76] [1900/6250] eta: 0:10:48 lr: 0.000018 grad: 0.1511 (0.1708) loss: 0.7902 (0.7926) time: 0.1401 data: 0.0577 max mem: 9377 +Train: [76] [2000/6250] eta: 0:10:31 lr: 0.000018 grad: 0.1523 (0.1700) loss: 0.8008 (0.7928) time: 0.1654 data: 0.0853 max mem: 9377 +Train: [76] [2100/6250] eta: 0:10:14 lr: 0.000018 grad: 0.1489 (0.1693) loss: 0.7958 (0.7930) time: 0.1362 data: 0.0517 max mem: 9377 +Train: [76] [2200/6250] eta: 0:09:58 lr: 0.000018 grad: 0.1521 (0.1688) loss: 0.8088 (0.7931) time: 0.1509 data: 0.0675 max mem: 9377 +Train: [76] [2300/6250] eta: 0:09:44 lr: 0.000018 grad: 0.1613 (0.1688) loss: 0.7822 (0.7930) time: 0.1605 data: 0.0765 max mem: 9377 +Train: [76] [2400/6250] eta: 0:09:27 lr: 0.000018 grad: 0.1603 (0.1687) loss: 0.7867 (0.7927) time: 0.1421 data: 0.0651 max mem: 9377 +Train: [76] [2500/6250] eta: 0:09:11 lr: 0.000018 grad: 0.1605 (0.1686) loss: 0.7870 (0.7926) time: 0.1426 data: 0.0576 max mem: 9377 +Train: [76] [2600/6250] eta: 0:08:55 lr: 0.000018 grad: 0.1619 (0.1687) loss: 0.7866 (0.7925) time: 0.1377 data: 0.0505 max mem: 9377 +Train: [76] [2700/6250] eta: 0:08:39 lr: 0.000018 grad: 0.1688 (0.1685) loss: 0.7885 (0.7925) time: 0.1490 data: 0.0649 max mem: 9377 +Train: [76] [2800/6250] eta: 0:08:24 lr: 0.000018 grad: 0.1689 (0.1684) loss: 0.7880 (0.7925) time: 0.1272 data: 0.0442 max mem: 9377 +Train: [76] [2900/6250] eta: 0:08:10 lr: 0.000018 grad: 0.1562 (0.1684) loss: 0.7880 (0.7923) time: 0.1308 data: 0.0575 max mem: 9377 +Train: [76] [3000/6250] eta: 0:07:56 lr: 0.000018 grad: 0.1613 (0.1683) loss: 0.7913 (0.7923) time: 0.1546 data: 0.0706 max mem: 9377 +Train: [76] [3100/6250] eta: 0:07:42 lr: 0.000018 grad: 0.1605 (0.1682) loss: 0.7890 (0.7922) time: 0.1559 data: 0.0698 max mem: 9377 +Train: [76] [3200/6250] eta: 0:07:27 lr: 0.000018 grad: 0.1555 (0.1681) loss: 0.7929 (0.7920) time: 0.1379 data: 0.0612 max mem: 9377 +Train: [76] [3300/6250] eta: 0:07:12 lr: 0.000018 grad: 0.1677 (0.1682) loss: 0.7933 (0.7918) time: 0.1442 data: 0.0648 max mem: 9377 +Train: [76] [3400/6250] eta: 0:06:57 lr: 0.000018 grad: 0.1651 (0.1682) loss: 0.7855 (0.7917) time: 0.1395 data: 0.0593 max mem: 9377 +Train: [76] [3500/6250] eta: 0:06:41 lr: 0.000018 grad: 0.1631 (0.1681) loss: 0.7978 (0.7917) time: 0.1176 data: 0.0233 max mem: 9377 +Train: [76] [3600/6250] eta: 0:06:26 lr: 0.000018 grad: 0.1656 (0.1681) loss: 0.7766 (0.7916) time: 0.1538 data: 0.0805 max mem: 9377 +Train: [76] [3700/6250] eta: 0:06:11 lr: 0.000018 grad: 0.1639 (0.1680) loss: 0.7910 (0.7916) time: 0.1273 data: 0.0412 max mem: 9377 +Train: [76] [3800/6250] eta: 0:05:55 lr: 0.000018 grad: 0.1663 (0.1679) loss: 0.7861 (0.7915) time: 0.1403 data: 0.0579 max mem: 9377 +Train: [76] [3900/6250] eta: 0:05:40 lr: 0.000018 grad: 0.1598 (0.1679) loss: 0.7925 (0.7913) time: 0.1246 data: 0.0439 max mem: 9377 +Train: [76] [4000/6250] eta: 0:05:25 lr: 0.000018 grad: 0.1661 (0.1679) loss: 0.7881 (0.7911) time: 0.1310 data: 0.0453 max mem: 9377 +Train: [76] [4100/6250] eta: 0:05:09 lr: 0.000018 grad: 0.1609 (0.1679) loss: 0.7782 (0.7909) time: 0.1161 data: 0.0296 max mem: 9377 +Train: [76] [4200/6250] eta: 0:04:55 lr: 0.000018 grad: 0.1658 (0.1681) loss: 0.7859 (0.7907) time: 0.1425 data: 0.0602 max mem: 9377 +Train: [76] [4300/6250] eta: 0:04:41 lr: 0.000018 grad: 0.1663 (0.1681) loss: 0.7801 (0.7905) time: 0.1665 data: 0.0815 max mem: 9377 +Train: [76] [4400/6250] eta: 0:04:27 lr: 0.000018 grad: 0.1629 (0.1681) loss: 0.7828 (0.7903) time: 0.1708 data: 0.0920 max mem: 9377 +Train: [76] [4500/6250] eta: 0:04:14 lr: 0.000018 grad: 0.1595 (0.1681) loss: 0.7772 (0.7901) time: 0.1686 data: 0.0929 max mem: 9377 +Train: [76] [4600/6250] eta: 0:04:00 lr: 0.000018 grad: 0.1616 (0.1682) loss: 0.7837 (0.7900) time: 0.1777 data: 0.0926 max mem: 9377 +Train: [76] [4700/6250] eta: 0:03:46 lr: 0.000018 grad: 0.1629 (0.1681) loss: 0.7826 (0.7899) time: 0.1535 data: 0.0667 max mem: 9377 +Train: [76] [4800/6250] eta: 0:03:31 lr: 0.000018 grad: 0.1637 (0.1681) loss: 0.7888 (0.7897) time: 0.1506 data: 0.0641 max mem: 9377 +Train: [76] [4900/6250] eta: 0:03:17 lr: 0.000018 grad: 0.1557 (0.1681) loss: 0.7767 (0.7896) time: 0.1520 data: 0.0638 max mem: 9377 +Train: [76] [5000/6250] eta: 0:03:02 lr: 0.000018 grad: 0.1521 (0.1680) loss: 0.7885 (0.7896) time: 0.1333 data: 0.0350 max mem: 9377 +Train: [76] [5100/6250] eta: 0:02:48 lr: 0.000017 grad: 0.1625 (0.1679) loss: 0.7880 (0.7895) time: 0.1400 data: 0.0518 max mem: 9377 +Train: [76] [5200/6250] eta: 0:02:33 lr: 0.000017 grad: 0.1551 (0.1680) loss: 0.7954 (0.7895) time: 0.1355 data: 0.0512 max mem: 9377 +Train: [76] [5300/6250] eta: 0:02:18 lr: 0.000017 grad: 0.1586 (0.1680) loss: 0.7854 (0.7894) time: 0.1271 data: 0.0393 max mem: 9377 +Train: [76] [5400/6250] eta: 0:02:03 lr: 0.000017 grad: 0.1583 (0.1679) loss: 0.7883 (0.7894) time: 0.1557 data: 0.0769 max mem: 9377 +Train: [76] [5500/6250] eta: 0:01:49 lr: 0.000017 grad: 0.1700 (0.1680) loss: 0.7865 (0.7893) time: 0.1604 data: 0.0854 max mem: 9377 +Train: [76] [5600/6250] eta: 0:01:34 lr: 0.000017 grad: 0.1705 (0.1680) loss: 0.7846 (0.7892) time: 0.1620 data: 0.0842 max mem: 9377 +Train: [76] [5700/6250] eta: 0:01:20 lr: 0.000017 grad: 0.1662 (0.1681) loss: 0.7787 (0.7891) time: 0.1474 data: 0.0700 max mem: 9377 +Train: [76] [5800/6250] eta: 0:01:05 lr: 0.000017 grad: 0.1551 (0.1680) loss: 0.7920 (0.7891) time: 0.1513 data: 0.0716 max mem: 9377 +Train: [76] [5900/6250] eta: 0:00:51 lr: 0.000017 grad: 0.1675 (0.1680) loss: 0.7905 (0.7890) time: 0.1473 data: 0.0650 max mem: 9377 +Train: [76] [6000/6250] eta: 0:00:36 lr: 0.000017 grad: 0.1657 (0.1680) loss: 0.7816 (0.7890) time: 0.1635 data: 0.0845 max mem: 9377 +Train: [76] [6100/6250] eta: 0:00:21 lr: 0.000017 grad: 0.1562 (0.1679) loss: 0.7901 (0.7890) time: 0.1473 data: 0.0700 max mem: 9377 +Train: [76] [6200/6250] eta: 0:00:07 lr: 0.000017 grad: 0.1595 (0.1678) loss: 0.7911 (0.7891) time: 0.1339 data: 0.0498 max mem: 9377 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1700 (0.1679) loss: 0.7897 (0.7891) time: 0.1378 data: 0.0555 max mem: 9377 +Train: [76] Total time: 0:15:16 (0.1467 s / it) +Averaged stats: lr: 0.000017 grad: 0.1700 (0.1679) loss: 0.7897 (0.7891) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:06:06 loss: 0.8365 (0.8365) time: 5.9085 data: 5.8791 max mem: 9377 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.8433 (0.8439) time: 0.1291 data: 0.1042 max mem: 9377 +Eval (hcp-train-subset): [76] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (hcp-train-subset): loss: 0.8433 (0.8439) +Eval (hcp-val): [76] [ 0/62] eta: 0:05:39 loss: 0.8407 (0.8407) time: 5.4799 data: 5.4485 max mem: 9377 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.8423 (0.8420) time: 0.1359 data: 0.1104 max mem: 9377 +Eval (hcp-val): [76] Total time: 0:00:13 (0.2154 s / it) +Averaged stats (hcp-val): loss: 0.8423 (0.8420) +Eval (nsd-val): [76] [ 0/62] eta: 0:03:55 loss: 0.8212 (0.8212) time: 3.7961 data: 3.7258 max mem: 9377 +Eval (nsd-val): [76] [61/62] eta: 0:00:00 loss: 0.8338 (0.8361) time: 0.1345 data: 0.1091 max mem: 9377 +Eval (nsd-val): [76] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (nsd-val): loss: 0.8338 (0.8361) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [77] [ 0/6250] eta: 7:56:12 lr: 0.000017 grad: 0.3695 (0.3695) loss: 0.7843 (0.7843) time: 4.5715 data: 4.2604 max mem: 9377 +Train: [77] [ 100/6250] eta: 0:20:48 lr: 0.000017 grad: 0.2236 (0.2318) loss: 0.8081 (0.8006) time: 0.1627 data: 0.0679 max mem: 9377 +Train: [77] [ 200/6250] eta: 0:17:42 lr: 0.000017 grad: 0.1970 (0.2203) loss: 0.7878 (0.7952) time: 0.1408 data: 0.0490 max mem: 9377 +Train: [77] [ 300/6250] eta: 0:16:18 lr: 0.000017 grad: 0.1724 (0.2088) loss: 0.7829 (0.7921) time: 0.1313 data: 0.0373 max mem: 9377 +Train: [77] [ 400/6250] eta: 0:15:25 lr: 0.000017 grad: 0.1745 (0.2022) loss: 0.7902 (0.7892) time: 0.1504 data: 0.0644 max mem: 9377 +Train: [77] [ 500/6250] eta: 0:14:37 lr: 0.000017 grad: 0.1864 (0.1993) loss: 0.7780 (0.7875) time: 0.1330 data: 0.0483 max mem: 9377 +Train: [77] [ 600/6250] eta: 0:14:08 lr: 0.000017 grad: 0.1641 (0.1950) loss: 0.7908 (0.7873) time: 0.1549 data: 0.0655 max mem: 9377 +Train: [77] [ 700/6250] eta: 0:13:46 lr: 0.000017 grad: 0.1674 (0.1917) loss: 0.7885 (0.7874) time: 0.1519 data: 0.0656 max mem: 9377 +Train: [77] [ 800/6250] eta: 0:13:34 lr: 0.000017 grad: 0.1591 (0.1885) loss: 0.7904 (0.7877) time: 0.1524 data: 0.0629 max mem: 9377 +Train: [77] [ 900/6250] eta: 0:13:20 lr: 0.000017 grad: 0.1645 (0.1860) loss: 0.7842 (0.7878) time: 0.1543 data: 0.0730 max mem: 9377 +Train: [77] [1000/6250] eta: 0:13:01 lr: 0.000017 grad: 0.1546 (0.1841) loss: 0.7927 (0.7884) time: 0.1506 data: 0.0677 max mem: 9377 +Train: [77] [1100/6250] eta: 0:12:40 lr: 0.000017 grad: 0.1597 (0.1828) loss: 0.7936 (0.7886) time: 0.1365 data: 0.0504 max mem: 9377 +Train: [77] [1200/6250] eta: 0:12:24 lr: 0.000017 grad: 0.1626 (0.1813) loss: 0.7956 (0.7889) time: 0.1172 data: 0.0224 max mem: 9377 +Train: [77] [1300/6250] eta: 0:12:08 lr: 0.000017 grad: 0.1629 (0.1802) loss: 0.7982 (0.7891) time: 0.1537 data: 0.0711 max mem: 9377 +Train: [77] [1400/6250] eta: 0:11:49 lr: 0.000017 grad: 0.1622 (0.1791) loss: 0.7929 (0.7893) time: 0.1324 data: 0.0467 max mem: 9377 +Train: [77] [1500/6250] eta: 0:11:34 lr: 0.000017 grad: 0.1611 (0.1781) loss: 0.7910 (0.7893) time: 0.1435 data: 0.0595 max mem: 9377 +Train: [77] [1600/6250] eta: 0:11:18 lr: 0.000017 grad: 0.1651 (0.1773) loss: 0.7937 (0.7892) time: 0.1374 data: 0.0579 max mem: 9377 +Train: [77] [1700/6250] eta: 0:11:03 lr: 0.000017 grad: 0.1602 (0.1766) loss: 0.7863 (0.7893) time: 0.1418 data: 0.0655 max mem: 9377 +Train: [77] [1800/6250] eta: 0:10:47 lr: 0.000017 grad: 0.1542 (0.1758) loss: 0.7999 (0.7895) time: 0.1362 data: 0.0555 max mem: 9377 +Train: [77] [1900/6250] eta: 0:10:30 lr: 0.000017 grad: 0.1644 (0.1752) loss: 0.7923 (0.7898) time: 0.1471 data: 0.0674 max mem: 9377 +Train: [77] [2000/6250] eta: 0:10:16 lr: 0.000017 grad: 0.1545 (0.1746) loss: 0.7920 (0.7898) time: 0.1611 data: 0.0783 max mem: 9377 +Train: [77] [2100/6250] eta: 0:09:59 lr: 0.000017 grad: 0.1612 (0.1742) loss: 0.7934 (0.7899) time: 0.1341 data: 0.0467 max mem: 9377 +Train: [77] [2200/6250] eta: 0:09:43 lr: 0.000017 grad: 0.1584 (0.1738) loss: 0.7922 (0.7900) time: 0.1354 data: 0.0535 max mem: 9377 +Train: [77] [2300/6250] eta: 0:09:28 lr: 0.000017 grad: 0.1568 (0.1734) loss: 0.7963 (0.7901) time: 0.1484 data: 0.0647 max mem: 9377 +Train: [77] [2400/6250] eta: 0:09:12 lr: 0.000017 grad: 0.1552 (0.1731) loss: 0.7933 (0.7902) time: 0.1449 data: 0.0673 max mem: 9377 +Train: [77] [2500/6250] eta: 0:08:58 lr: 0.000017 grad: 0.1601 (0.1727) loss: 0.7937 (0.7902) time: 0.1606 data: 0.0801 max mem: 9377 +Train: [77] [2600/6250] eta: 0:08:44 lr: 0.000017 grad: 0.1547 (0.1723) loss: 0.7964 (0.7903) time: 0.1588 data: 0.0792 max mem: 9377 +Train: [77] [2700/6250] eta: 0:08:29 lr: 0.000017 grad: 0.1573 (0.1720) loss: 0.7922 (0.7904) time: 0.1479 data: 0.0646 max mem: 9377 +Train: [77] [2800/6250] eta: 0:08:15 lr: 0.000017 grad: 0.1542 (0.1717) loss: 0.7915 (0.7904) time: 0.1506 data: 0.0649 max mem: 9377 +Train: [77] [2900/6250] eta: 0:08:00 lr: 0.000017 grad: 0.1674 (0.1713) loss: 0.7839 (0.7905) time: 0.1449 data: 0.0598 max mem: 9377 +Train: [77] [3000/6250] eta: 0:07:44 lr: 0.000017 grad: 0.1691 (0.1712) loss: 0.7873 (0.7905) time: 0.1543 data: 0.0765 max mem: 9377 +Train: [77] [3100/6250] eta: 0:07:29 lr: 0.000017 grad: 0.1648 (0.1711) loss: 0.7882 (0.7904) time: 0.1183 data: 0.0368 max mem: 9377 +Train: [77] [3200/6250] eta: 0:07:16 lr: 0.000017 grad: 0.1620 (0.1710) loss: 0.7890 (0.7903) time: 0.1435 data: 0.0611 max mem: 9377 +Train: [77] [3300/6250] eta: 0:07:03 lr: 0.000016 grad: 0.1596 (0.1710) loss: 0.7915 (0.7902) time: 0.1438 data: 0.0598 max mem: 9377 +Train: [77] [3400/6250] eta: 0:06:50 lr: 0.000016 grad: 0.1738 (0.1711) loss: 0.7826 (0.7901) time: 0.1643 data: 0.0833 max mem: 9377 +Train: [77] [3500/6250] eta: 0:06:37 lr: 0.000016 grad: 0.1634 (0.1711) loss: 0.7832 (0.7899) time: 0.1409 data: 0.0518 max mem: 9377 +Train: [77] [3600/6250] eta: 0:06:23 lr: 0.000016 grad: 0.1689 (0.1711) loss: 0.7821 (0.7899) time: 0.1464 data: 0.0609 max mem: 9377 +Train: [77] [3700/6250] eta: 0:06:10 lr: 0.000016 grad: 0.1673 (0.1712) loss: 0.7841 (0.7898) time: 0.1722 data: 0.0907 max mem: 9377 +Train: [77] [3800/6250] eta: 0:05:56 lr: 0.000016 grad: 0.1726 (0.1713) loss: 0.7825 (0.7896) time: 0.1415 data: 0.0609 max mem: 9377 +Train: [77] [3900/6250] eta: 0:05:41 lr: 0.000016 grad: 0.1789 (0.1715) loss: 0.7789 (0.7894) time: 0.1454 data: 0.0654 max mem: 9377 +Train: [77] [4000/6250] eta: 0:05:26 lr: 0.000016 grad: 0.1661 (0.1715) loss: 0.7799 (0.7892) time: 0.1205 data: 0.0351 max mem: 9377 +Train: [77] [4100/6250] eta: 0:05:10 lr: 0.000016 grad: 0.1828 (0.1715) loss: 0.7767 (0.7891) time: 0.1432 data: 0.0571 max mem: 9377 +Train: [77] [4200/6250] eta: 0:04:56 lr: 0.000016 grad: 0.1719 (0.1715) loss: 0.7799 (0.7890) time: 0.1272 data: 0.0402 max mem: 9377 +Train: [77] [4300/6250] eta: 0:04:42 lr: 0.000016 grad: 0.1682 (0.1716) loss: 0.7811 (0.7889) time: 0.1571 data: 0.0723 max mem: 9377 +Train: [77] [4400/6250] eta: 0:04:27 lr: 0.000016 grad: 0.1718 (0.1718) loss: 0.7830 (0.7888) time: 0.1634 data: 0.0888 max mem: 9377 +Train: [77] [4500/6250] eta: 0:04:13 lr: 0.000016 grad: 0.1624 (0.1718) loss: 0.7895 (0.7887) time: 0.1333 data: 0.0561 max mem: 9377 +Train: [77] [4600/6250] eta: 0:03:58 lr: 0.000016 grad: 0.1721 (0.1719) loss: 0.7843 (0.7886) time: 0.1479 data: 0.0649 max mem: 9377 +Train: [77] [4700/6250] eta: 0:03:43 lr: 0.000016 grad: 0.1791 (0.1720) loss: 0.7818 (0.7885) time: 0.1149 data: 0.0309 max mem: 9377 +Train: [77] [4800/6250] eta: 0:03:28 lr: 0.000016 grad: 0.1617 (0.1721) loss: 0.7916 (0.7884) time: 0.1247 data: 0.0367 max mem: 9377 +Train: [77] [4900/6250] eta: 0:03:14 lr: 0.000016 grad: 0.1616 (0.1721) loss: 0.7794 (0.7883) time: 0.1480 data: 0.0637 max mem: 9377 +Train: [77] [5000/6250] eta: 0:02:59 lr: 0.000016 grad: 0.1705 (0.1720) loss: 0.7813 (0.7883) time: 0.1500 data: 0.0703 max mem: 9377 +Train: [77] [5100/6250] eta: 0:02:45 lr: 0.000016 grad: 0.1549 (0.1719) loss: 0.7930 (0.7883) time: 0.1511 data: 0.0667 max mem: 9377 +Train: [77] [5200/6250] eta: 0:02:30 lr: 0.000016 grad: 0.1662 (0.1718) loss: 0.7910 (0.7883) time: 0.1396 data: 0.0587 max mem: 9377 +Train: [77] [5300/6250] eta: 0:02:16 lr: 0.000016 grad: 0.1609 (0.1717) loss: 0.7975 (0.7883) time: 0.1168 data: 0.0330 max mem: 9377 +Train: [77] [5400/6250] eta: 0:02:01 lr: 0.000016 grad: 0.1728 (0.1717) loss: 0.7844 (0.7883) time: 0.1213 data: 0.0324 max mem: 9377 +Train: [77] [5500/6250] eta: 0:01:47 lr: 0.000016 grad: 0.1685 (0.1717) loss: 0.7849 (0.7883) time: 0.1468 data: 0.0656 max mem: 9377 +Train: [77] [5600/6250] eta: 0:01:32 lr: 0.000016 grad: 0.1685 (0.1716) loss: 0.7881 (0.7884) time: 0.1580 data: 0.0756 max mem: 9377 +Train: [77] [5700/6250] eta: 0:01:18 lr: 0.000016 grad: 0.1622 (0.1715) loss: 0.7926 (0.7884) time: 0.1583 data: 0.0797 max mem: 9377 +Train: [77] [5800/6250] eta: 0:01:04 lr: 0.000016 grad: 0.1725 (0.1715) loss: 0.7900 (0.7884) time: 0.1490 data: 0.0699 max mem: 9377 +Train: [77] [5900/6250] eta: 0:00:49 lr: 0.000016 grad: 0.1642 (0.1714) loss: 0.7844 (0.7884) time: 0.1497 data: 0.0681 max mem: 9377 +Train: [77] [6000/6250] eta: 0:00:35 lr: 0.000016 grad: 0.1584 (0.1714) loss: 0.7944 (0.7884) time: 0.1419 data: 0.0622 max mem: 9377 +Train: [77] [6100/6250] eta: 0:00:21 lr: 0.000016 grad: 0.1686 (0.1714) loss: 0.7873 (0.7884) time: 0.1396 data: 0.0593 max mem: 9377 +Train: [77] [6200/6250] eta: 0:00:07 lr: 0.000016 grad: 0.1704 (0.1713) loss: 0.7705 (0.7883) time: 0.1299 data: 0.0514 max mem: 9377 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.1668 (0.1713) loss: 0.7911 (0.7883) time: 0.1331 data: 0.0561 max mem: 9377 +Train: [77] Total time: 0:14:53 (0.1430 s / it) +Averaged stats: lr: 0.000016 grad: 0.1668 (0.1713) loss: 0.7911 (0.7883) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:05:27 loss: 0.8399 (0.8399) time: 5.2743 data: 5.2446 max mem: 9377 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.8407 (0.8424) time: 0.1365 data: 0.1096 max mem: 9377 +Eval (hcp-train-subset): [77] Total time: 0:00:13 (0.2168 s / it) +Averaged stats (hcp-train-subset): loss: 0.8407 (0.8424) +Eval (hcp-val): [77] [ 0/62] eta: 0:06:16 loss: 0.8406 (0.8406) time: 6.0724 data: 6.0429 max mem: 9377 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.8407 (0.8417) time: 0.1190 data: 0.0922 max mem: 9377 +Eval (hcp-val): [77] Total time: 0:00:13 (0.2225 s / it) +Averaged stats (hcp-val): loss: 0.8407 (0.8417) +Eval (nsd-val): [77] [ 0/62] eta: 0:05:44 loss: 0.8291 (0.8291) time: 5.5493 data: 5.5177 max mem: 9377 +Eval (nsd-val): [77] [61/62] eta: 0:00:00 loss: 0.8378 (0.8400) time: 0.1300 data: 0.1050 max mem: 9377 +Eval (nsd-val): [77] Total time: 0:00:13 (0.2141 s / it) +Averaged stats (nsd-val): loss: 0.8378 (0.8400) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 10:14:44 lr: 0.000016 grad: 0.3647 (0.3647) loss: 0.7845 (0.7845) time: 5.9016 data: 5.7808 max mem: 9377 +Train: [78] [ 100/6250] eta: 0:19:26 lr: 0.000016 grad: 0.2187 (0.2562) loss: 0.7837 (0.7788) time: 0.1279 data: 0.0321 max mem: 9377 +Train: [78] [ 200/6250] eta: 0:17:03 lr: 0.000016 grad: 0.1747 (0.2334) loss: 0.7832 (0.7762) time: 0.1509 data: 0.0603 max mem: 9377 +Train: [78] [ 300/6250] eta: 0:15:37 lr: 0.000016 grad: 0.1817 (0.2198) loss: 0.7934 (0.7782) time: 0.1207 data: 0.0279 max mem: 9377 +Train: [78] [ 400/6250] eta: 0:14:52 lr: 0.000016 grad: 0.1708 (0.2104) loss: 0.7783 (0.7801) time: 0.1333 data: 0.0417 max mem: 9377 +Train: [78] [ 500/6250] eta: 0:14:21 lr: 0.000016 grad: 0.1809 (0.2068) loss: 0.7785 (0.7800) time: 0.1359 data: 0.0389 max mem: 9377 +Train: [78] [ 600/6250] eta: 0:13:47 lr: 0.000016 grad: 0.1774 (0.2036) loss: 0.7801 (0.7796) time: 0.1242 data: 0.0254 max mem: 9377 +Train: [78] [ 700/6250] eta: 0:13:20 lr: 0.000016 grad: 0.1795 (0.2008) loss: 0.7845 (0.7797) time: 0.1394 data: 0.0498 max mem: 9377 +Train: [78] [ 800/6250] eta: 0:12:57 lr: 0.000016 grad: 0.1846 (0.1996) loss: 0.7701 (0.7789) time: 0.1236 data: 0.0300 max mem: 9377 +Train: [78] [ 900/6250] eta: 0:12:39 lr: 0.000016 grad: 0.1864 (0.1979) loss: 0.7727 (0.7786) time: 0.1341 data: 0.0409 max mem: 9377 +Train: [78] [1000/6250] eta: 0:12:24 lr: 0.000016 grad: 0.1724 (0.1963) loss: 0.7792 (0.7786) time: 0.1543 data: 0.0698 max mem: 9377 +Train: [78] [1100/6250] eta: 0:12:05 lr: 0.000016 grad: 0.1701 (0.1955) loss: 0.7918 (0.7789) time: 0.1410 data: 0.0593 max mem: 9377 +Train: [78] [1200/6250] eta: 0:11:56 lr: 0.000016 grad: 0.1668 (0.1936) loss: 0.7852 (0.7793) time: 0.1369 data: 0.0499 max mem: 9377 +Train: [78] [1300/6250] eta: 0:11:42 lr: 0.000016 grad: 0.1724 (0.1917) loss: 0.7824 (0.7797) time: 0.1630 data: 0.0795 max mem: 9377 +Train: [78] [1400/6250] eta: 0:11:27 lr: 0.000016 grad: 0.1651 (0.1904) loss: 0.7755 (0.7800) time: 0.1644 data: 0.0841 max mem: 9377 +Train: [78] [1500/6250] eta: 0:11:12 lr: 0.000015 grad: 0.1641 (0.1891) loss: 0.7898 (0.7803) time: 0.1247 data: 0.0444 max mem: 9377 +Train: [78] [1600/6250] eta: 0:10:57 lr: 0.000015 grad: 0.1723 (0.1884) loss: 0.7823 (0.7803) time: 0.1488 data: 0.0703 max mem: 9377 +Train: [78] [1700/6250] eta: 0:10:47 lr: 0.000015 grad: 0.1716 (0.1879) loss: 0.7903 (0.7806) time: 0.1758 data: 0.0963 max mem: 9377 +Train: [78] [1800/6250] eta: 0:10:33 lr: 0.000015 grad: 0.1650 (0.1873) loss: 0.7807 (0.7808) time: 0.1373 data: 0.0559 max mem: 9377 +Train: [78] [1900/6250] eta: 0:10:19 lr: 0.000015 grad: 0.1730 (0.1869) loss: 0.7834 (0.7808) time: 0.1337 data: 0.0565 max mem: 9377 +Train: [78] [2000/6250] eta: 0:10:05 lr: 0.000015 grad: 0.1711 (0.1863) loss: 0.7864 (0.7808) time: 0.1457 data: 0.0611 max mem: 9377 +Train: [78] [2100/6250] eta: 0:09:52 lr: 0.000015 grad: 0.1634 (0.1856) loss: 0.7839 (0.7809) time: 0.1513 data: 0.0682 max mem: 9377 +Train: [78] [2200/6250] eta: 0:09:36 lr: 0.000015 grad: 0.1720 (0.1853) loss: 0.7747 (0.7808) time: 0.1511 data: 0.0655 max mem: 9377 +Train: [78] [2300/6250] eta: 0:09:22 lr: 0.000015 grad: 0.1763 (0.1848) loss: 0.7941 (0.7809) time: 0.1548 data: 0.0722 max mem: 9377 +Train: [78] [2400/6250] eta: 0:09:07 lr: 0.000015 grad: 0.1605 (0.1842) loss: 0.7847 (0.7810) time: 0.1500 data: 0.0719 max mem: 9377 +Train: [78] [2500/6250] eta: 0:08:53 lr: 0.000015 grad: 0.1651 (0.1837) loss: 0.7831 (0.7812) time: 0.1546 data: 0.0707 max mem: 9377 +Train: [78] [2600/6250] eta: 0:08:38 lr: 0.000015 grad: 0.1573 (0.1832) loss: 0.7851 (0.7813) time: 0.1446 data: 0.0648 max mem: 9377 +Train: [78] [2700/6250] eta: 0:08:23 lr: 0.000015 grad: 0.1697 (0.1827) loss: 0.7855 (0.7816) time: 0.1303 data: 0.0445 max mem: 9377 +Train: [78] [2800/6250] eta: 0:08:09 lr: 0.000015 grad: 0.1622 (0.1823) loss: 0.7874 (0.7817) time: 0.1311 data: 0.0340 max mem: 9377 +Train: [78] [2900/6250] eta: 0:07:53 lr: 0.000015 grad: 0.1735 (0.1822) loss: 0.7773 (0.7817) time: 0.1376 data: 0.0597 max mem: 9377 +Train: [78] [3000/6250] eta: 0:07:38 lr: 0.000015 grad: 0.1684 (0.1817) loss: 0.7812 (0.7818) time: 0.1109 data: 0.0292 max mem: 9377 +Train: [78] [3100/6250] eta: 0:07:24 lr: 0.000015 grad: 0.1560 (0.1812) loss: 0.7919 (0.7820) time: 0.1378 data: 0.0571 max mem: 9377 +Train: [78] [3200/6250] eta: 0:07:10 lr: 0.000015 grad: 0.1623 (0.1808) loss: 0.7820 (0.7823) time: 0.1356 data: 0.0499 max mem: 9377 +Train: [78] [3300/6250] eta: 0:06:56 lr: 0.000015 grad: 0.1617 (0.1804) loss: 0.7879 (0.7825) time: 0.1239 data: 0.0384 max mem: 9377 +Train: [78] [3400/6250] eta: 0:06:42 lr: 0.000015 grad: 0.1573 (0.1799) loss: 0.7924 (0.7827) time: 0.1319 data: 0.0488 max mem: 9377 +Train: [78] [3500/6250] eta: 0:06:28 lr: 0.000015 grad: 0.1645 (0.1795) loss: 0.7872 (0.7829) time: 0.1345 data: 0.0532 max mem: 9377 +Train: [78] [3600/6250] eta: 0:06:15 lr: 0.000015 grad: 0.1609 (0.1790) loss: 0.7845 (0.7831) time: 0.1561 data: 0.0766 max mem: 9377 +Train: [78] [3700/6250] eta: 0:06:02 lr: 0.000015 grad: 0.1621 (0.1787) loss: 0.7892 (0.7833) time: 0.1521 data: 0.0656 max mem: 9377 +Train: [78] [3800/6250] eta: 0:05:49 lr: 0.000015 grad: 0.1607 (0.1784) loss: 0.7907 (0.7835) time: 0.1519 data: 0.0674 max mem: 9377 +Train: [78] [3900/6250] eta: 0:05:35 lr: 0.000015 grad: 0.1671 (0.1782) loss: 0.7898 (0.7836) time: 0.1447 data: 0.0629 max mem: 9377 +Train: [78] [4000/6250] eta: 0:05:21 lr: 0.000015 grad: 0.1720 (0.1780) loss: 0.7885 (0.7837) time: 0.1468 data: 0.0609 max mem: 9377 +Train: [78] [4100/6250] eta: 0:05:07 lr: 0.000015 grad: 0.1764 (0.1778) loss: 0.7794 (0.7837) time: 0.1426 data: 0.0538 max mem: 9377 +Train: [78] [4200/6250] eta: 0:04:53 lr: 0.000015 grad: 0.1653 (0.1777) loss: 0.7842 (0.7837) time: 0.1520 data: 0.0673 max mem: 9377 +Train: [78] [4300/6250] eta: 0:04:40 lr: 0.000015 grad: 0.1616 (0.1776) loss: 0.7827 (0.7837) time: 0.1957 data: 0.1121 max mem: 9377 +Train: [78] [4400/6250] eta: 0:04:27 lr: 0.000015 grad: 0.1753 (0.1776) loss: 0.7746 (0.7835) time: 0.1695 data: 0.0871 max mem: 9377 +Train: [78] [4500/6250] eta: 0:04:13 lr: 0.000015 grad: 0.1724 (0.1776) loss: 0.7748 (0.7834) time: 0.1376 data: 0.0540 max mem: 9377 +Train: [78] [4600/6250] eta: 0:03:58 lr: 0.000015 grad: 0.1817 (0.1777) loss: 0.7738 (0.7833) time: 0.1663 data: 0.0741 max mem: 9377 +Train: [78] [4700/6250] eta: 0:03:44 lr: 0.000015 grad: 0.1717 (0.1776) loss: 0.7823 (0.7832) time: 0.1454 data: 0.0573 max mem: 9377 +Train: [78] [4800/6250] eta: 0:03:30 lr: 0.000015 grad: 0.1713 (0.1776) loss: 0.7819 (0.7832) time: 0.1375 data: 0.0524 max mem: 9377 +Train: [78] [4900/6250] eta: 0:03:15 lr: 0.000015 grad: 0.1736 (0.1776) loss: 0.7805 (0.7832) time: 0.1468 data: 0.0617 max mem: 9377 +Train: [78] [5000/6250] eta: 0:03:00 lr: 0.000015 grad: 0.1717 (0.1775) loss: 0.7802 (0.7831) time: 0.1354 data: 0.0541 max mem: 9377 +Train: [78] [5100/6250] eta: 0:02:46 lr: 0.000015 grad: 0.1711 (0.1776) loss: 0.7719 (0.7830) time: 0.1492 data: 0.0686 max mem: 9377 +Train: [78] [5200/6250] eta: 0:02:32 lr: 0.000015 grad: 0.1757 (0.1776) loss: 0.7822 (0.7829) time: 0.1655 data: 0.0876 max mem: 9377 +Train: [78] [5300/6250] eta: 0:02:17 lr: 0.000015 grad: 0.1752 (0.1777) loss: 0.7794 (0.7829) time: 0.1620 data: 0.0855 max mem: 9377 +Train: [78] [5400/6250] eta: 0:02:03 lr: 0.000015 grad: 0.1710 (0.1777) loss: 0.7734 (0.7828) time: 0.1607 data: 0.0839 max mem: 9377 +Train: [78] [5500/6250] eta: 0:01:48 lr: 0.000015 grad: 0.1743 (0.1777) loss: 0.7749 (0.7827) time: 0.1236 data: 0.0392 max mem: 9377 +Train: [78] [5600/6250] eta: 0:01:34 lr: 0.000015 grad: 0.1838 (0.1777) loss: 0.7772 (0.7827) time: 0.1337 data: 0.0544 max mem: 9377 +Train: [78] [5700/6250] eta: 0:01:19 lr: 0.000015 grad: 0.1657 (0.1777) loss: 0.7841 (0.7826) time: 0.1604 data: 0.0799 max mem: 9377 +Train: [78] [5800/6250] eta: 0:01:05 lr: 0.000015 grad: 0.1764 (0.1778) loss: 0.7806 (0.7826) time: 0.1131 data: 0.0349 max mem: 9377 +Train: [78] [5900/6250] eta: 0:00:50 lr: 0.000015 grad: 0.1727 (0.1777) loss: 0.7732 (0.7825) time: 0.1372 data: 0.0540 max mem: 9377 +Train: [78] [6000/6250] eta: 0:00:36 lr: 0.000015 grad: 0.1746 (0.1777) loss: 0.7783 (0.7824) time: 0.1629 data: 0.0822 max mem: 9377 +Train: [78] [6100/6250] eta: 0:00:21 lr: 0.000015 grad: 0.1682 (0.1776) loss: 0.7786 (0.7824) time: 0.1215 data: 0.0355 max mem: 9377 +Train: [78] [6200/6250] eta: 0:00:07 lr: 0.000014 grad: 0.1803 (0.1777) loss: 0.7785 (0.7823) time: 0.1483 data: 0.0649 max mem: 9377 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.1745 (0.1777) loss: 0.7809 (0.7823) time: 0.1423 data: 0.0566 max mem: 9377 +Train: [78] Total time: 0:15:10 (0.1457 s / it) +Averaged stats: lr: 0.000014 grad: 0.1745 (0.1777) loss: 0.7809 (0.7823) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:03:38 loss: 0.8366 (0.8366) time: 3.5236 data: 3.4419 max mem: 9377 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.8413 (0.8423) time: 0.1235 data: 0.0987 max mem: 9377 +Eval (hcp-train-subset): [78] Total time: 0:00:13 (0.2206 s / it) +Averaged stats (hcp-train-subset): loss: 0.8413 (0.8423) +Eval (hcp-val): [78] [ 0/62] eta: 0:06:19 loss: 0.8385 (0.8385) time: 6.1251 data: 6.0958 max mem: 9377 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.8402 (0.8409) time: 0.1243 data: 0.0995 max mem: 9377 +Eval (hcp-val): [78] Total time: 0:00:13 (0.2247 s / it) +Averaged stats (hcp-val): loss: 0.8402 (0.8409) +Eval (nsd-val): [78] [ 0/62] eta: 0:03:40 loss: 0.8228 (0.8228) time: 3.5561 data: 3.4780 max mem: 9377 +Eval (nsd-val): [78] [61/62] eta: 0:00:00 loss: 0.8291 (0.8300) time: 0.1015 data: 0.0762 max mem: 9377 +Eval (nsd-val): [78] Total time: 0:00:14 (0.2297 s / it) +Averaged stats (nsd-val): loss: 0.8291 (0.8300) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [79] [ 0/6250] eta: 7:38:56 lr: 0.000014 grad: 0.1525 (0.1525) loss: 0.8714 (0.8714) time: 4.4059 data: 4.1923 max mem: 9377 +Train: [79] [ 100/6250] eta: 0:21:32 lr: 0.000014 grad: 0.1899 (0.2372) loss: 0.7949 (0.8037) time: 0.1364 data: 0.0376 max mem: 9377 +Train: [79] [ 200/6250] eta: 0:18:29 lr: 0.000014 grad: 0.1601 (0.2088) loss: 0.8018 (0.8024) time: 0.1521 data: 0.0503 max mem: 9377 +Train: [79] [ 300/6250] eta: 0:16:57 lr: 0.000014 grad: 0.1613 (0.1983) loss: 0.7906 (0.8009) time: 0.1499 data: 0.0565 max mem: 9377 +Train: [79] [ 400/6250] eta: 0:15:57 lr: 0.000014 grad: 0.1636 (0.1909) loss: 0.8060 (0.8004) time: 0.1340 data: 0.0337 max mem: 9377 +Train: [79] [ 500/6250] eta: 0:15:16 lr: 0.000014 grad: 0.1639 (0.1863) loss: 0.7898 (0.7994) time: 0.1454 data: 0.0557 max mem: 9377 +Train: [79] [ 600/6250] eta: 0:14:41 lr: 0.000014 grad: 0.1725 (0.1840) loss: 0.7873 (0.7983) time: 0.1201 data: 0.0224 max mem: 9377 +Train: [79] [ 700/6250] eta: 0:14:14 lr: 0.000014 grad: 0.1610 (0.1821) loss: 0.7949 (0.7971) time: 0.1318 data: 0.0436 max mem: 9377 +Train: [79] [ 800/6250] eta: 0:13:58 lr: 0.000014 grad: 0.1668 (0.1806) loss: 0.7965 (0.7964) time: 0.1561 data: 0.0707 max mem: 9377 +Train: [79] [ 900/6250] eta: 0:13:49 lr: 0.000014 grad: 0.1669 (0.1795) loss: 0.7926 (0.7961) time: 0.1788 data: 0.0892 max mem: 9377 +Train: [79] [1000/6250] eta: 0:13:37 lr: 0.000014 grad: 0.1618 (0.1783) loss: 0.7852 (0.7958) time: 0.1605 data: 0.0788 max mem: 9377 +Train: [79] [1100/6250] eta: 0:13:26 lr: 0.000014 grad: 0.1704 (0.1776) loss: 0.7825 (0.7953) time: 0.1641 data: 0.0820 max mem: 9377 +Train: [79] [1200/6250] eta: 0:13:10 lr: 0.000014 grad: 0.1688 (0.1773) loss: 0.7839 (0.7947) time: 0.1532 data: 0.0646 max mem: 9377 +Train: [79] [1300/6250] eta: 0:12:53 lr: 0.000014 grad: 0.1675 (0.1772) loss: 0.7816 (0.7938) time: 0.1460 data: 0.0680 max mem: 9377 +Train: [79] [1400/6250] eta: 0:12:37 lr: 0.000014 grad: 0.1641 (0.1769) loss: 0.7880 (0.7931) time: 0.1478 data: 0.0679 max mem: 9377 +Train: [79] [1500/6250] eta: 0:12:14 lr: 0.000014 grad: 0.1656 (0.1764) loss: 0.7857 (0.7927) time: 0.1314 data: 0.0474 max mem: 9377 +Train: [79] [1600/6250] eta: 0:11:53 lr: 0.000014 grad: 0.1657 (0.1763) loss: 0.7855 (0.7923) time: 0.1422 data: 0.0577 max mem: 9377 +Train: [79] [1700/6250] eta: 0:11:34 lr: 0.000014 grad: 0.1578 (0.1758) loss: 0.7830 (0.7920) time: 0.1301 data: 0.0353 max mem: 9377 +Train: [79] [1800/6250] eta: 0:11:13 lr: 0.000014 grad: 0.1674 (0.1757) loss: 0.7898 (0.7917) time: 0.1388 data: 0.0498 max mem: 9377 +Train: [79] [1900/6250] eta: 0:10:57 lr: 0.000014 grad: 0.1647 (0.1753) loss: 0.7836 (0.7915) time: 0.1604 data: 0.0812 max mem: 9377 +Train: [79] [2000/6250] eta: 0:10:39 lr: 0.000014 grad: 0.1700 (0.1748) loss: 0.7878 (0.7913) time: 0.1504 data: 0.0665 max mem: 9377 +Train: [79] [2100/6250] eta: 0:10:21 lr: 0.000014 grad: 0.1658 (0.1746) loss: 0.7915 (0.7911) time: 0.1441 data: 0.0599 max mem: 9377 +Train: [79] [2200/6250] eta: 0:10:04 lr: 0.000014 grad: 0.1661 (0.1744) loss: 0.7843 (0.7908) time: 0.1542 data: 0.0649 max mem: 9377 +Train: [79] [2300/6250] eta: 0:09:47 lr: 0.000014 grad: 0.1687 (0.1743) loss: 0.7850 (0.7904) time: 0.1346 data: 0.0529 max mem: 9377 +Train: [79] [2400/6250] eta: 0:09:33 lr: 0.000014 grad: 0.1596 (0.1741) loss: 0.7823 (0.7902) time: 0.2049 data: 0.1252 max mem: 9377 +Train: [79] [2500/6250] eta: 0:09:16 lr: 0.000014 grad: 0.1656 (0.1739) loss: 0.7905 (0.7901) time: 0.1297 data: 0.0454 max mem: 9377 +Train: [79] [2600/6250] eta: 0:09:00 lr: 0.000014 grad: 0.1709 (0.1738) loss: 0.7806 (0.7899) time: 0.1311 data: 0.0470 max mem: 9377 +Train: [79] [2700/6250] eta: 0:08:45 lr: 0.000014 grad: 0.1666 (0.1736) loss: 0.7964 (0.7898) time: 0.1398 data: 0.0577 max mem: 9377 +Train: [79] [2800/6250] eta: 0:08:30 lr: 0.000014 grad: 0.1685 (0.1736) loss: 0.7865 (0.7898) time: 0.1418 data: 0.0630 max mem: 9377 +Train: [79] [2900/6250] eta: 0:08:15 lr: 0.000014 grad: 0.1635 (0.1735) loss: 0.7866 (0.7897) time: 0.1579 data: 0.0846 max mem: 9377 +Train: [79] [3000/6250] eta: 0:07:59 lr: 0.000014 grad: 0.1688 (0.1735) loss: 0.7808 (0.7896) time: 0.1272 data: 0.0428 max mem: 9377 +Train: [79] [3100/6250] eta: 0:07:44 lr: 0.000014 grad: 0.1723 (0.1735) loss: 0.7887 (0.7895) time: 0.1372 data: 0.0548 max mem: 9377 +Train: [79] [3200/6250] eta: 0:07:29 lr: 0.000014 grad: 0.1768 (0.1736) loss: 0.7839 (0.7894) time: 0.1771 data: 0.0947 max mem: 9377 +Train: [79] [3300/6250] eta: 0:07:13 lr: 0.000014 grad: 0.1848 (0.1737) loss: 0.7826 (0.7891) time: 0.1381 data: 0.0577 max mem: 9377 +Train: [79] [3400/6250] eta: 0:06:58 lr: 0.000014 grad: 0.1718 (0.1738) loss: 0.7710 (0.7890) time: 0.1248 data: 0.0464 max mem: 9377 +Train: [79] [3500/6250] eta: 0:06:43 lr: 0.000014 grad: 0.1705 (0.1739) loss: 0.7841 (0.7888) time: 0.1324 data: 0.0564 max mem: 9377 +Train: [79] [3600/6250] eta: 0:06:28 lr: 0.000014 grad: 0.1763 (0.1740) loss: 0.7769 (0.7886) time: 0.1444 data: 0.0654 max mem: 9377 +Train: [79] [3700/6250] eta: 0:06:13 lr: 0.000014 grad: 0.1717 (0.1741) loss: 0.7810 (0.7884) time: 0.1407 data: 0.0585 max mem: 9377 +Train: [79] [3800/6250] eta: 0:05:59 lr: 0.000014 grad: 0.1808 (0.1741) loss: 0.7842 (0.7882) time: 0.1881 data: 0.1163 max mem: 9377 +Train: [79] [3900/6250] eta: 0:05:45 lr: 0.000014 grad: 0.1721 (0.1741) loss: 0.7697 (0.7881) time: 0.1652 data: 0.0882 max mem: 9377 +Train: [79] [4000/6250] eta: 0:05:31 lr: 0.000014 grad: 0.1702 (0.1741) loss: 0.7801 (0.7879) time: 0.1518 data: 0.0704 max mem: 9377 +Train: [79] [4100/6250] eta: 0:05:16 lr: 0.000014 grad: 0.1852 (0.1740) loss: 0.7692 (0.7877) time: 0.1384 data: 0.0530 max mem: 9377 +Train: [79] [4200/6250] eta: 0:05:01 lr: 0.000014 grad: 0.1672 (0.1740) loss: 0.7831 (0.7875) time: 0.1377 data: 0.0520 max mem: 9377 +Train: [79] [4300/6250] eta: 0:04:47 lr: 0.000014 grad: 0.1685 (0.1740) loss: 0.7870 (0.7874) time: 0.1550 data: 0.0752 max mem: 9377 +Train: [79] [4400/6250] eta: 0:04:32 lr: 0.000014 grad: 0.1712 (0.1739) loss: 0.7823 (0.7873) time: 0.1327 data: 0.0458 max mem: 9377 +Train: [79] [4500/6250] eta: 0:04:17 lr: 0.000014 grad: 0.1695 (0.1739) loss: 0.7839 (0.7872) time: 0.1399 data: 0.0567 max mem: 9377 +Train: [79] [4600/6250] eta: 0:04:01 lr: 0.000014 grad: 0.1792 (0.1738) loss: 0.7854 (0.7871) time: 0.1299 data: 0.0463 max mem: 9377 +Train: [79] [4700/6250] eta: 0:03:46 lr: 0.000013 grad: 0.1588 (0.1739) loss: 0.7851 (0.7870) time: 0.1359 data: 0.0515 max mem: 9377 +Train: [79] [4800/6250] eta: 0:03:31 lr: 0.000013 grad: 0.1831 (0.1739) loss: 0.7739 (0.7868) time: 0.1562 data: 0.0759 max mem: 9377 +Train: [79] [4900/6250] eta: 0:03:16 lr: 0.000013 grad: 0.1699 (0.1740) loss: 0.7848 (0.7867) time: 0.1440 data: 0.0638 max mem: 9377 +Train: [79] [5000/6250] eta: 0:03:02 lr: 0.000013 grad: 0.1664 (0.1741) loss: 0.7805 (0.7866) time: 0.1441 data: 0.0500 max mem: 9377 +Train: [79] [5100/6250] eta: 0:02:47 lr: 0.000013 grad: 0.1790 (0.1741) loss: 0.7844 (0.7865) time: 0.1609 data: 0.0759 max mem: 9377 +Train: [79] [5200/6250] eta: 0:02:32 lr: 0.000013 grad: 0.1769 (0.1741) loss: 0.7801 (0.7864) time: 0.1170 data: 0.0364 max mem: 9377 +Train: [79] [5300/6250] eta: 0:02:17 lr: 0.000013 grad: 0.1776 (0.1741) loss: 0.7787 (0.7864) time: 0.1162 data: 0.0264 max mem: 9377 +Train: [79] [5400/6250] eta: 0:02:02 lr: 0.000013 grad: 0.1681 (0.1741) loss: 0.7806 (0.7863) time: 0.1193 data: 0.0344 max mem: 9377 +Train: [79] [5500/6250] eta: 0:01:48 lr: 0.000013 grad: 0.1618 (0.1741) loss: 0.7879 (0.7863) time: 0.1102 data: 0.0287 max mem: 9377 +Train: [79] [5600/6250] eta: 0:01:33 lr: 0.000013 grad: 0.1639 (0.1743) loss: 0.7866 (0.7863) time: 0.1428 data: 0.0609 max mem: 9377 +Train: [79] [5700/6250] eta: 0:01:19 lr: 0.000013 grad: 0.1645 (0.1742) loss: 0.7880 (0.7863) time: 0.1439 data: 0.0606 max mem: 9377 +Train: [79] [5800/6250] eta: 0:01:04 lr: 0.000013 grad: 0.1669 (0.1742) loss: 0.7818 (0.7863) time: 0.1576 data: 0.0825 max mem: 9377 +Train: [79] [5900/6250] eta: 0:00:50 lr: 0.000013 grad: 0.1727 (0.1742) loss: 0.7881 (0.7863) time: 0.1405 data: 0.0601 max mem: 9377 +Train: [79] [6000/6250] eta: 0:00:36 lr: 0.000013 grad: 0.1691 (0.1742) loss: 0.7765 (0.7863) time: 0.1341 data: 0.0553 max mem: 9377 +Train: [79] [6100/6250] eta: 0:00:21 lr: 0.000013 grad: 0.1692 (0.1742) loss: 0.7810 (0.7863) time: 0.1363 data: 0.0565 max mem: 9377 +Train: [79] [6200/6250] eta: 0:00:07 lr: 0.000013 grad: 0.1754 (0.1743) loss: 0.7786 (0.7863) time: 0.1453 data: 0.0677 max mem: 9377 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.1752 (0.1742) loss: 0.7763 (0.7863) time: 0.1456 data: 0.0622 max mem: 9377 +Train: [79] Total time: 0:15:05 (0.1448 s / it) +Averaged stats: lr: 0.000013 grad: 0.1752 (0.1742) loss: 0.7763 (0.7863) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:03:54 loss: 0.8382 (0.8382) time: 3.7788 data: 3.6912 max mem: 9377 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.8409 (0.8423) time: 0.1339 data: 0.1072 max mem: 9377 +Eval (hcp-train-subset): [79] Total time: 0:00:13 (0.2202 s / it) +Averaged stats (hcp-train-subset): loss: 0.8409 (0.8423) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [79] [ 0/62] eta: 0:04:04 loss: 0.8404 (0.8404) time: 3.9477 data: 3.8773 max mem: 9377 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.8385 (0.8403) time: 0.1308 data: 0.1040 max mem: 9377 +Eval (hcp-val): [79] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (hcp-val): loss: 0.8385 (0.8403) +Making plots (hcp-val): example=2 +Eval (nsd-val): [79] [ 0/62] eta: 0:05:44 loss: 0.8173 (0.8173) time: 5.5611 data: 5.5299 max mem: 9377 +Eval (nsd-val): [79] [61/62] eta: 0:00:00 loss: 0.8292 (0.8292) time: 0.1230 data: 0.0980 max mem: 9377 +Eval (nsd-val): [79] Total time: 0:00:13 (0.2170 s / it) +Averaged stats (nsd-val): loss: 0.8292 (0.8292) +Making plots (nsd-val): example=11 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 8:49:20 lr: 0.000013 grad: 0.1715 (0.1715) loss: 0.8520 (0.8520) time: 5.0817 data: 4.8614 max mem: 9377 +Train: [80] [ 100/6250] eta: 0:19:59 lr: 0.000013 grad: 0.1934 (0.2041) loss: 0.8099 (0.8048) time: 0.1510 data: 0.0433 max mem: 9377 +Train: [80] [ 200/6250] eta: 0:16:52 lr: 0.000013 grad: 0.1727 (0.1953) loss: 0.8150 (0.8016) time: 0.1366 data: 0.0413 max mem: 9377 +Train: [80] [ 300/6250] eta: 0:15:37 lr: 0.000013 grad: 0.1568 (0.1890) loss: 0.8068 (0.7989) time: 0.1222 data: 0.0330 max mem: 9377 +Train: [80] [ 400/6250] eta: 0:15:00 lr: 0.000013 grad: 0.1688 (0.1847) loss: 0.8002 (0.7984) time: 0.1451 data: 0.0527 max mem: 9377 +Train: [80] [ 500/6250] eta: 0:14:30 lr: 0.000013 grad: 0.1629 (0.1826) loss: 0.7889 (0.7973) time: 0.1476 data: 0.0509 max mem: 9377 +Train: [80] [ 600/6250] eta: 0:14:11 lr: 0.000013 grad: 0.1726 (0.1808) loss: 0.7916 (0.7962) time: 0.1629 data: 0.0776 max mem: 9377 +Train: [80] [ 700/6250] eta: 0:13:46 lr: 0.000013 grad: 0.1718 (0.1795) loss: 0.8020 (0.7954) time: 0.1203 data: 0.0273 max mem: 9377 +Train: [80] [ 800/6250] eta: 0:13:24 lr: 0.000013 grad: 0.1726 (0.1783) loss: 0.7909 (0.7950) time: 0.1448 data: 0.0572 max mem: 9377 +Train: [80] [ 900/6250] eta: 0:13:06 lr: 0.000013 grad: 0.1610 (0.1778) loss: 0.7958 (0.7943) time: 0.1520 data: 0.0707 max mem: 9377 +Train: [80] [1000/6250] eta: 0:12:48 lr: 0.000013 grad: 0.1696 (0.1771) loss: 0.7918 (0.7937) time: 0.1357 data: 0.0537 max mem: 9377 +Train: [80] [1100/6250] eta: 0:12:26 lr: 0.000013 grad: 0.1658 (0.1768) loss: 0.7788 (0.7932) time: 0.1265 data: 0.0400 max mem: 9377 +Train: [80] [1200/6250] eta: 0:12:08 lr: 0.000013 grad: 0.1701 (0.1765) loss: 0.7860 (0.7926) time: 0.1372 data: 0.0521 max mem: 9377 +Train: [80] [1300/6250] eta: 0:11:51 lr: 0.000013 grad: 0.1662 (0.1759) loss: 0.7805 (0.7920) time: 0.1261 data: 0.0450 max mem: 9377 +Train: [80] [1400/6250] eta: 0:11:35 lr: 0.000013 grad: 0.1730 (0.1755) loss: 0.7853 (0.7915) time: 0.1234 data: 0.0408 max mem: 9377 +Train: [80] [1500/6250] eta: 0:11:20 lr: 0.000013 grad: 0.1549 (0.1748) loss: 0.7930 (0.7913) time: 0.1408 data: 0.0629 max mem: 9377 +Train: [80] [1600/6250] eta: 0:11:04 lr: 0.000013 grad: 0.1593 (0.1742) loss: 0.7854 (0.7913) time: 0.1293 data: 0.0459 max mem: 9377 +Train: [80] [1700/6250] eta: 0:10:49 lr: 0.000013 grad: 0.1650 (0.1744) loss: 0.7841 (0.7910) time: 0.1469 data: 0.0663 max mem: 9377 +Train: [80] [1800/6250] eta: 0:10:34 lr: 0.000013 grad: 0.1651 (0.1741) loss: 0.7826 (0.7907) time: 0.1413 data: 0.0617 max mem: 9377 +Train: [80] [1900/6250] eta: 0:10:21 lr: 0.000013 grad: 0.1667 (0.1737) loss: 0.7874 (0.7907) time: 0.1461 data: 0.0668 max mem: 9377 +Train: [80] [2000/6250] eta: 0:10:07 lr: 0.000013 grad: 0.1590 (0.1733) loss: 0.7852 (0.7905) time: 0.1437 data: 0.0600 max mem: 9377 +Train: [80] [2100/6250] eta: 0:09:54 lr: 0.000013 grad: 0.1537 (0.1729) loss: 0.7952 (0.7905) time: 0.1310 data: 0.0520 max mem: 9377 +Train: [80] [2200/6250] eta: 0:09:40 lr: 0.000013 grad: 0.1622 (0.1728) loss: 0.7874 (0.7902) time: 0.1657 data: 0.0886 max mem: 9377 +Train: [80] [2300/6250] eta: 0:09:25 lr: 0.000013 grad: 0.1568 (0.1725) loss: 0.7920 (0.7900) time: 0.1296 data: 0.0517 max mem: 9377 +Train: [80] [2400/6250] eta: 0:09:12 lr: 0.000013 grad: 0.1658 (0.1723) loss: 0.7892 (0.7897) time: 0.1522 data: 0.0710 max mem: 9377 +Train: [80] [2500/6250] eta: 0:08:57 lr: 0.000013 grad: 0.1667 (0.1722) loss: 0.7777 (0.7895) time: 0.1195 data: 0.0343 max mem: 9377 +Train: [80] [2600/6250] eta: 0:08:42 lr: 0.000013 grad: 0.1652 (0.1722) loss: 0.7911 (0.7894) time: 0.1383 data: 0.0525 max mem: 9377 +Train: [80] [2700/6250] eta: 0:08:27 lr: 0.000013 grad: 0.1745 (0.1721) loss: 0.7781 (0.7893) time: 0.1417 data: 0.0563 max mem: 9377 +Train: [80] [2800/6250] eta: 0:08:12 lr: 0.000013 grad: 0.1618 (0.1720) loss: 0.7866 (0.7892) time: 0.1584 data: 0.0773 max mem: 9377 +Train: [80] [2900/6250] eta: 0:07:56 lr: 0.000013 grad: 0.1753 (0.1720) loss: 0.7767 (0.7890) time: 0.1365 data: 0.0514 max mem: 9377 +Train: [80] [3000/6250] eta: 0:07:41 lr: 0.000013 grad: 0.1738 (0.1720) loss: 0.7818 (0.7888) time: 0.1196 data: 0.0377 max mem: 9377 +Train: [80] [3100/6250] eta: 0:07:26 lr: 0.000013 grad: 0.1750 (0.1720) loss: 0.7767 (0.7886) time: 0.1301 data: 0.0509 max mem: 9377 +Train: [80] [3200/6250] eta: 0:07:12 lr: 0.000013 grad: 0.1781 (0.1722) loss: 0.7821 (0.7885) time: 0.1383 data: 0.0587 max mem: 9377 +Train: [80] [3300/6250] eta: 0:06:57 lr: 0.000013 grad: 0.1737 (0.1722) loss: 0.7714 (0.7882) time: 0.1360 data: 0.0555 max mem: 9377 +Train: [80] [3400/6250] eta: 0:06:43 lr: 0.000012 grad: 0.1661 (0.1723) loss: 0.7897 (0.7881) time: 0.1506 data: 0.0677 max mem: 9377 +Train: [80] [3500/6250] eta: 0:06:28 lr: 0.000012 grad: 0.1789 (0.1723) loss: 0.7833 (0.7880) time: 0.1096 data: 0.0231 max mem: 9377 +Train: [80] [3600/6250] eta: 0:06:13 lr: 0.000012 grad: 0.1628 (0.1723) loss: 0.7932 (0.7880) time: 0.1338 data: 0.0541 max mem: 9377 +Train: [80] [3700/6250] eta: 0:05:58 lr: 0.000012 grad: 0.1680 (0.1723) loss: 0.7788 (0.7879) time: 0.1371 data: 0.0567 max mem: 9377 +Train: [80] [3800/6250] eta: 0:05:44 lr: 0.000012 grad: 0.1673 (0.1724) loss: 0.7786 (0.7878) time: 0.1328 data: 0.0546 max mem: 9377 +Train: [80] [3900/6250] eta: 0:05:30 lr: 0.000012 grad: 0.1680 (0.1725) loss: 0.7948 (0.7878) time: 0.1260 data: 0.0387 max mem: 9377 +Train: [80] [4000/6250] eta: 0:05:17 lr: 0.000012 grad: 0.1687 (0.1725) loss: 0.7929 (0.7878) time: 0.1645 data: 0.0873 max mem: 9377 +Train: [80] [4100/6250] eta: 0:05:03 lr: 0.000012 grad: 0.1648 (0.1725) loss: 0.7832 (0.7879) time: 0.1525 data: 0.0668 max mem: 9377 +Train: [80] [4200/6250] eta: 0:04:50 lr: 0.000012 grad: 0.1606 (0.1725) loss: 0.7902 (0.7879) time: 0.1548 data: 0.0695 max mem: 9377 +Train: [80] [4300/6250] eta: 0:04:38 lr: 0.000012 grad: 0.1601 (0.1724) loss: 0.7952 (0.7879) time: 0.1827 data: 0.0904 max mem: 9377 +Train: [80] [4400/6250] eta: 0:04:25 lr: 0.000012 grad: 0.1576 (0.1723) loss: 0.7972 (0.7879) time: 0.1707 data: 0.0897 max mem: 9377 +Train: [80] [4500/6250] eta: 0:04:12 lr: 0.000012 grad: 0.1703 (0.1724) loss: 0.7846 (0.7879) time: 0.1697 data: 0.0905 max mem: 9377 +Train: [80] [4600/6250] eta: 0:03:58 lr: 0.000012 grad: 0.1764 (0.1724) loss: 0.7842 (0.7880) time: 0.1476 data: 0.0683 max mem: 9377 +Train: [80] [4700/6250] eta: 0:03:43 lr: 0.000012 grad: 0.1614 (0.1724) loss: 0.7907 (0.7880) time: 0.1453 data: 0.0603 max mem: 9377 +Train: [80] [4800/6250] eta: 0:03:29 lr: 0.000012 grad: 0.1611 (0.1725) loss: 0.7849 (0.7879) time: 0.1254 data: 0.0305 max mem: 9377 +Train: [80] [4900/6250] eta: 0:03:14 lr: 0.000012 grad: 0.1786 (0.1725) loss: 0.7811 (0.7880) time: 0.1491 data: 0.0635 max mem: 9377 +Train: [80] [5000/6250] eta: 0:02:59 lr: 0.000012 grad: 0.1707 (0.1726) loss: 0.7808 (0.7879) time: 0.1259 data: 0.0354 max mem: 9377 +Train: [80] [5100/6250] eta: 0:02:45 lr: 0.000012 grad: 0.1659 (0.1726) loss: 0.7936 (0.7880) time: 0.1361 data: 0.0525 max mem: 9377 +Train: [80] [5200/6250] eta: 0:02:30 lr: 0.000012 grad: 0.1635 (0.1726) loss: 0.7915 (0.7880) time: 0.1388 data: 0.0544 max mem: 9377 +Train: [80] [5300/6250] eta: 0:02:16 lr: 0.000012 grad: 0.1682 (0.1727) loss: 0.7889 (0.7880) time: 0.1564 data: 0.0698 max mem: 9377 +Train: [80] [5400/6250] eta: 0:02:01 lr: 0.000012 grad: 0.1757 (0.1727) loss: 0.7907 (0.7881) time: 0.1089 data: 0.0293 max mem: 9377 +Train: [80] [5500/6250] eta: 0:01:47 lr: 0.000012 grad: 0.1631 (0.1728) loss: 0.7889 (0.7881) time: 0.1439 data: 0.0551 max mem: 9377 +Train: [80] [5600/6250] eta: 0:01:32 lr: 0.000012 grad: 0.1683 (0.1728) loss: 0.7880 (0.7880) time: 0.1314 data: 0.0416 max mem: 9377 +Train: [80] [5700/6250] eta: 0:01:18 lr: 0.000012 grad: 0.1666 (0.1728) loss: 0.7884 (0.7880) time: 0.1547 data: 0.0772 max mem: 9377 +Train: [80] [5800/6250] eta: 0:01:04 lr: 0.000012 grad: 0.1640 (0.1729) loss: 0.7945 (0.7880) time: 0.1454 data: 0.0674 max mem: 9377 +Train: [80] [5900/6250] eta: 0:00:50 lr: 0.000012 grad: 0.1740 (0.1730) loss: 0.7888 (0.7879) time: 0.1315 data: 0.0463 max mem: 9377 +Train: [80] [6000/6250] eta: 0:00:35 lr: 0.000012 grad: 0.1695 (0.1731) loss: 0.7852 (0.7879) time: 0.1369 data: 0.0576 max mem: 9377 +Train: [80] [6100/6250] eta: 0:00:21 lr: 0.000012 grad: 0.1806 (0.1733) loss: 0.7790 (0.7878) time: 0.1213 data: 0.0398 max mem: 9377 +Train: [80] [6200/6250] eta: 0:00:07 lr: 0.000012 grad: 0.1681 (0.1734) loss: 0.7893 (0.7877) time: 0.1485 data: 0.0672 max mem: 9377 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.1655 (0.1734) loss: 0.7895 (0.7877) time: 0.1371 data: 0.0523 max mem: 9377 +Train: [80] Total time: 0:14:58 (0.1437 s / it) +Averaged stats: lr: 0.000012 grad: 0.1655 (0.1734) loss: 0.7895 (0.7877) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:05:50 loss: 0.8372 (0.8372) time: 5.6582 data: 5.6279 max mem: 9377 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.8425 (0.8420) time: 0.1341 data: 0.1090 max mem: 9377 +Eval (hcp-train-subset): [80] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (hcp-train-subset): loss: 0.8425 (0.8420) +Eval (hcp-val): [80] [ 0/62] eta: 0:03:59 loss: 0.8374 (0.8374) time: 3.8650 data: 3.8048 max mem: 9377 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.8405 (0.8406) time: 0.1195 data: 0.0927 max mem: 9377 +Eval (hcp-val): [80] Total time: 0:00:13 (0.2252 s / it) +Averaged stats (hcp-val): loss: 0.8405 (0.8406) +Eval (nsd-val): [80] [ 0/62] eta: 0:04:56 loss: 0.8169 (0.8169) time: 4.7903 data: 4.6429 max mem: 9377 +Eval (nsd-val): [80] [61/62] eta: 0:00:00 loss: 0.8336 (0.8337) time: 0.1295 data: 0.1023 max mem: 9377 +Eval (nsd-val): [80] Total time: 0:00:13 (0.2233 s / it) +Averaged stats (nsd-val): loss: 0.8336 (0.8337) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 8:21:19 lr: 0.000012 grad: 0.1919 (0.1919) loss: 0.8516 (0.8516) time: 4.8127 data: 4.6209 max mem: 9377 +Train: [81] [ 100/6250] eta: 0:20:16 lr: 0.000012 grad: 0.2196 (0.2388) loss: 0.7756 (0.7928) time: 0.1542 data: 0.0568 max mem: 9377 +Train: [81] [ 200/6250] eta: 0:17:11 lr: 0.000012 grad: 0.1980 (0.2248) loss: 0.7786 (0.7844) time: 0.1349 data: 0.0423 max mem: 9377 +Train: [81] [ 300/6250] eta: 0:15:48 lr: 0.000012 grad: 0.1755 (0.2143) loss: 0.7751 (0.7826) time: 0.1274 data: 0.0335 max mem: 9377 +Train: [81] [ 400/6250] eta: 0:15:03 lr: 0.000012 grad: 0.1936 (0.2077) loss: 0.7771 (0.7831) time: 0.1258 data: 0.0379 max mem: 9377 +Train: [81] [ 500/6250] eta: 0:14:25 lr: 0.000012 grad: 0.1687 (0.2011) loss: 0.7839 (0.7839) time: 0.1371 data: 0.0491 max mem: 9377 +Train: [81] [ 600/6250] eta: 0:13:52 lr: 0.000012 grad: 0.1770 (0.1971) loss: 0.7762 (0.7843) time: 0.1236 data: 0.0269 max mem: 9377 +Train: [81] [ 700/6250] eta: 0:13:35 lr: 0.000012 grad: 0.1641 (0.1943) loss: 0.7993 (0.7849) time: 0.1387 data: 0.0429 max mem: 9377 +Train: [81] [ 800/6250] eta: 0:13:18 lr: 0.000012 grad: 0.1666 (0.1926) loss: 0.7797 (0.7849) time: 0.1393 data: 0.0469 max mem: 9377 +Train: [81] [ 900/6250] eta: 0:12:58 lr: 0.000012 grad: 0.1704 (0.1911) loss: 0.7846 (0.7845) time: 0.1298 data: 0.0430 max mem: 9377 +Train: [81] [1000/6250] eta: 0:12:40 lr: 0.000012 grad: 0.1702 (0.1904) loss: 0.7798 (0.7841) time: 0.1520 data: 0.0645 max mem: 9377 +Train: [81] [1100/6250] eta: 0:12:21 lr: 0.000012 grad: 0.1714 (0.1896) loss: 0.7800 (0.7836) time: 0.1462 data: 0.0605 max mem: 9377 +Train: [81] [1200/6250] eta: 0:12:05 lr: 0.000012 grad: 0.1818 (0.1887) loss: 0.7816 (0.7832) time: 0.1387 data: 0.0520 max mem: 9377 +Train: [81] [1300/6250] eta: 0:11:49 lr: 0.000012 grad: 0.1746 (0.1879) loss: 0.7911 (0.7831) time: 0.1223 data: 0.0355 max mem: 9377 +Train: [81] [1400/6250] eta: 0:11:33 lr: 0.000012 grad: 0.1861 (0.1873) loss: 0.7771 (0.7828) time: 0.1479 data: 0.0653 max mem: 9377 +Train: [81] [1500/6250] eta: 0:11:18 lr: 0.000012 grad: 0.1769 (0.1871) loss: 0.7826 (0.7825) time: 0.1354 data: 0.0490 max mem: 9377 +Train: [81] [1600/6250] eta: 0:11:03 lr: 0.000012 grad: 0.1787 (0.1871) loss: 0.7752 (0.7822) time: 0.1433 data: 0.0633 max mem: 9377 +Train: [81] [1700/6250] eta: 0:10:49 lr: 0.000012 grad: 0.1823 (0.1870) loss: 0.7822 (0.7819) time: 0.1412 data: 0.0621 max mem: 9377 +Train: [81] [1800/6250] eta: 0:10:34 lr: 0.000012 grad: 0.1754 (0.1870) loss: 0.7798 (0.7815) time: 0.1437 data: 0.0600 max mem: 9377 +Train: [81] [1900/6250] eta: 0:10:19 lr: 0.000012 grad: 0.1731 (0.1868) loss: 0.7804 (0.7813) time: 0.1216 data: 0.0398 max mem: 9377 +Train: [81] [2000/6250] eta: 0:10:04 lr: 0.000012 grad: 0.1765 (0.1866) loss: 0.7681 (0.7811) time: 0.1476 data: 0.0628 max mem: 9377 +Train: [81] [2100/6250] eta: 0:09:48 lr: 0.000012 grad: 0.1767 (0.1862) loss: 0.7865 (0.7812) time: 0.1333 data: 0.0530 max mem: 9377 +Train: [81] [2200/6250] eta: 0:09:33 lr: 0.000012 grad: 0.1700 (0.1858) loss: 0.7799 (0.7812) time: 0.1580 data: 0.0808 max mem: 9377 +Train: [81] [2300/6250] eta: 0:09:18 lr: 0.000011 grad: 0.1764 (0.1856) loss: 0.7732 (0.7812) time: 0.1490 data: 0.0703 max mem: 9377 +Train: [81] [2400/6250] eta: 0:09:03 lr: 0.000011 grad: 0.1712 (0.1855) loss: 0.7825 (0.7812) time: 0.1410 data: 0.0571 max mem: 9377 +Train: [81] [2500/6250] eta: 0:08:49 lr: 0.000011 grad: 0.1811 (0.1852) loss: 0.7744 (0.7812) time: 0.1447 data: 0.0638 max mem: 9377 +Train: [81] [2600/6250] eta: 0:08:34 lr: 0.000011 grad: 0.1770 (0.1850) loss: 0.7813 (0.7811) time: 0.1242 data: 0.0395 max mem: 9377 +Train: [81] [2700/6250] eta: 0:08:20 lr: 0.000011 grad: 0.1780 (0.1848) loss: 0.7809 (0.7811) time: 0.1185 data: 0.0278 max mem: 9377 +Train: [81] [2800/6250] eta: 0:08:06 lr: 0.000011 grad: 0.1723 (0.1846) loss: 0.7843 (0.7811) time: 0.1469 data: 0.0673 max mem: 9377 +Train: [81] [2900/6250] eta: 0:07:53 lr: 0.000011 grad: 0.1732 (0.1843) loss: 0.7886 (0.7812) time: 0.1305 data: 0.0507 max mem: 9377 +Train: [81] [3000/6250] eta: 0:07:40 lr: 0.000011 grad: 0.1715 (0.1843) loss: 0.7840 (0.7812) time: 0.1387 data: 0.0551 max mem: 9377 +Train: [81] [3100/6250] eta: 0:07:25 lr: 0.000011 grad: 0.1879 (0.1843) loss: 0.7807 (0.7811) time: 0.1435 data: 0.0660 max mem: 9377 +Train: [81] [3200/6250] eta: 0:07:11 lr: 0.000011 grad: 0.1806 (0.1843) loss: 0.7820 (0.7810) time: 0.1077 data: 0.0241 max mem: 9377 +Train: [81] [3300/6250] eta: 0:06:57 lr: 0.000011 grad: 0.1849 (0.1842) loss: 0.7835 (0.7811) time: 0.1475 data: 0.0716 max mem: 9377 +Train: [81] [3400/6250] eta: 0:06:43 lr: 0.000011 grad: 0.1705 (0.1841) loss: 0.7797 (0.7811) time: 0.1462 data: 0.0671 max mem: 9377 +Train: [81] [3500/6250] eta: 0:06:28 lr: 0.000011 grad: 0.1776 (0.1841) loss: 0.7798 (0.7810) time: 0.1392 data: 0.0586 max mem: 9377 +Train: [81] [3600/6250] eta: 0:06:14 lr: 0.000011 grad: 0.1813 (0.1840) loss: 0.7763 (0.7809) time: 0.1676 data: 0.0843 max mem: 9377 +Train: [81] [3700/6250] eta: 0:05:59 lr: 0.000011 grad: 0.1781 (0.1840) loss: 0.7790 (0.7809) time: 0.1459 data: 0.0681 max mem: 9377 +Train: [81] [3800/6250] eta: 0:05:44 lr: 0.000011 grad: 0.1710 (0.1838) loss: 0.7795 (0.7810) time: 0.1304 data: 0.0442 max mem: 9377 +Train: [81] [3900/6250] eta: 0:05:30 lr: 0.000011 grad: 0.1725 (0.1836) loss: 0.7843 (0.7810) time: 0.1207 data: 0.0300 max mem: 9377 +Train: [81] [4000/6250] eta: 0:05:16 lr: 0.000011 grad: 0.1690 (0.1835) loss: 0.7841 (0.7811) time: 0.1292 data: 0.0485 max mem: 9377 +Train: [81] [4100/6250] eta: 0:05:01 lr: 0.000011 grad: 0.1746 (0.1836) loss: 0.7910 (0.7812) time: 0.1368 data: 0.0483 max mem: 9377 +Train: [81] [4200/6250] eta: 0:04:47 lr: 0.000011 grad: 0.1809 (0.1834) loss: 0.7790 (0.7812) time: 0.1566 data: 0.0729 max mem: 9377 +Train: [81] [4300/6250] eta: 0:04:34 lr: 0.000011 grad: 0.1746 (0.1833) loss: 0.7736 (0.7813) time: 0.1667 data: 0.0835 max mem: 9377 +Train: [81] [4400/6250] eta: 0:04:21 lr: 0.000011 grad: 0.1823 (0.1832) loss: 0.7803 (0.7814) time: 0.1476 data: 0.0625 max mem: 9377 +Train: [81] [4500/6250] eta: 0:04:07 lr: 0.000011 grad: 0.1680 (0.1831) loss: 0.7880 (0.7815) time: 0.1440 data: 0.0602 max mem: 9377 +Train: [81] [4600/6250] eta: 0:03:53 lr: 0.000011 grad: 0.1792 (0.1831) loss: 0.7838 (0.7816) time: 0.1269 data: 0.0460 max mem: 9377 +Train: [81] [4700/6250] eta: 0:03:39 lr: 0.000011 grad: 0.1772 (0.1830) loss: 0.7847 (0.7816) time: 0.1392 data: 0.0466 max mem: 9377 +Train: [81] [4800/6250] eta: 0:03:25 lr: 0.000011 grad: 0.1705 (0.1828) loss: 0.7966 (0.7817) time: 0.1394 data: 0.0588 max mem: 9377 +Train: [81] [4900/6250] eta: 0:03:11 lr: 0.000011 grad: 0.1700 (0.1827) loss: 0.7801 (0.7817) time: 0.1546 data: 0.0700 max mem: 9377 +Train: [81] [5000/6250] eta: 0:02:57 lr: 0.000011 grad: 0.1772 (0.1828) loss: 0.7785 (0.7817) time: 0.1482 data: 0.0610 max mem: 9377 +Train: [81] [5100/6250] eta: 0:02:43 lr: 0.000011 grad: 0.1754 (0.1827) loss: 0.7809 (0.7817) time: 0.1419 data: 0.0532 max mem: 9377 +Train: [81] [5200/6250] eta: 0:02:29 lr: 0.000011 grad: 0.1742 (0.1826) loss: 0.7815 (0.7817) time: 0.1537 data: 0.0632 max mem: 9377 +Train: [81] [5300/6250] eta: 0:02:15 lr: 0.000011 grad: 0.1814 (0.1826) loss: 0.7866 (0.7817) time: 0.1451 data: 0.0640 max mem: 9377 +Train: [81] [5400/6250] eta: 0:02:00 lr: 0.000011 grad: 0.1704 (0.1825) loss: 0.7820 (0.7817) time: 0.1268 data: 0.0453 max mem: 9377 +Train: [81] [5500/6250] eta: 0:01:46 lr: 0.000011 grad: 0.1820 (0.1824) loss: 0.7845 (0.7817) time: 0.1625 data: 0.0900 max mem: 9377 +Train: [81] [5600/6250] eta: 0:01:32 lr: 0.000011 grad: 0.1687 (0.1824) loss: 0.7752 (0.7817) time: 0.1661 data: 0.0876 max mem: 9377 +Train: [81] [5700/6250] eta: 0:01:18 lr: 0.000011 grad: 0.1742 (0.1824) loss: 0.7805 (0.7817) time: 0.1248 data: 0.0476 max mem: 9377 +Train: [81] [5800/6250] eta: 0:01:04 lr: 0.000011 grad: 0.1703 (0.1823) loss: 0.7828 (0.7818) time: 0.1251 data: 0.0459 max mem: 9377 +Train: [81] [5900/6250] eta: 0:00:49 lr: 0.000011 grad: 0.1674 (0.1823) loss: 0.7872 (0.7818) time: 0.1485 data: 0.0674 max mem: 9377 +Train: [81] [6000/6250] eta: 0:00:35 lr: 0.000011 grad: 0.1716 (0.1822) loss: 0.7950 (0.7819) time: 0.1582 data: 0.0822 max mem: 9377 +Train: [81] [6100/6250] eta: 0:00:21 lr: 0.000011 grad: 0.1765 (0.1822) loss: 0.7778 (0.7819) time: 0.1692 data: 0.0901 max mem: 9377 +Train: [81] [6200/6250] eta: 0:00:07 lr: 0.000011 grad: 0.1618 (0.1822) loss: 0.7946 (0.7820) time: 0.1392 data: 0.0614 max mem: 9377 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.1695 (0.1822) loss: 0.7904 (0.7820) time: 0.1272 data: 0.0407 max mem: 9377 +Train: [81] Total time: 0:14:58 (0.1438 s / it) +Averaged stats: lr: 0.000011 grad: 0.1695 (0.1822) loss: 0.7904 (0.7820) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:04:57 loss: 0.8371 (0.8371) time: 4.7969 data: 4.7666 max mem: 9377 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.8403 (0.8423) time: 0.1113 data: 0.0864 max mem: 9377 +Eval (hcp-train-subset): [81] Total time: 0:00:13 (0.2132 s / it) +Averaged stats (hcp-train-subset): loss: 0.8403 (0.8423) +Eval (hcp-val): [81] [ 0/62] eta: 0:05:54 loss: 0.8377 (0.8377) time: 5.7147 data: 5.6849 max mem: 9377 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.8403 (0.8412) time: 0.1098 data: 0.0847 max mem: 9377 +Eval (hcp-val): [81] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (hcp-val): loss: 0.8403 (0.8412) +Eval (nsd-val): [81] [ 0/62] eta: 0:06:01 loss: 0.8255 (0.8255) time: 5.8293 data: 5.7987 max mem: 9377 +Eval (nsd-val): [81] [61/62] eta: 0:00:00 loss: 0.8360 (0.8365) time: 0.1170 data: 0.0919 max mem: 9377 +Eval (nsd-val): [81] Total time: 0:00:13 (0.2214 s / it) +Averaged stats (nsd-val): loss: 0.8360 (0.8365) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [82] [ 0/6250] eta: 10:18:37 lr: 0.000011 grad: 0.1374 (0.1374) loss: 0.8466 (0.8466) time: 5.9388 data: 5.7137 max mem: 9377 +Train: [82] [ 100/6250] eta: 0:21:18 lr: 0.000011 grad: 0.2026 (0.2374) loss: 0.7637 (0.7904) time: 0.1493 data: 0.0434 max mem: 9377 +Train: [82] [ 200/6250] eta: 0:18:22 lr: 0.000011 grad: 0.2009 (0.2240) loss: 0.7815 (0.7835) time: 0.1420 data: 0.0427 max mem: 9377 +Train: [82] [ 300/6250] eta: 0:17:28 lr: 0.000011 grad: 0.1879 (0.2193) loss: 0.7794 (0.7813) time: 0.1513 data: 0.0524 max mem: 9377 +Train: [82] [ 400/6250] eta: 0:16:36 lr: 0.000011 grad: 0.1907 (0.2180) loss: 0.7669 (0.7792) time: 0.1140 data: 0.0102 max mem: 9377 +Train: [82] [ 500/6250] eta: 0:15:48 lr: 0.000011 grad: 0.1880 (0.2141) loss: 0.7717 (0.7782) time: 0.1386 data: 0.0477 max mem: 9377 +Train: [82] [ 600/6250] eta: 0:15:10 lr: 0.000011 grad: 0.1858 (0.2102) loss: 0.7732 (0.7784) time: 0.1294 data: 0.0461 max mem: 9377 +Train: [82] [ 700/6250] eta: 0:14:42 lr: 0.000011 grad: 0.1960 (0.2071) loss: 0.7803 (0.7786) time: 0.1353 data: 0.0421 max mem: 9377 +Train: [82] [ 800/6250] eta: 0:14:21 lr: 0.000011 grad: 0.1959 (0.2048) loss: 0.7897 (0.7792) time: 0.1617 data: 0.0823 max mem: 9377 +Train: [82] [ 900/6250] eta: 0:14:07 lr: 0.000011 grad: 0.1757 (0.2024) loss: 0.7876 (0.7801) time: 0.1420 data: 0.0529 max mem: 9377 +Train: [82] [1000/6250] eta: 0:13:55 lr: 0.000011 grad: 0.1717 (0.2007) loss: 0.7895 (0.7810) time: 0.1617 data: 0.0782 max mem: 9377 +Train: [82] [1100/6250] eta: 0:13:40 lr: 0.000011 grad: 0.1736 (0.1989) loss: 0.7789 (0.7814) time: 0.1832 data: 0.0989 max mem: 9377 +Train: [82] [1200/6250] eta: 0:13:23 lr: 0.000011 grad: 0.1749 (0.1973) loss: 0.7892 (0.7819) time: 0.1722 data: 0.0825 max mem: 9377 +Train: [82] [1300/6250] eta: 0:13:05 lr: 0.000011 grad: 0.1691 (0.1958) loss: 0.7900 (0.7823) time: 0.1446 data: 0.0608 max mem: 9377 +Train: [82] [1400/6250] eta: 0:12:46 lr: 0.000010 grad: 0.1686 (0.1943) loss: 0.7904 (0.7826) time: 0.1524 data: 0.0719 max mem: 9377 +Train: [82] [1500/6250] eta: 0:12:28 lr: 0.000010 grad: 0.1666 (0.1931) loss: 0.7849 (0.7829) time: 0.1381 data: 0.0578 max mem: 9377 +Train: [82] [1600/6250] eta: 0:12:10 lr: 0.000010 grad: 0.1734 (0.1920) loss: 0.7850 (0.7831) time: 0.1498 data: 0.0688 max mem: 9377 +Train: [82] [1700/6250] eta: 0:11:52 lr: 0.000010 grad: 0.1765 (0.1913) loss: 0.7767 (0.7831) time: 0.1230 data: 0.0384 max mem: 9377 +Train: [82] [1800/6250] eta: 0:11:33 lr: 0.000010 grad: 0.1808 (0.1904) loss: 0.7783 (0.7830) time: 0.1257 data: 0.0469 max mem: 9377 +Train: [82] [1900/6250] eta: 0:11:14 lr: 0.000010 grad: 0.1772 (0.1897) loss: 0.7753 (0.7831) time: 0.1540 data: 0.0720 max mem: 9377 +Train: [82] [2000/6250] eta: 0:10:54 lr: 0.000010 grad: 0.1687 (0.1891) loss: 0.7829 (0.7830) time: 0.1522 data: 0.0662 max mem: 9377 +Train: [82] [2100/6250] eta: 0:10:35 lr: 0.000010 grad: 0.1742 (0.1885) loss: 0.7845 (0.7830) time: 0.1413 data: 0.0585 max mem: 9377 +Train: [82] [2200/6250] eta: 0:10:17 lr: 0.000010 grad: 0.1678 (0.1879) loss: 0.7847 (0.7831) time: 0.1484 data: 0.0672 max mem: 9377 +Train: [82] [2300/6250] eta: 0:09:59 lr: 0.000010 grad: 0.1692 (0.1872) loss: 0.7847 (0.7832) time: 0.1229 data: 0.0395 max mem: 9377 +Train: [82] [2400/6250] eta: 0:09:41 lr: 0.000010 grad: 0.1620 (0.1866) loss: 0.7816 (0.7833) time: 0.1525 data: 0.0733 max mem: 9377 +Train: [82] [2500/6250] eta: 0:09:24 lr: 0.000010 grad: 0.1620 (0.1859) loss: 0.7884 (0.7835) time: 0.1370 data: 0.0592 max mem: 9377 +Train: [82] [2600/6250] eta: 0:09:07 lr: 0.000010 grad: 0.1579 (0.1854) loss: 0.7959 (0.7837) time: 0.1269 data: 0.0553 max mem: 9377 +Train: [82] [2700/6250] eta: 0:08:51 lr: 0.000010 grad: 0.1761 (0.1849) loss: 0.7817 (0.7838) time: 0.1236 data: 0.0404 max mem: 9377 +Train: [82] [2800/6250] eta: 0:08:34 lr: 0.000010 grad: 0.1757 (0.1845) loss: 0.7829 (0.7840) time: 0.1327 data: 0.0467 max mem: 9377 +Train: [82] [2900/6250] eta: 0:08:17 lr: 0.000010 grad: 0.1663 (0.1841) loss: 0.7882 (0.7841) time: 0.1336 data: 0.0477 max mem: 9377 +Train: [82] [3000/6250] eta: 0:08:01 lr: 0.000010 grad: 0.1698 (0.1837) loss: 0.7916 (0.7843) time: 0.1219 data: 0.0418 max mem: 9377 +Train: [82] [3100/6250] eta: 0:07:45 lr: 0.000010 grad: 0.1646 (0.1832) loss: 0.7903 (0.7845) time: 0.1309 data: 0.0504 max mem: 9377 +Train: [82] [3200/6250] eta: 0:07:29 lr: 0.000010 grad: 0.1571 (0.1828) loss: 0.7986 (0.7848) time: 0.1385 data: 0.0572 max mem: 9377 +Train: [82] [3300/6250] eta: 0:07:13 lr: 0.000010 grad: 0.1742 (0.1824) loss: 0.7944 (0.7850) time: 0.1190 data: 0.0317 max mem: 9377 +Train: [82] [3400/6250] eta: 0:06:58 lr: 0.000010 grad: 0.1690 (0.1821) loss: 0.7903 (0.7852) time: 0.1759 data: 0.0927 max mem: 9377 +Train: [82] [3500/6250] eta: 0:06:42 lr: 0.000010 grad: 0.1656 (0.1817) loss: 0.7908 (0.7854) time: 0.1404 data: 0.0588 max mem: 9377 +Train: [82] [3600/6250] eta: 0:06:27 lr: 0.000010 grad: 0.1648 (0.1814) loss: 0.7927 (0.7856) time: 0.1406 data: 0.0620 max mem: 9377 +Train: [82] [3700/6250] eta: 0:06:11 lr: 0.000010 grad: 0.1654 (0.1810) loss: 0.7943 (0.7858) time: 0.1471 data: 0.0658 max mem: 9377 +Train: [82] [3800/6250] eta: 0:05:56 lr: 0.000010 grad: 0.1680 (0.1808) loss: 0.7879 (0.7860) time: 0.1563 data: 0.0754 max mem: 9377 +Train: [82] [3900/6250] eta: 0:05:42 lr: 0.000010 grad: 0.1775 (0.1806) loss: 0.7801 (0.7861) time: 0.1528 data: 0.0715 max mem: 9377 +Train: [82] [4000/6250] eta: 0:05:27 lr: 0.000010 grad: 0.1641 (0.1804) loss: 0.7928 (0.7862) time: 0.1665 data: 0.0911 max mem: 9377 +Train: [82] [4100/6250] eta: 0:05:13 lr: 0.000010 grad: 0.1641 (0.1802) loss: 0.7970 (0.7863) time: 0.1464 data: 0.0657 max mem: 9377 +Train: [82] [4200/6250] eta: 0:04:58 lr: 0.000010 grad: 0.1713 (0.1799) loss: 0.7898 (0.7865) time: 0.1564 data: 0.0775 max mem: 9377 +Train: [82] [4300/6250] eta: 0:04:43 lr: 0.000010 grad: 0.1803 (0.1799) loss: 0.7887 (0.7865) time: 0.1265 data: 0.0506 max mem: 9377 +Train: [82] [4400/6250] eta: 0:04:29 lr: 0.000010 grad: 0.1650 (0.1798) loss: 0.7837 (0.7865) time: 0.1382 data: 0.0549 max mem: 9377 +Train: [82] [4500/6250] eta: 0:04:15 lr: 0.000010 grad: 0.1674 (0.1798) loss: 0.7902 (0.7864) time: 0.1755 data: 0.0920 max mem: 9377 +Train: [82] [4600/6250] eta: 0:04:01 lr: 0.000010 grad: 0.1799 (0.1799) loss: 0.7781 (0.7864) time: 0.1673 data: 0.0829 max mem: 9377 +Train: [82] [4700/6250] eta: 0:03:46 lr: 0.000010 grad: 0.1825 (0.1799) loss: 0.7772 (0.7863) time: 0.1456 data: 0.0601 max mem: 9377 +Train: [82] [4800/6250] eta: 0:03:32 lr: 0.000010 grad: 0.1728 (0.1799) loss: 0.7741 (0.7862) time: 0.1131 data: 0.0343 max mem: 9377 +Train: [82] [4900/6250] eta: 0:03:17 lr: 0.000010 grad: 0.1718 (0.1799) loss: 0.7827 (0.7861) time: 0.1496 data: 0.0672 max mem: 9377 +Train: [82] [5000/6250] eta: 0:03:02 lr: 0.000010 grad: 0.1729 (0.1799) loss: 0.7865 (0.7860) time: 0.1470 data: 0.0652 max mem: 9377 +Train: [82] [5100/6250] eta: 0:02:47 lr: 0.000010 grad: 0.1784 (0.1799) loss: 0.7766 (0.7859) time: 0.1370 data: 0.0507 max mem: 9377 +Train: [82] [5200/6250] eta: 0:02:32 lr: 0.000010 grad: 0.1757 (0.1799) loss: 0.7813 (0.7858) time: 0.1314 data: 0.0425 max mem: 9377 +Train: [82] [5300/6250] eta: 0:02:17 lr: 0.000010 grad: 0.1946 (0.1800) loss: 0.7638 (0.7856) time: 0.1362 data: 0.0497 max mem: 9377 +Train: [82] [5400/6250] eta: 0:02:03 lr: 0.000010 grad: 0.1846 (0.1801) loss: 0.7714 (0.7854) time: 0.1169 data: 0.0279 max mem: 9377 +Train: [82] [5500/6250] eta: 0:01:48 lr: 0.000010 grad: 0.1759 (0.1802) loss: 0.7840 (0.7853) time: 0.1249 data: 0.0321 max mem: 9377 +Train: [82] [5600/6250] eta: 0:01:33 lr: 0.000010 grad: 0.1686 (0.1802) loss: 0.7761 (0.7851) time: 0.1411 data: 0.0516 max mem: 9377 +Train: [82] [5700/6250] eta: 0:01:19 lr: 0.000010 grad: 0.1875 (0.1802) loss: 0.7750 (0.7850) time: 0.1386 data: 0.0561 max mem: 9377 +Train: [82] [5800/6250] eta: 0:01:04 lr: 0.000010 grad: 0.1790 (0.1802) loss: 0.7809 (0.7849) time: 0.1445 data: 0.0607 max mem: 9377 +Train: [82] [5900/6250] eta: 0:00:50 lr: 0.000010 grad: 0.1752 (0.1801) loss: 0.7757 (0.7848) time: 0.1321 data: 0.0489 max mem: 9377 +Train: [82] [6000/6250] eta: 0:00:35 lr: 0.000010 grad: 0.1769 (0.1801) loss: 0.7759 (0.7848) time: 0.1286 data: 0.0456 max mem: 9377 +Train: [82] [6100/6250] eta: 0:00:21 lr: 0.000010 grad: 0.1783 (0.1801) loss: 0.7734 (0.7847) time: 0.1475 data: 0.0621 max mem: 9377 +Train: [82] [6200/6250] eta: 0:00:07 lr: 0.000010 grad: 0.1791 (0.1801) loss: 0.7751 (0.7846) time: 0.1176 data: 0.0358 max mem: 9377 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.1799 (0.1801) loss: 0.7789 (0.7846) time: 0.1333 data: 0.0497 max mem: 9377 +Train: [82] Total time: 0:15:05 (0.1448 s / it) +Averaged stats: lr: 0.000010 grad: 0.1799 (0.1801) loss: 0.7789 (0.7846) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:04:11 loss: 0.8405 (0.8405) time: 4.0571 data: 3.9718 max mem: 9377 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.8388 (0.8419) time: 0.1236 data: 0.0984 max mem: 9377 +Eval (hcp-train-subset): [82] Total time: 0:00:13 (0.2191 s / it) +Averaged stats (hcp-train-subset): loss: 0.8388 (0.8419) +Eval (hcp-val): [82] [ 0/62] eta: 0:03:49 loss: 0.8355 (0.8355) time: 3.7026 data: 3.6153 max mem: 9377 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.8382 (0.8402) time: 0.1411 data: 0.1144 max mem: 9377 +Eval (hcp-val): [82] Total time: 0:00:14 (0.2269 s / it) +Averaged stats (hcp-val): loss: 0.8382 (0.8402) +Eval (nsd-val): [82] [ 0/62] eta: 0:04:32 loss: 0.8213 (0.8213) time: 4.3879 data: 4.3132 max mem: 9377 +Eval (nsd-val): [82] [61/62] eta: 0:00:00 loss: 0.8322 (0.8330) time: 0.1223 data: 0.0972 max mem: 9377 +Eval (nsd-val): [82] Total time: 0:00:13 (0.2167 s / it) +Averaged stats (nsd-val): loss: 0.8322 (0.8330) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 8:51:46 lr: 0.000010 grad: 0.1143 (0.1143) loss: 0.8434 (0.8434) time: 5.1050 data: 4.7946 max mem: 9377 +Train: [83] [ 100/6250] eta: 0:19:42 lr: 0.000010 grad: 0.1546 (0.2174) loss: 0.8113 (0.8079) time: 0.1226 data: 0.0230 max mem: 9377 +Train: [83] [ 200/6250] eta: 0:17:02 lr: 0.000010 grad: 0.1623 (0.1947) loss: 0.8214 (0.8096) time: 0.1328 data: 0.0382 max mem: 9377 +Train: [83] [ 300/6250] eta: 0:15:54 lr: 0.000010 grad: 0.1673 (0.1876) loss: 0.8019 (0.8076) time: 0.1443 data: 0.0560 max mem: 9377 +Train: [83] [ 400/6250] eta: 0:15:12 lr: 0.000010 grad: 0.1696 (0.1875) loss: 0.7968 (0.8041) time: 0.1424 data: 0.0521 max mem: 9377 +Train: [83] [ 500/6250] eta: 0:14:33 lr: 0.000010 grad: 0.1803 (0.1864) loss: 0.7907 (0.8012) time: 0.1377 data: 0.0445 max mem: 9377 +Train: [83] [ 600/6250] eta: 0:14:00 lr: 0.000010 grad: 0.1663 (0.1843) loss: 0.7950 (0.7999) time: 0.1392 data: 0.0530 max mem: 9377 +Train: [83] [ 700/6250] eta: 0:13:30 lr: 0.000009 grad: 0.1739 (0.1826) loss: 0.7937 (0.7987) time: 0.1190 data: 0.0199 max mem: 9377 +Train: [83] [ 800/6250] eta: 0:13:06 lr: 0.000009 grad: 0.1710 (0.1815) loss: 0.7904 (0.7978) time: 0.1392 data: 0.0513 max mem: 9377 +Train: [83] [ 900/6250] eta: 0:12:46 lr: 0.000009 grad: 0.1791 (0.1809) loss: 0.7911 (0.7971) time: 0.1350 data: 0.0432 max mem: 9377 +Train: [83] [1000/6250] eta: 0:12:30 lr: 0.000009 grad: 0.1731 (0.1802) loss: 0.7984 (0.7966) time: 0.1283 data: 0.0368 max mem: 9377 +Train: [83] [1100/6250] eta: 0:12:13 lr: 0.000009 grad: 0.1660 (0.1795) loss: 0.7971 (0.7961) time: 0.1211 data: 0.0245 max mem: 9377 +Train: [83] [1200/6250] eta: 0:11:55 lr: 0.000009 grad: 0.1781 (0.1795) loss: 0.7797 (0.7952) time: 0.1420 data: 0.0625 max mem: 9377 +Train: [83] [1300/6250] eta: 0:11:42 lr: 0.000009 grad: 0.1796 (0.1794) loss: 0.7811 (0.7944) time: 0.1387 data: 0.0559 max mem: 9377 +Train: [83] [1400/6250] eta: 0:11:26 lr: 0.000009 grad: 0.1796 (0.1791) loss: 0.7783 (0.7938) time: 0.1110 data: 0.0231 max mem: 9377 +Train: [83] [1500/6250] eta: 0:11:11 lr: 0.000009 grad: 0.1740 (0.1789) loss: 0.7861 (0.7933) time: 0.1519 data: 0.0707 max mem: 9377 +Train: [83] [1600/6250] eta: 0:10:55 lr: 0.000009 grad: 0.1725 (0.1787) loss: 0.7820 (0.7928) time: 0.1148 data: 0.0307 max mem: 9377 +Train: [83] [1700/6250] eta: 0:10:42 lr: 0.000009 grad: 0.1900 (0.1788) loss: 0.7735 (0.7921) time: 0.1368 data: 0.0560 max mem: 9377 +Train: [83] [1800/6250] eta: 0:10:26 lr: 0.000009 grad: 0.1714 (0.1787) loss: 0.7873 (0.7918) time: 0.1426 data: 0.0549 max mem: 9377 +Train: [83] [1900/6250] eta: 0:10:12 lr: 0.000009 grad: 0.1729 (0.1786) loss: 0.7793 (0.7912) time: 0.1516 data: 0.0680 max mem: 9377 +Train: [83] [2000/6250] eta: 0:09:55 lr: 0.000009 grad: 0.1598 (0.1785) loss: 0.7923 (0.7909) time: 0.1218 data: 0.0358 max mem: 9377 +Train: [83] [2100/6250] eta: 0:09:41 lr: 0.000009 grad: 0.1808 (0.1785) loss: 0.7806 (0.7906) time: 0.1548 data: 0.0693 max mem: 9377 +Train: [83] [2200/6250] eta: 0:09:26 lr: 0.000009 grad: 0.1797 (0.1785) loss: 0.7809 (0.7903) time: 0.1292 data: 0.0518 max mem: 9377 +Train: [83] [2300/6250] eta: 0:09:13 lr: 0.000009 grad: 0.1712 (0.1783) loss: 0.7858 (0.7902) time: 0.1450 data: 0.0728 max mem: 9377 +Train: [83] [2400/6250] eta: 0:08:59 lr: 0.000009 grad: 0.1684 (0.1784) loss: 0.7931 (0.7899) time: 0.1374 data: 0.0570 max mem: 9377 +Train: [83] [2500/6250] eta: 0:08:45 lr: 0.000009 grad: 0.1778 (0.1783) loss: 0.7870 (0.7898) time: 0.1360 data: 0.0520 max mem: 9377 +Train: [83] [2600/6250] eta: 0:08:32 lr: 0.000009 grad: 0.1684 (0.1781) loss: 0.7841 (0.7897) time: 0.1815 data: 0.1051 max mem: 9377 +Train: [83] [2700/6250] eta: 0:08:17 lr: 0.000009 grad: 0.1746 (0.1780) loss: 0.7843 (0.7895) time: 0.1364 data: 0.0553 max mem: 9377 +Train: [83] [2800/6250] eta: 0:08:04 lr: 0.000009 grad: 0.1722 (0.1780) loss: 0.7818 (0.7893) time: 0.1728 data: 0.0878 max mem: 9377 +Train: [83] [2900/6250] eta: 0:07:50 lr: 0.000009 grad: 0.1792 (0.1781) loss: 0.7828 (0.7891) time: 0.1489 data: 0.0685 max mem: 9377 +Train: [83] [3000/6250] eta: 0:07:35 lr: 0.000009 grad: 0.1781 (0.1782) loss: 0.7856 (0.7890) time: 0.1346 data: 0.0563 max mem: 9377 +Train: [83] [3100/6250] eta: 0:07:21 lr: 0.000009 grad: 0.1789 (0.1783) loss: 0.7853 (0.7889) time: 0.1446 data: 0.0582 max mem: 9377 +Train: [83] [3200/6250] eta: 0:07:07 lr: 0.000009 grad: 0.1767 (0.1782) loss: 0.7740 (0.7887) time: 0.1531 data: 0.0703 max mem: 9377 +Train: [83] [3300/6250] eta: 0:06:53 lr: 0.000009 grad: 0.1785 (0.1784) loss: 0.7879 (0.7886) time: 0.1536 data: 0.0681 max mem: 9377 +Train: [83] [3400/6250] eta: 0:06:39 lr: 0.000009 grad: 0.1799 (0.1784) loss: 0.7841 (0.7885) time: 0.1320 data: 0.0520 max mem: 9377 +Train: [83] [3500/6250] eta: 0:06:26 lr: 0.000009 grad: 0.1697 (0.1783) loss: 0.7947 (0.7885) time: 0.1324 data: 0.0506 max mem: 9377 +Train: [83] [3600/6250] eta: 0:06:12 lr: 0.000009 grad: 0.1667 (0.1783) loss: 0.7887 (0.7884) time: 0.1318 data: 0.0484 max mem: 9377 +Train: [83] [3700/6250] eta: 0:05:58 lr: 0.000009 grad: 0.1840 (0.1783) loss: 0.7845 (0.7885) time: 0.1359 data: 0.0542 max mem: 9377 +Train: [83] [3800/6250] eta: 0:05:44 lr: 0.000009 grad: 0.1755 (0.1784) loss: 0.7843 (0.7884) time: 0.1296 data: 0.0494 max mem: 9377 +Train: [83] [3900/6250] eta: 0:05:29 lr: 0.000009 grad: 0.1661 (0.1783) loss: 0.7848 (0.7884) time: 0.1397 data: 0.0580 max mem: 9377 +Train: [83] [4000/6250] eta: 0:05:15 lr: 0.000009 grad: 0.1681 (0.1782) loss: 0.7862 (0.7885) time: 0.1275 data: 0.0509 max mem: 9377 +Train: [83] [4100/6250] eta: 0:05:01 lr: 0.000009 grad: 0.1706 (0.1782) loss: 0.7916 (0.7885) time: 0.1351 data: 0.0581 max mem: 9377 +Train: [83] [4200/6250] eta: 0:04:48 lr: 0.000009 grad: 0.1717 (0.1782) loss: 0.7883 (0.7885) time: 0.1427 data: 0.0609 max mem: 9377 +Train: [83] [4300/6250] eta: 0:04:34 lr: 0.000009 grad: 0.1641 (0.1781) loss: 0.7948 (0.7886) time: 0.1476 data: 0.0607 max mem: 9377 +Train: [83] [4400/6250] eta: 0:04:21 lr: 0.000009 grad: 0.1762 (0.1780) loss: 0.7875 (0.7887) time: 0.1683 data: 0.0833 max mem: 9377 +Train: [83] [4500/6250] eta: 0:04:07 lr: 0.000009 grad: 0.1863 (0.1781) loss: 0.7756 (0.7886) time: 0.1531 data: 0.0683 max mem: 9377 +Train: [83] [4600/6250] eta: 0:03:53 lr: 0.000009 grad: 0.1778 (0.1782) loss: 0.7869 (0.7886) time: 0.1563 data: 0.0791 max mem: 9377 +Train: [83] [4700/6250] eta: 0:03:39 lr: 0.000009 grad: 0.1866 (0.1784) loss: 0.7830 (0.7885) time: 0.2012 data: 0.1239 max mem: 9377 +Train: [83] [4800/6250] eta: 0:03:25 lr: 0.000009 grad: 0.1783 (0.1785) loss: 0.7848 (0.7884) time: 0.1479 data: 0.0720 max mem: 9377 +Train: [83] [4900/6250] eta: 0:03:11 lr: 0.000009 grad: 0.1890 (0.1786) loss: 0.7771 (0.7884) time: 0.1403 data: 0.0654 max mem: 9377 +Train: [83] [5000/6250] eta: 0:02:56 lr: 0.000009 grad: 0.1710 (0.1787) loss: 0.7823 (0.7883) time: 0.1157 data: 0.0353 max mem: 9377 +Train: [83] [5100/6250] eta: 0:02:42 lr: 0.000009 grad: 0.1775 (0.1788) loss: 0.7839 (0.7882) time: 0.1306 data: 0.0464 max mem: 9377 +Train: [83] [5200/6250] eta: 0:02:28 lr: 0.000009 grad: 0.1769 (0.1788) loss: 0.7766 (0.7882) time: 0.1214 data: 0.0318 max mem: 9377 +Train: [83] [5300/6250] eta: 0:02:14 lr: 0.000009 grad: 0.1790 (0.1791) loss: 0.7723 (0.7880) time: 0.1426 data: 0.0590 max mem: 9377 +Train: [83] [5400/6250] eta: 0:01:59 lr: 0.000009 grad: 0.1847 (0.1794) loss: 0.7752 (0.7878) time: 0.1302 data: 0.0397 max mem: 9377 +Train: [83] [5500/6250] eta: 0:01:45 lr: 0.000009 grad: 0.1764 (0.1795) loss: 0.7774 (0.7877) time: 0.1283 data: 0.0355 max mem: 9377 +Train: [83] [5600/6250] eta: 0:01:31 lr: 0.000009 grad: 0.1749 (0.1795) loss: 0.7798 (0.7876) time: 0.1187 data: 0.0334 max mem: 9377 +Train: [83] [5700/6250] eta: 0:01:17 lr: 0.000009 grad: 0.1867 (0.1796) loss: 0.7777 (0.7875) time: 0.1341 data: 0.0509 max mem: 9377 +Train: [83] [5800/6250] eta: 0:01:03 lr: 0.000009 grad: 0.1772 (0.1797) loss: 0.7761 (0.7874) time: 0.1482 data: 0.0618 max mem: 9377 +Train: [83] [5900/6250] eta: 0:00:49 lr: 0.000009 grad: 0.1784 (0.1797) loss: 0.7776 (0.7873) time: 0.1818 data: 0.1031 max mem: 9377 +Train: [83] [6000/6250] eta: 0:00:35 lr: 0.000009 grad: 0.1729 (0.1797) loss: 0.7760 (0.7871) time: 0.1507 data: 0.0703 max mem: 9377 +Train: [83] [6100/6250] eta: 0:00:21 lr: 0.000009 grad: 0.1775 (0.1797) loss: 0.7770 (0.7870) time: 0.1789 data: 0.1052 max mem: 9377 +Train: [83] [6200/6250] eta: 0:00:07 lr: 0.000009 grad: 0.1755 (0.1797) loss: 0.7835 (0.7870) time: 0.1542 data: 0.0761 max mem: 9377 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.1770 (0.1798) loss: 0.7826 (0.7869) time: 0.1443 data: 0.0635 max mem: 9377 +Train: [83] Total time: 0:14:47 (0.1421 s / it) +Averaged stats: lr: 0.000009 grad: 0.1770 (0.1798) loss: 0.7826 (0.7869) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:03:36 loss: 0.8386 (0.8386) time: 3.4908 data: 3.4227 max mem: 9377 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.8398 (0.8422) time: 0.1196 data: 0.0929 max mem: 9377 +Eval (hcp-train-subset): [83] Total time: 0:00:13 (0.2114 s / it) +Averaged stats (hcp-train-subset): loss: 0.8398 (0.8422) +Eval (hcp-val): [83] [ 0/62] eta: 0:04:53 loss: 0.8391 (0.8391) time: 4.7357 data: 4.6702 max mem: 9377 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.8386 (0.8408) time: 0.1329 data: 0.1076 max mem: 9377 +Eval (hcp-val): [83] Total time: 0:00:13 (0.2168 s / it) +Averaged stats (hcp-val): loss: 0.8386 (0.8408) +Eval (nsd-val): [83] [ 0/62] eta: 0:03:43 loss: 0.8237 (0.8237) time: 3.6037 data: 3.5333 max mem: 9377 +Eval (nsd-val): [83] [61/62] eta: 0:00:00 loss: 0.8308 (0.8322) time: 0.1162 data: 0.0911 max mem: 9377 +Eval (nsd-val): [83] Total time: 0:00:13 (0.2189 s / it) +Averaged stats (nsd-val): loss: 0.8308 (0.8322) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [84] [ 0/6250] eta: 10:38:55 lr: 0.000009 grad: 0.2022 (0.2022) loss: 0.8005 (0.8005) time: 6.1337 data: 6.0310 max mem: 9377 +Train: [84] [ 100/6250] eta: 0:21:06 lr: 0.000009 grad: 0.1704 (0.2166) loss: 0.8063 (0.8110) time: 0.1607 data: 0.0558 max mem: 9377 +Train: [84] [ 200/6250] eta: 0:18:37 lr: 0.000009 grad: 0.1788 (0.2046) loss: 0.7925 (0.8041) time: 0.1788 data: 0.0747 max mem: 9377 +Train: [84] [ 300/6250] eta: 0:16:57 lr: 0.000008 grad: 0.1693 (0.2021) loss: 0.7947 (0.7997) time: 0.1197 data: 0.0331 max mem: 9377 +Train: [84] [ 400/6250] eta: 0:16:01 lr: 0.000008 grad: 0.1796 (0.1985) loss: 0.7837 (0.7973) time: 0.1519 data: 0.0603 max mem: 9377 +Train: [84] [ 500/6250] eta: 0:15:24 lr: 0.000008 grad: 0.1783 (0.1957) loss: 0.7930 (0.7954) time: 0.1575 data: 0.0732 max mem: 9377 +Train: [84] [ 600/6250] eta: 0:14:52 lr: 0.000008 grad: 0.1820 (0.1938) loss: 0.7873 (0.7939) time: 0.1501 data: 0.0616 max mem: 9377 +Train: [84] [ 700/6250] eta: 0:14:24 lr: 0.000008 grad: 0.1851 (0.1930) loss: 0.7815 (0.7923) time: 0.1662 data: 0.0719 max mem: 9377 +Train: [84] [ 800/6250] eta: 0:14:15 lr: 0.000008 grad: 0.1756 (0.1918) loss: 0.7874 (0.7914) time: 0.1403 data: 0.0468 max mem: 9377 +Train: [84] [ 900/6250] eta: 0:14:06 lr: 0.000008 grad: 0.1779 (0.1908) loss: 0.7847 (0.7909) time: 0.1760 data: 0.0923 max mem: 9377 +Train: [84] [1000/6250] eta: 0:13:58 lr: 0.000008 grad: 0.1798 (0.1899) loss: 0.7774 (0.7903) time: 0.1744 data: 0.0942 max mem: 9377 +Train: [84] [1100/6250] eta: 0:13:46 lr: 0.000008 grad: 0.1728 (0.1889) loss: 0.7713 (0.7897) time: 0.1578 data: 0.0789 max mem: 9377 +Train: [84] [1200/6250] eta: 0:13:33 lr: 0.000008 grad: 0.1834 (0.1886) loss: 0.7842 (0.7890) time: 0.1533 data: 0.0715 max mem: 9377 +Train: [84] [1300/6250] eta: 0:13:20 lr: 0.000008 grad: 0.1767 (0.1882) loss: 0.7840 (0.7885) time: 0.1600 data: 0.0814 max mem: 9377 +Train: [84] [1400/6250] eta: 0:13:03 lr: 0.000008 grad: 0.1732 (0.1876) loss: 0.7845 (0.7882) time: 0.1548 data: 0.0748 max mem: 9377 +Train: [84] [1500/6250] eta: 0:12:44 lr: 0.000008 grad: 0.1706 (0.1868) loss: 0.7764 (0.7882) time: 0.1414 data: 0.0609 max mem: 9377 +Train: [84] [1600/6250] eta: 0:12:28 lr: 0.000008 grad: 0.1753 (0.1862) loss: 0.7856 (0.7881) time: 0.1471 data: 0.0727 max mem: 9377 +Train: [84] [1700/6250] eta: 0:12:09 lr: 0.000008 grad: 0.1752 (0.1857) loss: 0.7822 (0.7880) time: 0.1518 data: 0.0654 max mem: 9377 +Train: [84] [1800/6250] eta: 0:11:49 lr: 0.000008 grad: 0.1732 (0.1852) loss: 0.7820 (0.7878) time: 0.1280 data: 0.0431 max mem: 9377 +Train: [84] [1900/6250] eta: 0:11:29 lr: 0.000008 grad: 0.1849 (0.1851) loss: 0.7715 (0.7876) time: 0.1365 data: 0.0551 max mem: 9377 +Train: [84] [2000/6250] eta: 0:11:10 lr: 0.000008 grad: 0.1774 (0.1850) loss: 0.7827 (0.7874) time: 0.1505 data: 0.0669 max mem: 9377 +Train: [84] [2100/6250] eta: 0:10:49 lr: 0.000008 grad: 0.1798 (0.1849) loss: 0.7909 (0.7873) time: 0.1324 data: 0.0473 max mem: 9377 +Train: [84] [2200/6250] eta: 0:10:30 lr: 0.000008 grad: 0.1805 (0.1848) loss: 0.7829 (0.7871) time: 0.1197 data: 0.0349 max mem: 9377 +Train: [84] [2300/6250] eta: 0:10:13 lr: 0.000008 grad: 0.1810 (0.1848) loss: 0.7867 (0.7869) time: 0.1593 data: 0.0754 max mem: 9377 +Train: [84] [2400/6250] eta: 0:09:55 lr: 0.000008 grad: 0.1840 (0.1846) loss: 0.7786 (0.7869) time: 0.1421 data: 0.0575 max mem: 9377 +Train: [84] [2500/6250] eta: 0:09:37 lr: 0.000008 grad: 0.1809 (0.1846) loss: 0.7900 (0.7869) time: 0.1184 data: 0.0350 max mem: 9377 +Train: [84] [2600/6250] eta: 0:09:21 lr: 0.000008 grad: 0.1881 (0.1845) loss: 0.7856 (0.7869) time: 0.1388 data: 0.0646 max mem: 9377 +Train: [84] [2700/6250] eta: 0:09:02 lr: 0.000008 grad: 0.1792 (0.1843) loss: 0.7848 (0.7869) time: 0.1476 data: 0.0642 max mem: 9377 +Train: [84] [2800/6250] eta: 0:08:45 lr: 0.000008 grad: 0.1722 (0.1841) loss: 0.7890 (0.7871) time: 0.1363 data: 0.0536 max mem: 9377 +Train: [84] [2900/6250] eta: 0:08:28 lr: 0.000008 grad: 0.1716 (0.1839) loss: 0.7887 (0.7871) time: 0.1541 data: 0.0726 max mem: 9377 +Train: [84] [3000/6250] eta: 0:08:12 lr: 0.000008 grad: 0.1762 (0.1838) loss: 0.7876 (0.7871) time: 0.1307 data: 0.0511 max mem: 9377 +Train: [84] [3100/6250] eta: 0:07:57 lr: 0.000008 grad: 0.1821 (0.1838) loss: 0.7793 (0.7870) time: 0.0943 data: 0.0002 max mem: 9377 +Train: [84] [3200/6250] eta: 0:07:40 lr: 0.000008 grad: 0.1766 (0.1837) loss: 0.7868 (0.7869) time: 0.1385 data: 0.0579 max mem: 9377 +Train: [84] [3300/6250] eta: 0:07:26 lr: 0.000008 grad: 0.1728 (0.1836) loss: 0.7793 (0.7868) time: 0.1583 data: 0.0794 max mem: 9377 +Train: [84] [3400/6250] eta: 0:07:10 lr: 0.000008 grad: 0.1735 (0.1836) loss: 0.7832 (0.7867) time: 0.1544 data: 0.0796 max mem: 9377 +Train: [84] [3500/6250] eta: 0:06:55 lr: 0.000008 grad: 0.1709 (0.1834) loss: 0.7880 (0.7868) time: 0.1657 data: 0.0818 max mem: 9377 +Train: [84] [3600/6250] eta: 0:06:40 lr: 0.000008 grad: 0.1702 (0.1831) loss: 0.7906 (0.7868) time: 0.1677 data: 0.0866 max mem: 9377 +Train: [84] [3700/6250] eta: 0:06:24 lr: 0.000008 grad: 0.1809 (0.1830) loss: 0.7770 (0.7868) time: 0.1120 data: 0.0301 max mem: 9377 +Train: [84] [3800/6250] eta: 0:06:09 lr: 0.000008 grad: 0.1812 (0.1828) loss: 0.7831 (0.7868) time: 0.1389 data: 0.0519 max mem: 9377 +Train: [84] [3900/6250] eta: 0:05:53 lr: 0.000008 grad: 0.1844 (0.1827) loss: 0.7939 (0.7869) time: 0.1310 data: 0.0528 max mem: 9377 +Train: [84] [4000/6250] eta: 0:05:38 lr: 0.000008 grad: 0.1731 (0.1826) loss: 0.7904 (0.7869) time: 0.1339 data: 0.0471 max mem: 9377 +Train: [84] [4100/6250] eta: 0:05:23 lr: 0.000008 grad: 0.1808 (0.1826) loss: 0.7940 (0.7869) time: 0.1898 data: 0.1121 max mem: 9377 +Train: [84] [4200/6250] eta: 0:05:08 lr: 0.000008 grad: 0.1709 (0.1825) loss: 0.7907 (0.7869) time: 0.1557 data: 0.0762 max mem: 9377 +Train: [84] [4300/6250] eta: 0:04:54 lr: 0.000008 grad: 0.1747 (0.1824) loss: 0.7823 (0.7870) time: 0.1308 data: 0.0481 max mem: 9377 +Train: [84] [4400/6250] eta: 0:04:39 lr: 0.000008 grad: 0.1744 (0.1824) loss: 0.7835 (0.7870) time: 0.1293 data: 0.0427 max mem: 9377 +Train: [84] [4500/6250] eta: 0:04:24 lr: 0.000008 grad: 0.1762 (0.1822) loss: 0.7807 (0.7870) time: 0.1478 data: 0.0652 max mem: 9377 +Train: [84] [4600/6250] eta: 0:04:09 lr: 0.000008 grad: 0.1734 (0.1821) loss: 0.7897 (0.7870) time: 0.1530 data: 0.0705 max mem: 9377 +Train: [84] [4700/6250] eta: 0:03:55 lr: 0.000008 grad: 0.1692 (0.1821) loss: 0.7898 (0.7870) time: 0.1646 data: 0.0838 max mem: 9377 +Train: [84] [4800/6250] eta: 0:03:40 lr: 0.000008 grad: 0.1713 (0.1822) loss: 0.7869 (0.7869) time: 0.1526 data: 0.0670 max mem: 9377 +Train: [84] [4900/6250] eta: 0:03:25 lr: 0.000008 grad: 0.1692 (0.1821) loss: 0.7905 (0.7869) time: 0.1638 data: 0.0831 max mem: 9377 +Train: [84] [5000/6250] eta: 0:03:09 lr: 0.000008 grad: 0.1745 (0.1820) loss: 0.7879 (0.7869) time: 0.1775 data: 0.0993 max mem: 9377 +Train: [84] [5100/6250] eta: 0:02:54 lr: 0.000008 grad: 0.1737 (0.1820) loss: 0.7935 (0.7869) time: 0.1320 data: 0.0539 max mem: 9377 +Train: [84] [5200/6250] eta: 0:02:38 lr: 0.000008 grad: 0.1730 (0.1820) loss: 0.7846 (0.7868) time: 0.1265 data: 0.0469 max mem: 9377 +Train: [84] [5300/6250] eta: 0:02:23 lr: 0.000008 grad: 0.1734 (0.1820) loss: 0.7882 (0.7868) time: 0.1384 data: 0.0535 max mem: 9377 +Train: [84] [5400/6250] eta: 0:02:08 lr: 0.000008 grad: 0.1795 (0.1819) loss: 0.7856 (0.7868) time: 0.1492 data: 0.0668 max mem: 9377 +Train: [84] [5500/6250] eta: 0:01:52 lr: 0.000008 grad: 0.1703 (0.1818) loss: 0.7912 (0.7868) time: 0.1472 data: 0.0600 max mem: 9377 +Train: [84] [5600/6250] eta: 0:01:37 lr: 0.000008 grad: 0.1746 (0.1818) loss: 0.7862 (0.7868) time: 0.1212 data: 0.0383 max mem: 9377 +Train: [84] [5700/6250] eta: 0:01:22 lr: 0.000008 grad: 0.1698 (0.1818) loss: 0.7894 (0.7868) time: 0.1373 data: 0.0491 max mem: 9377 +Train: [84] [5800/6250] eta: 0:01:07 lr: 0.000008 grad: 0.1737 (0.1818) loss: 0.7918 (0.7868) time: 0.1399 data: 0.0619 max mem: 9377 +Train: [84] [5900/6250] eta: 0:00:52 lr: 0.000008 grad: 0.1774 (0.1819) loss: 0.7788 (0.7868) time: 0.1323 data: 0.0478 max mem: 9377 +Train: [84] [6000/6250] eta: 0:00:37 lr: 0.000008 grad: 0.1781 (0.1819) loss: 0.7829 (0.7867) time: 0.1304 data: 0.0473 max mem: 9377 +Train: [84] [6100/6250] eta: 0:00:22 lr: 0.000008 grad: 0.1739 (0.1819) loss: 0.7881 (0.7867) time: 0.1302 data: 0.0502 max mem: 9377 +Train: [84] [6200/6250] eta: 0:00:07 lr: 0.000008 grad: 0.1762 (0.1819) loss: 0.7901 (0.7867) time: 0.1266 data: 0.0322 max mem: 9377 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.1765 (0.1819) loss: 0.7941 (0.7867) time: 0.1456 data: 0.0625 max mem: 9377 +Train: [84] Total time: 0:15:34 (0.1496 s / it) +Averaged stats: lr: 0.000008 grad: 0.1765 (0.1819) loss: 0.7941 (0.7867) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:06:10 loss: 0.8376 (0.8376) time: 5.9815 data: 5.9522 max mem: 9377 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.8410 (0.8418) time: 0.1282 data: 0.0994 max mem: 9377 +Eval (hcp-train-subset): [84] Total time: 0:00:14 (0.2377 s / it) +Averaged stats (hcp-train-subset): loss: 0.8410 (0.8418) +Making plots (hcp-train-subset): example=49 +Eval (hcp-val): [84] [ 0/62] eta: 0:04:15 loss: 0.8400 (0.8400) time: 4.1242 data: 4.0378 max mem: 9377 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.8413 (0.8411) time: 0.1515 data: 0.1248 max mem: 9377 +Eval (hcp-val): [84] Total time: 0:00:14 (0.2394 s / it) +Averaged stats (hcp-val): loss: 0.8413 (0.8411) +Making plots (hcp-val): example=57 +Eval (nsd-val): [84] [ 0/62] eta: 0:04:55 loss: 0.8220 (0.8220) time: 4.7591 data: 4.6816 max mem: 9377 +Eval (nsd-val): [84] [61/62] eta: 0:00:00 loss: 0.8284 (0.8308) time: 0.1361 data: 0.1106 max mem: 9377 +Eval (nsd-val): [84] Total time: 0:00:13 (0.2186 s / it) +Averaged stats (nsd-val): loss: 0.8284 (0.8308) +Making plots (nsd-val): example=40 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00084.pth +Train: [85] [ 0/6250] eta: 10:28:01 lr: 0.000008 grad: 0.1175 (0.1175) loss: 0.8452 (0.8452) time: 6.0291 data: 5.9245 max mem: 9377 +Train: [85] [ 100/6250] eta: 0:20:20 lr: 0.000008 grad: 0.2204 (0.2576) loss: 0.7822 (0.7845) time: 0.1425 data: 0.0495 max mem: 9377 +Train: [85] [ 200/6250] eta: 0:17:06 lr: 0.000008 grad: 0.1848 (0.2326) loss: 0.7862 (0.7868) time: 0.1396 data: 0.0336 max mem: 9377 +Train: [85] [ 300/6250] eta: 0:15:51 lr: 0.000007 grad: 0.1884 (0.2193) loss: 0.7867 (0.7877) time: 0.1403 data: 0.0388 max mem: 9377 +Train: [85] [ 400/6250] eta: 0:14:58 lr: 0.000007 grad: 0.1827 (0.2119) loss: 0.7769 (0.7877) time: 0.1316 data: 0.0412 max mem: 9377 +Train: [85] [ 500/6250] eta: 0:14:20 lr: 0.000007 grad: 0.1810 (0.2069) loss: 0.7779 (0.7872) time: 0.1342 data: 0.0471 max mem: 9377 +Train: [85] [ 600/6250] eta: 0:13:48 lr: 0.000007 grad: 0.1848 (0.2042) loss: 0.7834 (0.7863) time: 0.1316 data: 0.0350 max mem: 9377 +Train: [85] [ 700/6250] eta: 0:13:21 lr: 0.000007 grad: 0.1794 (0.2028) loss: 0.7793 (0.7854) time: 0.1375 data: 0.0427 max mem: 9377 +Train: [85] [ 800/6250] eta: 0:13:04 lr: 0.000007 grad: 0.1791 (0.2008) loss: 0.7790 (0.7852) time: 0.1581 data: 0.0674 max mem: 9377 +Train: [85] [ 900/6250] eta: 0:12:50 lr: 0.000007 grad: 0.1833 (0.1989) loss: 0.7851 (0.7852) time: 0.1397 data: 0.0517 max mem: 9377 +Train: [85] [1000/6250] eta: 0:12:32 lr: 0.000007 grad: 0.1688 (0.1974) loss: 0.7810 (0.7848) time: 0.1299 data: 0.0526 max mem: 9377 +Train: [85] [1100/6250] eta: 0:12:17 lr: 0.000007 grad: 0.1798 (0.1960) loss: 0.7764 (0.7843) time: 0.1468 data: 0.0698 max mem: 9377 +Train: [85] [1200/6250] eta: 0:12:03 lr: 0.000007 grad: 0.1764 (0.1949) loss: 0.7714 (0.7839) time: 0.1279 data: 0.0502 max mem: 9377 +Train: [85] [1300/6250] eta: 0:11:47 lr: 0.000007 grad: 0.1783 (0.1939) loss: 0.7806 (0.7835) time: 0.1309 data: 0.0456 max mem: 9377 +Train: [85] [1400/6250] eta: 0:11:31 lr: 0.000007 grad: 0.1834 (0.1931) loss: 0.7733 (0.7833) time: 0.1441 data: 0.0604 max mem: 9377 +Train: [85] [1500/6250] eta: 0:11:17 lr: 0.000007 grad: 0.1861 (0.1926) loss: 0.7851 (0.7830) time: 0.1567 data: 0.0706 max mem: 9377 +Train: [85] [1600/6250] eta: 0:11:01 lr: 0.000007 grad: 0.1803 (0.1918) loss: 0.7936 (0.7831) time: 0.1405 data: 0.0617 max mem: 9377 +Train: [85] [1700/6250] eta: 0:10:46 lr: 0.000007 grad: 0.1773 (0.1913) loss: 0.7808 (0.7830) time: 0.1153 data: 0.0324 max mem: 9377 +Train: [85] [1800/6250] eta: 0:10:32 lr: 0.000007 grad: 0.1788 (0.1910) loss: 0.7796 (0.7830) time: 0.1388 data: 0.0521 max mem: 9377 +Train: [85] [1900/6250] eta: 0:10:17 lr: 0.000007 grad: 0.1754 (0.1905) loss: 0.7831 (0.7829) time: 0.1327 data: 0.0510 max mem: 9377 +Train: [85] [2000/6250] eta: 0:10:02 lr: 0.000007 grad: 0.1775 (0.1902) loss: 0.7867 (0.7828) time: 0.1494 data: 0.0749 max mem: 9377 +Train: [85] [2100/6250] eta: 0:09:48 lr: 0.000007 grad: 0.1736 (0.1898) loss: 0.7868 (0.7829) time: 0.1486 data: 0.0671 max mem: 9377 +Train: [85] [2200/6250] eta: 0:09:33 lr: 0.000007 grad: 0.1856 (0.1895) loss: 0.7830 (0.7828) time: 0.1325 data: 0.0545 max mem: 9377 +Train: [85] [2300/6250] eta: 0:09:19 lr: 0.000007 grad: 0.1810 (0.1892) loss: 0.7826 (0.7828) time: 0.1412 data: 0.0614 max mem: 9377 +Train: [85] [2400/6250] eta: 0:09:05 lr: 0.000007 grad: 0.1786 (0.1888) loss: 0.7836 (0.7828) time: 0.1625 data: 0.0777 max mem: 9377 +Train: [85] [2500/6250] eta: 0:08:50 lr: 0.000007 grad: 0.1755 (0.1885) loss: 0.7843 (0.7827) time: 0.1617 data: 0.0836 max mem: 9377 +Train: [85] [2600/6250] eta: 0:08:35 lr: 0.000007 grad: 0.1692 (0.1882) loss: 0.7873 (0.7826) time: 0.1381 data: 0.0624 max mem: 9377 +Train: [85] [2700/6250] eta: 0:08:20 lr: 0.000007 grad: 0.1786 (0.1878) loss: 0.7804 (0.7826) time: 0.1112 data: 0.0220 max mem: 9377 +Train: [85] [2800/6250] eta: 0:08:05 lr: 0.000007 grad: 0.1788 (0.1875) loss: 0.7869 (0.7826) time: 0.1451 data: 0.0656 max mem: 9377 +Train: [85] [2900/6250] eta: 0:07:51 lr: 0.000007 grad: 0.1825 (0.1874) loss: 0.7780 (0.7825) time: 0.1345 data: 0.0521 max mem: 9377 +Train: [85] [3000/6250] eta: 0:07:37 lr: 0.000007 grad: 0.1633 (0.1871) loss: 0.7986 (0.7826) time: 0.1498 data: 0.0702 max mem: 9377 +Train: [85] [3100/6250] eta: 0:07:23 lr: 0.000007 grad: 0.1825 (0.1868) loss: 0.7862 (0.7827) time: 0.1486 data: 0.0641 max mem: 9377 +Train: [85] [3200/6250] eta: 0:07:09 lr: 0.000007 grad: 0.1700 (0.1866) loss: 0.7887 (0.7827) time: 0.1442 data: 0.0576 max mem: 9377 +Train: [85] [3300/6250] eta: 0:06:55 lr: 0.000007 grad: 0.1755 (0.1864) loss: 0.7809 (0.7827) time: 0.1325 data: 0.0437 max mem: 9377 +Train: [85] [3400/6250] eta: 0:06:40 lr: 0.000007 grad: 0.1721 (0.1862) loss: 0.7860 (0.7828) time: 0.1372 data: 0.0524 max mem: 9377 +Train: [85] [3500/6250] eta: 0:06:26 lr: 0.000007 grad: 0.1767 (0.1859) loss: 0.7828 (0.7829) time: 0.1213 data: 0.0416 max mem: 9377 +Train: [85] [3600/6250] eta: 0:06:12 lr: 0.000007 grad: 0.1721 (0.1858) loss: 0.7885 (0.7830) time: 0.1520 data: 0.0711 max mem: 9377 +Train: [85] [3700/6250] eta: 0:05:58 lr: 0.000007 grad: 0.1764 (0.1855) loss: 0.7978 (0.7832) time: 0.1530 data: 0.0726 max mem: 9377 +Train: [85] [3800/6250] eta: 0:05:44 lr: 0.000007 grad: 0.1793 (0.1853) loss: 0.7914 (0.7833) time: 0.1401 data: 0.0575 max mem: 9377 +Train: [85] [3900/6250] eta: 0:05:30 lr: 0.000007 grad: 0.1762 (0.1851) loss: 0.7902 (0.7834) time: 0.1600 data: 0.0801 max mem: 9377 +Train: [85] [4000/6250] eta: 0:05:16 lr: 0.000007 grad: 0.1794 (0.1849) loss: 0.7904 (0.7835) time: 0.1464 data: 0.0620 max mem: 9377 +Train: [85] [4100/6250] eta: 0:05:02 lr: 0.000007 grad: 0.1697 (0.1848) loss: 0.7838 (0.7835) time: 0.1401 data: 0.0589 max mem: 9377 +Train: [85] [4200/6250] eta: 0:04:48 lr: 0.000007 grad: 0.1730 (0.1846) loss: 0.7889 (0.7836) time: 0.1518 data: 0.0665 max mem: 9377 +Train: [85] [4300/6250] eta: 0:04:35 lr: 0.000007 grad: 0.1696 (0.1844) loss: 0.7776 (0.7836) time: 0.1330 data: 0.0505 max mem: 9377 +Train: [85] [4400/6250] eta: 0:04:21 lr: 0.000007 grad: 0.1761 (0.1843) loss: 0.7806 (0.7837) time: 0.1468 data: 0.0724 max mem: 9377 +Train: [85] [4500/6250] eta: 0:04:07 lr: 0.000007 grad: 0.1705 (0.1842) loss: 0.7855 (0.7836) time: 0.1432 data: 0.0524 max mem: 9377 +Train: [85] [4600/6250] eta: 0:03:53 lr: 0.000007 grad: 0.1738 (0.1842) loss: 0.7809 (0.7836) time: 0.1524 data: 0.0706 max mem: 9377 +Train: [85] [4700/6250] eta: 0:03:39 lr: 0.000007 grad: 0.1708 (0.1842) loss: 0.7804 (0.7836) time: 0.1394 data: 0.0633 max mem: 9377 +Train: [85] [4800/6250] eta: 0:03:25 lr: 0.000007 grad: 0.1717 (0.1840) loss: 0.7814 (0.7836) time: 0.1567 data: 0.0739 max mem: 9377 +Train: [85] [4900/6250] eta: 0:03:11 lr: 0.000007 grad: 0.1774 (0.1839) loss: 0.7708 (0.7835) time: 0.1539 data: 0.0633 max mem: 9377 +Train: [85] [5000/6250] eta: 0:02:57 lr: 0.000007 grad: 0.1770 (0.1839) loss: 0.7752 (0.7836) time: 0.2015 data: 0.1212 max mem: 9377 +Train: [85] [5100/6250] eta: 0:02:43 lr: 0.000007 grad: 0.1752 (0.1838) loss: 0.7835 (0.7836) time: 0.1607 data: 0.0788 max mem: 9377 +Train: [85] [5200/6250] eta: 0:02:29 lr: 0.000007 grad: 0.1800 (0.1837) loss: 0.7856 (0.7836) time: 0.1895 data: 0.1105 max mem: 9377 +Train: [85] [5300/6250] eta: 0:02:15 lr: 0.000007 grad: 0.1767 (0.1837) loss: 0.7797 (0.7836) time: 0.1290 data: 0.0450 max mem: 9377 +Train: [85] [5400/6250] eta: 0:02:01 lr: 0.000007 grad: 0.1748 (0.1837) loss: 0.7900 (0.7836) time: 0.1622 data: 0.0657 max mem: 9377 +Train: [85] [5500/6250] eta: 0:01:47 lr: 0.000007 grad: 0.1787 (0.1836) loss: 0.7817 (0.7837) time: 0.1439 data: 0.0496 max mem: 9377 +Train: [85] [5600/6250] eta: 0:01:33 lr: 0.000007 grad: 0.1739 (0.1835) loss: 0.7904 (0.7837) time: 0.1411 data: 0.0536 max mem: 9377 +Train: [85] [5700/6250] eta: 0:01:19 lr: 0.000007 grad: 0.1699 (0.1835) loss: 0.7872 (0.7838) time: 0.1663 data: 0.0784 max mem: 9377 +Train: [85] [5800/6250] eta: 0:01:04 lr: 0.000007 grad: 0.1829 (0.1836) loss: 0.7859 (0.7839) time: 0.1350 data: 0.0453 max mem: 9377 +Train: [85] [5900/6250] eta: 0:00:50 lr: 0.000007 grad: 0.1672 (0.1835) loss: 0.7900 (0.7840) time: 0.1044 data: 0.0154 max mem: 9377 +Train: [85] [6000/6250] eta: 0:00:35 lr: 0.000007 grad: 0.1705 (0.1834) loss: 0.7876 (0.7840) time: 0.1633 data: 0.0884 max mem: 9377 +Train: [85] [6100/6250] eta: 0:00:21 lr: 0.000007 grad: 0.1714 (0.1833) loss: 0.7950 (0.7841) time: 0.1664 data: 0.0800 max mem: 9377 +Train: [85] [6200/6250] eta: 0:00:07 lr: 0.000007 grad: 0.1660 (0.1832) loss: 0.8000 (0.7842) time: 0.1583 data: 0.0767 max mem: 9377 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.1726 (0.1832) loss: 0.7881 (0.7842) time: 0.1611 data: 0.0794 max mem: 9377 +Train: [85] Total time: 0:15:08 (0.1454 s / it) +Averaged stats: lr: 0.000007 grad: 0.1726 (0.1832) loss: 0.7881 (0.7842) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:03:46 loss: 0.8379 (0.8379) time: 3.6466 data: 3.5305 max mem: 9377 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.8409 (0.8422) time: 0.1269 data: 0.1018 max mem: 9377 +Eval (hcp-train-subset): [85] Total time: 0:00:13 (0.2138 s / it) +Averaged stats (hcp-train-subset): loss: 0.8409 (0.8422) +Eval (hcp-val): [85] [ 0/62] eta: 0:04:49 loss: 0.8416 (0.8416) time: 4.6730 data: 4.5961 max mem: 9377 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.8390 (0.8405) time: 0.1230 data: 0.0979 max mem: 9377 +Eval (hcp-val): [85] Total time: 0:00:13 (0.2197 s / it) +Averaged stats (hcp-val): loss: 0.8390 (0.8405) +Eval (nsd-val): [85] [ 0/62] eta: 0:03:48 loss: 0.8209 (0.8209) time: 3.6924 data: 3.6144 max mem: 9377 +Eval (nsd-val): [85] [61/62] eta: 0:00:00 loss: 0.8310 (0.8326) time: 0.1106 data: 0.0856 max mem: 9377 +Eval (nsd-val): [85] Total time: 0:00:13 (0.2206 s / it) +Averaged stats (nsd-val): loss: 0.8310 (0.8326) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 10:46:12 lr: 0.000007 grad: 0.1066 (0.1066) loss: 0.8712 (0.8712) time: 6.2036 data: 6.1022 max mem: 9377 +Train: [86] [ 100/6250] eta: 0:19:29 lr: 0.000007 grad: 0.1826 (0.2553) loss: 0.7843 (0.7868) time: 0.1374 data: 0.0375 max mem: 9377 +Train: [86] [ 200/6250] eta: 0:17:08 lr: 0.000007 grad: 0.1779 (0.2197) loss: 0.8009 (0.7954) time: 0.1469 data: 0.0439 max mem: 9377 +Train: [86] [ 300/6250] eta: 0:16:17 lr: 0.000007 grad: 0.1803 (0.2070) loss: 0.7910 (0.7954) time: 0.1378 data: 0.0362 max mem: 9377 +Train: [86] [ 400/6250] eta: 0:15:36 lr: 0.000007 grad: 0.1877 (0.2017) loss: 0.7883 (0.7949) time: 0.1521 data: 0.0593 max mem: 9377 +Train: [86] [ 500/6250] eta: 0:14:52 lr: 0.000007 grad: 0.1946 (0.1988) loss: 0.7892 (0.7942) time: 0.1264 data: 0.0339 max mem: 9377 +Train: [86] [ 600/6250] eta: 0:14:33 lr: 0.000006 grad: 0.1772 (0.1961) loss: 0.8027 (0.7946) time: 0.1497 data: 0.0492 max mem: 9377 +Train: [86] [ 700/6250] eta: 0:14:28 lr: 0.000006 grad: 0.1952 (0.1950) loss: 0.7885 (0.7945) time: 0.1822 data: 0.0988 max mem: 9377 +Train: [86] [ 800/6250] eta: 0:14:04 lr: 0.000006 grad: 0.1762 (0.1940) loss: 0.7813 (0.7939) time: 0.1552 data: 0.0631 max mem: 9377 +Train: [86] [ 900/6250] eta: 0:13:52 lr: 0.000006 grad: 0.1821 (0.1932) loss: 0.7943 (0.7936) time: 0.1655 data: 0.0827 max mem: 9377 +Train: [86] [1000/6250] eta: 0:13:46 lr: 0.000006 grad: 0.1663 (0.1925) loss: 0.7972 (0.7932) time: 0.1930 data: 0.1047 max mem: 9377 +Train: [86] [1100/6250] eta: 0:13:28 lr: 0.000006 grad: 0.1710 (0.1917) loss: 0.7960 (0.7928) time: 0.1459 data: 0.0597 max mem: 9377 +Train: [86] [1200/6250] eta: 0:13:10 lr: 0.000006 grad: 0.1664 (0.1907) loss: 0.7994 (0.7928) time: 0.1377 data: 0.0489 max mem: 9377 +Train: [86] [1300/6250] eta: 0:12:51 lr: 0.000006 grad: 0.1884 (0.1903) loss: 0.7886 (0.7925) time: 0.1444 data: 0.0552 max mem: 9377 +Train: [86] [1400/6250] eta: 0:12:32 lr: 0.000006 grad: 0.1835 (0.1895) loss: 0.7958 (0.7923) time: 0.1616 data: 0.0882 max mem: 9377 +Train: [86] [1500/6250] eta: 0:12:12 lr: 0.000006 grad: 0.1749 (0.1899) loss: 0.7892 (0.7920) time: 0.1404 data: 0.0546 max mem: 9377 +Train: [86] [1600/6250] eta: 0:11:53 lr: 0.000006 grad: 0.1893 (0.1897) loss: 0.7866 (0.7917) time: 0.1709 data: 0.0874 max mem: 9377 +Train: [86] [1700/6250] eta: 0:11:33 lr: 0.000006 grad: 0.1691 (0.1893) loss: 0.7985 (0.7916) time: 0.1308 data: 0.0496 max mem: 9377 +Train: [86] [1800/6250] eta: 0:11:17 lr: 0.000006 grad: 0.1852 (0.1891) loss: 0.7873 (0.7914) time: 0.1492 data: 0.0686 max mem: 9377 +Train: [86] [1900/6250] eta: 0:11:01 lr: 0.000006 grad: 0.1771 (0.1886) loss: 0.7953 (0.7914) time: 0.1533 data: 0.0742 max mem: 9377 +Train: [86] [2000/6250] eta: 0:10:42 lr: 0.000006 grad: 0.1711 (0.1881) loss: 0.7951 (0.7913) time: 0.1221 data: 0.0412 max mem: 9377 +Train: [86] [2100/6250] eta: 0:10:26 lr: 0.000006 grad: 0.1830 (0.1879) loss: 0.7824 (0.7912) time: 0.1645 data: 0.0828 max mem: 9377 +Train: [86] [2200/6250] eta: 0:10:08 lr: 0.000006 grad: 0.1862 (0.1876) loss: 0.7767 (0.7910) time: 0.1199 data: 0.0311 max mem: 9377 +Train: [86] [2300/6250] eta: 0:09:50 lr: 0.000006 grad: 0.1835 (0.1874) loss: 0.7812 (0.7908) time: 0.1419 data: 0.0643 max mem: 9377 +Train: [86] [2400/6250] eta: 0:09:33 lr: 0.000006 grad: 0.1790 (0.1873) loss: 0.7808 (0.7906) time: 0.1211 data: 0.0372 max mem: 9377 +Train: [86] [2500/6250] eta: 0:09:17 lr: 0.000006 grad: 0.1799 (0.1872) loss: 0.7817 (0.7904) time: 0.1328 data: 0.0503 max mem: 9377 +Train: [86] [2600/6250] eta: 0:09:01 lr: 0.000006 grad: 0.1829 (0.1874) loss: 0.7767 (0.7900) time: 0.1520 data: 0.0728 max mem: 9377 +Train: [86] [2700/6250] eta: 0:08:45 lr: 0.000006 grad: 0.1787 (0.1873) loss: 0.7887 (0.7897) time: 0.1546 data: 0.0699 max mem: 9377 +Train: [86] [2800/6250] eta: 0:08:29 lr: 0.000006 grad: 0.1759 (0.1872) loss: 0.7838 (0.7896) time: 0.1515 data: 0.0723 max mem: 9377 +Train: [86] [2900/6250] eta: 0:08:15 lr: 0.000006 grad: 0.1823 (0.1871) loss: 0.7797 (0.7894) time: 0.1443 data: 0.0621 max mem: 9377 +Train: [86] [3000/6250] eta: 0:07:59 lr: 0.000006 grad: 0.1826 (0.1872) loss: 0.7928 (0.7892) time: 0.1329 data: 0.0555 max mem: 9377 +Train: [86] [3100/6250] eta: 0:07:44 lr: 0.000006 grad: 0.1857 (0.1871) loss: 0.7845 (0.7891) time: 0.1403 data: 0.0627 max mem: 9377 +Train: [86] [3200/6250] eta: 0:07:29 lr: 0.000006 grad: 0.1797 (0.1872) loss: 0.7734 (0.7889) time: 0.1338 data: 0.0582 max mem: 9377 +Train: [86] [3300/6250] eta: 0:07:14 lr: 0.000006 grad: 0.1875 (0.1873) loss: 0.7848 (0.7887) time: 0.1420 data: 0.0592 max mem: 9377 +Train: [86] [3400/6250] eta: 0:06:59 lr: 0.000006 grad: 0.1689 (0.1873) loss: 0.7832 (0.7885) time: 0.1395 data: 0.0601 max mem: 9377 +Train: [86] [3500/6250] eta: 0:06:44 lr: 0.000006 grad: 0.1778 (0.1874) loss: 0.7852 (0.7884) time: 0.1354 data: 0.0505 max mem: 9377 +Train: [86] [3600/6250] eta: 0:06:29 lr: 0.000006 grad: 0.1691 (0.1873) loss: 0.7827 (0.7884) time: 0.1379 data: 0.0536 max mem: 9377 +Train: [86] [3700/6250] eta: 0:06:14 lr: 0.000006 grad: 0.1759 (0.1872) loss: 0.7879 (0.7885) time: 0.1356 data: 0.0526 max mem: 9377 +Train: [86] [3800/6250] eta: 0:05:58 lr: 0.000006 grad: 0.1870 (0.1871) loss: 0.7898 (0.7885) time: 0.1527 data: 0.0743 max mem: 9377 +Train: [86] [3900/6250] eta: 0:05:43 lr: 0.000006 grad: 0.1843 (0.1870) loss: 0.7891 (0.7885) time: 0.1370 data: 0.0516 max mem: 9377 +Train: [86] [4000/6250] eta: 0:05:28 lr: 0.000006 grad: 0.1797 (0.1869) loss: 0.7861 (0.7884) time: 0.1395 data: 0.0607 max mem: 9377 +Train: [86] [4100/6250] eta: 0:05:15 lr: 0.000006 grad: 0.1736 (0.1869) loss: 0.7894 (0.7883) time: 0.1558 data: 0.0756 max mem: 9377 +Train: [86] [4200/6250] eta: 0:05:01 lr: 0.000006 grad: 0.1967 (0.1869) loss: 0.7779 (0.7882) time: 0.1421 data: 0.0636 max mem: 9377 +Train: [86] [4300/6250] eta: 0:04:46 lr: 0.000006 grad: 0.1822 (0.1869) loss: 0.7821 (0.7880) time: 0.1544 data: 0.0737 max mem: 9377 +Train: [86] [4400/6250] eta: 0:04:32 lr: 0.000006 grad: 0.1911 (0.1869) loss: 0.7794 (0.7879) time: 0.1214 data: 0.0436 max mem: 9377 +Train: [86] [4500/6250] eta: 0:04:17 lr: 0.000006 grad: 0.1935 (0.1869) loss: 0.7837 (0.7879) time: 0.1471 data: 0.0620 max mem: 9377 +Train: [86] [4600/6250] eta: 0:04:02 lr: 0.000006 grad: 0.1868 (0.1869) loss: 0.7746 (0.7878) time: 0.1384 data: 0.0569 max mem: 9377 +Train: [86] [4700/6250] eta: 0:03:47 lr: 0.000006 grad: 0.1800 (0.1869) loss: 0.7912 (0.7877) time: 0.1348 data: 0.0607 max mem: 9377 +Train: [86] [4800/6250] eta: 0:03:32 lr: 0.000006 grad: 0.1713 (0.1871) loss: 0.7831 (0.7876) time: 0.1493 data: 0.0689 max mem: 9377 +Train: [86] [4900/6250] eta: 0:03:17 lr: 0.000006 grad: 0.1781 (0.1872) loss: 0.7856 (0.7875) time: 0.1401 data: 0.0574 max mem: 9377 +Train: [86] [5000/6250] eta: 0:03:02 lr: 0.000006 grad: 0.1873 (0.1872) loss: 0.7881 (0.7874) time: 0.1438 data: 0.0508 max mem: 9377 +Train: [86] [5100/6250] eta: 0:02:47 lr: 0.000006 grad: 0.1836 (0.1872) loss: 0.7738 (0.7873) time: 0.1379 data: 0.0612 max mem: 9377 +Train: [86] [5200/6250] eta: 0:02:33 lr: 0.000006 grad: 0.1767 (0.1872) loss: 0.7832 (0.7873) time: 0.1286 data: 0.0510 max mem: 9377 +Train: [86] [5300/6250] eta: 0:02:18 lr: 0.000006 grad: 0.1870 (0.1873) loss: 0.7843 (0.7872) time: 0.1528 data: 0.0733 max mem: 9377 +Train: [86] [5400/6250] eta: 0:02:03 lr: 0.000006 grad: 0.1840 (0.1873) loss: 0.7805 (0.7871) time: 0.1237 data: 0.0438 max mem: 9377 +Train: [86] [5500/6250] eta: 0:01:48 lr: 0.000006 grad: 0.1841 (0.1873) loss: 0.7866 (0.7871) time: 0.1391 data: 0.0599 max mem: 9377 +Train: [86] [5600/6250] eta: 0:01:34 lr: 0.000006 grad: 0.1813 (0.1872) loss: 0.7894 (0.7871) time: 0.1309 data: 0.0469 max mem: 9377 +Train: [86] [5700/6250] eta: 0:01:19 lr: 0.000006 grad: 0.1766 (0.1871) loss: 0.7861 (0.7872) time: 0.1210 data: 0.0354 max mem: 9377 +Train: [86] [5800/6250] eta: 0:01:05 lr: 0.000006 grad: 0.1701 (0.1870) loss: 0.7897 (0.7872) time: 0.1351 data: 0.0519 max mem: 9377 +Train: [86] [5900/6250] eta: 0:00:50 lr: 0.000006 grad: 0.1776 (0.1869) loss: 0.7916 (0.7873) time: 0.1779 data: 0.0963 max mem: 9377 +Train: [86] [6000/6250] eta: 0:00:36 lr: 0.000006 grad: 0.1917 (0.1869) loss: 0.7860 (0.7873) time: 0.1251 data: 0.0381 max mem: 9377 +Train: [86] [6100/6250] eta: 0:00:21 lr: 0.000006 grad: 0.1725 (0.1868) loss: 0.7908 (0.7873) time: 0.1097 data: 0.0294 max mem: 9377 +Train: [86] [6200/6250] eta: 0:00:07 lr: 0.000006 grad: 0.1768 (0.1867) loss: 0.7883 (0.7873) time: 0.1526 data: 0.0749 max mem: 9377 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.1827 (0.1867) loss: 0.7861 (0.7873) time: 0.1441 data: 0.0651 max mem: 9377 +Train: [86] Total time: 0:15:06 (0.1451 s / it) +Averaged stats: lr: 0.000006 grad: 0.1827 (0.1867) loss: 0.7861 (0.7873) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:04:27 loss: 0.8370 (0.8370) time: 4.3107 data: 4.2328 max mem: 9377 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.8391 (0.8420) time: 0.1160 data: 0.0894 max mem: 9377 +Eval (hcp-train-subset): [86] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (hcp-train-subset): loss: 0.8391 (0.8420) +Eval (hcp-val): [86] [ 0/62] eta: 0:04:17 loss: 0.8389 (0.8389) time: 4.1522 data: 4.0762 max mem: 9377 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.8385 (0.8405) time: 0.1167 data: 0.0917 max mem: 9377 +Eval (hcp-val): [86] Total time: 0:00:13 (0.2196 s / it) +Averaged stats (hcp-val): loss: 0.8385 (0.8405) +Eval (nsd-val): [86] [ 0/62] eta: 0:05:41 loss: 0.8202 (0.8202) time: 5.5001 data: 5.4697 max mem: 9377 +Eval (nsd-val): [86] [61/62] eta: 0:00:00 loss: 0.8267 (0.8292) time: 0.1246 data: 0.0993 max mem: 9377 +Eval (nsd-val): [86] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (nsd-val): loss: 0.8267 (0.8292) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [87] [ 0/6250] eta: 10:13:40 lr: 0.000006 grad: 0.1242 (0.1242) loss: 0.8603 (0.8603) time: 5.8913 data: 5.7464 max mem: 9377 +Train: [87] [ 100/6250] eta: 0:19:09 lr: 0.000006 grad: 0.1953 (0.2137) loss: 0.8062 (0.7930) time: 0.1339 data: 0.0275 max mem: 9377 +Train: [87] [ 200/6250] eta: 0:16:20 lr: 0.000006 grad: 0.2057 (0.2012) loss: 0.7883 (0.7933) time: 0.1272 data: 0.0335 max mem: 9377 +Train: [87] [ 300/6250] eta: 0:15:22 lr: 0.000006 grad: 0.1822 (0.1962) loss: 0.7840 (0.7905) time: 0.1478 data: 0.0555 max mem: 9377 +Train: [87] [ 400/6250] eta: 0:14:39 lr: 0.000006 grad: 0.1730 (0.1927) loss: 0.7910 (0.7906) time: 0.1291 data: 0.0334 max mem: 9377 +Train: [87] [ 500/6250] eta: 0:14:11 lr: 0.000006 grad: 0.1829 (0.1914) loss: 0.7847 (0.7898) time: 0.1546 data: 0.0743 max mem: 9377 +Train: [87] [ 600/6250] eta: 0:13:44 lr: 0.000006 grad: 0.1803 (0.1906) loss: 0.7842 (0.7896) time: 0.1165 data: 0.0194 max mem: 9377 +Train: [87] [ 700/6250] eta: 0:13:29 lr: 0.000006 grad: 0.1716 (0.1896) loss: 0.7876 (0.7888) time: 0.1516 data: 0.0611 max mem: 9377 +Train: [87] [ 800/6250] eta: 0:13:25 lr: 0.000006 grad: 0.1735 (0.1887) loss: 0.7816 (0.7884) time: 0.1566 data: 0.0636 max mem: 9377 +Train: [87] [ 900/6250] eta: 0:13:18 lr: 0.000006 grad: 0.1887 (0.1885) loss: 0.7766 (0.7879) time: 0.1752 data: 0.0876 max mem: 9377 +Train: [87] [1000/6250] eta: 0:13:00 lr: 0.000006 grad: 0.1859 (0.1885) loss: 0.7722 (0.7873) time: 0.1553 data: 0.0724 max mem: 9377 +Train: [87] [1100/6250] eta: 0:12:42 lr: 0.000006 grad: 0.1748 (0.1884) loss: 0.7868 (0.7870) time: 0.1493 data: 0.0731 max mem: 9377 +Train: [87] [1200/6250] eta: 0:12:21 lr: 0.000006 grad: 0.1845 (0.1886) loss: 0.7765 (0.7864) time: 0.1408 data: 0.0577 max mem: 9377 +Train: [87] [1300/6250] eta: 0:12:04 lr: 0.000006 grad: 0.1903 (0.1884) loss: 0.7839 (0.7863) time: 0.1485 data: 0.0660 max mem: 9377 +Train: [87] [1400/6250] eta: 0:11:47 lr: 0.000005 grad: 0.1825 (0.1883) loss: 0.7861 (0.7862) time: 0.1543 data: 0.0708 max mem: 9377 +Train: [87] [1500/6250] eta: 0:11:30 lr: 0.000005 grad: 0.1792 (0.1880) loss: 0.7839 (0.7861) time: 0.1259 data: 0.0402 max mem: 9377 +Train: [87] [1600/6250] eta: 0:11:13 lr: 0.000005 grad: 0.1757 (0.1876) loss: 0.7898 (0.7859) time: 0.1331 data: 0.0514 max mem: 9377 +Train: [87] [1700/6250] eta: 0:10:55 lr: 0.000005 grad: 0.1843 (0.1875) loss: 0.7819 (0.7858) time: 0.1150 data: 0.0242 max mem: 9377 +Train: [87] [1800/6250] eta: 0:10:38 lr: 0.000005 grad: 0.1842 (0.1875) loss: 0.7845 (0.7856) time: 0.1329 data: 0.0517 max mem: 9377 +Train: [87] [1900/6250] eta: 0:10:24 lr: 0.000005 grad: 0.1743 (0.1871) loss: 0.7778 (0.7856) time: 0.1237 data: 0.0407 max mem: 9377 +Train: [87] [2000/6250] eta: 0:10:10 lr: 0.000005 grad: 0.1767 (0.1869) loss: 0.7948 (0.7856) time: 0.1501 data: 0.0773 max mem: 9377 +Train: [87] [2100/6250] eta: 0:09:56 lr: 0.000005 grad: 0.1770 (0.1867) loss: 0.7835 (0.7854) time: 0.1430 data: 0.0629 max mem: 9377 +Train: [87] [2200/6250] eta: 0:09:42 lr: 0.000005 grad: 0.1710 (0.1864) loss: 0.7835 (0.7855) time: 0.1517 data: 0.0707 max mem: 9377 +Train: [87] [2300/6250] eta: 0:09:27 lr: 0.000005 grad: 0.1787 (0.1861) loss: 0.7854 (0.7855) time: 0.1360 data: 0.0540 max mem: 9377 +Train: [87] [2400/6250] eta: 0:09:12 lr: 0.000005 grad: 0.1747 (0.1859) loss: 0.7868 (0.7854) time: 0.1500 data: 0.0721 max mem: 9377 +Train: [87] [2500/6250] eta: 0:08:57 lr: 0.000005 grad: 0.1897 (0.1859) loss: 0.7808 (0.7853) time: 0.1346 data: 0.0561 max mem: 9377 +Train: [87] [2600/6250] eta: 0:08:42 lr: 0.000005 grad: 0.1778 (0.1857) loss: 0.7789 (0.7852) time: 0.1416 data: 0.0604 max mem: 9377 +Train: [87] [2700/6250] eta: 0:08:27 lr: 0.000005 grad: 0.1823 (0.1857) loss: 0.7889 (0.7852) time: 0.1261 data: 0.0354 max mem: 9377 +Train: [87] [2800/6250] eta: 0:08:12 lr: 0.000005 grad: 0.1767 (0.1854) loss: 0.7899 (0.7852) time: 0.1326 data: 0.0466 max mem: 9377 +Train: [87] [2900/6250] eta: 0:07:59 lr: 0.000005 grad: 0.1782 (0.1853) loss: 0.7796 (0.7853) time: 0.1611 data: 0.0820 max mem: 9377 +Train: [87] [3000/6250] eta: 0:07:44 lr: 0.000005 grad: 0.1839 (0.1853) loss: 0.7889 (0.7852) time: 0.1447 data: 0.0654 max mem: 9377 +Train: [87] [3100/6250] eta: 0:07:29 lr: 0.000005 grad: 0.1780 (0.1853) loss: 0.7840 (0.7851) time: 0.1406 data: 0.0489 max mem: 9377 +Train: [87] [3200/6250] eta: 0:07:14 lr: 0.000005 grad: 0.1760 (0.1853) loss: 0.7905 (0.7852) time: 0.1374 data: 0.0585 max mem: 9377 +Train: [87] [3300/6250] eta: 0:06:59 lr: 0.000005 grad: 0.1946 (0.1857) loss: 0.7879 (0.7851) time: 0.1433 data: 0.0616 max mem: 9377 +Train: [87] [3400/6250] eta: 0:06:45 lr: 0.000005 grad: 0.1933 (0.1858) loss: 0.7904 (0.7850) time: 0.1398 data: 0.0622 max mem: 9377 +Train: [87] [3500/6250] eta: 0:06:30 lr: 0.000005 grad: 0.1743 (0.1858) loss: 0.7899 (0.7849) time: 0.1436 data: 0.0570 max mem: 9377 +Train: [87] [3600/6250] eta: 0:06:16 lr: 0.000005 grad: 0.1785 (0.1858) loss: 0.7900 (0.7848) time: 0.1374 data: 0.0557 max mem: 9377 +Train: [87] [3700/6250] eta: 0:06:02 lr: 0.000005 grad: 0.1922 (0.1860) loss: 0.7763 (0.7847) time: 0.1562 data: 0.0752 max mem: 9377 +Train: [87] [3800/6250] eta: 0:05:48 lr: 0.000005 grad: 0.1774 (0.1859) loss: 0.7871 (0.7848) time: 0.1527 data: 0.0731 max mem: 9377 +Train: [87] [3900/6250] eta: 0:05:34 lr: 0.000005 grad: 0.1726 (0.1860) loss: 0.7834 (0.7847) time: 0.1496 data: 0.0708 max mem: 9377 +Train: [87] [4000/6250] eta: 0:05:20 lr: 0.000005 grad: 0.1866 (0.1861) loss: 0.7855 (0.7847) time: 0.1764 data: 0.0918 max mem: 9377 +Train: [87] [4100/6250] eta: 0:05:06 lr: 0.000005 grad: 0.1793 (0.1860) loss: 0.7835 (0.7846) time: 0.1606 data: 0.0742 max mem: 9377 +Train: [87] [4200/6250] eta: 0:04:52 lr: 0.000005 grad: 0.1764 (0.1860) loss: 0.7856 (0.7846) time: 0.1562 data: 0.0798 max mem: 9377 +Train: [87] [4300/6250] eta: 0:04:38 lr: 0.000005 grad: 0.1964 (0.1861) loss: 0.7814 (0.7844) time: 0.1318 data: 0.0497 max mem: 9377 +Train: [87] [4400/6250] eta: 0:04:24 lr: 0.000005 grad: 0.1791 (0.1861) loss: 0.7842 (0.7844) time: 0.1223 data: 0.0361 max mem: 9377 +Train: [87] [4500/6250] eta: 0:04:10 lr: 0.000005 grad: 0.1860 (0.1861) loss: 0.7733 (0.7843) time: 0.1436 data: 0.0641 max mem: 9377 +Train: [87] [4600/6250] eta: 0:03:55 lr: 0.000005 grad: 0.1761 (0.1860) loss: 0.7875 (0.7843) time: 0.1391 data: 0.0495 max mem: 9377 +Train: [87] [4700/6250] eta: 0:03:41 lr: 0.000005 grad: 0.1884 (0.1860) loss: 0.7789 (0.7842) time: 0.1454 data: 0.0593 max mem: 9377 +Train: [87] [4800/6250] eta: 0:03:27 lr: 0.000005 grad: 0.1825 (0.1859) loss: 0.7875 (0.7842) time: 0.1274 data: 0.0515 max mem: 9377 +Train: [87] [4900/6250] eta: 0:03:12 lr: 0.000005 grad: 0.1847 (0.1858) loss: 0.7935 (0.7843) time: 0.1532 data: 0.0745 max mem: 9377 +Train: [87] [5000/6250] eta: 0:02:58 lr: 0.000005 grad: 0.1760 (0.1857) loss: 0.7957 (0.7844) time: 0.0887 data: 0.0002 max mem: 9377 +Train: [87] [5100/6250] eta: 0:02:44 lr: 0.000005 grad: 0.1827 (0.1856) loss: 0.7827 (0.7845) time: 0.1276 data: 0.0484 max mem: 9377 +Train: [87] [5200/6250] eta: 0:02:29 lr: 0.000005 grad: 0.1861 (0.1855) loss: 0.7796 (0.7845) time: 0.1509 data: 0.0717 max mem: 9377 +Train: [87] [5300/6250] eta: 0:02:15 lr: 0.000005 grad: 0.1821 (0.1855) loss: 0.7727 (0.7846) time: 0.1878 data: 0.1115 max mem: 9377 +Train: [87] [5400/6250] eta: 0:02:01 lr: 0.000005 grad: 0.1776 (0.1854) loss: 0.7719 (0.7846) time: 0.1491 data: 0.0659 max mem: 9377 +Train: [87] [5500/6250] eta: 0:01:47 lr: 0.000005 grad: 0.1785 (0.1855) loss: 0.7927 (0.7846) time: 0.1411 data: 0.0570 max mem: 9377 +Train: [87] [5600/6250] eta: 0:01:33 lr: 0.000005 grad: 0.1769 (0.1856) loss: 0.7877 (0.7846) time: 0.1411 data: 0.0578 max mem: 9377 +Train: [87] [5700/6250] eta: 0:01:18 lr: 0.000005 grad: 0.1915 (0.1856) loss: 0.7840 (0.7846) time: 0.1440 data: 0.0670 max mem: 9377 +Train: [87] [5800/6250] eta: 0:01:04 lr: 0.000005 grad: 0.1767 (0.1856) loss: 0.7795 (0.7846) time: 0.1394 data: 0.0687 max mem: 9377 +Train: [87] [5900/6250] eta: 0:00:50 lr: 0.000005 grad: 0.1880 (0.1857) loss: 0.7776 (0.7846) time: 0.1299 data: 0.0444 max mem: 9377 +Train: [87] [6000/6250] eta: 0:00:35 lr: 0.000005 grad: 0.1715 (0.1856) loss: 0.7842 (0.7846) time: 0.1191 data: 0.0297 max mem: 9377 +Train: [87] [6100/6250] eta: 0:00:21 lr: 0.000005 grad: 0.1798 (0.1856) loss: 0.7843 (0.7846) time: 0.1247 data: 0.0394 max mem: 9377 +Train: [87] [6200/6250] eta: 0:00:07 lr: 0.000005 grad: 0.1844 (0.1856) loss: 0.7803 (0.7846) time: 0.1203 data: 0.0362 max mem: 9377 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.1787 (0.1855) loss: 0.7905 (0.7847) time: 0.1385 data: 0.0531 max mem: 9377 +Train: [87] Total time: 0:14:57 (0.1436 s / it) +Averaged stats: lr: 0.000005 grad: 0.1787 (0.1855) loss: 0.7905 (0.7847) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:05:15 loss: 0.8405 (0.8405) time: 5.0944 data: 5.0640 max mem: 9377 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.8405 (0.8424) time: 0.1136 data: 0.0890 max mem: 9377 +Eval (hcp-train-subset): [87] Total time: 0:00:12 (0.2054 s / it) +Averaged stats (hcp-train-subset): loss: 0.8405 (0.8424) +Eval (hcp-val): [87] [ 0/62] eta: 0:05:53 loss: 0.8390 (0.8390) time: 5.6956 data: 5.6651 max mem: 9377 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.8388 (0.8404) time: 0.1107 data: 0.0861 max mem: 9377 +Eval (hcp-val): [87] Total time: 0:00:13 (0.2131 s / it) +Averaged stats (hcp-val): loss: 0.8388 (0.8404) +Eval (nsd-val): [87] [ 0/62] eta: 0:03:33 loss: 0.8198 (0.8198) time: 3.4507 data: 3.3882 max mem: 9377 +Eval (nsd-val): [87] [61/62] eta: 0:00:00 loss: 0.8318 (0.8324) time: 0.1110 data: 0.0863 max mem: 9377 +Eval (nsd-val): [87] Total time: 0:00:12 (0.2087 s / it) +Averaged stats (nsd-val): loss: 0.8318 (0.8324) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [88] [ 0/6250] eta: 11:29:01 lr: 0.000005 grad: 0.2951 (0.2951) loss: 0.7595 (0.7595) time: 6.6146 data: 6.5082 max mem: 9377 +Train: [88] [ 100/6250] eta: 0:20:59 lr: 0.000005 grad: 0.2239 (0.2258) loss: 0.7957 (0.8019) time: 0.1914 data: 0.1006 max mem: 9377 +Train: [88] [ 200/6250] eta: 0:17:33 lr: 0.000005 grad: 0.2028 (0.2171) loss: 0.7853 (0.7984) time: 0.1514 data: 0.0447 max mem: 9377 +Train: [88] [ 300/6250] eta: 0:16:38 lr: 0.000005 grad: 0.2081 (0.2138) loss: 0.7689 (0.7947) time: 0.1635 data: 0.0682 max mem: 9377 +Train: [88] [ 400/6250] eta: 0:15:25 lr: 0.000005 grad: 0.1870 (0.2092) loss: 0.7890 (0.7927) time: 0.1400 data: 0.0482 max mem: 9377 +Train: [88] [ 500/6250] eta: 0:14:54 lr: 0.000005 grad: 0.1932 (0.2064) loss: 0.7857 (0.7910) time: 0.1400 data: 0.0531 max mem: 9377 +Train: [88] [ 600/6250] eta: 0:14:25 lr: 0.000005 grad: 0.1896 (0.2039) loss: 0.7814 (0.7893) time: 0.1384 data: 0.0487 max mem: 9377 +Train: [88] [ 700/6250] eta: 0:14:05 lr: 0.000005 grad: 0.1950 (0.2022) loss: 0.7728 (0.7887) time: 0.1391 data: 0.0479 max mem: 9377 +Train: [88] [ 800/6250] eta: 0:13:48 lr: 0.000005 grad: 0.1972 (0.2010) loss: 0.7780 (0.7880) time: 0.1428 data: 0.0507 max mem: 9377 +Train: [88] [ 900/6250] eta: 0:13:41 lr: 0.000005 grad: 0.1883 (0.2002) loss: 0.7776 (0.7875) time: 0.1525 data: 0.0683 max mem: 9377 +Train: [88] [1000/6250] eta: 0:13:31 lr: 0.000005 grad: 0.1749 (0.1987) loss: 0.7819 (0.7873) time: 0.1898 data: 0.1070 max mem: 9377 +Train: [88] [1100/6250] eta: 0:13:15 lr: 0.000005 grad: 0.1909 (0.1976) loss: 0.7844 (0.7868) time: 0.1526 data: 0.0690 max mem: 9377 +Train: [88] [1200/6250] eta: 0:13:08 lr: 0.000005 grad: 0.1851 (0.1968) loss: 0.7779 (0.7865) time: 0.1531 data: 0.0668 max mem: 9377 +Train: [88] [1300/6250] eta: 0:12:56 lr: 0.000005 grad: 0.1791 (0.1957) loss: 0.7765 (0.7863) time: 0.1692 data: 0.0892 max mem: 9377 +Train: [88] [1400/6250] eta: 0:12:42 lr: 0.000005 grad: 0.1858 (0.1954) loss: 0.7818 (0.7859) time: 0.1620 data: 0.0781 max mem: 9377 +Train: [88] [1500/6250] eta: 0:12:25 lr: 0.000005 grad: 0.1821 (0.1948) loss: 0.7815 (0.7857) time: 0.1564 data: 0.0761 max mem: 9377 +Train: [88] [1600/6250] eta: 0:12:10 lr: 0.000005 grad: 0.1833 (0.1943) loss: 0.7908 (0.7854) time: 0.1558 data: 0.0793 max mem: 9377 +Train: [88] [1700/6250] eta: 0:11:52 lr: 0.000005 grad: 0.1826 (0.1940) loss: 0.7851 (0.7853) time: 0.1360 data: 0.0570 max mem: 9377 +Train: [88] [1800/6250] eta: 0:11:34 lr: 0.000005 grad: 0.1844 (0.1934) loss: 0.7800 (0.7852) time: 0.1406 data: 0.0604 max mem: 9377 +Train: [88] [1900/6250] eta: 0:11:17 lr: 0.000005 grad: 0.1867 (0.1929) loss: 0.7879 (0.7851) time: 0.1448 data: 0.0645 max mem: 9377 +Train: [88] [2000/6250] eta: 0:10:57 lr: 0.000005 grad: 0.1864 (0.1926) loss: 0.7770 (0.7849) time: 0.1663 data: 0.0833 max mem: 9377 +Train: [88] [2100/6250] eta: 0:10:37 lr: 0.000005 grad: 0.1869 (0.1924) loss: 0.7832 (0.7847) time: 0.1355 data: 0.0484 max mem: 9377 +Train: [88] [2200/6250] eta: 0:10:21 lr: 0.000005 grad: 0.1862 (0.1923) loss: 0.7787 (0.7846) time: 0.1332 data: 0.0469 max mem: 9377 +Train: [88] [2300/6250] eta: 0:10:03 lr: 0.000005 grad: 0.1843 (0.1923) loss: 0.7834 (0.7845) time: 0.1498 data: 0.0661 max mem: 9377 +Train: [88] [2400/6250] eta: 0:09:44 lr: 0.000005 grad: 0.1929 (0.1924) loss: 0.7822 (0.7845) time: 0.1465 data: 0.0680 max mem: 9377 +Train: [88] [2500/6250] eta: 0:09:26 lr: 0.000005 grad: 0.1909 (0.1923) loss: 0.7742 (0.7845) time: 0.1368 data: 0.0522 max mem: 9377 +Train: [88] [2600/6250] eta: 0:09:09 lr: 0.000005 grad: 0.1846 (0.1922) loss: 0.7884 (0.7845) time: 0.1406 data: 0.0593 max mem: 9377 +Train: [88] [2700/6250] eta: 0:08:53 lr: 0.000005 grad: 0.1952 (0.1924) loss: 0.7749 (0.7843) time: 0.1147 data: 0.0355 max mem: 9377 +Train: [88] [2800/6250] eta: 0:08:37 lr: 0.000005 grad: 0.1827 (0.1925) loss: 0.7870 (0.7843) time: 0.1564 data: 0.0750 max mem: 9377 +Train: [88] [2900/6250] eta: 0:08:22 lr: 0.000004 grad: 0.1831 (0.1926) loss: 0.7781 (0.7842) time: 0.1459 data: 0.0728 max mem: 9377 +Train: [88] [3000/6250] eta: 0:08:06 lr: 0.000004 grad: 0.1866 (0.1926) loss: 0.7763 (0.7840) time: 0.1362 data: 0.0537 max mem: 9377 +Train: [88] [3100/6250] eta: 0:07:50 lr: 0.000004 grad: 0.1989 (0.1926) loss: 0.7811 (0.7839) time: 0.1310 data: 0.0486 max mem: 9377 +Train: [88] [3200/6250] eta: 0:07:34 lr: 0.000004 grad: 0.1926 (0.1926) loss: 0.7730 (0.7837) time: 0.1690 data: 0.0774 max mem: 9377 +Train: [88] [3300/6250] eta: 0:07:18 lr: 0.000004 grad: 0.1935 (0.1926) loss: 0.7847 (0.7836) time: 0.1456 data: 0.0637 max mem: 9377 +Train: [88] [3400/6250] eta: 0:07:02 lr: 0.000004 grad: 0.1911 (0.1925) loss: 0.7760 (0.7835) time: 0.1308 data: 0.0474 max mem: 9377 +Train: [88] [3500/6250] eta: 0:06:47 lr: 0.000004 grad: 0.1881 (0.1925) loss: 0.7813 (0.7833) time: 0.1207 data: 0.0281 max mem: 9377 +Train: [88] [3600/6250] eta: 0:06:31 lr: 0.000004 grad: 0.1875 (0.1924) loss: 0.7813 (0.7833) time: 0.1406 data: 0.0596 max mem: 9377 +Train: [88] [3700/6250] eta: 0:06:16 lr: 0.000004 grad: 0.2048 (0.1925) loss: 0.7701 (0.7832) time: 0.1338 data: 0.0572 max mem: 9377 +Train: [88] [3800/6250] eta: 0:06:01 lr: 0.000004 grad: 0.1840 (0.1924) loss: 0.7830 (0.7831) time: 0.1280 data: 0.0440 max mem: 9377 +Train: [88] [3900/6250] eta: 0:05:46 lr: 0.000004 grad: 0.1819 (0.1923) loss: 0.7816 (0.7831) time: 0.1344 data: 0.0512 max mem: 9377 +Train: [88] [4000/6250] eta: 0:05:32 lr: 0.000004 grad: 0.1951 (0.1924) loss: 0.7797 (0.7830) time: 0.1592 data: 0.0791 max mem: 9377 +Train: [88] [4100/6250] eta: 0:05:18 lr: 0.000004 grad: 0.1859 (0.1927) loss: 0.7839 (0.7829) time: 0.1494 data: 0.0636 max mem: 9377 +Train: [88] [4200/6250] eta: 0:05:03 lr: 0.000004 grad: 0.1876 (0.1926) loss: 0.7800 (0.7830) time: 0.1453 data: 0.0550 max mem: 9377 +Train: [88] [4300/6250] eta: 0:04:48 lr: 0.000004 grad: 0.1898 (0.1924) loss: 0.7826 (0.7830) time: 0.1540 data: 0.0694 max mem: 9377 +Train: [88] [4400/6250] eta: 0:04:33 lr: 0.000004 grad: 0.1850 (0.1923) loss: 0.7845 (0.7830) time: 0.1352 data: 0.0534 max mem: 9377 +Train: [88] [4500/6250] eta: 0:04:18 lr: 0.000004 grad: 0.1853 (0.1922) loss: 0.7850 (0.7831) time: 0.1405 data: 0.0520 max mem: 9377 +Train: [88] [4600/6250] eta: 0:04:03 lr: 0.000004 grad: 0.1835 (0.1922) loss: 0.7881 (0.7831) time: 0.1295 data: 0.0472 max mem: 9377 +Train: [88] [4700/6250] eta: 0:03:48 lr: 0.000004 grad: 0.1929 (0.1922) loss: 0.7892 (0.7832) time: 0.1129 data: 0.0256 max mem: 9377 +Train: [88] [4800/6250] eta: 0:03:33 lr: 0.000004 grad: 0.1873 (0.1921) loss: 0.7816 (0.7832) time: 0.1446 data: 0.0585 max mem: 9377 +Train: [88] [4900/6250] eta: 0:03:18 lr: 0.000004 grad: 0.1877 (0.1920) loss: 0.7881 (0.7832) time: 0.1202 data: 0.0336 max mem: 9377 +Train: [88] [5000/6250] eta: 0:03:02 lr: 0.000004 grad: 0.1780 (0.1920) loss: 0.7864 (0.7832) time: 0.1201 data: 0.0314 max mem: 9377 +Train: [88] [5100/6250] eta: 0:02:47 lr: 0.000004 grad: 0.1935 (0.1921) loss: 0.7823 (0.7832) time: 0.1285 data: 0.0407 max mem: 9377 +Train: [88] [5200/6250] eta: 0:02:33 lr: 0.000004 grad: 0.1973 (0.1920) loss: 0.7775 (0.7832) time: 0.1151 data: 0.0284 max mem: 9377 +Train: [88] [5300/6250] eta: 0:02:18 lr: 0.000004 grad: 0.1805 (0.1920) loss: 0.7869 (0.7832) time: 0.1410 data: 0.0588 max mem: 9377 +Train: [88] [5400/6250] eta: 0:02:03 lr: 0.000004 grad: 0.1912 (0.1920) loss: 0.7799 (0.7832) time: 0.1401 data: 0.0650 max mem: 9377 +Train: [88] [5500/6250] eta: 0:01:48 lr: 0.000004 grad: 0.1882 (0.1920) loss: 0.7845 (0.7833) time: 0.1341 data: 0.0463 max mem: 9377 +Train: [88] [5600/6250] eta: 0:01:34 lr: 0.000004 grad: 0.1834 (0.1919) loss: 0.7827 (0.7833) time: 0.1291 data: 0.0448 max mem: 9377 +Train: [88] [5700/6250] eta: 0:01:19 lr: 0.000004 grad: 0.1730 (0.1918) loss: 0.7908 (0.7833) time: 0.1477 data: 0.0713 max mem: 9377 +Train: [88] [5800/6250] eta: 0:01:05 lr: 0.000004 grad: 0.1760 (0.1917) loss: 0.7932 (0.7834) time: 0.1790 data: 0.0958 max mem: 9377 +Train: [88] [5900/6250] eta: 0:00:50 lr: 0.000004 grad: 0.1906 (0.1917) loss: 0.7868 (0.7834) time: 0.1732 data: 0.0932 max mem: 9377 +Train: [88] [6000/6250] eta: 0:00:36 lr: 0.000004 grad: 0.1880 (0.1916) loss: 0.7861 (0.7834) time: 0.1517 data: 0.0697 max mem: 9377 +Train: [88] [6100/6250] eta: 0:00:21 lr: 0.000004 grad: 0.1819 (0.1916) loss: 0.7811 (0.7835) time: 0.1675 data: 0.0823 max mem: 9377 +Train: [88] [6200/6250] eta: 0:00:07 lr: 0.000004 grad: 0.1870 (0.1917) loss: 0.7859 (0.7835) time: 0.1410 data: 0.0594 max mem: 9377 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.1774 (0.1917) loss: 0.7809 (0.7835) time: 0.1389 data: 0.0534 max mem: 9377 +Train: [88] Total time: 0:15:20 (0.1473 s / it) +Averaged stats: lr: 0.000004 grad: 0.1774 (0.1917) loss: 0.7809 (0.7835) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:06:02 loss: 0.8368 (0.8368) time: 5.8459 data: 5.8161 max mem: 9377 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.8404 (0.8427) time: 0.1102 data: 0.0856 max mem: 9377 +Eval (hcp-train-subset): [88] Total time: 0:00:13 (0.2166 s / it) +Averaged stats (hcp-train-subset): loss: 0.8404 (0.8427) +Eval (hcp-val): [88] [ 0/62] eta: 0:04:20 loss: 0.8403 (0.8403) time: 4.1957 data: 4.1076 max mem: 9377 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.8379 (0.8409) time: 0.1104 data: 0.0857 max mem: 9377 +Eval (hcp-val): [88] Total time: 0:00:12 (0.2054 s / it) +Averaged stats (hcp-val): loss: 0.8379 (0.8409) +Eval (nsd-val): [88] [ 0/62] eta: 0:05:40 loss: 0.8281 (0.8281) time: 5.4960 data: 5.4655 max mem: 9377 +Eval (nsd-val): [88] [61/62] eta: 0:00:00 loss: 0.8364 (0.8368) time: 0.1152 data: 0.0887 max mem: 9377 +Eval (nsd-val): [88] Total time: 0:00:12 (0.2036 s / it) +Averaged stats (nsd-val): loss: 0.8364 (0.8368) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 9:21:18 lr: 0.000004 grad: 0.1740 (0.1740) loss: 0.7973 (0.7973) time: 5.3886 data: 5.1159 max mem: 9377 +Train: [89] [ 100/6250] eta: 0:19:37 lr: 0.000004 grad: 0.2112 (0.2256) loss: 0.7996 (0.8048) time: 0.1344 data: 0.0313 max mem: 9377 +Train: [89] [ 200/6250] eta: 0:17:00 lr: 0.000004 grad: 0.2033 (0.2235) loss: 0.8031 (0.7977) time: 0.1478 data: 0.0515 max mem: 9377 +Train: [89] [ 300/6250] eta: 0:15:48 lr: 0.000004 grad: 0.1962 (0.2160) loss: 0.7798 (0.7957) time: 0.1342 data: 0.0350 max mem: 9377 +Train: [89] [ 400/6250] eta: 0:15:01 lr: 0.000004 grad: 0.1955 (0.2113) loss: 0.7784 (0.7941) time: 0.1537 data: 0.0597 max mem: 9377 +Train: [89] [ 500/6250] eta: 0:14:18 lr: 0.000004 grad: 0.1924 (0.2084) loss: 0.7790 (0.7918) time: 0.1329 data: 0.0429 max mem: 9377 +Train: [89] [ 600/6250] eta: 0:13:49 lr: 0.000004 grad: 0.1906 (0.2054) loss: 0.7797 (0.7907) time: 0.1472 data: 0.0587 max mem: 9377 +Train: [89] [ 700/6250] eta: 0:13:30 lr: 0.000004 grad: 0.1902 (0.2044) loss: 0.7813 (0.7896) time: 0.1469 data: 0.0557 max mem: 9377 +Train: [89] [ 800/6250] eta: 0:13:09 lr: 0.000004 grad: 0.1806 (0.2027) loss: 0.7862 (0.7889) time: 0.1310 data: 0.0366 max mem: 9377 +Train: [89] [ 900/6250] eta: 0:12:54 lr: 0.000004 grad: 0.1782 (0.2009) loss: 0.7893 (0.7887) time: 0.1203 data: 0.0417 max mem: 9377 +Train: [89] [1000/6250] eta: 0:12:38 lr: 0.000004 grad: 0.1753 (0.1996) loss: 0.7829 (0.7885) time: 0.1429 data: 0.0539 max mem: 9377 +Train: [89] [1100/6250] eta: 0:12:20 lr: 0.000004 grad: 0.1919 (0.1989) loss: 0.7818 (0.7880) time: 0.1392 data: 0.0546 max mem: 9377 +Train: [89] [1200/6250] eta: 0:12:02 lr: 0.000004 grad: 0.1838 (0.1978) loss: 0.7861 (0.7880) time: 0.1161 data: 0.0240 max mem: 9377 +Train: [89] [1300/6250] eta: 0:11:46 lr: 0.000004 grad: 0.1680 (0.1969) loss: 0.7895 (0.7878) time: 0.1451 data: 0.0623 max mem: 9377 +Train: [89] [1400/6250] eta: 0:11:32 lr: 0.000004 grad: 0.1726 (0.1961) loss: 0.7878 (0.7879) time: 0.1432 data: 0.0575 max mem: 9377 +Train: [89] [1500/6250] eta: 0:11:16 lr: 0.000004 grad: 0.1775 (0.1952) loss: 0.7854 (0.7878) time: 0.1401 data: 0.0602 max mem: 9377 +Train: [89] [1600/6250] eta: 0:11:01 lr: 0.000004 grad: 0.1765 (0.1945) loss: 0.7969 (0.7876) time: 0.1308 data: 0.0552 max mem: 9377 +Train: [89] [1700/6250] eta: 0:10:46 lr: 0.000004 grad: 0.1745 (0.1937) loss: 0.7874 (0.7876) time: 0.1390 data: 0.0572 max mem: 9377 +Train: [89] [1800/6250] eta: 0:10:31 lr: 0.000004 grad: 0.1835 (0.1932) loss: 0.7818 (0.7875) time: 0.1431 data: 0.0601 max mem: 9377 +Train: [89] [1900/6250] eta: 0:10:15 lr: 0.000004 grad: 0.1876 (0.1930) loss: 0.7782 (0.7875) time: 0.1321 data: 0.0534 max mem: 9377 +Train: [89] [2000/6250] eta: 0:10:00 lr: 0.000004 grad: 0.1779 (0.1925) loss: 0.7934 (0.7874) time: 0.1412 data: 0.0579 max mem: 9377 +Train: [89] [2100/6250] eta: 0:09:45 lr: 0.000004 grad: 0.1852 (0.1921) loss: 0.7848 (0.7873) time: 0.1315 data: 0.0507 max mem: 9377 +Train: [89] [2200/6250] eta: 0:09:32 lr: 0.000004 grad: 0.1833 (0.1917) loss: 0.7864 (0.7874) time: 0.1678 data: 0.0880 max mem: 9377 +Train: [89] [2300/6250] eta: 0:09:17 lr: 0.000004 grad: 0.1736 (0.1912) loss: 0.7900 (0.7872) time: 0.1367 data: 0.0552 max mem: 9377 +Train: [89] [2400/6250] eta: 0:09:03 lr: 0.000004 grad: 0.1816 (0.1909) loss: 0.7990 (0.7871) time: 0.1494 data: 0.0637 max mem: 9377 +Train: [89] [2500/6250] eta: 0:08:48 lr: 0.000004 grad: 0.1795 (0.1905) loss: 0.7962 (0.7872) time: 0.1534 data: 0.0749 max mem: 9377 +Train: [89] [2600/6250] eta: 0:08:35 lr: 0.000004 grad: 0.1728 (0.1903) loss: 0.7913 (0.7871) time: 0.1616 data: 0.0807 max mem: 9377 +Train: [89] [2700/6250] eta: 0:08:21 lr: 0.000004 grad: 0.1733 (0.1899) loss: 0.7888 (0.7871) time: 0.1459 data: 0.0627 max mem: 9377 +Train: [89] [2800/6250] eta: 0:08:08 lr: 0.000004 grad: 0.1769 (0.1895) loss: 0.7891 (0.7871) time: 0.1346 data: 0.0589 max mem: 9377 +Train: [89] [2900/6250] eta: 0:07:54 lr: 0.000004 grad: 0.1818 (0.1898) loss: 0.7890 (0.7871) time: 0.1548 data: 0.0790 max mem: 9377 +Train: [89] [3000/6250] eta: 0:07:40 lr: 0.000004 grad: 0.1796 (0.1895) loss: 0.7832 (0.7872) time: 0.1661 data: 0.0876 max mem: 9377 +Train: [89] [3100/6250] eta: 0:07:26 lr: 0.000004 grad: 0.1811 (0.1893) loss: 0.7788 (0.7871) time: 0.1630 data: 0.0873 max mem: 9377 +Train: [89] [3200/6250] eta: 0:07:12 lr: 0.000004 grad: 0.1786 (0.1892) loss: 0.7841 (0.7870) time: 0.1372 data: 0.0600 max mem: 9377 +Train: [89] [3300/6250] eta: 0:06:58 lr: 0.000004 grad: 0.1893 (0.1893) loss: 0.7855 (0.7868) time: 0.1516 data: 0.0762 max mem: 9377 +Train: [89] [3400/6250] eta: 0:06:44 lr: 0.000004 grad: 0.1919 (0.1893) loss: 0.7810 (0.7867) time: 0.1458 data: 0.0606 max mem: 9377 +Train: [89] [3500/6250] eta: 0:06:30 lr: 0.000004 grad: 0.1827 (0.1892) loss: 0.7812 (0.7866) time: 0.1488 data: 0.0668 max mem: 9377 +Train: [89] [3600/6250] eta: 0:06:16 lr: 0.000004 grad: 0.1804 (0.1893) loss: 0.7753 (0.7865) time: 0.1425 data: 0.0650 max mem: 9377 +Train: [89] [3700/6250] eta: 0:06:02 lr: 0.000004 grad: 0.1862 (0.1894) loss: 0.7808 (0.7864) time: 0.1540 data: 0.0794 max mem: 9377 +Train: [89] [3800/6250] eta: 0:05:48 lr: 0.000004 grad: 0.1941 (0.1894) loss: 0.7789 (0.7863) time: 0.1480 data: 0.0699 max mem: 9377 +Train: [89] [3900/6250] eta: 0:05:34 lr: 0.000004 grad: 0.1818 (0.1892) loss: 0.7753 (0.7863) time: 0.1351 data: 0.0524 max mem: 9377 +Train: [89] [4000/6250] eta: 0:05:21 lr: 0.000004 grad: 0.1854 (0.1893) loss: 0.7826 (0.7862) time: 0.1528 data: 0.0680 max mem: 9377 +Train: [89] [4100/6250] eta: 0:05:07 lr: 0.000004 grad: 0.1755 (0.1892) loss: 0.7836 (0.7861) time: 0.1780 data: 0.0821 max mem: 9377 +Train: [89] [4200/6250] eta: 0:04:53 lr: 0.000004 grad: 0.1748 (0.1892) loss: 0.7794 (0.7860) time: 0.1431 data: 0.0569 max mem: 9377 +Train: [89] [4300/6250] eta: 0:04:39 lr: 0.000004 grad: 0.1757 (0.1891) loss: 0.7919 (0.7860) time: 0.1320 data: 0.0548 max mem: 9377 +Train: [89] [4400/6250] eta: 0:04:26 lr: 0.000004 grad: 0.1727 (0.1891) loss: 0.7877 (0.7859) time: 0.1756 data: 0.0885 max mem: 9377 +Train: [89] [4500/6250] eta: 0:04:12 lr: 0.000004 grad: 0.1806 (0.1890) loss: 0.7840 (0.7860) time: 0.1622 data: 0.0815 max mem: 9377 +Train: [89] [4600/6250] eta: 0:03:58 lr: 0.000004 grad: 0.1823 (0.1889) loss: 0.7840 (0.7860) time: 0.1791 data: 0.0961 max mem: 9377 +Train: [89] [4700/6250] eta: 0:03:44 lr: 0.000004 grad: 0.1812 (0.1888) loss: 0.7889 (0.7860) time: 0.1382 data: 0.0595 max mem: 9377 +Train: [89] [4800/6250] eta: 0:03:30 lr: 0.000004 grad: 0.1945 (0.1887) loss: 0.7822 (0.7861) time: 0.1323 data: 0.0441 max mem: 9377 +Train: [89] [4900/6250] eta: 0:03:15 lr: 0.000004 grad: 0.1781 (0.1886) loss: 0.7874 (0.7861) time: 0.1442 data: 0.0540 max mem: 9377 +Train: [89] [5000/6250] eta: 0:03:00 lr: 0.000004 grad: 0.1935 (0.1885) loss: 0.7837 (0.7862) time: 0.1241 data: 0.0428 max mem: 9377 +Train: [89] [5100/6250] eta: 0:02:46 lr: 0.000004 grad: 0.1805 (0.1885) loss: 0.7917 (0.7862) time: 0.1639 data: 0.0820 max mem: 9377 +Train: [89] [5200/6250] eta: 0:02:32 lr: 0.000003 grad: 0.1740 (0.1884) loss: 0.7892 (0.7863) time: 0.1750 data: 0.0947 max mem: 9377 +Train: [89] [5300/6250] eta: 0:02:18 lr: 0.000003 grad: 0.1889 (0.1883) loss: 0.7923 (0.7864) time: 0.1661 data: 0.0864 max mem: 9377 +Train: [89] [5400/6250] eta: 0:02:03 lr: 0.000003 grad: 0.1776 (0.1882) loss: 0.7901 (0.7865) time: 0.1552 data: 0.0781 max mem: 9377 +Train: [89] [5500/6250] eta: 0:01:49 lr: 0.000003 grad: 0.1681 (0.1881) loss: 0.7947 (0.7866) time: 0.1703 data: 0.0934 max mem: 9377 +Train: [89] [5600/6250] eta: 0:01:35 lr: 0.000003 grad: 0.1812 (0.1881) loss: 0.7855 (0.7866) time: 0.1717 data: 0.0944 max mem: 9377 +Train: [89] [5700/6250] eta: 0:01:20 lr: 0.000003 grad: 0.1797 (0.1880) loss: 0.7873 (0.7866) time: 0.1678 data: 0.0887 max mem: 9377 +Train: [89] [5800/6250] eta: 0:01:05 lr: 0.000003 grad: 0.1840 (0.1880) loss: 0.7882 (0.7866) time: 0.1535 data: 0.0692 max mem: 9377 +Train: [89] [5900/6250] eta: 0:00:51 lr: 0.000003 grad: 0.1825 (0.1880) loss: 0.7919 (0.7866) time: 0.1662 data: 0.0785 max mem: 9377 +Train: [89] [6000/6250] eta: 0:00:36 lr: 0.000003 grad: 0.1819 (0.1879) loss: 0.7781 (0.7866) time: 0.1656 data: 0.0838 max mem: 9377 +Train: [89] [6100/6250] eta: 0:00:22 lr: 0.000003 grad: 0.1822 (0.1879) loss: 0.7844 (0.7866) time: 0.1400 data: 0.0625 max mem: 9377 +Train: [89] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1866 (0.1879) loss: 0.7886 (0.7867) time: 0.1804 data: 0.0970 max mem: 9377 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1740 (0.1878) loss: 0.7900 (0.7867) time: 0.1874 data: 0.1029 max mem: 9377 +Train: [89] Total time: 0:15:28 (0.1486 s / it) +Averaged stats: lr: 0.000003 grad: 0.1740 (0.1878) loss: 0.7900 (0.7867) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:06:19 loss: 0.8395 (0.8395) time: 6.1224 data: 6.0916 max mem: 9377 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.8412 (0.8425) time: 0.1422 data: 0.1158 max mem: 9377 +Eval (hcp-train-subset): [89] Total time: 0:00:15 (0.2491 s / it) +Averaged stats (hcp-train-subset): loss: 0.8412 (0.8425) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [89] [ 0/62] eta: 0:06:13 loss: 0.8344 (0.8344) time: 6.0196 data: 5.9892 max mem: 9377 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.8391 (0.8407) time: 0.1291 data: 0.1042 max mem: 9377 +Eval (hcp-val): [89] Total time: 0:00:15 (0.2443 s / it) +Averaged stats (hcp-val): loss: 0.8391 (0.8407) +Making plots (hcp-val): example=26 +Eval (nsd-val): [89] [ 0/62] eta: 0:04:16 loss: 0.8236 (0.8236) time: 4.1406 data: 4.0411 max mem: 9377 +Eval (nsd-val): [89] [61/62] eta: 0:00:00 loss: 0.8337 (0.8346) time: 0.1136 data: 0.0869 max mem: 9377 +Eval (nsd-val): [89] Total time: 0:00:14 (0.2410 s / it) +Averaged stats (nsd-val): loss: 0.8337 (0.8346) +Making plots (nsd-val): example=61 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00089.pth +Train: [90] [ 0/6250] eta: 12:21:07 lr: 0.000003 grad: 0.1371 (0.1371) loss: 0.8508 (0.8508) time: 7.1148 data: 6.9816 max mem: 9377 +Train: [90] [ 100/6250] eta: 0:23:34 lr: 0.000003 grad: 0.1808 (0.1976) loss: 0.8048 (0.8110) time: 0.1812 data: 0.0756 max mem: 9377 +Train: [90] [ 200/6250] eta: 0:19:43 lr: 0.000003 grad: 0.2096 (0.1994) loss: 0.7869 (0.8009) time: 0.1641 data: 0.0582 max mem: 9377 +Train: [90] [ 300/6250] eta: 0:17:50 lr: 0.000003 grad: 0.1917 (0.2024) loss: 0.7897 (0.7956) time: 0.1349 data: 0.0355 max mem: 9377 +Train: [90] [ 400/6250] eta: 0:17:01 lr: 0.000003 grad: 0.2060 (0.2032) loss: 0.7903 (0.7931) time: 0.1530 data: 0.0578 max mem: 9377 +Train: [90] [ 500/6250] eta: 0:16:36 lr: 0.000003 grad: 0.1968 (0.2011) loss: 0.7814 (0.7921) time: 0.1482 data: 0.0608 max mem: 9377 +Train: [90] [ 600/6250] eta: 0:16:12 lr: 0.000003 grad: 0.1897 (0.1989) loss: 0.7778 (0.7911) time: 0.1656 data: 0.0829 max mem: 9377 +Train: [90] [ 700/6250] eta: 0:15:51 lr: 0.000003 grad: 0.1672 (0.1959) loss: 0.7981 (0.7913) time: 0.1659 data: 0.0809 max mem: 9377 +Train: [90] [ 800/6250] eta: 0:15:38 lr: 0.000003 grad: 0.1709 (0.1939) loss: 0.7952 (0.7913) time: 0.1896 data: 0.1043 max mem: 9377 +Train: [90] [ 900/6250] eta: 0:15:22 lr: 0.000003 grad: 0.1763 (0.1928) loss: 0.7921 (0.7912) time: 0.1580 data: 0.0689 max mem: 9377 +Train: [90] [1000/6250] eta: 0:14:53 lr: 0.000003 grad: 0.1874 (0.1920) loss: 0.7829 (0.7911) time: 0.1174 data: 0.0302 max mem: 9377 +Train: [90] [1100/6250] eta: 0:14:24 lr: 0.000003 grad: 0.1797 (0.1914) loss: 0.7810 (0.7909) time: 0.1525 data: 0.0720 max mem: 9377 +Train: [90] [1200/6250] eta: 0:13:53 lr: 0.000003 grad: 0.1889 (0.1911) loss: 0.7950 (0.7905) time: 0.1360 data: 0.0490 max mem: 9377 +Train: [90] [1300/6250] eta: 0:13:28 lr: 0.000003 grad: 0.1861 (0.1906) loss: 0.7874 (0.7901) time: 0.1453 data: 0.0619 max mem: 9377 +Train: [90] [1400/6250] eta: 0:13:05 lr: 0.000003 grad: 0.1768 (0.1905) loss: 0.7877 (0.7896) time: 0.1540 data: 0.0670 max mem: 9377 +Train: [90] [1500/6250] eta: 0:12:42 lr: 0.000003 grad: 0.1863 (0.1903) loss: 0.7795 (0.7893) time: 0.1266 data: 0.0469 max mem: 9377 +Train: [90] [1600/6250] eta: 0:12:22 lr: 0.000003 grad: 0.1851 (0.1900) loss: 0.7770 (0.7889) time: 0.1584 data: 0.0748 max mem: 9377 +Train: [90] [1700/6250] eta: 0:12:00 lr: 0.000003 grad: 0.1846 (0.1898) loss: 0.7861 (0.7887) time: 0.1428 data: 0.0579 max mem: 9377 +Train: [90] [1800/6250] eta: 0:11:41 lr: 0.000003 grad: 0.1802 (0.1897) loss: 0.7835 (0.7882) time: 0.1380 data: 0.0578 max mem: 9377 +Train: [90] [1900/6250] eta: 0:11:22 lr: 0.000003 grad: 0.1849 (0.1899) loss: 0.7888 (0.7878) time: 0.1736 data: 0.0858 max mem: 9377 +Train: [90] [2000/6250] eta: 0:11:03 lr: 0.000003 grad: 0.1910 (0.1897) loss: 0.7854 (0.7876) time: 0.1521 data: 0.0780 max mem: 9377 +Train: [90] [2100/6250] eta: 0:10:44 lr: 0.000003 grad: 0.1759 (0.1896) loss: 0.7874 (0.7875) time: 0.1311 data: 0.0443 max mem: 9377 +Train: [90] [2200/6250] eta: 0:10:26 lr: 0.000003 grad: 0.1913 (0.1894) loss: 0.7807 (0.7875) time: 0.1279 data: 0.0408 max mem: 9377 +Train: [90] [2300/6250] eta: 0:10:07 lr: 0.000003 grad: 0.1730 (0.1891) loss: 0.7913 (0.7876) time: 0.1418 data: 0.0561 max mem: 9377 +Train: [90] [2400/6250] eta: 0:09:49 lr: 0.000003 grad: 0.1789 (0.1888) loss: 0.7847 (0.7877) time: 0.1343 data: 0.0581 max mem: 9377 +Train: [90] [2500/6250] eta: 0:09:32 lr: 0.000003 grad: 0.1726 (0.1883) loss: 0.7857 (0.7879) time: 0.1423 data: 0.0557 max mem: 9377 +Train: [90] [2600/6250] eta: 0:09:16 lr: 0.000003 grad: 0.1683 (0.1879) loss: 0.7925 (0.7881) time: 0.1321 data: 0.0456 max mem: 9377 +Train: [90] [2700/6250] eta: 0:08:58 lr: 0.000003 grad: 0.1817 (0.1876) loss: 0.7888 (0.7883) time: 0.1566 data: 0.0755 max mem: 9377 +Train: [90] [2800/6250] eta: 0:08:43 lr: 0.000003 grad: 0.1832 (0.1873) loss: 0.7915 (0.7884) time: 0.1344 data: 0.0549 max mem: 9377 +Train: [90] [2900/6250] eta: 0:08:27 lr: 0.000003 grad: 0.1767 (0.1869) loss: 0.7909 (0.7886) time: 0.1628 data: 0.0813 max mem: 9377 +Train: [90] [3000/6250] eta: 0:08:11 lr: 0.000003 grad: 0.1668 (0.1867) loss: 0.7928 (0.7888) time: 0.1324 data: 0.0470 max mem: 9377 +Train: [90] [3100/6250] eta: 0:07:55 lr: 0.000003 grad: 0.1781 (0.1865) loss: 0.7884 (0.7889) time: 0.1434 data: 0.0641 max mem: 9377 +Train: [90] [3200/6250] eta: 0:07:39 lr: 0.000003 grad: 0.1761 (0.1863) loss: 0.7957 (0.7889) time: 0.1243 data: 0.0383 max mem: 9377 +Train: [90] [3300/6250] eta: 0:07:24 lr: 0.000003 grad: 0.1792 (0.1863) loss: 0.7857 (0.7889) time: 0.1512 data: 0.0690 max mem: 9377 +Train: [90] [3400/6250] eta: 0:07:09 lr: 0.000003 grad: 0.1810 (0.1862) loss: 0.7950 (0.7889) time: 0.1169 data: 0.0349 max mem: 9377 +Train: [90] [3500/6250] eta: 0:06:53 lr: 0.000003 grad: 0.1852 (0.1861) loss: 0.7846 (0.7889) time: 0.1454 data: 0.0654 max mem: 9377 +Train: [90] [3600/6250] eta: 0:06:37 lr: 0.000003 grad: 0.1749 (0.1860) loss: 0.7911 (0.7888) time: 0.1279 data: 0.0451 max mem: 9377 +Train: [90] [3700/6250] eta: 0:06:21 lr: 0.000003 grad: 0.1810 (0.1859) loss: 0.7811 (0.7887) time: 0.1441 data: 0.0609 max mem: 9377 +Train: [90] [3800/6250] eta: 0:06:06 lr: 0.000003 grad: 0.1783 (0.1858) loss: 0.7839 (0.7887) time: 0.1390 data: 0.0552 max mem: 9377 +Train: [90] [3900/6250] eta: 0:05:49 lr: 0.000003 grad: 0.1711 (0.1859) loss: 0.7976 (0.7887) time: 0.1027 data: 0.0187 max mem: 9377 +Train: [90] [4000/6250] eta: 0:05:35 lr: 0.000003 grad: 0.1792 (0.1859) loss: 0.7901 (0.7886) time: 0.1497 data: 0.0638 max mem: 9377 +Train: [90] [4100/6250] eta: 0:05:20 lr: 0.000003 grad: 0.1786 (0.1859) loss: 0.7867 (0.7885) time: 0.1444 data: 0.0542 max mem: 9377 +Train: [90] [4200/6250] eta: 0:05:05 lr: 0.000003 grad: 0.1767 (0.1859) loss: 0.7870 (0.7884) time: 0.1548 data: 0.0753 max mem: 9377 +Train: [90] [4300/6250] eta: 0:04:50 lr: 0.000003 grad: 0.1785 (0.1859) loss: 0.7842 (0.7883) time: 0.1659 data: 0.0848 max mem: 9377 +Train: [90] [4400/6250] eta: 0:04:35 lr: 0.000003 grad: 0.1785 (0.1859) loss: 0.7856 (0.7883) time: 0.1443 data: 0.0651 max mem: 9377 +Train: [90] [4500/6250] eta: 0:04:19 lr: 0.000003 grad: 0.1835 (0.1860) loss: 0.7910 (0.7881) time: 0.1322 data: 0.0476 max mem: 9377 +Train: [90] [4600/6250] eta: 0:04:04 lr: 0.000003 grad: 0.1809 (0.1860) loss: 0.7854 (0.7881) time: 0.1257 data: 0.0416 max mem: 9377 +Train: [90] [4700/6250] eta: 0:03:49 lr: 0.000003 grad: 0.1851 (0.1861) loss: 0.7807 (0.7880) time: 0.1263 data: 0.0425 max mem: 9377 +Train: [90] [4800/6250] eta: 0:03:34 lr: 0.000003 grad: 0.1869 (0.1861) loss: 0.7815 (0.7879) time: 0.1315 data: 0.0499 max mem: 9377 +Train: [90] [4900/6250] eta: 0:03:19 lr: 0.000003 grad: 0.1905 (0.1862) loss: 0.7777 (0.7878) time: 0.1248 data: 0.0345 max mem: 9377 +Train: [90] [5000/6250] eta: 0:03:04 lr: 0.000003 grad: 0.1875 (0.1862) loss: 0.7783 (0.7877) time: 0.1468 data: 0.0643 max mem: 9377 +Train: [90] [5100/6250] eta: 0:02:49 lr: 0.000003 grad: 0.1748 (0.1864) loss: 0.7841 (0.7876) time: 0.1014 data: 0.0105 max mem: 9377 +Train: [90] [5200/6250] eta: 0:02:34 lr: 0.000003 grad: 0.1778 (0.1865) loss: 0.7828 (0.7875) time: 0.1512 data: 0.0663 max mem: 9377 +Train: [90] [5300/6250] eta: 0:02:19 lr: 0.000003 grad: 0.1971 (0.1865) loss: 0.7766 (0.7874) time: 0.1200 data: 0.0368 max mem: 9377 +Train: [90] [5400/6250] eta: 0:02:04 lr: 0.000003 grad: 0.1716 (0.1865) loss: 0.7906 (0.7874) time: 0.1377 data: 0.0538 max mem: 9377 +Train: [90] [5500/6250] eta: 0:01:49 lr: 0.000003 grad: 0.1891 (0.1866) loss: 0.7817 (0.7873) time: 0.1248 data: 0.0445 max mem: 9377 +Train: [90] [5600/6250] eta: 0:01:34 lr: 0.000003 grad: 0.1848 (0.1865) loss: 0.7889 (0.7873) time: 0.1410 data: 0.0585 max mem: 9377 +Train: [90] [5700/6250] eta: 0:01:20 lr: 0.000003 grad: 0.1865 (0.1865) loss: 0.7800 (0.7872) time: 0.1477 data: 0.0682 max mem: 9377 +Train: [90] [5800/6250] eta: 0:01:05 lr: 0.000003 grad: 0.1880 (0.1865) loss: 0.7813 (0.7872) time: 0.1660 data: 0.0789 max mem: 9377 +Train: [90] [5900/6250] eta: 0:00:51 lr: 0.000003 grad: 0.1803 (0.1864) loss: 0.7885 (0.7872) time: 0.1528 data: 0.0695 max mem: 9377 +Train: [90] [6000/6250] eta: 0:00:36 lr: 0.000003 grad: 0.1875 (0.1864) loss: 0.7858 (0.7872) time: 0.1392 data: 0.0547 max mem: 9377 +Train: [90] [6100/6250] eta: 0:00:21 lr: 0.000003 grad: 0.1789 (0.1864) loss: 0.7839 (0.7871) time: 0.1432 data: 0.0639 max mem: 9377 +Train: [90] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1743 (0.1864) loss: 0.7832 (0.7870) time: 0.1316 data: 0.0495 max mem: 9377 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1826 (0.1863) loss: 0.7802 (0.7870) time: 0.1444 data: 0.0635 max mem: 9377 +Train: [90] Total time: 0:15:18 (0.1469 s / it) +Averaged stats: lr: 0.000003 grad: 0.1826 (0.1863) loss: 0.7802 (0.7870) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:05:46 loss: 0.8359 (0.8359) time: 5.5817 data: 5.5514 max mem: 9377 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.8413 (0.8427) time: 0.1174 data: 0.0927 max mem: 9377 +Eval (hcp-train-subset): [90] Total time: 0:00:13 (0.2100 s / it) +Averaged stats (hcp-train-subset): loss: 0.8413 (0.8427) +Eval (hcp-val): [90] [ 0/62] eta: 0:03:43 loss: 0.8370 (0.8370) time: 3.6028 data: 3.5330 max mem: 9377 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.8386 (0.8407) time: 0.1238 data: 0.0991 max mem: 9377 +Eval (hcp-val): [90] Total time: 0:00:13 (0.2113 s / it) +Averaged stats (hcp-val): loss: 0.8386 (0.8407) +Eval (nsd-val): [90] [ 0/62] eta: 0:04:59 loss: 0.8189 (0.8189) time: 4.8304 data: 4.7996 max mem: 9377 +Eval (nsd-val): [90] [61/62] eta: 0:00:00 loss: 0.8295 (0.8294) time: 0.1099 data: 0.0851 max mem: 9377 +Eval (nsd-val): [90] Total time: 0:00:12 (0.2048 s / it) +Averaged stats (nsd-val): loss: 0.8295 (0.8294) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 8:55:13 lr: 0.000003 grad: 0.7481 (0.7481) loss: 0.7585 (0.7585) time: 5.1382 data: 4.8016 max mem: 9377 +Train: [91] [ 100/6250] eta: 0:19:51 lr: 0.000003 grad: 0.2196 (0.2458) loss: 0.7749 (0.7924) time: 0.1391 data: 0.0341 max mem: 9377 +Train: [91] [ 200/6250] eta: 0:16:31 lr: 0.000003 grad: 0.1944 (0.2322) loss: 0.7860 (0.7859) time: 0.1358 data: 0.0343 max mem: 9377 +Train: [91] [ 300/6250] eta: 0:15:22 lr: 0.000003 grad: 0.1827 (0.2200) loss: 0.7867 (0.7856) time: 0.1358 data: 0.0402 max mem: 9377 +Train: [91] [ 400/6250] eta: 0:14:35 lr: 0.000003 grad: 0.1906 (0.2148) loss: 0.7785 (0.7848) time: 0.1412 data: 0.0510 max mem: 9377 +Train: [91] [ 500/6250] eta: 0:14:11 lr: 0.000003 grad: 0.1998 (0.2113) loss: 0.7909 (0.7840) time: 0.1413 data: 0.0473 max mem: 9377 +Train: [91] [ 600/6250] eta: 0:13:47 lr: 0.000003 grad: 0.1772 (0.2072) loss: 0.7906 (0.7849) time: 0.1361 data: 0.0456 max mem: 9377 +Train: [91] [ 700/6250] eta: 0:13:24 lr: 0.000003 grad: 0.1807 (0.2062) loss: 0.7983 (0.7855) time: 0.1451 data: 0.0520 max mem: 9377 +Train: [91] [ 800/6250] eta: 0:13:09 lr: 0.000003 grad: 0.1958 (0.2048) loss: 0.7836 (0.7856) time: 0.1395 data: 0.0589 max mem: 9377 +Train: [91] [ 900/6250] eta: 0:12:53 lr: 0.000003 grad: 0.1994 (0.2036) loss: 0.7833 (0.7857) time: 0.1424 data: 0.0555 max mem: 9377 +Train: [91] [1000/6250] eta: 0:12:38 lr: 0.000003 grad: 0.1874 (0.2024) loss: 0.7870 (0.7857) time: 0.1386 data: 0.0640 max mem: 9377 +Train: [91] [1100/6250] eta: 0:12:18 lr: 0.000003 grad: 0.1850 (0.2014) loss: 0.7832 (0.7859) time: 0.1329 data: 0.0486 max mem: 9377 +Train: [91] [1200/6250] eta: 0:12:01 lr: 0.000003 grad: 0.1808 (0.2002) loss: 0.7947 (0.7859) time: 0.1531 data: 0.0684 max mem: 9377 +Train: [91] [1300/6250] eta: 0:11:43 lr: 0.000003 grad: 0.1812 (0.1992) loss: 0.7943 (0.7860) time: 0.1511 data: 0.0692 max mem: 9377 +Train: [91] [1400/6250] eta: 0:11:26 lr: 0.000003 grad: 0.1799 (0.1983) loss: 0.7894 (0.7862) time: 0.1301 data: 0.0468 max mem: 9377 +Train: [91] [1500/6250] eta: 0:11:11 lr: 0.000003 grad: 0.1788 (0.1975) loss: 0.7891 (0.7864) time: 0.1356 data: 0.0526 max mem: 9377 +Train: [91] [1600/6250] eta: 0:10:57 lr: 0.000003 grad: 0.1944 (0.1970) loss: 0.7821 (0.7865) time: 0.1433 data: 0.0612 max mem: 9377 +Train: [91] [1700/6250] eta: 0:10:45 lr: 0.000003 grad: 0.1900 (0.1966) loss: 0.7821 (0.7865) time: 0.1368 data: 0.0522 max mem: 9377 +Train: [91] [1800/6250] eta: 0:10:30 lr: 0.000003 grad: 0.1990 (0.1965) loss: 0.7731 (0.7860) time: 0.1376 data: 0.0601 max mem: 9377 +Train: [91] [1900/6250] eta: 0:10:17 lr: 0.000003 grad: 0.1891 (0.1963) loss: 0.7823 (0.7858) time: 0.1319 data: 0.0531 max mem: 9377 +Train: [91] [2000/6250] eta: 0:10:04 lr: 0.000003 grad: 0.1809 (0.1960) loss: 0.7783 (0.7857) time: 0.1384 data: 0.0612 max mem: 9377 +Train: [91] [2100/6250] eta: 0:09:51 lr: 0.000003 grad: 0.1888 (0.1957) loss: 0.7848 (0.7857) time: 0.1468 data: 0.0622 max mem: 9377 +Train: [91] [2200/6250] eta: 0:09:36 lr: 0.000003 grad: 0.1849 (0.1953) loss: 0.7856 (0.7857) time: 0.1284 data: 0.0454 max mem: 9377 +Train: [91] [2300/6250] eta: 0:09:21 lr: 0.000003 grad: 0.1858 (0.1952) loss: 0.7819 (0.7856) time: 0.1247 data: 0.0410 max mem: 9377 +Train: [91] [2400/6250] eta: 0:09:07 lr: 0.000003 grad: 0.1870 (0.1950) loss: 0.7840 (0.7855) time: 0.1190 data: 0.0355 max mem: 9377 +Train: [91] [2500/6250] eta: 0:08:51 lr: 0.000003 grad: 0.1873 (0.1946) loss: 0.7857 (0.7856) time: 0.1404 data: 0.0570 max mem: 9377 +Train: [91] [2600/6250] eta: 0:08:37 lr: 0.000003 grad: 0.1878 (0.1945) loss: 0.7847 (0.7857) time: 0.1637 data: 0.0841 max mem: 9377 +Train: [91] [2700/6250] eta: 0:08:23 lr: 0.000002 grad: 0.1881 (0.1943) loss: 0.7854 (0.7856) time: 0.1288 data: 0.0491 max mem: 9377 +Train: [91] [2800/6250] eta: 0:08:09 lr: 0.000002 grad: 0.2005 (0.1941) loss: 0.7870 (0.7856) time: 0.1325 data: 0.0549 max mem: 9377 +Train: [91] [2900/6250] eta: 0:07:56 lr: 0.000002 grad: 0.1858 (0.1941) loss: 0.7790 (0.7855) time: 0.1497 data: 0.0647 max mem: 9377 +Train: [91] [3000/6250] eta: 0:07:41 lr: 0.000002 grad: 0.1864 (0.1940) loss: 0.7761 (0.7855) time: 0.1224 data: 0.0371 max mem: 9377 +Train: [91] [3100/6250] eta: 0:07:27 lr: 0.000002 grad: 0.1838 (0.1941) loss: 0.7881 (0.7855) time: 0.1400 data: 0.0622 max mem: 9377 +Train: [91] [3200/6250] eta: 0:07:12 lr: 0.000002 grad: 0.1754 (0.1938) loss: 0.7882 (0.7856) time: 0.1452 data: 0.0706 max mem: 9377 +Train: [91] [3300/6250] eta: 0:06:58 lr: 0.000002 grad: 0.1736 (0.1935) loss: 0.7856 (0.7856) time: 0.1563 data: 0.0732 max mem: 9377 +Train: [91] [3400/6250] eta: 0:06:44 lr: 0.000002 grad: 0.1920 (0.1933) loss: 0.7871 (0.7856) time: 0.1640 data: 0.0858 max mem: 9377 +Train: [91] [3500/6250] eta: 0:06:29 lr: 0.000002 grad: 0.1717 (0.1931) loss: 0.7913 (0.7856) time: 0.1396 data: 0.0510 max mem: 9377 +Train: [91] [3600/6250] eta: 0:06:14 lr: 0.000002 grad: 0.1877 (0.1931) loss: 0.7900 (0.7856) time: 0.1324 data: 0.0462 max mem: 9377 +Train: [91] [3700/6250] eta: 0:06:00 lr: 0.000002 grad: 0.1834 (0.1929) loss: 0.7930 (0.7857) time: 0.1428 data: 0.0619 max mem: 9377 +Train: [91] [3800/6250] eta: 0:05:45 lr: 0.000002 grad: 0.1885 (0.1928) loss: 0.7805 (0.7858) time: 0.1309 data: 0.0480 max mem: 9377 +Train: [91] [3900/6250] eta: 0:05:31 lr: 0.000002 grad: 0.1785 (0.1927) loss: 0.7930 (0.7858) time: 0.1171 data: 0.0425 max mem: 9377 +Train: [91] [4000/6250] eta: 0:05:17 lr: 0.000002 grad: 0.1922 (0.1927) loss: 0.7824 (0.7858) time: 0.1450 data: 0.0589 max mem: 9377 +Train: [91] [4100/6250] eta: 0:05:03 lr: 0.000002 grad: 0.1875 (0.1927) loss: 0.7866 (0.7858) time: 0.1497 data: 0.0628 max mem: 9377 +Train: [91] [4200/6250] eta: 0:04:50 lr: 0.000002 grad: 0.1853 (0.1926) loss: 0.7798 (0.7858) time: 0.1591 data: 0.0799 max mem: 9377 +Train: [91] [4300/6250] eta: 0:04:36 lr: 0.000002 grad: 0.1744 (0.1925) loss: 0.7864 (0.7858) time: 0.1508 data: 0.0728 max mem: 9377 +Train: [91] [4400/6250] eta: 0:04:22 lr: 0.000002 grad: 0.1837 (0.1924) loss: 0.7918 (0.7858) time: 0.1421 data: 0.0604 max mem: 9377 +Train: [91] [4500/6250] eta: 0:04:08 lr: 0.000002 grad: 0.1949 (0.1924) loss: 0.7801 (0.7858) time: 0.1299 data: 0.0533 max mem: 9377 +Train: [91] [4600/6250] eta: 0:03:53 lr: 0.000002 grad: 0.1820 (0.1924) loss: 0.7833 (0.7858) time: 0.1259 data: 0.0401 max mem: 9377 +Train: [91] [4700/6250] eta: 0:03:39 lr: 0.000002 grad: 0.2009 (0.1924) loss: 0.7766 (0.7857) time: 0.1346 data: 0.0411 max mem: 9377 +Train: [91] [4800/6250] eta: 0:03:25 lr: 0.000002 grad: 0.1802 (0.1925) loss: 0.7897 (0.7857) time: 0.1346 data: 0.0511 max mem: 9377 +Train: [91] [4900/6250] eta: 0:03:11 lr: 0.000002 grad: 0.1839 (0.1924) loss: 0.7845 (0.7857) time: 0.1422 data: 0.0629 max mem: 9377 +Train: [91] [5000/6250] eta: 0:02:56 lr: 0.000002 grad: 0.1838 (0.1924) loss: 0.7936 (0.7857) time: 0.1109 data: 0.0214 max mem: 9377 +Train: [91] [5100/6250] eta: 0:02:42 lr: 0.000002 grad: 0.1833 (0.1923) loss: 0.7881 (0.7857) time: 0.1493 data: 0.0687 max mem: 9377 +Train: [91] [5200/6250] eta: 0:02:28 lr: 0.000002 grad: 0.1896 (0.1923) loss: 0.7786 (0.7856) time: 0.1576 data: 0.0779 max mem: 9377 +Train: [91] [5300/6250] eta: 0:02:14 lr: 0.000002 grad: 0.1895 (0.1923) loss: 0.7751 (0.7856) time: 0.1460 data: 0.0640 max mem: 9377 +Train: [91] [5400/6250] eta: 0:02:00 lr: 0.000002 grad: 0.1813 (0.1922) loss: 0.7873 (0.7855) time: 0.1572 data: 0.0794 max mem: 9377 +Train: [91] [5500/6250] eta: 0:01:46 lr: 0.000002 grad: 0.1872 (0.1923) loss: 0.7828 (0.7854) time: 0.1683 data: 0.0896 max mem: 9377 +Train: [91] [5600/6250] eta: 0:01:32 lr: 0.000002 grad: 0.1921 (0.1923) loss: 0.7800 (0.7854) time: 0.1431 data: 0.0629 max mem: 9377 +Train: [91] [5700/6250] eta: 0:01:17 lr: 0.000002 grad: 0.1934 (0.1923) loss: 0.7736 (0.7852) time: 0.1157 data: 0.0313 max mem: 9377 +Train: [91] [5800/6250] eta: 0:01:03 lr: 0.000002 grad: 0.1802 (0.1923) loss: 0.7855 (0.7852) time: 0.1310 data: 0.0429 max mem: 9377 +Train: [91] [5900/6250] eta: 0:00:49 lr: 0.000002 grad: 0.1899 (0.1924) loss: 0.7837 (0.7851) time: 0.1437 data: 0.0629 max mem: 9377 +Train: [91] [6000/6250] eta: 0:00:35 lr: 0.000002 grad: 0.1849 (0.1923) loss: 0.7836 (0.7851) time: 0.1512 data: 0.0660 max mem: 9377 +Train: [91] [6100/6250] eta: 0:00:21 lr: 0.000002 grad: 0.1900 (0.1923) loss: 0.7825 (0.7850) time: 0.1740 data: 0.0933 max mem: 9377 +Train: [91] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1811 (0.1922) loss: 0.7932 (0.7850) time: 0.1799 data: 0.1015 max mem: 9377 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1866 (0.1922) loss: 0.7797 (0.7849) time: 0.1649 data: 0.0766 max mem: 9377 +Train: [91] Total time: 0:14:58 (0.1438 s / it) +Averaged stats: lr: 0.000002 grad: 0.1866 (0.1922) loss: 0.7797 (0.7849) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:06:37 loss: 0.8373 (0.8373) time: 6.4093 data: 6.3792 max mem: 9377 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.8404 (0.8419) time: 0.1374 data: 0.1124 max mem: 9377 +Eval (hcp-train-subset): [91] Total time: 0:00:16 (0.2591 s / it) +Averaged stats (hcp-train-subset): loss: 0.8404 (0.8419) +Eval (hcp-val): [91] [ 0/62] eta: 0:04:19 loss: 0.8390 (0.8390) time: 4.1847 data: 4.1137 max mem: 9377 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.8394 (0.8408) time: 0.1462 data: 0.1193 max mem: 9377 +Eval (hcp-val): [91] Total time: 0:00:15 (0.2452 s / it) +Averaged stats (hcp-val): loss: 0.8394 (0.8408) +Eval (nsd-val): [91] [ 0/62] eta: 0:05:57 loss: 0.8268 (0.8268) time: 5.7601 data: 5.7125 max mem: 9377 +Eval (nsd-val): [91] [61/62] eta: 0:00:00 loss: 0.8352 (0.8345) time: 0.1310 data: 0.1036 max mem: 9377 +Eval (nsd-val): [91] Total time: 0:00:14 (0.2372 s / it) +Averaged stats (nsd-val): loss: 0.8352 (0.8345) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [92] [ 0/6250] eta: 11:59:42 lr: 0.000002 grad: 0.3859 (0.3859) loss: 0.7184 (0.7184) time: 6.9092 data: 6.7753 max mem: 9377 +Train: [92] [ 100/6250] eta: 0:20:53 lr: 0.000002 grad: 0.2084 (0.2523) loss: 0.8053 (0.7966) time: 0.1263 data: 0.0181 max mem: 9377 +Train: [92] [ 200/6250] eta: 0:17:36 lr: 0.000002 grad: 0.2078 (0.2348) loss: 0.7759 (0.7933) time: 0.1441 data: 0.0532 max mem: 9377 +Train: [92] [ 300/6250] eta: 0:16:16 lr: 0.000002 grad: 0.1986 (0.2277) loss: 0.7940 (0.7907) time: 0.1315 data: 0.0360 max mem: 9377 +Train: [92] [ 400/6250] eta: 0:15:27 lr: 0.000002 grad: 0.1964 (0.2228) loss: 0.7888 (0.7904) time: 0.1308 data: 0.0339 max mem: 9377 +Train: [92] [ 500/6250] eta: 0:14:44 lr: 0.000002 grad: 0.1867 (0.2169) loss: 0.7936 (0.7906) time: 0.1253 data: 0.0287 max mem: 9377 +Train: [92] [ 600/6250] eta: 0:14:14 lr: 0.000002 grad: 0.1833 (0.2126) loss: 0.7863 (0.7910) time: 0.1417 data: 0.0465 max mem: 9377 +Train: [92] [ 700/6250] eta: 0:13:45 lr: 0.000002 grad: 0.1876 (0.2091) loss: 0.7982 (0.7912) time: 0.1517 data: 0.0630 max mem: 9377 +Train: [92] [ 800/6250] eta: 0:13:26 lr: 0.000002 grad: 0.1759 (0.2065) loss: 0.7957 (0.7915) time: 0.1496 data: 0.0521 max mem: 9377 +Train: [92] [ 900/6250] eta: 0:13:10 lr: 0.000002 grad: 0.1775 (0.2043) loss: 0.7942 (0.7916) time: 0.1372 data: 0.0483 max mem: 9377 +Train: [92] [1000/6250] eta: 0:12:50 lr: 0.000002 grad: 0.1874 (0.2039) loss: 0.7838 (0.7914) time: 0.1187 data: 0.0377 max mem: 9377 +Train: [92] [1100/6250] eta: 0:12:31 lr: 0.000002 grad: 0.1913 (0.2031) loss: 0.7872 (0.7910) time: 0.1530 data: 0.0674 max mem: 9377 +Train: [92] [1200/6250] eta: 0:12:14 lr: 0.000002 grad: 0.1849 (0.2022) loss: 0.7896 (0.7908) time: 0.1322 data: 0.0413 max mem: 9377 +Train: [92] [1300/6250] eta: 0:12:00 lr: 0.000002 grad: 0.1883 (0.2012) loss: 0.7873 (0.7905) time: 0.1711 data: 0.0869 max mem: 9377 +Train: [92] [1400/6250] eta: 0:11:42 lr: 0.000002 grad: 0.1846 (0.2005) loss: 0.7848 (0.7902) time: 0.1485 data: 0.0575 max mem: 9377 +Train: [92] [1500/6250] eta: 0:11:27 lr: 0.000002 grad: 0.1916 (0.2001) loss: 0.7823 (0.7900) time: 0.1401 data: 0.0559 max mem: 9377 +Train: [92] [1600/6250] eta: 0:11:12 lr: 0.000002 grad: 0.1881 (0.1998) loss: 0.7806 (0.7897) time: 0.1547 data: 0.0769 max mem: 9377 +Train: [92] [1700/6250] eta: 0:10:56 lr: 0.000002 grad: 0.1827 (0.1991) loss: 0.7911 (0.7894) time: 0.1456 data: 0.0614 max mem: 9377 +Train: [92] [1800/6250] eta: 0:10:40 lr: 0.000002 grad: 0.1952 (0.1986) loss: 0.7790 (0.7890) time: 0.1360 data: 0.0551 max mem: 9377 +Train: [92] [1900/6250] eta: 0:10:25 lr: 0.000002 grad: 0.1942 (0.1980) loss: 0.7850 (0.7888) time: 0.1418 data: 0.0533 max mem: 9377 +Train: [92] [2000/6250] eta: 0:10:09 lr: 0.000002 grad: 0.1827 (0.1974) loss: 0.7819 (0.7887) time: 0.1431 data: 0.0600 max mem: 9377 +Train: [92] [2100/6250] eta: 0:09:54 lr: 0.000002 grad: 0.1853 (0.1970) loss: 0.7887 (0.7885) time: 0.1529 data: 0.0709 max mem: 9377 +Train: [92] [2200/6250] eta: 0:09:38 lr: 0.000002 grad: 0.1941 (0.1969) loss: 0.7773 (0.7881) time: 0.1367 data: 0.0498 max mem: 9377 +Train: [92] [2300/6250] eta: 0:09:23 lr: 0.000002 grad: 0.1865 (0.1968) loss: 0.7867 (0.7879) time: 0.1259 data: 0.0410 max mem: 9377 +Train: [92] [2400/6250] eta: 0:09:08 lr: 0.000002 grad: 0.1912 (0.1965) loss: 0.7858 (0.7877) time: 0.1404 data: 0.0550 max mem: 9377 +Train: [92] [2500/6250] eta: 0:08:52 lr: 0.000002 grad: 0.1810 (0.1965) loss: 0.7802 (0.7876) time: 0.1381 data: 0.0566 max mem: 9377 +Train: [92] [2600/6250] eta: 0:08:37 lr: 0.000002 grad: 0.1912 (0.1964) loss: 0.7794 (0.7873) time: 0.1345 data: 0.0504 max mem: 9377 +Train: [92] [2700/6250] eta: 0:08:23 lr: 0.000002 grad: 0.1816 (0.1964) loss: 0.7777 (0.7872) time: 0.1505 data: 0.0689 max mem: 9377 +Train: [92] [2800/6250] eta: 0:08:09 lr: 0.000002 grad: 0.1955 (0.1966) loss: 0.7795 (0.7869) time: 0.1397 data: 0.0581 max mem: 9377 +Train: [92] [2900/6250] eta: 0:07:54 lr: 0.000002 grad: 0.1987 (0.1965) loss: 0.7783 (0.7866) time: 0.1458 data: 0.0685 max mem: 9377 +Train: [92] [3000/6250] eta: 0:07:40 lr: 0.000002 grad: 0.1863 (0.1965) loss: 0.7856 (0.7864) time: 0.1390 data: 0.0576 max mem: 9377 +Train: [92] [3100/6250] eta: 0:07:25 lr: 0.000002 grad: 0.1842 (0.1963) loss: 0.7794 (0.7862) time: 0.1364 data: 0.0515 max mem: 9377 +Train: [92] [3200/6250] eta: 0:07:11 lr: 0.000002 grad: 0.1902 (0.1964) loss: 0.7755 (0.7860) time: 0.1325 data: 0.0495 max mem: 9377 +Train: [92] [3300/6250] eta: 0:06:56 lr: 0.000002 grad: 0.1798 (0.1962) loss: 0.7899 (0.7859) time: 0.1511 data: 0.0705 max mem: 9377 +Train: [92] [3400/6250] eta: 0:06:42 lr: 0.000002 grad: 0.1992 (0.1961) loss: 0.7790 (0.7858) time: 0.1464 data: 0.0739 max mem: 9377 +Train: [92] [3500/6250] eta: 0:06:27 lr: 0.000002 grad: 0.1972 (0.1961) loss: 0.7788 (0.7857) time: 0.1295 data: 0.0384 max mem: 9377 +Train: [92] [3600/6250] eta: 0:06:13 lr: 0.000002 grad: 0.1973 (0.1961) loss: 0.7735 (0.7856) time: 0.1433 data: 0.0609 max mem: 9377 +Train: [92] [3700/6250] eta: 0:06:00 lr: 0.000002 grad: 0.1870 (0.1960) loss: 0.7867 (0.7856) time: 0.1659 data: 0.0850 max mem: 9377 +Train: [92] [3800/6250] eta: 0:05:46 lr: 0.000002 grad: 0.1807 (0.1959) loss: 0.7872 (0.7856) time: 0.1505 data: 0.0655 max mem: 9377 +Train: [92] [3900/6250] eta: 0:05:32 lr: 0.000002 grad: 0.1863 (0.1958) loss: 0.7824 (0.7856) time: 0.1534 data: 0.0681 max mem: 9377 +Train: [92] [4000/6250] eta: 0:05:19 lr: 0.000002 grad: 0.1963 (0.1957) loss: 0.7711 (0.7855) time: 0.1904 data: 0.1155 max mem: 9377 +Train: [92] [4100/6250] eta: 0:05:05 lr: 0.000002 grad: 0.1828 (0.1957) loss: 0.7831 (0.7854) time: 0.1443 data: 0.0628 max mem: 9377 +Train: [92] [4200/6250] eta: 0:04:51 lr: 0.000002 grad: 0.1887 (0.1956) loss: 0.7871 (0.7854) time: 0.1409 data: 0.0488 max mem: 9377 +Train: [92] [4300/6250] eta: 0:04:38 lr: 0.000002 grad: 0.1785 (0.1955) loss: 0.7796 (0.7854) time: 0.1308 data: 0.0422 max mem: 9377 +Train: [92] [4400/6250] eta: 0:04:24 lr: 0.000002 grad: 0.1837 (0.1953) loss: 0.7798 (0.7854) time: 0.1424 data: 0.0637 max mem: 9377 +Train: [92] [4500/6250] eta: 0:04:09 lr: 0.000002 grad: 0.1859 (0.1951) loss: 0.7842 (0.7854) time: 0.1214 data: 0.0448 max mem: 9377 +Train: [92] [4600/6250] eta: 0:03:55 lr: 0.000002 grad: 0.1707 (0.1949) loss: 0.7919 (0.7855) time: 0.1380 data: 0.0523 max mem: 9377 +Train: [92] [4700/6250] eta: 0:03:40 lr: 0.000002 grad: 0.1850 (0.1947) loss: 0.7816 (0.7856) time: 0.1307 data: 0.0387 max mem: 9377 +Train: [92] [4800/6250] eta: 0:03:26 lr: 0.000002 grad: 0.1852 (0.1946) loss: 0.7883 (0.7856) time: 0.1376 data: 0.0559 max mem: 9377 +Train: [92] [4900/6250] eta: 0:03:11 lr: 0.000002 grad: 0.1770 (0.1945) loss: 0.7938 (0.7857) time: 0.1281 data: 0.0418 max mem: 9377 +Train: [92] [5000/6250] eta: 0:02:57 lr: 0.000002 grad: 0.1818 (0.1943) loss: 0.7987 (0.7858) time: 0.1277 data: 0.0417 max mem: 9377 +Train: [92] [5100/6250] eta: 0:02:42 lr: 0.000002 grad: 0.1673 (0.1940) loss: 0.7993 (0.7859) time: 0.1171 data: 0.0237 max mem: 9377 +Train: [92] [5200/6250] eta: 0:02:28 lr: 0.000002 grad: 0.1846 (0.1938) loss: 0.7919 (0.7860) time: 0.1464 data: 0.0600 max mem: 9377 +Train: [92] [5300/6250] eta: 0:02:14 lr: 0.000002 grad: 0.1807 (0.1937) loss: 0.7846 (0.7861) time: 0.1483 data: 0.0665 max mem: 9377 +Train: [92] [5400/6250] eta: 0:01:59 lr: 0.000002 grad: 0.1940 (0.1936) loss: 0.7845 (0.7861) time: 0.1053 data: 0.0127 max mem: 9377 +Train: [92] [5500/6250] eta: 0:01:45 lr: 0.000002 grad: 0.1773 (0.1934) loss: 0.7939 (0.7861) time: 0.1323 data: 0.0481 max mem: 9377 +Train: [92] [5600/6250] eta: 0:01:31 lr: 0.000002 grad: 0.1778 (0.1934) loss: 0.7863 (0.7862) time: 0.1513 data: 0.0652 max mem: 9377 +Train: [92] [5700/6250] eta: 0:01:17 lr: 0.000002 grad: 0.1794 (0.1934) loss: 0.7897 (0.7861) time: 0.1379 data: 0.0579 max mem: 9377 +Train: [92] [5800/6250] eta: 0:01:03 lr: 0.000002 grad: 0.1966 (0.1933) loss: 0.7920 (0.7862) time: 0.1297 data: 0.0383 max mem: 9377 +Train: [92] [5900/6250] eta: 0:00:49 lr: 0.000002 grad: 0.1824 (0.1933) loss: 0.7849 (0.7862) time: 0.1299 data: 0.0497 max mem: 9377 +Train: [92] [6000/6250] eta: 0:00:35 lr: 0.000002 grad: 0.1840 (0.1932) loss: 0.7902 (0.7863) time: 0.1240 data: 0.0395 max mem: 9377 +Train: [92] [6100/6250] eta: 0:00:21 lr: 0.000002 grad: 0.1976 (0.1932) loss: 0.7854 (0.7863) time: 0.1374 data: 0.0563 max mem: 9377 +Train: [92] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1853 (0.1931) loss: 0.7866 (0.7863) time: 0.1623 data: 0.0820 max mem: 9377 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1838 (0.1931) loss: 0.7899 (0.7863) time: 0.1273 data: 0.0401 max mem: 9377 +Train: [92] Total time: 0:14:44 (0.1416 s / it) +Averaged stats: lr: 0.000002 grad: 0.1838 (0.1931) loss: 0.7899 (0.7863) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:05:08 loss: 0.8394 (0.8394) time: 4.9722 data: 4.9418 max mem: 9377 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.8417 (0.8417) time: 0.1503 data: 0.1250 max mem: 9377 +Eval (hcp-train-subset): [92] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-train-subset): loss: 0.8417 (0.8417) +Eval (hcp-val): [92] [ 0/62] eta: 0:05:17 loss: 0.8364 (0.8364) time: 5.1193 data: 5.0879 max mem: 9377 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.8391 (0.8401) time: 0.1174 data: 0.0924 max mem: 9377 +Eval (hcp-val): [92] Total time: 0:00:13 (0.2258 s / it) +Averaged stats (hcp-val): loss: 0.8391 (0.8401) +Eval (nsd-val): [92] [ 0/62] eta: 0:05:09 loss: 0.8234 (0.8234) time: 4.9897 data: 4.9581 max mem: 9377 +Eval (nsd-val): [92] [61/62] eta: 0:00:00 loss: 0.8333 (0.8351) time: 0.1128 data: 0.0881 max mem: 9377 +Eval (nsd-val): [92] Total time: 0:00:13 (0.2204 s / it) +Averaged stats (nsd-val): loss: 0.8333 (0.8351) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 8:16:42 lr: 0.000002 grad: 0.2266 (0.2266) loss: 0.8265 (0.8265) time: 4.7685 data: 4.4366 max mem: 9377 +Train: [93] [ 100/6250] eta: 0:23:37 lr: 0.000002 grad: 0.2047 (0.2191) loss: 0.8021 (0.8083) time: 0.1772 data: 0.0789 max mem: 9377 +Train: [93] [ 200/6250] eta: 0:20:19 lr: 0.000002 grad: 0.1808 (0.2101) loss: 0.8032 (0.8003) time: 0.1658 data: 0.0619 max mem: 9377 +Train: [93] [ 300/6250] eta: 0:19:10 lr: 0.000002 grad: 0.1767 (0.2043) loss: 0.8048 (0.7978) time: 0.1762 data: 0.0698 max mem: 9377 +Train: [93] [ 400/6250] eta: 0:18:22 lr: 0.000002 grad: 0.1762 (0.2006) loss: 0.7968 (0.7975) time: 0.2066 data: 0.1096 max mem: 9377 +Train: [93] [ 500/6250] eta: 0:17:26 lr: 0.000002 grad: 0.1851 (0.1976) loss: 0.7932 (0.7970) time: 0.1451 data: 0.0531 max mem: 9377 +Train: [93] [ 600/6250] eta: 0:16:40 lr: 0.000002 grad: 0.1871 (0.1960) loss: 0.7929 (0.7963) time: 0.1567 data: 0.0560 max mem: 9377 +Train: [93] [ 700/6250] eta: 0:16:02 lr: 0.000002 grad: 0.1798 (0.1943) loss: 0.7915 (0.7957) time: 0.1646 data: 0.0645 max mem: 9377 +Train: [93] [ 800/6250] eta: 0:15:31 lr: 0.000002 grad: 0.1873 (0.1937) loss: 0.7850 (0.7948) time: 0.1847 data: 0.0993 max mem: 9377 +Train: [93] [ 900/6250] eta: 0:14:59 lr: 0.000002 grad: 0.1919 (0.1934) loss: 0.7894 (0.7943) time: 0.1407 data: 0.0517 max mem: 9377 +Train: [93] [1000/6250] eta: 0:14:41 lr: 0.000002 grad: 0.1785 (0.1930) loss: 0.7855 (0.7935) time: 0.1603 data: 0.0761 max mem: 9377 +Train: [93] [1100/6250] eta: 0:14:21 lr: 0.000002 grad: 0.1821 (0.1923) loss: 0.7854 (0.7927) time: 0.1614 data: 0.0766 max mem: 9377 +Train: [93] [1200/6250] eta: 0:14:01 lr: 0.000002 grad: 0.1786 (0.1920) loss: 0.7903 (0.7922) time: 0.1534 data: 0.0637 max mem: 9377 +Train: [93] [1300/6250] eta: 0:13:37 lr: 0.000002 grad: 0.1821 (0.1916) loss: 0.7901 (0.7916) time: 0.1294 data: 0.0437 max mem: 9377 +Train: [93] [1400/6250] eta: 0:13:18 lr: 0.000002 grad: 0.1848 (0.1912) loss: 0.7787 (0.7913) time: 0.1755 data: 0.0927 max mem: 9377 +Train: [93] [1500/6250] eta: 0:12:56 lr: 0.000002 grad: 0.1715 (0.1907) loss: 0.7876 (0.7910) time: 0.1530 data: 0.0671 max mem: 9377 +Train: [93] [1600/6250] eta: 0:12:33 lr: 0.000002 grad: 0.1768 (0.1902) loss: 0.7815 (0.7908) time: 0.1386 data: 0.0588 max mem: 9377 +Train: [93] [1700/6250] eta: 0:12:12 lr: 0.000002 grad: 0.1815 (0.1899) loss: 0.7821 (0.7906) time: 0.1262 data: 0.0450 max mem: 9377 +Train: [93] [1800/6250] eta: 0:11:53 lr: 0.000002 grad: 0.1722 (0.1897) loss: 0.7933 (0.7904) time: 0.1414 data: 0.0596 max mem: 9377 +Train: [93] [1900/6250] eta: 0:11:31 lr: 0.000002 grad: 0.1928 (0.1896) loss: 0.7866 (0.7901) time: 0.1231 data: 0.0351 max mem: 9377 +Train: [93] [2000/6250] eta: 0:11:11 lr: 0.000002 grad: 0.1796 (0.1894) loss: 0.7820 (0.7901) time: 0.1301 data: 0.0379 max mem: 9377 +Train: [93] [2100/6250] eta: 0:10:52 lr: 0.000002 grad: 0.1843 (0.1894) loss: 0.7824 (0.7900) time: 0.1389 data: 0.0553 max mem: 9377 +Train: [93] [2200/6250] eta: 0:10:33 lr: 0.000002 grad: 0.1886 (0.1894) loss: 0.7824 (0.7899) time: 0.1522 data: 0.0645 max mem: 9377 +Train: [93] [2300/6250] eta: 0:10:16 lr: 0.000001 grad: 0.1774 (0.1891) loss: 0.7839 (0.7899) time: 0.1561 data: 0.0744 max mem: 9377 +Train: [93] [2400/6250] eta: 0:09:57 lr: 0.000001 grad: 0.1748 (0.1888) loss: 0.7922 (0.7898) time: 0.1283 data: 0.0465 max mem: 9377 +Train: [93] [2500/6250] eta: 0:09:40 lr: 0.000001 grad: 0.1908 (0.1887) loss: 0.7828 (0.7897) time: 0.1448 data: 0.0622 max mem: 9377 +Train: [93] [2600/6250] eta: 0:09:23 lr: 0.000001 grad: 0.1823 (0.1888) loss: 0.7824 (0.7895) time: 0.1470 data: 0.0677 max mem: 9377 +Train: [93] [2700/6250] eta: 0:09:06 lr: 0.000001 grad: 0.1751 (0.1886) loss: 0.7900 (0.7893) time: 0.1384 data: 0.0583 max mem: 9377 +Train: [93] [2800/6250] eta: 0:08:49 lr: 0.000001 grad: 0.1853 (0.1885) loss: 0.7851 (0.7892) time: 0.1294 data: 0.0432 max mem: 9377 +Train: [93] [2900/6250] eta: 0:08:32 lr: 0.000001 grad: 0.1779 (0.1885) loss: 0.7919 (0.7891) time: 0.1463 data: 0.0675 max mem: 9377 +Train: [93] [3000/6250] eta: 0:08:15 lr: 0.000001 grad: 0.1851 (0.1885) loss: 0.7812 (0.7889) time: 0.1352 data: 0.0491 max mem: 9377 +Train: [93] [3100/6250] eta: 0:07:59 lr: 0.000001 grad: 0.1877 (0.1886) loss: 0.7794 (0.7887) time: 0.1317 data: 0.0480 max mem: 9377 +Train: [93] [3200/6250] eta: 0:07:42 lr: 0.000001 grad: 0.1917 (0.1887) loss: 0.7786 (0.7885) time: 0.1379 data: 0.0613 max mem: 9377 +Train: [93] [3300/6250] eta: 0:07:25 lr: 0.000001 grad: 0.1903 (0.1889) loss: 0.7859 (0.7883) time: 0.1343 data: 0.0526 max mem: 9377 +Train: [93] [3400/6250] eta: 0:07:09 lr: 0.000001 grad: 0.1994 (0.1893) loss: 0.7754 (0.7880) time: 0.1433 data: 0.0611 max mem: 9377 +Train: [93] [3500/6250] eta: 0:06:54 lr: 0.000001 grad: 0.1940 (0.1896) loss: 0.7839 (0.7876) time: 0.1628 data: 0.0830 max mem: 9377 +Train: [93] [3600/6250] eta: 0:06:37 lr: 0.000001 grad: 0.1897 (0.1899) loss: 0.7685 (0.7873) time: 0.1592 data: 0.0798 max mem: 9377 +Train: [93] [3700/6250] eta: 0:06:21 lr: 0.000001 grad: 0.1881 (0.1900) loss: 0.7857 (0.7871) time: 0.1461 data: 0.0623 max mem: 9377 +Train: [93] [3800/6250] eta: 0:06:06 lr: 0.000001 grad: 0.1930 (0.1901) loss: 0.7833 (0.7870) time: 0.1435 data: 0.0583 max mem: 9377 +Train: [93] [3900/6250] eta: 0:05:50 lr: 0.000001 grad: 0.1851 (0.1902) loss: 0.7863 (0.7868) time: 0.1267 data: 0.0454 max mem: 9377 +Train: [93] [4000/6250] eta: 0:05:35 lr: 0.000001 grad: 0.1839 (0.1903) loss: 0.7805 (0.7867) time: 0.1597 data: 0.0730 max mem: 9377 +Train: [93] [4100/6250] eta: 0:05:20 lr: 0.000001 grad: 0.1860 (0.1903) loss: 0.7860 (0.7866) time: 0.1506 data: 0.0665 max mem: 9377 +Train: [93] [4200/6250] eta: 0:05:06 lr: 0.000001 grad: 0.1905 (0.1902) loss: 0.7852 (0.7866) time: 0.1555 data: 0.0671 max mem: 9377 +Train: [93] [4300/6250] eta: 0:04:51 lr: 0.000001 grad: 0.1795 (0.1903) loss: 0.7771 (0.7865) time: 0.1794 data: 0.1002 max mem: 9377 +Train: [93] [4400/6250] eta: 0:04:36 lr: 0.000001 grad: 0.1918 (0.1903) loss: 0.7845 (0.7864) time: 0.1569 data: 0.0679 max mem: 9377 +Train: [93] [4500/6250] eta: 0:04:21 lr: 0.000001 grad: 0.1709 (0.1903) loss: 0.7929 (0.7863) time: 0.1567 data: 0.0727 max mem: 9377 +Train: [93] [4600/6250] eta: 0:04:06 lr: 0.000001 grad: 0.1871 (0.1903) loss: 0.7875 (0.7862) time: 0.1121 data: 0.0170 max mem: 9377 +Train: [93] [4700/6250] eta: 0:03:52 lr: 0.000001 grad: 0.1968 (0.1903) loss: 0.7743 (0.7861) time: 0.1556 data: 0.0648 max mem: 9377 +Train: [93] [4800/6250] eta: 0:03:37 lr: 0.000001 grad: 0.1786 (0.1903) loss: 0.7813 (0.7860) time: 0.1369 data: 0.0518 max mem: 9377 +Train: [93] [4900/6250] eta: 0:03:21 lr: 0.000001 grad: 0.1879 (0.1902) loss: 0.7882 (0.7860) time: 0.1297 data: 0.0491 max mem: 9377 +Train: [93] [5000/6250] eta: 0:03:06 lr: 0.000001 grad: 0.1753 (0.1902) loss: 0.7917 (0.7860) time: 0.1636 data: 0.0777 max mem: 9377 +Train: [93] [5100/6250] eta: 0:02:51 lr: 0.000001 grad: 0.1768 (0.1901) loss: 0.7885 (0.7861) time: 0.1426 data: 0.0588 max mem: 9377 +Train: [93] [5200/6250] eta: 0:02:37 lr: 0.000001 grad: 0.1776 (0.1899) loss: 0.7881 (0.7862) time: 0.1586 data: 0.0819 max mem: 9377 +Train: [93] [5300/6250] eta: 0:02:22 lr: 0.000001 grad: 0.1803 (0.1898) loss: 0.7838 (0.7862) time: 0.1545 data: 0.0744 max mem: 9377 +Train: [93] [5400/6250] eta: 0:02:07 lr: 0.000001 grad: 0.1851 (0.1897) loss: 0.7948 (0.7863) time: 0.1733 data: 0.0983 max mem: 9377 +Train: [93] [5500/6250] eta: 0:01:52 lr: 0.000001 grad: 0.1845 (0.1897) loss: 0.7908 (0.7864) time: 0.1540 data: 0.0716 max mem: 9377 +Train: [93] [5600/6250] eta: 0:01:37 lr: 0.000001 grad: 0.1848 (0.1897) loss: 0.7907 (0.7864) time: 0.1449 data: 0.0634 max mem: 9377 +Train: [93] [5700/6250] eta: 0:01:22 lr: 0.000001 grad: 0.1858 (0.1897) loss: 0.7937 (0.7865) time: 0.1245 data: 0.0428 max mem: 9377 +Train: [93] [5800/6250] eta: 0:01:07 lr: 0.000001 grad: 0.1738 (0.1896) loss: 0.7952 (0.7866) time: 0.1412 data: 0.0563 max mem: 9377 +Train: [93] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.1810 (0.1896) loss: 0.7920 (0.7867) time: 0.1248 data: 0.0341 max mem: 9377 +Train: [93] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1860 (0.1895) loss: 0.7919 (0.7868) time: 0.1419 data: 0.0583 max mem: 9377 +Train: [93] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1811 (0.1895) loss: 0.7933 (0.7869) time: 0.1416 data: 0.0561 max mem: 9377 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1854 (0.1895) loss: 0.7853 (0.7869) time: 0.1331 data: 0.0549 max mem: 9377 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1839 (0.1895) loss: 0.7883 (0.7869) time: 0.1223 data: 0.0385 max mem: 9377 +Train: [93] Total time: 0:15:35 (0.1497 s / it) +Averaged stats: lr: 0.000001 grad: 0.1839 (0.1895) loss: 0.7883 (0.7869) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:05:35 loss: 0.8350 (0.8350) time: 5.4166 data: 5.3859 max mem: 9377 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.8413 (0.8417) time: 0.1310 data: 0.1059 max mem: 9377 +Eval (hcp-train-subset): [93] Total time: 0:00:13 (0.2205 s / it) +Averaged stats (hcp-train-subset): loss: 0.8413 (0.8417) +Eval (hcp-val): [93] [ 0/62] eta: 0:04:45 loss: 0.8352 (0.8352) time: 4.5985 data: 4.5382 max mem: 9377 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.8377 (0.8399) time: 0.1429 data: 0.1178 max mem: 9377 +Eval (hcp-val): [93] Total time: 0:00:14 (0.2379 s / it) +Averaged stats (hcp-val): loss: 0.8377 (0.8399) +Eval (nsd-val): [93] [ 0/62] eta: 0:06:28 loss: 0.8184 (0.8184) time: 6.2676 data: 6.2375 max mem: 9377 +Eval (nsd-val): [93] [61/62] eta: 0:00:00 loss: 0.8317 (0.8320) time: 0.1241 data: 0.0992 max mem: 9377 +Eval (nsd-val): [93] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (nsd-val): loss: 0.8317 (0.8320) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [94] [ 0/6250] eta: 10:36:52 lr: 0.000001 grad: 0.1497 (0.1497) loss: 0.8472 (0.8472) time: 6.1140 data: 5.9520 max mem: 9377 +Train: [94] [ 100/6250] eta: 0:20:01 lr: 0.000001 grad: 0.2024 (0.2330) loss: 0.7893 (0.7965) time: 0.1132 data: 0.0170 max mem: 9377 +Train: [94] [ 200/6250] eta: 0:16:51 lr: 0.000001 grad: 0.2096 (0.2254) loss: 0.7838 (0.7924) time: 0.1361 data: 0.0437 max mem: 9377 +Train: [94] [ 300/6250] eta: 0:15:56 lr: 0.000001 grad: 0.1884 (0.2189) loss: 0.7837 (0.7912) time: 0.1395 data: 0.0431 max mem: 9377 +Train: [94] [ 400/6250] eta: 0:15:09 lr: 0.000001 grad: 0.1768 (0.2139) loss: 0.8028 (0.7907) time: 0.1335 data: 0.0404 max mem: 9377 +Train: [94] [ 500/6250] eta: 0:14:33 lr: 0.000001 grad: 0.1786 (0.2095) loss: 0.8011 (0.7911) time: 0.1287 data: 0.0292 max mem: 9377 +Train: [94] [ 600/6250] eta: 0:14:05 lr: 0.000001 grad: 0.1894 (0.2059) loss: 0.7933 (0.7916) time: 0.1445 data: 0.0459 max mem: 9377 +Train: [94] [ 700/6250] eta: 0:13:38 lr: 0.000001 grad: 0.1893 (0.2028) loss: 0.7864 (0.7919) time: 0.1332 data: 0.0388 max mem: 9377 +Train: [94] [ 800/6250] eta: 0:13:14 lr: 0.000001 grad: 0.1832 (0.2008) loss: 0.7878 (0.7917) time: 0.1230 data: 0.0313 max mem: 9377 +Train: [94] [ 900/6250] eta: 0:12:58 lr: 0.000001 grad: 0.1749 (0.1993) loss: 0.8000 (0.7921) time: 0.1559 data: 0.0625 max mem: 9377 +Train: [94] [1000/6250] eta: 0:12:34 lr: 0.000001 grad: 0.1749 (0.1976) loss: 0.7894 (0.7922) time: 0.1295 data: 0.0410 max mem: 9377 +Train: [94] [1100/6250] eta: 0:12:18 lr: 0.000001 grad: 0.1758 (0.1964) loss: 0.7916 (0.7924) time: 0.1487 data: 0.0617 max mem: 9377 +Train: [94] [1200/6250] eta: 0:11:59 lr: 0.000001 grad: 0.1827 (0.1960) loss: 0.8010 (0.7924) time: 0.1339 data: 0.0504 max mem: 9377 +Train: [94] [1300/6250] eta: 0:11:41 lr: 0.000001 grad: 0.1681 (0.1949) loss: 0.7997 (0.7926) time: 0.1213 data: 0.0381 max mem: 9377 +Train: [94] [1400/6250] eta: 0:11:25 lr: 0.000001 grad: 0.1742 (0.1938) loss: 0.7947 (0.7927) time: 0.1287 data: 0.0375 max mem: 9377 +Train: [94] [1500/6250] eta: 0:11:09 lr: 0.000001 grad: 0.1789 (0.1927) loss: 0.7919 (0.7927) time: 0.1110 data: 0.0227 max mem: 9377 +Train: [94] [1600/6250] eta: 0:10:55 lr: 0.000001 grad: 0.1787 (0.1919) loss: 0.7932 (0.7926) time: 0.1451 data: 0.0589 max mem: 9377 +Train: [94] [1700/6250] eta: 0:10:38 lr: 0.000001 grad: 0.1800 (0.1911) loss: 0.7955 (0.7926) time: 0.1332 data: 0.0560 max mem: 9377 +Train: [94] [1800/6250] eta: 0:10:22 lr: 0.000001 grad: 0.1642 (0.1904) loss: 0.7959 (0.7927) time: 0.1207 data: 0.0411 max mem: 9377 +Train: [94] [1900/6250] eta: 0:10:07 lr: 0.000001 grad: 0.1733 (0.1899) loss: 0.7977 (0.7926) time: 0.1403 data: 0.0559 max mem: 9377 +Train: [94] [2000/6250] eta: 0:09:51 lr: 0.000001 grad: 0.1846 (0.1894) loss: 0.7911 (0.7927) time: 0.1379 data: 0.0530 max mem: 9377 +Train: [94] [2100/6250] eta: 0:09:38 lr: 0.000001 grad: 0.1687 (0.1889) loss: 0.7977 (0.7928) time: 0.1666 data: 0.0896 max mem: 9377 +Train: [94] [2200/6250] eta: 0:09:23 lr: 0.000001 grad: 0.1811 (0.1884) loss: 0.7961 (0.7928) time: 0.1644 data: 0.0876 max mem: 9377 +Train: [94] [2300/6250] eta: 0:09:09 lr: 0.000001 grad: 0.1804 (0.1883) loss: 0.7890 (0.7928) time: 0.1488 data: 0.0668 max mem: 9377 +Train: [94] [2400/6250] eta: 0:08:55 lr: 0.000001 grad: 0.1853 (0.1883) loss: 0.7852 (0.7926) time: 0.1330 data: 0.0504 max mem: 9377 +Train: [94] [2500/6250] eta: 0:08:42 lr: 0.000001 grad: 0.1890 (0.1882) loss: 0.7892 (0.7924) time: 0.1772 data: 0.1013 max mem: 9377 +Train: [94] [2600/6250] eta: 0:08:28 lr: 0.000001 grad: 0.1830 (0.1882) loss: 0.7861 (0.7921) time: 0.1393 data: 0.0618 max mem: 9377 +Train: [94] [2700/6250] eta: 0:08:13 lr: 0.000001 grad: 0.1730 (0.1880) loss: 0.7948 (0.7920) time: 0.1336 data: 0.0543 max mem: 9377 +Train: [94] [2800/6250] eta: 0:08:00 lr: 0.000001 grad: 0.1839 (0.1880) loss: 0.7856 (0.7918) time: 0.1265 data: 0.0431 max mem: 9377 +Train: [94] [2900/6250] eta: 0:07:46 lr: 0.000001 grad: 0.1827 (0.1878) loss: 0.7826 (0.7917) time: 0.1385 data: 0.0542 max mem: 9377 +Train: [94] [3000/6250] eta: 0:07:32 lr: 0.000001 grad: 0.1848 (0.1878) loss: 0.7847 (0.7914) time: 0.1525 data: 0.0745 max mem: 9377 +Train: [94] [3100/6250] eta: 0:07:17 lr: 0.000001 grad: 0.1874 (0.1878) loss: 0.7805 (0.7912) time: 0.1353 data: 0.0555 max mem: 9377 +Train: [94] [3200/6250] eta: 0:07:04 lr: 0.000001 grad: 0.1862 (0.1879) loss: 0.7855 (0.7909) time: 0.1374 data: 0.0568 max mem: 9377 +Train: [94] [3300/6250] eta: 0:06:51 lr: 0.000001 grad: 0.1797 (0.1880) loss: 0.7877 (0.7906) time: 0.1474 data: 0.0598 max mem: 9377 +Train: [94] [3400/6250] eta: 0:06:37 lr: 0.000001 grad: 0.1766 (0.1879) loss: 0.7776 (0.7904) time: 0.1337 data: 0.0501 max mem: 9377 +Train: [94] [3500/6250] eta: 0:06:23 lr: 0.000001 grad: 0.1833 (0.1878) loss: 0.7888 (0.7903) time: 0.1261 data: 0.0416 max mem: 9377 +Train: [94] [3600/6250] eta: 0:06:09 lr: 0.000001 grad: 0.1879 (0.1879) loss: 0.7840 (0.7902) time: 0.1327 data: 0.0488 max mem: 9377 +Train: [94] [3700/6250] eta: 0:05:55 lr: 0.000001 grad: 0.1812 (0.1878) loss: 0.7826 (0.7900) time: 0.1363 data: 0.0531 max mem: 9377 +Train: [94] [3800/6250] eta: 0:05:41 lr: 0.000001 grad: 0.1821 (0.1878) loss: 0.7867 (0.7899) time: 0.1024 data: 0.0171 max mem: 9377 +Train: [94] [3900/6250] eta: 0:05:28 lr: 0.000001 grad: 0.1780 (0.1879) loss: 0.7916 (0.7897) time: 0.1562 data: 0.0679 max mem: 9377 +Train: [94] [4000/6250] eta: 0:05:14 lr: 0.000001 grad: 0.1884 (0.1881) loss: 0.7854 (0.7895) time: 0.1467 data: 0.0655 max mem: 9377 +Train: [94] [4100/6250] eta: 0:05:01 lr: 0.000001 grad: 0.1754 (0.1882) loss: 0.7879 (0.7893) time: 0.1686 data: 0.0870 max mem: 9377 +Train: [94] [4200/6250] eta: 0:04:48 lr: 0.000001 grad: 0.1874 (0.1883) loss: 0.7819 (0.7891) time: 0.1654 data: 0.0704 max mem: 9377 +Train: [94] [4300/6250] eta: 0:04:34 lr: 0.000001 grad: 0.1910 (0.1884) loss: 0.7829 (0.7890) time: 0.1415 data: 0.0549 max mem: 9377 +Train: [94] [4400/6250] eta: 0:04:20 lr: 0.000001 grad: 0.1825 (0.1884) loss: 0.7876 (0.7889) time: 0.1204 data: 0.0427 max mem: 9377 +Train: [94] [4500/6250] eta: 0:04:06 lr: 0.000001 grad: 0.1847 (0.1884) loss: 0.7864 (0.7888) time: 0.1553 data: 0.0703 max mem: 9377 +Train: [94] [4600/6250] eta: 0:03:52 lr: 0.000001 grad: 0.1909 (0.1885) loss: 0.7817 (0.7887) time: 0.1392 data: 0.0594 max mem: 9377 +Train: [94] [4700/6250] eta: 0:03:38 lr: 0.000001 grad: 0.1862 (0.1885) loss: 0.7802 (0.7886) time: 0.1441 data: 0.0542 max mem: 9377 +Train: [94] [4800/6250] eta: 0:03:23 lr: 0.000001 grad: 0.1999 (0.1887) loss: 0.7791 (0.7885) time: 0.1326 data: 0.0470 max mem: 9377 +Train: [94] [4900/6250] eta: 0:03:09 lr: 0.000001 grad: 0.1890 (0.1887) loss: 0.7784 (0.7884) time: 0.1394 data: 0.0562 max mem: 9377 +Train: [94] [5000/6250] eta: 0:02:55 lr: 0.000001 grad: 0.1780 (0.1887) loss: 0.7855 (0.7883) time: 0.1173 data: 0.0321 max mem: 9377 +Train: [94] [5100/6250] eta: 0:02:41 lr: 0.000001 grad: 0.1861 (0.1887) loss: 0.7822 (0.7882) time: 0.1293 data: 0.0308 max mem: 9377 +Train: [94] [5200/6250] eta: 0:02:26 lr: 0.000001 grad: 0.1870 (0.1886) loss: 0.7891 (0.7882) time: 0.1124 data: 0.0224 max mem: 9377 +Train: [94] [5300/6250] eta: 0:02:12 lr: 0.000001 grad: 0.1778 (0.1885) loss: 0.7855 (0.7881) time: 0.1436 data: 0.0556 max mem: 9377 +Train: [94] [5400/6250] eta: 0:01:58 lr: 0.000001 grad: 0.1738 (0.1885) loss: 0.7909 (0.7882) time: 0.1319 data: 0.0366 max mem: 9377 +Train: [94] [5500/6250] eta: 0:01:44 lr: 0.000001 grad: 0.1847 (0.1884) loss: 0.7919 (0.7881) time: 0.1263 data: 0.0329 max mem: 9377 +Train: [94] [5600/6250] eta: 0:01:30 lr: 0.000001 grad: 0.1841 (0.1885) loss: 0.7862 (0.7881) time: 0.1282 data: 0.0473 max mem: 9377 +Train: [94] [5700/6250] eta: 0:01:16 lr: 0.000001 grad: 0.1968 (0.1885) loss: 0.7835 (0.7880) time: 0.1169 data: 0.0303 max mem: 9377 +Train: [94] [5800/6250] eta: 0:01:02 lr: 0.000001 grad: 0.1815 (0.1885) loss: 0.7904 (0.7881) time: 0.1262 data: 0.0385 max mem: 9377 +Train: [94] [5900/6250] eta: 0:00:48 lr: 0.000001 grad: 0.1663 (0.1884) loss: 0.7966 (0.7881) time: 0.1405 data: 0.0563 max mem: 9377 +Train: [94] [6000/6250] eta: 0:00:34 lr: 0.000001 grad: 0.1960 (0.1884) loss: 0.7777 (0.7881) time: 0.1410 data: 0.0591 max mem: 9377 +Train: [94] [6100/6250] eta: 0:00:20 lr: 0.000001 grad: 0.1868 (0.1884) loss: 0.7836 (0.7881) time: 0.1269 data: 0.0457 max mem: 9377 +Train: [94] [6200/6250] eta: 0:00:06 lr: 0.000001 grad: 0.1952 (0.1885) loss: 0.7784 (0.7881) time: 0.1386 data: 0.0580 max mem: 9377 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1814 (0.1885) loss: 0.7876 (0.7880) time: 0.1352 data: 0.0528 max mem: 9377 +Train: [94] Total time: 0:14:33 (0.1397 s / it) +Averaged stats: lr: 0.000001 grad: 0.1814 (0.1885) loss: 0.7876 (0.7880) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:04:00 loss: 0.8403 (0.8403) time: 3.8764 data: 3.8084 max mem: 9377 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.8385 (0.8413) time: 0.1187 data: 0.0942 max mem: 9377 +Eval (hcp-train-subset): [94] Total time: 0:00:13 (0.2133 s / it) +Averaged stats (hcp-train-subset): loss: 0.8385 (0.8413) +Making plots (hcp-train-subset): example=16 +Eval (hcp-val): [94] [ 0/62] eta: 0:06:00 loss: 0.8396 (0.8396) time: 5.8191 data: 5.7887 max mem: 9377 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.8384 (0.8399) time: 0.1190 data: 0.0905 max mem: 9377 +Eval (hcp-val): [94] Total time: 0:00:13 (0.2141 s / it) +Averaged stats (hcp-val): loss: 0.8384 (0.8399) +Making plots (hcp-val): example=51 +Eval (nsd-val): [94] [ 0/62] eta: 0:06:01 loss: 0.8175 (0.8175) time: 5.8273 data: 5.7965 max mem: 9377 +Eval (nsd-val): [94] [61/62] eta: 0:00:00 loss: 0.8279 (0.8309) time: 0.1369 data: 0.1118 max mem: 9377 +Eval (nsd-val): [94] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (nsd-val): loss: 0.8279 (0.8309) +Making plots (nsd-val): example=41 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00094.pth +Train: [95] [ 0/6250] eta: 10:55:30 lr: 0.000001 grad: 0.1454 (0.1454) loss: 0.8332 (0.8332) time: 6.2929 data: 6.1364 max mem: 9377 +Train: [95] [ 100/6250] eta: 0:21:02 lr: 0.000001 grad: 0.2115 (0.2478) loss: 0.7781 (0.7861) time: 0.1438 data: 0.0389 max mem: 9377 +Train: [95] [ 200/6250] eta: 0:17:45 lr: 0.000001 grad: 0.2030 (0.2377) loss: 0.7780 (0.7783) time: 0.1376 data: 0.0476 max mem: 9377 +Train: [95] [ 300/6250] eta: 0:16:13 lr: 0.000001 grad: 0.2237 (0.2331) loss: 0.7654 (0.7775) time: 0.1605 data: 0.0722 max mem: 9377 +Train: [95] [ 400/6250] eta: 0:15:33 lr: 0.000001 grad: 0.2082 (0.2277) loss: 0.7681 (0.7775) time: 0.1463 data: 0.0485 max mem: 9377 +Train: [95] [ 500/6250] eta: 0:15:07 lr: 0.000001 grad: 0.1923 (0.2214) loss: 0.7903 (0.7789) time: 0.1625 data: 0.0743 max mem: 9377 +Train: [95] [ 600/6250] eta: 0:14:25 lr: 0.000001 grad: 0.1769 (0.2155) loss: 0.7920 (0.7808) time: 0.1281 data: 0.0419 max mem: 9377 +Train: [95] [ 700/6250] eta: 0:13:56 lr: 0.000001 grad: 0.1854 (0.2119) loss: 0.7869 (0.7823) time: 0.1371 data: 0.0426 max mem: 9377 +Train: [95] [ 800/6250] eta: 0:13:26 lr: 0.000001 grad: 0.1882 (0.2095) loss: 0.7915 (0.7834) time: 0.1406 data: 0.0623 max mem: 9377 +Train: [95] [ 900/6250] eta: 0:13:02 lr: 0.000001 grad: 0.1855 (0.2071) loss: 0.7920 (0.7842) time: 0.1363 data: 0.0415 max mem: 9377 +Train: [95] [1000/6250] eta: 0:12:40 lr: 0.000001 grad: 0.1768 (0.2051) loss: 0.7986 (0.7848) time: 0.1362 data: 0.0524 max mem: 9377 +Train: [95] [1100/6250] eta: 0:12:34 lr: 0.000001 grad: 0.1743 (0.2031) loss: 0.7933 (0.7854) time: 0.1390 data: 0.0489 max mem: 9377 +Train: [95] [1200/6250] eta: 0:12:23 lr: 0.000001 grad: 0.1861 (0.2018) loss: 0.7869 (0.7858) time: 0.1597 data: 0.0776 max mem: 9377 +Train: [95] [1300/6250] eta: 0:12:10 lr: 0.000001 grad: 0.1855 (0.2006) loss: 0.7896 (0.7863) time: 0.1274 data: 0.0444 max mem: 9377 +Train: [95] [1400/6250] eta: 0:11:47 lr: 0.000001 grad: 0.1812 (0.1999) loss: 0.7904 (0.7865) time: 0.1220 data: 0.0328 max mem: 9377 +Train: [95] [1500/6250] eta: 0:11:31 lr: 0.000001 grad: 0.1782 (0.1991) loss: 0.7875 (0.7867) time: 0.1463 data: 0.0539 max mem: 9377 +Train: [95] [1600/6250] eta: 0:11:12 lr: 0.000001 grad: 0.1779 (0.1982) loss: 0.7934 (0.7870) time: 0.1194 data: 0.0367 max mem: 9377 +Train: [95] [1700/6250] eta: 0:10:56 lr: 0.000001 grad: 0.1862 (0.1975) loss: 0.7917 (0.7873) time: 0.1343 data: 0.0564 max mem: 9377 +Train: [95] [1800/6250] eta: 0:10:41 lr: 0.000001 grad: 0.1890 (0.1967) loss: 0.7929 (0.7875) time: 0.1475 data: 0.0669 max mem: 9377 +Train: [95] [1900/6250] eta: 0:10:26 lr: 0.000001 grad: 0.1806 (0.1961) loss: 0.7890 (0.7877) time: 0.1488 data: 0.0650 max mem: 9377 +Train: [95] [2000/6250] eta: 0:10:10 lr: 0.000001 grad: 0.1798 (0.1955) loss: 0.7871 (0.7878) time: 0.1345 data: 0.0476 max mem: 9377 +Train: [95] [2100/6250] eta: 0:09:55 lr: 0.000001 grad: 0.1866 (0.1952) loss: 0.7886 (0.7877) time: 0.1209 data: 0.0378 max mem: 9377 +Train: [95] [2200/6250] eta: 0:09:41 lr: 0.000001 grad: 0.1933 (0.1949) loss: 0.7764 (0.7877) time: 0.1398 data: 0.0585 max mem: 9377 +Train: [95] [2300/6250] eta: 0:09:25 lr: 0.000001 grad: 0.1818 (0.1947) loss: 0.7955 (0.7876) time: 0.1267 data: 0.0405 max mem: 9377 +Train: [95] [2400/6250] eta: 0:09:10 lr: 0.000001 grad: 0.1929 (0.1949) loss: 0.7854 (0.7875) time: 0.1556 data: 0.0790 max mem: 9377 +Train: [95] [2500/6250] eta: 0:08:55 lr: 0.000001 grad: 0.1914 (0.1947) loss: 0.7852 (0.7875) time: 0.1396 data: 0.0601 max mem: 9377 +Train: [95] [2600/6250] eta: 0:08:40 lr: 0.000001 grad: 0.1860 (0.1947) loss: 0.7812 (0.7873) time: 0.1444 data: 0.0592 max mem: 9377 +Train: [95] [2700/6250] eta: 0:08:25 lr: 0.000001 grad: 0.1884 (0.1947) loss: 0.7836 (0.7873) time: 0.1579 data: 0.0745 max mem: 9377 +Train: [95] [2800/6250] eta: 0:08:09 lr: 0.000001 grad: 0.1857 (0.1945) loss: 0.7916 (0.7873) time: 0.1367 data: 0.0585 max mem: 9377 +Train: [95] [2900/6250] eta: 0:07:53 lr: 0.000001 grad: 0.1822 (0.1943) loss: 0.7856 (0.7873) time: 0.1272 data: 0.0405 max mem: 9377 +Train: [95] [3000/6250] eta: 0:07:39 lr: 0.000001 grad: 0.1934 (0.1942) loss: 0.7829 (0.7874) time: 0.1327 data: 0.0509 max mem: 9377 +Train: [95] [3100/6250] eta: 0:07:25 lr: 0.000001 grad: 0.1739 (0.1941) loss: 0.7908 (0.7873) time: 0.1297 data: 0.0466 max mem: 9377 +Train: [95] [3200/6250] eta: 0:07:11 lr: 0.000001 grad: 0.2063 (0.1942) loss: 0.7834 (0.7873) time: 0.1475 data: 0.0667 max mem: 9377 +Train: [95] [3300/6250] eta: 0:06:57 lr: 0.000001 grad: 0.1888 (0.1942) loss: 0.7842 (0.7872) time: 0.1395 data: 0.0598 max mem: 9377 +Train: [95] [3400/6250] eta: 0:06:42 lr: 0.000001 grad: 0.1816 (0.1943) loss: 0.7813 (0.7870) time: 0.1407 data: 0.0558 max mem: 9377 +Train: [95] [3500/6250] eta: 0:06:28 lr: 0.000001 grad: 0.1796 (0.1942) loss: 0.7856 (0.7868) time: 0.1219 data: 0.0355 max mem: 9377 +Train: [95] [3600/6250] eta: 0:06:14 lr: 0.000001 grad: 0.1933 (0.1942) loss: 0.7707 (0.7866) time: 0.1615 data: 0.0827 max mem: 9377 +Train: [95] [3700/6250] eta: 0:05:59 lr: 0.000001 grad: 0.1890 (0.1943) loss: 0.7725 (0.7864) time: 0.1391 data: 0.0564 max mem: 9377 +Train: [95] [3800/6250] eta: 0:05:45 lr: 0.000001 grad: 0.1921 (0.1942) loss: 0.7844 (0.7863) time: 0.1636 data: 0.0819 max mem: 9377 +Train: [95] [3900/6250] eta: 0:05:31 lr: 0.000001 grad: 0.1847 (0.1942) loss: 0.7851 (0.7862) time: 0.1538 data: 0.0735 max mem: 9377 +Train: [95] [4000/6250] eta: 0:05:18 lr: 0.000001 grad: 0.1838 (0.1941) loss: 0.7775 (0.7861) time: 0.1577 data: 0.0716 max mem: 9377 +Train: [95] [4100/6250] eta: 0:05:05 lr: 0.000001 grad: 0.1849 (0.1940) loss: 0.7782 (0.7860) time: 0.1743 data: 0.0850 max mem: 9377 +Train: [95] [4200/6250] eta: 0:04:52 lr: 0.000001 grad: 0.1800 (0.1940) loss: 0.7800 (0.7859) time: 0.1870 data: 0.0938 max mem: 9377 +Train: [95] [4300/6250] eta: 0:04:38 lr: 0.000001 grad: 0.1950 (0.1941) loss: 0.7819 (0.7857) time: 0.1638 data: 0.0878 max mem: 9377 +Train: [95] [4400/6250] eta: 0:04:25 lr: 0.000001 grad: 0.1920 (0.1942) loss: 0.7764 (0.7855) time: 0.1448 data: 0.0588 max mem: 9377 +Train: [95] [4500/6250] eta: 0:04:11 lr: 0.000001 grad: 0.1828 (0.1942) loss: 0.7776 (0.7854) time: 0.1570 data: 0.0727 max mem: 9377 +Train: [95] [4600/6250] eta: 0:03:57 lr: 0.000001 grad: 0.1864 (0.1942) loss: 0.7842 (0.7852) time: 0.1534 data: 0.0681 max mem: 9377 +Train: [95] [4700/6250] eta: 0:03:43 lr: 0.000001 grad: 0.1842 (0.1942) loss: 0.7807 (0.7850) time: 0.1241 data: 0.0402 max mem: 9377 +Train: [95] [4800/6250] eta: 0:03:29 lr: 0.000001 grad: 0.1840 (0.1941) loss: 0.7816 (0.7849) time: 0.1318 data: 0.0424 max mem: 9377 +Train: [95] [4900/6250] eta: 0:03:14 lr: 0.000001 grad: 0.1922 (0.1941) loss: 0.7831 (0.7848) time: 0.1235 data: 0.0403 max mem: 9377 +Train: [95] [5000/6250] eta: 0:02:59 lr: 0.000001 grad: 0.1884 (0.1941) loss: 0.7811 (0.7846) time: 0.1290 data: 0.0497 max mem: 9377 +Train: [95] [5100/6250] eta: 0:02:45 lr: 0.000001 grad: 0.1903 (0.1939) loss: 0.7808 (0.7845) time: 0.1602 data: 0.0804 max mem: 9377 +Train: [95] [5200/6250] eta: 0:02:31 lr: 0.000001 grad: 0.1711 (0.1938) loss: 0.7844 (0.7845) time: 0.1509 data: 0.0710 max mem: 9377 +Train: [95] [5300/6250] eta: 0:02:17 lr: 0.000001 grad: 0.1832 (0.1936) loss: 0.7847 (0.7845) time: 0.1628 data: 0.0797 max mem: 9377 +Train: [95] [5400/6250] eta: 0:02:02 lr: 0.000001 grad: 0.1812 (0.1934) loss: 0.7895 (0.7846) time: 0.1539 data: 0.0767 max mem: 9377 +Train: [95] [5500/6250] eta: 0:01:48 lr: 0.000001 grad: 0.1730 (0.1933) loss: 0.7892 (0.7846) time: 0.1829 data: 0.1059 max mem: 9377 +Train: [95] [5600/6250] eta: 0:01:34 lr: 0.000001 grad: 0.1828 (0.1932) loss: 0.7851 (0.7846) time: 0.1758 data: 0.0955 max mem: 9377 +Train: [95] [5700/6250] eta: 0:01:19 lr: 0.000001 grad: 0.1865 (0.1931) loss: 0.7869 (0.7846) time: 0.1517 data: 0.0684 max mem: 9377 +Train: [95] [5800/6250] eta: 0:01:05 lr: 0.000001 grad: 0.1870 (0.1930) loss: 0.7790 (0.7846) time: 0.1456 data: 0.0609 max mem: 9377 +Train: [95] [5900/6250] eta: 0:00:50 lr: 0.000001 grad: 0.1838 (0.1929) loss: 0.7848 (0.7846) time: 0.1348 data: 0.0531 max mem: 9377 +Train: [95] [6000/6250] eta: 0:00:36 lr: 0.000001 grad: 0.1854 (0.1929) loss: 0.7800 (0.7846) time: 0.1375 data: 0.0575 max mem: 9377 +Train: [95] [6100/6250] eta: 0:00:21 lr: 0.000001 grad: 0.1888 (0.1929) loss: 0.7840 (0.7846) time: 0.1355 data: 0.0554 max mem: 9377 +Train: [95] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1929 (0.1929) loss: 0.7761 (0.7845) time: 0.1297 data: 0.0409 max mem: 9377 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1804 (0.1929) loss: 0.7865 (0.7845) time: 0.1488 data: 0.0685 max mem: 9377 +Train: [95] Total time: 0:15:09 (0.1455 s / it) +Averaged stats: lr: 0.000001 grad: 0.1804 (0.1929) loss: 0.7865 (0.7845) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:05:04 loss: 0.8384 (0.8384) time: 4.9159 data: 4.8523 max mem: 9377 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.8397 (0.8416) time: 0.1157 data: 0.0911 max mem: 9377 +Eval (hcp-train-subset): [95] Total time: 0:00:14 (0.2317 s / it) +Averaged stats (hcp-train-subset): loss: 0.8397 (0.8416) +Eval (hcp-val): [95] [ 0/62] eta: 0:05:57 loss: 0.8404 (0.8404) time: 5.7600 data: 5.7301 max mem: 9377 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.8392 (0.8399) time: 0.1323 data: 0.1076 max mem: 9377 +Eval (hcp-val): [95] Total time: 0:00:14 (0.2260 s / it) +Averaged stats (hcp-val): loss: 0.8392 (0.8399) +Eval (nsd-val): [95] [ 0/62] eta: 0:04:00 loss: 0.8251 (0.8251) time: 3.8776 data: 3.8159 max mem: 9377 +Eval (nsd-val): [95] [61/62] eta: 0:00:00 loss: 0.8307 (0.8327) time: 0.1239 data: 0.0990 max mem: 9377 +Eval (nsd-val): [95] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (nsd-val): loss: 0.8307 (0.8327) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [96] [ 0/6250] eta: 10:40:05 lr: 0.000001 grad: 0.2575 (0.2575) loss: 0.7968 (0.7968) time: 6.1449 data: 5.9807 max mem: 9377 +Train: [96] [ 100/6250] eta: 0:21:04 lr: 0.000001 grad: 0.2005 (0.2341) loss: 0.7972 (0.7906) time: 0.1422 data: 0.0344 max mem: 9377 +Train: [96] [ 200/6250] eta: 0:17:40 lr: 0.000001 grad: 0.2271 (0.2278) loss: 0.7764 (0.7886) time: 0.1328 data: 0.0344 max mem: 9377 +Train: [96] [ 300/6250] eta: 0:16:49 lr: 0.000001 grad: 0.2121 (0.2243) loss: 0.7832 (0.7857) time: 0.1401 data: 0.0350 max mem: 9377 +Train: [96] [ 400/6250] eta: 0:16:23 lr: 0.000001 grad: 0.2026 (0.2197) loss: 0.7916 (0.7861) time: 0.1725 data: 0.0799 max mem: 9377 +Train: [96] [ 500/6250] eta: 0:15:46 lr: 0.000001 grad: 0.1825 (0.2161) loss: 0.7875 (0.7858) time: 0.1496 data: 0.0653 max mem: 9377 +Train: [96] [ 600/6250] eta: 0:14:59 lr: 0.000001 grad: 0.1858 (0.2134) loss: 0.7851 (0.7857) time: 0.1313 data: 0.0394 max mem: 9377 +Train: [96] [ 700/6250] eta: 0:14:31 lr: 0.000001 grad: 0.1958 (0.2121) loss: 0.7765 (0.7846) time: 0.1633 data: 0.0720 max mem: 9377 +Train: [96] [ 800/6250] eta: 0:13:56 lr: 0.000001 grad: 0.2002 (0.2100) loss: 0.7856 (0.7846) time: 0.1304 data: 0.0394 max mem: 9377 +Train: [96] [ 900/6250] eta: 0:13:30 lr: 0.000001 grad: 0.1839 (0.2082) loss: 0.7947 (0.7850) time: 0.1415 data: 0.0556 max mem: 9377 +Train: [96] [1000/6250] eta: 0:13:01 lr: 0.000001 grad: 0.1921 (0.2067) loss: 0.7856 (0.7850) time: 0.1008 data: 0.0123 max mem: 9377 +Train: [96] [1100/6250] eta: 0:12:37 lr: 0.000000 grad: 0.1822 (0.2053) loss: 0.7828 (0.7850) time: 0.1278 data: 0.0412 max mem: 9377 +Train: [96] [1200/6250] eta: 0:12:15 lr: 0.000000 grad: 0.1886 (0.2043) loss: 0.7870 (0.7846) time: 0.1242 data: 0.0354 max mem: 9377 +Train: [96] [1300/6250] eta: 0:11:54 lr: 0.000000 grad: 0.1933 (0.2038) loss: 0.7903 (0.7845) time: 0.1202 data: 0.0384 max mem: 9377 +Train: [96] [1400/6250] eta: 0:11:37 lr: 0.000000 grad: 0.1860 (0.2029) loss: 0.7889 (0.7846) time: 0.1424 data: 0.0552 max mem: 9377 +Train: [96] [1500/6250] eta: 0:11:17 lr: 0.000000 grad: 0.1861 (0.2019) loss: 0.7859 (0.7846) time: 0.1366 data: 0.0534 max mem: 9377 +Train: [96] [1600/6250] eta: 0:11:01 lr: 0.000000 grad: 0.1853 (0.2012) loss: 0.7896 (0.7845) time: 0.1296 data: 0.0427 max mem: 9377 +Train: [96] [1700/6250] eta: 0:10:49 lr: 0.000000 grad: 0.1815 (0.2004) loss: 0.7825 (0.7846) time: 0.1493 data: 0.0769 max mem: 9377 +Train: [96] [1800/6250] eta: 0:10:37 lr: 0.000000 grad: 0.1875 (0.1997) loss: 0.7875 (0.7846) time: 0.1723 data: 0.0920 max mem: 9377 +Train: [96] [1900/6250] eta: 0:10:23 lr: 0.000000 grad: 0.1804 (0.1991) loss: 0.7851 (0.7846) time: 0.1374 data: 0.0540 max mem: 9377 +Train: [96] [2000/6250] eta: 0:10:07 lr: 0.000000 grad: 0.1934 (0.1988) loss: 0.7866 (0.7845) time: 0.1396 data: 0.0550 max mem: 9377 +Train: [96] [2100/6250] eta: 0:09:53 lr: 0.000000 grad: 0.1910 (0.1986) loss: 0.7774 (0.7844) time: 0.1468 data: 0.0672 max mem: 9377 +Train: [96] [2200/6250] eta: 0:09:38 lr: 0.000000 grad: 0.1840 (0.1983) loss: 0.7898 (0.7845) time: 0.1404 data: 0.0501 max mem: 9377 +Train: [96] [2300/6250] eta: 0:09:23 lr: 0.000000 grad: 0.1799 (0.1981) loss: 0.7909 (0.7847) time: 0.1400 data: 0.0554 max mem: 9377 +Train: [96] [2400/6250] eta: 0:09:09 lr: 0.000000 grad: 0.1816 (0.1978) loss: 0.7943 (0.7848) time: 0.1395 data: 0.0556 max mem: 9377 +Train: [96] [2500/6250] eta: 0:08:55 lr: 0.000000 grad: 0.1850 (0.1974) loss: 0.7874 (0.7849) time: 0.1568 data: 0.0658 max mem: 9377 +Train: [96] [2600/6250] eta: 0:08:39 lr: 0.000000 grad: 0.1865 (0.1971) loss: 0.7844 (0.7849) time: 0.1360 data: 0.0477 max mem: 9377 +Train: [96] [2700/6250] eta: 0:08:25 lr: 0.000000 grad: 0.1850 (0.1967) loss: 0.7824 (0.7850) time: 0.1165 data: 0.0260 max mem: 9377 +Train: [96] [2800/6250] eta: 0:08:09 lr: 0.000000 grad: 0.1899 (0.1965) loss: 0.7908 (0.7852) time: 0.1293 data: 0.0454 max mem: 9377 +Train: [96] [2900/6250] eta: 0:07:55 lr: 0.000000 grad: 0.1824 (0.1962) loss: 0.7916 (0.7855) time: 0.1373 data: 0.0544 max mem: 9377 +Train: [96] [3000/6250] eta: 0:07:41 lr: 0.000000 grad: 0.1869 (0.1961) loss: 0.7819 (0.7855) time: 0.1473 data: 0.0642 max mem: 9377 +Train: [96] [3100/6250] eta: 0:07:26 lr: 0.000000 grad: 0.1936 (0.1960) loss: 0.7856 (0.7855) time: 0.1378 data: 0.0551 max mem: 9377 +Train: [96] [3200/6250] eta: 0:07:12 lr: 0.000000 grad: 0.1884 (0.1959) loss: 0.7811 (0.7855) time: 0.1425 data: 0.0520 max mem: 9377 +Train: [96] [3300/6250] eta: 0:06:57 lr: 0.000000 grad: 0.1842 (0.1957) loss: 0.7869 (0.7856) time: 0.1404 data: 0.0575 max mem: 9377 +Train: [96] [3400/6250] eta: 0:06:43 lr: 0.000000 grad: 0.1944 (0.1958) loss: 0.7872 (0.7856) time: 0.1320 data: 0.0518 max mem: 9377 +Train: [96] [3500/6250] eta: 0:06:28 lr: 0.000000 grad: 0.1857 (0.1957) loss: 0.7867 (0.7856) time: 0.1361 data: 0.0569 max mem: 9377 +Train: [96] [3600/6250] eta: 0:06:13 lr: 0.000000 grad: 0.1886 (0.1957) loss: 0.7810 (0.7857) time: 0.1402 data: 0.0557 max mem: 9377 +Train: [96] [3700/6250] eta: 0:05:59 lr: 0.000000 grad: 0.1818 (0.1954) loss: 0.7901 (0.7858) time: 0.1517 data: 0.0720 max mem: 9377 +Train: [96] [3800/6250] eta: 0:05:44 lr: 0.000000 grad: 0.1813 (0.1952) loss: 0.7964 (0.7859) time: 0.1254 data: 0.0384 max mem: 9377 +Train: [96] [3900/6250] eta: 0:05:30 lr: 0.000000 grad: 0.1850 (0.1950) loss: 0.7962 (0.7860) time: 0.1141 data: 0.0317 max mem: 9377 +Train: [96] [4000/6250] eta: 0:05:16 lr: 0.000000 grad: 0.1779 (0.1948) loss: 0.7895 (0.7862) time: 0.1428 data: 0.0613 max mem: 9377 +Train: [96] [4100/6250] eta: 0:05:02 lr: 0.000000 grad: 0.1835 (0.1947) loss: 0.7853 (0.7863) time: 0.1241 data: 0.0396 max mem: 9377 +Train: [96] [4200/6250] eta: 0:04:49 lr: 0.000000 grad: 0.1867 (0.1947) loss: 0.7888 (0.7864) time: 0.1497 data: 0.0608 max mem: 9377 +Train: [96] [4300/6250] eta: 0:04:35 lr: 0.000000 grad: 0.1853 (0.1946) loss: 0.7897 (0.7865) time: 0.1413 data: 0.0598 max mem: 9377 +Train: [96] [4400/6250] eta: 0:04:21 lr: 0.000000 grad: 0.1811 (0.1947) loss: 0.7881 (0.7865) time: 0.1071 data: 0.0139 max mem: 9377 +Train: [96] [4500/6250] eta: 0:04:07 lr: 0.000000 grad: 0.1903 (0.1946) loss: 0.7922 (0.7865) time: 0.1068 data: 0.0206 max mem: 9377 +Train: [96] [4600/6250] eta: 0:03:52 lr: 0.000000 grad: 0.1839 (0.1946) loss: 0.7927 (0.7865) time: 0.1320 data: 0.0505 max mem: 9377 +Train: [96] [4700/6250] eta: 0:03:38 lr: 0.000000 grad: 0.1875 (0.1946) loss: 0.7964 (0.7866) time: 0.1601 data: 0.0782 max mem: 9377 +Train: [96] [4800/6250] eta: 0:03:24 lr: 0.000000 grad: 0.1765 (0.1945) loss: 0.7912 (0.7867) time: 0.1372 data: 0.0510 max mem: 9377 +Train: [96] [4900/6250] eta: 0:03:10 lr: 0.000000 grad: 0.1811 (0.1945) loss: 0.7893 (0.7867) time: 0.1296 data: 0.0357 max mem: 9377 +Train: [96] [5000/6250] eta: 0:02:55 lr: 0.000000 grad: 0.1872 (0.1945) loss: 0.7904 (0.7868) time: 0.1281 data: 0.0357 max mem: 9377 +Train: [96] [5100/6250] eta: 0:02:41 lr: 0.000000 grad: 0.1907 (0.1944) loss: 0.7858 (0.7868) time: 0.1216 data: 0.0331 max mem: 9377 +Train: [96] [5200/6250] eta: 0:02:27 lr: 0.000000 grad: 0.1829 (0.1943) loss: 0.7942 (0.7869) time: 0.1326 data: 0.0474 max mem: 9377 +Train: [96] [5300/6250] eta: 0:02:13 lr: 0.000000 grad: 0.1888 (0.1942) loss: 0.7938 (0.7869) time: 0.1388 data: 0.0552 max mem: 9377 +Train: [96] [5400/6250] eta: 0:01:59 lr: 0.000000 grad: 0.1841 (0.1941) loss: 0.7917 (0.7869) time: 0.1364 data: 0.0591 max mem: 9377 +Train: [96] [5500/6250] eta: 0:01:45 lr: 0.000000 grad: 0.1831 (0.1940) loss: 0.7913 (0.7869) time: 0.1136 data: 0.0204 max mem: 9377 +Train: [96] [5600/6250] eta: 0:01:31 lr: 0.000000 grad: 0.1912 (0.1940) loss: 0.7860 (0.7870) time: 0.1514 data: 0.0685 max mem: 9377 +Train: [96] [5700/6250] eta: 0:01:17 lr: 0.000000 grad: 0.1793 (0.1939) loss: 0.7866 (0.7870) time: 0.1232 data: 0.0332 max mem: 9377 +Train: [96] [5800/6250] eta: 0:01:02 lr: 0.000000 grad: 0.1874 (0.1938) loss: 0.7773 (0.7870) time: 0.1308 data: 0.0464 max mem: 9377 +Train: [96] [5900/6250] eta: 0:00:48 lr: 0.000000 grad: 0.1905 (0.1937) loss: 0.7828 (0.7871) time: 0.1304 data: 0.0463 max mem: 9377 +Train: [96] [6000/6250] eta: 0:00:34 lr: 0.000000 grad: 0.1812 (0.1936) loss: 0.7927 (0.7871) time: 0.1413 data: 0.0579 max mem: 9377 +Train: [96] [6100/6250] eta: 0:00:20 lr: 0.000000 grad: 0.1734 (0.1936) loss: 0.7946 (0.7871) time: 0.1261 data: 0.0397 max mem: 9377 +Train: [96] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.1754 (0.1935) loss: 0.7937 (0.7871) time: 0.1532 data: 0.0761 max mem: 9377 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1817 (0.1935) loss: 0.7900 (0.7872) time: 0.1428 data: 0.0623 max mem: 9377 +Train: [96] Total time: 0:14:39 (0.1406 s / it) +Averaged stats: lr: 0.000000 grad: 0.1817 (0.1935) loss: 0.7900 (0.7872) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:04:01 loss: 0.8385 (0.8385) time: 3.8975 data: 3.8215 max mem: 9377 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.8422 (0.8417) time: 0.1421 data: 0.1157 max mem: 9377 +Eval (hcp-train-subset): [96] Total time: 0:00:13 (0.2232 s / it) +Averaged stats (hcp-train-subset): loss: 0.8422 (0.8417) +Eval (hcp-val): [96] [ 0/62] eta: 0:04:29 loss: 0.8398 (0.8398) time: 4.3476 data: 4.2684 max mem: 9377 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.8384 (0.8399) time: 0.1457 data: 0.1206 max mem: 9377 +Eval (hcp-val): [96] Total time: 0:00:14 (0.2285 s / it) +Averaged stats (hcp-val): loss: 0.8384 (0.8399) +Eval (nsd-val): [96] [ 0/62] eta: 0:06:09 loss: 0.8204 (0.8204) time: 5.9649 data: 5.9124 max mem: 9377 +Eval (nsd-val): [96] [61/62] eta: 0:00:00 loss: 0.8289 (0.8309) time: 0.1086 data: 0.0835 max mem: 9377 +Eval (nsd-val): [96] Total time: 0:00:13 (0.2240 s / it) +Averaged stats (nsd-val): loss: 0.8289 (0.8309) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [97] [ 0/6250] eta: 10:21:42 lr: 0.000000 grad: 0.2543 (0.2543) loss: 0.8015 (0.8015) time: 5.9683 data: 5.8109 max mem: 9377 +Train: [97] [ 100/6250] eta: 0:20:21 lr: 0.000000 grad: 0.2242 (0.2558) loss: 0.7764 (0.7837) time: 0.1370 data: 0.0480 max mem: 9377 +Train: [97] [ 200/6250] eta: 0:17:27 lr: 0.000000 grad: 0.1931 (0.2328) loss: 0.7938 (0.7887) time: 0.1321 data: 0.0301 max mem: 9377 +Train: [97] [ 300/6250] eta: 0:16:16 lr: 0.000000 grad: 0.2014 (0.2250) loss: 0.7913 (0.7894) time: 0.1547 data: 0.0702 max mem: 9377 +Train: [97] [ 400/6250] eta: 0:15:25 lr: 0.000000 grad: 0.1921 (0.2219) loss: 0.7869 (0.7890) time: 0.1459 data: 0.0508 max mem: 9377 +Train: [97] [ 500/6250] eta: 0:14:49 lr: 0.000000 grad: 0.1837 (0.2184) loss: 0.7909 (0.7889) time: 0.1096 data: 0.0077 max mem: 9377 +Train: [97] [ 600/6250] eta: 0:14:32 lr: 0.000000 grad: 0.1905 (0.2149) loss: 0.7847 (0.7885) time: 0.1931 data: 0.0780 max mem: 9377 +Train: [97] [ 700/6250] eta: 0:14:16 lr: 0.000000 grad: 0.1817 (0.2122) loss: 0.7901 (0.7882) time: 0.1421 data: 0.0557 max mem: 9377 +Train: [97] [ 800/6250] eta: 0:14:04 lr: 0.000000 grad: 0.1940 (0.2096) loss: 0.7849 (0.7882) time: 0.1327 data: 0.0445 max mem: 9377 +Train: [97] [ 900/6250] eta: 0:13:51 lr: 0.000000 grad: 0.1937 (0.2087) loss: 0.7898 (0.7879) time: 0.1406 data: 0.0570 max mem: 9377 +Train: [97] [1000/6250] eta: 0:13:30 lr: 0.000000 grad: 0.1852 (0.2075) loss: 0.7812 (0.7876) time: 0.1566 data: 0.0734 max mem: 9377 +Train: [97] [1100/6250] eta: 0:13:07 lr: 0.000000 grad: 0.1920 (0.2066) loss: 0.7860 (0.7876) time: 0.1375 data: 0.0542 max mem: 9377 +Train: [97] [1200/6250] eta: 0:12:45 lr: 0.000000 grad: 0.1978 (0.2058) loss: 0.7838 (0.7875) time: 0.1324 data: 0.0411 max mem: 9377 +Train: [97] [1300/6250] eta: 0:12:25 lr: 0.000000 grad: 0.1920 (0.2051) loss: 0.7781 (0.7873) time: 0.1418 data: 0.0488 max mem: 9377 +Train: [97] [1400/6250] eta: 0:12:03 lr: 0.000000 grad: 0.1874 (0.2042) loss: 0.7828 (0.7873) time: 0.1075 data: 0.0122 max mem: 9377 +Train: [97] [1500/6250] eta: 0:11:46 lr: 0.000000 grad: 0.1755 (0.2034) loss: 0.7898 (0.7875) time: 0.1347 data: 0.0489 max mem: 9377 +Train: [97] [1600/6250] eta: 0:11:27 lr: 0.000000 grad: 0.1847 (0.2030) loss: 0.7818 (0.7874) time: 0.1414 data: 0.0525 max mem: 9377 +Train: [97] [1700/6250] eta: 0:11:11 lr: 0.000000 grad: 0.1942 (0.2026) loss: 0.7814 (0.7872) time: 0.1264 data: 0.0396 max mem: 9377 +Train: [97] [1800/6250] eta: 0:10:55 lr: 0.000000 grad: 0.1941 (0.2021) loss: 0.7743 (0.7870) time: 0.1427 data: 0.0574 max mem: 9377 +Train: [97] [1900/6250] eta: 0:10:38 lr: 0.000000 grad: 0.1829 (0.2015) loss: 0.7791 (0.7869) time: 0.1374 data: 0.0577 max mem: 9377 +Train: [97] [2000/6250] eta: 0:10:23 lr: 0.000000 grad: 0.1817 (0.2010) loss: 0.7866 (0.7870) time: 0.1240 data: 0.0344 max mem: 9377 +Train: [97] [2100/6250] eta: 0:10:08 lr: 0.000000 grad: 0.1858 (0.2007) loss: 0.7797 (0.7869) time: 0.1226 data: 0.0251 max mem: 9377 +Train: [97] [2200/6250] eta: 0:09:53 lr: 0.000000 grad: 0.1930 (0.2003) loss: 0.7906 (0.7869) time: 0.1499 data: 0.0385 max mem: 9377 +Train: [97] [2300/6250] eta: 0:09:45 lr: 0.000000 grad: 0.1872 (0.1999) loss: 0.7896 (0.7871) time: 0.2299 data: 0.1421 max mem: 9377 +Train: [97] [2400/6250] eta: 0:09:30 lr: 0.000000 grad: 0.1844 (0.1993) loss: 0.7850 (0.7872) time: 0.1667 data: 0.0788 max mem: 9377 +Train: [97] [2500/6250] eta: 0:09:13 lr: 0.000000 grad: 0.1897 (0.1989) loss: 0.7925 (0.7872) time: 0.1330 data: 0.0499 max mem: 9377 +Train: [97] [2600/6250] eta: 0:08:57 lr: 0.000000 grad: 0.1918 (0.1987) loss: 0.7862 (0.7872) time: 0.1596 data: 0.0728 max mem: 9377 +Train: [97] [2700/6250] eta: 0:08:40 lr: 0.000000 grad: 0.1912 (0.1983) loss: 0.7836 (0.7871) time: 0.1423 data: 0.0638 max mem: 9377 +Train: [97] [2800/6250] eta: 0:08:24 lr: 0.000000 grad: 0.1878 (0.1980) loss: 0.7829 (0.7871) time: 0.1436 data: 0.0609 max mem: 9377 +Train: [97] [2900/6250] eta: 0:08:08 lr: 0.000000 grad: 0.1793 (0.1978) loss: 0.7789 (0.7870) time: 0.1492 data: 0.0657 max mem: 9377 +Train: [97] [3000/6250] eta: 0:07:52 lr: 0.000000 grad: 0.1778 (0.1975) loss: 0.7875 (0.7870) time: 0.1362 data: 0.0585 max mem: 9377 +Train: [97] [3100/6250] eta: 0:07:37 lr: 0.000000 grad: 0.1795 (0.1973) loss: 0.7989 (0.7869) time: 0.1331 data: 0.0476 max mem: 9377 +Train: [97] [3200/6250] eta: 0:07:21 lr: 0.000000 grad: 0.1847 (0.1972) loss: 0.7901 (0.7869) time: 0.1440 data: 0.0609 max mem: 9377 +Train: [97] [3300/6250] eta: 0:07:05 lr: 0.000000 grad: 0.1968 (0.1969) loss: 0.7888 (0.7869) time: 0.1275 data: 0.0437 max mem: 9377 +Train: [97] [3400/6250] eta: 0:06:50 lr: 0.000000 grad: 0.1864 (0.1968) loss: 0.7886 (0.7868) time: 0.1451 data: 0.0625 max mem: 9377 +Train: [97] [3500/6250] eta: 0:06:35 lr: 0.000000 grad: 0.1941 (0.1968) loss: 0.7854 (0.7867) time: 0.1353 data: 0.0525 max mem: 9377 +Train: [97] [3600/6250] eta: 0:06:20 lr: 0.000000 grad: 0.2050 (0.1969) loss: 0.7834 (0.7866) time: 0.1551 data: 0.0758 max mem: 9377 +Train: [97] [3700/6250] eta: 0:06:05 lr: 0.000000 grad: 0.1950 (0.1969) loss: 0.7811 (0.7864) time: 0.1478 data: 0.0633 max mem: 9377 +Train: [97] [3800/6250] eta: 0:05:50 lr: 0.000000 grad: 0.1950 (0.1969) loss: 0.7833 (0.7863) time: 0.1390 data: 0.0537 max mem: 9377 +Train: [97] [3900/6250] eta: 0:05:36 lr: 0.000000 grad: 0.1971 (0.1969) loss: 0.7778 (0.7861) time: 0.1357 data: 0.0478 max mem: 9377 +Train: [97] [4000/6250] eta: 0:05:21 lr: 0.000000 grad: 0.1936 (0.1969) loss: 0.7794 (0.7860) time: 0.1639 data: 0.0817 max mem: 9377 +Train: [97] [4100/6250] eta: 0:05:08 lr: 0.000000 grad: 0.2034 (0.1969) loss: 0.7754 (0.7858) time: 0.1557 data: 0.0770 max mem: 9377 +Train: [97] [4200/6250] eta: 0:04:54 lr: 0.000000 grad: 0.1955 (0.1969) loss: 0.7683 (0.7856) time: 0.1155 data: 0.0359 max mem: 9377 +Train: [97] [4300/6250] eta: 0:04:40 lr: 0.000000 grad: 0.1925 (0.1971) loss: 0.7709 (0.7853) time: 0.1394 data: 0.0479 max mem: 9377 +Train: [97] [4400/6250] eta: 0:04:26 lr: 0.000000 grad: 0.2001 (0.1971) loss: 0.7714 (0.7851) time: 0.1386 data: 0.0503 max mem: 9377 +Train: [97] [4500/6250] eta: 0:04:12 lr: 0.000000 grad: 0.1897 (0.1971) loss: 0.7859 (0.7850) time: 0.1562 data: 0.0715 max mem: 9377 +Train: [97] [4600/6250] eta: 0:03:57 lr: 0.000000 grad: 0.1942 (0.1972) loss: 0.7781 (0.7849) time: 0.1584 data: 0.0718 max mem: 9377 +Train: [97] [4700/6250] eta: 0:03:42 lr: 0.000000 grad: 0.1939 (0.1971) loss: 0.7776 (0.7848) time: 0.1176 data: 0.0323 max mem: 9377 +Train: [97] [4800/6250] eta: 0:03:28 lr: 0.000000 grad: 0.1907 (0.1970) loss: 0.7798 (0.7847) time: 0.1358 data: 0.0475 max mem: 9377 +Train: [97] [4900/6250] eta: 0:03:13 lr: 0.000000 grad: 0.1866 (0.1969) loss: 0.7820 (0.7847) time: 0.1089 data: 0.0142 max mem: 9377 +Train: [97] [5000/6250] eta: 0:02:59 lr: 0.000000 grad: 0.1880 (0.1968) loss: 0.7845 (0.7846) time: 0.1331 data: 0.0452 max mem: 9377 +Train: [97] [5100/6250] eta: 0:02:44 lr: 0.000000 grad: 0.1818 (0.1967) loss: 0.7826 (0.7846) time: 0.1226 data: 0.0393 max mem: 9377 +Train: [97] [5200/6250] eta: 0:02:29 lr: 0.000000 grad: 0.2006 (0.1968) loss: 0.7815 (0.7845) time: 0.1293 data: 0.0314 max mem: 9377 +Train: [97] [5300/6250] eta: 0:02:15 lr: 0.000000 grad: 0.1892 (0.1967) loss: 0.7849 (0.7845) time: 0.1298 data: 0.0429 max mem: 9377 +Train: [97] [5400/6250] eta: 0:02:00 lr: 0.000000 grad: 0.1966 (0.1966) loss: 0.7790 (0.7845) time: 0.1396 data: 0.0576 max mem: 9377 +Train: [97] [5500/6250] eta: 0:01:46 lr: 0.000000 grad: 0.1971 (0.1965) loss: 0.7834 (0.7845) time: 0.1408 data: 0.0569 max mem: 9377 +Train: [97] [5600/6250] eta: 0:01:32 lr: 0.000000 grad: 0.1797 (0.1964) loss: 0.7930 (0.7846) time: 0.1273 data: 0.0403 max mem: 9377 +Train: [97] [5700/6250] eta: 0:01:18 lr: 0.000000 grad: 0.1889 (0.1962) loss: 0.7822 (0.7846) time: 0.1287 data: 0.0440 max mem: 9377 +Train: [97] [5800/6250] eta: 0:01:03 lr: 0.000000 grad: 0.1849 (0.1961) loss: 0.7891 (0.7846) time: 0.1604 data: 0.0772 max mem: 9377 +Train: [97] [5900/6250] eta: 0:00:49 lr: 0.000000 grad: 0.1880 (0.1961) loss: 0.7809 (0.7846) time: 0.1445 data: 0.0525 max mem: 9377 +Train: [97] [6000/6250] eta: 0:00:35 lr: 0.000000 grad: 0.1874 (0.1960) loss: 0.7914 (0.7847) time: 0.1408 data: 0.0600 max mem: 9377 +Train: [97] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.1826 (0.1959) loss: 0.7858 (0.7847) time: 0.1343 data: 0.0507 max mem: 9377 +Train: [97] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1925 (0.1958) loss: 0.7805 (0.7847) time: 0.1252 data: 0.0473 max mem: 9377 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1828 (0.1958) loss: 0.7897 (0.7847) time: 0.1281 data: 0.0427 max mem: 9377 +Train: [97] Total time: 0:14:51 (0.1426 s / it) +Averaged stats: lr: 0.000000 grad: 0.1828 (0.1958) loss: 0.7897 (0.7847) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:04:52 loss: 0.8340 (0.8340) time: 4.7148 data: 4.6280 max mem: 9377 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.8426 (0.8414) time: 0.1413 data: 0.1162 max mem: 9377 +Eval (hcp-train-subset): [97] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (hcp-train-subset): loss: 0.8426 (0.8414) +Eval (hcp-val): [97] [ 0/62] eta: 0:05:21 loss: 0.8364 (0.8364) time: 5.1821 data: 5.1410 max mem: 9377 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.8402 (0.8399) time: 0.1171 data: 0.0922 max mem: 9377 +Eval (hcp-val): [97] Total time: 0:00:14 (0.2264 s / it) +Averaged stats (hcp-val): loss: 0.8402 (0.8399) +Eval (nsd-val): [97] [ 0/62] eta: 0:05:02 loss: 0.8121 (0.8121) time: 4.8812 data: 4.8267 max mem: 9377 +Eval (nsd-val): [97] [61/62] eta: 0:00:00 loss: 0.8316 (0.8319) time: 0.1209 data: 0.0944 max mem: 9377 +Eval (nsd-val): [97] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (nsd-val): loss: 0.8316 (0.8319) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [98] [ 0/6250] eta: 10:41:04 lr: 0.000000 grad: 0.3364 (0.3364) loss: 0.7455 (0.7455) time: 6.1542 data: 5.9221 max mem: 9377 +Train: [98] [ 100/6250] eta: 0:21:25 lr: 0.000000 grad: 0.2050 (0.2493) loss: 0.7865 (0.7797) time: 0.1643 data: 0.0718 max mem: 9377 +Train: [98] [ 200/6250] eta: 0:18:26 lr: 0.000000 grad: 0.1861 (0.2308) loss: 0.7881 (0.7812) time: 0.1379 data: 0.0406 max mem: 9377 +Train: [98] [ 300/6250] eta: 0:16:46 lr: 0.000000 grad: 0.1964 (0.2234) loss: 0.7932 (0.7835) time: 0.1568 data: 0.0683 max mem: 9377 +Train: [98] [ 400/6250] eta: 0:15:44 lr: 0.000000 grad: 0.1793 (0.2181) loss: 0.7977 (0.7849) time: 0.1639 data: 0.0730 max mem: 9377 +Train: [98] [ 500/6250] eta: 0:15:01 lr: 0.000000 grad: 0.2185 (0.2155) loss: 0.7841 (0.7846) time: 0.1486 data: 0.0579 max mem: 9377 +Train: [98] [ 600/6250] eta: 0:14:20 lr: 0.000000 grad: 0.1972 (0.2132) loss: 0.7963 (0.7851) time: 0.1348 data: 0.0460 max mem: 9377 +Train: [98] [ 700/6250] eta: 0:13:50 lr: 0.000000 grad: 0.2077 (0.2108) loss: 0.7799 (0.7859) time: 0.1198 data: 0.0211 max mem: 9377 +Train: [98] [ 800/6250] eta: 0:13:27 lr: 0.000000 grad: 0.1996 (0.2099) loss: 0.7787 (0.7860) time: 0.1386 data: 0.0532 max mem: 9377 +Train: [98] [ 900/6250] eta: 0:13:17 lr: 0.000000 grad: 0.2006 (0.2092) loss: 0.7900 (0.7861) time: 0.1574 data: 0.0690 max mem: 9377 +Train: [98] [1000/6250] eta: 0:13:04 lr: 0.000000 grad: 0.1970 (0.2087) loss: 0.7846 (0.7859) time: 0.1782 data: 0.0923 max mem: 9377 +Train: [98] [1100/6250] eta: 0:12:45 lr: 0.000000 grad: 0.1897 (0.2083) loss: 0.7811 (0.7856) time: 0.1590 data: 0.0661 max mem: 9377 +Train: [98] [1200/6250] eta: 0:12:28 lr: 0.000000 grad: 0.1950 (0.2077) loss: 0.7830 (0.7853) time: 0.1404 data: 0.0534 max mem: 9377 +Train: [98] [1300/6250] eta: 0:12:08 lr: 0.000000 grad: 0.1825 (0.2072) loss: 0.7839 (0.7852) time: 0.1394 data: 0.0592 max mem: 9377 +Train: [98] [1400/6250] eta: 0:11:56 lr: 0.000000 grad: 0.1961 (0.2068) loss: 0.7795 (0.7852) time: 0.1653 data: 0.0800 max mem: 9377 +Train: [98] [1500/6250] eta: 0:11:42 lr: 0.000000 grad: 0.1913 (0.2064) loss: 0.7785 (0.7852) time: 0.1546 data: 0.0632 max mem: 9377 +Train: [98] [1600/6250] eta: 0:11:29 lr: 0.000000 grad: 0.1970 (0.2057) loss: 0.7793 (0.7852) time: 0.1580 data: 0.0705 max mem: 9377 +Train: [98] [1700/6250] eta: 0:11:13 lr: 0.000000 grad: 0.2023 (0.2050) loss: 0.7845 (0.7851) time: 0.1450 data: 0.0580 max mem: 9377 +Train: [98] [1800/6250] eta: 0:10:56 lr: 0.000000 grad: 0.1905 (0.2042) loss: 0.7850 (0.7851) time: 0.1294 data: 0.0455 max mem: 9377 +Train: [98] [1900/6250] eta: 0:10:39 lr: 0.000000 grad: 0.1959 (0.2037) loss: 0.7835 (0.7850) time: 0.1385 data: 0.0561 max mem: 9377 +Train: [98] [2000/6250] eta: 0:10:27 lr: 0.000000 grad: 0.1894 (0.2031) loss: 0.7864 (0.7849) time: 0.1910 data: 0.1154 max mem: 9377 +Train: [98] [2100/6250] eta: 0:10:13 lr: 0.000000 grad: 0.2044 (0.2029) loss: 0.7728 (0.7848) time: 0.1596 data: 0.0850 max mem: 9377 +Train: [98] [2200/6250] eta: 0:10:02 lr: 0.000000 grad: 0.1965 (0.2026) loss: 0.7828 (0.7846) time: 0.1733 data: 0.0934 max mem: 9377 +Train: [98] [2300/6250] eta: 0:09:51 lr: 0.000000 grad: 0.1949 (0.2023) loss: 0.7856 (0.7845) time: 0.1687 data: 0.0826 max mem: 9377 +Train: [98] [2400/6250] eta: 0:09:38 lr: 0.000000 grad: 0.1846 (0.2020) loss: 0.7857 (0.7844) time: 0.1663 data: 0.0889 max mem: 9377 +Train: [98] [2500/6250] eta: 0:09:22 lr: 0.000000 grad: 0.1807 (0.2017) loss: 0.7869 (0.7845) time: 0.1323 data: 0.0502 max mem: 9377 +Train: [98] [2600/6250] eta: 0:09:05 lr: 0.000000 grad: 0.2042 (0.2015) loss: 0.7893 (0.7844) time: 0.1425 data: 0.0595 max mem: 9377 +Train: [98] [2700/6250] eta: 0:08:49 lr: 0.000000 grad: 0.2045 (0.2014) loss: 0.7862 (0.7845) time: 0.1238 data: 0.0387 max mem: 9377 +Train: [98] [2800/6250] eta: 0:08:34 lr: 0.000000 grad: 0.1941 (0.2012) loss: 0.7819 (0.7845) time: 0.1533 data: 0.0669 max mem: 9377 +Train: [98] [2900/6250] eta: 0:08:18 lr: 0.000000 grad: 0.1971 (0.2012) loss: 0.7836 (0.7844) time: 0.1629 data: 0.0732 max mem: 9377 +Train: [98] [3000/6250] eta: 0:08:01 lr: 0.000000 grad: 0.1975 (0.2012) loss: 0.7799 (0.7843) time: 0.1502 data: 0.0666 max mem: 9377 +Train: [98] [3100/6250] eta: 0:07:46 lr: 0.000000 grad: 0.1848 (0.2010) loss: 0.7871 (0.7843) time: 0.1711 data: 0.0884 max mem: 9377 +Train: [98] [3200/6250] eta: 0:07:30 lr: 0.000000 grad: 0.1924 (0.2008) loss: 0.7810 (0.7842) time: 0.1276 data: 0.0417 max mem: 9377 +Train: [98] [3300/6250] eta: 0:07:15 lr: 0.000000 grad: 0.1963 (0.2006) loss: 0.7862 (0.7843) time: 0.1350 data: 0.0454 max mem: 9377 +Train: [98] [3400/6250] eta: 0:06:59 lr: 0.000000 grad: 0.1920 (0.2004) loss: 0.7790 (0.7844) time: 0.1273 data: 0.0403 max mem: 9377 +Train: [98] [3500/6250] eta: 0:06:44 lr: 0.000000 grad: 0.1925 (0.2003) loss: 0.7740 (0.7843) time: 0.1514 data: 0.0668 max mem: 9377 +Train: [98] [3600/6250] eta: 0:06:28 lr: 0.000000 grad: 0.1964 (0.2002) loss: 0.7684 (0.7842) time: 0.1282 data: 0.0492 max mem: 9377 +Train: [98] [3700/6250] eta: 0:06:12 lr: 0.000000 grad: 0.1936 (0.2003) loss: 0.7775 (0.7840) time: 0.1249 data: 0.0418 max mem: 9377 +Train: [98] [3800/6250] eta: 0:05:57 lr: 0.000000 grad: 0.2074 (0.2004) loss: 0.7820 (0.7839) time: 0.1500 data: 0.0681 max mem: 9377 +Train: [98] [3900/6250] eta: 0:05:42 lr: 0.000000 grad: 0.1921 (0.2003) loss: 0.7814 (0.7838) time: 0.1396 data: 0.0556 max mem: 9377 +Train: [98] [4000/6250] eta: 0:05:27 lr: 0.000000 grad: 0.1958 (0.2003) loss: 0.7832 (0.7837) time: 0.1302 data: 0.0453 max mem: 9377 +Train: [98] [4100/6250] eta: 0:05:13 lr: 0.000000 grad: 0.1970 (0.2003) loss: 0.7808 (0.7835) time: 0.1443 data: 0.0535 max mem: 9377 +Train: [98] [4200/6250] eta: 0:04:59 lr: 0.000000 grad: 0.1939 (0.2002) loss: 0.7868 (0.7834) time: 0.1588 data: 0.0772 max mem: 9377 +Train: [98] [4300/6250] eta: 0:04:45 lr: 0.000000 grad: 0.2017 (0.2001) loss: 0.7800 (0.7834) time: 0.1364 data: 0.0565 max mem: 9377 +Train: [98] [4400/6250] eta: 0:04:30 lr: 0.000000 grad: 0.1797 (0.2002) loss: 0.7760 (0.7833) time: 0.1390 data: 0.0540 max mem: 9377 +Train: [98] [4500/6250] eta: 0:04:16 lr: 0.000000 grad: 0.1964 (0.2002) loss: 0.7822 (0.7833) time: 0.1426 data: 0.0590 max mem: 9377 +Train: [98] [4600/6250] eta: 0:04:01 lr: 0.000000 grad: 0.1990 (0.2002) loss: 0.7807 (0.7833) time: 0.1599 data: 0.0727 max mem: 9377 +Train: [98] [4700/6250] eta: 0:03:47 lr: 0.000000 grad: 0.1899 (0.2004) loss: 0.7804 (0.7832) time: 0.1638 data: 0.0758 max mem: 9377 +Train: [98] [4800/6250] eta: 0:03:33 lr: 0.000000 grad: 0.1973 (0.2004) loss: 0.7914 (0.7832) time: 0.1465 data: 0.0493 max mem: 9377 +Train: [98] [4900/6250] eta: 0:03:18 lr: 0.000000 grad: 0.1934 (0.2005) loss: 0.7893 (0.7832) time: 0.1287 data: 0.0434 max mem: 9377 +Train: [98] [5000/6250] eta: 0:03:03 lr: 0.000000 grad: 0.1976 (0.2006) loss: 0.7866 (0.7832) time: 0.1354 data: 0.0490 max mem: 9377 +Train: [98] [5100/6250] eta: 0:02:48 lr: 0.000000 grad: 0.1892 (0.2005) loss: 0.7899 (0.7832) time: 0.1125 data: 0.0237 max mem: 9377 +Train: [98] [5200/6250] eta: 0:02:33 lr: 0.000000 grad: 0.1921 (0.2005) loss: 0.7866 (0.7832) time: 0.1508 data: 0.0706 max mem: 9377 +Train: [98] [5300/6250] eta: 0:02:19 lr: 0.000000 grad: 0.2017 (0.2006) loss: 0.7716 (0.7831) time: 0.1446 data: 0.0501 max mem: 9377 +Train: [98] [5400/6250] eta: 0:02:04 lr: 0.000000 grad: 0.1959 (0.2007) loss: 0.7803 (0.7830) time: 0.1592 data: 0.0788 max mem: 9377 +Train: [98] [5500/6250] eta: 0:01:49 lr: 0.000000 grad: 0.1916 (0.2007) loss: 0.7832 (0.7830) time: 0.1378 data: 0.0513 max mem: 9377 +Train: [98] [5600/6250] eta: 0:01:35 lr: 0.000000 grad: 0.1964 (0.2007) loss: 0.7871 (0.7830) time: 0.1376 data: 0.0466 max mem: 9377 +Train: [98] [5700/6250] eta: 0:01:20 lr: 0.000000 grad: 0.2051 (0.2007) loss: 0.7816 (0.7830) time: 0.1201 data: 0.0352 max mem: 9377 +Train: [98] [5800/6250] eta: 0:01:05 lr: 0.000000 grad: 0.2017 (0.2008) loss: 0.7710 (0.7829) time: 0.1812 data: 0.0973 max mem: 9377 +Train: [98] [5900/6250] eta: 0:00:51 lr: 0.000000 grad: 0.1872 (0.2008) loss: 0.7859 (0.7829) time: 0.1579 data: 0.0739 max mem: 9377 +Train: [98] [6000/6250] eta: 0:00:36 lr: 0.000000 grad: 0.1938 (0.2008) loss: 0.7895 (0.7830) time: 0.1431 data: 0.0537 max mem: 9377 +Train: [98] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.1826 (0.2007) loss: 0.7882 (0.7831) time: 0.1515 data: 0.0662 max mem: 9377 +Train: [98] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1986 (0.2006) loss: 0.7777 (0.7831) time: 0.1525 data: 0.0637 max mem: 9377 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1991 (0.2006) loss: 0.7829 (0.7831) time: 0.1462 data: 0.0693 max mem: 9377 +Train: [98] Total time: 0:15:19 (0.1471 s / it) +Averaged stats: lr: 0.000000 grad: 0.1991 (0.2006) loss: 0.7829 (0.7831) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:04:51 loss: 0.8366 (0.8366) time: 4.6992 data: 4.5075 max mem: 9377 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.8405 (0.8413) time: 0.1169 data: 0.0906 max mem: 9377 +Eval (hcp-train-subset): [98] Total time: 0:00:15 (0.2447 s / it) +Averaged stats (hcp-train-subset): loss: 0.8405 (0.8413) +Eval (hcp-val): [98] [ 0/62] eta: 0:05:36 loss: 0.8393 (0.8393) time: 5.4312 data: 5.4010 max mem: 9377 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.8373 (0.8398) time: 0.1237 data: 0.0969 max mem: 9377 +Eval (hcp-val): [98] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (hcp-val): loss: 0.8373 (0.8398) +Eval (nsd-val): [98] [ 0/62] eta: 0:05:22 loss: 0.8213 (0.8213) time: 5.1942 data: 5.1631 max mem: 9377 +Eval (nsd-val): [98] [61/62] eta: 0:00:00 loss: 0.8303 (0.8321) time: 0.1427 data: 0.1171 max mem: 9377 +Eval (nsd-val): [98] Total time: 0:00:14 (0.2331 s / it) +Averaged stats (nsd-val): loss: 0.8303 (0.8321) +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +Train: [99] [ 0/6250] eta: 9:58:41 lr: 0.000000 grad: 0.1585 (0.1585) loss: 0.8193 (0.8193) time: 5.7475 data: 5.3868 max mem: 9377 +Train: [99] [ 100/6250] eta: 0:22:53 lr: 0.000000 grad: 0.1931 (0.2221) loss: 0.7977 (0.8049) time: 0.1417 data: 0.0265 max mem: 9377 +Train: [99] [ 200/6250] eta: 0:19:03 lr: 0.000000 grad: 0.1906 (0.2137) loss: 0.7974 (0.7997) time: 0.1537 data: 0.0602 max mem: 9377 +Train: [99] [ 300/6250] eta: 0:17:37 lr: 0.000000 grad: 0.2002 (0.2110) loss: 0.7841 (0.7953) time: 0.1359 data: 0.0366 max mem: 9377 +Train: [99] [ 400/6250] eta: 0:16:41 lr: 0.000000 grad: 0.1874 (0.2079) loss: 0.8005 (0.7933) time: 0.1361 data: 0.0309 max mem: 9377 +Train: [99] [ 500/6250] eta: 0:15:52 lr: 0.000000 grad: 0.1948 (0.2048) loss: 0.7874 (0.7922) time: 0.1500 data: 0.0577 max mem: 9377 +Train: [99] [ 600/6250] eta: 0:15:14 lr: 0.000000 grad: 0.1848 (0.2036) loss: 0.7852 (0.7908) time: 0.1237 data: 0.0265 max mem: 9377 +Train: [99] [ 700/6250] eta: 0:14:44 lr: 0.000000 grad: 0.1941 (0.2032) loss: 0.7822 (0.7902) time: 0.1607 data: 0.0608 max mem: 9377 +Train: [99] [ 800/6250] eta: 0:14:32 lr: 0.000000 grad: 0.1973 (0.2022) loss: 0.7779 (0.7900) time: 0.1748 data: 0.0785 max mem: 9377 +Train: [99] [ 900/6250] eta: 0:14:09 lr: 0.000000 grad: 0.1833 (0.2012) loss: 0.7985 (0.7903) time: 0.1460 data: 0.0519 max mem: 9377 +Train: [99] [1000/6250] eta: 0:13:48 lr: 0.000000 grad: 0.1978 (0.2008) loss: 0.7888 (0.7900) time: 0.1312 data: 0.0313 max mem: 9377 +Train: [99] [1100/6250] eta: 0:13:33 lr: 0.000000 grad: 0.1860 (0.2003) loss: 0.7817 (0.7899) time: 0.1609 data: 0.0777 max mem: 9377 +Train: [99] [1200/6250] eta: 0:13:09 lr: 0.000000 grad: 0.1950 (0.1997) loss: 0.7979 (0.7900) time: 0.1159 data: 0.0359 max mem: 9377 +Train: [99] [1300/6250] eta: 0:12:54 lr: 0.000000 grad: 0.1940 (0.1997) loss: 0.7984 (0.7900) time: 0.1562 data: 0.0705 max mem: 9377 +Train: [99] [1400/6250] eta: 0:12:36 lr: 0.000000 grad: 0.2020 (0.1994) loss: 0.7876 (0.7898) time: 0.1637 data: 0.0758 max mem: 9377 +Train: [99] [1500/6250] eta: 0:12:22 lr: 0.000000 grad: 0.2007 (0.1991) loss: 0.7890 (0.7897) time: 0.1745 data: 0.0923 max mem: 9377 +Train: [99] [1600/6250] eta: 0:12:00 lr: 0.000000 grad: 0.1902 (0.1988) loss: 0.8014 (0.7896) time: 0.1449 data: 0.0546 max mem: 9377 +Train: [99] [1700/6250] eta: 0:11:43 lr: 0.000000 grad: 0.1916 (0.1983) loss: 0.7923 (0.7898) time: 0.1379 data: 0.0536 max mem: 9377 +Train: [99] [1800/6250] eta: 0:11:24 lr: 0.000000 grad: 0.1923 (0.1982) loss: 0.7955 (0.7899) time: 0.1388 data: 0.0486 max mem: 9377 +Train: [99] [1900/6250] eta: 0:11:06 lr: 0.000000 grad: 0.1893 (0.1979) loss: 0.7840 (0.7899) time: 0.1296 data: 0.0463 max mem: 9377 +Train: [99] [2000/6250] eta: 0:10:45 lr: 0.000000 grad: 0.1902 (0.1977) loss: 0.7935 (0.7900) time: 0.1164 data: 0.0316 max mem: 9377 +Train: [99] [2100/6250] eta: 0:10:26 lr: 0.000000 grad: 0.1962 (0.1976) loss: 0.7872 (0.7900) time: 0.1216 data: 0.0344 max mem: 9377 +Train: [99] [2200/6250] eta: 0:10:08 lr: 0.000000 grad: 0.1846 (0.1975) loss: 0.7915 (0.7900) time: 0.1121 data: 0.0185 max mem: 9377 +Train: [99] [2300/6250] eta: 0:09:50 lr: 0.000000 grad: 0.1820 (0.1974) loss: 0.7893 (0.7900) time: 0.1310 data: 0.0489 max mem: 9377 +Train: [99] [2400/6250] eta: 0:09:35 lr: 0.000000 grad: 0.1885 (0.1971) loss: 0.7922 (0.7901) time: 0.1394 data: 0.0615 max mem: 9377 +Train: [99] [2500/6250] eta: 0:09:20 lr: 0.000000 grad: 0.1892 (0.1972) loss: 0.7963 (0.7901) time: 0.1656 data: 0.0845 max mem: 9377 +Train: [99] [2600/6250] eta: 0:09:03 lr: 0.000000 grad: 0.1921 (0.1970) loss: 0.7965 (0.7901) time: 0.1506 data: 0.0658 max mem: 9377 +Train: [99] [2700/6250] eta: 0:08:47 lr: 0.000000 grad: 0.1906 (0.1969) loss: 0.7868 (0.7901) time: 0.1435 data: 0.0664 max mem: 9377 +Train: [99] [2800/6250] eta: 0:08:30 lr: 0.000000 grad: 0.1886 (0.1968) loss: 0.7848 (0.7901) time: 0.1337 data: 0.0499 max mem: 9377 +Train: [99] [2900/6250] eta: 0:08:14 lr: 0.000000 grad: 0.1828 (0.1966) loss: 0.7967 (0.7901) time: 0.1204 data: 0.0338 max mem: 9377 +Train: [99] [3000/6250] eta: 0:07:58 lr: 0.000000 grad: 0.1931 (0.1965) loss: 0.7867 (0.7900) time: 0.1560 data: 0.0753 max mem: 9377 +Train: [99] [3100/6250] eta: 0:07:43 lr: 0.000000 grad: 0.1884 (0.1964) loss: 0.7877 (0.7898) time: 0.1305 data: 0.0465 max mem: 9377 +Train: [99] [3200/6250] eta: 0:07:27 lr: 0.000000 grad: 0.1982 (0.1965) loss: 0.7827 (0.7897) time: 0.1395 data: 0.0597 max mem: 9377 +Train: [99] [3300/6250] eta: 0:07:12 lr: 0.000000 grad: 0.1939 (0.1966) loss: 0.7838 (0.7895) time: 0.1363 data: 0.0569 max mem: 9377 +Train: [99] [3400/6250] eta: 0:06:57 lr: 0.000000 grad: 0.1879 (0.1967) loss: 0.7905 (0.7894) time: 0.1554 data: 0.0756 max mem: 9377 +Train: [99] [3500/6250] eta: 0:06:41 lr: 0.000000 grad: 0.1860 (0.1967) loss: 0.7916 (0.7892) time: 0.1519 data: 0.0729 max mem: 9377 +Train: [99] [3600/6250] eta: 0:06:26 lr: 0.000000 grad: 0.1996 (0.1967) loss: 0.7815 (0.7891) time: 0.1235 data: 0.0399 max mem: 9377 +Train: [99] [3700/6250] eta: 0:06:11 lr: 0.000000 grad: 0.1941 (0.1967) loss: 0.7856 (0.7889) time: 0.1638 data: 0.0805 max mem: 9377 +Train: [99] [3800/6250] eta: 0:05:56 lr: 0.000000 grad: 0.2054 (0.1967) loss: 0.7923 (0.7888) time: 0.1381 data: 0.0551 max mem: 9377 +Train: [99] [3900/6250] eta: 0:05:41 lr: 0.000000 grad: 0.2033 (0.1967) loss: 0.7787 (0.7886) time: 0.1522 data: 0.0719 max mem: 9377 +Train: [99] [4000/6250] eta: 0:05:27 lr: 0.000000 grad: 0.1972 (0.1967) loss: 0.7811 (0.7885) time: 0.1529 data: 0.0692 max mem: 9377 +Train: [99] [4100/6250] eta: 0:05:13 lr: 0.000000 grad: 0.1922 (0.1968) loss: 0.7981 (0.7884) time: 0.1533 data: 0.0720 max mem: 9377 +Train: [99] [4200/6250] eta: 0:04:59 lr: 0.000000 grad: 0.1889 (0.1968) loss: 0.7870 (0.7883) time: 0.1465 data: 0.0674 max mem: 9377 +Train: [99] [4300/6250] eta: 0:04:44 lr: 0.000000 grad: 0.1978 (0.1969) loss: 0.7891 (0.7883) time: 0.1374 data: 0.0497 max mem: 9377 +Train: [99] [4400/6250] eta: 0:04:29 lr: 0.000000 grad: 0.2041 (0.1970) loss: 0.7848 (0.7882) time: 0.1311 data: 0.0514 max mem: 9377 +Train: [99] [4500/6250] eta: 0:04:14 lr: 0.000000 grad: 0.1879 (0.1970) loss: 0.7904 (0.7882) time: 0.1280 data: 0.0459 max mem: 9377 +Train: [99] [4600/6250] eta: 0:03:59 lr: 0.000000 grad: 0.1986 (0.1970) loss: 0.7814 (0.7882) time: 0.1334 data: 0.0537 max mem: 9377 +Train: [99] [4700/6250] eta: 0:03:44 lr: 0.000000 grad: 0.2005 (0.1972) loss: 0.7842 (0.7881) time: 0.1301 data: 0.0478 max mem: 9377 +Train: [99] [4800/6250] eta: 0:03:29 lr: 0.000000 grad: 0.1905 (0.1972) loss: 0.7885 (0.7880) time: 0.1286 data: 0.0468 max mem: 9377 +Train: [99] [4900/6250] eta: 0:03:14 lr: 0.000000 grad: 0.1933 (0.1973) loss: 0.7862 (0.7880) time: 0.1318 data: 0.0577 max mem: 9377 +Train: [99] [5000/6250] eta: 0:02:59 lr: 0.000000 grad: 0.2029 (0.1974) loss: 0.7798 (0.7880) time: 0.1261 data: 0.0387 max mem: 9377 +Train: [99] [5100/6250] eta: 0:02:45 lr: 0.000000 grad: 0.1972 (0.1974) loss: 0.7882 (0.7879) time: 0.1382 data: 0.0568 max mem: 9377 +Train: [99] [5200/6250] eta: 0:02:30 lr: 0.000000 grad: 0.1964 (0.1974) loss: 0.7873 (0.7879) time: 0.1328 data: 0.0555 max mem: 9377 +Train: [99] [5300/6250] eta: 0:02:16 lr: 0.000000 grad: 0.1939 (0.1973) loss: 0.7894 (0.7879) time: 0.1438 data: 0.0641 max mem: 9377 +Train: [99] [5400/6250] eta: 0:02:01 lr: 0.000000 grad: 0.1935 (0.1973) loss: 0.7763 (0.7879) time: 0.1379 data: 0.0507 max mem: 9377 +Train: [99] [5500/6250] eta: 0:01:47 lr: 0.000000 grad: 0.1921 (0.1972) loss: 0.7813 (0.7878) time: 0.1357 data: 0.0585 max mem: 9377 +Train: [99] [5600/6250] eta: 0:01:32 lr: 0.000000 grad: 0.1908 (0.1973) loss: 0.7909 (0.7878) time: 0.1588 data: 0.0780 max mem: 9377 +Train: [99] [5700/6250] eta: 0:01:18 lr: 0.000000 grad: 0.1944 (0.1972) loss: 0.7834 (0.7878) time: 0.1333 data: 0.0469 max mem: 9377 +Train: [99] [5800/6250] eta: 0:01:04 lr: 0.000000 grad: 0.1811 (0.1972) loss: 0.7835 (0.7877) time: 0.1292 data: 0.0454 max mem: 9377 +Train: [99] [5900/6250] eta: 0:00:49 lr: 0.000000 grad: 0.1859 (0.1971) loss: 0.7884 (0.7877) time: 0.1329 data: 0.0425 max mem: 9377 +Train: [99] [6000/6250] eta: 0:00:35 lr: 0.000000 grad: 0.1865 (0.1969) loss: 0.7865 (0.7877) time: 0.1647 data: 0.0855 max mem: 9377 +Train: [99] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.1993 (0.1969) loss: 0.7862 (0.7877) time: 0.1018 data: 0.0179 max mem: 9377 +Train: [99] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1724 (0.1968) loss: 0.7961 (0.7878) time: 0.1337 data: 0.0531 max mem: 9377 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1940 (0.1968) loss: 0.7821 (0.7878) time: 0.1219 data: 0.0430 max mem: 9377 +Train: [99] Total time: 0:14:56 (0.1435 s / it) +Averaged stats: lr: 0.000000 grad: 0.1940 (0.1968) loss: 0.7821 (0.7878) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:04:47 loss: 0.8375 (0.8375) time: 4.6367 data: 4.6070 max mem: 9377 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.8421 (0.8418) time: 0.0955 data: 0.0706 max mem: 9377 +Eval (hcp-train-subset): [99] Total time: 0:00:12 (0.2009 s / it) +Averaged stats (hcp-train-subset): loss: 0.8421 (0.8418) +Making plots (hcp-train-subset): example=4 +Eval (hcp-val): [99] [ 0/62] eta: 0:04:58 loss: 0.8377 (0.8377) time: 4.8162 data: 4.7867 max mem: 9377 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.8367 (0.8395) time: 0.0998 data: 0.0753 max mem: 9377 +Eval (hcp-val): [99] Total time: 0:00:14 (0.2259 s / it) +Averaged stats (hcp-val): loss: 0.8367 (0.8395) +Making plots (hcp-val): example=41 +Eval (nsd-val): [99] [ 0/62] eta: 0:03:34 loss: 0.8191 (0.8191) time: 3.4615 data: 3.3918 max mem: 9377 +Eval (nsd-val): [99] [61/62] eta: 0:00:00 loss: 0.8301 (0.8319) time: 0.1147 data: 0.0884 max mem: 9377 +Eval (nsd-val): [99] Total time: 0:00:12 (0.2044 s / it) +Averaged stats (nsd-val): loss: 0.8301 (0.8319) +Making plots (nsd-val): example=43 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n800_2/pretrain/checkpoint-00099.pth +done! training time: 1 day, 2:37:07 diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d7037db4129b83925263eaffebfa5d5cd93cc2b --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..a8e676cfa2c32ab613ce6d65f50cae6ca3fe073d --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,0.005994842503189409,train,0.6673228346456693,0.021134508069384127,0.6642669753243395,0.021460938867522913,0.6680435972010252,0.021125527010100325 +flat_mae,patch,logistic,aabc_age,,0.005994842503189409,test,0.36538461538461536,0.05797311149838978,0.34523809523809523,0.05799048194912561,0.3532509157509158,0.057100574010265324 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,train,0.8110236220472441,0.017480588066249882,0.8103281984828655,0.017757720765003798,0.8116142649729856,0.01751983954497983 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,test,0.5,0.06523326374040203,0.4921428571428571,0.06524914306344189,0.4965659340659341,0.06511778680900411 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,train,0.5551181102362205,0.020437688174013112,0.5480473558083659,0.02102655769617425,0.5553181022231002,0.020482512270193404 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,test,0.5576923076923077,0.0665988942993312,0.5390851681174261,0.06866072227511469,0.551510989010989,0.06605572206976242 +flat_mae,patch,logistic,aabc_age,3,0.046415888336127774,train,0.8208661417322834,0.016690689849681316,0.8218492389400887,0.016645941038555743,0.8225799259286253,0.016648182341445106 +flat_mae,patch,logistic,aabc_age,3,0.046415888336127774,test,0.5576923076923077,0.06415424967591227,0.5373690825303729,0.06840544574815215,0.5572344322344323,0.06427828573382566 +flat_mae,patch,logistic,aabc_age,4,0.005994842503189409,train,0.6633858267716536,0.02115290215054518,0.6599989619975739,0.02169523857762392,0.66360625176212,0.021222555637406996 +flat_mae,patch,logistic,aabc_age,4,0.005994842503189409,test,0.4807692307692308,0.06508874233458674,0.47488755622188905,0.06590505350666276,0.4803113553113553,0.06520370682985548 +flat_mae,patch,logistic,aabc_age,5,0.000774263682681127,train,0.5511811023622047,0.02113338703899816,0.5402162479341778,0.021967466181459592,0.5490278132066462,0.02105067069020925 +flat_mae,patch,logistic,aabc_age,5,0.000774263682681127,test,0.46153846153846156,0.06317790931054992,0.44909688013136284,0.0650700898904995,0.4608516483516484,0.06294913333244241 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,train,0.6437007874015748,0.02071153351239934,0.6404177990482424,0.021115422123643774,0.6438477317319697,0.02069746026721606 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,test,0.5192307692307693,0.06946851272594678,0.5110815047021944,0.07093176551027093,0.516025641025641,0.06940782139720432 +flat_mae,patch,logistic,aabc_age,7,0.046415888336127774,train,0.812992125984252,0.016649803194989796,0.8133641279172763,0.016700264401470585,0.814097895237529,0.01655805223336249 +flat_mae,patch,logistic,aabc_age,7,0.046415888336127774,test,0.4807692307692308,0.06199135032238691,0.46259258568256867,0.06270621787448583,0.4757326007326007,0.06178601222469888 +flat_mae,patch,logistic,aabc_age,8,0.005994842503189409,train,0.6515748031496063,0.02121073944469961,0.6472330486909776,0.02170397723415902,0.6510419763362862,0.02118626505178072 +flat_mae,patch,logistic,aabc_age,8,0.005994842503189409,test,0.4423076923076923,0.06068367882503206,0.43096891534391535,0.06162326543069519,0.4475732600732601,0.06146872051630546 +flat_mae,patch,logistic,aabc_age,9,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,9,2.782559402207126,test,0.4230769230769231,0.059346477889339166,0.4027777777777778,0.05541357871406193,0.4164377289377289,0.05849377911376269 +flat_mae,patch,logistic,aabc_age,10,0.000774263682681127,train,0.5688976377952756,0.019809018635319576,0.5618756309732311,0.02028850935023967,0.5695809654595705,0.01967810052518912 +flat_mae,patch,logistic,aabc_age,10,0.000774263682681127,test,0.40384615384615385,0.06097875272604113,0.38508202323991797,0.0585178009843021,0.39720695970695974,0.05999398811970512 +flat_mae,patch,logistic,aabc_age,11,9.999999999999999e-05,train,0.49015748031496065,0.02031598109809555,0.46043796526475084,0.020369619769169584,0.4875361774030522,0.020146137910816087 +flat_mae,patch,logistic,aabc_age,11,9.999999999999999e-05,test,0.5576923076923077,0.05679590784492307,0.5026090038993264,0.06182997260072238,0.5455586080586081,0.05632661372410228 +flat_mae,patch,logistic,aabc_age,12,0.000774263682681127,train,0.5452755905511811,0.02017224599143627,0.5324412462573656,0.020922942883293334,0.5442348467267242,0.020033528585091595 +flat_mae,patch,logistic,aabc_age,12,0.000774263682681127,test,0.46153846153846156,0.06477017859712106,0.4579124579124579,0.06492533095581389,0.45810439560439564,0.06464239660633196 +flat_mae,patch,logistic,aabc_age,13,0.046415888336127774,train,0.8110236220472441,0.016881700165670532,0.8118032976031742,0.01692255927646618,0.811579022572398,0.01693557637739916 +flat_mae,patch,logistic,aabc_age,13,0.046415888336127774,test,0.40384615384615385,0.056994484836474735,0.37643678160919536,0.04970195433046571,0.41025641025641024,0.05858420930982116 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,train,0.8228346456692913,0.015761018267212094,0.8231268731268732,0.015855145977849663,0.8247136495016197,0.015640549560550065 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,test,0.5961538461538461,0.062133707704724137,0.5839920948616601,0.06752389329606887,0.5977564102564104,0.06201538104620229 +flat_mae,patch,logistic,aabc_age,15,0.000774263682681127,train,0.5433070866141733,0.022315881306753653,0.5334090367531135,0.02314139925123424,0.542151109823951,0.022315743873536895 +flat_mae,patch,logistic,aabc_age,15,0.000774263682681127,test,0.5576923076923077,0.06393524830898337,0.5607990122585766,0.06478541429807994,0.5528846153846154,0.06394800974328127 +flat_mae,patch,logistic,aabc_age,16,0.3593813663804626,train,0.9783464566929134,0.006743820983555469,0.9786511003514489,0.006663731343379567,0.9790780012620137,0.006508682456166309 +flat_mae,patch,logistic,aabc_age,16,0.3593813663804626,test,0.40384615384615385,0.06596882668863041,0.40239007480386785,0.06729422460408556,0.4036172161172161,0.06601399957955083 +flat_mae,patch,logistic,aabc_age,17,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,17,2.782559402207126,test,0.40384615384615385,0.06647867478510257,0.4017857142857143,0.06316457428200375,0.4001831501831502,0.06589415747596433 +flat_mae,patch,logistic,aabc_age,18,9.999999999999999e-05,train,0.49015748031496065,0.02009255507449311,0.4674300891695351,0.020885129855637792,0.4875185562027584,0.020005785847594455 +flat_mae,patch,logistic,aabc_age,18,9.999999999999999e-05,test,0.4230769230769231,0.06048711291177954,0.37835896241068656,0.05225601484088372,0.41346153846153844,0.05900264146608903 +flat_mae,patch,logistic,aabc_age,19,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,19,2.782559402207126,test,0.4423076923076923,0.06871357022981567,0.4445634920634921,0.06816926526386007,0.44505494505494503,0.06905727909466619 +flat_mae,patch,logistic,aabc_age,20,0.046415888336127774,train,0.812992125984252,0.017464138698838845,0.8132688148216418,0.017596504424504784,0.8144330576594443,0.01738733441503436 +flat_mae,patch,logistic,aabc_age,20,0.046415888336127774,test,0.46153846153846156,0.060164665562508705,0.44503284072249594,0.05969845823574904,0.45650183150183155,0.05960724195578352 +flat_mae,patch,logistic,aabc_age,21,0.046415888336127774,train,0.8169291338582677,0.017404914258969516,0.8172425130816148,0.017501788953360953,0.8176950175396872,0.017340926185044788 +flat_mae,patch,logistic,aabc_age,21,0.046415888336127774,test,0.4230769230769231,0.061544170408273466,0.403126088470916,0.0629884090954219,0.41941391941391937,0.06123112273522697 +flat_mae,patch,logistic,aabc_age,22,0.3593813663804626,train,0.9803149606299213,0.00597058115225634,0.9803679768495934,0.0059444006725758195,0.9803914399805136,0.005957658849012553 +flat_mae,patch,logistic,aabc_age,22,0.3593813663804626,test,0.36538461538461536,0.06614126669306625,0.3853081700907788,0.064875095905477,0.36492673992673996,0.06618289537571495 +flat_mae,patch,logistic,aabc_age,23,0.3593813663804626,train,0.9744094488188977,0.006904905745587481,0.9745954140345225,0.0068509709453231215,0.974728201975876,0.006853627220135319 +flat_mae,patch,logistic,aabc_age,23,0.3593813663804626,test,0.4230769230769231,0.06389437474661096,0.425595238095238,0.06144923592315337,0.4223901098901099,0.06391794315633972 +flat_mae,patch,logistic,aabc_age,24,0.3593813663804626,train,0.9803149606299213,0.006094583561482899,0.980350674456571,0.006080361890165277,0.9804414266507349,0.006051914722281658 +flat_mae,patch,logistic,aabc_age,24,0.3593813663804626,test,0.34615384615384615,0.06651515821366848,0.34375,0.06309466720879657,0.33951465201465203,0.06564512326851708 +flat_mae,patch,logistic,aabc_age,25,0.3593813663804626,train,0.9704724409448819,0.007551796876192616,0.970563693389447,0.0075034444502910256,0.9706459572411386,0.0075264784367110775 +flat_mae,patch,logistic,aabc_age,25,0.3593813663804626,test,0.38461538461538464,0.062462426575724796,0.37837899232202077,0.06117045492462683,0.38095238095238093,0.06216459570449379 +flat_mae,patch,logistic,aabc_age,26,0.005994842503189409,train,0.6673228346456693,0.02046369589115562,0.6635368450022587,0.020972884906776186,0.6673209686050148,0.020430441557039623 +flat_mae,patch,logistic,aabc_age,26,0.005994842503189409,test,0.4807692307692308,0.06528385901982749,0.4740017326224223,0.06582907103345002,0.4816849816849817,0.06518294960502306 +flat_mae,patch,logistic,aabc_age,27,0.046415888336127774,train,0.8208661417322834,0.017709491162993758,0.8211845005655524,0.01784787361259689,0.8224299659179614,0.01769325392156373 +flat_mae,patch,logistic,aabc_age,27,0.046415888336127774,test,0.4423076923076923,0.06487597407233528,0.4321219715956558,0.06666505492193492,0.4375,0.06447195967818517 +flat_mae,patch,logistic,aabc_age,28,0.046415888336127774,train,0.8051181102362205,0.017050607494937867,0.8055342746275931,0.017168087311730817,0.8065185015410758,0.01696501840550821 +flat_mae,patch,logistic,aabc_age,28,0.046415888336127774,test,0.5,0.066058462093824,0.49351037254263064,0.06575910352082737,0.49381868131868134,0.06575731627523208 +flat_mae,patch,logistic,aabc_age,29,0.046415888336127774,train,0.8149606299212598,0.016799003613698175,0.8143220977401497,0.01687911793445277,0.8162316188105235,0.016761869613495104 +flat_mae,patch,logistic,aabc_age,29,0.046415888336127774,test,0.4423076923076923,0.06556666962474511,0.44044311177744466,0.06444134923966444,0.4445970695970696,0.06604366629043604 +flat_mae,patch,logistic,aabc_age,30,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,30,21.54434690031882,test,0.5384615384615384,0.06868722524052491,0.5346790890269151,0.06964858743284236,0.5382326007326007,0.06893831181869045 +flat_mae,patch,logistic,aabc_age,31,9.999999999999999e-05,train,0.49015748031496065,0.020339324948883592,0.45692030451895005,0.020423254594143243,0.486565932537894,0.02014878713393619 +flat_mae,patch,logistic,aabc_age,31,9.999999999999999e-05,test,0.5576923076923077,0.05423382424564667,0.4804232804232804,0.04603229011462956,0.5453296703296704,0.05278931044202807 +flat_mae,patch,logistic,aabc_age,32,0.005994842503189409,train,0.6614173228346457,0.020720874516035315,0.658602726934749,0.02115087929720378,0.6622928130436201,0.020624077135396577 +flat_mae,patch,logistic,aabc_age,32,0.005994842503189409,test,0.4807692307692308,0.05913942757155436,0.4662309368191721,0.060525601473845676,0.47435897435897434,0.05869612858871228 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,train,0.8011811023622047,0.016533544605322664,0.801477631860539,0.01663086547326191,0.8023862701361172,0.0164374958850667 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,test,0.46153846153846156,0.06993381274948816,0.4661490683229813,0.07084614734702722,0.4626831501831502,0.07028114094233108 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,train,0.6594488188976378,0.01959581195030439,0.6562811423837825,0.019972515532907624,0.6603942785520983,0.019564105578370718 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,test,0.4423076923076923,0.06703318748837825,0.451731078904992,0.06690869964232153,0.4464285714285714,0.06741821564573157 +flat_mae,patch,logistic,aabc_age,35,0.000774263682681127,train,0.5531496062992126,0.020156500268375108,0.5404119400763696,0.021074863861146344,0.5521317816447988,0.020148075924458116 +flat_mae,patch,logistic,aabc_age,35,0.000774263682681127,test,0.46153846153846156,0.0661538014311119,0.46003016591251883,0.06605582597558224,0.4581043956043956,0.06577450612830424 +flat_mae,patch,logistic,aabc_age,36,0.046415888336127774,train,0.8110236220472441,0.016720595732707698,0.8120414462311174,0.01661633877469413,0.8127668353187353,0.016687349110596945 +flat_mae,patch,logistic,aabc_age,36,0.046415888336127774,test,0.46153846153846156,0.0632920697613418,0.44800793076655143,0.0644272367952559,0.46222527472527475,0.06360045514679785 +flat_mae,patch,logistic,aabc_age,37,9.999999999999999e-05,train,0.5137795275590551,0.01999286716380307,0.4812281901506497,0.020878267195261997,0.5102067506218966,0.019851644889114964 +flat_mae,patch,logistic,aabc_age,37,9.999999999999999e-05,test,0.34615384615384615,0.05864878918931559,0.3017276422764228,0.05244858546364706,0.33516483516483514,0.05709952895584895 +flat_mae,patch,logistic,aabc_age,38,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,38,166.81005372000556,test,0.4807692307692308,0.0654748133516389,0.49620471014492756,0.06410560790108188,0.4819139194139194,0.06568394678432912 +flat_mae,patch,logistic,aabc_age,39,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,39,166.81005372000556,test,0.5192307692307693,0.06672928395598221,0.5092857142857142,0.0689008134663885,0.5146520146520146,0.06704548657458165 +flat_mae,patch,logistic,aabc_age,40,9.999999999999999e-05,train,0.4704724409448819,0.0197423649551402,0.430374021136295,0.0195808841056343,0.46533442400971253,0.019551482264696052 +flat_mae,patch,logistic,aabc_age,40,9.999999999999999e-05,test,0.4807692307692308,0.056013576122750555,0.42601809954751135,0.04883074948274615,0.47115384615384615,0.05443008841455104 +flat_mae,patch,logistic,aabc_age,41,0.000774263682681127,train,0.5708661417322834,0.020661834425403845,0.5622276901103835,0.021133913054246826,0.5701417271940912,0.020644956877090576 +flat_mae,patch,logistic,aabc_age,41,0.000774263682681127,test,0.34615384615384615,0.0521989253783149,0.30761282290694053,0.049166906941617586,0.3463827838827839,0.05227116774755703 +flat_mae,patch,logistic,aabc_age,42,0.000774263682681127,train,0.5433070866141733,0.020628061991711272,0.5303594829352898,0.0211857896045426,0.5422187176944662,0.020525091390594854 +flat_mae,patch,logistic,aabc_age,42,0.000774263682681127,test,0.46153846153846156,0.05788935579873816,0.433324472798157,0.06575794023072831,0.4608516483516484,0.05816808670625872 +flat_mae,patch,logistic,aabc_age,43,0.005994842503189409,train,0.6476377952755905,0.019982244583134235,0.6435650668655637,0.020474908211561355,0.6484827067698015,0.020003954687539533 +flat_mae,patch,logistic,aabc_age,43,0.005994842503189409,test,0.4807692307692308,0.06253958509729667,0.4710303938939621,0.06373962374315405,0.4773351648351648,0.06224033424108078 +flat_mae,patch,logistic,aabc_age,44,0.005994842503189409,train,0.6535433070866141,0.019421701583666278,0.6509159028856206,0.01953900401941722,0.6540283502337026,0.019410884816072494 +flat_mae,patch,logistic,aabc_age,44,0.005994842503189409,test,0.46153846153846156,0.0671194730956602,0.449859747545582,0.0698571636498157,0.4583333333333333,0.06699781751958288 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,train,0.6496062992125984,0.020217789333109873,0.6462120070823345,0.020618360806566326,0.6504812146017658,0.020187697878186556 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,test,0.46153846153846156,0.06234120656117396,0.4434841021047917,0.061873948497222823,0.4668040293040293,0.06314570976065748 +flat_mae,patch,logistic,aabc_age,46,0.000774263682681127,train,0.5433070866141733,0.020084297517165336,0.5311996429786878,0.020958545245411537,0.5415660140509293,0.020086968135267946 +flat_mae,patch,logistic,aabc_age,46,0.000774263682681127,test,0.5192307692307693,0.05925947106786056,0.4831372549019608,0.06547006008742531,0.5141941391941391,0.05864321613726716 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,train,0.6653543307086615,0.020615223997689693,0.660433900716314,0.0211083692019143,0.665204866232314,0.0205647226923896 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,test,0.4423076923076923,0.06558110746857462,0.4381104902094407,0.06819552532668265,0.44184981684981683,0.06577556138648642 +flat_mae,patch,logistic,aabc_age,48,0.000774263682681127,train,0.5492125984251969,0.021917054802472486,0.537860012359409,0.023014995601643197,0.5472792387257882,0.02187821681468367 +flat_mae,patch,logistic,aabc_age,48,0.000774263682681127,test,0.4423076923076923,0.06531012725369238,0.4224910394265232,0.0668673430016994,0.43864468864468864,0.06490777068843605 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,train,0.8011811023622047,0.017800816153707988,0.8016174776273861,0.01788221619971921,0.8019511343737594,0.017832805797944006 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,test,0.4423076923076923,0.06704563269822521,0.4482594310785054,0.06636219167044904,0.44184981684981683,0.06722097053790903 +flat_mae,patch,logistic,aabc_age,50,0.046415888336127774,train,0.8228346456692913,0.01663972733433608,0.8235131143366438,0.016648607487918524,0.8237933913066827,0.0165463252107114 +flat_mae,patch,logistic,aabc_age,50,0.046415888336127774,test,0.40384615384615385,0.06419160482631288,0.3997294372294372,0.06557873782361305,0.40636446886446886,0.0645738754821113 +flat_mae,patch,logistic,aabc_age,51,0.046415888336127774,train,0.8070866141732284,0.017305053060586738,0.8077573582868083,0.017398014313476467,0.8096724566494499,0.01721928596247858 +flat_mae,patch,logistic,aabc_age,51,0.046415888336127774,test,0.38461538461538464,0.06513412425151531,0.3855555555555556,0.06361873191906178,0.3869047619047619,0.06516484114243888 +flat_mae,patch,logistic,aabc_age,52,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,52,166.81005372000556,test,0.34615384615384615,0.06534904009562895,0.35266661809438804,0.06642468294976489,0.3456959706959707,0.06537286382547726 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,train,0.4940944881889764,0.02150707045620484,0.47588045774177157,0.022012486273210535,0.49148320639675946,0.021388066970743816 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,test,0.5,0.0567875726049887,0.45929035045414357,0.05560754467914116,0.49336080586080583,0.055882885423537386 +flat_mae,patch,logistic,aabc_age,54,0.005994842503189409,train,0.6633858267716536,0.02065668907152371,0.6621203468180381,0.02098972401807353,0.6641913475351418,0.02068590068115522 +flat_mae,patch,logistic,aabc_age,54,0.005994842503189409,test,0.40384615384615385,0.05926068175752676,0.3806896551724138,0.059235489976236094,0.4059065934065934,0.0596103935569322 +flat_mae,patch,logistic,aabc_age,55,9.999999999999999e-05,train,0.49803149606299213,0.020717753460513442,0.4694634478113827,0.020512246245636016,0.4945628407964111,0.020449467784889333 +flat_mae,patch,logistic,aabc_age,55,9.999999999999999e-05,test,0.36538461538461536,0.0574055823729188,0.322561669829222,0.04880286907847437,0.3585164835164835,0.056161427401074125 +flat_mae,patch,logistic,aabc_age,56,0.000774263682681127,train,0.5551181102362205,0.02083183333099993,0.5495216698127926,0.02133682170345319,0.554982939801185,0.020877323148205024 +flat_mae,patch,logistic,aabc_age,56,0.000774263682681127,test,0.36538461538461536,0.06262751489003676,0.35699300699300707,0.06043459718932002,0.3644688644688645,0.06262732484657567 +flat_mae,patch,logistic,aabc_age,57,9.999999999999999e-05,train,0.5078740157480315,0.02084292423953467,0.48142161330553257,0.02109236489486797,0.5034380520110704,0.020670445920230082 +flat_mae,patch,logistic,aabc_age,57,9.999999999999999e-05,test,0.4230769230769231,0.05807843859459205,0.37004453133485393,0.04867457200760687,0.4175824175824176,0.05682660870586967 +flat_mae,patch,logistic,aabc_age,58,0.046415888336127774,train,0.8267716535433071,0.016031322029282927,0.8272982713643323,0.016060537209506395,0.8285107184846632,0.015971247392995613 +flat_mae,patch,logistic,aabc_age,58,0.046415888336127774,test,0.4230769230769231,0.053035435290470695,0.38792167584737247,0.05670503933371054,0.4178113553113554,0.05195409806794536 +flat_mae,patch,logistic,aabc_age,59,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,59,166.81005372000556,test,0.46153846153846156,0.06481991439191469,0.4543936268074199,0.06534968945918793,0.46703296703296704,0.065263937523551 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,train,0.6673228346456693,0.02012373990911769,0.6636148060092675,0.020654636829326247,0.6685087813513519,0.020053396324675624 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,test,0.46153846153846156,0.06503402614180531,0.4485057952799888,0.06844746919383875,0.45810439560439564,0.06508711007940957 +flat_mae,patch,logistic,aabc_age,61,0.3593813663804626,train,0.984251968503937,0.005433440304682384,0.9844469355115096,0.005367480311438193,0.984256116834072,0.005443797117477237 +flat_mae,patch,logistic,aabc_age,61,0.3593813663804626,test,0.38461538461538464,0.06591723113835067,0.3882481836005075,0.06318564565788565,0.38530219780219777,0.06592039137551461 +flat_mae,patch,logistic,aabc_age,62,0.046415888336127774,train,0.7992125984251969,0.017928362260740564,0.79952898941409,0.01809502501425946,0.8004377489743741,0.017891053426093963 +flat_mae,patch,logistic,aabc_age,62,0.046415888336127774,test,0.5,0.06665528872334812,0.5012464387464387,0.0672742848413357,0.49977106227106227,0.067074979170592 +flat_mae,patch,logistic,aabc_age,63,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,63,2.782559402207126,test,0.46153846153846156,0.06032917592223203,0.4568681318681319,0.06043772869714045,0.45947802197802196,0.06031858240525293 +flat_mae,patch,logistic,aabc_age,64,0.005994842503189409,train,0.6692913385826772,0.019305832173991153,0.6662536572607739,0.01982197063875225,0.6694370709777152,0.01927349787251739 +flat_mae,patch,logistic,aabc_age,64,0.005994842503189409,test,0.4423076923076923,0.06449231640854317,0.43994178235557546,0.06443437551345184,0.44047619047619047,0.06469636984511624 +flat_mae,patch,logistic,aabc_age,65,0.046415888336127774,train,0.8011811023622047,0.016977102430745173,0.8026195467152988,0.016965105114575017,0.8021187155847169,0.016917069251565112 +flat_mae,patch,logistic,aabc_age,65,0.046415888336127774,test,0.5192307692307693,0.06415139037911777,0.5203846153846153,0.06369206876913053,0.5190018315018315,0.06418940510171849 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,train,0.6456692913385826,0.020746060657340845,0.6429613034267267,0.020961070475020573,0.6469840656400498,0.020687355515164284 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,test,0.4230769230769231,0.06106214702101924,0.41615675990675993,0.056657697276668154,0.4164377289377289,0.060290168897336856 +flat_mae,patch,logistic,aabc_age,67,0.3593813663804626,train,0.9803149606299213,0.006056202098276419,0.980370357700772,0.00602756677142978,0.9806090078616926,0.005998164052435533 +flat_mae,patch,logistic,aabc_age,67,0.3593813663804626,test,0.46153846153846156,0.06385239774843895,0.44735909252038286,0.06553276731997268,0.4608516483516483,0.06399754691771757 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,train,0.8070866141732284,0.016786512862236386,0.8076301000691245,0.01686362061666575,0.8072468444865543,0.016768518214798467 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,test,0.4230769230769231,0.061924880527962,0.41160372194854955,0.062433014485939346,0.4194139194139194,0.06175038268378974 +flat_mae,patch,logistic,aabc_age,69,0.005994842503189409,train,0.6614173228346457,0.019421487298565424,0.6572673701143198,0.019868781450278407,0.6621428530329563,0.019398979944477607 +flat_mae,patch,logistic,aabc_age,69,0.005994842503189409,test,0.36538461538461536,0.06514039230814604,0.38343253968253965,0.06303024527176505,0.36904761904761907,0.06582780523910735 +flat_mae,patch,logistic,aabc_age,70,9.999999999999999e-05,train,0.49803149606299213,0.020001579003326837,0.4740501815773408,0.020702980970065683,0.4950979498992115,0.019874904701942007 +flat_mae,patch,logistic,aabc_age,70,9.999999999999999e-05,test,0.38461538461538464,0.0615785806724476,0.3658655520724486,0.06394429548576383,0.38369963369963367,0.06151327251593887 +flat_mae,patch,logistic,aabc_age,71,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,71,2.782559402207126,test,0.5,0.0669227564094889,0.5045699067438199,0.06511638588798478,0.5057234432234432,0.06746858289811337 +flat_mae,patch,logistic,aabc_age,72,0.046415888336127774,train,0.8031496062992126,0.018234797586364534,0.8035369357142843,0.018386958558190773,0.8058577664661126,0.01813598216260913 +flat_mae,patch,logistic,aabc_age,72,0.046415888336127774,test,0.5961538461538461,0.06448580184081808,0.587548945307566,0.0670532455241151,0.594551282051282,0.06467477492935751 +flat_mae,patch,logistic,aabc_age,73,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,73,21.54434690031882,test,0.46153846153846156,0.06625523313037064,0.4606647378386509,0.06660111487063981,0.46108058608058605,0.06637806283512905 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,train,0.6653543307086615,0.01963551407798702,0.663541725020846,0.01987061561393362,0.6659399220159996,0.01952924257300303 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,test,0.46153846153846156,0.06980153607099075,0.45845924908424907,0.0703541524740769,0.4583333333333333,0.06958060319472023 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,train,0.6417322834645669,0.020543460627838465,0.6397712630086939,0.02094243274649449,0.6424166984727334,0.02059288504966699 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,test,0.5576923076923077,0.06752273340607012,0.5577095067417648,0.06859100082964703,0.5547161172161172,0.06774634443428293 +flat_mae,patch,logistic,aabc_age,76,0.005994842503189409,train,0.6732283464566929,0.019975043329232407,0.6707123287132374,0.020457708179626794,0.6740044381450319,0.019951524090771333 +flat_mae,patch,logistic,aabc_age,76,0.005994842503189409,test,0.46153846153846156,0.06729493285653594,0.47349985721425003,0.06553896842453717,0.4626831501831502,0.06756771030669786 +flat_mae,patch,logistic,aabc_age,77,0.000774263682681127,train,0.5354330708661418,0.020308826822268543,0.5199942417399845,0.021327683767807956,0.5341718227657277,0.020248343431408442 +flat_mae,patch,logistic,aabc_age,77,0.000774263682681127,test,0.4423076923076923,0.06662288996625425,0.4318463783981026,0.06740551055035031,0.44024725274725274,0.06644484807946853 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,train,0.7913385826771654,0.017584429161172623,0.7903095387750537,0.017861227340668903,0.7926584085970361,0.017452724267204324 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,test,0.5961538461538461,0.06778079880313106,0.6060782967032967,0.06636146091676794,0.5961538461538461,0.0678870802947577 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,train,0.8149606299212598,0.017819272980483202,0.8155225333152568,0.017890234845911714,0.8162139976102296,0.01776597603675087 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,test,0.4807692307692308,0.06722306768068806,0.4666738176964149,0.06947301083823462,0.4759615384615385,0.06690422484057741 +flat_mae,patch,logistic,aabc_age,80,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,80,1291.5496650148827,test,0.5769230769230769,0.06471334149962693,0.562824302134647,0.06782130333476351,0.5737179487179487,0.06461591370563609 +flat_mae,patch,logistic,aabc_age,81,0.3593813663804626,train,0.9803149606299213,0.005867408572186421,0.9802638007658137,0.005862931091920789,0.9803914399805136,0.005840281182268117 +flat_mae,patch,logistic,aabc_age,81,0.3593813663804626,test,0.4423076923076923,0.06326046233370854,0.43348338457034113,0.06370513535578642,0.43612637362637363,0.06298082901419623 +flat_mae,patch,logistic,aabc_age,82,0.3593813663804626,train,0.9822834645669292,0.005646064625568101,0.9823947819217674,0.0056011174512924184,0.9826751235641719,0.005522704715359446 +flat_mae,patch,logistic,aabc_age,82,0.3593813663804626,test,0.5,0.06713908552202455,0.47806144320076205,0.06928562270770813,0.49496336996337,0.06653687026043122 +flat_mae,patch,logistic,aabc_age,83,0.005994842503189409,train,0.6633858267716536,0.021212637517573837,0.6593718060149529,0.02177023372642861,0.6627536014376981,0.021254314751218195 +flat_mae,patch,logistic,aabc_age,83,0.005994842503189409,test,0.46153846153846156,0.059184097496284456,0.43625783033780224,0.06055702863583901,0.45787545787545786,0.05857016490994418 +flat_mae,patch,logistic,aabc_age,84,0.005994842503189409,train,0.655511811023622,0.020258318076753136,0.6531136388236267,0.0207329339095953,0.6570647108013403,0.020206886890041294 +flat_mae,patch,logistic,aabc_age,84,0.005994842503189409,test,0.5,0.06346050115702358,0.4857491970395196,0.06568205840911538,0.4951923076923077,0.0631110183296891 +flat_mae,patch,logistic,aabc_age,85,0.3593813663804626,train,0.9763779527559056,0.006647062724759407,0.9762348434722907,0.006679985403846224,0.9762092219053337,0.006691547026755965 +flat_mae,patch,logistic,aabc_age,85,0.3593813663804626,test,0.46153846153846156,0.06335655652288774,0.4435714285714286,0.06172453824170494,0.4578754578754579,0.06283177560555399 +flat_mae,patch,logistic,aabc_age,86,0.3593813663804626,train,0.9763779527559056,0.006770009871932797,0.9765343040646481,0.006720036733135326,0.9768443043485766,0.006674953587151752 +flat_mae,patch,logistic,aabc_age,86,0.3593813663804626,test,0.5384615384615384,0.060346276413073545,0.5081417624521073,0.06303745240652271,0.532051282051282,0.059941055053344725 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,train,0.6437007874015748,0.019466079879023712,0.6385918618305564,0.0200950057400134,0.6429950814075479,0.01947109880670619 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,test,0.5192307692307693,0.06785531073737906,0.5167032967032967,0.06768145131823404,0.5249542124542125,0.06837214406364608 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,train,0.484251968503937,0.017785801167488808,0.44705104884720603,0.017898924119562166,0.4815054115065719,0.017597186806974197 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,test,0.4807692307692308,0.06729586709308863,0.47176842765078064,0.06988857737709754,0.47596153846153844,0.06715240770694232 +flat_mae,patch,logistic,aabc_age,89,0.000774263682681127,train,0.5551181102362205,0.021107100772474786,0.5453875407000407,0.02168878158495461,0.5551681422124364,0.021047105327669353 +flat_mae,patch,logistic,aabc_age,89,0.000774263682681127,test,0.40384615384615385,0.06070925716334737,0.3891285403050109,0.06162161546304663,0.4059065934065934,0.061082897509892146 +flat_mae,patch,logistic,aabc_age,90,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,90,2.782559402207126,test,0.46153846153846156,0.06477822886519238,0.45366739707069537,0.06481881885465171,0.4624542124542124,0.06514934878710535 +flat_mae,patch,logistic,aabc_age,91,9.999999999999999e-05,train,0.4940944881889764,0.020681599634871737,0.45033422483168867,0.02030476293678915,0.48969555360776695,0.020431217695024536 +flat_mae,patch,logistic,aabc_age,91,9.999999999999999e-05,test,0.4423076923076923,0.0647490718674199,0.4281603136867156,0.0676044574879797,0.4416208791208791,0.06456022539776912 +flat_mae,patch,logistic,aabc_age,92,0.3593813663804626,train,0.9783464566929134,0.006518870722092213,0.9785769096009957,0.0064485184210078305,0.9785928788294345,0.006475718991745807 +flat_mae,patch,logistic,aabc_age,92,0.3593813663804626,test,0.4423076923076923,0.056248852716768036,0.4112554112554112,0.05656535320495424,0.44436813186813184,0.05696691141733874 +flat_mae,patch,logistic,aabc_age,93,0.046415888336127774,train,0.8267716535433071,0.016669625618256592,0.8270625078225841,0.016749044153877256,0.8279256227116414,0.016610151296173683 +flat_mae,patch,logistic,aabc_age,93,0.046415888336127774,test,0.4807692307692308,0.06399300766166409,0.4818314485434051,0.062086096370781156,0.4878663003663004,0.06493989525455693 +flat_mae,patch,logistic,aabc_age,94,0.046415888336127774,train,0.8149606299212598,0.01616820328056257,0.8149248000637802,0.016358087103172644,0.8164991733619236,0.01607920668607022 +flat_mae,patch,logistic,aabc_age,94,0.046415888336127774,test,0.4230769230769231,0.06826787647627457,0.42109302491611333,0.06876417167530233,0.4226190476190476,0.06832540818445779 +flat_mae,patch,logistic,aabc_age,95,0.3593813663804626,train,0.9763779527559056,0.006818693055795237,0.9765495415557527,0.006755778447247993,0.9767943176783553,0.006680511021807584 +flat_mae,patch,logistic,aabc_age,95,0.3593813663804626,test,0.5,0.06066050400023173,0.47691297208538586,0.05601299097208416,0.4906135531135531,0.05970087100652999 +flat_mae,patch,logistic,aabc_age,96,0.046415888336127774,train,0.8110236220472441,0.01734739266760467,0.8120001301120557,0.017329775825447227,0.8117466037833556,0.017340329807401952 +flat_mae,patch,logistic,aabc_age,96,0.046415888336127774,test,0.4423076923076923,0.07014223714676245,0.4430113636363637,0.07056556755059885,0.4391025641025641,0.07009350237139708 +flat_mae,patch,logistic,aabc_age,97,0.005994842503189409,train,0.6692913385826772,0.02159676737015801,0.6656415591811804,0.02219271981853881,0.6685844206532934,0.02164049464983858 +flat_mae,patch,logistic,aabc_age,97,0.005994842503189409,test,0.46153846153846156,0.06231452953939375,0.45179146537842185,0.062414285566127756,0.4608516483516484,0.062206297384865196 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,train,0.6535433070866141,0.020133029893132967,0.6504834017398144,0.020620853927054254,0.654781027217682,0.020208480326434266 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,test,0.4423076923076923,0.06104751277953957,0.41550925925925924,0.06395468254199337,0.440018315018315,0.06095778406121292 +flat_mae,patch,logistic,aabc_age,99,0.3593813663804626,train,0.9803149606299213,0.005790045729461818,0.9804829709380649,0.0057560617732463695,0.9806589945319139,0.005735132416810192 +flat_mae,patch,logistic,aabc_age,99,0.3593813663804626,test,0.4230769230769231,0.06846425016237202,0.43160287987874196,0.06716847406196273,0.42399267399267404,0.06864137169176612 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,train,0.8169291338582677,0.016991325063403805,0.8178598556304609,0.016930266544072134,0.8183300999829303,0.016931334883859218 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,test,0.46153846153846156,0.06492161864931695,0.45384615384615384,0.06599439012345748,0.4624542124542125,0.06518021361592749 diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e28b7db7bbaf4973404fe1ca7de84f66181c2fdc --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:54:59 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:20:34 time: 5.4130 data: 4.4775 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:42 time: 0.2470 data: 0.0851 max mem: 3581 +extract (train) [ 40/228] eta: 0:01:05 time: 0.1991 data: 0.0627 max mem: 3581 +extract (train) [ 60/228] eta: 0:00:50 time: 0.1983 data: 0.0658 max mem: 3581 +extract (train) [ 80/228] eta: 0:00:40 time: 0.2055 data: 0.0684 max mem: 3581 +extract (train) [100/228] eta: 0:00:33 time: 0.2043 data: 0.0687 max mem: 3581 +extract (train) [120/228] eta: 0:00:27 time: 0.2310 data: 0.0821 max mem: 3581 +extract (train) [140/228] eta: 0:00:21 time: 0.2027 data: 0.0665 max mem: 3581 +extract (train) [160/228] eta: 0:00:16 time: 0.1951 data: 0.0619 max mem: 3581 +extract (train) [180/228] eta: 0:00:11 time: 0.2263 data: 0.0784 max mem: 3581 +extract (train) [200/228] eta: 0:00:06 time: 0.1796 data: 0.0529 max mem: 3581 +extract (train) [220/228] eta: 0:00:01 time: 0.1787 data: 0.0540 max mem: 3581 +extract (train) [227/228] eta: 0:00:00 time: 0.1743 data: 0.0535 max mem: 3581 +extract (train) Total time: 0:00:52 (0.2298 s / it) +extract (validation) [ 0/27] eta: 0:01:54 time: 4.2524 data: 4.1100 max mem: 3581 +extract (validation) [20/27] eta: 0:00:02 time: 0.1770 data: 0.0540 max mem: 3581 +extract (validation) [26/27] eta: 0:00:00 time: 0.1661 data: 0.0486 max mem: 3581 +extract (validation) Total time: 0:00:09 (0.3374 s / it) +extract (test) [ 0/26] eta: 0:01:51 time: 4.3020 data: 4.1636 max mem: 3581 +extract (test) [20/26] eta: 0:00:02 time: 0.1904 data: 0.0584 max mem: 3581 +extract (test) [25/26] eta: 0:00:00 time: 0.1771 data: 0.0522 max mem: 3581 +extract (test) Total time: 0:00:09 (0.3572 s / it) +feature extraction time: 0:01:10 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.0059948 | train | 0.66732 | 0.021135 | 0.66427 | 0.021461 | 0.66804 | 0.021126 | +| flat_mae | patch | logistic | aabc_age | | 0.0059948 | test | 0.36538 | 0.057973 | 0.34524 | 0.05799 | 0.35325 | 0.057101 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06523326374040203, "f1": 0.4921428571428571, "f1_std": 0.06524914306344189, "bacc": 0.4965659340659341, "bacc_std": 0.06511778680900411} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 0.000774263682681127, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.0665988942993312, "f1": 0.5390851681174261, "f1_std": 0.06866072227511469, "bacc": 0.551510989010989, "bacc_std": 0.06605572206976242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06415424967591227, "f1": 0.5373690825303729, "f1_std": 0.06840544574815215, "bacc": 0.5572344322344323, "bacc_std": 0.06427828573382566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06508874233458674, "f1": 0.47488755622188905, "f1_std": 0.06590505350666276, "bacc": 0.4803113553113553, "bacc_std": 0.06520370682985548} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06317790931054992, "f1": 0.44909688013136284, "f1_std": 0.0650700898904995, "bacc": 0.4608516483516484, "bacc_std": 0.06294913333244241} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06946851272594678, "f1": 0.5110815047021944, "f1_std": 0.07093176551027093, "bacc": 0.516025641025641, "bacc_std": 0.06940782139720432} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06199135032238691, "f1": 0.46259258568256867, "f1_std": 0.06270621787448583, "bacc": 0.4757326007326007, "bacc_std": 0.06178601222469888} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06068367882503206, "f1": 0.43096891534391535, "f1_std": 0.06162326543069519, "bacc": 0.4475732600732601, "bacc_std": 0.06146872051630546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 2.782559402207126, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.059346477889339166, "f1": 0.4027777777777778, "f1_std": 0.05541357871406193, "bacc": 0.4164377289377289, "bacc_std": 0.05849377911376269} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06097875272604113, "f1": 0.38508202323991797, "f1_std": 0.0585178009843021, "bacc": 0.39720695970695974, "bacc_std": 0.05999398811970512} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.05679590784492307, "f1": 0.5026090038993264, "f1_std": 0.06182997260072238, "bacc": 0.5455586080586081, "bacc_std": 0.05632661372410228} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06477017859712106, "f1": 0.4579124579124579, "f1_std": 0.06492533095581389, "bacc": 0.45810439560439564, "bacc_std": 0.06464239660633196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.056994484836474735, "f1": 0.37643678160919536, "f1_std": 0.04970195433046571, "bacc": 0.41025641025641024, "bacc_std": 0.05858420930982116} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.062133707704724137, "f1": 0.5839920948616601, "f1_std": 0.06752389329606887, "bacc": 0.5977564102564104, "bacc_std": 0.06201538104620229} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.000774263682681127, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06393524830898337, "f1": 0.5607990122585766, "f1_std": 0.06478541429807994, "bacc": 0.5528846153846154, "bacc_std": 0.06394800974328127} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06596882668863041, "f1": 0.40239007480386785, "f1_std": 0.06729422460408556, "bacc": 0.4036172161172161, "bacc_std": 0.06601399957955083} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 2.782559402207126, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06647867478510257, "f1": 0.4017857142857143, "f1_std": 0.06316457428200375, "bacc": 0.4001831501831502, "bacc_std": 0.06589415747596433} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06048711291177954, "f1": 0.37835896241068656, "f1_std": 0.05225601484088372, "bacc": 0.41346153846153844, "bacc_std": 0.05900264146608903} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 2.782559402207126, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06871357022981567, "f1": 0.4445634920634921, "f1_std": 0.06816926526386007, "bacc": 0.44505494505494503, "bacc_std": 0.06905727909466619} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.060164665562508705, "f1": 0.44503284072249594, "f1_std": 0.05969845823574904, "bacc": 0.45650183150183155, "bacc_std": 0.05960724195578352} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.061544170408273466, "f1": 0.403126088470916, "f1_std": 0.0629884090954219, "bacc": 0.41941391941391937, "bacc_std": 0.06123112273522697} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06614126669306625, "f1": 0.3853081700907788, "f1_std": 0.064875095905477, "bacc": 0.36492673992673996, "bacc_std": 0.06618289537571495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06389437474661096, "f1": 0.425595238095238, "f1_std": 0.06144923592315337, "bacc": 0.4223901098901099, "bacc_std": 0.06391794315633972} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06651515821366848, "f1": 0.34375, "f1_std": 0.06309466720879657, "bacc": 0.33951465201465203, "bacc_std": 0.06564512326851708} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.062462426575724796, "f1": 0.37837899232202077, "f1_std": 0.06117045492462683, "bacc": 0.38095238095238093, "bacc_std": 0.06216459570449379} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06528385901982749, "f1": 0.4740017326224223, "f1_std": 0.06582907103345002, "bacc": 0.4816849816849817, "bacc_std": 0.06518294960502306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06487597407233528, "f1": 0.4321219715956558, "f1_std": 0.06666505492193492, "bacc": 0.4375, "bacc_std": 0.06447195967818517} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.066058462093824, "f1": 0.49351037254263064, "f1_std": 0.06575910352082737, "bacc": 0.49381868131868134, "bacc_std": 0.06575731627523208} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06556666962474511, "f1": 0.44044311177744466, "f1_std": 0.06444134923966444, "bacc": 0.4445970695970696, "bacc_std": 0.06604366629043604} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 21.54434690031882, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06868722524052491, "f1": 0.5346790890269151, "f1_std": 0.06964858743284236, "bacc": 0.5382326007326007, "bacc_std": 0.06893831181869045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.05423382424564667, "f1": 0.4804232804232804, "f1_std": 0.04603229011462956, "bacc": 0.5453296703296704, "bacc_std": 0.05278931044202807} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05913942757155436, "f1": 0.4662309368191721, "f1_std": 0.060525601473845676, "bacc": 0.47435897435897434, "bacc_std": 0.05869612858871228} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06993381274948816, "f1": 0.4661490683229813, "f1_std": 0.07084614734702722, "bacc": 0.4626831501831502, "bacc_std": 0.07028114094233108} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06703318748837825, "f1": 0.451731078904992, "f1_std": 0.06690869964232153, "bacc": 0.4464285714285714, "bacc_std": 0.06741821564573157} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0661538014311119, "f1": 0.46003016591251883, "f1_std": 0.06605582597558224, "bacc": 0.4581043956043956, "bacc_std": 0.06577450612830424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0632920697613418, "f1": 0.44800793076655143, "f1_std": 0.0644272367952559, "bacc": 0.46222527472527475, "bacc_std": 0.06360045514679785} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 9.999999999999999e-05, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.05864878918931559, "f1": 0.3017276422764228, "f1_std": 0.05244858546364706, "bacc": 0.33516483516483514, "bacc_std": 0.05709952895584895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 166.81005372000556, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0654748133516389, "f1": 0.49620471014492756, "f1_std": 0.06410560790108188, "bacc": 0.4819139194139194, "bacc_std": 0.06568394678432912} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 166.81005372000556, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06672928395598221, "f1": 0.5092857142857142, "f1_std": 0.0689008134663885, "bacc": 0.5146520146520146, "bacc_std": 0.06704548657458165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.056013576122750555, "f1": 0.42601809954751135, "f1_std": 0.04883074948274615, "bacc": 0.47115384615384615, "bacc_std": 0.05443008841455104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.0521989253783149, "f1": 0.30761282290694053, "f1_std": 0.049166906941617586, "bacc": 0.3463827838827839, "bacc_std": 0.05227116774755703} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05788935579873816, "f1": 0.433324472798157, "f1_std": 0.06575794023072831, "bacc": 0.4608516483516484, "bacc_std": 0.05816808670625872} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06253958509729667, "f1": 0.4710303938939621, "f1_std": 0.06373962374315405, "bacc": 0.4773351648351648, "bacc_std": 0.06224033424108078} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0671194730956602, "f1": 0.449859747545582, "f1_std": 0.0698571636498157, "bacc": 0.4583333333333333, "bacc_std": 0.06699781751958288} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06234120656117396, "f1": 0.4434841021047917, "f1_std": 0.061873948497222823, "bacc": 0.4668040293040293, "bacc_std": 0.06314570976065748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.000774263682681127, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.05925947106786056, "f1": 0.4831372549019608, "f1_std": 0.06547006008742531, "bacc": 0.5141941391941391, "bacc_std": 0.05864321613726716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06558110746857462, "f1": 0.4381104902094407, "f1_std": 0.06819552532668265, "bacc": 0.44184981684981683, "bacc_std": 0.06577556138648642} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06531012725369238, "f1": 0.4224910394265232, "f1_std": 0.0668673430016994, "bacc": 0.43864468864468864, "bacc_std": 0.06490777068843605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06704563269822521, "f1": 0.4482594310785054, "f1_std": 0.06636219167044904, "bacc": 0.44184981684981683, "bacc_std": 0.06722097053790903} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06419160482631288, "f1": 0.3997294372294372, "f1_std": 0.06557873782361305, "bacc": 0.40636446886446886, "bacc_std": 0.0645738754821113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06513412425151531, "f1": 0.3855555555555556, "f1_std": 0.06361873191906178, "bacc": 0.3869047619047619, "bacc_std": 0.06516484114243888} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 166.81005372000556, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06534904009562895, "f1": 0.35266661809438804, "f1_std": 0.06642468294976489, "bacc": 0.3456959706959707, "bacc_std": 0.06537286382547726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.0567875726049887, "f1": 0.45929035045414357, "f1_std": 0.05560754467914116, "bacc": 0.49336080586080583, "bacc_std": 0.055882885423537386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05926068175752676, "f1": 0.3806896551724138, "f1_std": 0.059235489976236094, "bacc": 0.4059065934065934, "bacc_std": 0.0596103935569322} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 9.999999999999999e-05, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.0574055823729188, "f1": 0.322561669829222, "f1_std": 0.04880286907847437, "bacc": 0.3585164835164835, "bacc_std": 0.056161427401074125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06262751489003676, "f1": 0.35699300699300707, "f1_std": 0.06043459718932002, "bacc": 0.3644688644688645, "bacc_std": 0.06262732484657567} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05807843859459205, "f1": 0.37004453133485393, "f1_std": 0.04867457200760687, "bacc": 0.4175824175824176, "bacc_std": 0.05682660870586967} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.053035435290470695, "f1": 0.38792167584737247, "f1_std": 0.05670503933371054, "bacc": 0.4178113553113554, "bacc_std": 0.05195409806794536} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 166.81005372000556, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06481991439191469, "f1": 0.4543936268074199, "f1_std": 0.06534968945918793, "bacc": 0.46703296703296704, "bacc_std": 0.065263937523551} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06503402614180531, "f1": 0.4485057952799888, "f1_std": 0.06844746919383875, "bacc": 0.45810439560439564, "bacc_std": 0.06508711007940957} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06591723113835067, "f1": 0.3882481836005075, "f1_std": 0.06318564565788565, "bacc": 0.38530219780219777, "bacc_std": 0.06592039137551461} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06665528872334812, "f1": 0.5012464387464387, "f1_std": 0.0672742848413357, "bacc": 0.49977106227106227, "bacc_std": 0.067074979170592} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 2.782559402207126, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06032917592223203, "f1": 0.4568681318681319, "f1_std": 0.06043772869714045, "bacc": 0.45947802197802196, "bacc_std": 0.06031858240525293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06449231640854317, "f1": 0.43994178235557546, "f1_std": 0.06443437551345184, "bacc": 0.44047619047619047, "bacc_std": 0.06469636984511624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06415139037911777, "f1": 0.5203846153846153, "f1_std": 0.06369206876913053, "bacc": 0.5190018315018315, "bacc_std": 0.06418940510171849} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06106214702101924, "f1": 0.41615675990675993, "f1_std": 0.056657697276668154, "bacc": 0.4164377289377289, "bacc_std": 0.060290168897336856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06385239774843895, "f1": 0.44735909252038286, "f1_std": 0.06553276731997268, "bacc": 0.4608516483516483, "bacc_std": 0.06399754691771757} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.061924880527962, "f1": 0.41160372194854955, "f1_std": 0.062433014485939346, "bacc": 0.4194139194139194, "bacc_std": 0.06175038268378974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06514039230814604, "f1": 0.38343253968253965, "f1_std": 0.06303024527176505, "bacc": 0.36904761904761907, "bacc_std": 0.06582780523910735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 9.999999999999999e-05, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.0615785806724476, "f1": 0.3658655520724486, "f1_std": 0.06394429548576383, "bacc": 0.38369963369963367, "bacc_std": 0.06151327251593887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 2.782559402207126, "split": "test", "acc": 0.5, "acc_std": 0.0669227564094889, "f1": 0.5045699067438199, "f1_std": 0.06511638588798478, "bacc": 0.5057234432234432, "bacc_std": 0.06746858289811337} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06448580184081808, "f1": 0.587548945307566, "f1_std": 0.0670532455241151, "bacc": 0.594551282051282, "bacc_std": 0.06467477492935751} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 21.54434690031882, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06625523313037064, "f1": 0.4606647378386509, "f1_std": 0.06660111487063981, "bacc": 0.46108058608058605, "bacc_std": 0.06637806283512905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06980153607099075, "f1": 0.45845924908424907, "f1_std": 0.0703541524740769, "bacc": 0.4583333333333333, "bacc_std": 0.06958060319472023} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06752273340607012, "f1": 0.5577095067417648, "f1_std": 0.06859100082964703, "bacc": 0.5547161172161172, "bacc_std": 0.06774634443428293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06729493285653594, "f1": 0.47349985721425003, "f1_std": 0.06553896842453717, "bacc": 0.4626831501831502, "bacc_std": 0.06756771030669786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06662288996625425, "f1": 0.4318463783981026, "f1_std": 0.06740551055035031, "bacc": 0.44024725274725274, "bacc_std": 0.06644484807946853} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06778079880313106, "f1": 0.6060782967032967, "f1_std": 0.06636146091676794, "bacc": 0.5961538461538461, "bacc_std": 0.0678870802947577} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06722306768068806, "f1": 0.4666738176964149, "f1_std": 0.06947301083823462, "bacc": 0.4759615384615385, "bacc_std": 0.06690422484057741} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 1291.5496650148827, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06471334149962693, "f1": 0.562824302134647, "f1_std": 0.06782130333476351, "bacc": 0.5737179487179487, "bacc_std": 0.06461591370563609} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06326046233370854, "f1": 0.43348338457034113, "f1_std": 0.06370513535578642, "bacc": 0.43612637362637363, "bacc_std": 0.06298082901419623} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.06713908552202455, "f1": 0.47806144320076205, "f1_std": 0.06928562270770813, "bacc": 0.49496336996337, "bacc_std": 0.06653687026043122} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.059184097496284456, "f1": 0.43625783033780224, "f1_std": 0.06055702863583901, "bacc": 0.45787545787545786, "bacc_std": 0.05857016490994418} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06346050115702358, "f1": 0.4857491970395196, "f1_std": 0.06568205840911538, "bacc": 0.4951923076923077, "bacc_std": 0.0631110183296891} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06335655652288774, "f1": 0.4435714285714286, "f1_std": 0.06172453824170494, "bacc": 0.4578754578754579, "bacc_std": 0.06283177560555399} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.060346276413073545, "f1": 0.5081417624521073, "f1_std": 0.06303745240652271, "bacc": 0.532051282051282, "bacc_std": 0.059941055053344725} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06785531073737906, "f1": 0.5167032967032967, "f1_std": 0.06768145131823404, "bacc": 0.5249542124542125, "bacc_std": 0.06837214406364608} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06729586709308863, "f1": 0.47176842765078064, "f1_std": 0.06988857737709754, "bacc": 0.47596153846153844, "bacc_std": 0.06715240770694232} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06070925716334737, "f1": 0.3891285403050109, "f1_std": 0.06162161546304663, "bacc": 0.4059065934065934, "bacc_std": 0.061082897509892146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 2.782559402207126, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06477822886519238, "f1": 0.45366739707069537, "f1_std": 0.06481881885465171, "bacc": 0.4624542124542124, "bacc_std": 0.06514934878710535} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.0647490718674199, "f1": 0.4281603136867156, "f1_std": 0.0676044574879797, "bacc": 0.4416208791208791, "bacc_std": 0.06456022539776912} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.056248852716768036, "f1": 0.4112554112554112, "f1_std": 0.05656535320495424, "bacc": 0.44436813186813184, "bacc_std": 0.05696691141733874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06399300766166409, "f1": 0.4818314485434051, "f1_std": 0.062086096370781156, "bacc": 0.4878663003663004, "bacc_std": 0.06493989525455693} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06826787647627457, "f1": 0.42109302491611333, "f1_std": 0.06876417167530233, "bacc": 0.4226190476190476, "bacc_std": 0.06832540818445779} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.06066050400023173, "f1": 0.47691297208538586, "f1_std": 0.05601299097208416, "bacc": 0.4906135531135531, "bacc_std": 0.05970087100652999} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.07014223714676245, "f1": 0.4430113636363637, "f1_std": 0.07056556755059885, "bacc": 0.4391025641025641, "bacc_std": 0.07009350237139708} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06231452953939375, "f1": 0.45179146537842185, "f1_std": 0.062414285566127756, "bacc": 0.4608516483516484, "bacc_std": 0.062206297384865196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06104751277953957, "f1": 0.41550925925925924, "f1_std": 0.06395468254199337, "bacc": 0.440018315018315, "bacc_std": 0.06095778406121292} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06846425016237202, "f1": 0.43160287987874196, "f1_std": 0.06716847406196273, "bacc": 0.42399267399267404, "bacc_std": 0.06864137169176612} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06492161864931695, "f1": 0.45384615384615384, "f1_std": 0.06599439012345748, "bacc": 0.4624542124542125, "bacc_std": 0.06518021361592749} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 20.25 | 132.55 | 0.75541 | 0.17625 | 0.75005 | 0.18353 | 0.7554 | 0.17706 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 20.25 | 132.55 | 0.45962 | 0.057237 | 0.44744 | 0.058349 | 0.45759 | 0.056877 | + + +done! total time: 0:05:40 diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38316a49ca786b0b04bd75136e07950c23e86db9 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (aabc_age reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic +model: flat_mae +representation: reg +dataset: aabc_age +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..f9c94dfc4f39f681b00a974854bd1fc153e3038d --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,aabc_age,,0.005994842503189409,train,0.6279527559055118,0.02142932553609622,0.62565536938739,0.021693749259380012,0.6291020033062805,0.021404964457797254 +flat_mae,reg,logistic,aabc_age,,0.005994842503189409,test,0.36538461538461536,0.0636838878678469,0.3611506746626687,0.0633396060697681,0.35622710622710624,0.06301874249859056 +flat_mae,reg,logistic,aabc_age,1,0.046415888336127774,train,0.7913385826771654,0.017470719860729986,0.7907790989944486,0.017850206706045025,0.7914206091804775,0.017511690858046543 +flat_mae,reg,logistic,aabc_age,1,0.046415888336127774,test,0.46153846153846156,0.0635215450567013,0.46233105669387525,0.06145093577090532,0.4608516483516483,0.06363918506654685 +flat_mae,reg,logistic,aabc_age,2,9.999999999999999e-05,train,0.4822834645669291,0.020233516088172775,0.45990668262774304,0.020692039461757093,0.4809770152419786,0.020142579536389864 +flat_mae,reg,logistic,aabc_age,2,9.999999999999999e-05,test,0.5384615384615384,0.06397558705979156,0.5167457769091163,0.06655923191241152,0.5350274725274725,0.0634937022047951 +flat_mae,reg,logistic,aabc_age,3,0.046415888336127774,train,0.7716535433070866,0.018019338816758594,0.772414231475761,0.01807857622248129,0.7724471316042341,0.01803122694827653 +flat_mae,reg,logistic,aabc_age,3,0.046415888336127774,test,0.5576923076923077,0.06757240254570512,0.5521371986889227,0.06949239911115596,0.55746336996337,0.06775775628642457 +flat_mae,reg,logistic,aabc_age,4,0.046415888336127774,train,0.7854330708661418,0.019053528192276452,0.7846006406260748,0.019442189576533077,0.7863924536190828,0.01909766883110144 +flat_mae,reg,logistic,aabc_age,4,0.046415888336127774,test,0.5576923076923077,0.06369867117969708,0.5476679929266136,0.06549382903532174,0.5588369963369964,0.06410354668758966 +flat_mae,reg,logistic,aabc_age,5,0.046415888336127774,train,0.7893700787401575,0.016946394789470503,0.7899279037263678,0.016992641520294206,0.7907422529052205,0.016834599985279423 +flat_mae,reg,logistic,aabc_age,5,0.046415888336127774,test,0.4423076923076923,0.06153378588006534,0.4318368700265252,0.06121395568289032,0.4416208791208791,0.06138814898142881 +flat_mae,reg,logistic,aabc_age,6,0.046415888336127774,train,0.7696850393700787,0.01813974658314753,0.7695307311885792,0.018238546030916698,0.7703810159017547,0.018121438375953466 +flat_mae,reg,logistic,aabc_age,6,0.046415888336127774,test,0.5,0.07041099757766293,0.5109740802675585,0.06997599806402775,0.5029761904761905,0.07043558162042017 +flat_mae,reg,logistic,aabc_age,7,0.005994842503189409,train,0.6358267716535433,0.02058100040462453,0.6331066553573081,0.021095618801794044,0.637021015019496,0.020611442993809525 +flat_mae,reg,logistic,aabc_age,7,0.005994842503189409,test,0.4807692307692308,0.06388797286597123,0.4579485978572683,0.064824778536135,0.47435897435897434,0.06331108567415208 +flat_mae,reg,logistic,aabc_age,8,9.999999999999999e-05,train,0.5019685039370079,0.01978939700295255,0.47046355207535523,0.02018694469268829,0.4999005061480008,0.019604951309276576 +flat_mae,reg,logistic,aabc_age,8,9.999999999999999e-05,test,0.38461538461538464,0.06555051351808928,0.3819535576341384,0.06573420966782607,0.38530219780219777,0.06579533252374668 +flat_mae,reg,logistic,aabc_age,9,0.005994842503189409,train,0.6358267716535433,0.02083291069486777,0.6334631924622511,0.02126152009519476,0.6373385562411175,0.020824382894799192 +flat_mae,reg,logistic,aabc_age,9,0.005994842503189409,test,0.4807692307692308,0.06449772942198599,0.45986928104575164,0.06516923458597285,0.47298534798534797,0.06400114795904321 +flat_mae,reg,logistic,aabc_age,10,0.000774263682681127,train,0.5492125984251969,0.01978019951902877,0.5400102493679684,0.020322769154234817,0.5493873096670625,0.019718988649446465 +flat_mae,reg,logistic,aabc_age,10,0.000774263682681127,test,0.4230769230769231,0.06550491196180834,0.4217707024158637,0.0658019159541703,0.4210164835164836,0.0652426249885098 +flat_mae,reg,logistic,aabc_age,11,9.999999999999999e-05,train,0.4763779527559055,0.019378278704141168,0.4492761059914814,0.0197667671711222,0.4754313717780774,0.019206753615522803 +flat_mae,reg,logistic,aabc_age,11,9.999999999999999e-05,test,0.5192307692307693,0.06230212463253408,0.489169110459433,0.06788477736496269,0.5114468864468865,0.061990161827376995 +flat_mae,reg,logistic,aabc_age,12,0.005994842503189409,train,0.6318897637795275,0.020496964037414644,0.6277959036045436,0.021059670841020596,0.6322360799710005,0.020503326112156973 +flat_mae,reg,logistic,aabc_age,12,0.005994842503189409,test,0.4423076923076923,0.06546167404849015,0.4436363636363636,0.06533979841486413,0.44184981684981683,0.06561485343501645 +flat_mae,reg,logistic,aabc_age,13,0.046415888336127774,train,0.7874015748031497,0.017643715172870212,0.7875504032258065,0.01770318702132295,0.7879558256886893,0.017662262905104937 +flat_mae,reg,logistic,aabc_age,13,0.046415888336127774,test,0.40384615384615385,0.05943192615214357,0.3929096989966555,0.059435883721023215,0.40453296703296704,0.05977774563040662 +flat_mae,reg,logistic,aabc_age,14,0.046415888336127774,train,0.7854330708661418,0.017131719557348307,0.7850914881381652,0.017373170999393308,0.7861248990676827,0.01710227768615963 +flat_mae,reg,logistic,aabc_age,14,0.046415888336127774,test,0.5576923076923077,0.06651315659326355,0.5527777777777777,0.07005273622240277,0.5590659340659341,0.0664281053079526 +flat_mae,reg,logistic,aabc_age,15,0.000774263682681127,train,0.5216535433070866,0.022052520108307043,0.514874688427676,0.022674526983561052,0.521764220188765,0.0220534737111517 +flat_mae,reg,logistic,aabc_age,15,0.000774263682681127,test,0.5384615384615384,0.06507868473702116,0.5476651186790505,0.0644691991685846,0.5366300366300366,0.0649956189605494 +flat_mae,reg,logistic,aabc_age,16,0.000774263682681127,train,0.5374015748031497,0.021695244026450292,0.527622146166578,0.022065827078963283,0.5362703039381345,0.021566711605416598 +flat_mae,reg,logistic,aabc_age,16,0.000774263682681127,test,0.4423076923076923,0.06276983172789241,0.4405230925261351,0.06357106649169934,0.4416208791208791,0.06274541516930576 +flat_mae,reg,logistic,aabc_age,17,0.000774263682681127,train,0.5610236220472441,0.021069208001798533,0.5560834937717745,0.021754314210652305,0.5620015717631173,0.021073827226898337 +flat_mae,reg,logistic,aabc_age,17,0.000774263682681127,test,0.4230769230769231,0.05510714415824963,0.3891826923076923,0.05383832335941564,0.4191849816849817,0.05421053005724257 +flat_mae,reg,logistic,aabc_age,18,9.999999999999999e-05,train,0.4921259842519685,0.020156711353443175,0.46554164551403243,0.02090125963949339,0.49030498341928963,0.020032355950983155 +flat_mae,reg,logistic,aabc_age,18,9.999999999999999e-05,test,0.5,0.056525050738117394,0.43675595238095233,0.04807419339796134,0.49038461538461536,0.05487344921996411 +flat_mae,reg,logistic,aabc_age,19,0.005994842503189409,train,0.6437007874015748,0.020993341311967444,0.6404277062549323,0.021478205406693367,0.6453530856999283,0.020881387512864672 +flat_mae,reg,logistic,aabc_age,19,0.005994842503189409,test,0.4230769230769231,0.06573097177805584,0.42341856367101804,0.06616177145745306,0.4228479853479854,0.06583392617397316 +flat_mae,reg,logistic,aabc_age,20,0.005994842503189409,train,0.6161417322834646,0.022414971846110483,0.6129774906962036,0.022937195660247055,0.6169773192376518,0.022396772493191042 +flat_mae,reg,logistic,aabc_age,20,0.005994842503189409,test,0.38461538461538464,0.054562542150427336,0.36404761904761906,0.05197598100507604,0.3820970695970696,0.05412459006261871 +flat_mae,reg,logistic,aabc_age,21,0.046415888336127774,train,0.7775590551181102,0.017575580567994475,0.7777791199336405,0.01765892412800811,0.7792981823552089,0.017434286426165773 +flat_mae,reg,logistic,aabc_age,21,0.046415888336127774,test,0.38461538461538464,0.06716810801063114,0.3837888198757764,0.06579405413982745,0.3839285714285714,0.06724200237390185 +flat_mae,reg,logistic,aabc_age,22,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,22,2.782559402207126,test,0.3076923076923077,0.06197789021691239,0.31612554112554114,0.06052295842956278,0.30540293040293043,0.06172357520674181 +flat_mae,reg,logistic,aabc_age,23,0.046415888336127774,train,0.7992125984251969,0.01725758065744385,0.798839549088453,0.017464206141375626,0.8004377489743741,0.017211908376675272 +flat_mae,reg,logistic,aabc_age,23,0.046415888336127774,test,0.4230769230769231,0.056996262723945694,0.4221295945433876,0.055514211588921364,0.4251373626373627,0.057712208476765364 +flat_mae,reg,logistic,aabc_age,24,0.000774263682681127,train,0.5531496062992126,0.020841689728811848,0.544113778022328,0.021400770193390376,0.5529844319692208,0.020743766065440254 +flat_mae,reg,logistic,aabc_age,24,0.000774263682681127,test,0.38461538461538464,0.06006057100418804,0.36163720538720534,0.05721191740047356,0.37934981684981683,0.05918138376944187 +flat_mae,reg,logistic,aabc_age,25,9.999999999999999e-05,train,0.5059055118110236,0.01987010448241539,0.48319268980818114,0.02030099420931796,0.5046178333259812,0.019768862067774416 +flat_mae,reg,logistic,aabc_age,25,9.999999999999999e-05,test,0.36538461538461536,0.057689371720165715,0.32922367693158766,0.05691724112402267,0.3628663003663004,0.05718031970159127 +flat_mae,reg,logistic,aabc_age,26,0.000774263682681127,train,0.5393700787401575,0.021541462254665824,0.5321227208391641,0.021857807128146663,0.5393566511759934,0.02149352249007672 +flat_mae,reg,logistic,aabc_age,26,0.000774263682681127,test,0.40384615384615385,0.06308864806597855,0.3838338122605364,0.05960954233774736,0.3985805860805861,0.062302552443679736 +flat_mae,reg,logistic,aabc_age,27,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,27,166.81005372000556,test,0.38461538461538464,0.0667483461666005,0.3795713963455899,0.06659355276586615,0.38278388278388276,0.06669498697191201 +flat_mae,reg,logistic,aabc_age,28,9.999999999999999e-05,train,0.49606299212598426,0.01904669723477934,0.4730803369287677,0.019785518461366954,0.49535747301918526,0.018939434464768506 +flat_mae,reg,logistic,aabc_age,28,9.999999999999999e-05,test,0.46153846153846156,0.05976848826387624,0.3938521156263092,0.05064040314561026,0.4532967032967033,0.05817973702420191 +flat_mae,reg,logistic,aabc_age,29,0.046415888336127774,train,0.7696850393700787,0.018648393658038546,0.7703440633269257,0.01865763583658656,0.7719363565399346,0.01862602588888433 +flat_mae,reg,logistic,aabc_age,29,0.046415888336127774,test,0.4423076923076923,0.06445698304079935,0.43796296296296294,0.06392659279558822,0.4416208791208791,0.06438226572942052 +flat_mae,reg,logistic,aabc_age,30,0.046415888336127774,train,0.7952755905511811,0.017798750183398167,0.7955911991136699,0.017938509401295495,0.7961879230286791,0.017709803589511162 +flat_mae,reg,logistic,aabc_age,30,0.046415888336127774,test,0.5,0.06562601365799275,0.5014737075906491,0.063764951463825,0.5070970695970696,0.0660563858247356 +flat_mae,reg,logistic,aabc_age,31,9.999999999999999e-05,train,0.47834645669291337,0.020077061455222808,0.4497310322827715,0.02053447761624194,0.4770123650479776,0.01990458636488613 +flat_mae,reg,logistic,aabc_age,31,9.999999999999999e-05,test,0.6153846153846154,0.05598266849300445,0.5763107263107263,0.06077096700547292,0.6062271062271063,0.05538900933653573 +flat_mae,reg,logistic,aabc_age,32,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,32,166.81005372000556,test,0.4230769230769231,0.06225395355901351,0.40538031637775884,0.06275107671994935,0.4194139194139195,0.06198457953341503 +flat_mae,reg,logistic,aabc_age,33,0.046415888336127774,train,0.7677165354330708,0.018081411501176665,0.7680719992056337,0.018212591497833226,0.7685824547506755,0.01804566382680796 +flat_mae,reg,logistic,aabc_age,33,0.046415888336127774,test,0.46153846153846156,0.06800708064423497,0.47451923076923075,0.06698869887278167,0.46130952380952384,0.06816881013542106 +flat_mae,reg,logistic,aabc_age,34,0.000774263682681127,train,0.5531496062992126,0.020209104354616942,0.5411924472707241,0.021082381224040853,0.5521317816447988,0.020150514959387283 +flat_mae,reg,logistic,aabc_age,34,0.000774263682681127,test,0.40384615384615385,0.06643094915712977,0.4111528822055138,0.07023446646418564,0.4006410256410256,0.06663515495214557 +flat_mae,reg,logistic,aabc_age,35,9.999999999999999e-05,train,0.49803149606299213,0.019807062355115614,0.4756023632380889,0.020306535898765143,0.4969884529593068,0.019716606008738324 +flat_mae,reg,logistic,aabc_age,35,9.999999999999999e-05,test,0.4230769230769231,0.06145359532767977,0.38824884792626724,0.060636641281595675,0.4191849816849817,0.06075702966635236 +flat_mae,reg,logistic,aabc_age,36,0.005994842503189409,train,0.6122047244094488,0.0208852178438399,0.6102670337934166,0.02088234838783817,0.6124099520703352,0.020859881131186305 +flat_mae,reg,logistic,aabc_age,36,0.005994842503189409,test,0.5,0.05797642859118996,0.4911512497719394,0.05779249289977651,0.5022893772893773,0.05821223089978041 +flat_mae,reg,logistic,aabc_age,37,0.046415888336127774,train,0.7795275590551181,0.01842817922898461,0.7797709797709798,0.018542563476607283,0.7817494471498247,0.01842960450459192 +flat_mae,reg,logistic,aabc_age,37,0.046415888336127774,test,0.46153846153846156,0.06426779218571334,0.4595289855072464,0.06424631314360292,0.4569597069597069,0.06418496042764131 +flat_mae,reg,logistic,aabc_age,38,9.999999999999999e-05,train,0.4862204724409449,0.019235388493616157,0.4553573862784389,0.01967255962201083,0.4850268945067886,0.01913457478714803 +flat_mae,reg,logistic,aabc_age,38,9.999999999999999e-05,test,0.46153846153846156,0.06025403568698156,0.4119022869022869,0.0501325708860847,0.45192307692307687,0.05865072378903898 +flat_mae,reg,logistic,aabc_age,39,0.046415888336127774,train,0.7795275590551181,0.017508441495205897,0.778528565553328,0.01764911964691925,0.7793738216571504,0.017449961367610584 +flat_mae,reg,logistic,aabc_age,39,0.046415888336127774,test,0.4807692307692308,0.06769732280373353,0.47308941058941056,0.06848737457399039,0.47458791208791207,0.06761032584988574 +flat_mae,reg,logistic,aabc_age,40,0.000774263682681127,train,0.5295275590551181,0.021132872525915704,0.5184132721563518,0.021614089405482408,0.5281058144686597,0.02106530596902898 +flat_mae,reg,logistic,aabc_age,40,0.000774263682681127,test,0.5384615384615384,0.06347319240314099,0.5337403967434926,0.06475136159150187,0.5336538461538461,0.06321120714157227 +flat_mae,reg,logistic,aabc_age,41,0.046415888336127774,train,0.7795275590551181,0.01844581926878005,0.7799447172862786,0.018549574361311133,0.7820669883714463,0.018273922104190717 +flat_mae,reg,logistic,aabc_age,41,0.046415888336127774,test,0.4230769230769231,0.06658707650435218,0.4142613636363637,0.06802799901370726,0.423992673992674,0.0668351081637679 +flat_mae,reg,logistic,aabc_age,42,0.000774263682681127,train,0.547244094488189,0.019207030947548234,0.5361517994328244,0.019700888691147642,0.5467360981915614,0.01911056276009153 +flat_mae,reg,logistic,aabc_age,42,0.000774263682681127,test,0.5192307692307693,0.06269607114706716,0.503393665158371,0.06923785951855704,0.5233516483516484,0.0629442886621374 +flat_mae,reg,logistic,aabc_age,43,0.000774263682681127,train,0.5492125984251969,0.020723960696862307,0.5405155761087964,0.0211802462927748,0.5493873096670625,0.020704210437818234 +flat_mae,reg,logistic,aabc_age,43,0.000774263682681127,test,0.4807692307692308,0.06542282820013318,0.46949404761904767,0.06392540367338338,0.47435897435897434,0.06505806858132487 +flat_mae,reg,logistic,aabc_age,44,0.005994842503189409,train,0.6279527559055118,0.020099292785148947,0.628215060095476,0.01990369052141201,0.629926743755622,0.020107986561396292 +flat_mae,reg,logistic,aabc_age,44,0.005994842503189409,test,0.4807692307692308,0.0693893909323644,0.47442982456140353,0.07265749914384596,0.4764194139194139,0.0695203655103376 +flat_mae,reg,logistic,aabc_age,45,0.005994842503189409,train,0.6161417322834646,0.020577853991515963,0.6121355018127064,0.021084732318157853,0.6167773725567667,0.020604204391379114 +flat_mae,reg,logistic,aabc_age,45,0.005994842503189409,test,0.4230769230769231,0.05677366044839861,0.39141414141414144,0.05788268287355007,0.42811355311355315,0.05782617095864232 +flat_mae,reg,logistic,aabc_age,46,0.005994842503189409,train,0.610236220472441,0.020651012164748895,0.605905948324132,0.02119718077276201,0.6101938763571919,0.020699608256220994 +flat_mae,reg,logistic,aabc_age,46,0.005994842503189409,test,0.5384615384615384,0.065240519953377,0.5284178187403994,0.0685708198487278,0.5352564102564102,0.06518834895771239 +flat_mae,reg,logistic,aabc_age,47,0.005994842503189409,train,0.6279527559055118,0.020616213814775592,0.623275745900958,0.021094462798746102,0.6282714297769996,0.020552697111484795 +flat_mae,reg,logistic,aabc_age,47,0.005994842503189409,test,0.5,0.06806300379212704,0.5015360983102919,0.07025446523612558,0.5013736263736264,0.0681226242916817 +flat_mae,reg,logistic,aabc_age,48,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,48,2.782559402207126,test,0.3076923076923077,0.06295573489534545,0.3146488294314381,0.06174708054934639,0.3083791208791209,0.06330854703986699 +flat_mae,reg,logistic,aabc_age,49,0.046415888336127774,train,0.7716535433070866,0.01866923322154765,0.7721670722647076,0.01869485610716379,0.7722795503932764,0.018588586829133117 +flat_mae,reg,logistic,aabc_age,49,0.046415888336127774,test,0.46153846153846156,0.06160496406648685,0.44890109890109886,0.06052242105885804,0.45650183150183155,0.06126472691445141 +flat_mae,reg,logistic,aabc_age,50,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,50,166.81005372000556,test,0.3269230769230769,0.06550328597385616,0.3328544061302682,0.06431045315353695,0.32623626373626374,0.06527709759401436 +flat_mae,reg,logistic,aabc_age,51,0.046415888336127774,train,0.7716535433070866,0.017777802637011515,0.7721917502787068,0.017821050509876264,0.7739024989019714,0.017580457484295108 +flat_mae,reg,logistic,aabc_age,51,0.046415888336127774,test,0.5384615384615384,0.07021786679021264,0.5426010770838356,0.06957130648355016,0.5368589743589743,0.07031745680660469 +flat_mae,reg,logistic,aabc_age,52,0.046415888336127774,train,0.7854330708661418,0.018306619247272744,0.7847794566544567,0.018594814960319336,0.7859573178567251,0.018223050273657258 +flat_mae,reg,logistic,aabc_age,52,0.046415888336127774,test,0.46153846153846156,0.07130105354541677,0.4699079031912615,0.07089907677300118,0.4642857142857143,0.0714365514937434 +flat_mae,reg,logistic,aabc_age,53,9.999999999999999e-05,train,0.48031496062992124,0.02144731392213367,0.46307085601634224,0.022020216438652318,0.4795959686529635,0.021339735147270826 +flat_mae,reg,logistic,aabc_age,53,9.999999999999999e-05,test,0.5384615384615384,0.05324737124383416,0.4894601806239737,0.05385427649113559,0.5318223443223443,0.052299533512303244 +flat_mae,reg,logistic,aabc_age,54,0.046415888336127774,train,0.7952755905511811,0.017530121637065993,0.795354204900351,0.017731049726312988,0.7973757357750164,0.017487381184186333 +flat_mae,reg,logistic,aabc_age,54,0.046415888336127774,test,0.46153846153846156,0.05743907242827062,0.43558499342704016,0.0595672412132623,0.4652014652014652,0.05801901363625614 +flat_mae,reg,logistic,aabc_age,55,0.000774263682681127,train,0.5492125984251969,0.021243525380095014,0.5354889642428613,0.021924849169174938,0.548217118121019,0.02111743210472177 +flat_mae,reg,logistic,aabc_age,55,0.000774263682681127,test,0.4807692307692308,0.06952029204424533,0.4948282967032967,0.06874261193053124,0.4835164835164835,0.06986617324323995 +flat_mae,reg,logistic,aabc_age,56,0.000774263682681127,train,0.5433070866141733,0.020411582392489604,0.5334766883875766,0.021006993988487058,0.5425862455863089,0.020401685800693365 +flat_mae,reg,logistic,aabc_age,56,0.000774263682681127,test,0.3269230769230769,0.05990647296376728,0.3137423866456125,0.05799261461623615,0.3273809523809524,0.06018979545320508 +flat_mae,reg,logistic,aabc_age,57,0.3593813663804626,train,0.9645669291338582,0.00812826812042305,0.9649443371026772,0.008056704761225506,0.9650826925769435,0.008038076031832113 +flat_mae,reg,logistic,aabc_age,57,0.3593813663804626,test,0.46153846153846156,0.06366359441971424,0.4512744598951496,0.06276025701256596,0.4608516483516484,0.06372183734746409 +flat_mae,reg,logistic,aabc_age,58,0.046415888336127774,train,0.7933070866141733,0.017641596099894873,0.7942170923285161,0.017704991030733582,0.794171793996421,0.017654053981941854 +flat_mae,reg,logistic,aabc_age,58,0.046415888336127774,test,0.40384615384615385,0.052510015950749146,0.3610294117647059,0.05157015865388373,0.396978021978022,0.05075616358166564 +flat_mae,reg,logistic,aabc_age,59,0.046415888336127774,train,0.7854330708661418,0.01734335041402346,0.7840606626981157,0.017690174611556128,0.7862924802786404,0.01730662418086337 +flat_mae,reg,logistic,aabc_age,59,0.046415888336127774,test,0.5384615384615384,0.0669194848726535,0.541971706454465,0.06699214220343688,0.5400641025641026,0.06724980671651086 +flat_mae,reg,logistic,aabc_age,60,0.000774263682681127,train,0.5433070866141733,0.02181634006668827,0.5350153359226923,0.022236798099004272,0.5433389225702883,0.0217055909083407 +flat_mae,reg,logistic,aabc_age,60,0.000774263682681127,test,0.38461538461538464,0.057020446867301434,0.3717135129914035,0.05804129142893567,0.3850732600732601,0.057236141401219874 +flat_mae,reg,logistic,aabc_age,61,0.3593813663804626,train,0.9724409448818898,0.007376944275146323,0.9729922316787571,0.007235209814306144,0.9728120462840605,0.007296459353607217 +flat_mae,reg,logistic,aabc_age,61,0.3593813663804626,test,0.4230769230769231,0.06664735420468848,0.42827693296209035,0.0656690699202695,0.4269688644688645,0.06708553862324144 +flat_mae,reg,logistic,aabc_age,62,0.046415888336127774,train,0.7716535433070866,0.01836512854721941,0.7716172609817387,0.018595293288694605,0.7731498219179922,0.018301995381751437 +flat_mae,reg,logistic,aabc_age,62,0.046415888336127774,test,0.4230769230769231,0.06811896789428219,0.4261907507784569,0.06809110786307863,0.4212454212454212,0.06840003427291298 +flat_mae,reg,logistic,aabc_age,63,0.046415888336127774,train,0.7933070866141733,0.01792064186347157,0.7936181899178892,0.018171180520451857,0.7938042661045784,0.017923773085410388 +flat_mae,reg,logistic,aabc_age,63,0.046415888336127774,test,0.46153846153846156,0.06030146160906341,0.4409523809523809,0.06255875771384951,0.46222527472527475,0.06063968529122935 +flat_mae,reg,logistic,aabc_age,64,0.000774263682681127,train,0.5354330708661418,0.02080378850342486,0.5258565218030413,0.021271976065649904,0.5342041882356552,0.02091083005794564 +flat_mae,reg,logistic,aabc_age,64,0.000774263682681127,test,0.46153846153846156,0.06347347207144677,0.44502801120448177,0.06550486274729536,0.45535714285714285,0.06342627737412522 +flat_mae,reg,logistic,aabc_age,65,0.046415888336127774,train,0.7775590551181102,0.01820979949624915,0.7772228912037797,0.018360214177608337,0.7794157768959452,0.01812466735356153 +flat_mae,reg,logistic,aabc_age,65,0.046415888336127774,test,0.4807692307692308,0.0646783118492485,0.48205387205387207,0.06270447158169154,0.47596153846153844,0.06425268359364424 +flat_mae,reg,logistic,aabc_age,66,0.000774263682681127,train,0.5393700787401575,0.020278400156990346,0.5276167809765502,0.02093028573061545,0.5388891499437081,0.020214465440766813 +flat_mae,reg,logistic,aabc_age,66,0.000774263682681127,test,0.5192307692307693,0.06072544673904675,0.48778225806451614,0.06511409985678748,0.5114468864468865,0.06040859216572663 +flat_mae,reg,logistic,aabc_age,67,0.000774263682681127,train,0.5354330708661418,0.02013685083337321,0.5288994891271843,0.020372364193360654,0.5348892573491194,0.020078254340097162 +flat_mae,reg,logistic,aabc_age,67,0.000774263682681127,test,0.5,0.06436006102872574,0.47044864664441977,0.06911468811774037,0.49496336996337,0.06405733044786148 +flat_mae,reg,logistic,aabc_age,68,0.046415888336127774,train,0.7637795275590551,0.0184139022519289,0.7624798861067712,0.018626993164979105,0.7636975463617375,0.01835746322814422 +flat_mae,reg,logistic,aabc_age,68,0.046415888336127774,test,0.46153846153846156,0.06089932520685995,0.4559178743961353,0.06115479118899193,0.46703296703296704,0.06186205461485543 +flat_mae,reg,logistic,aabc_age,69,0.046415888336127774,train,0.7913385826771654,0.01713083061123176,0.7919270346327967,0.01715131489850835,0.7923732328453419,0.017176106945524718 +flat_mae,reg,logistic,aabc_age,69,0.046415888336127774,test,0.4423076923076923,0.06828395290880626,0.46580663375894793,0.06431988819204823,0.4478021978021978,0.06906040759402957 +flat_mae,reg,logistic,aabc_age,70,0.000774263682681127,train,0.5334645669291339,0.020098629951065126,0.526741102799273,0.020454540507391632,0.5334258586199556,0.020048664055148324 +flat_mae,reg,logistic,aabc_age,70,0.000774263682681127,test,0.36538461538461536,0.06239607928435224,0.3557142857142857,0.06170389547721175,0.3630952380952381,0.06203516420650396 +flat_mae,reg,logistic,aabc_age,71,0.005994842503189409,train,0.6259842519685039,0.021409237053451064,0.6221499971990122,0.021869710578095836,0.625652583771426,0.021365582241875396 +flat_mae,reg,logistic,aabc_age,71,0.005994842503189409,test,0.5384615384615384,0.06582452691205054,0.53574082921909,0.06721322609986392,0.5382326007326008,0.06610979130684805 +flat_mae,reg,logistic,aabc_age,72,0.3593813663804626,train,0.9586614173228346,0.008994907660266876,0.9592043582593005,0.008880545886790455,0.9589343321397268,0.008932832307509514 +flat_mae,reg,logistic,aabc_age,72,0.3593813663804626,test,0.5384615384615384,0.06654014004107889,0.5201728967449049,0.07048678450212359,0.5336538461538461,0.06622734503322887 +flat_mae,reg,logistic,aabc_age,73,0.005994842503189409,train,0.6181102362204725,0.021306354414152524,0.6164783676225111,0.021696491614815874,0.6190934216103523,0.021366124191680998 +flat_mae,reg,logistic,aabc_age,73,0.005994842503189409,test,0.6153846153846154,0.0631375295140386,0.6096551724137931,0.0640725411745994,0.6137820512820513,0.06318277355265306 +flat_mae,reg,logistic,aabc_age,74,0.000774263682681127,train,0.531496062992126,0.021033158281330337,0.522289358323841,0.02149619128421969,0.5315273241284338,0.02102383111550611 +flat_mae,reg,logistic,aabc_age,74,0.000774263682681127,test,0.5384615384615384,0.06725025021400406,0.5204112554112554,0.06941327471270263,0.5309065934065934,0.06695426351851014 +flat_mae,reg,logistic,aabc_age,75,0.005994842503189409,train,0.6358267716535433,0.02081882020784438,0.6335227313860143,0.02122820589386739,0.6369710283492747,0.02077975178221324 +flat_mae,reg,logistic,aabc_age,75,0.005994842503189409,test,0.4230769230769231,0.0629654267865644,0.41176840985839286,0.06252813808657212,0.41941391941391937,0.06260032820751933 +flat_mae,reg,logistic,aabc_age,76,0.005994842503189409,train,0.639763779527559,0.02010161542388783,0.6376969630130879,0.020525669192746406,0.6413208276354123,0.020049406893353872 +flat_mae,reg,logistic,aabc_age,76,0.005994842503189409,test,0.4807692307692308,0.06589057631662723,0.4801343986126595,0.06645298923045889,0.4819139194139194,0.06614408852273984 +flat_mae,reg,logistic,aabc_age,77,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,77,1291.5496650148827,test,0.4230769230769231,0.0656736147066617,0.4252338116655958,0.0642087345970438,0.4210164835164835,0.06560799486917428 +flat_mae,reg,logistic,aabc_age,78,0.046415888336127774,train,0.7874015748031497,0.018017174021946945,0.7862509991619432,0.01825475500162807,0.7886261505325198,0.017944737526447076 +flat_mae,reg,logistic,aabc_age,78,0.046415888336127774,test,0.5192307692307693,0.06626667480226477,0.5258018784334574,0.06563724008770741,0.5173992673992673,0.06629406748786343 +flat_mae,reg,logistic,aabc_age,79,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,79,2.782559402207126,test,0.34615384615384615,0.06020060154213996,0.3493447293447294,0.057134039237096854,0.34523809523809523,0.060247842616569194 +flat_mae,reg,logistic,aabc_age,80,0.000774263682681127,train,0.5275590551181102,0.02114333598515886,0.5179364804323369,0.021832344082485192,0.5270099436313387,0.020982871472492344 +flat_mae,reg,logistic,aabc_age,80,0.000774263682681127,test,0.46153846153846156,0.058969190130231434,0.43798850574712644,0.06121039088968693,0.4635989010989011,0.05951031843977462 +flat_mae,reg,logistic,aabc_age,81,0.046415888336127774,train,0.7775590551181102,0.019192713909724567,0.7784251949162213,0.01921874538328659,0.7793981556956514,0.019016409415318923 +flat_mae,reg,logistic,aabc_age,81,0.046415888336127774,test,0.34615384615384615,0.06265284269867451,0.35285884963304315,0.06110407618452753,0.3411172161172161,0.06232903520452375 +flat_mae,reg,logistic,aabc_age,82,0.005994842503189409,train,0.6200787401574803,0.02065255227703905,0.6170329366025042,0.02100924988408755,0.6204744681993675,0.020699334937650637 +flat_mae,reg,logistic,aabc_age,82,0.005994842503189409,test,0.5192307692307693,0.06151268690999909,0.4924796005358665,0.0705099339519262,0.5157967032967032,0.06138539338783504 +flat_mae,reg,logistic,aabc_age,83,0.046415888336127774,train,0.7933070866141733,0.017588874700702903,0.7939726242811542,0.017682814601093076,0.7937366582340633,0.017600520245747216 +flat_mae,reg,logistic,aabc_age,83,0.046415888336127774,test,0.36538461538461536,0.061116957911685185,0.3607150913602527,0.059919678655952824,0.3617216117216117,0.060650419953187415 +flat_mae,reg,logistic,aabc_age,84,0.046415888336127774,train,0.7696850393700787,0.018952348624459107,0.7697247781458308,0.019057787939146604,0.7712336662261766,0.018926822127872948 +flat_mae,reg,logistic,aabc_age,84,0.046415888336127774,test,0.5192307692307693,0.06485559745087588,0.5166666666666666,0.06645026186741908,0.5160256410256411,0.06468347909757757 +flat_mae,reg,logistic,aabc_age,85,9.999999999999999e-05,train,0.4763779527559055,0.01947355053497797,0.43128701575456285,0.018971403388227837,0.4730233808154756,0.01916915664233121 +flat_mae,reg,logistic,aabc_age,85,9.999999999999999e-05,test,0.46153846153846156,0.04807432300661528,0.3784722222222222,0.04680206275885799,0.4532967032967033,0.046241884489209475 +flat_mae,reg,logistic,aabc_age,86,0.3593813663804626,train,0.9665354330708661,0.008026318386631202,0.9669258283462469,0.00794968883140255,0.9673663761606017,0.007851787495116998 +flat_mae,reg,logistic,aabc_age,86,0.3593813663804626,test,0.46153846153846156,0.06753367557950705,0.4431481481481481,0.06779674940327746,0.45375457875457875,0.06718382771857714 +flat_mae,reg,logistic,aabc_age,87,0.005994842503189409,train,0.6181102362204725,0.019856345561278837,0.6157515513794902,0.020478171660288553,0.6179732167345302,0.019876185468774156 +flat_mae,reg,logistic,aabc_age,87,0.005994842503189409,test,0.5769230769230769,0.06449879591555344,0.574404761904762,0.0651860042889364,0.5812728937728938,0.06478056722481677 +flat_mae,reg,logistic,aabc_age,88,0.000774263682681127,train,0.5295275590551181,0.020174575981344256,0.5164298228810253,0.021064156001901153,0.5304490744914065,0.02010372993447911 +flat_mae,reg,logistic,aabc_age,88,0.000774263682681127,test,0.5,0.06504949185452984,0.5006475006475006,0.06758364192675416,0.49679487179487175,0.06501613468684679 +flat_mae,reg,logistic,aabc_age,89,0.000774263682681127,train,0.5452755905511811,0.021563872669379428,0.5361191320431894,0.02234673097480212,0.5455226328135039,0.021553974883828615 +flat_mae,reg,logistic,aabc_age,89,0.000774263682681127,test,0.38461538461538464,0.061562146884218426,0.37788515406162465,0.06170259954974571,0.38827838827838823,0.0621893296708724 +flat_mae,reg,logistic,aabc_age,90,0.046415888336127774,train,0.765748031496063,0.017810705107952154,0.7654248176276613,0.018005091394724095,0.766466352377975,0.01777723802617592 +flat_mae,reg,logistic,aabc_age,90,0.046415888336127774,test,0.5,0.05505633209851535,0.4760660784854333,0.054637068609627915,0.5020604395604396,0.055479336190935334 +flat_mae,reg,logistic,aabc_age,91,9.999999999999999e-05,train,0.4940944881889764,0.021184738659853795,0.46345575950083456,0.021494305708431468,0.4920035712299262,0.020996195562213004 +flat_mae,reg,logistic,aabc_age,91,9.999999999999999e-05,test,0.4807692307692308,0.06068645774673418,0.44500103391232426,0.06510692668376102,0.47847985347985345,0.060357772561131735 +flat_mae,reg,logistic,aabc_age,92,0.046415888336127774,train,0.7637795275590551,0.019249568711838482,0.7633823426599647,0.019417915671226253,0.7652029003296963,0.019321630655153108 +flat_mae,reg,logistic,aabc_age,92,0.046415888336127774,test,0.5384615384615384,0.05687652722443966,0.513186667398624,0.059596712721945805,0.5407509157509157,0.057354357691175174 +flat_mae,reg,logistic,aabc_age,93,0.005994842503189409,train,0.6259842519685039,0.02170979950692288,0.6238738456496928,0.022045036775918535,0.6265052340958478,0.02175538620691589 +flat_mae,reg,logistic,aabc_age,93,0.005994842503189409,test,0.36538461538461536,0.06123724356957946,0.35315176513125923,0.06137616920384976,0.36172161172161177,0.06088763257145617 +flat_mae,reg,logistic,aabc_age,94,0.000774263682681127,train,0.5393700787401575,0.021028063982662548,0.5270028790874576,0.021802803544021122,0.5377013371973709,0.020929839473461116 +flat_mae,reg,logistic,aabc_age,94,0.000774263682681127,test,0.4423076923076923,0.06352677299827188,0.43104395604395607,0.06412780498206078,0.4416208791208791,0.06354292988391468 +flat_mae,reg,logistic,aabc_age,95,0.3593813663804626,train,0.9645669291338582,0.008498616737621085,0.9649855072463768,0.008403824684093909,0.9652002871176799,0.00833867840042559 +flat_mae,reg,logistic,aabc_age,95,0.3593813663804626,test,0.4423076923076923,0.06465176402632782,0.4289983579638752,0.06310108540298548,0.43727106227106227,0.0639757256305047 +flat_mae,reg,logistic,aabc_age,96,0.046415888336127774,train,0.7677165354330708,0.018826103124234654,0.7676332198701986,0.019050594304561855,0.7681473189883178,0.018844865847214332 +flat_mae,reg,logistic,aabc_age,96,0.046415888336127774,test,0.4423076923076923,0.06845613634693604,0.45566454006351703,0.06654604472412512,0.4375,0.06821475006689492 +flat_mae,reg,logistic,aabc_age,97,0.000774263682681127,train,0.5413385826771654,0.021505683794961535,0.5343953363942912,0.022127392628398194,0.5407200765647147,0.021485142996979064 +flat_mae,reg,logistic,aabc_age,97,0.000774263682681127,test,0.40384615384615385,0.06492685916799086,0.3869485294117647,0.06728092376615133,0.4001831501831502,0.06452767550372585 +flat_mae,reg,logistic,aabc_age,98,0.046415888336127774,train,0.7696850393700787,0.01842284585243276,0.7698234922069913,0.018519995297964057,0.7705985837829337,0.0183187872909922 +flat_mae,reg,logistic,aabc_age,98,0.046415888336127774,test,0.46153846153846156,0.06552130508558526,0.44963121118012417,0.0671746965720416,0.4581043956043956,0.0650731422150053 +flat_mae,reg,logistic,aabc_age,99,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,99,1291.5496650148827,test,0.5384615384615384,0.06263752915921571,0.5242512672235311,0.06511641113083007,0.538003663003663,0.06257881278303613 +flat_mae,reg,logistic,aabc_age,100,0.046415888336127774,train,0.8031496062992126,0.017592110759221635,0.8036025336471383,0.017773091849778783,0.8033321809627745,0.017617847419188985 +flat_mae,reg,logistic,aabc_age,100,0.046415888336127774,test,0.40384615384615385,0.06878036177340502,0.40091580832960144,0.06899718462027388,0.4017857142857143,0.0685889800200924 diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9b9ab9f5e1389c6c6e897a45f25507c627b98457 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:26:26 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (aabc_age reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic +model: flat_mae +representation: reg +dataset: aabc_age +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/aabc_age__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:23:19 time: 6.1400 data: 5.4549 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:56 time: 0.2813 data: 0.1049 max mem: 3581 +extract (train) [ 40/228] eta: 0:01:15 time: 0.2350 data: 0.0808 max mem: 3581 +extract (train) [ 60/228] eta: 0:00:57 time: 0.2173 data: 0.0713 max mem: 3581 +extract (train) [ 80/228] eta: 0:00:46 time: 0.2321 data: 0.0788 max mem: 3581 +extract (train) [100/228] eta: 0:00:38 time: 0.2265 data: 0.0768 max mem: 3581 +extract (train) [120/228] eta: 0:00:30 time: 0.2358 data: 0.0811 max mem: 3581 +extract (train) [140/228] eta: 0:00:24 time: 0.2235 data: 0.0764 max mem: 3581 +extract (train) [160/228] eta: 0:00:18 time: 0.2339 data: 0.0770 max mem: 3581 +extract (train) [180/228] eta: 0:00:12 time: 0.2283 data: 0.0788 max mem: 3581 +extract (train) [200/228] eta: 0:00:07 time: 0.2190 data: 0.0739 max mem: 3581 +extract (train) [220/228] eta: 0:00:02 time: 0.1813 data: 0.0537 max mem: 3581 +extract (train) [227/228] eta: 0:00:00 time: 0.1837 data: 0.0590 max mem: 3581 +extract (train) Total time: 0:00:58 (0.2552 s / it) +extract (validation) [ 0/27] eta: 0:02:14 time: 4.9803 data: 4.8052 max mem: 3581 +extract (validation) [20/27] eta: 0:00:03 time: 0.2060 data: 0.0614 max mem: 3581 +extract (validation) [26/27] eta: 0:00:00 time: 0.1755 data: 0.0496 max mem: 3581 +extract (validation) Total time: 0:00:10 (0.3978 s / it) +extract (test) [ 0/26] eta: 0:01:59 time: 4.5897 data: 4.4596 max mem: 3581 +extract (test) [20/26] eta: 0:00:02 time: 0.1769 data: 0.0510 max mem: 3581 +extract (test) [25/26] eta: 0:00:00 time: 0.1761 data: 0.0509 max mem: 3581 +extract (test) Total time: 0:00:09 (0.3621 s / it) +feature extraction time: 0:01:18 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | aabc_age | | 0.0059948 | train | 0.62795 | 0.021429 | 0.62566 | 0.021694 | 0.6291 | 0.021405 | +| flat_mae | reg | logistic | aabc_age | | 0.0059948 | test | 0.36538 | 0.063684 | 0.36115 | 0.06334 | 0.35623 | 0.063019 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0635215450567013, "f1": 0.46233105669387525, "f1_std": 0.06145093577090532, "bacc": 0.4608516483516483, "bacc_std": 0.06363918506654685} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06397558705979156, "f1": 0.5167457769091163, "f1_std": 0.06655923191241152, "bacc": 0.5350274725274725, "bacc_std": 0.0634937022047951} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06757240254570512, "f1": 0.5521371986889227, "f1_std": 0.06949239911115596, "bacc": 0.55746336996337, "bacc_std": 0.06775775628642457} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06369867117969708, "f1": 0.5476679929266136, "f1_std": 0.06549382903532174, "bacc": 0.5588369963369964, "bacc_std": 0.06410354668758966} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06153378588006534, "f1": 0.4318368700265252, "f1_std": 0.06121395568289032, "bacc": 0.4416208791208791, "bacc_std": 0.06138814898142881} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.07041099757766293, "f1": 0.5109740802675585, "f1_std": 0.06997599806402775, "bacc": 0.5029761904761905, "bacc_std": 0.07043558162042017} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06388797286597123, "f1": 0.4579485978572683, "f1_std": 0.064824778536135, "bacc": 0.47435897435897434, "bacc_std": 0.06331108567415208} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 9.999999999999999e-05, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06555051351808928, "f1": 0.3819535576341384, "f1_std": 0.06573420966782607, "bacc": 0.38530219780219777, "bacc_std": 0.06579533252374668} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06449772942198599, "f1": 0.45986928104575164, "f1_std": 0.06516923458597285, "bacc": 0.47298534798534797, "bacc_std": 0.06400114795904321} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06550491196180834, "f1": 0.4217707024158637, "f1_std": 0.0658019159541703, "bacc": 0.4210164835164836, "bacc_std": 0.0652426249885098} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06230212463253408, "f1": 0.489169110459433, "f1_std": 0.06788477736496269, "bacc": 0.5114468864468865, "bacc_std": 0.061990161827376995} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06546167404849015, "f1": 0.4436363636363636, "f1_std": 0.06533979841486413, "bacc": 0.44184981684981683, "bacc_std": 0.06561485343501645} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05943192615214357, "f1": 0.3929096989966555, "f1_std": 0.059435883721023215, "bacc": 0.40453296703296704, "bacc_std": 0.05977774563040662} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06651315659326355, "f1": 0.5527777777777777, "f1_std": 0.07005273622240277, "bacc": 0.5590659340659341, "bacc_std": 0.0664281053079526} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06507868473702116, "f1": 0.5476651186790505, "f1_std": 0.0644691991685846, "bacc": 0.5366300366300366, "bacc_std": 0.0649956189605494} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06276983172789241, "f1": 0.4405230925261351, "f1_std": 0.06357106649169934, "bacc": 0.4416208791208791, "bacc_std": 0.06274541516930576} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05510714415824963, "f1": 0.3891826923076923, "f1_std": 0.05383832335941564, "bacc": 0.4191849816849817, "bacc_std": 0.05421053005724257} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.056525050738117394, "f1": 0.43675595238095233, "f1_std": 0.04807419339796134, "bacc": 0.49038461538461536, "bacc_std": 0.05487344921996411} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06573097177805584, "f1": 0.42341856367101804, "f1_std": 0.06616177145745306, "bacc": 0.4228479853479854, "bacc_std": 0.06583392617397316} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.054562542150427336, "f1": 0.36404761904761906, "f1_std": 0.05197598100507604, "bacc": 0.3820970695970696, "bacc_std": 0.05412459006261871} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06716810801063114, "f1": 0.3837888198757764, "f1_std": 0.06579405413982745, "bacc": 0.3839285714285714, "bacc_std": 0.06724200237390185} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 2.782559402207126, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.06197789021691239, "f1": 0.31612554112554114, "f1_std": 0.06052295842956278, "bacc": 0.30540293040293043, "bacc_std": 0.06172357520674181} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.056996262723945694, "f1": 0.4221295945433876, "f1_std": 0.055514211588921364, "bacc": 0.4251373626373627, "bacc_std": 0.057712208476765364} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06006057100418804, "f1": 0.36163720538720534, "f1_std": 0.05721191740047356, "bacc": 0.37934981684981683, "bacc_std": 0.05918138376944187} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 9.999999999999999e-05, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.057689371720165715, "f1": 0.32922367693158766, "f1_std": 0.05691724112402267, "bacc": 0.3628663003663004, "bacc_std": 0.05718031970159127} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06308864806597855, "f1": 0.3838338122605364, "f1_std": 0.05960954233774736, "bacc": 0.3985805860805861, "bacc_std": 0.062302552443679736} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 166.81005372000556, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.0667483461666005, "f1": 0.3795713963455899, "f1_std": 0.06659355276586615, "bacc": 0.38278388278388276, "bacc_std": 0.06669498697191201} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05976848826387624, "f1": 0.3938521156263092, "f1_std": 0.05064040314561026, "bacc": 0.4532967032967033, "bacc_std": 0.05817973702420191} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06445698304079935, "f1": 0.43796296296296294, "f1_std": 0.06392659279558822, "bacc": 0.4416208791208791, "bacc_std": 0.06438226572942052} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06562601365799275, "f1": 0.5014737075906491, "f1_std": 0.063764951463825, "bacc": 0.5070970695970696, "bacc_std": 0.0660563858247356} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 9.999999999999999e-05, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05598266849300445, "f1": 0.5763107263107263, "f1_std": 0.06077096700547292, "bacc": 0.6062271062271063, "bacc_std": 0.05538900933653573} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 166.81005372000556, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06225395355901351, "f1": 0.40538031637775884, "f1_std": 0.06275107671994935, "bacc": 0.4194139194139195, "bacc_std": 0.06198457953341503} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06800708064423497, "f1": 0.47451923076923075, "f1_std": 0.06698869887278167, "bacc": 0.46130952380952384, "bacc_std": 0.06816881013542106} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06643094915712977, "f1": 0.4111528822055138, "f1_std": 0.07023446646418564, "bacc": 0.4006410256410256, "bacc_std": 0.06663515495214557} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06145359532767977, "f1": 0.38824884792626724, "f1_std": 0.060636641281595675, "bacc": 0.4191849816849817, "bacc_std": 0.06075702966635236} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.05797642859118996, "f1": 0.4911512497719394, "f1_std": 0.05779249289977651, "bacc": 0.5022893772893773, "bacc_std": 0.05821223089978041} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06426779218571334, "f1": 0.4595289855072464, "f1_std": 0.06424631314360292, "bacc": 0.4569597069597069, "bacc_std": 0.06418496042764131} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06025403568698156, "f1": 0.4119022869022869, "f1_std": 0.0501325708860847, "bacc": 0.45192307692307687, "bacc_std": 0.05865072378903898} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06769732280373353, "f1": 0.47308941058941056, "f1_std": 0.06848737457399039, "bacc": 0.47458791208791207, "bacc_std": 0.06761032584988574} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06347319240314099, "f1": 0.5337403967434926, "f1_std": 0.06475136159150187, "bacc": 0.5336538461538461, "bacc_std": 0.06321120714157227} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06658707650435218, "f1": 0.4142613636363637, "f1_std": 0.06802799901370726, "bacc": 0.423992673992674, "bacc_std": 0.0668351081637679} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.000774263682681127, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06269607114706716, "f1": 0.503393665158371, "f1_std": 0.06923785951855704, "bacc": 0.5233516483516484, "bacc_std": 0.0629442886621374} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06542282820013318, "f1": 0.46949404761904767, "f1_std": 0.06392540367338338, "bacc": 0.47435897435897434, "bacc_std": 0.06505806858132487} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0693893909323644, "f1": 0.47442982456140353, "f1_std": 0.07265749914384596, "bacc": 0.4764194139194139, "bacc_std": 0.0695203655103376} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05677366044839861, "f1": 0.39141414141414144, "f1_std": 0.05788268287355007, "bacc": 0.42811355311355315, "bacc_std": 0.05782617095864232} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.065240519953377, "f1": 0.5284178187403994, "f1_std": 0.0685708198487278, "bacc": 0.5352564102564102, "bacc_std": 0.06518834895771239} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06806300379212704, "f1": 0.5015360983102919, "f1_std": 0.07025446523612558, "bacc": 0.5013736263736264, "bacc_std": 0.0681226242916817} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 2.782559402207126, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.06295573489534545, "f1": 0.3146488294314381, "f1_std": 0.06174708054934639, "bacc": 0.3083791208791209, "bacc_std": 0.06330854703986699} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06160496406648685, "f1": 0.44890109890109886, "f1_std": 0.06052242105885804, "bacc": 0.45650183150183155, "bacc_std": 0.06126472691445141} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 166.81005372000556, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.06550328597385616, "f1": 0.3328544061302682, "f1_std": 0.06431045315353695, "bacc": 0.32623626373626374, "bacc_std": 0.06527709759401436} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.07021786679021264, "f1": 0.5426010770838356, "f1_std": 0.06957130648355016, "bacc": 0.5368589743589743, "bacc_std": 0.07031745680660469} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.07130105354541677, "f1": 0.4699079031912615, "f1_std": 0.07089907677300118, "bacc": 0.4642857142857143, "bacc_std": 0.0714365514937434} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05324737124383416, "f1": 0.4894601806239737, "f1_std": 0.05385427649113559, "bacc": 0.5318223443223443, "bacc_std": 0.052299533512303244} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05743907242827062, "f1": 0.43558499342704016, "f1_std": 0.0595672412132623, "bacc": 0.4652014652014652, "bacc_std": 0.05801901363625614} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06952029204424533, "f1": 0.4948282967032967, "f1_std": 0.06874261193053124, "bacc": 0.4835164835164835, "bacc_std": 0.06986617324323995} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.05990647296376728, "f1": 0.3137423866456125, "f1_std": 0.05799261461623615, "bacc": 0.3273809523809524, "bacc_std": 0.06018979545320508} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06366359441971424, "f1": 0.4512744598951496, "f1_std": 0.06276025701256596, "bacc": 0.4608516483516484, "bacc_std": 0.06372183734746409} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.052510015950749146, "f1": 0.3610294117647059, "f1_std": 0.05157015865388373, "bacc": 0.396978021978022, "bacc_std": 0.05075616358166564} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.0669194848726535, "f1": 0.541971706454465, "f1_std": 0.06699214220343688, "bacc": 0.5400641025641026, "bacc_std": 0.06724980671651086} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.057020446867301434, "f1": 0.3717135129914035, "f1_std": 0.05804129142893567, "bacc": 0.3850732600732601, "bacc_std": 0.057236141401219874} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06664735420468848, "f1": 0.42827693296209035, "f1_std": 0.0656690699202695, "bacc": 0.4269688644688645, "bacc_std": 0.06708553862324144} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06811896789428219, "f1": 0.4261907507784569, "f1_std": 0.06809110786307863, "bacc": 0.4212454212454212, "bacc_std": 0.06840003427291298} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06030146160906341, "f1": 0.4409523809523809, "f1_std": 0.06255875771384951, "bacc": 0.46222527472527475, "bacc_std": 0.06063968529122935} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06347347207144677, "f1": 0.44502801120448177, "f1_std": 0.06550486274729536, "bacc": 0.45535714285714285, "bacc_std": 0.06342627737412522} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0646783118492485, "f1": 0.48205387205387207, "f1_std": 0.06270447158169154, "bacc": 0.47596153846153844, "bacc_std": 0.06425268359364424} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.000774263682681127, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06072544673904675, "f1": 0.48778225806451614, "f1_std": 0.06511409985678748, "bacc": 0.5114468864468865, "bacc_std": 0.06040859216572663} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.06436006102872574, "f1": 0.47044864664441977, "f1_std": 0.06911468811774037, "bacc": 0.49496336996337, "bacc_std": 0.06405733044786148} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06089932520685995, "f1": 0.4559178743961353, "f1_std": 0.06115479118899193, "bacc": 0.46703296703296704, "bacc_std": 0.06186205461485543} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06828395290880626, "f1": 0.46580663375894793, "f1_std": 0.06431988819204823, "bacc": 0.4478021978021978, "bacc_std": 0.06906040759402957} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 0.000774263682681127, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06239607928435224, "f1": 0.3557142857142857, "f1_std": 0.06170389547721175, "bacc": 0.3630952380952381, "bacc_std": 0.06203516420650396} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06582452691205054, "f1": 0.53574082921909, "f1_std": 0.06721322609986392, "bacc": 0.5382326007326008, "bacc_std": 0.06610979130684805} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06654014004107889, "f1": 0.5201728967449049, "f1_std": 0.07048678450212359, "bacc": 0.5336538461538461, "bacc_std": 0.06622734503322887} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0631375295140386, "f1": 0.6096551724137931, "f1_std": 0.0640725411745994, "bacc": 0.6137820512820513, "bacc_std": 0.06318277355265306} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06725025021400406, "f1": 0.5204112554112554, "f1_std": 0.06941327471270263, "bacc": 0.5309065934065934, "bacc_std": 0.06695426351851014} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.0629654267865644, "f1": 0.41176840985839286, "f1_std": 0.06252813808657212, "bacc": 0.41941391941391937, "bacc_std": 0.06260032820751933} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06589057631662723, "f1": 0.4801343986126595, "f1_std": 0.06645298923045889, "bacc": 0.4819139194139194, "bacc_std": 0.06614408852273984} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 1291.5496650148827, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.0656736147066617, "f1": 0.4252338116655958, "f1_std": 0.0642087345970438, "bacc": 0.4210164835164835, "bacc_std": 0.06560799486917428} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06626667480226477, "f1": 0.5258018784334574, "f1_std": 0.06563724008770741, "bacc": 0.5173992673992673, "bacc_std": 0.06629406748786343} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 2.782559402207126, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06020060154213996, "f1": 0.3493447293447294, "f1_std": 0.057134039237096854, "bacc": 0.34523809523809523, "bacc_std": 0.060247842616569194} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.058969190130231434, "f1": 0.43798850574712644, "f1_std": 0.06121039088968693, "bacc": 0.4635989010989011, "bacc_std": 0.05951031843977462} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06265284269867451, "f1": 0.35285884963304315, "f1_std": 0.06110407618452753, "bacc": 0.3411172161172161, "bacc_std": 0.06232903520452375} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06151268690999909, "f1": 0.4924796005358665, "f1_std": 0.0705099339519262, "bacc": 0.5157967032967032, "bacc_std": 0.06138539338783504} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.061116957911685185, "f1": 0.3607150913602527, "f1_std": 0.059919678655952824, "bacc": 0.3617216117216117, "bacc_std": 0.060650419953187415} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06485559745087588, "f1": 0.5166666666666666, "f1_std": 0.06645026186741908, "bacc": 0.5160256410256411, "bacc_std": 0.06468347909757757} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.04807432300661528, "f1": 0.3784722222222222, "f1_std": 0.04680206275885799, "bacc": 0.4532967032967033, "bacc_std": 0.046241884489209475} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06753367557950705, "f1": 0.4431481481481481, "f1_std": 0.06779674940327746, "bacc": 0.45375457875457875, "bacc_std": 0.06718382771857714} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06449879591555344, "f1": 0.574404761904762, "f1_std": 0.0651860042889364, "bacc": 0.5812728937728938, "bacc_std": 0.06478056722481677} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.06504949185452984, "f1": 0.5006475006475006, "f1_std": 0.06758364192675416, "bacc": 0.49679487179487175, "bacc_std": 0.06501613468684679} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.061562146884218426, "f1": 0.37788515406162465, "f1_std": 0.06170259954974571, "bacc": 0.38827838827838823, "bacc_std": 0.0621893296708724} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.05505633209851535, "f1": 0.4760660784854333, "f1_std": 0.054637068609627915, "bacc": 0.5020604395604396, "bacc_std": 0.055479336190935334} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06068645774673418, "f1": 0.44500103391232426, "f1_std": 0.06510692668376102, "bacc": 0.47847985347985345, "bacc_std": 0.060357772561131735} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05687652722443966, "f1": 0.513186667398624, "f1_std": 0.059596712721945805, "bacc": 0.5407509157509157, "bacc_std": 0.057354357691175174} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06123724356957946, "f1": 0.35315176513125923, "f1_std": 0.06137616920384976, "bacc": 0.36172161172161177, "bacc_std": 0.06088763257145617} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06352677299827188, "f1": 0.43104395604395607, "f1_std": 0.06412780498206078, "bacc": 0.4416208791208791, "bacc_std": 0.06354292988391468} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06465176402632782, "f1": 0.4289983579638752, "f1_std": 0.06310108540298548, "bacc": 0.43727106227106227, "bacc_std": 0.0639757256305047} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06845613634693604, "f1": 0.45566454006351703, "f1_std": 0.06654604472412512, "bacc": 0.4375, "bacc_std": 0.06821475006689492} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06492685916799086, "f1": 0.3869485294117647, "f1_std": 0.06728092376615133, "bacc": 0.4001831501831502, "bacc_std": 0.06452767550372585} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06552130508558526, "f1": 0.44963121118012417, "f1_std": 0.0671746965720416, "bacc": 0.4581043956043956, "bacc_std": 0.0650731422150053} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 1291.5496650148827, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06263752915921571, "f1": 0.5242512672235311, "f1_std": 0.06511641113083007, "bacc": 0.538003663003663, "bacc_std": 0.06257881278303613} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06878036177340502, "f1": 0.40091580832960144, "f1_std": 0.06899718462027388, "bacc": 0.4017857142857143, "bacc_std": 0.0685889800200924} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | aabc_age | train | 100 | 30.954 | 183.23 | 0.69098 | 0.15931 | 0.68508 | 0.16596 | 0.69125 | 0.15984 | +| flat_mae | reg | logistic | aabc_age | test | 100 | 30.954 | 183.23 | 0.45923 | 0.065388 | 0.44746 | 0.065364 | 0.45729 | 0.065279 | + + +done! total time: 0:06:08 diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdb05e11e931089e3c0fb268af5784f51298d2f9 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..0028dc359ae3fb012145e0257fd351507f4c03d1 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,0.005994842503189409,train,0.8979206049149339,0.013218213174086052,0.8948077772867875,0.013677766884316846,0.8925058548009368,0.013876741772887185 +flat_mae,patch,logistic,aabc_sex,,0.005994842503189409,test,0.8909090909090909,0.04445443762768108,0.8891129032258065,0.04453656303956587,0.9015151515151516,0.04107849045825253 +flat_mae,patch,logistic,aabc_sex,1,0.3593813663804626,train,0.9886578449905482,0.004648990860332371,0.9883855386416862,0.004754556497440742,0.9889797473548463,0.004575950341334994 +flat_mae,patch,logistic,aabc_sex,1,0.3593813663804626,test,0.8363636363636363,0.049979830642714045,0.8307692307692308,0.052114221274963034,0.8288043478260869,0.05229137973860432 +flat_mae,patch,logistic,aabc_sex,2,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,2,2.782559402207126,test,0.8545454545454545,0.04751546072874029,0.8505434782608696,0.04905752636385842,0.8505434782608696,0.04921354934341602 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,train,0.9376181474480151,0.010243775924971459,0.9359246820459175,0.010546159487965903,0.9351314516838125,0.010808680717050035 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,test,0.7636363636363637,0.058653497144439105,0.7555555555555555,0.06099713134051328,0.7540760869565217,0.06082206081333329 +flat_mae,patch,logistic,aabc_sex,4,0.046415888336127774,train,0.9281663516068053,0.01133278503953773,0.9260738452486026,0.011721047871843242,0.9245288548902371,0.012066986081897166 +flat_mae,patch,logistic,aabc_sex,4,0.046415888336127774,test,0.8363636363636363,0.050399842581414896,0.8354935194416749,0.05024155884788907,0.8471467391304348,0.04733826769948187 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,train,0.9319470699432892,0.010993374600726698,0.929871851524525,0.011398455044034046,0.9277968287464463,0.011795414190170108 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,test,0.8727272727272727,0.04264349232903572,0.8683760683760684,0.04469182604063988,0.8661684782608696,0.0452071274759942 +flat_mae,patch,logistic,aabc_sex,6,0.3593813663804626,train,0.9735349716446124,0.007000407107102199,0.9728995901639343,0.007161938598204154,0.9734751897769571,0.007129824343356391 +flat_mae,patch,logistic,aabc_sex,6,0.3593813663804626,test,0.9272727272727272,0.03463091820711327,0.9242424242424243,0.036827578434685766,0.9191576086956521,0.03874944191602967 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,train,0.9300567107750473,0.010910827574213073,0.9278752436647173,0.011342040972428946,0.9255546762800786,0.011791435347801488 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,test,0.9090909090909091,0.038021759755119615,0.9071259709557582,0.03879016877348678,0.9096467391304348,0.03845550356797873 +flat_mae,patch,logistic,aabc_sex,8,0.3593813663804626,train,0.9867674858223062,0.004927968755442423,0.9864417081324122,0.005046935514531682,0.9867375948884786,0.005036694774717146 +flat_mae,patch,logistic,aabc_sex,8,0.3593813663804626,test,0.8545454545454545,0.046151973176660296,0.8533333333333333,0.04622169295632456,0.8627717391304348,0.044569939768353196 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,train,0.9376181474480151,0.010244117086623532,0.9358427325549344,0.01056656276340107,0.9345232861455495,0.01082163763996326 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,test,0.8727272727272727,0.04494545454545453,0.8699763593380614,0.04574360610828019,0.8722826086956521,0.04555173783021404 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,train,0.9319470699432892,0.01050100626301955,0.9299646954986761,0.010868970912534755,0.9284049942847094,0.01124403358957719 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,test,0.8545454545454545,0.04794557520583291,0.8521505376344086,0.048450693707014734,0.8566576086956521,0.047631521598830176 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,train,0.9300567107750473,0.011116738588790233,0.9278752436647173,0.011540275340520482,0.9255546762800786,0.01193862293432863 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,test,0.8909090909090909,0.044070627012662934,0.8863636363636364,0.04685771412343013,0.8817934782608696,0.04767547188969283 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,train,0.9848771266540642,0.00516565538481019,0.98453216374269,0.005269283777795128,0.9857117734986371,0.004921212855775628 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,test,0.8181818181818182,0.050836974895531725,0.8074229691876751,0.05588488886592202,0.8009510869565217,0.05518700919418474 +flat_mae,patch,logistic,aabc_sex,13,0.046415888336127774,train,0.9395085066162571,0.01051990974493902,0.9377463959988231,0.010852738369197633,0.936157273073654,0.011099597657114959 +flat_mae,patch,logistic,aabc_sex,13,0.046415888336127774,test,0.8727272727272727,0.04311339651057065,0.8683760683760684,0.04480368712293577,0.8661684782608696,0.0450411926785977 +flat_mae,patch,logistic,aabc_sex,14,0.3593813663804626,train,0.9792060491493384,0.0056805101138609464,0.9786941127795048,0.005817724163119834,0.978985316099534,0.005857143613623006 +flat_mae,patch,logistic,aabc_sex,14,0.3593813663804626,test,0.8727272727272727,0.04407956742599465,0.8699763593380614,0.045021616492151174,0.8722826086956521,0.044782663830795266 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,train,0.9300567107750473,0.010805631651132103,0.9276748267898383,0.011295620547121394,0.9243383452035523,0.011842230841745453 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,test,0.8363636363636363,0.050871817523868906,0.8343927735028438,0.05121270628930134,0.8410326086956521,0.050420758055814356 +flat_mae,patch,logistic,aabc_sex,16,0.3593813663804626,train,0.9754253308128544,0.006771201100757306,0.9749074124751427,0.006891354925108573,0.976933673319851,0.0065119006760939995 +flat_mae,patch,logistic,aabc_sex,16,0.3593813663804626,test,0.8545454545454545,0.04727735641670402,0.84593837535014,0.05208167818486709,0.8383152173913043,0.052534778877567474 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,train,0.9357277882797732,0.010820869634963566,0.9339410589410589,0.011157546025710114,0.9328892992174448,0.011401745894231468 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,test,0.8363636363636363,0.048739143737158555,0.8250265111346766,0.05413837271580632,0.8165760869565217,0.05353673322984692 +flat_mae,patch,logistic,aabc_sex,18,0.046415888336127774,train,0.9262759924385633,0.01111291655323672,0.9239766081871346,0.011543745792404147,0.9216785368856062,0.011971554852737912 +flat_mae,patch,logistic,aabc_sex,18,0.046415888336127774,test,0.8545454545454545,0.045277195836424276,0.8521505376344086,0.045796562109121204,0.8566576086956521,0.04523908071194618 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,train,0.9338374291115312,0.0103188340384066,0.9317738791423003,0.01073542351478101,0.9294308156745509,0.011250730597096198 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,test,0.8363636363636363,0.0497027993624384,0.8343927735028438,0.04998745062171858,0.8410326086956521,0.04911638928765456 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,train,0.9357277882797732,0.010994112452137576,0.9338555457487496,0.011364176694626148,0.9322811336791816,0.011716851852229605 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,test,0.8363636363636363,0.04937458776896643,0.8328267477203647,0.05060901146070245,0.8349184782608696,0.050724127407500694 +flat_mae,patch,logistic,aabc_sex,21,0.005994842503189409,train,0.9073724007561437,0.012034612028582673,0.9044834307992202,0.012453971794233335,0.9022978399132449,0.012649592930421397 +flat_mae,patch,logistic,aabc_sex,21,0.005994842503189409,test,0.8363636363636363,0.0503660780504281,0.8307692307692308,0.0527326193321356,0.8288043478260869,0.05309148879119155 +flat_mae,patch,logistic,aabc_sex,22,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,22,2.782559402207126,test,0.8545454545454545,0.049345481342526365,0.8521505376344086,0.05000058308014053,0.8566576086956521,0.04970105902488103 +flat_mae,patch,logistic,aabc_sex,23,0.3593813663804626,train,0.9810964083175804,0.005943273592190965,0.9806872280148369,0.0060517405531202275,0.982443799642428,0.0056181069221184566 +flat_mae,patch,logistic,aabc_sex,23,0.3593813663804626,test,0.9272727272727272,0.03472944305450397,0.9252717391304348,0.0357934873984947,0.9252717391304348,0.03612846645457738 +flat_mae,patch,logistic,aabc_sex,24,0.046415888336127774,train,0.9338374291115312,0.010747668595769783,0.9319544133158395,0.0110934864789005,0.9306471467510771,0.011382908089981323 +flat_mae,patch,logistic,aabc_sex,24,0.046415888336127774,test,0.8727272727272727,0.04509530770334651,0.8683760683760684,0.04717039073314957,0.8661684782608696,0.04775954069043428 +flat_mae,patch,logistic,aabc_sex,25,0.046415888336127774,train,0.9281663516068053,0.011422127637516807,0.9260738452486026,0.011806695017309332,0.9245288548902371,0.012110220420481719 +flat_mae,patch,logistic,aabc_sex,25,0.046415888336127774,test,0.9454545454545454,0.029984017781378967,0.9435897435897436,0.031396936022588515,0.9408967391304348,0.03269664780532673 +flat_mae,patch,logistic,aabc_sex,26,0.3593813663804626,train,0.9829867674858223,0.0056529148346969775,0.9825885657235016,0.005775106173105369,0.9834696210322693,0.005558780445311248 +flat_mae,patch,logistic,aabc_sex,26,0.3593813663804626,test,0.8727272727272727,0.042690893603837966,0.8663658451926415,0.04624391738603346,0.8600543478260869,0.04708772248723998 +flat_mae,patch,logistic,aabc_sex,27,0.046415888336127774,train,0.9262759924385633,0.011134232190011919,0.9240784423403167,0.011535785346910504,0.9222867024238695,0.01192562479187047 +flat_mae,patch,logistic,aabc_sex,27,0.046415888336127774,test,0.9090909090909091,0.03829204867509099,0.9071259709557582,0.03896359943367959,0.9096467391304348,0.03825473718765804 +flat_mae,patch,logistic,aabc_sex,28,0.046415888336127774,train,0.9319470699432892,0.010669830196625764,0.9299646954986761,0.011009152405757717,0.9284049942847094,0.01126164100823256 +flat_mae,patch,logistic,aabc_sex,28,0.046415888336127774,test,0.8545454545454545,0.04587660056432735,0.8484848484848485,0.048863494027942114,0.8444293478260869,0.04945172073791577 +flat_mae,patch,logistic,aabc_sex,29,0.046415888336127774,train,0.9357277882797732,0.010874413318417648,0.9336779846013157,0.011307335695636434,0.9310648026026555,0.011809707783237252 +flat_mae,patch,logistic,aabc_sex,29,0.046415888336127774,test,0.8909090909090909,0.042077865640494455,0.8879076086956521,0.043337367579471014,0.8879076086956521,0.04337175888926743 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,train,0.9281663516068053,0.01092835257377837,0.9260738452486026,0.011282154324437021,0.9245288548902371,0.011501715888301752 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,test,0.8,0.05581147724021946,0.795677136102668,0.05695834966003606,0.7975543478260869,0.056545937349887575 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,train,0.9262759924385633,0.011463045098904846,0.9240784423403167,0.011857698693027038,0.9222867024238695,0.012142395640393313 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,test,0.8545454545454545,0.043898892096187636,0.8484848484848485,0.04627099309748399,0.8444293478260869,0.04628414436242084 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,train,0.9357277882797732,0.010571431358449334,0.9339410589410589,0.010892772743432665,0.9328892992174448,0.011119521391041406 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,test,0.8909090909090909,0.041034216723958065,0.884453781512605,0.04532303427299883,0.8756793478260869,0.046552994820911804 +flat_mae,patch,logistic,aabc_sex,33,0.005994842503189409,train,0.8922495274102079,0.01274919888091919,0.8890377234204629,0.013171684739842636,0.8874014478736187,0.013352985178546278 +flat_mae,patch,logistic,aabc_sex,33,0.005994842503189409,test,0.8363636363636363,0.04740295349448173,0.8307692307692308,0.04986026174395696,0.8288043478260869,0.0504033309300538 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,train,0.9810964083175804,0.006021795431482549,0.9806872280148369,0.0061293975576675284,0.982443799642428,0.005628093557108213 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,test,0.8727272727272727,0.044550070261791704,0.8699763593380614,0.04554749075445059,0.8722826086956521,0.04561983866742563 +flat_mae,patch,logistic,aabc_sex,35,0.000774263682681127,train,0.8563327032136105,0.015179419331618054,0.8511183528366169,0.01590774820384353,0.8478413787039479,0.016158545357895102 +flat_mae,patch,logistic,aabc_sex,35,0.000774263682681127,test,0.8545454545454545,0.045903239951190014,0.84593837535014,0.050674101218175414,0.8383152173913043,0.05100884089064916 +flat_mae,patch,logistic,aabc_sex,36,0.3593813663804626,train,0.9754253308128544,0.006415423398655344,0.9748501504895023,0.006552318436978207,0.9757173422433248,0.006377082546798903 +flat_mae,patch,logistic,aabc_sex,36,0.3593813663804626,test,0.8909090909090909,0.04209286859762422,0.8879076086956521,0.04342780126651795,0.8879076086956521,0.04369233634063438 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,train,0.9319470699432892,0.010687936939667776,0.9296791917759659,0.011131946761470453,0.92658049766992,0.011555926754883794 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,test,0.7636363636363637,0.05800773958050485,0.7555555555555555,0.06085463918872742,0.7540760869565217,0.06048988552548357 +flat_mae,patch,logistic,aabc_sex,38,0.3593813663804626,train,0.9848771266540642,0.00518634636466258,0.98453216374269,0.0052908800093363195,0.9857117734986371,0.004928227275398655 +flat_mae,patch,logistic,aabc_sex,38,0.3593813663804626,test,0.8363636363636363,0.045840268846657976,0.8281846581048247,0.04940966888572304,0.8226902173913043,0.04946488244174629 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,train,0.9376181474480151,0.010042091795041577,0.9359246820459175,0.01034425487732331,0.9351314516838125,0.010568993221720847 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,test,0.8545454545454545,0.04541694444780719,0.8521505376344086,0.0459635531780556,0.8566576086956521,0.04530318525519917 +flat_mae,patch,logistic,aabc_sex,40,0.046415888336127774,train,0.9281663516068053,0.011184885714545312,0.9259758432758874,0.011600805441715392,0.923920689351974,0.011997337299164709 +flat_mae,patch,logistic,aabc_sex,40,0.046415888336127774,test,0.8545454545454545,0.04456045758478347,0.8484848484848485,0.047178845737100586,0.8444293478260869,0.04765095776574107 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,train,0.8998109640831758,0.01351273161987521,0.8965443442002915,0.014037311200813444,0.8939373955860371,0.014289941979750854 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,test,0.8545454545454545,0.0470935204611153,0.84593837535014,0.05193953739922566,0.8383152173913043,0.0520893976935748 +flat_mae,patch,logistic,aabc_sex,42,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,42,166.81005372000556,test,0.8,0.05570257498091206,0.795677136102668,0.056865767510617914,0.7975543478260869,0.056673705795758346 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,train,0.9319470699432892,0.011118281434027174,0.929871851524525,0.011509301707121474,0.9277968287464463,0.011876501180490478 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,test,0.9454545454545454,0.030422675318809808,0.9442755825734549,0.030963520803673268,0.9470108695652174,0.029922312813296416 +flat_mae,patch,logistic,aabc_sex,44,0.3593813663804626,train,0.9867674858223062,0.004999299195815722,0.9864417081324122,0.005118360383354906,0.9867375948884786,0.00506329000905632 +flat_mae,patch,logistic,aabc_sex,44,0.3593813663804626,test,0.8727272727272727,0.043681476680152154,0.8699763593380614,0.04444520985547415,0.8722826086956521,0.04403583786912668 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,train,0.9319470699432892,0.011088873950456695,0.9299646954986761,0.011458315951879546,0.9284049942847094,0.011710786224914409 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,test,0.9636363636363636,0.02440745042735182,0.9630376344086022,0.024547986237309635,0.96875,0.02097515271100547 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,train,0.9376181474480151,0.010472587062299603,0.9358427325549344,0.010799891782819437,0.9345232861455495,0.011040459668946807 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,test,0.8909090909090909,0.043179104220934385,0.8879076086956521,0.04458276579251271,0.8879076086956521,0.044890038242751895 +flat_mae,patch,logistic,aabc_sex,47,0.046415888336127774,train,0.9395085066162571,0.010273326255047666,0.9378268790033496,0.010608703157331378,0.9367654386119171,0.010935388751594497 +flat_mae,patch,logistic,aabc_sex,47,0.046415888336127774,test,0.8545454545454545,0.04490848729811501,0.84593837535014,0.04951131239259725,0.8383152173913043,0.049727125102110616 +flat_mae,patch,logistic,aabc_sex,48,0.046415888336127774,train,0.9319470699432892,0.011181134913437014,0.9299646954986761,0.011556189704823278,0.9284049942847094,0.011867292209784303 +flat_mae,patch,logistic,aabc_sex,48,0.046415888336127774,test,0.8727272727272727,0.04535609702333393,0.8663658451926415,0.04915934470972352,0.8600543478260869,0.04995786238362686 +flat_mae,patch,logistic,aabc_sex,49,0.046415888336127774,train,0.9357277882797732,0.010577319548132035,0.9339410589410589,0.010907977209098056,0.9328892992174448,0.011183383838066862 +flat_mae,patch,logistic,aabc_sex,49,0.046415888336127774,test,0.8545454545454545,0.04433554997668953,0.8521505376344086,0.045117155133186496,0.8566576086956521,0.044879046840705324 +flat_mae,patch,logistic,aabc_sex,50,0.005994842503189409,train,0.9054820415879017,0.012396942564514896,0.9025997937840624,0.012830278186389007,0.9006638529851403,0.013089022482628027 +flat_mae,patch,logistic,aabc_sex,50,0.005994842503189409,test,0.8545454545454545,0.049170561642423845,0.8505434782608696,0.05052656102177713,0.8505434782608696,0.050601486752735944 +flat_mae,patch,logistic,aabc_sex,51,0.3593813663804626,train,0.9792060491493384,0.005793041371583213,0.9787193581065019,0.005917807987822388,0.9795934816377971,0.005741662091572571 +flat_mae,patch,logistic,aabc_sex,51,0.3593813663804626,test,0.8363636363636363,0.04642575517750519,0.8250265111346766,0.0531094353077188,0.8165760869565217,0.052464281633686057 +flat_mae,patch,logistic,aabc_sex,52,0.3593813663804626,train,0.9848771266540642,0.005115835817901397,0.98453216374269,0.005219558003090379,0.9857117734986371,0.004896023375318947 +flat_mae,patch,logistic,aabc_sex,52,0.3593813663804626,test,0.8181818181818182,0.05174803545598341,0.8074229691876751,0.056967451961668754,0.8009510869565217,0.05618861325597044 +flat_mae,patch,logistic,aabc_sex,53,0.3593813663804626,train,0.9792060491493384,0.0064108264155694214,0.97876781055589,0.006518354166277934,0.9808098127143234,0.0059530031315663074 +flat_mae,patch,logistic,aabc_sex,53,0.3593813663804626,test,0.9090909090909091,0.0404536261386862,0.905982905982906,0.0421921794315327,0.9035326086956521,0.043062189797527926 +flat_mae,patch,logistic,aabc_sex,54,0.046415888336127774,train,0.9357277882797732,0.010398332958288748,0.9338555457487496,0.01075741975807743,0.9322811336791816,0.01112268147671014 +flat_mae,patch,logistic,aabc_sex,54,0.046415888336127774,test,0.8363636363636363,0.05106017839943194,0.8328267477203647,0.05191196689923785,0.8349184782608696,0.0514518592989505 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,train,0.9829867674858223,0.005830799294952884,0.9825466942830434,0.0059883236816095295,0.9822532899557432,0.006182129314619867 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,test,0.8545454545454545,0.04759247110400049,0.8521505376344086,0.04818108309951583,0.8566576086956521,0.04777181092948555 +flat_mae,patch,logistic,aabc_sex,56,0.3593813663804626,train,0.9924385633270322,0.0037961575117313163,0.9922477212110554,0.0038931862550643506,0.9922477212110554,0.003978039301285019 +flat_mae,patch,logistic,aabc_sex,56,0.3593813663804626,test,0.8181818181818182,0.05066664346816923,0.8106060606060606,0.05380030293011867,0.8070652173913043,0.05384603855245998 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,train,0.9376181474480151,0.01016816377178648,0.935672514619883,0.010561760230408763,0.9333069550690232,0.011020136621883229 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,test,0.9090909090909091,0.03870628539301621,0.9071259709557582,0.039498151445812965,0.9096467391304348,0.03893262376560892 +flat_mae,patch,logistic,aabc_sex,58,0.3593813663804626,train,0.9810964083175804,0.006239976829290193,0.9806193030276386,0.006395027285868735,0.9806193030276386,0.00641770026280588 +flat_mae,patch,logistic,aabc_sex,58,0.3593813663804626,test,0.7818181818181819,0.05533100994159625,0.7758152173913043,0.05691511442965067,0.7758152173913043,0.05673492951506501 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,train,0.9357277882797732,0.01116604998371754,0.9339410589410589,0.011520727816787604,0.9328892992174448,0.011837055473481472 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,test,0.9090909090909091,0.038010629222420114,0.9071259709557582,0.038727810170713056,0.9096467391304348,0.038259390483507995 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,train,0.8960302457466919,0.012892471268514215,0.8930712209248908,0.013284600396886106,0.8918857528063542,0.013400747421294813 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,test,0.8727272727272727,0.043333173977244586,0.8683760683760684,0.04541251572451246,0.8661684782608696,0.046005090412563565 +flat_mae,patch,logistic,aabc_sex,61,0.3593813663804626,train,0.9829867674858223,0.005825215927744091,0.9825885657235016,0.005951787816111025,0.9834696210322693,0.005744176719745834 +flat_mae,patch,logistic,aabc_sex,61,0.3593813663804626,test,0.8181818181818182,0.0505493785265604,0.8151881720430108,0.051070417559471344,0.8192934782608696,0.05107765964999542 +flat_mae,patch,logistic,aabc_sex,62,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,62,2.782559402207126,test,0.8363636363636363,0.04799750682230299,0.8307692307692308,0.05035684378154921,0.8288043478260869,0.050608965052411 +flat_mae,patch,logistic,aabc_sex,63,0.3593813663804626,train,0.9810964083175804,0.005926854528672268,0.9806425644028103,0.006060930771843901,0.9812274685659017,0.005924750345618621 +flat_mae,patch,logistic,aabc_sex,63,0.3593813663804626,test,0.9454545454545454,0.0297743358344089,0.9442755825734549,0.0302752512366801,0.9470108695652174,0.0292322281469951 +flat_mae,patch,logistic,aabc_sex,64,0.046415888336127774,train,0.943289224952741,0.010551016286054137,0.9416372462488967,0.010891603033154016,0.9400334124681262,0.011173991843354209 +flat_mae,patch,logistic,aabc_sex,64,0.046415888336127774,test,0.8181818181818182,0.050398950533478676,0.8131793478260869,0.05199510973078927,0.8131793478260869,0.052004874685033384 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,train,0.9319470699432892,0.011288226550786578,0.929871851524525,0.011696681591629962,0.9277968287464463,0.012026834582726754 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,test,0.8545454545454545,0.045699977394789766,0.84593837535014,0.050077777004766864,0.8383152173913043,0.050142510827363915 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,train,0.9376181474480151,0.01033920779219041,0.9359246820459175,0.010627057026302013,0.9351314516838125,0.010774165162727426 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,test,0.8363636363636363,0.049626119489006866,0.8307692307692308,0.05166277653444132,0.8288043478260869,0.05179761397315772 +flat_mae,patch,logistic,aabc_sex,67,0.3593813663804626,train,0.9829867674858223,0.005569626910524766,0.9826282086366374,0.00565870950868827,0.9846859521087956,0.0049895553813963265 +flat_mae,patch,logistic,aabc_sex,67,0.3593813663804626,test,0.9272727272727272,0.03460051115086058,0.9260752688172043,0.03485992561664718,0.9313858695652174,0.033038890640624034 +flat_mae,patch,logistic,aabc_sex,68,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,68,21.54434690031882,test,0.9636363636363636,0.025008983509897952,0.9626358695652174,0.025799424698907846,0.9626358695652174,0.026276009757663994 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,train,0.9300567107750473,0.011222292934399783,0.9279718555536338,0.011600756432576204,0.9261628418183416,0.011884202225170275 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,test,0.9818181818181818,0.018108360700867642,0.9814251941911516,0.01833971583558119,0.984375,0.015561872477308121 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,train,0.9281663516068053,0.011500070187157749,0.9259758432758874,0.011930537290257255,0.923920689351974,0.01231765954622534 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,test,0.9272727272727272,0.035197652062940246,0.9260752688172043,0.0354783106666295,0.9313858695652174,0.03379827749390227 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,train,0.9262759924385633,0.010739170198469006,0.9241777748376498,0.011063672761666303,0.9228948679621325,0.01124505355379193 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,test,0.8545454545454545,0.04429017763241351,0.84593837535014,0.04870379047304653,0.8383152173913043,0.04842836700820749 +flat_mae,patch,logistic,aabc_sex,72,0.000774263682681127,train,0.8544423440453687,0.014863034281674079,0.8490481431657901,0.01562573486246472,0.8455992262375802,0.015885583894741904 +flat_mae,patch,logistic,aabc_sex,72,0.000774263682681127,test,0.8727272727272727,0.04356143567062441,0.8683760683760684,0.04544217893707619,0.8661684782608696,0.046019548666929755 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,train,0.9792060491493384,0.00621546414975839,0.9787439225298349,0.00633445607978982,0.9802016471760602,0.005966092646443197 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,test,0.9454545454545454,0.029471510265939416,0.9427282193682749,0.032086562972910126,0.9347826086956521,0.035237675317971034 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,train,0.9319470699432892,0.010123077912148499,0.9299646954986761,0.010472873043551427,0.9284049942847094,0.010850686957843704 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,test,0.8727272727272727,0.045244080020036916,0.8699763593380614,0.046040820311280845,0.8722826086956521,0.04551814846604915 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,train,0.9338374291115312,0.010632960970085231,0.9319544133158395,0.01095857180494309,0.9306471467510771,0.011160416116705534 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,test,0.8181818181818182,0.047291241129867836,0.8106060606060606,0.05019755372035459,0.8070652173913043,0.05031367037995762 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,train,0.9300567107750473,0.010669602453804893,0.9279718555536338,0.011076100815184235,0.9261628418183416,0.011557817547254236 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,test,0.8363636363636363,0.05168724765595409,0.8328267477203647,0.052813285798358485,0.8349184782608696,0.0525906663173658 +flat_mae,patch,logistic,aabc_sex,77,0.3593813663804626,train,0.9829867674858223,0.0056040959308798636,0.9825466942830434,0.005754813692431461,0.9822532899557432,0.00592904077934055 +flat_mae,patch,logistic,aabc_sex,77,0.3593813663804626,test,0.8727272727272727,0.04444182379785215,0.8663658451926415,0.04801571621523334,0.8600543478260869,0.04893578132431206 +flat_mae,patch,logistic,aabc_sex,78,0.046415888336127774,train,0.9300567107750473,0.011153885462760391,0.9279718555536338,0.011558647315209223,0.9261628418183416,0.011931564011352066 +flat_mae,patch,logistic,aabc_sex,78,0.046415888336127774,test,0.9636363636363636,0.022626569622736944,0.9626358695652174,0.02328238852191854,0.9626358695652174,0.02353507900730232 +flat_mae,patch,logistic,aabc_sex,79,0.046415888336127774,train,0.9281663516068053,0.011427201627349563,0.9259758432758874,0.011838092209721311,0.923920689351974,0.012136084094635435 +flat_mae,patch,logistic,aabc_sex,79,0.046415888336127774,test,0.8363636363636363,0.05089967445381287,0.8281846581048247,0.05501911297299524,0.8226902173913043,0.05474395708541755 +flat_mae,patch,logistic,aabc_sex,80,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,80,21.54434690031882,test,0.7818181818181819,0.05600305776775802,0.7758152173913043,0.05770384746706663,0.7758152173913043,0.05774908644384475 +flat_mae,patch,logistic,aabc_sex,81,0.3593813663804626,train,0.9829867674858223,0.00568604326493316,0.9825679104559584,0.005823252306278268,0.9828614554940063,0.005803375095519455 +flat_mae,patch,logistic,aabc_sex,81,0.3593813663804626,test,0.9090909090909091,0.03744661875521403,0.905982905982906,0.03915079352289381,0.9035326086956521,0.040112961692828436 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,train,0.9848771266540642,0.0054545218913304985,0.98453216374269,0.005563874885447317,0.9857117734986371,0.005124840673334588 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,test,0.8545454545454545,0.044860761628446474,0.84593837535014,0.04896845045168756,0.8383152173913043,0.049148468965363463 +flat_mae,patch,logistic,aabc_sex,83,0.3593813663804626,train,0.9829867674858223,0.005503109247640711,0.9826086638880467,0.005610269562035886,0.9840777865705326,0.005242876034858651 +flat_mae,patch,logistic,aabc_sex,83,0.3593813663804626,test,0.9454545454545454,0.030428694591543935,0.9427282193682749,0.0331390139149995,0.9347826086956521,0.036382134837715574 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,train,0.9357277882797732,0.01108298992566103,0.9336779846013157,0.011492529775151791,0.9310648026026555,0.011795195118706813 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,test,0.8363636363636363,0.05174813766761189,0.8307692307692308,0.05388013222823208,0.8288043478260869,0.054075529410496946 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,train,0.9357277882797732,0.01037400728199137,0.9338555457487496,0.010728374736941469,0.9322811336791816,0.011077026487297659 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,test,0.8363636363636363,0.051029714241186845,0.8307692307692308,0.05344954745764673,0.8288043478260869,0.053642512293125756 +flat_mae,patch,logistic,aabc_sex,86,0.046415888336127774,train,0.9300567107750473,0.011734932538918212,0.9280660940767447,0.0120988688835664,0.9267710073566048,0.012378880106912842 +flat_mae,patch,logistic,aabc_sex,86,0.046415888336127774,test,0.9454545454545454,0.03304124144707092,0.9442755825734549,0.03363891669055451,0.9470108695652174,0.03272191539729666 +flat_mae,patch,logistic,aabc_sex,87,0.3593813663804626,train,0.9773156899810964,0.006588324509841976,0.9768246736178042,0.006704098555077754,0.9785676602479556,0.0062346503455578154 +flat_mae,patch,logistic,aabc_sex,87,0.3593813663804626,test,0.8545454545454545,0.049241197560478574,0.8505434782608696,0.050762629942202424,0.8505434782608696,0.050792067137122004 +flat_mae,patch,logistic,aabc_sex,88,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,88,2.782559402207126,test,0.8545454545454545,0.048698756920683636,0.8484848484848485,0.05137980915228604,0.8444293478260869,0.051907286625031336 +flat_mae,patch,logistic,aabc_sex,89,0.3593813663804626,train,0.9848771266540642,0.005365509362660935,0.98453216374269,0.005475044183953486,0.9857117734986371,0.005152457590222476 +flat_mae,patch,logistic,aabc_sex,89,0.3593813663804626,test,0.8727272727272727,0.04187491803371772,0.8663658451926415,0.04552464343656962,0.8600543478260869,0.046404944114193 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,train,0.9319470699432892,0.011695914169198494,0.929871851524525,0.012093463933832321,0.9277968287464463,0.012390280852118947 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,test,0.8727272727272727,0.04571502095040389,0.8683760683760684,0.04773029633539852,0.8661684782608696,0.04827224613372834 +flat_mae,patch,logistic,aabc_sex,91,0.3593813663804626,train,0.9829867674858223,0.005287022002596578,0.9825885657235016,0.005403170889907181,0.9834696210322693,0.005262723139549306 +flat_mae,patch,logistic,aabc_sex,91,0.3593813663804626,test,0.8727272727272727,0.04497786416074921,0.8699763593380614,0.045904148996592106,0.8722826086956521,0.045638833191635475 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,train,0.9281663516068053,0.01107548901962392,0.9259758432758874,0.011483802322414327,0.923920689351974,0.011872506660112199 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,test,0.9272727272727272,0.0330188826517532,0.9229691876750701,0.03663016955256208,0.9130434782608696,0.039479098822748394 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,train,0.9848771266540642,0.005548944473236707,0.98453216374269,0.005661208432503415,0.9857117734986371,0.0053108614160721854 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,test,0.8727272727272727,0.046397078847917064,0.8683760683760684,0.04854149951860425,0.8661684782608696,0.04927073116982192 +flat_mae,patch,logistic,aabc_sex,94,0.005994842503189409,train,0.8922495274102079,0.013346894796237328,0.888736370177672,0.01391192592323747,0.8861851167970926,0.014278968336350601 +flat_mae,patch,logistic,aabc_sex,94,0.005994842503189409,test,0.9272727272727272,0.034127054777237896,0.9252717391304348,0.0351497144418128,0.9252717391304348,0.03563294494450665 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,train,0.9300567107750473,0.010943650865682828,0.9279718555536338,0.011315622962721844,0.9261628418183416,0.011606365986843338 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,test,0.9090909090909091,0.038783126602827026,0.9071259709557582,0.039487743078217626,0.9096467391304348,0.038921795264619835 +flat_mae,patch,logistic,aabc_sex,96,0.3593813663804626,train,0.9848771266540642,0.005566382049169837,0.9845140515222482,0.005693058027021562,0.9851036079603739,0.005573818794514943 +flat_mae,patch,logistic,aabc_sex,96,0.3593813663804626,test,0.9272727272727272,0.03512426111690131,0.9242424242424243,0.037252457068302526,0.9191576086956521,0.038899226615328436 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,train,0.9792060491493384,0.006079790072657901,0.9787193581065019,0.006211908344738967,0.9795934816377971,0.006077427083892838 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,test,0.8545454545454545,0.04773562178056192,0.8505434782608696,0.049417529319914485,0.8505434782608696,0.049680436491159684 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,train,0.9300567107750473,0.010904128494503113,0.9279718555536338,0.011272961569206683,0.9261628418183416,0.011515460641472044 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,test,0.7818181818181819,0.05256037535873615,0.7642857142857142,0.06022188314997999,0.7574728260869565,0.05743761466011973 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,train,0.8979206049149339,0.014018705275900939,0.8949470432480142,0.014456480286415347,0.8935197397344588,0.014605392430675677 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,test,0.8727272727272727,0.04514218492169083,0.8663658451926415,0.04923348248390488,0.8600543478260869,0.05014650794401546 +flat_mae,patch,logistic,aabc_sex,100,0.3593813663804626,train,0.9810964083175804,0.005809157076376469,0.9806425644028103,0.00594288078128264,0.9812274685659017,0.005876610131245764 +flat_mae,patch,logistic,aabc_sex,100,0.3593813663804626,test,0.9090909090909091,0.03731502699542668,0.9071259709557582,0.038094706082983175,0.9096467391304348,0.03774852985231016 diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f590fb258aba5384726491407c7f3e887fa08ab0 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:55:13 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:20:31 time: 5.2178 data: 4.5316 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:45 time: 0.2538 data: 0.0906 max mem: 3581 +extract (train) [ 40/236] eta: 0:01:08 time: 0.2047 data: 0.0674 max mem: 3581 +extract (train) [ 60/236] eta: 0:00:53 time: 0.2075 data: 0.0705 max mem: 3581 +extract (train) [ 80/236] eta: 0:00:43 time: 0.2000 data: 0.0660 max mem: 3581 +extract (train) [100/236] eta: 0:00:35 time: 0.2089 data: 0.0701 max mem: 3581 +extract (train) [120/236] eta: 0:00:29 time: 0.2058 data: 0.0693 max mem: 3581 +extract (train) [140/236] eta: 0:00:23 time: 0.2139 data: 0.0742 max mem: 3581 +extract (train) [160/236] eta: 0:00:18 time: 0.2056 data: 0.0686 max mem: 3581 +extract (train) [180/236] eta: 0:00:13 time: 0.2105 data: 0.0728 max mem: 3581 +extract (train) [200/236] eta: 0:00:08 time: 0.2203 data: 0.0772 max mem: 3581 +extract (train) [220/236] eta: 0:00:03 time: 0.1969 data: 0.0645 max mem: 3581 +extract (train) [235/236] eta: 0:00:00 time: 0.1857 data: 0.0595 max mem: 3581 +extract (train) Total time: 0:00:55 (0.2334 s / it) +extract (validation) [ 0/29] eta: 0:02:27 time: 5.0732 data: 4.9099 max mem: 3581 +extract (validation) [20/29] eta: 0:00:03 time: 0.1940 data: 0.0609 max mem: 3581 +extract (validation) [28/29] eta: 0:00:00 time: 0.1869 data: 0.0558 max mem: 3581 +extract (validation) Total time: 0:00:10 (0.3704 s / it) +extract (test) [ 0/28] eta: 0:02:22 time: 5.0936 data: 4.9220 max mem: 3581 +extract (test) [20/28] eta: 0:00:03 time: 0.2119 data: 0.0702 max mem: 3581 +extract (test) [27/28] eta: 0:00:00 time: 0.1842 data: 0.0578 max mem: 3581 +extract (test) Total time: 0:00:10 (0.3898 s / it) +feature extraction time: 0:01:16 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 0.0059948 | train | 0.89792 | 0.013218 | 0.89481 | 0.013678 | 0.89251 | 0.013877 | +| flat_mae | patch | logistic | aabc_sex | | 0.0059948 | test | 0.89091 | 0.044454 | 0.88911 | 0.044537 | 0.90152 | 0.041078 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049979830642714045, "f1": 0.8307692307692308, "f1_std": 0.052114221274963034, "bacc": 0.8288043478260869, "bacc_std": 0.05229137973860432} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04751546072874029, "f1": 0.8505434782608696, "f1_std": 0.04905752636385842, "bacc": 0.8505434782608696, "bacc_std": 0.04921354934341602} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.058653497144439105, "f1": 0.7555555555555555, "f1_std": 0.06099713134051328, "bacc": 0.7540760869565217, "bacc_std": 0.06082206081333329} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.050399842581414896, "f1": 0.8354935194416749, "f1_std": 0.05024155884788907, "bacc": 0.8471467391304348, "bacc_std": 0.04733826769948187} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04264349232903572, "f1": 0.8683760683760684, "f1_std": 0.04469182604063988, "bacc": 0.8661684782608696, "bacc_std": 0.0452071274759942} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03463091820711327, "f1": 0.9242424242424243, "f1_std": 0.036827578434685766, "bacc": 0.9191576086956521, "bacc_std": 0.03874944191602967} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038021759755119615, "f1": 0.9071259709557582, "f1_std": 0.03879016877348678, "bacc": 0.9096467391304348, "bacc_std": 0.03845550356797873} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046151973176660296, "f1": 0.8533333333333333, "f1_std": 0.04622169295632456, "bacc": 0.8627717391304348, "bacc_std": 0.044569939768353196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04494545454545453, "f1": 0.8699763593380614, "f1_std": 0.04574360610828019, "bacc": 0.8722826086956521, "bacc_std": 0.04555173783021404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04794557520583291, "f1": 0.8521505376344086, "f1_std": 0.048450693707014734, "bacc": 0.8566576086956521, "bacc_std": 0.047631521598830176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.044070627012662934, "f1": 0.8863636363636364, "f1_std": 0.04685771412343013, "bacc": 0.8817934782608696, "bacc_std": 0.04767547188969283} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.050836974895531725, "f1": 0.8074229691876751, "f1_std": 0.05588488886592202, "bacc": 0.8009510869565217, "bacc_std": 0.05518700919418474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04311339651057065, "f1": 0.8683760683760684, "f1_std": 0.04480368712293577, "bacc": 0.8661684782608696, "bacc_std": 0.0450411926785977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04407956742599465, "f1": 0.8699763593380614, "f1_std": 0.045021616492151174, "bacc": 0.8722826086956521, "bacc_std": 0.044782663830795266} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.050871817523868906, "f1": 0.8343927735028438, "f1_std": 0.05121270628930134, "bacc": 0.8410326086956521, "bacc_std": 0.050420758055814356} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04727735641670402, "f1": 0.84593837535014, "f1_std": 0.05208167818486709, "bacc": 0.8383152173913043, "bacc_std": 0.052534778877567474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048739143737158555, "f1": 0.8250265111346766, "f1_std": 0.05413837271580632, "bacc": 0.8165760869565217, "bacc_std": 0.05353673322984692} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.045277195836424276, "f1": 0.8521505376344086, "f1_std": 0.045796562109121204, "bacc": 0.8566576086956521, "bacc_std": 0.04523908071194618} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.0497027993624384, "f1": 0.8343927735028438, "f1_std": 0.04998745062171858, "bacc": 0.8410326086956521, "bacc_std": 0.04911638928765456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04937458776896643, "f1": 0.8328267477203647, "f1_std": 0.05060901146070245, "bacc": 0.8349184782608696, "bacc_std": 0.050724127407500694} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.0503660780504281, "f1": 0.8307692307692308, "f1_std": 0.0527326193321356, "bacc": 0.8288043478260869, "bacc_std": 0.05309148879119155} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049345481342526365, "f1": 0.8521505376344086, "f1_std": 0.05000058308014053, "bacc": 0.8566576086956521, "bacc_std": 0.04970105902488103} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03472944305450397, "f1": 0.9252717391304348, "f1_std": 0.0357934873984947, "bacc": 0.9252717391304348, "bacc_std": 0.03612846645457738} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04509530770334651, "f1": 0.8683760683760684, "f1_std": 0.04717039073314957, "bacc": 0.8661684782608696, "bacc_std": 0.04775954069043428} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029984017781378967, "f1": 0.9435897435897436, "f1_std": 0.031396936022588515, "bacc": 0.9408967391304348, "bacc_std": 0.03269664780532673} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042690893603837966, "f1": 0.8663658451926415, "f1_std": 0.04624391738603346, "bacc": 0.8600543478260869, "bacc_std": 0.04708772248723998} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03829204867509099, "f1": 0.9071259709557582, "f1_std": 0.03896359943367959, "bacc": 0.9096467391304348, "bacc_std": 0.03825473718765804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04587660056432735, "f1": 0.8484848484848485, "f1_std": 0.048863494027942114, "bacc": 0.8444293478260869, "bacc_std": 0.04945172073791577} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042077865640494455, "f1": 0.8879076086956521, "f1_std": 0.043337367579471014, "bacc": 0.8879076086956521, "bacc_std": 0.04337175888926743} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05581147724021946, "f1": 0.795677136102668, "f1_std": 0.05695834966003606, "bacc": 0.7975543478260869, "bacc_std": 0.056545937349887575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.043898892096187636, "f1": 0.8484848484848485, "f1_std": 0.04627099309748399, "bacc": 0.8444293478260869, "bacc_std": 0.04628414436242084} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041034216723958065, "f1": 0.884453781512605, "f1_std": 0.04532303427299883, "bacc": 0.8756793478260869, "bacc_std": 0.046552994820911804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04740295349448173, "f1": 0.8307692307692308, "f1_std": 0.04986026174395696, "bacc": 0.8288043478260869, "bacc_std": 0.0504033309300538} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044550070261791704, "f1": 0.8699763593380614, "f1_std": 0.04554749075445059, "bacc": 0.8722826086956521, "bacc_std": 0.04561983866742563} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 0.000774263682681127, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.045903239951190014, "f1": 0.84593837535014, "f1_std": 0.050674101218175414, "bacc": 0.8383152173913043, "bacc_std": 0.05100884089064916} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04209286859762422, "f1": 0.8879076086956521, "f1_std": 0.04342780126651795, "bacc": 0.8879076086956521, "bacc_std": 0.04369233634063438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05800773958050485, "f1": 0.7555555555555555, "f1_std": 0.06085463918872742, "bacc": 0.7540760869565217, "bacc_std": 0.06048988552548357} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.045840268846657976, "f1": 0.8281846581048247, "f1_std": 0.04940966888572304, "bacc": 0.8226902173913043, "bacc_std": 0.04946488244174629} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04541694444780719, "f1": 0.8521505376344086, "f1_std": 0.0459635531780556, "bacc": 0.8566576086956521, "bacc_std": 0.04530318525519917} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04456045758478347, "f1": 0.8484848484848485, "f1_std": 0.047178845737100586, "bacc": 0.8444293478260869, "bacc_std": 0.04765095776574107} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0470935204611153, "f1": 0.84593837535014, "f1_std": 0.05193953739922566, "bacc": 0.8383152173913043, "bacc_std": 0.0520893976935748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 166.81005372000556, "split": "test", "acc": 0.8, "acc_std": 0.05570257498091206, "f1": 0.795677136102668, "f1_std": 0.056865767510617914, "bacc": 0.7975543478260869, "bacc_std": 0.056673705795758346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030422675318809808, "f1": 0.9442755825734549, "f1_std": 0.030963520803673268, "bacc": 0.9470108695652174, "bacc_std": 0.029922312813296416} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043681476680152154, "f1": 0.8699763593380614, "f1_std": 0.04444520985547415, "bacc": 0.8722826086956521, "bacc_std": 0.04403583786912668} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.02440745042735182, "f1": 0.9630376344086022, "f1_std": 0.024547986237309635, "bacc": 0.96875, "bacc_std": 0.02097515271100547} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.043179104220934385, "f1": 0.8879076086956521, "f1_std": 0.04458276579251271, "bacc": 0.8879076086956521, "bacc_std": 0.044890038242751895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04490848729811501, "f1": 0.84593837535014, "f1_std": 0.04951131239259725, "bacc": 0.8383152173913043, "bacc_std": 0.049727125102110616} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04535609702333393, "f1": 0.8663658451926415, "f1_std": 0.04915934470972352, "bacc": 0.8600543478260869, "bacc_std": 0.04995786238362686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04433554997668953, "f1": 0.8521505376344086, "f1_std": 0.045117155133186496, "bacc": 0.8566576086956521, "bacc_std": 0.044879046840705324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049170561642423845, "f1": 0.8505434782608696, "f1_std": 0.05052656102177713, "bacc": 0.8505434782608696, "bacc_std": 0.050601486752735944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04642575517750519, "f1": 0.8250265111346766, "f1_std": 0.0531094353077188, "bacc": 0.8165760869565217, "bacc_std": 0.052464281633686057} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05174803545598341, "f1": 0.8074229691876751, "f1_std": 0.056967451961668754, "bacc": 0.8009510869565217, "bacc_std": 0.05618861325597044} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.0404536261386862, "f1": 0.905982905982906, "f1_std": 0.0421921794315327, "bacc": 0.9035326086956521, "bacc_std": 0.043062189797527926} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05106017839943194, "f1": 0.8328267477203647, "f1_std": 0.05191196689923785, "bacc": 0.8349184782608696, "bacc_std": 0.0514518592989505} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04759247110400049, "f1": 0.8521505376344086, "f1_std": 0.04818108309951583, "bacc": 0.8566576086956521, "bacc_std": 0.04777181092948555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05066664346816923, "f1": 0.8106060606060606, "f1_std": 0.05380030293011867, "bacc": 0.8070652173913043, "bacc_std": 0.05384603855245998} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03870628539301621, "f1": 0.9071259709557582, "f1_std": 0.039498151445812965, "bacc": 0.9096467391304348, "bacc_std": 0.03893262376560892} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05533100994159625, "f1": 0.7758152173913043, "f1_std": 0.05691511442965067, "bacc": 0.7758152173913043, "bacc_std": 0.05673492951506501} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038010629222420114, "f1": 0.9071259709557582, "f1_std": 0.038727810170713056, "bacc": 0.9096467391304348, "bacc_std": 0.038259390483507995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043333173977244586, "f1": 0.8683760683760684, "f1_std": 0.04541251572451246, "bacc": 0.8661684782608696, "bacc_std": 0.046005090412563565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.0505493785265604, "f1": 0.8151881720430108, "f1_std": 0.051070417559471344, "bacc": 0.8192934782608696, "bacc_std": 0.05107765964999542} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04799750682230299, "f1": 0.8307692307692308, "f1_std": 0.05035684378154921, "bacc": 0.8288043478260869, "bacc_std": 0.050608965052411} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.0297743358344089, "f1": 0.9442755825734549, "f1_std": 0.0302752512366801, "bacc": 0.9470108695652174, "bacc_std": 0.0292322281469951} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.050398950533478676, "f1": 0.8131793478260869, "f1_std": 0.05199510973078927, "bacc": 0.8131793478260869, "bacc_std": 0.052004874685033384} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.045699977394789766, "f1": 0.84593837535014, "f1_std": 0.050077777004766864, "bacc": 0.8383152173913043, "bacc_std": 0.050142510827363915} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049626119489006866, "f1": 0.8307692307692308, "f1_std": 0.05166277653444132, "bacc": 0.8288043478260869, "bacc_std": 0.05179761397315772} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03460051115086058, "f1": 0.9260752688172043, "f1_std": 0.03485992561664718, "bacc": 0.9313858695652174, "bacc_std": 0.033038890640624034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 21.54434690031882, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.025008983509897952, "f1": 0.9626358695652174, "f1_std": 0.025799424698907846, "bacc": 0.9626358695652174, "bacc_std": 0.026276009757663994} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.9818181818181818, "acc_std": 0.018108360700867642, "f1": 0.9814251941911516, "f1_std": 0.01833971583558119, "bacc": 0.984375, "bacc_std": 0.015561872477308121} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.035197652062940246, "f1": 0.9260752688172043, "f1_std": 0.0354783106666295, "bacc": 0.9313858695652174, "bacc_std": 0.03379827749390227} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04429017763241351, "f1": 0.84593837535014, "f1_std": 0.04870379047304653, "bacc": 0.8383152173913043, "bacc_std": 0.04842836700820749} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.000774263682681127, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04356143567062441, "f1": 0.8683760683760684, "f1_std": 0.04544217893707619, "bacc": 0.8661684782608696, "bacc_std": 0.046019548666929755} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029471510265939416, "f1": 0.9427282193682749, "f1_std": 0.032086562972910126, "bacc": 0.9347826086956521, "bacc_std": 0.035237675317971034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.045244080020036916, "f1": 0.8699763593380614, "f1_std": 0.046040820311280845, "bacc": 0.8722826086956521, "bacc_std": 0.04551814846604915} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.047291241129867836, "f1": 0.8106060606060606, "f1_std": 0.05019755372035459, "bacc": 0.8070652173913043, "bacc_std": 0.05031367037995762} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05168724765595409, "f1": 0.8328267477203647, "f1_std": 0.052813285798358485, "bacc": 0.8349184782608696, "bacc_std": 0.0525906663173658} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04444182379785215, "f1": 0.8663658451926415, "f1_std": 0.04801571621523334, "bacc": 0.8600543478260869, "bacc_std": 0.04893578132431206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.022626569622736944, "f1": 0.9626358695652174, "f1_std": 0.02328238852191854, "bacc": 0.9626358695652174, "bacc_std": 0.02353507900730232} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05089967445381287, "f1": 0.8281846581048247, "f1_std": 0.05501911297299524, "bacc": 0.8226902173913043, "bacc_std": 0.05474395708541755} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 21.54434690031882, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05600305776775802, "f1": 0.7758152173913043, "f1_std": 0.05770384746706663, "bacc": 0.7758152173913043, "bacc_std": 0.05774908644384475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03744661875521403, "f1": 0.905982905982906, "f1_std": 0.03915079352289381, "bacc": 0.9035326086956521, "bacc_std": 0.040112961692828436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.044860761628446474, "f1": 0.84593837535014, "f1_std": 0.04896845045168756, "bacc": 0.8383152173913043, "bacc_std": 0.049148468965363463} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030428694591543935, "f1": 0.9427282193682749, "f1_std": 0.0331390139149995, "bacc": 0.9347826086956521, "bacc_std": 0.036382134837715574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05174813766761189, "f1": 0.8307692307692308, "f1_std": 0.05388013222823208, "bacc": 0.8288043478260869, "bacc_std": 0.054075529410496946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.051029714241186845, "f1": 0.8307692307692308, "f1_std": 0.05344954745764673, "bacc": 0.8288043478260869, "bacc_std": 0.053642512293125756} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.03304124144707092, "f1": 0.9442755825734549, "f1_std": 0.03363891669055451, "bacc": 0.9470108695652174, "bacc_std": 0.03272191539729666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049241197560478574, "f1": 0.8505434782608696, "f1_std": 0.050762629942202424, "bacc": 0.8505434782608696, "bacc_std": 0.050792067137122004} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.048698756920683636, "f1": 0.8484848484848485, "f1_std": 0.05137980915228604, "bacc": 0.8444293478260869, "bacc_std": 0.051907286625031336} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04187491803371772, "f1": 0.8663658451926415, "f1_std": 0.04552464343656962, "bacc": 0.8600543478260869, "bacc_std": 0.046404944114193} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04571502095040389, "f1": 0.8683760683760684, "f1_std": 0.04773029633539852, "bacc": 0.8661684782608696, "bacc_std": 0.04827224613372834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04497786416074921, "f1": 0.8699763593380614, "f1_std": 0.045904148996592106, "bacc": 0.8722826086956521, "bacc_std": 0.045638833191635475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.0330188826517532, "f1": 0.9229691876750701, "f1_std": 0.03663016955256208, "bacc": 0.9130434782608696, "bacc_std": 0.039479098822748394} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.046397078847917064, "f1": 0.8683760683760684, "f1_std": 0.04854149951860425, "bacc": 0.8661684782608696, "bacc_std": 0.04927073116982192} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.034127054777237896, "f1": 0.9252717391304348, "f1_std": 0.0351497144418128, "bacc": 0.9252717391304348, "bacc_std": 0.03563294494450665} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038783126602827026, "f1": 0.9071259709557582, "f1_std": 0.039487743078217626, "bacc": 0.9096467391304348, "bacc_std": 0.038921795264619835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03512426111690131, "f1": 0.9242424242424243, "f1_std": 0.037252457068302526, "bacc": 0.9191576086956521, "bacc_std": 0.038899226615328436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04773562178056192, "f1": 0.8505434782608696, "f1_std": 0.049417529319914485, "bacc": 0.8505434782608696, "bacc_std": 0.049680436491159684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05256037535873615, "f1": 0.7642857142857142, "f1_std": 0.06022188314997999, "bacc": 0.7574728260869565, "bacc_std": 0.05743761466011973} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04514218492169083, "f1": 0.8663658451926415, "f1_std": 0.04923348248390488, "bacc": 0.8600543478260869, "bacc_std": 0.05014650794401546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03731502699542668, "f1": 0.9071259709557582, "f1_std": 0.038094706082983175, "bacc": 0.9096467391304348, "bacc_std": 0.03774852985231016} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 2.353 | 16.888 | 0.94981 | 0.032591 | 0.94833 | 0.033666 | 0.94755 | 0.034864 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 2.353 | 16.888 | 0.86873 | 0.045689 | 0.86417 | 0.047559 | 0.86304 | 0.048347 | + + +done! total time: 0:05:25 diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74b1b40a11db91f3ceb3b01941d4d1eb4e6e1185 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (aabc_sex reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic +model: flat_mae +representation: reg +dataset: aabc_sex +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..927edbb26573fc21951b43074b62fbc5c4f99f68 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,aabc_sex,,0.005994842503189409,train,0.8922495274102079,0.013542913047647142,0.8888888888888888,0.014035287693448723,0.8864022248243559,0.01424188877254582 +flat_mae,reg,logistic,aabc_sex,,0.005994842503189409,test,0.9090909090909091,0.038778642839788345,0.9071259709557582,0.03919541334605234,0.9166666666666667,0.03679440247184172 +flat_mae,reg,logistic,aabc_sex,1,0.3593813663804626,train,0.9829867674858223,0.005885910655095329,0.9825466942830434,0.006046624643719531,0.9822532899557432,0.006270235128129421 +flat_mae,reg,logistic,aabc_sex,1,0.3593813663804626,test,0.8181818181818182,0.05148344835522822,0.8131793478260869,0.05300365663120187,0.8131793478260869,0.052888620726691385 +flat_mae,reg,logistic,aabc_sex,2,0.3593813663804626,train,0.9754253308128544,0.006594486461382126,0.9747896695199516,0.006767978961714793,0.9745010111667985,0.006931344530034972 +flat_mae,reg,logistic,aabc_sex,2,0.3593813663804626,test,0.8545454545454545,0.04827765425977575,0.8505434782608696,0.049764394368594665,0.8505434782608696,0.04999948302840019 +flat_mae,reg,logistic,aabc_sex,3,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,3,2.782559402207126,test,0.7818181818181819,0.057200577931542596,0.7727272727272727,0.060807554236573795,0.7697010869565217,0.06040350207723156 +flat_mae,reg,logistic,aabc_sex,4,0.3593813663804626,train,0.9773156899810964,0.006656431096507843,0.9767710772833724,0.006806190898318853,0.9773513291714294,0.006706182667725138 +flat_mae,reg,logistic,aabc_sex,4,0.3593813663804626,test,0.8545454545454545,0.04679437700203158,0.8521505376344086,0.04734437631820999,0.8566576086956521,0.046600335502049686 +flat_mae,reg,logistic,aabc_sex,5,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,5,2.782559402207126,test,0.8545454545454545,0.04354431199867573,0.8484848484848485,0.0464208692011045,0.8444293478260869,0.0469015114655155 +flat_mae,reg,logistic,aabc_sex,6,0.3593813663804626,train,0.9773156899810964,0.006628275338976925,0.9767710772833724,0.006780287041530973,0.9773513291714294,0.006743008988081173 +flat_mae,reg,logistic,aabc_sex,6,0.3593813663804626,test,0.9272727272727272,0.03665158718221091,0.9252717391304348,0.037676477828171816,0.9252717391304348,0.03793582409885245 +flat_mae,reg,logistic,aabc_sex,7,0.046415888336127774,train,0.9262759924385633,0.010870535010571203,0.9241777748376498,0.011216703793135803,0.9228948679621325,0.011463125181304325 +flat_mae,reg,logistic,aabc_sex,7,0.046415888336127774,test,0.8727272727272727,0.044359165199427286,0.8683760683760684,0.0463579447980355,0.8661684782608696,0.047033672274319656 +flat_mae,reg,logistic,aabc_sex,8,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,8,166.81005372000556,test,0.8727272727272727,0.04384005489530254,0.8699763593380614,0.0448195005823914,0.8722826086956521,0.04484391415055287 +flat_mae,reg,logistic,aabc_sex,9,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,9,166.81005372000556,test,0.8545454545454545,0.04670801159445172,0.8505434782608696,0.04810536351814372,0.8505434782608696,0.04829859142812354 +flat_mae,reg,logistic,aabc_sex,10,0.046415888336127774,train,0.9300567107750473,0.011003136886230058,0.9278752436647173,0.011439186291325973,0.9255546762800786,0.011902267067368105 +flat_mae,reg,logistic,aabc_sex,10,0.046415888336127774,test,0.8909090909090909,0.042331333763322626,0.8891129032258065,0.0426952658338831,0.8940217391304348,0.04145416230090705 +flat_mae,reg,logistic,aabc_sex,11,0.046415888336127774,train,0.9262759924385633,0.011210377516531929,0.9242746242360844,0.011534249393287262,0.9235030335003956,0.011696586171155845 +flat_mae,reg,logistic,aabc_sex,11,0.046415888336127774,test,0.9090909090909091,0.03996644873882828,0.9045470322804582,0.043407626401297715,0.8974184782608696,0.04499273039106961 +flat_mae,reg,logistic,aabc_sex,12,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,12,2.782559402207126,test,0.7636363636363637,0.05503992840664188,0.7472605160834218,0.061627586322157445,0.7418478260869565,0.05935685305898193 +flat_mae,reg,logistic,aabc_sex,13,0.046415888336127774,train,0.9243856332703214,0.011333629435986798,0.922283598754187,0.0116823155859451,0.9212608810340279,0.011927844701445336 +flat_mae,reg,logistic,aabc_sex,13,0.046415888336127774,test,0.9090909090909091,0.03773739623212186,0.905982905982906,0.039270895393158665,0.9035326086956521,0.039873222077380616 +flat_mae,reg,logistic,aabc_sex,14,0.3593813663804626,train,0.9848771266540642,0.004856731141321612,0.98453216374269,0.0049549837164448995,0.9857117734986371,0.00461570291109233 +flat_mae,reg,logistic,aabc_sex,14,0.3593813663804626,test,0.9454545454545454,0.028640236390362915,0.9447975911676145,0.028654830127008868,0.953125,0.024612703147968127 +flat_mae,reg,logistic,aabc_sex,15,0.3593813663804626,train,0.9810964083175804,0.006243585907620011,0.9806425644028103,0.006384724916021287,0.9812274685659017,0.006289219332181971 +flat_mae,reg,logistic,aabc_sex,15,0.3593813663804626,test,0.8363636363636363,0.052215383566452264,0.8328267477203647,0.05328373221171638,0.8349184782608696,0.05299092067767351 +flat_mae,reg,logistic,aabc_sex,16,0.3593813663804626,train,0.9792060491493384,0.005993242359310791,0.9786941127795048,0.0061374159516106265,0.978985316099534,0.006137718653260804 +flat_mae,reg,logistic,aabc_sex,16,0.3593813663804626,test,0.8545454545454545,0.049311343472281506,0.8484848484848485,0.0525919754819421,0.8444293478260869,0.05282677741262409 +flat_mae,reg,logistic,aabc_sex,17,0.046415888336127774,train,0.9243856332703214,0.011557672032257877,0.9223816650526748,0.011887725715585596,0.9218690465722911,0.01206270803502741 +flat_mae,reg,logistic,aabc_sex,17,0.046415888336127774,test,0.8545454545454545,0.043503965811432774,0.8428571428571429,0.05015809623994732,0.8322010869565217,0.049696903182686485 +flat_mae,reg,logistic,aabc_sex,18,0.046415888336127774,train,0.9206049149338374,0.011248238137423094,0.9181838267786124,0.01165798008850424,0.9161684105630294,0.011984134234395717 +flat_mae,reg,logistic,aabc_sex,18,0.046415888336127774,test,0.8545454545454545,0.047726202585405206,0.8533333333333333,0.04776293280424774,0.8627717391304348,0.045588157330957266 +flat_mae,reg,logistic,aabc_sex,19,0.046415888336127774,train,0.9338374291115312,0.01016627990084949,0.9320413294426397,0.010477181816484866,0.9312553122893402,0.010772897430313294 +flat_mae,reg,logistic,aabc_sex,19,0.046415888336127774,test,0.8545454545454545,0.04524168340541022,0.8505434782608696,0.046632931350771664,0.8505434782608696,0.04664664959146638 +flat_mae,reg,logistic,aabc_sex,20,0.3593813663804626,train,0.9886578449905482,0.004735082190994879,0.9883855386416862,0.004842489306382136,0.9889797473548463,0.004653509367494603 +flat_mae,reg,logistic,aabc_sex,20,0.3593813663804626,test,0.8727272727272727,0.042667098827012466,0.8663658451926415,0.046234106240726894,0.8600543478260869,0.04695916316025159 +flat_mae,reg,logistic,aabc_sex,21,0.3593813663804626,train,0.9905482041587902,0.004171273582580199,0.9903037190461352,0.004284693504055659,0.9900055687446877,0.004511592810628165 +flat_mae,reg,logistic,aabc_sex,21,0.3593813663804626,test,0.8,0.055135763318323726,0.7931623931623932,0.057742187433411724,0.7914402173913043,0.05785653466547375 +flat_mae,reg,logistic,aabc_sex,22,0.3593813663804626,train,0.9905482041587902,0.004330325591287449,0.9903155058088658,0.004433440554430492,0.9906137342829509,0.004348250206294544 +flat_mae,reg,logistic,aabc_sex,22,0.3593813663804626,test,0.9090909090909091,0.03860208532884639,0.9086075108009306,0.038427405743493015,0.921875,0.03317366707947736 +flat_mae,reg,logistic,aabc_sex,23,0.3593813663804626,train,0.9829867674858223,0.005306031770451264,0.9825885657235016,0.005418270539712667,0.9834696210322693,0.005192696135540976 +flat_mae,reg,logistic,aabc_sex,23,0.3593813663804626,test,0.9090909090909091,0.03847203986325225,0.9071259709557582,0.03923633973842022,0.9096467391304348,0.03890995317687199 +flat_mae,reg,logistic,aabc_sex,24,0.046415888336127774,train,0.9300567107750473,0.010864005153103842,0.9282475209414007,0.011147681456323421,0.9279873384331311,0.011262182045014937 +flat_mae,reg,logistic,aabc_sex,24,0.046415888336127774,test,0.8545454545454545,0.047619705623159425,0.84593837535014,0.05244983553981353,0.8383152173913043,0.052615318087127605 +flat_mae,reg,logistic,aabc_sex,25,0.005994842503189409,train,0.888468809073724,0.013560586590752226,0.8851443102071458,0.014045088551724956,0.8835253084791466,0.014283924692913211 +flat_mae,reg,logistic,aabc_sex,25,0.005994842503189409,test,0.9272727272727272,0.035207418451934064,0.9252717391304348,0.0362539448535669,0.9252717391304348,0.03647459046576888 +flat_mae,reg,logistic,aabc_sex,26,0.046415888336127774,train,0.9338374291115312,0.010160629709285135,0.9320413294426397,0.010444709673395131,0.9312553122893402,0.01059075608456845 +flat_mae,reg,logistic,aabc_sex,26,0.046415888336127774,test,0.8545454545454545,0.04483039110851195,0.8484848484848485,0.04782164287359455,0.8444293478260869,0.04841654876479196 +flat_mae,reg,logistic,aabc_sex,27,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,27,166.81005372000556,test,0.8181818181818182,0.05215964051998011,0.8131793478260869,0.053984763917201466,0.8131793478260869,0.05419020245328094 +flat_mae,reg,logistic,aabc_sex,28,0.046415888336127774,train,0.9338374291115312,0.010837759993524407,0.9319544133158395,0.011174990937666917,0.9306471467510771,0.011438407999341578 +flat_mae,reg,logistic,aabc_sex,28,0.046415888336127774,test,0.8181818181818182,0.051109359334374806,0.8074229691876751,0.05629244782322332,0.8009510869565217,0.055652170595915325 +flat_mae,reg,logistic,aabc_sex,29,0.046415888336127774,train,0.9243856332703214,0.011992373604402891,0.922182994998529,0.012396450419947252,0.9206527154957649,0.012693322299402121 +flat_mae,reg,logistic,aabc_sex,29,0.046415888336127774,test,0.8363636363636363,0.0514369516981712,0.8307692307692308,0.053825278490135756,0.8288043478260869,0.053944144032825214 +flat_mae,reg,logistic,aabc_sex,30,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,30,2.782559402207126,test,0.8181818181818182,0.05450028432710727,0.8151881720430108,0.055179253649181255,0.8192934782608696,0.05474897016199512 +flat_mae,reg,logistic,aabc_sex,31,0.046415888336127774,train,0.9206049149338374,0.01183689477565664,0.9182921447484554,0.012229109928400315,0.9167765761012925,0.012451417269201945 +flat_mae,reg,logistic,aabc_sex,31,0.046415888336127774,test,0.8363636363636363,0.04839200808275049,0.8328267477203647,0.04904916567875039,0.8349184782608696,0.04829106116781827 +flat_mae,reg,logistic,aabc_sex,32,0.046415888336127774,train,0.9300567107750473,0.01107490759699389,0.9282475209414007,0.011365585789901525,0.9279873384331311,0.011486977087649387 +flat_mae,reg,logistic,aabc_sex,32,0.046415888336127774,test,0.8909090909090909,0.039736320999956325,0.884453781512605,0.04403138735844937,0.8756793478260869,0.045401153721031505 +flat_mae,reg,logistic,aabc_sex,33,0.046415888336127774,train,0.9243856332703214,0.011279050032530633,0.922283598754187,0.011592808496400034,0.9212608810340279,0.011680441697039069 +flat_mae,reg,logistic,aabc_sex,33,0.046415888336127774,test,0.9090909090909091,0.03686713244267114,0.9071259709557582,0.037501458436335657,0.9096467391304348,0.03686944111904073 +flat_mae,reg,logistic,aabc_sex,34,0.3593813663804626,train,0.9810964083175804,0.006029954185815593,0.9806652046783626,0.006153557800759823,0.9818356341041647,0.00587282690464842 +flat_mae,reg,logistic,aabc_sex,34,0.3593813663804626,test,0.8727272727272727,0.0456399853687839,0.8699763593380614,0.04655171928823117,0.8722826086956521,0.04636882654416331 +flat_mae,reg,logistic,aabc_sex,35,0.3593813663804626,train,0.9829867674858223,0.00566540358884333,0.9825679104559584,0.005803015544065226,0.9828614554940063,0.00579811799482982 +flat_mae,reg,logistic,aabc_sex,35,0.3593813663804626,test,0.8545454545454545,0.04606720207954912,0.8484848484848485,0.04890383864811312,0.8444293478260869,0.049585007714192615 +flat_mae,reg,logistic,aabc_sex,36,0.3593813663804626,train,0.9773156899810964,0.006394852329266147,0.9768246736178042,0.006510769659789602,0.9785676602479556,0.006104696775604918 +flat_mae,reg,logistic,aabc_sex,36,0.3593813663804626,test,0.8727272727272727,0.04293718648760155,0.8683760683760684,0.045008431821918216,0.8661684782608696,0.04530649852701437 +flat_mae,reg,logistic,aabc_sex,37,0.046415888336127774,train,0.9300567107750473,0.010682276317966068,0.9280660940767447,0.011033732271086073,0.9267710073566048,0.011339268352822842 +flat_mae,reg,logistic,aabc_sex,37,0.046415888336127774,test,0.7818181818181819,0.05588493491463707,0.7727272727272727,0.059475640628794645,0.7697010869565217,0.05901106269226388 +flat_mae,reg,logistic,aabc_sex,38,0.046415888336127774,train,0.9319470699432892,0.011261289549052372,0.9300552388787683,0.011604224653718399,0.9290131598229725,0.011805132641700385 +flat_mae,reg,logistic,aabc_sex,38,0.046415888336127774,test,0.8545454545454545,0.04400625049518109,0.8428571428571429,0.05076742089045086,0.8322010869565217,0.05035063029861087 +flat_mae,reg,logistic,aabc_sex,39,0.046415888336127774,train,0.9300567107750473,0.01065949113222586,0.9281579768393621,0.010968779257313101,0.9273791728948679,0.011164834915609297 +flat_mae,reg,logistic,aabc_sex,39,0.046415888336127774,test,0.9090909090909091,0.0376825540009307,0.905982905982906,0.039386072830629254,0.9035326086956521,0.04011051500656701 +flat_mae,reg,logistic,aabc_sex,40,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,40,21.54434690031882,test,0.8545454545454545,0.04384403612958357,0.8484848484848485,0.04614613234513338,0.8444293478260869,0.046330867070314176 +flat_mae,reg,logistic,aabc_sex,41,0.005994842503189409,train,0.8922495274102079,0.013564585149424145,0.8888888888888888,0.01405567070969672,0.8867932823353557,0.014264948120963298 +flat_mae,reg,logistic,aabc_sex,41,0.005994842503189409,test,0.9090909090909091,0.038480287966483194,0.905982905982906,0.040362495966507414,0.9035326086956521,0.04107032746648314 +flat_mae,reg,logistic,aabc_sex,42,0.046415888336127774,train,0.9224952741020794,0.011908181905523575,0.920289455598555,0.012322498514689757,0.9190187285676603,0.012735297273566431 +flat_mae,reg,logistic,aabc_sex,42,0.046415888336127774,test,0.8181818181818182,0.05163814337521893,0.8131793478260869,0.053294451521401186,0.8131793478260869,0.0530794048993146 +flat_mae,reg,logistic,aabc_sex,43,0.3593813663804626,train,0.9773156899810964,0.006670904750765714,0.9767710772833724,0.0068210113475396465,0.9773513291714294,0.006737521531675519 +flat_mae,reg,logistic,aabc_sex,43,0.3593813663804626,test,0.8363636363636363,0.048167462698484695,0.8307692307692308,0.050372141894493475,0.8288043478260869,0.050755327177261556 +flat_mae,reg,logistic,aabc_sex,44,0.046415888336127774,train,0.9300567107750473,0.010942861281589587,0.9279718555536338,0.01133077049812832,0.9261628418183416,0.01171477894163344 +flat_mae,reg,logistic,aabc_sex,44,0.046415888336127774,test,0.8545454545454545,0.04563558130441349,0.8505434782608696,0.0469383150456708,0.8505434782608696,0.04687424865497405 +flat_mae,reg,logistic,aabc_sex,45,0.046415888336127774,train,0.9262759924385633,0.011933237787207403,0.9241777748376498,0.012301839110627023,0.9228948679621325,0.012485800416768422 +flat_mae,reg,logistic,aabc_sex,45,0.046415888336127774,test,0.9454545454545454,0.029323501983819084,0.9447975911676145,0.029326318341046025,0.953125,0.02519988451734452 +flat_mae,reg,logistic,aabc_sex,46,0.3593813663804626,train,0.9829867674858223,0.006094095779210992,0.9825885657235016,0.00622452044008247,0.9834696210322693,0.0059766651395820256 +flat_mae,reg,logistic,aabc_sex,46,0.3593813663804626,test,0.9090909090909091,0.038148140992150914,0.9071259709557582,0.03887759388432839,0.9096467391304348,0.038341376939406355 +flat_mae,reg,logistic,aabc_sex,47,0.046415888336127774,train,0.9319470699432892,0.011228545058344482,0.9301434985474073,0.011549235706635484,0.9296213253612357,0.011767947262005743 +flat_mae,reg,logistic,aabc_sex,47,0.046415888336127774,test,0.8727272727272727,0.04224776975873136,0.8663658451926415,0.045638659989561486,0.8600543478260869,0.046469068011797295 +flat_mae,reg,logistic,aabc_sex,48,0.046415888336127774,train,0.9243856332703214,0.011267553017785563,0.922283598754187,0.011611588727307222,0.9212608810340279,0.011843438539400229 +flat_mae,reg,logistic,aabc_sex,48,0.046415888336127774,test,0.8909090909090909,0.04058312151703114,0.8879076086956521,0.041885923840686734,0.8879076086956521,0.04216694092273785 +flat_mae,reg,logistic,aabc_sex,49,0.3593813663804626,train,0.9848771266540642,0.005368220651296187,0.9844954424221108,0.005502045082135561,0.9844954424221108,0.0055114654748255115 +flat_mae,reg,logistic,aabc_sex,49,0.3593813663804626,test,0.8727272727272727,0.045169594102725315,0.8683760683760684,0.04725954817855603,0.8661684782608696,0.0475266087381254 +flat_mae,reg,logistic,aabc_sex,50,0.046415888336127774,train,0.9319470699432892,0.010310951909941782,0.9301434985474073,0.010594244240813884,0.9296213253612357,0.010780330390326397 +flat_mae,reg,logistic,aabc_sex,50,0.046415888336127774,test,0.8727272727272727,0.04601589376877126,0.8683760683760684,0.04786679180359651,0.8661684782608696,0.04820649437756565 +flat_mae,reg,logistic,aabc_sex,51,0.3593813663804626,train,0.9810964083175804,0.005795366442648235,0.9806193030276386,0.005943183104191459,0.9806193030276386,0.006036515047368593 +flat_mae,reg,logistic,aabc_sex,51,0.3593813663804626,test,0.8727272727272727,0.04113310785709572,0.8683760683760684,0.043061667916583925,0.8661684782608696,0.04377531275140523 +flat_mae,reg,logistic,aabc_sex,52,0.046415888336127774,train,0.9300567107750473,0.010722042547485669,0.9281579768393621,0.01105212956671905,0.9273791728948679,0.01134759534458528 +flat_mae,reg,logistic,aabc_sex,52,0.046415888336127774,test,0.9090909090909091,0.03815048064643117,0.9071259709557582,0.03890875186431489,0.9096467391304348,0.03856616421592726 +flat_mae,reg,logistic,aabc_sex,53,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,53,21.54434690031882,test,0.9454545454545454,0.030369602026907232,0.9442755825734549,0.03091173988423925,0.9470108695652174,0.029915117583385237 +flat_mae,reg,logistic,aabc_sex,54,0.046415888336127774,train,0.9319470699432892,0.01100495023676431,0.9299646954986761,0.011389170496024942,0.9284049942847094,0.011770846478103652 +flat_mae,reg,logistic,aabc_sex,54,0.046415888336127774,test,0.8181818181818182,0.052438078398390035,0.8151881720430108,0.052933455074335736,0.8192934782608696,0.05218737193013163 +flat_mae,reg,logistic,aabc_sex,55,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,55,2.782559402207126,test,0.8,0.05157962311476456,0.795677136102668,0.052825845330730606,0.7975543478260869,0.05298057518729177 +flat_mae,reg,logistic,aabc_sex,56,0.3593813663804626,train,0.9905482041587902,0.004298547064994469,0.9903155058088658,0.004402259953494528,0.9906137342829509,0.00432590872319662 +flat_mae,reg,logistic,aabc_sex,56,0.3593813663804626,test,0.8363636363636363,0.05059773620519901,0.8307692307692308,0.05281904738235101,0.8288043478260869,0.05281782748210221 +flat_mae,reg,logistic,aabc_sex,57,0.046415888336127774,train,0.9262759924385633,0.011264005500037482,0.9240784423403167,0.011669959264646989,0.9222867024238695,0.012022808436750985 +flat_mae,reg,logistic,aabc_sex,57,0.046415888336127774,test,0.9090909090909091,0.03940046565987357,0.9071259709557582,0.040235893758619266,0.9096467391304348,0.0397284722707083 +flat_mae,reg,logistic,aabc_sex,58,0.3593813663804626,train,0.9829867674858223,0.005718929807210508,0.9825466942830434,0.005870758536145056,0.9822532899557432,0.006018839467179349 +flat_mae,reg,logistic,aabc_sex,58,0.3593813663804626,test,0.8,0.05297990886932187,0.795677136102668,0.054041794647281365,0.7975543478260869,0.05382640879522031 +flat_mae,reg,logistic,aabc_sex,59,0.046415888336127774,train,0.9281663516068053,0.011411172495531665,0.9262625818000411,0.011734050647821978,0.9257451859667634,0.011914833273518709 +flat_mae,reg,logistic,aabc_sex,59,0.046415888336127774,test,0.8727272727272727,0.044655537448264886,0.8711943793911007,0.04484634736039705,0.8783967391304348,0.04324600627820621 +flat_mae,reg,logistic,aabc_sex,60,0.005994842503189409,train,0.8903591682419659,0.01279375198154394,0.8868624443198915,0.013260967144210711,0.8845511298689879,0.01343674897824259 +flat_mae,reg,logistic,aabc_sex,60,0.005994842503189409,test,0.8363636363636363,0.049048857409965604,0.8307692307692308,0.05156178682032743,0.8288043478260869,0.05189087094871355 +flat_mae,reg,logistic,aabc_sex,61,0.046415888336127774,train,0.9300567107750473,0.011483928057441369,0.9282475209414007,0.011782165642716598,0.9279873384331311,0.01185188124316268 +flat_mae,reg,logistic,aabc_sex,61,0.046415888336127774,test,0.9090909090909091,0.03700996515347127,0.9086075108009306,0.036836782983375456,0.921875,0.031805438803764366 +flat_mae,reg,logistic,aabc_sex,62,0.046415888336127774,train,0.9243856332703214,0.01220257226109025,0.9224772121105542,0.012547013387019511,0.9224772121105542,0.012799454203616735 +flat_mae,reg,logistic,aabc_sex,62,0.046415888336127774,test,0.8909090909090909,0.040255892233362955,0.8863636363636364,0.04286375110083308,0.8817934782608696,0.044079024053751366 +flat_mae,reg,logistic,aabc_sex,63,0.046415888336127774,train,0.9224952741020794,0.011802397479994403,0.920289455598555,0.012187298938048785,0.9190187285676603,0.012465356657931529 +flat_mae,reg,logistic,aabc_sex,63,0.046415888336127774,test,0.9454545454545454,0.030017493522200893,0.9442755825734549,0.030586492305141582,0.9470108695652174,0.029880341863299936 +flat_mae,reg,logistic,aabc_sex,64,0.3593813663804626,train,0.9810964083175804,0.006140147312109424,0.9806193030276386,0.0062930500141907516,0.9806193030276386,0.00633929274283051 +flat_mae,reg,logistic,aabc_sex,64,0.3593813663804626,test,0.8727272727272727,0.04351709457972988,0.8720505151213027,0.04337939127655704,0.8845108695652174,0.04021371476291492 +flat_mae,reg,logistic,aabc_sex,65,0.3593813663804626,train,0.9792060491493384,0.006478800043535026,0.9786941127795048,0.006633366766860531,0.978985316099534,0.006612588017386388 +flat_mae,reg,logistic,aabc_sex,65,0.3593813663804626,test,0.8727272727272727,0.04577271914778297,0.8663658451926415,0.0491834606736652,0.8600543478260869,0.04980361368912649 +flat_mae,reg,logistic,aabc_sex,66,0.046415888336127774,train,0.9300567107750473,0.010792887429862702,0.9282475209414007,0.011062601238070052,0.9279873384331311,0.011117947153448758 +flat_mae,reg,logistic,aabc_sex,66,0.046415888336127774,test,0.8545454545454545,0.046922041904939774,0.8484848484848485,0.04958950666676133,0.8444293478260869,0.0498739934231838 +flat_mae,reg,logistic,aabc_sex,67,0.3593813663804626,train,0.9829867674858223,0.005853455575190834,0.9826086638880467,0.005964461344828555,0.9840777865705326,0.005553469602534568 +flat_mae,reg,logistic,aabc_sex,67,0.3593813663804626,test,0.9272727272727272,0.03550392831250981,0.9252717391304348,0.03659291826595148,0.9252717391304348,0.036834209059696164 +flat_mae,reg,logistic,aabc_sex,68,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,68,21.54434690031882,test,0.9454545454545454,0.030779600414119954,0.9435897435897436,0.032088032358139014,0.9408967391304348,0.033290283282011494 +flat_mae,reg,logistic,aabc_sex,69,0.3593813663804626,train,0.9773156899810964,0.006241351089904033,0.9767431636331663,0.006397495306971324,0.9767431636331663,0.006464285628169244 +flat_mae,reg,logistic,aabc_sex,69,0.3593813663804626,test,0.9818181818181818,0.016719365720105676,0.9814251941911516,0.01694977278756107,0.984375,0.014368204915715811 +flat_mae,reg,logistic,aabc_sex,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,70,2.782559402207126,test,0.8909090909090909,0.04302743310958718,0.8891129032258065,0.043387611157435634,0.8940217391304348,0.041997095265642104 +flat_mae,reg,logistic,aabc_sex,71,0.046415888336127774,train,0.9300567107750473,0.01059026312131027,0.9281579768393621,0.010887147639922977,0.9273791728948679,0.011028383365561934 +flat_mae,reg,logistic,aabc_sex,71,0.046415888336127774,test,0.8545454545454545,0.04429017763241351,0.84593837535014,0.04870379047304653,0.8383152173913043,0.04842836700820749 +flat_mae,reg,logistic,aabc_sex,72,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,72,2.782559402207126,test,0.8909090909090909,0.040059393921055696,0.8863636363636364,0.0423667243530503,0.8817934782608696,0.04329371892296306 +flat_mae,reg,logistic,aabc_sex,73,0.046415888336127774,train,0.9187145557655955,0.011575951429825392,0.91640113635946,0.011965042482786018,0.9151425891731879,0.01227837306692977 +flat_mae,reg,logistic,aabc_sex,73,0.046415888336127774,test,0.9636363636363636,0.025342715386092632,0.9621212121212122,0.02707216698496684,0.9565217391304348,0.03030107274424119 +flat_mae,reg,logistic,aabc_sex,74,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,74,2.782559402207126,test,0.8181818181818182,0.05140921871216327,0.8151881720430108,0.052177662218938915,0.8192934782608696,0.052042115344013075 +flat_mae,reg,logistic,aabc_sex,75,0.046415888336127774,train,0.9281663516068053,0.0109226902626808,0.9260738452486026,0.011281277932039552,0.9245288548902371,0.011534755394171241 +flat_mae,reg,logistic,aabc_sex,75,0.046415888336127774,test,0.8727272727272727,0.04243597413547196,0.8683760683760684,0.04448346204419462,0.8661684782608696,0.04515016123483443 +flat_mae,reg,logistic,aabc_sex,76,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,76,21.54434690031882,test,0.8181818181818182,0.050136949638049,0.8074229691876751,0.054850028535168,0.8009510869565217,0.054249396924024806 +flat_mae,reg,logistic,aabc_sex,77,0.3593813663804626,train,0.9754253308128544,0.006745170249399947,0.974820315103051,0.006907381673201099,0.9751091767050617,0.006881753348628764 +flat_mae,reg,logistic,aabc_sex,77,0.3593813663804626,test,0.8545454545454545,0.04634650663950789,0.84593837535014,0.0510852831231392,0.8383152173913043,0.05149882344086475 +flat_mae,reg,logistic,aabc_sex,78,0.046415888336127774,train,0.9168241965973535,0.012197501481237166,0.9144012944983819,0.012609243150735203,0.9129004367068203,0.01289894234554061 +flat_mae,reg,logistic,aabc_sex,78,0.046415888336127774,test,0.9818181818181818,0.016437449682542477,0.9811965811965813,0.017197400283794628,0.9782608695652174,0.01965347244651818 +flat_mae,reg,logistic,aabc_sex,79,0.046415888336127774,train,0.9300567107750473,0.011046813875404281,0.9281579768393621,0.011390119187836795,0.9273791728948679,0.01165328948148667 +flat_mae,reg,logistic,aabc_sex,79,0.046415888336127774,test,0.8909090909090909,0.04351261240165348,0.8863636363636364,0.04612296154531753,0.8817934782608696,0.046957842104618515 +flat_mae,reg,logistic,aabc_sex,80,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,80,2.782559402207126,test,0.8545454545454545,0.04773197898874313,0.8484848484848485,0.05039884611693917,0.8444293478260869,0.05077979560226417 +flat_mae,reg,logistic,aabc_sex,81,0.046415888336127774,train,0.9224952741020794,0.01114212780636056,0.9203912716328067,0.011439572071386577,0.9196268941059234,0.011492610908180574 +flat_mae,reg,logistic,aabc_sex,81,0.046415888336127774,test,0.9090909090909091,0.038172656909588966,0.905982905982906,0.03977408008574832,0.9035326086956521,0.04056264017469613 +flat_mae,reg,logistic,aabc_sex,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,82,2.782559402207126,test,0.8909090909090909,0.0405990839777785,0.8863636363636364,0.04270737598905875,0.8817934782608696,0.04337759874040355 +flat_mae,reg,logistic,aabc_sex,83,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,83,2.782559402207126,test,0.9090909090909091,0.037805924587084205,0.9045470322804582,0.04081315496066223,0.8974184782608696,0.04248443043996943 +flat_mae,reg,logistic,aabc_sex,84,0.005994842503189409,train,0.8979206049149339,0.013389130418435443,0.8945187876639489,0.013915611908903846,0.8916952431196694,0.01412643751299315 +flat_mae,reg,logistic,aabc_sex,84,0.005994842503189409,test,0.8545454545454545,0.04668620763642162,0.8505434782608696,0.047962127075162515,0.8505434782608696,0.04767874614297801 +flat_mae,reg,logistic,aabc_sex,85,0.3593813663804626,train,0.9773156899810964,0.006561889504835017,0.9767431636331663,0.006730404173816327,0.9767431636331663,0.006845884430193186 +flat_mae,reg,logistic,aabc_sex,85,0.3593813663804626,test,0.9454545454545454,0.031056333778612628,0.9435897435897436,0.0324589506808142,0.9408967391304348,0.03391612664095968 +flat_mae,reg,logistic,aabc_sex,86,0.046415888336127774,train,0.9319470699432892,0.011562425700209713,0.9300552388787683,0.011903667122719126,0.9290131598229725,0.012112013201604106 +flat_mae,reg,logistic,aabc_sex,86,0.046415888336127774,test,0.8909090909090909,0.044285758769067804,0.8879076086956521,0.04557639565834529,0.8879076086956521,0.04572526490725721 +flat_mae,reg,logistic,aabc_sex,87,0.3593813663804626,train,0.9829867674858223,0.005591328365523352,0.9825885657235016,0.005714406651616085,0.9834696210322693,0.0055200452384341955 +flat_mae,reg,logistic,aabc_sex,87,0.3593813663804626,test,0.8909090909090909,0.04140347731633537,0.8863636363636364,0.044078992380496486,0.8817934782608696,0.045096968809475564 +flat_mae,reg,logistic,aabc_sex,88,0.3593813663804626,train,0.9773156899810964,0.006343414466489716,0.9767431636331663,0.006503212879722966,0.9767431636331663,0.006564945935773902 +flat_mae,reg,logistic,aabc_sex,88,0.3593813663804626,test,0.8545454545454545,0.05047115533313215,0.8505434782608696,0.052073210625239466,0.8505434782608696,0.05251560798180824 +flat_mae,reg,logistic,aabc_sex,89,0.3593813663804626,train,0.9792060491493384,0.006248065727466675,0.9787193581065019,0.0063836925731156945,0.9795934816377971,0.006203356595418099 +flat_mae,reg,logistic,aabc_sex,89,0.3593813663804626,test,0.8363636363636363,0.04904056677978592,0.8281846581048247,0.05305887147040436,0.8226902173913043,0.05320687305239415 +flat_mae,reg,logistic,aabc_sex,90,0.046415888336127774,train,0.9319470699432892,0.011314908041399833,0.9299646954986761,0.011688232660159137,0.9284049942847094,0.012003972571495254 +flat_mae,reg,logistic,aabc_sex,90,0.046415888336127774,test,0.8545454545454545,0.04686656282195014,0.8484848484848485,0.049658923214614646,0.8444293478260869,0.05010974286474692 +flat_mae,reg,logistic,aabc_sex,91,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,91,2.782559402207126,test,0.8727272727272727,0.045084149053384245,0.8699763593380614,0.045880929692043694,0.8722826086956521,0.045412745510578875 +flat_mae,reg,logistic,aabc_sex,92,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,92,166.81005372000556,test,0.9090909090909091,0.038535092946945806,0.9045470322804582,0.04192469588254153,0.8974184782608696,0.04354917100921839 +flat_mae,reg,logistic,aabc_sex,93,0.005994842503189409,train,0.8979206049149339,0.013469512003678253,0.8948077772867875,0.013944227610549018,0.8929115741961957,0.01415664332697611 +flat_mae,reg,logistic,aabc_sex,93,0.005994842503189409,test,0.8363636363636363,0.047749691704253305,0.8281846581048247,0.05189700548455112,0.8226902173913043,0.052313011616883294 +flat_mae,reg,logistic,aabc_sex,94,0.046415888336127774,train,0.9224952741020794,0.011665443646192583,0.9201850291269996,0.012082574305645554,0.918410563029397,0.012407349264397374 +flat_mae,reg,logistic,aabc_sex,94,0.046415888336127774,test,0.8909090909090909,0.041498843954823325,0.8879076086956521,0.04274831357990964,0.8879076086956521,0.042758849347046976 +flat_mae,reg,logistic,aabc_sex,95,0.046415888336127774,train,0.9243856332703214,0.011211585564636901,0.922283598754187,0.011553547400624722,0.9212608810340279,0.011776977245218695 +flat_mae,reg,logistic,aabc_sex,95,0.046415888336127774,test,0.9090909090909091,0.038783126602827026,0.9071259709557582,0.039487743078217626,0.9096467391304348,0.038921795264619835 +flat_mae,reg,logistic,aabc_sex,96,0.3593813663804626,train,0.9792060491493384,0.006023360671742771,0.9786941127795048,0.006170938034420476,0.978985316099534,0.006208896456217745 +flat_mae,reg,logistic,aabc_sex,96,0.3593813663804626,test,0.9090909090909091,0.039087966062575974,0.905982905982906,0.04072338065699296,0.9035326086956521,0.041259272883494284 +flat_mae,reg,logistic,aabc_sex,97,0.3593813663804626,train,0.9792060491493384,0.006032229408670017,0.9787193581065019,0.006162900157485963,0.9795934816377971,0.0060069526295659734 +flat_mae,reg,logistic,aabc_sex,97,0.3593813663804626,test,0.8727272727272727,0.0431261689265461,0.8699763593380614,0.0438658149537676,0.8722826086956521,0.04348831598657484 +flat_mae,reg,logistic,aabc_sex,98,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,98,166.81005372000556,test,0.8545454545454545,0.046094677988277266,0.84593837535014,0.0505806101835397,0.8383152173913043,0.050875920986581356 +flat_mae,reg,logistic,aabc_sex,99,0.046415888336127774,train,0.9281663516068053,0.011501884106643203,0.9261694188164776,0.01184669713045553,0.9251370204285002,0.01204587426776228 +flat_mae,reg,logistic,aabc_sex,99,0.046415888336127774,test,0.8909090909090909,0.0413181008100016,0.884453781512605,0.04601805272671248,0.8756793478260869,0.047428713779446775 +flat_mae,reg,logistic,aabc_sex,100,0.005994842503189409,train,0.8979206049149339,0.013670136474965084,0.8948077772867875,0.014204379193012444,0.8929115741961957,0.014590345847584178 +flat_mae,reg,logistic,aabc_sex,100,0.005994842503189409,test,0.8909090909090909,0.04099741576213742,0.8891129032258065,0.04144308816100281,0.8940217391304348,0.04036666489795088 diff --git a/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..cc560497c7321b6f65baee49ed2d8ea0046df5fc --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:26:27 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (aabc_sex reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic +model: flat_mae +representation: reg +dataset: aabc_sex +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/aabc_sex__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:23:30 time: 5.9766 data: 4.8707 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:56 time: 0.2670 data: 0.1047 max mem: 3581 +extract (train) [ 40/236] eta: 0:01:16 time: 0.2307 data: 0.0805 max mem: 3581 +extract (train) [ 60/236] eta: 0:01:00 time: 0.2441 data: 0.0871 max mem: 3581 +extract (train) [ 80/236] eta: 0:00:48 time: 0.2204 data: 0.0750 max mem: 3581 +extract (train) [100/236] eta: 0:00:39 time: 0.2114 data: 0.0692 max mem: 3581 +extract (train) [120/236] eta: 0:00:32 time: 0.2155 data: 0.0719 max mem: 3581 +extract (train) [140/236] eta: 0:00:26 time: 0.2349 data: 0.0821 max mem: 3581 +extract (train) [160/236] eta: 0:00:20 time: 0.2139 data: 0.0716 max mem: 3581 +extract (train) [180/236] eta: 0:00:14 time: 0.2312 data: 0.0809 max mem: 3581 +extract (train) [200/236] eta: 0:00:09 time: 0.2487 data: 0.0875 max mem: 3581 +extract (train) [220/236] eta: 0:00:04 time: 0.2120 data: 0.0692 max mem: 3581 +extract (train) [235/236] eta: 0:00:00 time: 0.1930 data: 0.0624 max mem: 3581 +extract (train) Total time: 0:00:59 (0.2542 s / it) +extract (validation) [ 0/29] eta: 0:02:33 time: 5.3039 data: 5.1552 max mem: 3581 +extract (validation) [20/29] eta: 0:00:03 time: 0.1893 data: 0.0545 max mem: 3581 +extract (validation) [28/29] eta: 0:00:00 time: 0.1847 data: 0.0533 max mem: 3581 +extract (validation) Total time: 0:00:11 (0.3795 s / it) +extract (test) [ 0/28] eta: 0:02:31 time: 5.4175 data: 5.2632 max mem: 3581 +extract (test) [20/28] eta: 0:00:03 time: 0.1998 data: 0.0594 max mem: 3581 +extract (test) [27/28] eta: 0:00:00 time: 0.1790 data: 0.0517 max mem: 3581 +extract (test) Total time: 0:00:11 (0.4075 s / it) +feature extraction time: 0:01:22 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | aabc_sex | | 0.0059948 | train | 0.89225 | 0.013543 | 0.88889 | 0.014035 | 0.8864 | 0.014242 | +| flat_mae | reg | logistic | aabc_sex | | 0.0059948 | test | 0.90909 | 0.038779 | 0.90713 | 0.039195 | 0.91667 | 0.036794 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05148344835522822, "f1": 0.8131793478260869, "f1_std": 0.05300365663120187, "bacc": 0.8131793478260869, "bacc_std": 0.052888620726691385} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04827765425977575, "f1": 0.8505434782608696, "f1_std": 0.049764394368594665, "bacc": 0.8505434782608696, "bacc_std": 0.04999948302840019} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 2.782559402207126, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.057200577931542596, "f1": 0.7727272727272727, "f1_std": 0.060807554236573795, "bacc": 0.7697010869565217, "bacc_std": 0.06040350207723156} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04679437700203158, "f1": 0.8521505376344086, "f1_std": 0.04734437631820999, "bacc": 0.8566576086956521, "bacc_std": 0.046600335502049686} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04354431199867573, "f1": 0.8484848484848485, "f1_std": 0.0464208692011045, "bacc": 0.8444293478260869, "bacc_std": 0.0469015114655155} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03665158718221091, "f1": 0.9252717391304348, "f1_std": 0.037676477828171816, "bacc": 0.9252717391304348, "bacc_std": 0.03793582409885245} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044359165199427286, "f1": 0.8683760683760684, "f1_std": 0.0463579447980355, "bacc": 0.8661684782608696, "bacc_std": 0.047033672274319656} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 166.81005372000556, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04384005489530254, "f1": 0.8699763593380614, "f1_std": 0.0448195005823914, "bacc": 0.8722826086956521, "bacc_std": 0.04484391415055287} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 166.81005372000556, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04670801159445172, "f1": 0.8505434782608696, "f1_std": 0.04810536351814372, "bacc": 0.8505434782608696, "bacc_std": 0.04829859142812354} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042331333763322626, "f1": 0.8891129032258065, "f1_std": 0.0426952658338831, "bacc": 0.8940217391304348, "bacc_std": 0.04145416230090705} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03996644873882828, "f1": 0.9045470322804582, "f1_std": 0.043407626401297715, "bacc": 0.8974184782608696, "bacc_std": 0.04499273039106961} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 2.782559402207126, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05503992840664188, "f1": 0.7472605160834218, "f1_std": 0.061627586322157445, "bacc": 0.7418478260869565, "bacc_std": 0.05935685305898193} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03773739623212186, "f1": 0.905982905982906, "f1_std": 0.039270895393158665, "bacc": 0.9035326086956521, "bacc_std": 0.039873222077380616} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.028640236390362915, "f1": 0.9447975911676145, "f1_std": 0.028654830127008868, "bacc": 0.953125, "bacc_std": 0.024612703147968127} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.052215383566452264, "f1": 0.8328267477203647, "f1_std": 0.05328373221171638, "bacc": 0.8349184782608696, "bacc_std": 0.05299092067767351} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049311343472281506, "f1": 0.8484848484848485, "f1_std": 0.0525919754819421, "bacc": 0.8444293478260869, "bacc_std": 0.05282677741262409} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.043503965811432774, "f1": 0.8428571428571429, "f1_std": 0.05015809623994732, "bacc": 0.8322010869565217, "bacc_std": 0.049696903182686485} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047726202585405206, "f1": 0.8533333333333333, "f1_std": 0.04776293280424774, "bacc": 0.8627717391304348, "bacc_std": 0.045588157330957266} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04524168340541022, "f1": 0.8505434782608696, "f1_std": 0.046632931350771664, "bacc": 0.8505434782608696, "bacc_std": 0.04664664959146638} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042667098827012466, "f1": 0.8663658451926415, "f1_std": 0.046234106240726894, "bacc": 0.8600543478260869, "bacc_std": 0.04695916316025159} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.055135763318323726, "f1": 0.7931623931623932, "f1_std": 0.057742187433411724, "bacc": 0.7914402173913043, "bacc_std": 0.05785653466547375} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03860208532884639, "f1": 0.9086075108009306, "f1_std": 0.038427405743493015, "bacc": 0.921875, "bacc_std": 0.03317366707947736} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03847203986325225, "f1": 0.9071259709557582, "f1_std": 0.03923633973842022, "bacc": 0.9096467391304348, "bacc_std": 0.03890995317687199} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047619705623159425, "f1": 0.84593837535014, "f1_std": 0.05244983553981353, "bacc": 0.8383152173913043, "bacc_std": 0.052615318087127605} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.035207418451934064, "f1": 0.9252717391304348, "f1_std": 0.0362539448535669, "bacc": 0.9252717391304348, "bacc_std": 0.03647459046576888} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04483039110851195, "f1": 0.8484848484848485, "f1_std": 0.04782164287359455, "bacc": 0.8444293478260869, "bacc_std": 0.04841654876479196} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 166.81005372000556, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05215964051998011, "f1": 0.8131793478260869, "f1_std": 0.053984763917201466, "bacc": 0.8131793478260869, "bacc_std": 0.05419020245328094} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.051109359334374806, "f1": 0.8074229691876751, "f1_std": 0.05629244782322332, "bacc": 0.8009510869565217, "bacc_std": 0.055652170595915325} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.0514369516981712, "f1": 0.8307692307692308, "f1_std": 0.053825278490135756, "bacc": 0.8288043478260869, "bacc_std": 0.053944144032825214} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05450028432710727, "f1": 0.8151881720430108, "f1_std": 0.055179253649181255, "bacc": 0.8192934782608696, "bacc_std": 0.05474897016199512} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04839200808275049, "f1": 0.8328267477203647, "f1_std": 0.04904916567875039, "bacc": 0.8349184782608696, "bacc_std": 0.04829106116781827} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.039736320999956325, "f1": 0.884453781512605, "f1_std": 0.04403138735844937, "bacc": 0.8756793478260869, "bacc_std": 0.045401153721031505} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03686713244267114, "f1": 0.9071259709557582, "f1_std": 0.037501458436335657, "bacc": 0.9096467391304348, "bacc_std": 0.03686944111904073} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0456399853687839, "f1": 0.8699763593380614, "f1_std": 0.04655171928823117, "bacc": 0.8722826086956521, "bacc_std": 0.04636882654416331} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04606720207954912, "f1": 0.8484848484848485, "f1_std": 0.04890383864811312, "bacc": 0.8444293478260869, "bacc_std": 0.049585007714192615} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04293718648760155, "f1": 0.8683760683760684, "f1_std": 0.045008431821918216, "bacc": 0.8661684782608696, "bacc_std": 0.04530649852701437} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05588493491463707, "f1": 0.7727272727272727, "f1_std": 0.059475640628794645, "bacc": 0.7697010869565217, "bacc_std": 0.05901106269226388} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04400625049518109, "f1": 0.8428571428571429, "f1_std": 0.05076742089045086, "bacc": 0.8322010869565217, "bacc_std": 0.05035063029861087} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.0376825540009307, "f1": 0.905982905982906, "f1_std": 0.039386072830629254, "bacc": 0.9035326086956521, "bacc_std": 0.04011051500656701} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 21.54434690031882, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04384403612958357, "f1": 0.8484848484848485, "f1_std": 0.04614613234513338, "bacc": 0.8444293478260869, "bacc_std": 0.046330867070314176} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038480287966483194, "f1": 0.905982905982906, "f1_std": 0.040362495966507414, "bacc": 0.9035326086956521, "bacc_std": 0.04107032746648314} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05163814337521893, "f1": 0.8131793478260869, "f1_std": 0.053294451521401186, "bacc": 0.8131793478260869, "bacc_std": 0.0530794048993146} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048167462698484695, "f1": 0.8307692307692308, "f1_std": 0.050372141894493475, "bacc": 0.8288043478260869, "bacc_std": 0.050755327177261556} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04563558130441349, "f1": 0.8505434782608696, "f1_std": 0.0469383150456708, "bacc": 0.8505434782608696, "bacc_std": 0.04687424865497405} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029323501983819084, "f1": 0.9447975911676145, "f1_std": 0.029326318341046025, "bacc": 0.953125, "bacc_std": 0.02519988451734452} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038148140992150914, "f1": 0.9071259709557582, "f1_std": 0.03887759388432839, "bacc": 0.9096467391304348, "bacc_std": 0.038341376939406355} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04224776975873136, "f1": 0.8663658451926415, "f1_std": 0.045638659989561486, "bacc": 0.8600543478260869, "bacc_std": 0.046469068011797295} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04058312151703114, "f1": 0.8879076086956521, "f1_std": 0.041885923840686734, "bacc": 0.8879076086956521, "bacc_std": 0.04216694092273785} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.045169594102725315, "f1": 0.8683760683760684, "f1_std": 0.04725954817855603, "bacc": 0.8661684782608696, "bacc_std": 0.0475266087381254} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04601589376877126, "f1": 0.8683760683760684, "f1_std": 0.04786679180359651, "bacc": 0.8661684782608696, "bacc_std": 0.04820649437756565} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04113310785709572, "f1": 0.8683760683760684, "f1_std": 0.043061667916583925, "bacc": 0.8661684782608696, "bacc_std": 0.04377531275140523} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03815048064643117, "f1": 0.9071259709557582, "f1_std": 0.03890875186431489, "bacc": 0.9096467391304348, "bacc_std": 0.03856616421592726} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 21.54434690031882, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030369602026907232, "f1": 0.9442755825734549, "f1_std": 0.03091173988423925, "bacc": 0.9470108695652174, "bacc_std": 0.029915117583385237} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.052438078398390035, "f1": 0.8151881720430108, "f1_std": 0.052933455074335736, "bacc": 0.8192934782608696, "bacc_std": 0.05218737193013163} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 2.782559402207126, "split": "test", "acc": 0.8, "acc_std": 0.05157962311476456, "f1": 0.795677136102668, "f1_std": 0.052825845330730606, "bacc": 0.7975543478260869, "bacc_std": 0.05298057518729177} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05059773620519901, "f1": 0.8307692307692308, "f1_std": 0.05281904738235101, "bacc": 0.8288043478260869, "bacc_std": 0.05281782748210221} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03940046565987357, "f1": 0.9071259709557582, "f1_std": 0.040235893758619266, "bacc": 0.9096467391304348, "bacc_std": 0.0397284722707083} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05297990886932187, "f1": 0.795677136102668, "f1_std": 0.054041794647281365, "bacc": 0.7975543478260869, "bacc_std": 0.05382640879522031} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044655537448264886, "f1": 0.8711943793911007, "f1_std": 0.04484634736039705, "bacc": 0.8783967391304348, "bacc_std": 0.04324600627820621} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049048857409965604, "f1": 0.8307692307692308, "f1_std": 0.05156178682032743, "bacc": 0.8288043478260869, "bacc_std": 0.05189087094871355} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03700996515347127, "f1": 0.9086075108009306, "f1_std": 0.036836782983375456, "bacc": 0.921875, "bacc_std": 0.031805438803764366} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.040255892233362955, "f1": 0.8863636363636364, "f1_std": 0.04286375110083308, "bacc": 0.8817934782608696, "bacc_std": 0.044079024053751366} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030017493522200893, "f1": 0.9442755825734549, "f1_std": 0.030586492305141582, "bacc": 0.9470108695652174, "bacc_std": 0.029880341863299936} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04351709457972988, "f1": 0.8720505151213027, "f1_std": 0.04337939127655704, "bacc": 0.8845108695652174, "bacc_std": 0.04021371476291492} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04577271914778297, "f1": 0.8663658451926415, "f1_std": 0.0491834606736652, "bacc": 0.8600543478260869, "bacc_std": 0.04980361368912649} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046922041904939774, "f1": 0.8484848484848485, "f1_std": 0.04958950666676133, "bacc": 0.8444293478260869, "bacc_std": 0.0498739934231838} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03550392831250981, "f1": 0.9252717391304348, "f1_std": 0.03659291826595148, "bacc": 0.9252717391304348, "bacc_std": 0.036834209059696164} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 21.54434690031882, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030779600414119954, "f1": 0.9435897435897436, "f1_std": 0.032088032358139014, "bacc": 0.9408967391304348, "bacc_std": 0.033290283282011494} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.9818181818181818, "acc_std": 0.016719365720105676, "f1": 0.9814251941911516, "f1_std": 0.01694977278756107, "bacc": 0.984375, "bacc_std": 0.014368204915715811} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04302743310958718, "f1": 0.8891129032258065, "f1_std": 0.043387611157435634, "bacc": 0.8940217391304348, "bacc_std": 0.041997095265642104} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04429017763241351, "f1": 0.84593837535014, "f1_std": 0.04870379047304653, "bacc": 0.8383152173913043, "bacc_std": 0.04842836700820749} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.040059393921055696, "f1": 0.8863636363636364, "f1_std": 0.0423667243530503, "bacc": 0.8817934782608696, "bacc_std": 0.04329371892296306} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.025342715386092632, "f1": 0.9621212121212122, "f1_std": 0.02707216698496684, "bacc": 0.9565217391304348, "bacc_std": 0.03030107274424119} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05140921871216327, "f1": 0.8151881720430108, "f1_std": 0.052177662218938915, "bacc": 0.8192934782608696, "bacc_std": 0.052042115344013075} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04243597413547196, "f1": 0.8683760683760684, "f1_std": 0.04448346204419462, "bacc": 0.8661684782608696, "bacc_std": 0.04515016123483443} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 21.54434690031882, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.050136949638049, "f1": 0.8074229691876751, "f1_std": 0.054850028535168, "bacc": 0.8009510869565217, "bacc_std": 0.054249396924024806} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04634650663950789, "f1": 0.84593837535014, "f1_std": 0.0510852831231392, "bacc": 0.8383152173913043, "bacc_std": 0.05149882344086475} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.9818181818181818, "acc_std": 0.016437449682542477, "f1": 0.9811965811965813, "f1_std": 0.017197400283794628, "bacc": 0.9782608695652174, "bacc_std": 0.01965347244651818} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04351261240165348, "f1": 0.8863636363636364, "f1_std": 0.04612296154531753, "bacc": 0.8817934782608696, "bacc_std": 0.046957842104618515} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04773197898874313, "f1": 0.8484848484848485, "f1_std": 0.05039884611693917, "bacc": 0.8444293478260869, "bacc_std": 0.05077979560226417} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038172656909588966, "f1": 0.905982905982906, "f1_std": 0.03977408008574832, "bacc": 0.9035326086956521, "bacc_std": 0.04056264017469613} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.0405990839777785, "f1": 0.8863636363636364, "f1_std": 0.04270737598905875, "bacc": 0.8817934782608696, "bacc_std": 0.04337759874040355} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 2.782559402207126, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.037805924587084205, "f1": 0.9045470322804582, "f1_std": 0.04081315496066223, "bacc": 0.8974184782608696, "bacc_std": 0.04248443043996943} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04668620763642162, "f1": 0.8505434782608696, "f1_std": 0.047962127075162515, "bacc": 0.8505434782608696, "bacc_std": 0.04767874614297801} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.031056333778612628, "f1": 0.9435897435897436, "f1_std": 0.0324589506808142, "bacc": 0.9408967391304348, "bacc_std": 0.03391612664095968} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.044285758769067804, "f1": 0.8879076086956521, "f1_std": 0.04557639565834529, "bacc": 0.8879076086956521, "bacc_std": 0.04572526490725721} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04140347731633537, "f1": 0.8863636363636364, "f1_std": 0.044078992380496486, "bacc": 0.8817934782608696, "bacc_std": 0.045096968809475564} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.05047115533313215, "f1": 0.8505434782608696, "f1_std": 0.052073210625239466, "bacc": 0.8505434782608696, "bacc_std": 0.05251560798180824} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04904056677978592, "f1": 0.8281846581048247, "f1_std": 0.05305887147040436, "bacc": 0.8226902173913043, "bacc_std": 0.05320687305239415} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04686656282195014, "f1": 0.8484848484848485, "f1_std": 0.049658923214614646, "bacc": 0.8444293478260869, "bacc_std": 0.05010974286474692} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.045084149053384245, "f1": 0.8699763593380614, "f1_std": 0.045880929692043694, "bacc": 0.8722826086956521, "bacc_std": 0.045412745510578875} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 166.81005372000556, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038535092946945806, "f1": 0.9045470322804582, "f1_std": 0.04192469588254153, "bacc": 0.8974184782608696, "bacc_std": 0.04354917100921839} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.047749691704253305, "f1": 0.8281846581048247, "f1_std": 0.05189700548455112, "bacc": 0.8226902173913043, "bacc_std": 0.052313011616883294} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041498843954823325, "f1": 0.8879076086956521, "f1_std": 0.04274831357990964, "bacc": 0.8879076086956521, "bacc_std": 0.042758849347046976} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038783126602827026, "f1": 0.9071259709557582, "f1_std": 0.039487743078217626, "bacc": 0.9096467391304348, "bacc_std": 0.038921795264619835} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.039087966062575974, "f1": 0.905982905982906, "f1_std": 0.04072338065699296, "bacc": 0.9035326086956521, "bacc_std": 0.041259272883494284} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0431261689265461, "f1": 0.8699763593380614, "f1_std": 0.0438658149537676, "bacc": 0.8722826086956521, "bacc_std": 0.04348831598657484} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 166.81005372000556, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046094677988277266, "f1": 0.84593837535014, "f1_std": 0.0505806101835397, "bacc": 0.8383152173913043, "bacc_std": 0.050875920986581356} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.0413181008100016, "f1": 0.884453781512605, "f1_std": 0.04601805272671248, "bacc": 0.8756793478260869, "bacc_std": 0.047428713779446775} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04099741576213742, "f1": 0.8891129032258065, "f1_std": 0.04144308816100281, "bacc": 0.8940217391304348, "bacc_std": 0.04036666489795088} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | aabc_sex | train | 100 | 9.6674 | 36.478 | 0.95739 | 0.034699 | 0.9562 | 0.035722 | 0.95577 | 0.036449 | +| flat_mae | reg | logistic | aabc_sex | test | 100 | 9.6674 | 36.478 | 0.87364 | 0.043117 | 0.86922 | 0.045043 | 0.86817 | 0.046293 | + + +done! total time: 0:05:22 diff --git a/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5eab8205e02c1977d73d4e6128207803fde47cf3 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..e51f8982bba2d17549d7f0ecd9c81bc2c50bcf69 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,train,0.7905982905982906,0.014818535583604683,0.7861720710397573,0.015298679218625565,0.7841453805240002,0.015278347071412242 +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,test,0.6370967741935484,0.04116817871373008,0.6330637206549615,0.041889288986639316,0.6327572663000786,0.041627451951507785 +flat_mae,patch,logistic,abide_dx,1,0.046415888336127774,train,0.7905982905982906,0.015365266665844179,0.7870193984599286,0.015747122977721375,0.7855666297526762,0.015772710991709456 +flat_mae,patch,logistic,abide_dx,1,0.046415888336127774,test,0.6693548387096774,0.04237947346383389,0.6595915634415801,0.04441234874135119,0.6591386554621849,0.043255184439127445 +flat_mae,patch,logistic,abide_dx,2,0.046415888336127774,train,0.801994301994302,0.014450904953953514,0.7989056848743392,0.01478216552468129,0.7976744186046512,0.01484452207528048 +flat_mae,patch,logistic,abide_dx,2,0.046415888336127774,test,0.6290322580645161,0.0418490299903622,0.6255252100840336,0.04224287207441996,0.6255252100840336,0.04223940254247183 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,train,0.9088319088319088,0.010416700538089914,0.9075707702435813,0.01059486604233226,0.9063861203396086,0.010731904058402462 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,test,0.6048387096774194,0.044832037099247025,0.6017043592264831,0.04538466148578306,0.601890756302521,0.045329751363327724 +flat_mae,patch,logistic,abide_dx,4,0.005994842503189409,train,0.7094017094017094,0.016837827621828495,0.7000427289559892,0.01777420904796608,0.6986341823551125,0.01736825606695023 +flat_mae,patch,logistic,abide_dx,4,0.005994842503189409,test,0.7016129032258065,0.03973943160838807,0.6909813430322624,0.042422564444162474,0.6901260504201681,0.041079366401794506 +flat_mae,patch,logistic,abide_dx,5,0.3593813663804626,train,0.9031339031339032,0.011460389384021922,0.9015874324825794,0.01169532558972553,0.8997416020671835,0.011836765992535808 +flat_mae,patch,logistic,abide_dx,5,0.3593813663804626,test,0.6290322580645161,0.04256584542412067,0.6266038229903116,0.04273817736047087,0.6271008403361344,0.04267661346044492 +flat_mae,patch,logistic,abide_dx,6,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,6,166.81005372000556,test,0.6370967741935484,0.04404506489875436,0.6330637206549615,0.04485219372337858,0.6328781512605042,0.044760993665419045 +flat_mae,patch,logistic,abide_dx,7,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,7,1291.5496650148827,test,0.6612903225806451,0.04256437555746936,0.6580882352941176,0.04290575299762164,0.6580882352941176,0.04275427703874282 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,train,0.9074074074074074,0.01060699141973597,0.9060946873424365,0.01079768367877812,0.904798818752307,0.01094379357367557 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,test,0.6048387096774194,0.04191781265392919,0.6017043592264831,0.04251349484302152,0.601890756302521,0.04253020434614034 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,train,0.9002849002849003,0.011219320768080845,0.8991644231400617,0.011342454748304493,0.8989294942783315,0.011365460569613786 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,test,0.717741935483871,0.03871068211061777,0.7076850542197077,0.04124479528466586,0.70640756302521,0.039951887460948875 +flat_mae,patch,logistic,abide_dx,10,0.3593813663804626,train,0.905982905982906,0.010844762557472768,0.9047458491295302,0.011014061960328067,0.9038021410114434,0.01112997189739409 +flat_mae,patch,logistic,abide_dx,10,0.3593813663804626,test,0.6612903225806451,0.042713292289279066,0.6569169960474308,0.04336551201222108,0.6565126050420168,0.04315140025741813 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,train,0.9145299145299145,0.010355047616712831,0.9133475971033574,0.010534249538119076,0.9121447028423773,0.010664349574360035 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,test,0.5564516129032258,0.04691496891198467,0.551522325244953,0.04715993173852779,0.5514705882352942,0.0469521104335761 +flat_mae,patch,logistic,abide_dx,12,2.782559402207126,train,0.9914529914529915,0.0034274285786572655,0.9913569505548625,0.0034684657862745325,0.9910668143226282,0.0035881871698963553 +flat_mae,patch,logistic,abide_dx,12,2.782559402207126,test,0.5806451612903226,0.04473672551474323,0.5766806722689075,0.04506519690269759,0.5766806722689075,0.044863854821259666 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,train,0.9116809116809117,0.010856521895238296,0.9102708943223519,0.011087060655051629,0.9083794758213363,0.011280477199271752 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,test,0.6370967741935484,0.04267310343182977,0.6342182890855457,0.042843245061922275,0.634453781512605,0.04277996402742408 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,train,0.9031339031339032,0.010528790593807626,0.9015874324825794,0.01076445019031807,0.8997416020671835,0.01093857631262492 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,test,0.6290322580645161,0.04298801952987525,0.6242424242424243,0.04408404111994835,0.6239495798319328,0.04383632266666239 +flat_mae,patch,logistic,abide_dx,15,2.782559402207126,train,0.9928774928774928,0.003191914515872468,0.9927952559531508,0.003232904179116814,0.9923588039867111,0.0034520409067682737 +flat_mae,patch,logistic,abide_dx,15,2.782559402207126,test,0.5887096774193549,0.0449762655641828,0.5873947935016637,0.0451488177158158,0.5887605042016807,0.04519259794100779 +flat_mae,patch,logistic,abide_dx,16,2.782559402207126,train,0.9886039886039886,0.004182874702359975,0.9884759340731499,0.004232851196487397,0.988187523071244,0.004349348212289285 +flat_mae,patch,logistic,abide_dx,16,2.782559402207126,test,0.5725806451612904,0.04286419216342589,0.5703170970905524,0.04314559861635074,0.5709033613445378,0.04311301136489517 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,train,0.9216524216524217,0.009988294786560957,0.920486676730254,0.010183019050542013,0.9188999630860096,0.010362257859279175 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,test,0.5887096774193549,0.04376458876727602,0.5854473942969518,0.04440352299273254,0.585609243697479,0.044306992689567844 +flat_mae,patch,logistic,abide_dx,18,0.046415888336127774,train,0.7891737891737892,0.0155095829333524,0.7858079412856142,0.015786363226882243,0.7845699520118126,0.01576254159097625 +flat_mae,patch,logistic,abide_dx,18,0.046415888336127774,test,0.7096774193548387,0.04235441797692907,0.7069327731092437,0.042753430530421035,0.7069327731092437,0.04262754531424951 +flat_mae,patch,logistic,abide_dx,19,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,19,10000.0,test,0.6451612903225806,0.04131984831439056,0.6356837606837606,0.04305739158896024,0.6355042016806722,0.04207684124933722 +flat_mae,patch,logistic,abide_dx,20,0.3593813663804626,train,0.9102564102564102,0.009809172663646233,0.9089840815780538,0.01000146230494706,0.9076781100036914,0.01019115745721 +flat_mae,patch,logistic,abide_dx,20,0.3593813663804626,test,0.6451612903225806,0.04204661144276597,0.6418067226890756,0.042800680205757065,0.6418067226890756,0.04281570331374563 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,train,0.8034188034188035,0.014390125843368172,0.8006994733377222,0.014631044091705027,0.7998523440383905,0.01466395849876361 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,test,0.6451612903225806,0.04412349050281408,0.6375232527238905,0.045328106611363314,0.6370798319327731,0.04469522725339643 +flat_mae,patch,logistic,abide_dx,22,0.3593813663804626,train,0.9017094017094017,0.010843291369294562,0.9002469217161368,0.01105024546467721,0.8987449243263197,0.01118567191319808 +flat_mae,patch,logistic,abide_dx,22,0.3593813663804626,test,0.6693548387096774,0.042924442970997866,0.665680278818965,0.043640647573690274,0.6654411764705883,0.04348855644330462 +flat_mae,patch,logistic,abide_dx,23,0.005994842503189409,train,0.7250712250712251,0.01618532008723985,0.7177580801399899,0.01681786944889831,0.71609449981543,0.016596774112664296 +flat_mae,patch,logistic,abide_dx,23,0.005994842503189409,test,0.6048387096774194,0.04213467271928536,0.585171024783232,0.0455772243207917,0.5892857142857143,0.04302249834506851 +flat_mae,patch,logistic,abide_dx,24,0.3593813663804626,train,0.8974358974358975,0.011364004554210674,0.8960863808685784,0.011562310277430057,0.8951642672572906,0.011721116544655052 +flat_mae,patch,logistic,abide_dx,24,0.3593813663804626,test,0.6774193548387096,0.040310511096868264,0.6760710553814002,0.040620294010812054,0.6775210084033614,0.04073127386810247 +flat_mae,patch,logistic,abide_dx,25,0.046415888336127774,train,0.8005698005698005,0.014990493276668775,0.7970846925713342,0.015315230361083405,0.7954964931709118,0.015299283017395172 +flat_mae,patch,logistic,abide_dx,25,0.046415888336127774,test,0.6129032258064516,0.04497802388583003,0.607905138339921,0.04562548393079618,0.6076680672268908,0.0453690344969679 +flat_mae,patch,logistic,abide_dx,26,0.046415888336127774,train,0.8005698005698005,0.014417817689794287,0.7978110599078341,0.01463880270736467,0.7969730527870063,0.014659295148579669 +flat_mae,patch,logistic,abide_dx,26,0.046415888336127774,test,0.5725806451612904,0.04383483747051651,0.5643931861867832,0.045016321860362564,0.5646008403361344,0.04437317550860183 +flat_mae,patch,logistic,abide_dx,27,0.3593813663804626,train,0.9002849002849003,0.011176974779476847,0.8988364337063663,0.01138480329128405,0.8974529346622371,0.011529748295326433 +flat_mae,patch,logistic,abide_dx,27,0.3593813663804626,test,0.6370967741935484,0.0433451770148358,0.6342182890855457,0.043750688224369526,0.634453781512605,0.04383221662326893 +flat_mae,patch,logistic,abide_dx,28,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,28,10000.0,test,0.6532258064516129,0.04177822872527551,0.6480760345851759,0.042592892391561346,0.6475840336134454,0.042292328002531826 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,train,0.792022792022792,0.015423153956742866,0.7882256678127479,0.01585812577839149,0.7865633074935401,0.01584587845487351 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,test,0.6693548387096774,0.03877511605901675,0.6630211440312852,0.03974032054868613,0.6622899159663866,0.039263926616287546 +flat_mae,patch,logistic,abide_dx,30,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,30,1291.5496650148827,test,0.6854838709677419,0.0408511039643883,0.6819885579009667,0.041563053570272075,0.6817226890756303,0.04156475817609348 +flat_mae,patch,logistic,abide_dx,31,0.046415888336127774,train,0.8005698005698005,0.014328777942393567,0.7967691505992704,0.01474708970952786,0.7949058693244739,0.014781159523797681 +flat_mae,patch,logistic,abide_dx,31,0.046415888336127774,test,0.6209677419354839,0.0420973149143372,0.6118548118548119,0.04311895291703265,0.6118697478991597,0.04233277949973712 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,train,0.9031339031339032,0.010810699264051855,0.9015151515151515,0.011045435319442553,0.8994462901439646,0.011172972917430656 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,test,0.6612903225806451,0.043704392014688516,0.6502820306204673,0.04577179888700723,0.6502100840336134,0.044532360175593325 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,train,0.7279202279202279,0.015871511042909705,0.7216939717173227,0.01626234956152086,0.72015503875969,0.016076699728180606 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,test,0.5967741935483871,0.04147088794981986,0.5836690840719849,0.04366107605779701,0.5850840336134454,0.04212667456743428 +flat_mae,patch,logistic,abide_dx,34,2.782559402207126,train,0.9871794871794872,0.004300513886520062,0.9870469879839183,0.004343282426883844,0.9871908453303803,0.004298452001849844 +flat_mae,patch,logistic,abide_dx,34,2.782559402207126,test,0.5967741935483871,0.04228328476662577,0.58994708994709,0.04314068654320464,0.5898109243697479,0.04273326783327153 +flat_mae,patch,logistic,abide_dx,35,0.3593813663804626,train,0.9002849002849003,0.010954212187687638,0.8988364337063663,0.011167884011107233,0.8974529346622371,0.011316943250678236 +flat_mae,patch,logistic,abide_dx,35,0.3593813663804626,test,0.6693548387096774,0.04238335280759793,0.6682977751680041,0.04254047249689141,0.6701680672268908,0.04253785621318848 +flat_mae,patch,logistic,abide_dx,36,0.3593813663804626,train,0.9017094017094017,0.010682146506739404,0.9004486572139683,0.010850034167421892,0.8996308600959764,0.010961738312925191 +flat_mae,patch,logistic,abide_dx,36,0.3593813663804626,test,0.6612903225806451,0.0406622435941538,0.6522435897435898,0.0422832608406869,0.6517857142857143,0.04136384017078413 +flat_mae,patch,logistic,abide_dx,37,0.005994842503189409,train,0.7193732193732194,0.01625422692280419,0.7101842285599325,0.01730426655522675,0.7085640457733481,0.01693368074040467 +flat_mae,patch,logistic,abide_dx,37,0.005994842503189409,test,0.6451612903225806,0.040001118610789664,0.6288435374149659,0.04315819754919613,0.6307773109243697,0.041004130640820695 +flat_mae,patch,logistic,abide_dx,38,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,38,21.54434690031882,test,0.6129032258064516,0.045259900981294995,0.6045708211533352,0.04698721852285207,0.6045168067226891,0.04619479143324546 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,train,0.7991452991452992,0.015265312189797448,0.7958641569203293,0.015556695655370432,0.794499815430048,0.015532086250115013 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,test,0.5806451612903226,0.041395719816748236,0.5716183895827798,0.0420874662842533,0.571953781512605,0.041541566548323035 +flat_mae,patch,logistic,abide_dx,40,2.782559402207126,train,0.9886039886039886,0.0039026670130636798,0.9884759340731499,0.003948970350602756,0.988187523071244,0.004058446884806565 +flat_mae,patch,logistic,abide_dx,40,2.782559402207126,test,0.5887096774193549,0.04543103772032369,0.5788211788211788,0.0468204045338352,0.5793067226890757,0.04594888915579086 +flat_mae,patch,logistic,abide_dx,41,0.046415888336127774,train,0.7948717948717948,0.014571048417494457,0.7917463247412486,0.014895604921800082,0.7906238464377999,0.014968178130927257 +flat_mae,patch,logistic,abide_dx,41,0.046415888336127774,test,0.6290322580645161,0.040990503582842515,0.6227513227513227,0.04206383257735084,0.6223739495798319,0.04161823842632969 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,train,0.8105413105413105,0.014236008549311717,0.8074463324142113,0.014548953964337573,0.806016980435585,0.014584726376141303 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,test,0.5725806451612904,0.044282098712563056,0.5703170970905524,0.04452592328159447,0.5709033613445378,0.04446716653413546 +flat_mae,patch,logistic,abide_dx,43,2.782559402207126,train,0.9914529914529915,0.003716535955402044,0.991351626600524,0.0037663028564986828,0.9907715023994094,0.004010926809547889 +flat_mae,patch,logistic,abide_dx,43,2.782559402207126,test,0.6451612903225806,0.0426730272287027,0.6428384393820372,0.042842192118187405,0.6433823529411764,0.04276863619846117 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,train,0.7165242165242165,0.01762012310054685,0.7075459255770137,0.018486294255485907,0.7059800664451827,0.018120500862905033 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,test,0.6129032258064516,0.04224003555322818,0.5921052631578947,0.0458758222099516,0.5966386554621849,0.04308664073601647 +flat_mae,patch,logistic,abide_dx,45,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,45,1291.5496650148827,test,0.6290322580645161,0.04065328579834442,0.6145945945945945,0.04326166282267838,0.6160714285714286,0.04147002284571123 +flat_mae,patch,logistic,abide_dx,46,0.046415888336127774,train,0.8162393162393162,0.01398720407754299,0.8136340718026815,0.014237021876841982,0.8126614987080103,0.014297350248976673 +flat_mae,patch,logistic,abide_dx,46,0.046415888336127774,test,0.5483870967741935,0.044800108122918444,0.5441176470588236,0.04513599085373431,0.5441176470588236,0.04517028189802117 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,train,0.801994301994302,0.01537474068447481,0.7989056848743392,0.015691642827327615,0.7976744186046512,0.015718044466736348 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,test,0.6129032258064516,0.04102610777603427,0.6025641025641025,0.04280546240696318,0.6029411764705883,0.04174033229828727 +flat_mae,patch,logistic,abide_dx,48,0.046415888336127774,train,0.801994301994302,0.014291171657958823,0.7989056848743392,0.014541977217156898,0.7976744186046512,0.014505676574117707 +flat_mae,patch,logistic,abide_dx,48,0.046415888336127774,test,0.6290322580645161,0.04303037186948081,0.6227513227513227,0.0437512918319916,0.6223739495798319,0.04331829996783883 +flat_mae,patch,logistic,abide_dx,49,0.046415888336127774,train,0.7977207977207977,0.01540496069515813,0.7946387368976201,0.015701611206989395,0.7935031376891841,0.015729964560140718 +flat_mae,patch,logistic,abide_dx,49,0.046415888336127774,test,0.6370967741935484,0.042301169224903984,0.6317074780542539,0.04315561297273528,0.6313025210084033,0.042925624917899446 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,train,0.801994301994302,0.01470316905742869,0.7984571056789664,0.015035516446300731,0.7967884828349945,0.015036747580832219 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,test,0.5887096774193549,0.04513608455584354,0.5740553647201454,0.04728664637180317,0.576155462184874,0.0456750975497822 +flat_mae,patch,logistic,abide_dx,51,0.046415888336127774,train,0.8148148148148148,0.015023767232963812,0.8119932098358494,0.015352311620923851,0.8107788851974899,0.015418408074006849 +flat_mae,patch,logistic,abide_dx,51,0.046415888336127774,test,0.5887096774193549,0.04322796094775131,0.5740553647201454,0.04591331000200175,0.576155462184874,0.044002248084473325 +flat_mae,patch,logistic,abide_dx,52,0.046415888336127774,train,0.8034188034188035,0.013771444419180335,0.799983482677458,0.014098739928495748,0.7983757844222961,0.014123943777703023 +flat_mae,patch,logistic,abide_dx,52,0.046415888336127774,test,0.6612903225806451,0.04279920387387191,0.6555555555555556,0.04374005410331952,0.654936974789916,0.043379502978802616 +flat_mae,patch,logistic,abide_dx,53,0.3593813663804626,train,0.9002849002849003,0.01135938951834395,0.8987655745269958,0.011589583865512157,0.8971576227390181,0.011733916590683808 +flat_mae,patch,logistic,abide_dx,53,0.3593813663804626,test,0.6048387096774194,0.04326400575014277,0.6041951664386684,0.04331266079436049,0.6066176470588236,0.043470972559348516 +flat_mae,patch,logistic,abide_dx,54,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,54,1291.5496650148827,test,0.5887096774193549,0.0442419207836933,0.5712833028269271,0.046806134291490956,0.5745798319327731,0.04487123093287576 +flat_mae,patch,logistic,abide_dx,55,0.005994842503189409,train,0.7222222222222222,0.017378228700567117,0.7131265206557706,0.01828115279843551,0.7114433370247324,0.017898011237934386 +flat_mae,patch,logistic,abide_dx,55,0.005994842503189409,test,0.5967741935483871,0.04303475775009988,0.5860042735042735,0.04471696873483475,0.5866596638655462,0.043713693107310776 +flat_mae,patch,logistic,abide_dx,56,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,56,166.81005372000556,test,0.6370967741935484,0.04208310246413973,0.6365057650967364,0.042248488172724974,0.6391806722689075,0.04241298695682428 +flat_mae,patch,logistic,abide_dx,57,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,57,21.54434690031882,test,0.6129032258064516,0.041728072684320924,0.5978378378378378,0.04350948450915952,0.5997899159663866,0.042092140592841346 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,train,0.7962962962962963,0.015229496292557473,0.7931187981081331,0.015547194462603933,0.7919158361018825,0.01559634494158874 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,test,0.6209677419354839,0.04216294083202188,0.6179613241560145,0.042568044757632144,0.618172268907563,0.042538441324485726 +flat_mae,patch,logistic,abide_dx,59,0.3593813663804626,train,0.905982905982906,0.010974223993513305,0.9044117647058824,0.011229965594420325,0.9023255813953488,0.011445929492358465 +flat_mae,patch,logistic,abide_dx,59,0.3593813663804626,test,0.6532258064516129,0.04352926700931646,0.6465831510572015,0.04475309797779523,0.6460084033613445,0.04421008486969677 +flat_mae,patch,logistic,abide_dx,60,2.782559402207126,train,0.9943019943019943,0.002818722272597746,0.994237967036575,0.0028528043878519264,0.9939461055740126,0.0030049932902350618 +flat_mae,patch,logistic,abide_dx,60,2.782559402207126,test,0.5725806451612904,0.045049100279630036,0.5678306043269548,0.04555113650754432,0.5677521008403361,0.04545428510316895 +flat_mae,patch,logistic,abide_dx,61,2.782559402207126,train,0.9914529914529915,0.0034168216003266833,0.9913569505548625,0.003458382161463672,0.9910668143226282,0.0036047573359188883 +flat_mae,patch,logistic,abide_dx,61,2.782559402207126,test,0.5645161290322581,0.04344677212779636,0.5588932806324111,0.04411021863045149,0.5588235294117647,0.043885974397892574 +flat_mae,patch,logistic,abide_dx,62,0.3593813663804626,train,0.9045584045584045,0.011590160987757898,0.9033342033816792,0.01174526495683663,0.9025101513473606,0.011779596880362403 +flat_mae,patch,logistic,abide_dx,62,0.3593813663804626,test,0.532258064516129,0.044449587949570206,0.5278361344537815,0.04491284496471821,0.5278361344537815,0.044848264422703045 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,train,0.8091168091168092,0.0150095669471964,0.8062083855231064,0.015306435498771429,0.8050203026947212,0.015358233944520442 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,test,0.6693548387096774,0.04378338922121208,0.6691611895620486,0.043887534383691834,0.6733193277310925,0.043776960114990514 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,train,0.9002849002849003,0.011906804377627535,0.8988364337063663,0.012125506112342218,0.8974529346622371,0.012264599339880321 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,test,0.6935483870967742,0.03833630076436075,0.6883597883597883,0.039486215625609694,0.6875,0.0391444184644421 +flat_mae,patch,logistic,abide_dx,65,2.782559402207126,train,0.99002849002849,0.003716369969685025,0.989913358334411,0.0037630887188553645,0.9894795127353266,0.00392292755829259 +flat_mae,patch,logistic,abide_dx,65,2.782559402207126,test,0.6612903225806451,0.0413512151801885,0.6502820306204673,0.0432984664117924,0.6502100840336134,0.04206976573172885 +flat_mae,patch,logistic,abide_dx,66,0.005994842503189409,train,0.7108262108262108,0.017011083481835134,0.7031341464684866,0.01767830298110431,0.7016980435585087,0.017403125180926626 +flat_mae,patch,logistic,abide_dx,66,0.005994842503189409,test,0.6209677419354839,0.04278813996841058,0.6153389215233318,0.04372055300066416,0.6150210084033614,0.04323088829163628 +flat_mae,patch,logistic,abide_dx,67,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,67,166.81005372000556,test,0.5887096774193549,0.04399300802926596,0.5865315462569467,0.04410082687029852,0.5871848739495797,0.04417341493277336 +flat_mae,patch,logistic,abide_dx,68,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,68,166.81005372000556,test,0.6532258064516129,0.03975163201251854,0.6521171788347361,0.03992539903285054,0.6538865546218487,0.04004601018823633 +flat_mae,patch,logistic,abide_dx,69,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,69,21.54434690031882,test,0.6290322580645161,0.0410696313617167,0.6191239316239316,0.04273022224519619,0.6192226890756303,0.04171780717914232 +flat_mae,patch,logistic,abide_dx,70,0.046415888336127774,train,0.8091168091168092,0.015022439472787827,0.8059269162210339,0.01537287690301833,0.8044296788482834,0.015409125757521515 +flat_mae,patch,logistic,abide_dx,70,0.046415888336127774,test,0.6451612903225806,0.04382367885406074,0.6356837606837606,0.04548777902762728,0.6355042016806722,0.04457440693988865 +flat_mae,patch,logistic,abide_dx,71,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,71,21.54434690031882,test,0.6370967741935484,0.04578166923882793,0.6317074780542539,0.04679606765744111,0.6313025210084033,0.04651091167125417 +flat_mae,patch,logistic,abide_dx,72,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,72,166.81005372000556,test,0.6129032258064516,0.042619923233149816,0.6045708211533352,0.04357197133587653,0.6045168067226891,0.04303233720748623 +flat_mae,patch,logistic,abide_dx,73,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,73,166.81005372000556,test,0.6290322580645161,0.04238898704480567,0.6255252100840336,0.04311185504729567,0.6255252100840336,0.04306630060235681 +flat_mae,patch,logistic,abide_dx,74,0.005994842503189409,train,0.7136752136752137,0.016176530799281153,0.7052040873887219,0.0167825780043892,0.7036913990402363,0.016469130338340514 +flat_mae,patch,logistic,abide_dx,74,0.005994842503189409,test,0.6048387096774194,0.044634283939697886,0.5931704050887178,0.046352551748906415,0.5940126050420168,0.04520131100253305 +flat_mae,patch,logistic,abide_dx,75,2.782559402207126,train,0.9914529914529915,0.003392217378169765,0.9913621262458472,0.003428723198165832,0.9913621262458472,0.003463845001265082 +flat_mae,patch,logistic,abide_dx,75,2.782559402207126,test,0.6048387096774194,0.042298586210669446,0.5931704050887178,0.04398294267566581,0.5940126050420168,0.04289013666282322 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,train,0.915954415954416,0.010636167878625844,0.9147038895833635,0.010839809873184402,0.913141380583241,0.010996618358868442 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,test,0.6290322580645161,0.04279819790618115,0.6191239316239316,0.04504349878867377,0.6192226890756303,0.043773764516261866 +flat_mae,patch,logistic,abide_dx,77,0.046415888336127774,train,0.7891737891737892,0.015717463806775606,0.7859615004285054,0.016012809694820617,0.7848652639350313,0.016024415271167856 +flat_mae,patch,logistic,abide_dx,77,0.046415888336127774,test,0.5806451612903226,0.04487488387632477,0.5735449735449736,0.04549683732345315,0.5735294117647058,0.0450557328921649 +flat_mae,patch,logistic,abide_dx,78,0.046415888336127774,train,0.811965811965812,0.01495638146895544,0.8089638395250072,0.01520330314840955,0.8076042820228866,0.015161113051824498 +flat_mae,patch,logistic,abide_dx,78,0.046415888336127774,test,0.6290322580645161,0.04379527091390663,0.6210470369386127,0.04515488737709276,0.6207983193277311,0.04443602412764422 +flat_mae,patch,logistic,abide_dx,79,0.3593813663804626,train,0.8988603988603988,0.011633344535233445,0.8973555281426915,0.011878355397461718,0.8958656330749355,0.012079854506343938 +flat_mae,patch,logistic,abide_dx,79,0.3593813663804626,test,0.6048387096774194,0.04434889989567154,0.5989703649924097,0.04546326677935883,0.5987394957983193,0.04502478273616522 +flat_mae,patch,logistic,abide_dx,80,0.046415888336127774,train,0.7977207977207977,0.014731566453369696,0.7940277043110289,0.015093827185079322,0.7923218899963086,0.015101040493583913 +flat_mae,patch,logistic,abide_dx,80,0.046415888336127774,test,0.6935483870967742,0.038541111474284934,0.6906512605042017,0.03890517902941172,0.6906512605042017,0.038703815494591974 +flat_mae,patch,logistic,abide_dx,81,0.3593813663804626,train,0.9074074074074074,0.011038101500838752,0.9061580320950635,0.011223847142600524,0.905094130675526,0.01135149541874801 +flat_mae,patch,logistic,abide_dx,81,0.3593813663804626,test,0.6532258064516129,0.04437590115747488,0.6465831510572015,0.045552638754952114,0.6460084033613445,0.0449726731650029 +flat_mae,patch,logistic,abide_dx,82,0.046415888336127774,train,0.792022792022792,0.015058994439480328,0.7892862723168395,0.01530812478083325,0.7886304909560724,0.015357492502541757 +flat_mae,patch,logistic,abide_dx,82,0.046415888336127774,test,0.6451612903225806,0.043591543729299534,0.6336287939833467,0.04584599189118749,0.6339285714285714,0.04454030341273281 +flat_mae,patch,logistic,abide_dx,83,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,83,166.81005372000556,test,0.6290322580645161,0.04372037721675129,0.628161668839635,0.043861877363717994,0.6302521008403361,0.043763756500371384 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,train,0.896011396011396,0.01162921609111885,0.8943893165167394,0.011849107859600473,0.8926910299003323,0.01193608401422136 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,test,0.6290322580645161,0.04518633809996662,0.6290322580645161,0.04529126836909797,0.6349789915966386,0.044935218358781184 +flat_mae,patch,logistic,abide_dx,85,0.046415888336127774,train,0.811965811965812,0.014656705055792977,0.809100797679478,0.014939447062217037,0.8078995939461056,0.014976468328309137 +flat_mae,patch,logistic,abide_dx,85,0.046415888336127774,test,0.6774193548387096,0.038984574979963126,0.6688034188034189,0.040636378793984615,0.6680672268907563,0.03964454624039576 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,train,0.7991452991452992,0.01392189780454544,0.7961567530721056,0.01423296477487321,0.7950904392764857,0.014297939141970674 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,test,0.6370967741935484,0.042915278442873274,0.6317074780542539,0.04409284167982467,0.6313025210084033,0.043750084790136444 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,train,0.9088319088319088,0.0109051325630738,0.9075075965315349,0.011073833714484777,0.9060908084163898,0.01111071730396578 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,test,0.6612903225806451,0.044142126311456344,0.6590730557737627,0.04439521114371359,0.6596638655462186,0.044450533282304715 +flat_mae,patch,logistic,abide_dx,88,0.3593813663804626,train,0.8988603988603988,0.012154229487612337,0.8975631110462571,0.012315871259214474,0.8967515688445922,0.012357975653110045 +flat_mae,patch,logistic,abide_dx,88,0.3593813663804626,test,0.6451612903225806,0.04420059986126583,0.6436781609195402,0.044393831701847845,0.6449579831932774,0.04445183719993839 +flat_mae,patch,logistic,abide_dx,89,2.782559402207126,train,0.9914529914529915,0.00344934600192671,0.991351626600524,0.003495007289195398,0.9907715023994094,0.003725643861785322 +flat_mae,patch,logistic,abide_dx,89,2.782559402207126,test,0.6451612903225806,0.041680103279205585,0.6405797101449275,0.04243099140932304,0.6402310924369747,0.04218175630187983 +flat_mae,patch,logistic,abide_dx,90,0.046415888336127774,train,0.7877492877492878,0.015683857720342278,0.78412170320088,0.01603594238217432,0.782687338501292,0.016031505470058872 +flat_mae,patch,logistic,abide_dx,90,0.046415888336127774,test,0.6532258064516129,0.04171909431405499,0.6408702094699266,0.04379852021669606,0.641281512605042,0.04237596204751009 +flat_mae,patch,logistic,abide_dx,91,0.046415888336127774,train,0.8005698005698005,0.01477791673947661,0.7972370766488414,0.015075745163322106,0.7957918050941306,0.015068286427786725 +flat_mae,patch,logistic,abide_dx,91,0.046415888336127774,test,0.6451612903225806,0.042110107887113134,0.6428384393820372,0.0422836401477386,0.6433823529411764,0.04221710110349343 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,train,0.9943019943019943,0.0027844772742178464,0.9942414174972314,0.0028143062974986294,0.9942414174972314,0.002842531033794735 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,test,0.6048387096774194,0.044314230677996946,0.6041951664386684,0.0442844716590143,0.6066176470588236,0.04437074473023588 +flat_mae,patch,logistic,abide_dx,93,0.005994842503189409,train,0.7222222222222222,0.01653421546382569,0.7140039653771182,0.017234084612308145,0.7123292727943891,0.01693319425565561 +flat_mae,patch,logistic,abide_dx,93,0.005994842503189409,test,0.5483870967741935,0.041534920052492104,0.5308108108108108,0.04356241849911919,0.5346638655462185,0.04190240223138046 +flat_mae,patch,logistic,abide_dx,94,2.782559402207126,train,0.9971509971509972,0.0021498534722985832,0.9971207087486158,0.002172569407240183,0.9971207087486158,0.002176107018465351 +flat_mae,patch,logistic,abide_dx,94,2.782559402207126,test,0.5967741935483871,0.04330664984636554,0.5950888192267503,0.04339607108000503,0.5961134453781513,0.04345829193019775 +flat_mae,patch,logistic,abide_dx,95,0.3593813663804626,train,0.905982905982906,0.011010382412631976,0.904867892605764,0.011137429465605196,0.9043927648578811,0.011145208696964214 +flat_mae,patch,logistic,abide_dx,95,0.3593813663804626,test,0.6209677419354839,0.043858664664819205,0.6097756946769334,0.04596402863871766,0.6102941176470589,0.044748609531486135 +flat_mae,patch,logistic,abide_dx,96,0.005994842503189409,train,0.7236467236467237,0.016723009690008003,0.7156114266622118,0.017481667550565398,0.7139165743816906,0.0172002633579958 +flat_mae,patch,logistic,abide_dx,96,0.005994842503189409,test,0.6048387096774194,0.04288702095129717,0.5953379953379954,0.04457367554226418,0.5955882352941176,0.04356897327172391 +flat_mae,patch,logistic,abide_dx,97,0.3593813663804626,train,0.9074074074074074,0.011247542838723902,0.9059630900491514,0.011472183770938088,0.9042081949058693,0.011617694894230837 +flat_mae,patch,logistic,abide_dx,97,0.3593813663804626,test,0.6693548387096774,0.04361636271829867,0.6644445911160979,0.04454316187732389,0.6638655462184874,0.04433263941920254 +flat_mae,patch,logistic,abide_dx,98,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,98,10000.0,test,0.5725806451612904,0.04367926569690728,0.5662332519305657,0.04409609839613478,0.5661764705882353,0.04381598426792919 +flat_mae,patch,logistic,abide_dx,99,0.3593813663804626,train,0.8888888888888888,0.011144154241790778,0.8874269126076266,0.01134083701852161,0.8865263935031378,0.011503632797464794 +flat_mae,patch,logistic,abide_dx,99,0.3593813663804626,test,0.6451612903225806,0.04169651514995424,0.6391534391534391,0.042514621024023785,0.6386554621848739,0.042180391168879214 +flat_mae,patch,logistic,abide_dx,100,0.046415888336127774,train,0.8048433048433048,0.015553828055982601,0.8013570034389814,0.015907860660906728,0.7996677740863787,0.015905927407039895 +flat_mae,patch,logistic,abide_dx,100,0.046415888336127774,test,0.6290322580645161,0.03965011845960313,0.6119727891156463,0.04289841245014668,0.6144957983193278,0.04072903677058663 diff --git a/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..104b9377ef0b203f3042b45edf9d756c43695520 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:52:11 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:20:09 time: 4.1857 data: 3.3793 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:40 time: 0.1812 data: 0.0622 max mem: 3005 +extract (train) [ 40/289] eta: 0:01:05 time: 0.1492 data: 0.0446 max mem: 3005 +extract (train) [ 60/289] eta: 0:00:52 time: 0.1609 data: 0.0520 max mem: 3005 +extract (train) [ 80/289] eta: 0:00:44 time: 0.1646 data: 0.0519 max mem: 3005 +extract (train) [100/289] eta: 0:00:38 time: 0.1695 data: 0.0543 max mem: 3005 +extract (train) [120/289] eta: 0:00:33 time: 0.1809 data: 0.0605 max mem: 3005 +extract (train) [140/289] eta: 0:00:29 time: 0.1634 data: 0.0517 max mem: 3005 +extract (train) [160/289] eta: 0:00:24 time: 0.1648 data: 0.0524 max mem: 3005 +extract (train) [180/289] eta: 0:00:20 time: 0.1809 data: 0.0605 max mem: 3005 +extract (train) [200/289] eta: 0:00:16 time: 0.1729 data: 0.0563 max mem: 3005 +extract (train) [220/289] eta: 0:00:12 time: 0.1618 data: 0.0520 max mem: 3005 +extract (train) [240/289] eta: 0:00:09 time: 0.1600 data: 0.0501 max mem: 3005 +extract (train) [260/289] eta: 0:00:05 time: 0.1715 data: 0.0566 max mem: 3005 +extract (train) [280/289] eta: 0:00:01 time: 0.1539 data: 0.0473 max mem: 3005 +extract (train) [288/289] eta: 0:00:00 time: 0.1516 data: 0.0469 max mem: 3005 +extract (train) Total time: 0:00:52 (0.1817 s / it) +extract (validation) [ 0/62] eta: 0:03:51 time: 3.7403 data: 3.5680 max mem: 3005 +extract (validation) [20/62] eta: 0:00:15 time: 0.1984 data: 0.0651 max mem: 3005 +extract (validation) [40/62] eta: 0:00:05 time: 0.1551 data: 0.0472 max mem: 3005 +extract (validation) [60/62] eta: 0:00:00 time: 0.1432 data: 0.0414 max mem: 3005 +extract (validation) [61/62] eta: 0:00:00 time: 0.1434 data: 0.0417 max mem: 3005 +extract (validation) Total time: 0:00:14 (0.2276 s / it) +extract (test) [ 0/62] eta: 0:03:49 time: 3.7007 data: 3.5589 max mem: 3005 +extract (test) [20/62] eta: 0:00:15 time: 0.2078 data: 0.0745 max mem: 3005 +extract (test) [40/62] eta: 0:00:05 time: 0.1518 data: 0.0464 max mem: 3005 +extract (test) [60/62] eta: 0:00:00 time: 0.1460 data: 0.0431 max mem: 3005 +extract (test) [61/62] eta: 0:00:00 time: 0.1466 data: 0.0434 max mem: 3005 +extract (test) Total time: 0:00:14 (0.2300 s / it) +feature extraction time: 0:01:21 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|-------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.046416 | train | 0.7906 | 0.014819 | 0.78617 | 0.015299 | 0.78415 | 0.015278 | +| flat_mae | patch | logistic | abide_dx | | 0.046416 | test | 0.6371 | 0.041168 | 0.63306 | 0.041889 | 0.63276 | 0.041627 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04237947346383389, "f1": 0.6595915634415801, "f1_std": 0.04441234874135119, "bacc": 0.6591386554621849, "bacc_std": 0.043255184439127445} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.0418490299903622, "f1": 0.6255252100840336, "f1_std": 0.04224287207441996, "bacc": 0.6255252100840336, "bacc_std": 0.04223940254247183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.044832037099247025, "f1": 0.6017043592264831, "f1_std": 0.04538466148578306, "bacc": 0.601890756302521, "bacc_std": 0.045329751363327724} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.7016129032258065, "acc_std": 0.03973943160838807, "f1": 0.6909813430322624, "f1_std": 0.042422564444162474, "bacc": 0.6901260504201681, "bacc_std": 0.041079366401794506} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04256584542412067, "f1": 0.6266038229903116, "f1_std": 0.04273817736047087, "bacc": 0.6271008403361344, "bacc_std": 0.04267661346044492} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 166.81005372000556, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04404506489875436, "f1": 0.6330637206549615, "f1_std": 0.04485219372337858, "bacc": 0.6328781512605042, "bacc_std": 0.044760993665419045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 1291.5496650148827, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04256437555746936, "f1": 0.6580882352941176, "f1_std": 0.04290575299762164, "bacc": 0.6580882352941176, "bacc_std": 0.04275427703874282} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04191781265392919, "f1": 0.6017043592264831, "f1_std": 0.04251349484302152, "bacc": 0.601890756302521, "bacc_std": 0.04253020434614034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.717741935483871, "acc_std": 0.03871068211061777, "f1": 0.7076850542197077, "f1_std": 0.04124479528466586, "bacc": 0.70640756302521, "bacc_std": 0.039951887460948875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.042713292289279066, "f1": 0.6569169960474308, "f1_std": 0.04336551201222108, "bacc": 0.6565126050420168, "bacc_std": 0.04315140025741813} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04691496891198467, "f1": 0.551522325244953, "f1_std": 0.04715993173852779, "bacc": 0.5514705882352942, "bacc_std": 0.0469521104335761} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 2.782559402207126, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04473672551474323, "f1": 0.5766806722689075, "f1_std": 0.04506519690269759, "bacc": 0.5766806722689075, "bacc_std": 0.044863854821259666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04267310343182977, "f1": 0.6342182890855457, "f1_std": 0.042843245061922275, "bacc": 0.634453781512605, "bacc_std": 0.04277996402742408} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04298801952987525, "f1": 0.6242424242424243, "f1_std": 0.04408404111994835, "bacc": 0.6239495798319328, "bacc_std": 0.04383632266666239} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 2.782559402207126, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.0449762655641828, "f1": 0.5873947935016637, "f1_std": 0.0451488177158158, "bacc": 0.5887605042016807, "bacc_std": 0.04519259794100779} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 2.782559402207126, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04286419216342589, "f1": 0.5703170970905524, "f1_std": 0.04314559861635074, "bacc": 0.5709033613445378, "bacc_std": 0.04311301136489517} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04376458876727602, "f1": 0.5854473942969518, "f1_std": 0.04440352299273254, "bacc": 0.585609243697479, "bacc_std": 0.044306992689567844} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.7096774193548387, "acc_std": 0.04235441797692907, "f1": 0.7069327731092437, "f1_std": 0.042753430530421035, "bacc": 0.7069327731092437, "bacc_std": 0.04262754531424951} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 10000.0, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04131984831439056, "f1": 0.6356837606837606, "f1_std": 0.04305739158896024, "bacc": 0.6355042016806722, "bacc_std": 0.04207684124933722} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04204661144276597, "f1": 0.6418067226890756, "f1_std": 0.042800680205757065, "bacc": 0.6418067226890756, "bacc_std": 0.04281570331374563} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04412349050281408, "f1": 0.6375232527238905, "f1_std": 0.045328106611363314, "bacc": 0.6370798319327731, "bacc_std": 0.04469522725339643} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.042924442970997866, "f1": 0.665680278818965, "f1_std": 0.043640647573690274, "bacc": 0.6654411764705883, "bacc_std": 0.04348855644330462} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04213467271928536, "f1": 0.585171024783232, "f1_std": 0.0455772243207917, "bacc": 0.5892857142857143, "bacc_std": 0.04302249834506851} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.040310511096868264, "f1": 0.6760710553814002, "f1_std": 0.040620294010812054, "bacc": 0.6775210084033614, "bacc_std": 0.04073127386810247} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04497802388583003, "f1": 0.607905138339921, "f1_std": 0.04562548393079618, "bacc": 0.6076680672268908, "bacc_std": 0.0453690344969679} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04383483747051651, "f1": 0.5643931861867832, "f1_std": 0.045016321860362564, "bacc": 0.5646008403361344, "bacc_std": 0.04437317550860183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.0433451770148358, "f1": 0.6342182890855457, "f1_std": 0.043750688224369526, "bacc": 0.634453781512605, "bacc_std": 0.04383221662326893} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 10000.0, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04177822872527551, "f1": 0.6480760345851759, "f1_std": 0.042592892391561346, "bacc": 0.6475840336134454, "bacc_std": 0.042292328002531826} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.03877511605901675, "f1": 0.6630211440312852, "f1_std": 0.03974032054868613, "bacc": 0.6622899159663866, "bacc_std": 0.039263926616287546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 1291.5496650148827, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.0408511039643883, "f1": 0.6819885579009667, "f1_std": 0.041563053570272075, "bacc": 0.6817226890756303, "bacc_std": 0.04156475817609348} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.0420973149143372, "f1": 0.6118548118548119, "f1_std": 0.04311895291703265, "bacc": 0.6118697478991597, "bacc_std": 0.04233277949973712} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.043704392014688516, "f1": 0.6502820306204673, "f1_std": 0.04577179888700723, "bacc": 0.6502100840336134, "bacc_std": 0.044532360175593325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04147088794981986, "f1": 0.5836690840719849, "f1_std": 0.04366107605779701, "bacc": 0.5850840336134454, "bacc_std": 0.04212667456743428} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 2.782559402207126, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04228328476662577, "f1": 0.58994708994709, "f1_std": 0.04314068654320464, "bacc": 0.5898109243697479, "bacc_std": 0.04273326783327153} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04238335280759793, "f1": 0.6682977751680041, "f1_std": 0.04254047249689141, "bacc": 0.6701680672268908, "bacc_std": 0.04253785621318848} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.0406622435941538, "f1": 0.6522435897435898, "f1_std": 0.0422832608406869, "bacc": 0.6517857142857143, "bacc_std": 0.04136384017078413} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.040001118610789664, "f1": 0.6288435374149659, "f1_std": 0.04315819754919613, "bacc": 0.6307773109243697, "bacc_std": 0.041004130640820695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 21.54434690031882, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.045259900981294995, "f1": 0.6045708211533352, "f1_std": 0.04698721852285207, "bacc": 0.6045168067226891, "bacc_std": 0.04619479143324546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.041395719816748236, "f1": 0.5716183895827798, "f1_std": 0.0420874662842533, "bacc": 0.571953781512605, "bacc_std": 0.041541566548323035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 2.782559402207126, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04543103772032369, "f1": 0.5788211788211788, "f1_std": 0.0468204045338352, "bacc": 0.5793067226890757, "bacc_std": 0.04594888915579086} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.040990503582842515, "f1": 0.6227513227513227, "f1_std": 0.04206383257735084, "bacc": 0.6223739495798319, "bacc_std": 0.04161823842632969} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.044282098712563056, "f1": 0.5703170970905524, "f1_std": 0.04452592328159447, "bacc": 0.5709033613445378, "bacc_std": 0.04446716653413546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 2.782559402207126, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.0426730272287027, "f1": 0.6428384393820372, "f1_std": 0.042842192118187405, "bacc": 0.6433823529411764, "bacc_std": 0.04276863619846117} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04224003555322818, "f1": 0.5921052631578947, "f1_std": 0.0458758222099516, "bacc": 0.5966386554621849, "bacc_std": 0.04308664073601647} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 1291.5496650148827, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04065328579834442, "f1": 0.6145945945945945, "f1_std": 0.04326166282267838, "bacc": 0.6160714285714286, "bacc_std": 0.04147002284571123} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.044800108122918444, "f1": 0.5441176470588236, "f1_std": 0.04513599085373431, "bacc": 0.5441176470588236, "bacc_std": 0.04517028189802117} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04102610777603427, "f1": 0.6025641025641025, "f1_std": 0.04280546240696318, "bacc": 0.6029411764705883, "bacc_std": 0.04174033229828727} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04303037186948081, "f1": 0.6227513227513227, "f1_std": 0.0437512918319916, "bacc": 0.6223739495798319, "bacc_std": 0.04331829996783883} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.042301169224903984, "f1": 0.6317074780542539, "f1_std": 0.04315561297273528, "bacc": 0.6313025210084033, "bacc_std": 0.042925624917899446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04513608455584354, "f1": 0.5740553647201454, "f1_std": 0.04728664637180317, "bacc": 0.576155462184874, "bacc_std": 0.0456750975497822} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04322796094775131, "f1": 0.5740553647201454, "f1_std": 0.04591331000200175, "bacc": 0.576155462184874, "bacc_std": 0.044002248084473325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04279920387387191, "f1": 0.6555555555555556, "f1_std": 0.04374005410331952, "bacc": 0.654936974789916, "bacc_std": 0.043379502978802616} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04326400575014277, "f1": 0.6041951664386684, "f1_std": 0.04331266079436049, "bacc": 0.6066176470588236, "bacc_std": 0.043470972559348516} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 1291.5496650148827, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.0442419207836933, "f1": 0.5712833028269271, "f1_std": 0.046806134291490956, "bacc": 0.5745798319327731, "bacc_std": 0.04487123093287576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04303475775009988, "f1": 0.5860042735042735, "f1_std": 0.04471696873483475, "bacc": 0.5866596638655462, "bacc_std": 0.043713693107310776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 166.81005372000556, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04208310246413973, "f1": 0.6365057650967364, "f1_std": 0.042248488172724974, "bacc": 0.6391806722689075, "bacc_std": 0.04241298695682428} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 21.54434690031882, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.041728072684320924, "f1": 0.5978378378378378, "f1_std": 0.04350948450915952, "bacc": 0.5997899159663866, "bacc_std": 0.042092140592841346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04216294083202188, "f1": 0.6179613241560145, "f1_std": 0.042568044757632144, "bacc": 0.618172268907563, "bacc_std": 0.042538441324485726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04352926700931646, "f1": 0.6465831510572015, "f1_std": 0.04475309797779523, "bacc": 0.6460084033613445, "bacc_std": 0.04421008486969677} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 2.782559402207126, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.045049100279630036, "f1": 0.5678306043269548, "f1_std": 0.04555113650754432, "bacc": 0.5677521008403361, "bacc_std": 0.04545428510316895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 2.782559402207126, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04344677212779636, "f1": 0.5588932806324111, "f1_std": 0.04411021863045149, "bacc": 0.5588235294117647, "bacc_std": 0.043885974397892574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.532258064516129, "acc_std": 0.044449587949570206, "f1": 0.5278361344537815, "f1_std": 0.04491284496471821, "bacc": 0.5278361344537815, "bacc_std": 0.044848264422703045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04378338922121208, "f1": 0.6691611895620486, "f1_std": 0.043887534383691834, "bacc": 0.6733193277310925, "bacc_std": 0.043776960114990514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.03833630076436075, "f1": 0.6883597883597883, "f1_std": 0.039486215625609694, "bacc": 0.6875, "bacc_std": 0.0391444184644421} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 2.782559402207126, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.0413512151801885, "f1": 0.6502820306204673, "f1_std": 0.0432984664117924, "bacc": 0.6502100840336134, "bacc_std": 0.04206976573172885} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04278813996841058, "f1": 0.6153389215233318, "f1_std": 0.04372055300066416, "bacc": 0.6150210084033614, "bacc_std": 0.04323088829163628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 166.81005372000556, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04399300802926596, "f1": 0.5865315462569467, "f1_std": 0.04410082687029852, "bacc": 0.5871848739495797, "bacc_std": 0.04417341493277336} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 166.81005372000556, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.03975163201251854, "f1": 0.6521171788347361, "f1_std": 0.03992539903285054, "bacc": 0.6538865546218487, "bacc_std": 0.04004601018823633} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 21.54434690031882, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.0410696313617167, "f1": 0.6191239316239316, "f1_std": 0.04273022224519619, "bacc": 0.6192226890756303, "bacc_std": 0.04171780717914232} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04382367885406074, "f1": 0.6356837606837606, "f1_std": 0.04548777902762728, "bacc": 0.6355042016806722, "bacc_std": 0.04457440693988865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 21.54434690031882, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04578166923882793, "f1": 0.6317074780542539, "f1_std": 0.04679606765744111, "bacc": 0.6313025210084033, "bacc_std": 0.04651091167125417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 166.81005372000556, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.042619923233149816, "f1": 0.6045708211533352, "f1_std": 0.04357197133587653, "bacc": 0.6045168067226891, "bacc_std": 0.04303233720748623} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 166.81005372000556, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04238898704480567, "f1": 0.6255252100840336, "f1_std": 0.04311185504729567, "bacc": 0.6255252100840336, "bacc_std": 0.04306630060235681} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.044634283939697886, "f1": 0.5931704050887178, "f1_std": 0.046352551748906415, "bacc": 0.5940126050420168, "bacc_std": 0.04520131100253305} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 2.782559402207126, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.042298586210669446, "f1": 0.5931704050887178, "f1_std": 0.04398294267566581, "bacc": 0.5940126050420168, "bacc_std": 0.04289013666282322} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04279819790618115, "f1": 0.6191239316239316, "f1_std": 0.04504349878867377, "bacc": 0.6192226890756303, "bacc_std": 0.043773764516261866} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04487488387632477, "f1": 0.5735449735449736, "f1_std": 0.04549683732345315, "bacc": 0.5735294117647058, "bacc_std": 0.0450557328921649} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04379527091390663, "f1": 0.6210470369386127, "f1_std": 0.04515488737709276, "bacc": 0.6207983193277311, "bacc_std": 0.04443602412764422} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04434889989567154, "f1": 0.5989703649924097, "f1_std": 0.04546326677935883, "bacc": 0.5987394957983193, "bacc_std": 0.04502478273616522} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.046415888336127774, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.038541111474284934, "f1": 0.6906512605042017, "f1_std": 0.03890517902941172, "bacc": 0.6906512605042017, "bacc_std": 0.038703815494591974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04437590115747488, "f1": 0.6465831510572015, "f1_std": 0.045552638754952114, "bacc": 0.6460084033613445, "bacc_std": 0.0449726731650029} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.043591543729299534, "f1": 0.6336287939833467, "f1_std": 0.04584599189118749, "bacc": 0.6339285714285714, "bacc_std": 0.04454030341273281} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 166.81005372000556, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04372037721675129, "f1": 0.628161668839635, "f1_std": 0.043861877363717994, "bacc": 0.6302521008403361, "bacc_std": 0.043763756500371384} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04518633809996662, "f1": 0.6290322580645161, "f1_std": 0.04529126836909797, "bacc": 0.6349789915966386, "bacc_std": 0.044935218358781184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.038984574979963126, "f1": 0.6688034188034189, "f1_std": 0.040636378793984615, "bacc": 0.6680672268907563, "bacc_std": 0.03964454624039576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.042915278442873274, "f1": 0.6317074780542539, "f1_std": 0.04409284167982467, "bacc": 0.6313025210084033, "bacc_std": 0.043750084790136444} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.044142126311456344, "f1": 0.6590730557737627, "f1_std": 0.04439521114371359, "bacc": 0.6596638655462186, "bacc_std": 0.044450533282304715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04420059986126583, "f1": 0.6436781609195402, "f1_std": 0.044393831701847845, "bacc": 0.6449579831932774, "bacc_std": 0.04445183719993839} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 2.782559402207126, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.041680103279205585, "f1": 0.6405797101449275, "f1_std": 0.04243099140932304, "bacc": 0.6402310924369747, "bacc_std": 0.04218175630187983} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04171909431405499, "f1": 0.6408702094699266, "f1_std": 0.04379852021669606, "bacc": 0.641281512605042, "bacc_std": 0.04237596204751009} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.042110107887113134, "f1": 0.6428384393820372, "f1_std": 0.0422836401477386, "bacc": 0.6433823529411764, "bacc_std": 0.04221710110349343} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.044314230677996946, "f1": 0.6041951664386684, "f1_std": 0.0442844716590143, "bacc": 0.6066176470588236, "bacc_std": 0.04437074473023588} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.041534920052492104, "f1": 0.5308108108108108, "f1_std": 0.04356241849911919, "bacc": 0.5346638655462185, "bacc_std": 0.04190240223138046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 2.782559402207126, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04330664984636554, "f1": 0.5950888192267503, "f1_std": 0.04339607108000503, "bacc": 0.5961134453781513, "bacc_std": 0.04345829193019775} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.043858664664819205, "f1": 0.6097756946769334, "f1_std": 0.04596402863871766, "bacc": 0.6102941176470589, "bacc_std": 0.044748609531486135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04288702095129717, "f1": 0.5953379953379954, "f1_std": 0.04457367554226418, "bacc": 0.5955882352941176, "bacc_std": 0.04356897327172391} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04361636271829867, "f1": 0.6644445911160979, "f1_std": 0.04454316187732389, "bacc": 0.6638655462184874, "bacc_std": 0.04433263941920254} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 10000.0, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04367926569690728, "f1": 0.5662332519305657, "f1_std": 0.04409609839613478, "bacc": 0.5661764705882353, "bacc_std": 0.04381598426792919} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04169651514995424, "f1": 0.6391534391534391, "f1_std": 0.042514621024023785, "bacc": 0.6386554621848739, "bacc_std": 0.042180391168879214} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.03965011845960313, "f1": 0.6119727891156463, "f1_std": 0.04289841245014668, "bacc": 0.6144957983193278, "bacc_std": 0.04072903677058663} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 364.68 | 1722 | 0.88343 | 0.094633 | 0.88124 | 0.096835 | 0.88027 | 0.09733 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 364.68 | 1722 | 0.62669 | 0.037059 | 0.61993 | 0.037985 | 0.62046 | 0.037665 | + + +done! total time: 0:05:39 diff --git a/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a0a07b57e180732e7f8ad14ea368e31d625bc4d --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (abide_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic +model: flat_mae +representation: reg +dataset: abide_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..c014743a8ffe754d81749ef4c62d5172de0d7569 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,abide_dx,,0.005994842503189409,train,0.7464387464387464,0.016669835820154036,0.7371450208254449,0.017754817361997664,0.7347823231991595,0.017313480805242414 +flat_mae,reg,logistic,abide_dx,,0.005994842503189409,test,0.5725806451612904,0.043679563486705986,0.5513074349696183,0.04765749563870436,0.5586540979313956,0.04444102872447975 +flat_mae,reg,logistic,abide_dx,1,0.3593813663804626,train,0.9515669515669516,0.00798548410736162,0.9509296798546065,0.008109843253284019,0.9498708010335917,0.00828577316896061 +flat_mae,reg,logistic,abide_dx,1,0.3593813663804626,test,0.6854838709677419,0.040436210861099865,0.6794591370053689,0.041489049225923016,0.6785714285714286,0.04109573610684454 +flat_mae,reg,logistic,abide_dx,2,0.3593813663804626,train,0.9544159544159544,0.0074339318007584495,0.9539037362925997,0.007522325597162053,0.9536360280546328,0.007585403919516863 +flat_mae,reg,logistic,abide_dx,2,0.3593813663804626,test,0.5967741935483871,0.04477625030137057,0.5929621848739496,0.04545449300319796,0.5929621848739496,0.04541128452827496 +flat_mae,reg,logistic,abide_dx,3,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,3,2.782559402207126,test,0.5725806451612904,0.04447805758640692,0.5718845677806006,0.04455760274805515,0.5740546218487395,0.044729956999862173 +flat_mae,reg,logistic,abide_dx,4,0.3593813663804626,train,0.9529914529914529,0.008266733649774925,0.952418998716303,0.008378506222476673,0.9517534145441122,0.008491254829695248 +flat_mae,reg,logistic,abide_dx,4,0.3593813663804626,test,0.6532258064516129,0.04184497366494297,0.6513893429225237,0.042094203806670505,0.6523109243697479,0.04218164357463039 +flat_mae,reg,logistic,abide_dx,5,0.3593813663804626,train,0.9586894586894587,0.0073219838813689195,0.9581595805681895,0.007430346043906575,0.9572166851236619,0.007572147214256731 +flat_mae,reg,logistic,abide_dx,5,0.3593813663804626,test,0.5967741935483871,0.04243460721743788,0.5880946053680574,0.043704890722018885,0.5882352941176471,0.043004709940939856 +flat_mae,reg,logistic,abide_dx,6,0.046415888336127774,train,0.8433048433048433,0.013279799450023536,0.8404439962972758,0.013604810677544295,0.838390550018457,0.013648762469765709 +flat_mae,reg,logistic,abide_dx,6,0.046415888336127774,test,0.6370967741935484,0.040122082169641575,0.6301451580831179,0.04107176508166805,0.6297268907563025,0.04046331294185677 +flat_mae,reg,logistic,abide_dx,7,0.3593813663804626,train,0.9629629629629629,0.0069723685780574305,0.9625237152689373,0.0070621166924182514,0.9619785898855666,0.007163123412004203 +flat_mae,reg,logistic,abide_dx,7,0.3593813663804626,test,0.6209677419354839,0.04293312079971005,0.6167554415729598,0.043698413024880256,0.6165966386554622,0.043539373634497294 +flat_mae,reg,logistic,abide_dx,8,0.046415888336127774,train,0.8518518518518519,0.012799227661539176,0.848907284768212,0.01319827279229894,0.8464377999261721,0.013319911142232271 +flat_mae,reg,logistic,abide_dx,8,0.046415888336127774,test,0.5967741935483871,0.044144380466181375,0.5860042735042735,0.04552120049140009,0.5866596638655462,0.04455999309080427 +flat_mae,reg,logistic,abide_dx,9,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,9,2.782559402207126,test,0.6370967741935484,0.044201859355093305,0.6317074780542539,0.04533784459506236,0.6313025210084033,0.04492528177047167 +flat_mae,reg,logistic,abide_dx,10,0.046415888336127774,train,0.8603988603988604,0.012761556114747526,0.8578501057921184,0.013107035123192053,0.8556662975267626,0.013226359033513366 +flat_mae,reg,logistic,abide_dx,10,0.046415888336127774,test,0.6048387096774194,0.040242795258770656,0.5972691721349506,0.04115962243551246,0.5971638655462186,0.040807026500779525 +flat_mae,reg,logistic,abide_dx,11,0.046415888336127774,train,0.8660968660968661,0.012717936910919778,0.8634354304635761,0.013079944279736362,0.8608342561830934,0.01316148335263242 +flat_mae,reg,logistic,abide_dx,11,0.046415888336127774,test,0.6290322580645161,0.04141731981735753,0.6191239316239316,0.04290270934941534,0.6192226890756303,0.04192092435608226 +flat_mae,reg,logistic,abide_dx,12,0.046415888336127774,train,0.8547008547008547,0.012841189720594927,0.8519318097223256,0.013204645983298152,0.8496124031007752,0.013287504634848665 +flat_mae,reg,logistic,abide_dx,12,0.046415888336127774,test,0.6048387096774194,0.04390595142163911,0.6017043592264831,0.044356730149835946,0.601890756302521,0.04428867184475829 +flat_mae,reg,logistic,abide_dx,13,0.3593813663804626,train,0.9501424501424501,0.007852143362697158,0.9495029420650563,0.007977118302696257,0.9485788113695091,0.008178719135660957 +flat_mae,reg,logistic,abide_dx,13,0.3593813663804626,test,0.6290322580645161,0.04451905086250414,0.6242424242424243,0.045235849658404305,0.6239495798319328,0.045121420057164345 +flat_mae,reg,logistic,abide_dx,14,0.046415888336127774,train,0.8547008547008547,0.013157440083839438,0.8518129139072848,0.013583874576686682,0.8493170911775563,0.013722987100125558 +flat_mae,reg,logistic,abide_dx,14,0.046415888336127774,test,0.6048387096774194,0.042366838582155376,0.5989703649924097,0.043100103313728204,0.5987394957983193,0.04266176552058258 +flat_mae,reg,logistic,abide_dx,15,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,15,166.81005372000556,test,0.5967741935483871,0.04236254014447428,0.5860042735042735,0.043834677016484636,0.5866596638655462,0.042829337917633284 +flat_mae,reg,logistic,abide_dx,16,0.046415888336127774,train,0.8376068376068376,0.014013757043785671,0.8348930481283423,0.014369512374753878,0.8332225913621263,0.01447807183111223 +flat_mae,reg,logistic,abide_dx,16,0.046415888336127774,test,0.6612903225806451,0.04169682398064439,0.6580882352941176,0.042425055144457935,0.6580882352941176,0.042393370494280526 +flat_mae,reg,logistic,abide_dx,17,0.3593813663804626,train,0.9643874643874644,0.007145685363317612,0.9640194110664401,0.007218190720568529,0.964156515319306,0.007226467425305599 +flat_mae,reg,logistic,abide_dx,17,0.3593813663804626,test,0.5967741935483871,0.04226293988128309,0.5860042735042735,0.04391891145703172,0.5866596638655462,0.04284930130673616 +flat_mae,reg,logistic,abide_dx,18,0.3593813663804626,train,0.9743589743589743,0.006061035781036417,0.9741161047154738,0.006111069323446224,0.9746770025839793,0.005994014227476414 +flat_mae,reg,logistic,abide_dx,18,0.3593813663804626,test,0.5725806451612904,0.041743016763180346,0.5662332519305657,0.0428957163993649,0.5661764705882353,0.04230720886682545 +flat_mae,reg,logistic,abide_dx,19,0.3593813663804626,train,0.9515669515669516,0.008533167805727703,0.9510805404341838,0.008609575814465482,0.9513473606496863,0.008576406569164904 +flat_mae,reg,logistic,abide_dx,19,0.3593813663804626,test,0.5967741935483871,0.04385512937534919,0.58994708994709,0.04468644173834092,0.5898109243697479,0.0442277195286528 +flat_mae,reg,logistic,abide_dx,20,0.046415888336127774,train,0.8433048433048433,0.013439870410039809,0.8399253731343284,0.013911889543964077,0.8372093023255813,0.013970143645342657 +flat_mae,reg,logistic,abide_dx,20,0.046415888336127774,test,0.6370967741935484,0.04143026938898805,0.6301451580831179,0.04216913904718563,0.6297268907563025,0.04163643749021562 +flat_mae,reg,logistic,abide_dx,21,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,21,21.54434690031882,test,0.5483870967741935,0.04621202493614917,0.5473272490221643,0.04618553780052106,0.5488445378151261,0.046257449617800504 +flat_mae,reg,logistic,abide_dx,22,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,22,166.81005372000556,test,0.5887096774193549,0.04386853349508467,0.5854473942969518,0.044126758572491195,0.585609243697479,0.04426622171191006 +flat_mae,reg,logistic,abide_dx,23,0.3593813663804626,train,0.9601139601139601,0.0077309418121645325,0.9596153846153846,0.007851522391810818,0.9588039867109635,0.008077587929046826 +flat_mae,reg,logistic,abide_dx,23,0.3593813663804626,test,0.6290322580645161,0.043731309371602835,0.6255252100840336,0.044130796671736354,0.6255252100840336,0.04397884121378654 +flat_mae,reg,logistic,abide_dx,24,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,24,21.54434690031882,test,0.5564516129032258,0.04585237078219136,0.5557292684515667,0.04587551708985841,0.5577731092436975,0.046125725511665105 +flat_mae,reg,logistic,abide_dx,25,0.046415888336127774,train,0.8490028490028491,0.013301671809756577,0.8461252140251618,0.01365169005038609,0.8438538205980066,0.013703638290423061 +flat_mae,reg,logistic,abide_dx,25,0.046415888336127774,test,0.5725806451612904,0.04437126968389662,0.5703170970905524,0.044695308631690495,0.5709033613445378,0.04472146478992341 +flat_mae,reg,logistic,abide_dx,26,0.005994842503189409,train,0.7264957264957265,0.015477842204953313,0.715830220025806,0.016475119886418914,0.7141380583241048,0.0160067727353488 +flat_mae,reg,logistic,abide_dx,26,0.005994842503189409,test,0.5806451612903226,0.04542277697426821,0.5694444444444444,0.04723501853526152,0.5703781512605042,0.04603533784326887 +flat_mae,reg,logistic,abide_dx,27,0.3593813663804626,train,0.9658119658119658,0.0068021779184954155,0.965406506402096,0.006886358683410939,0.9648578811369509,0.006968776314199522 +flat_mae,reg,logistic,abide_dx,27,0.3593813663804626,test,0.5887096774193549,0.04539140681555386,0.5788211788211788,0.046985386454283753,0.5793067226890757,0.0460377692534301 +flat_mae,reg,logistic,abide_dx,28,0.3593813663804626,train,0.9615384615384616,0.0075171980802924455,0.9610943821470137,0.007613544446363389,0.9606866002214839,0.007714754793150709 +flat_mae,reg,logistic,abide_dx,28,0.3593813663804626,test,0.6290322580645161,0.04190334997125809,0.6191239316239316,0.04378359962124326,0.6192226890756303,0.04269130750798991 +flat_mae,reg,logistic,abide_dx,29,0.000774263682681127,train,0.6566951566951567,0.015609935940943398,0.624002400240024,0.018573718737575563,0.6331118493909191,0.016303858878987193 +flat_mae,reg,logistic,abide_dx,29,0.000774263682681127,test,0.6451612903225806,0.03910569936984882,0.6163150492264415,0.044763528175546186,0.6244747899159664,0.04032004838338246 +flat_mae,reg,logistic,abide_dx,30,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,30,2.782559402207126,test,0.5967741935483871,0.04547196137832732,0.5929621848739496,0.045856686354974856,0.5929621848739496,0.04569233438609822 +flat_mae,reg,logistic,abide_dx,31,0.3593813663804626,train,0.9558404558404558,0.007591657121270234,0.955151627561903,0.007758177638235558,0.953156146179402,0.008123268196525478 +flat_mae,reg,logistic,abide_dx,31,0.3593813663804626,test,0.6209677419354839,0.044528314718328674,0.6153389215233318,0.045461715079500024,0.6150210084033614,0.04510356690334861 +flat_mae,reg,logistic,abide_dx,32,0.046415888336127774,train,0.8376068376068376,0.013625842442803963,0.8350142250443244,0.01390722136449673,0.8335179032853451,0.013963595156723567 +flat_mae,reg,logistic,abide_dx,32,0.046415888336127774,test,0.6693548387096774,0.040077695094911686,0.6575739206573719,0.042253196629571575,0.657563025210084,0.04080751082106158 +flat_mae,reg,logistic,abide_dx,33,0.046415888336127774,train,0.8418803418803419,0.014216763724866445,0.8386718094918665,0.014596372358667798,0.8362126245847177,0.014593036558710807 +flat_mae,reg,logistic,abide_dx,33,0.046415888336127774,test,0.6612903225806451,0.042695044272269236,0.6580882352941176,0.04317255357500099,0.6580882352941176,0.04307911133164687 +flat_mae,reg,logistic,abide_dx,34,0.3593813663804626,train,0.9558404558404558,0.008018748171481889,0.9553026957637998,0.00812349952277913,0.9546327057954964,0.008206138692448857 +flat_mae,reg,logistic,abide_dx,34,0.3593813663804626,test,0.5887096774193549,0.04398962842476635,0.5854473942969518,0.04427180672915537,0.585609243697479,0.04431420562848979 +flat_mae,reg,logistic,abide_dx,35,0.046415888336127774,train,0.8390313390313391,0.014267702444940512,0.8362802178637547,0.014600784640901344,0.8345145810262089,0.014655864787237575 +flat_mae,reg,logistic,abide_dx,35,0.046415888336127774,test,0.6532258064516129,0.04177531135632377,0.6480760345851759,0.04249035250063569,0.6475840336134454,0.04224878152167263 +flat_mae,reg,logistic,abide_dx,36,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,36,21.54434690031882,test,0.6209677419354839,0.04110486006948503,0.6137071651090342,0.04233670244565982,0.6134453781512605,0.041753052663752725 +flat_mae,reg,logistic,abide_dx,37,0.005994842503189409,train,0.7435897435897436,0.01592032479010881,0.7361343133979286,0.0166882983138432,0.7340716131413806,0.016440626524531824 +flat_mae,reg,logistic,abide_dx,37,0.005994842503189409,test,0.6451612903225806,0.04113393708894208,0.6288435374149659,0.04456932417990564,0.6307773109243697,0.04233585343470021 +flat_mae,reg,logistic,abide_dx,38,0.046415888336127774,train,0.8632478632478633,0.012170231567557933,0.8600571466542628,0.012602701571614056,0.8567737172388334,0.012707877382021317 +flat_mae,reg,logistic,abide_dx,38,0.046415888336127774,test,0.5645161290322581,0.04335554374906762,0.5475675675675675,0.04564894165440584,0.5509453781512605,0.04394311987567429 +flat_mae,reg,logistic,abide_dx,39,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,39,21.54434690031882,test,0.5483870967741935,0.04465994767259169,0.5464994775339603,0.0448676125787825,0.5472689075630253,0.04503381547047771 +flat_mae,reg,logistic,abide_dx,40,0.3593813663804626,train,0.9544159544159544,0.00832524213326332,0.9538461538461538,0.00844410488258241,0.9530454042081948,0.008569142929806923 +flat_mae,reg,logistic,abide_dx,40,0.3593813663804626,test,0.5725806451612904,0.04229713473345127,0.5643931861867832,0.043428580782225765,0.5646008403361344,0.04284647688169526 +flat_mae,reg,logistic,abide_dx,41,0.046415888336127774,train,0.8518518518518519,0.013778183972671878,0.848907284768212,0.014171445494311943,0.8464377999261721,0.01422340961566704 +flat_mae,reg,logistic,abide_dx,41,0.046415888336127774,test,0.6209677419354839,0.04136649979017729,0.6118548118548119,0.042674748595361765,0.6118697478991597,0.04178785441579138 +flat_mae,reg,logistic,abide_dx,42,0.046415888336127774,train,0.8660968660968661,0.011982719028336225,0.8634354304635761,0.012303990525966334,0.8608342561830934,0.012365826074969308 +flat_mae,reg,logistic,abide_dx,42,0.046415888336127774,test,0.5483870967741935,0.044329611594281955,0.5441176470588236,0.044765789262860595,0.5441176470588236,0.04466590204759366 +flat_mae,reg,logistic,abide_dx,43,0.3593813663804626,train,0.9515669515669516,0.008226307640824163,0.9510227198108872,0.008322876662108063,0.9507567368032483,0.008389619919810949 +flat_mae,reg,logistic,abide_dx,43,0.3593813663804626,test,0.5967741935483871,0.044175517277593654,0.5860042735042735,0.04566788743529486,0.5866596638655462,0.04472220970290267 +flat_mae,reg,logistic,abide_dx,44,0.046415888336127774,train,0.8490028490028491,0.013531769044280488,0.8462460327955568,0.013860693998555395,0.8441491325212256,0.01391045686868242 +flat_mae,reg,logistic,abide_dx,44,0.046415888336127774,test,0.5967741935483871,0.04472968876744002,0.5880946053680574,0.046377587586969186,0.5882352941176471,0.04548614186640538 +flat_mae,reg,logistic,abide_dx,45,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,45,10000.0,test,0.6290322580645161,0.04302963429637184,0.6227513227513227,0.04440291857039305,0.6223739495798319,0.04382912286950865 +flat_mae,reg,logistic,abide_dx,46,0.005994842503189409,train,0.7450142450142451,0.016071334919833813,0.736938294865756,0.016829132916844285,0.7347729789590254,0.016496865889713094 +flat_mae,reg,logistic,abide_dx,46,0.005994842503189409,test,0.5725806451612904,0.04166644155332877,0.5599598259122867,0.042910475225252126,0.5614495798319328,0.042002139719472066 +flat_mae,reg,logistic,abide_dx,47,0.046415888336127774,train,0.8461538461538461,0.014141448020125383,0.8434653342693149,0.014474126296952218,0.8415651531930601,0.014558216998084638 +flat_mae,reg,logistic,abide_dx,47,0.046415888336127774,test,0.5806451612903226,0.04387266066467963,0.5752305665349143,0.044358447599806415,0.5751050420168067,0.04412574969361125 +flat_mae,reg,logistic,abide_dx,48,0.046415888336127774,train,0.8504273504273504,0.013278325116764926,0.8477553675992193,0.013588189351715472,0.8457364341085272,0.013638483177778675 +flat_mae,reg,logistic,abide_dx,48,0.046415888336127774,test,0.5967741935483871,0.04454879586834484,0.5860042735042735,0.04571007754215472,0.5866596638655462,0.04469792866158943 +flat_mae,reg,logistic,abide_dx,49,0.3593813663804626,train,0.9472934472934473,0.008600316191409425,0.9465822644233439,0.008742694464926724,0.9454042081949059,0.00892787847560354 +flat_mae,reg,logistic,abide_dx,49,0.3593813663804626,test,0.5564516129032258,0.04438565205357024,0.5529334644378892,0.04462595831103846,0.553046218487395,0.04456978759390579 +flat_mae,reg,logistic,abide_dx,50,0.046415888336127774,train,0.8603988603988604,0.01279373588615507,0.858065953654189,0.01310973031969914,0.8562569213732004,0.01323441429259215 +flat_mae,reg,logistic,abide_dx,50,0.046415888336127774,test,0.5725806451612904,0.04475209492303126,0.5623043623043623,0.0461537973732943,0.5630252100840336,0.04529797445177885 +flat_mae,reg,logistic,abide_dx,51,0.005994842503189409,train,0.7393162393162394,0.016059597074419688,0.729910878537054,0.01697962617968734,0.7278331487633813,0.016573138910163272 +flat_mae,reg,logistic,abide_dx,51,0.005994842503189409,test,0.5564516129032258,0.04179022920520937,0.5307877536979704,0.04590354603728372,0.5388655462184874,0.04260396910372323 +flat_mae,reg,logistic,abide_dx,52,0.005994842503189409,train,0.7464387464387464,0.01584589952373999,0.7359362980870829,0.016910847348299837,0.7337024732373569,0.016374840156965255 +flat_mae,reg,logistic,abide_dx,52,0.005994842503189409,test,0.6209677419354839,0.041291894501328516,0.6097756946769334,0.04312645130559833,0.6102941176470589,0.04211456977279716 +flat_mae,reg,logistic,abide_dx,53,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,53,166.81005372000556,test,0.6209677419354839,0.045685312330629176,0.6167554415729598,0.046153908476910054,0.6165966386554622,0.045874696102791566 +flat_mae,reg,logistic,abide_dx,54,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,54,1291.5496650148827,test,0.6370967741935484,0.043355231733049594,0.6317074780542539,0.04429545828251603,0.6313025210084033,0.043819450459559435 +flat_mae,reg,logistic,abide_dx,55,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,55,2.782559402207126,test,0.5403225806451613,0.04202454562471295,0.5366764995083579,0.04226836421020314,0.5367647058823529,0.042226507377330684 +flat_mae,reg,logistic,abide_dx,56,0.3593813663804626,train,0.9515669515669516,0.00807901006617277,0.9509925507363026,0.00818526569911385,0.9504614248800296,0.008289582773688034 +flat_mae,reg,logistic,abide_dx,56,0.3593813663804626,test,0.6774193548387096,0.040906723672561575,0.6732542819499341,0.04150939469813584,0.6727941176470589,0.04123670578958091 +flat_mae,reg,logistic,abide_dx,57,0.005994842503189409,train,0.7250712250712251,0.016422814916019417,0.7157686054439607,0.01708065863132524,0.7140273163528977,0.01669729911415368 +flat_mae,reg,logistic,abide_dx,57,0.005994842503189409,test,0.6532258064516129,0.0431076743990159,0.6429862738533645,0.04519640938808118,0.6428571428571428,0.044001479949620635 +flat_mae,reg,logistic,abide_dx,58,0.005994842503189409,train,0.7407407407407407,0.016475632477447884,0.7309370577532179,0.017540441733840242,0.7288298265042451,0.017071090786735927 +flat_mae,reg,logistic,abide_dx,58,0.005994842503189409,test,0.6048387096774194,0.04164316229179491,0.5953379953379954,0.04334287158876636,0.5955882352941176,0.042434796139162616 +flat_mae,reg,logistic,abide_dx,59,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,59,166.81005372000556,test,0.6370967741935484,0.0428309450551633,0.6283716283716283,0.04421173659526166,0.6281512605042017,0.0434306151062574 +flat_mae,reg,logistic,abide_dx,60,0.046415888336127774,train,0.8518518518518519,0.014152559226903129,0.8491470510446972,0.014491361161686181,0.8470284237726098,0.014518192970131157 +flat_mae,reg,logistic,abide_dx,60,0.046415888336127774,test,0.5241935483870968,0.04519635447529369,0.5171275823377994,0.04604135296770034,0.5173319327731093,0.04568379095963638 +flat_mae,reg,logistic,abide_dx,61,0.046415888336127774,train,0.8518518518518519,0.012473244228191935,0.8486567164179104,0.012882853153838532,0.8458471760797341,0.012963149347783068 +flat_mae,reg,logistic,abide_dx,61,0.046415888336127774,test,0.5645161290322581,0.04454789364609567,0.5616653574234092,0.04480726085356473,0.5619747899159664,0.04462782093544198 +flat_mae,reg,logistic,abide_dx,62,0.3593813663804626,train,0.9586894586894587,0.007569377014057261,0.9581320450885669,0.007691090963319243,0.9569213732004429,0.007864581324893071 +flat_mae,reg,logistic,abide_dx,62,0.3593813663804626,test,0.6129032258064516,0.04143534260828014,0.6045708211533352,0.04283677298348824,0.6045168067226891,0.04215786333113725 +flat_mae,reg,logistic,abide_dx,63,0.3593813663804626,train,0.9529914529914529,0.007645910512513248,0.9524775629519462,0.0077343247597621775,0.9523440383905499,0.007812577350071475 +flat_mae,reg,logistic,abide_dx,63,0.3593813663804626,test,0.5806451612903226,0.04542277697426821,0.5766806722689075,0.04599952638202881,0.5766806722689075,0.045894753570088626 +flat_mae,reg,logistic,abide_dx,64,0.046415888336127774,train,0.8361823361823362,0.014320958528212288,0.8332558787991449,0.014688819310771598,0.8313399778516057,0.014767141645770527 +flat_mae,reg,logistic,abide_dx,64,0.046415888336127774,test,0.6209677419354839,0.04084501556574653,0.5990368077055384,0.045022651137223986,0.6039915966386554,0.04187498417716217 +flat_mae,reg,logistic,abide_dx,65,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,65,166.81005372000556,test,0.6370967741935484,0.04215592186370711,0.6330637206549615,0.042771852220749126,0.6328781512605042,0.04265788924654729 +flat_mae,reg,logistic,abide_dx,66,0.005994842503189409,train,0.7236467236467237,0.015002337190720695,0.7128701181510748,0.01597400711632626,0.7112587670727206,0.01548175563006225 +flat_mae,reg,logistic,abide_dx,66,0.005994842503189409,test,0.5483870967741935,0.040998071069571054,0.5308108108108108,0.04200252833634002,0.5346638655462185,0.04085427673206726 +flat_mae,reg,logistic,abide_dx,67,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,67,21.54434690031882,test,0.6129032258064516,0.04259646272681134,0.6112852664576802,0.04311803519508464,0.6123949579831933,0.04322953500201409 +flat_mae,reg,logistic,abide_dx,68,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,68,166.81005372000556,test,0.6209677419354839,0.04466215821801434,0.6153389215233318,0.0454846734707191,0.6150210084033614,0.04524802571907161 +flat_mae,reg,logistic,abide_dx,69,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,69,21.54434690031882,test,0.5967741935483871,0.04886131864714218,0.5915678524374176,0.04982888477374512,0.5913865546218487,0.04954942196714396 +flat_mae,reg,logistic,abide_dx,70,0.046415888336127774,train,0.8475783475783476,0.013615077594457597,0.845088402769328,0.013891613260898843,0.8434477667035807,0.013915047307122008 +flat_mae,reg,logistic,abide_dx,70,0.046415888336127774,test,0.5887096774193549,0.0427285402016351,0.5841388834089565,0.043137691855526634,0.5840336134453781,0.04288688747572812 +flat_mae,reg,logistic,abide_dx,71,0.3593813663804626,train,0.9629629629629629,0.007251241731712697,0.9625467857377372,0.007334271873651809,0.9622739018087856,0.0073951190135534755 +flat_mae,reg,logistic,abide_dx,71,0.3593813663804626,test,0.5967741935483871,0.04474287740013715,0.5880946053680574,0.04601309092730096,0.5882352941176471,0.045289131518694466 +flat_mae,reg,logistic,abide_dx,72,0.3593813663804626,train,0.9529914529914529,0.007910229603007874,0.9524486892907945,0.008012873666806523,0.9520487264673312,0.008108710269892452 +flat_mae,reg,logistic,abide_dx,72,0.3593813663804626,test,0.5725806451612904,0.042593702176970706,0.5643931861867832,0.04323931014137616,0.5646008403361344,0.0426954026376263 +flat_mae,reg,logistic,abide_dx,73,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,73,166.81005372000556,test,0.6290322580645161,0.043218583893465355,0.628161668839635,0.043369105783123946,0.6302521008403361,0.04355797730576449 +flat_mae,reg,logistic,abide_dx,74,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,74,1291.5496650148827,test,0.5483870967741935,0.04411772103195122,0.5479166666666666,0.044115187436999805,0.5504201680672269,0.044240256732845484 +flat_mae,reg,logistic,abide_dx,75,0.005994842503189409,train,0.7293447293447294,0.016618243875226823,0.7197333916083917,0.017622974086914486,0.7179032853451458,0.017189275703276025 +flat_mae,reg,logistic,abide_dx,75,0.005994842503189409,test,0.6290322580645161,0.04213240365900487,0.6091008771929824,0.04580076601595055,0.6129201680672269,0.04311661986218083 +flat_mae,reg,logistic,abide_dx,76,0.046415888336127774,train,0.8547008547008547,0.013087718762240172,0.8519318097223256,0.013462604241651415,0.8496124031007752,0.013592605561031371 +flat_mae,reg,logistic,abide_dx,76,0.046415888336127774,test,0.6129032258064516,0.04099884519071121,0.6045708211533352,0.0422852515840541,0.6045168067226891,0.0415366893968801 +flat_mae,reg,logistic,abide_dx,77,0.3593813663804626,train,0.9515669515669516,0.0076152151441936525,0.9509615384615384,0.007736687143037136,0.9501661129568106,0.007947305325316227 +flat_mae,reg,logistic,abide_dx,77,0.3593813663804626,test,0.6451612903225806,0.03951544107366913,0.6436781609195402,0.039821358816480065,0.6449579831932774,0.03985197594935986 +flat_mae,reg,logistic,abide_dx,78,0.005994842503189409,train,0.7478632478632479,0.016382206113260835,0.7409102567043178,0.01702839148645749,0.7388335179032853,0.016793329405823936 +flat_mae,reg,logistic,abide_dx,78,0.005994842503189409,test,0.5967741935483871,0.043368013551183665,0.5836690840719849,0.0453737046352454,0.5850840336134454,0.043837818587380756 +flat_mae,reg,logistic,abide_dx,79,0.046415888336127774,train,0.8475783475783476,0.013749270924457479,0.8446115495609181,0.014145981081803058,0.842266519010705,0.014206193748345208 +flat_mae,reg,logistic,abide_dx,79,0.046415888336127774,test,0.5887096774193549,0.04377996669563567,0.5788211788211788,0.045260366624246484,0.5793067226890757,0.04423902390983099 +flat_mae,reg,logistic,abide_dx,80,0.046415888336127774,train,0.8547008547008547,0.012900431816887627,0.8516913695826878,0.01337139144731927,0.8490217792543373,0.013529892595971834 +flat_mae,reg,logistic,abide_dx,80,0.046415888336127774,test,0.6774193548387096,0.042130474089835966,0.671957671957672,0.04294372078820443,0.671218487394958,0.042482173011616615 +flat_mae,reg,logistic,abide_dx,81,0.046415888336127774,train,0.8433048433048433,0.013948737733445925,0.8401903973509934,0.014329065050397945,0.8377999261720193,0.014394417722043312 +flat_mae,reg,logistic,abide_dx,81,0.046415888336127774,test,0.5806451612903226,0.043611555142364325,0.5643243243243243,0.04615453298885897,0.5672268907563025,0.0441917204785215 +flat_mae,reg,logistic,abide_dx,82,0.046415888336127774,train,0.8547008547008547,0.013235895399931575,0.8521617045876864,0.013520605300545599,0.850203026947213,0.013542627418238919 +flat_mae,reg,logistic,abide_dx,82,0.046415888336127774,test,0.5403225806451613,0.04427587105347816,0.531517200238616,0.04538478540546199,0.5320378151260504,0.04472449553436308 +flat_mae,reg,logistic,abide_dx,83,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,83,21.54434690031882,test,0.5725806451612904,0.0445896541011657,0.5712141971683957,0.04459915529890859,0.5724789915966386,0.04478221604987822 +flat_mae,reg,logistic,abide_dx,84,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,84,1291.5496650148827,test,0.5564516129032258,0.044789004127562544,0.5557292684515667,0.044745614928541226,0.5577731092436975,0.04473798775457111 +flat_mae,reg,logistic,abide_dx,85,0.046415888336127774,train,0.8518518518518519,0.013922603247162955,0.8491470510446972,0.014331748884784805,0.8470284237726098,0.014466794831686107 +flat_mae,reg,logistic,abide_dx,85,0.046415888336127774,test,0.6532258064516129,0.042285570339029147,0.6465831510572015,0.043400697899960525,0.6460084033613445,0.04280648931328728 +flat_mae,reg,logistic,abide_dx,86,0.3593813663804626,train,0.9615384615384616,0.0072444686230581316,0.9610194902548725,0.007365580597182709,0.9598006644518272,0.007577053992211843 +flat_mae,reg,logistic,abide_dx,86,0.3593813663804626,test,0.5887096774193549,0.042438043227831374,0.5765651155005022,0.04452401274684425,0.5777310924369747,0.04323671221265839 +flat_mae,reg,logistic,abide_dx,87,0.046415888336127774,train,0.8447293447293447,0.013329593797069729,0.8418327376756742,0.013635091742672073,0.8396825396825396,0.013647420536303037 +flat_mae,reg,logistic,abide_dx,87,0.046415888336127774,test,0.6693548387096774,0.04278317851917918,0.6667322189446083,0.04291663833555673,0.6670168067226891,0.042759365414215024 +flat_mae,reg,logistic,abide_dx,88,0.046415888336127774,train,0.8504273504273504,0.01323506289668245,0.8465918669338978,0.013782764968954584,0.843078626799557,0.013839467999923685 +flat_mae,reg,logistic,abide_dx,88,0.046415888336127774,test,0.6209677419354839,0.04092915060695576,0.6137071651090342,0.04219814186541897,0.6134453781512605,0.041505744636061416 +flat_mae,reg,logistic,abide_dx,89,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,89,2.782559402207126,test,0.5645161290322581,0.043142104202818235,0.5616653574234092,0.04304986932433616,0.5619747899159664,0.04297098812388241 +flat_mae,reg,logistic,abide_dx,90,0.046415888336127774,train,0.8547008547008547,0.012652950897680098,0.8523811487238693,0.012860400174149383,0.8507936507936508,0.012867024249498375 +flat_mae,reg,logistic,abide_dx,90,0.046415888336127774,test,0.6209677419354839,0.03804289982174116,0.5920767130958213,0.042232310955551015,0.6008403361344538,0.03875744015581625 +flat_mae,reg,logistic,abide_dx,91,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,91,166.81005372000556,test,0.6209677419354839,0.04374869999555104,0.6189604445897352,0.043881121562135084,0.6197478991596639,0.04391015633047646 +flat_mae,reg,logistic,abide_dx,92,0.046415888336127774,train,0.8490028490028491,0.012331344004241136,0.8457462686567164,0.012770633473737837,0.84296788482835,0.012870398864379682 +flat_mae,reg,logistic,abide_dx,92,0.046415888336127774,test,0.5806451612903226,0.04184348158426366,0.5735449735449736,0.04263139459369582,0.5735294117647058,0.042094231448015156 +flat_mae,reg,logistic,abide_dx,93,0.005994842503189409,train,0.7407407407407407,0.016339564369579486,0.7321105072463768,0.017156537063042425,0.7300110741971206,0.016810652511102468 +flat_mae,reg,logistic,abide_dx,93,0.005994842503189409,test,0.5645161290322581,0.04248736116165312,0.5334448160535117,0.04631993934173365,0.5446428571428572,0.042946287084981355 +flat_mae,reg,logistic,abide_dx,94,0.046415888336127774,train,0.8390313390313391,0.014170776355193393,0.8360284344711117,0.0145576856847737,0.8339239571797712,0.014659486934283168 +flat_mae,reg,logistic,abide_dx,94,0.046415888336127774,test,0.5967741935483871,0.04267541689399135,0.5929621848739496,0.04297651798423618,0.5929621848739496,0.04284243974339102 +flat_mae,reg,logistic,abide_dx,95,0.046415888336127774,train,0.8575498575498576,0.013018825813782038,0.8548351075709075,0.013395622962956757,0.8524916943521594,0.013517195502841948 +flat_mae,reg,logistic,abide_dx,95,0.046415888336127774,test,0.5564516129032258,0.043427778025894787,0.5457875457875458,0.044144913061838,0.5467436974789917,0.04349842067581899 +flat_mae,reg,logistic,abide_dx,96,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,96,2.782559402207126,test,0.5564516129032258,0.04345010115434234,0.5479551932126997,0.04473527126261598,0.5483193277310925,0.044007619199756966 +flat_mae,reg,logistic,abide_dx,97,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,97,2.782559402207126,test,0.5806451612903226,0.045849241713040036,0.580536039552433,0.045900759823294286,0.5845588235294117,0.04559795108101379 +flat_mae,reg,logistic,abide_dx,98,0.046415888336127774,train,0.8561253561253561,0.013186567445465973,0.853441344084799,0.013516855827502406,0.8511997046880768,0.013578234412003088 +flat_mae,reg,logistic,abide_dx,98,0.046415888336127774,test,0.5806451612903226,0.045607370655021504,0.5752305665349143,0.046039153145388756,0.5751050420168067,0.045778325793028 +flat_mae,reg,logistic,abide_dx,99,0.005994842503189409,train,0.7321937321937322,0.015086028307522578,0.7220668508659613,0.0160597198273702,0.7201919527500923,0.015603627638825668 +flat_mae,reg,logistic,abide_dx,99,0.005994842503189409,test,0.5967741935483871,0.040353742797321054,0.58994708994709,0.041293781352463026,0.5898109243697479,0.04078130210921474 +flat_mae,reg,logistic,abide_dx,100,0.046415888336127774,train,0.8561253561253561,0.012500059333793155,0.8536663894180462,0.012817803859850421,0.8517903285345145,0.012952605308343983 +flat_mae,reg,logistic,abide_dx,100,0.046415888336127774,test,0.6129032258064516,0.04227449508206204,0.5978378378378378,0.044870889255292506,0.5997899159663866,0.04316729745037143 diff --git a/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5fdd0611a4d79e9a221b5ba934e21c255bb0a8f4 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:24:52 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (abide_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic +model: flat_mae +representation: reg +dataset: abide_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/abide_dx__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:25:00 time: 5.1924 data: 3.9922 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:56 time: 0.1954 data: 0.0685 max mem: 3005 +extract (train) [ 40/289] eta: 0:01:15 time: 0.1699 data: 0.0545 max mem: 3005 +extract (train) [ 60/289] eta: 0:00:59 time: 0.1721 data: 0.0567 max mem: 3005 +extract (train) [ 80/289] eta: 0:00:50 time: 0.1880 data: 0.0644 max mem: 3005 +extract (train) [100/289] eta: 0:00:42 time: 0.1622 data: 0.0508 max mem: 3005 +extract (train) [120/289] eta: 0:00:36 time: 0.1697 data: 0.0566 max mem: 3005 +extract (train) [140/289] eta: 0:00:31 time: 0.1580 data: 0.0481 max mem: 3005 +extract (train) [160/289] eta: 0:00:26 time: 0.1806 data: 0.0617 max mem: 3005 +extract (train) [180/289] eta: 0:00:21 time: 0.1693 data: 0.0571 max mem: 3005 +extract (train) [200/289] eta: 0:00:17 time: 0.1714 data: 0.0582 max mem: 3005 +extract (train) [220/289] eta: 0:00:13 time: 0.1660 data: 0.0545 max mem: 3005 +extract (train) [240/289] eta: 0:00:09 time: 0.1675 data: 0.0552 max mem: 3005 +extract (train) [260/289] eta: 0:00:05 time: 0.1655 data: 0.0534 max mem: 3005 +extract (train) [280/289] eta: 0:00:01 time: 0.1538 data: 0.0467 max mem: 3005 +extract (train) [288/289] eta: 0:00:00 time: 0.1604 data: 0.0517 max mem: 3005 +extract (train) Total time: 0:00:54 (0.1900 s / it) +extract (validation) [ 0/62] eta: 0:03:52 time: 3.7502 data: 3.6034 max mem: 3005 +extract (validation) [20/62] eta: 0:00:16 time: 0.2254 data: 0.0849 max mem: 3005 +extract (validation) [40/62] eta: 0:00:06 time: 0.1604 data: 0.0517 max mem: 3005 +extract (validation) [60/62] eta: 0:00:00 time: 0.1628 data: 0.0537 max mem: 3005 +extract (validation) [61/62] eta: 0:00:00 time: 0.1639 data: 0.0549 max mem: 3005 +extract (validation) Total time: 0:00:15 (0.2466 s / it) +extract (test) [ 0/62] eta: 0:03:56 time: 3.8126 data: 3.5589 max mem: 3005 +extract (test) [20/62] eta: 0:00:16 time: 0.2259 data: 0.0869 max mem: 3005 +extract (test) [40/62] eta: 0:00:06 time: 0.1560 data: 0.0462 max mem: 3005 +extract (test) [60/62] eta: 0:00:00 time: 0.1563 data: 0.0486 max mem: 3005 +extract (test) [61/62] eta: 0:00:00 time: 0.1571 data: 0.0493 max mem: 3005 +extract (test) Total time: 0:00:15 (0.2441 s / it) +feature extraction time: 0:01:25 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | abide_dx | | 0.0059948 | train | 0.74644 | 0.01667 | 0.73715 | 0.017755 | 0.73478 | 0.017313 | +| flat_mae | reg | logistic | abide_dx | | 0.0059948 | test | 0.57258 | 0.04368 | 0.55131 | 0.047657 | 0.55865 | 0.044441 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.040436210861099865, "f1": 0.6794591370053689, "f1_std": 0.041489049225923016, "bacc": 0.6785714285714286, "bacc_std": 0.04109573610684454} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04477625030137057, "f1": 0.5929621848739496, "f1_std": 0.04545449300319796, "bacc": 0.5929621848739496, "bacc_std": 0.04541128452827496} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 2.782559402207126, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04447805758640692, "f1": 0.5718845677806006, "f1_std": 0.04455760274805515, "bacc": 0.5740546218487395, "bacc_std": 0.044729956999862173} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04184497366494297, "f1": 0.6513893429225237, "f1_std": 0.042094203806670505, "bacc": 0.6523109243697479, "bacc_std": 0.04218164357463039} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04243460721743788, "f1": 0.5880946053680574, "f1_std": 0.043704890722018885, "bacc": 0.5882352941176471, "bacc_std": 0.043004709940939856} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.040122082169641575, "f1": 0.6301451580831179, "f1_std": 0.04107176508166805, "bacc": 0.6297268907563025, "bacc_std": 0.04046331294185677} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04293312079971005, "f1": 0.6167554415729598, "f1_std": 0.043698413024880256, "bacc": 0.6165966386554622, "bacc_std": 0.043539373634497294} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044144380466181375, "f1": 0.5860042735042735, "f1_std": 0.04552120049140009, "bacc": 0.5866596638655462, "bacc_std": 0.04455999309080427} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 2.782559402207126, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.044201859355093305, "f1": 0.6317074780542539, "f1_std": 0.04533784459506236, "bacc": 0.6313025210084033, "bacc_std": 0.04492528177047167} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.040242795258770656, "f1": 0.5972691721349506, "f1_std": 0.04115962243551246, "bacc": 0.5971638655462186, "bacc_std": 0.040807026500779525} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04141731981735753, "f1": 0.6191239316239316, "f1_std": 0.04290270934941534, "bacc": 0.6192226890756303, "bacc_std": 0.04192092435608226} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04390595142163911, "f1": 0.6017043592264831, "f1_std": 0.044356730149835946, "bacc": 0.601890756302521, "bacc_std": 0.04428867184475829} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04451905086250414, "f1": 0.6242424242424243, "f1_std": 0.045235849658404305, "bacc": 0.6239495798319328, "bacc_std": 0.045121420057164345} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.042366838582155376, "f1": 0.5989703649924097, "f1_std": 0.043100103313728204, "bacc": 0.5987394957983193, "bacc_std": 0.04266176552058258} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 166.81005372000556, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04236254014447428, "f1": 0.5860042735042735, "f1_std": 0.043834677016484636, "bacc": 0.5866596638655462, "bacc_std": 0.042829337917633284} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04169682398064439, "f1": 0.6580882352941176, "f1_std": 0.042425055144457935, "bacc": 0.6580882352941176, "bacc_std": 0.042393370494280526} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04226293988128309, "f1": 0.5860042735042735, "f1_std": 0.04391891145703172, "bacc": 0.5866596638655462, "bacc_std": 0.04284930130673616} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.041743016763180346, "f1": 0.5662332519305657, "f1_std": 0.0428957163993649, "bacc": 0.5661764705882353, "bacc_std": 0.04230720886682545} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04385512937534919, "f1": 0.58994708994709, "f1_std": 0.04468644173834092, "bacc": 0.5898109243697479, "bacc_std": 0.0442277195286528} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04143026938898805, "f1": 0.6301451580831179, "f1_std": 0.04216913904718563, "bacc": 0.6297268907563025, "bacc_std": 0.04163643749021562} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 21.54434690031882, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04621202493614917, "f1": 0.5473272490221643, "f1_std": 0.04618553780052106, "bacc": 0.5488445378151261, "bacc_std": 0.046257449617800504} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 166.81005372000556, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04386853349508467, "f1": 0.5854473942969518, "f1_std": 0.044126758572491195, "bacc": 0.585609243697479, "bacc_std": 0.04426622171191006} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.043731309371602835, "f1": 0.6255252100840336, "f1_std": 0.044130796671736354, "bacc": 0.6255252100840336, "bacc_std": 0.04397884121378654} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 21.54434690031882, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04585237078219136, "f1": 0.5557292684515667, "f1_std": 0.04587551708985841, "bacc": 0.5577731092436975, "bacc_std": 0.046125725511665105} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04437126968389662, "f1": 0.5703170970905524, "f1_std": 0.044695308631690495, "bacc": 0.5709033613445378, "bacc_std": 0.04472146478992341} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04542277697426821, "f1": 0.5694444444444444, "f1_std": 0.04723501853526152, "bacc": 0.5703781512605042, "bacc_std": 0.04603533784326887} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04539140681555386, "f1": 0.5788211788211788, "f1_std": 0.046985386454283753, "bacc": 0.5793067226890757, "bacc_std": 0.0460377692534301} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04190334997125809, "f1": 0.6191239316239316, "f1_std": 0.04378359962124326, "bacc": 0.6192226890756303, "bacc_std": 0.04269130750798991} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.000774263682681127, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.03910569936984882, "f1": 0.6163150492264415, "f1_std": 0.044763528175546186, "bacc": 0.6244747899159664, "bacc_std": 0.04032004838338246} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 2.782559402207126, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04547196137832732, "f1": 0.5929621848739496, "f1_std": 0.045856686354974856, "bacc": 0.5929621848739496, "bacc_std": 0.04569233438609822} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.044528314718328674, "f1": 0.6153389215233318, "f1_std": 0.045461715079500024, "bacc": 0.6150210084033614, "bacc_std": 0.04510356690334861} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.040077695094911686, "f1": 0.6575739206573719, "f1_std": 0.042253196629571575, "bacc": 0.657563025210084, "bacc_std": 0.04080751082106158} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.042695044272269236, "f1": 0.6580882352941176, "f1_std": 0.04317255357500099, "bacc": 0.6580882352941176, "bacc_std": 0.04307911133164687} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04398962842476635, "f1": 0.5854473942969518, "f1_std": 0.04427180672915537, "bacc": 0.585609243697479, "bacc_std": 0.04431420562848979} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04177531135632377, "f1": 0.6480760345851759, "f1_std": 0.04249035250063569, "bacc": 0.6475840336134454, "bacc_std": 0.04224878152167263} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 21.54434690031882, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04110486006948503, "f1": 0.6137071651090342, "f1_std": 0.04233670244565982, "bacc": 0.6134453781512605, "bacc_std": 0.041753052663752725} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04113393708894208, "f1": 0.6288435374149659, "f1_std": 0.04456932417990564, "bacc": 0.6307773109243697, "bacc_std": 0.04233585343470021} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04335554374906762, "f1": 0.5475675675675675, "f1_std": 0.04564894165440584, "bacc": 0.5509453781512605, "bacc_std": 0.04394311987567429} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 21.54434690031882, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04465994767259169, "f1": 0.5464994775339603, "f1_std": 0.0448676125787825, "bacc": 0.5472689075630253, "bacc_std": 0.04503381547047771} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04229713473345127, "f1": 0.5643931861867832, "f1_std": 0.043428580782225765, "bacc": 0.5646008403361344, "bacc_std": 0.04284647688169526} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04136649979017729, "f1": 0.6118548118548119, "f1_std": 0.042674748595361765, "bacc": 0.6118697478991597, "bacc_std": 0.04178785441579138} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.044329611594281955, "f1": 0.5441176470588236, "f1_std": 0.044765789262860595, "bacc": 0.5441176470588236, "bacc_std": 0.04466590204759366} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044175517277593654, "f1": 0.5860042735042735, "f1_std": 0.04566788743529486, "bacc": 0.5866596638655462, "bacc_std": 0.04472220970290267} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04472968876744002, "f1": 0.5880946053680574, "f1_std": 0.046377587586969186, "bacc": 0.5882352941176471, "bacc_std": 0.04548614186640538} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 10000.0, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04302963429637184, "f1": 0.6227513227513227, "f1_std": 0.04440291857039305, "bacc": 0.6223739495798319, "bacc_std": 0.04382912286950865} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04166644155332877, "f1": 0.5599598259122867, "f1_std": 0.042910475225252126, "bacc": 0.5614495798319328, "bacc_std": 0.042002139719472066} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04387266066467963, "f1": 0.5752305665349143, "f1_std": 0.044358447599806415, "bacc": 0.5751050420168067, "bacc_std": 0.04412574969361125} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04454879586834484, "f1": 0.5860042735042735, "f1_std": 0.04571007754215472, "bacc": 0.5866596638655462, "bacc_std": 0.04469792866158943} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04438565205357024, "f1": 0.5529334644378892, "f1_std": 0.04462595831103846, "bacc": 0.553046218487395, "bacc_std": 0.04456978759390579} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04475209492303126, "f1": 0.5623043623043623, "f1_std": 0.0461537973732943, "bacc": 0.5630252100840336, "bacc_std": 0.04529797445177885} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04179022920520937, "f1": 0.5307877536979704, "f1_std": 0.04590354603728372, "bacc": 0.5388655462184874, "bacc_std": 0.04260396910372323} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.041291894501328516, "f1": 0.6097756946769334, "f1_std": 0.04312645130559833, "bacc": 0.6102941176470589, "bacc_std": 0.04211456977279716} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 166.81005372000556, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.045685312330629176, "f1": 0.6167554415729598, "f1_std": 0.046153908476910054, "bacc": 0.6165966386554622, "bacc_std": 0.045874696102791566} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 1291.5496650148827, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.043355231733049594, "f1": 0.6317074780542539, "f1_std": 0.04429545828251603, "bacc": 0.6313025210084033, "bacc_std": 0.043819450459559435} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 2.782559402207126, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04202454562471295, "f1": 0.5366764995083579, "f1_std": 0.04226836421020314, "bacc": 0.5367647058823529, "bacc_std": 0.042226507377330684} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.040906723672561575, "f1": 0.6732542819499341, "f1_std": 0.04150939469813584, "bacc": 0.6727941176470589, "bacc_std": 0.04123670578958091} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.0431076743990159, "f1": 0.6429862738533645, "f1_std": 0.04519640938808118, "bacc": 0.6428571428571428, "bacc_std": 0.044001479949620635} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04164316229179491, "f1": 0.5953379953379954, "f1_std": 0.04334287158876636, "bacc": 0.5955882352941176, "bacc_std": 0.042434796139162616} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 166.81005372000556, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.0428309450551633, "f1": 0.6283716283716283, "f1_std": 0.04421173659526166, "bacc": 0.6281512605042017, "bacc_std": 0.0434306151062574} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04519635447529369, "f1": 0.5171275823377994, "f1_std": 0.04604135296770034, "bacc": 0.5173319327731093, "bacc_std": 0.04568379095963638} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04454789364609567, "f1": 0.5616653574234092, "f1_std": 0.04480726085356473, "bacc": 0.5619747899159664, "bacc_std": 0.04462782093544198} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04143534260828014, "f1": 0.6045708211533352, "f1_std": 0.04283677298348824, "bacc": 0.6045168067226891, "bacc_std": 0.04215786333113725} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04542277697426821, "f1": 0.5766806722689075, "f1_std": 0.04599952638202881, "bacc": 0.5766806722689075, "bacc_std": 0.045894753570088626} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04084501556574653, "f1": 0.5990368077055384, "f1_std": 0.045022651137223986, "bacc": 0.6039915966386554, "bacc_std": 0.04187498417716217} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 166.81005372000556, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04215592186370711, "f1": 0.6330637206549615, "f1_std": 0.042771852220749126, "bacc": 0.6328781512605042, "bacc_std": 0.04265788924654729} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.040998071069571054, "f1": 0.5308108108108108, "f1_std": 0.04200252833634002, "bacc": 0.5346638655462185, "bacc_std": 0.04085427673206726} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 21.54434690031882, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04259646272681134, "f1": 0.6112852664576802, "f1_std": 0.04311803519508464, "bacc": 0.6123949579831933, "bacc_std": 0.04322953500201409} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 166.81005372000556, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04466215821801434, "f1": 0.6153389215233318, "f1_std": 0.0454846734707191, "bacc": 0.6150210084033614, "bacc_std": 0.04524802571907161} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 21.54434690031882, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04886131864714218, "f1": 0.5915678524374176, "f1_std": 0.04982888477374512, "bacc": 0.5913865546218487, "bacc_std": 0.04954942196714396} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.0427285402016351, "f1": 0.5841388834089565, "f1_std": 0.043137691855526634, "bacc": 0.5840336134453781, "bacc_std": 0.04288688747572812} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04474287740013715, "f1": 0.5880946053680574, "f1_std": 0.04601309092730096, "bacc": 0.5882352941176471, "bacc_std": 0.045289131518694466} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.042593702176970706, "f1": 0.5643931861867832, "f1_std": 0.04323931014137616, "bacc": 0.5646008403361344, "bacc_std": 0.0426954026376263} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 166.81005372000556, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.043218583893465355, "f1": 0.628161668839635, "f1_std": 0.043369105783123946, "bacc": 0.6302521008403361, "bacc_std": 0.04355797730576449} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 1291.5496650148827, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04411772103195122, "f1": 0.5479166666666666, "f1_std": 0.044115187436999805, "bacc": 0.5504201680672269, "bacc_std": 0.044240256732845484} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04213240365900487, "f1": 0.6091008771929824, "f1_std": 0.04580076601595055, "bacc": 0.6129201680672269, "bacc_std": 0.04311661986218083} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04099884519071121, "f1": 0.6045708211533352, "f1_std": 0.0422852515840541, "bacc": 0.6045168067226891, "bacc_std": 0.0415366893968801} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.03951544107366913, "f1": 0.6436781609195402, "f1_std": 0.039821358816480065, "bacc": 0.6449579831932774, "bacc_std": 0.03985197594935986} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.043368013551183665, "f1": 0.5836690840719849, "f1_std": 0.0453737046352454, "bacc": 0.5850840336134454, "bacc_std": 0.043837818587380756} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04377996669563567, "f1": 0.5788211788211788, "f1_std": 0.045260366624246484, "bacc": 0.5793067226890757, "bacc_std": 0.04423902390983099} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.042130474089835966, "f1": 0.671957671957672, "f1_std": 0.04294372078820443, "bacc": 0.671218487394958, "bacc_std": 0.042482173011616615} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.043611555142364325, "f1": 0.5643243243243243, "f1_std": 0.04615453298885897, "bacc": 0.5672268907563025, "bacc_std": 0.0441917204785215} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04427587105347816, "f1": 0.531517200238616, "f1_std": 0.04538478540546199, "bacc": 0.5320378151260504, "bacc_std": 0.04472449553436308} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 21.54434690031882, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.0445896541011657, "f1": 0.5712141971683957, "f1_std": 0.04459915529890859, "bacc": 0.5724789915966386, "bacc_std": 0.04478221604987822} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 1291.5496650148827, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.044789004127562544, "f1": 0.5557292684515667, "f1_std": 0.044745614928541226, "bacc": 0.5577731092436975, "bacc_std": 0.04473798775457111} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.042285570339029147, "f1": 0.6465831510572015, "f1_std": 0.043400697899960525, "bacc": 0.6460084033613445, "bacc_std": 0.04280648931328728} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042438043227831374, "f1": 0.5765651155005022, "f1_std": 0.04452401274684425, "bacc": 0.5777310924369747, "bacc_std": 0.04323671221265839} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04278317851917918, "f1": 0.6667322189446083, "f1_std": 0.04291663833555673, "bacc": 0.6670168067226891, "bacc_std": 0.042759365414215024} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04092915060695576, "f1": 0.6137071651090342, "f1_std": 0.04219814186541897, "bacc": 0.6134453781512605, "bacc_std": 0.041505744636061416} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 2.782559402207126, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.043142104202818235, "f1": 0.5616653574234092, "f1_std": 0.04304986932433616, "bacc": 0.5619747899159664, "bacc_std": 0.04297098812388241} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.03804289982174116, "f1": 0.5920767130958213, "f1_std": 0.042232310955551015, "bacc": 0.6008403361344538, "bacc_std": 0.03875744015581625} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 166.81005372000556, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04374869999555104, "f1": 0.6189604445897352, "f1_std": 0.043881121562135084, "bacc": 0.6197478991596639, "bacc_std": 0.04391015633047646} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04184348158426366, "f1": 0.5735449735449736, "f1_std": 0.04263139459369582, "bacc": 0.5735294117647058, "bacc_std": 0.042094231448015156} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04248736116165312, "f1": 0.5334448160535117, "f1_std": 0.04631993934173365, "bacc": 0.5446428571428572, "bacc_std": 0.042946287084981355} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04267541689399135, "f1": 0.5929621848739496, "f1_std": 0.04297651798423618, "bacc": 0.5929621848739496, "bacc_std": 0.04284243974339102} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.043427778025894787, "f1": 0.5457875457875458, "f1_std": 0.044144913061838, "bacc": 0.5467436974789917, "bacc_std": 0.04349842067581899} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 2.782559402207126, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04345010115434234, "f1": 0.5479551932126997, "f1_std": 0.04473527126261598, "bacc": 0.5483193277310925, "bacc_std": 0.044007619199756966} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 2.782559402207126, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.045849241713040036, "f1": 0.580536039552433, "f1_std": 0.045900759823294286, "bacc": 0.5845588235294117, "bacc_std": 0.04559795108101379} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.045607370655021504, "f1": 0.5752305665349143, "f1_std": 0.046039153145388756, "bacc": 0.5751050420168067, "bacc_std": 0.045778325793028} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.040353742797321054, "f1": 0.58994708994709, "f1_std": 0.041293781352463026, "bacc": 0.5898109243697479, "bacc_std": 0.04078130210921474} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04227449508206204, "f1": 0.5978378378378378, "f1_std": 0.044870889255292506, "bacc": 0.5997899159663866, "bacc_std": 0.04316729745037143} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|------:|--------:|--------:|----------:|-------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | abide_dx | train | 100 | 153.9 | 1019.3 | 0.89932 | 0.090535 | 0.8967 | 0.093975 | 0.89557 | 0.094489 | +| flat_mae | reg | logistic | abide_dx | test | 100 | 153.9 | 1019.3 | 0.6025 | 0.035557 | 0.5947 | 0.036082 | 0.59558 | 0.035472 | + + +done! total time: 0:05:45 diff --git a/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..009cd92d8c0c1cff06bebbb4e0ddcf02dbee29ac --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..e2089e74e54ee18fd3d116538fc689ca56da5cf9 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,train,0.7479452054794521,0.021554721347887166,0.736833855799373,0.022946530346294756,0.7336508518043597,0.022462896654640938 +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,test,0.5846153846153846,0.057618627822860806,0.5578231292517006,0.06309096499040279,0.5612934362934363,0.0592380502859694 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,train,0.7397260273972602,0.02218183169502902,0.7285693038693062,0.023796317165362606,0.7256518287842706,0.02329957664182971 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,test,0.6307692307692307,0.05873252176116769,0.6198830409356726,0.0604753092760223,0.6192084942084942,0.059827196234437253 +flat_mae,patch,logistic,adhd200_dx,2,0.046415888336127774,train,0.8328767123287671,0.01941810195230953,0.8279113625648279,0.020319904672539486,0.8246779019356415,0.02044763706941002 +flat_mae,patch,logistic,adhd200_dx,2,0.046415888336127774,test,0.6923076923076923,0.056576441455880304,0.675,0.06118661314295948,0.6732625482625483,0.058832053743652954 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,train,0.7589041095890411,0.0229762251985842,0.7499688628720886,0.0242979989407763,0.7469469377785919,0.024001261253343883 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,test,0.5692307692307692,0.05784798687766771,0.545,0.06155549411738206,0.5477799227799228,0.058897270770354206 +flat_mae,patch,logistic,adhd200_dx,4,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,4,2.782559402207126,test,0.5538461538461539,0.060534041476798764,0.5534233593935086,0.06083958887332807,0.5603281853281853,0.061099826610612175 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,train,0.7753424657534247,0.02176287431481615,0.7670164404035372,0.023194957866220552,0.7636624534408011,0.023002255212838937 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,test,0.5692307692307692,0.060089117249584545,0.5666666666666667,0.06056517305285745,0.5694980694980695,0.06136539284002246 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,train,0.7589041095890411,0.02128701820193337,0.7504971414367387,0.022241773404853615,0.7476644074006228,0.021997011126622296 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,test,0.6153846153846154,0.060120305423877834,0.606060606060606,0.06230327968320969,0.6056949806949807,0.06162951877812147 +flat_mae,patch,logistic,adhd200_dx,7,0.046415888336127774,train,0.8191780821917808,0.019797807756629888,0.8136256730828744,0.02062939266424599,0.8103895707394517,0.020643654881458157 +flat_mae,patch,logistic,adhd200_dx,7,0.046415888336127774,test,0.5692307692307692,0.05855477585259745,0.564176245210728,0.05865634040864323,0.5651544401544402,0.05874587396824529 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,train,0.7506849315068493,0.022195822395376413,0.7417205153925708,0.02346098900422905,0.7389479147585027,0.02320993885816672 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,test,0.676923076923077,0.06202615214807266,0.6655231560891939,0.06490580932633228,0.6640926640926641,0.06364224900139301 +flat_mae,patch,logistic,adhd200_dx,9,0.3593813663804626,train,0.958904109589041,0.010372277093644255,0.9581181870338497,0.01058803815386204,0.9571350064114307,0.010817134271905643 +flat_mae,patch,logistic,adhd200_dx,9,0.3593813663804626,test,0.5076923076923077,0.0630233994302088,0.4871794871794872,0.0655077073783622,0.48938223938223935,0.06383980599356427 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,train,0.8438356164383561,0.019374136049963415,0.8388820481843737,0.0201951810651229,0.8351041094217501,0.020151011775625807 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,test,0.5538461538461539,0.06049186947176743,0.543030303030303,0.062475758780398905,0.542953667953668,0.061779515437967246 +flat_mae,patch,logistic,adhd200_dx,11,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,11,2.782559402207126,test,0.5230769230769231,0.0589652389826754,0.5115151515151515,0.059912447310021565,0.5115830115830116,0.059191974499188865 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,train,0.7616438356164383,0.021948933379258906,0.7535869759212843,0.022969976584477105,0.7508090614886731,0.02273862043123146 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,test,0.5692307692307692,0.05774769853760868,0.5512820512820513,0.06166578475310059,0.5521235521235521,0.05950401726232643 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,train,0.7452054794520548,0.0225740623390213,0.7386051239402129,0.023192615396567917,0.7369634243145875,0.02305050882128331 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,test,0.5538461538461539,0.060958334785996715,0.5381034060279344,0.06311755309246836,0.5386100386100386,0.061702897673304856 +flat_mae,patch,logistic,adhd200_dx,14,0.046415888336127774,train,0.821917808219178,0.018666044364967663,0.8162690023155139,0.019462060423713026,0.812816755205471,0.019440220962926358 +flat_mae,patch,logistic,adhd200_dx,14,0.046415888336127774,test,0.5846153846153846,0.05748756284325162,0.5745454545454545,0.05913027863317107,0.5743243243243243,0.05871047961912486 +flat_mae,patch,logistic,adhd200_dx,15,0.046415888336127774,train,0.8246575342465754,0.019605791095327802,0.8185433755903555,0.020663240369345613,0.8145264700494597,0.02066521081243674 +flat_mae,patch,logistic,adhd200_dx,15,0.046415888336127774,test,0.6153846153846154,0.060827049417412994,0.6094688776736361,0.06168376931762646,0.61003861003861,0.061707489511910345 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,train,0.7561643835616438,0.020951005763973463,0.7493924783027965,0.021744346957731923,0.7473896318006961,0.021642762591478314 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,test,0.6,0.06259662542655874,0.5921814671814671,0.06370679138636012,0.5921814671814671,0.06364347873222252 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,train,0.7671232876712328,0.020237953935819463,0.7576948008840918,0.021700607206465793,0.7542284911766501,0.021463389280275514 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,test,0.6461538461538462,0.056666690799391545,0.6167649320687003,0.06421648132229565,0.6196911196911197,0.059051750880579186 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,train,0.7479452054794521,0.022277467585413303,0.7391561024111359,0.02346615471493754,0.7365207302924833,0.02319013294182259 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,test,0.6461538461538462,0.056027043343360686,0.6289401836684041,0.05987107176905926,0.6283783783783784,0.05781904282814363 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,train,0.7452054794520548,0.023020400198422976,0.7330129541218018,0.02486504069226458,0.7297887280942785,0.024183388127276967 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,test,0.6615384615384615,0.052647259800459145,0.622093023255814,0.06303717798741677,0.6288610038610039,0.05540749457101387 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,train,0.7397260273972602,0.02173338506241827,0.7319931056337483,0.022688156204163843,0.729956646516456,0.022539893876855784 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,test,0.6615384615384615,0.05653903715039275,0.6474358974358974,0.060137525582981174,0.6462355212355213,0.0585041538352041 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,train,0.8383561643835616,0.0180941521258164,0.8328923170040894,0.01903265700831486,0.8288148012456494,0.019117733143310115 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,test,0.6,0.05909463676743057,0.588206627680312,0.060896915091185506,0.5878378378378378,0.06015666300951957 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,train,0.736986301369863,0.02393263266674643,0.7283720930232558,0.02505814365415568,0.7260945228063748,0.0247757891131338 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,test,0.6307692307692307,0.06057241741485385,0.6264367816091954,0.06120814236301764,0.627895752895753,0.06154412674398376 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,train,0.7561643835616438,0.020636364284166698,0.7457122952038764,0.02214687120141278,0.7423673444464798,0.021698999963721502 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,test,0.5538461538461539,0.05077016316160134,0.49612403100775193,0.05964757569749845,0.5168918918918919,0.05181038564798979 +flat_mae,patch,logistic,adhd200_dx,24,0.046415888336127774,train,0.8246575342465754,0.02094862313874757,0.8196194712132444,0.021841654470879084,0.8166788789155524,0.021975639451473305 +flat_mae,patch,logistic,adhd200_dx,24,0.046415888336127774,test,0.5846153846153846,0.05729141534461642,0.5501153550371699,0.06366781323401839,0.556949806949807,0.0585820995320762 +flat_mae,patch,logistic,adhd200_dx,25,0.046415888336127774,train,0.8356164383561644,0.018339662179473113,0.8302325581395349,0.019291996129798164,0.82638761677963,0.01942970764335245 +flat_mae,patch,logistic,adhd200_dx,25,0.046415888336127774,test,0.5846153846153846,0.06211709499907622,0.578226387887527,0.06292712554499699,0.5786679536679536,0.0627717337817736 +flat_mae,patch,logistic,adhd200_dx,26,0.046415888336127774,train,0.8273972602739726,0.019620221568300174,0.8219222637827288,0.020513629551038286,0.8183885937595408,0.020510823434865304 +flat_mae,patch,logistic,adhd200_dx,26,0.046415888336127774,test,0.5692307692307692,0.05656368040384574,0.5512820512820513,0.05897713463376735,0.5521235521235521,0.05741034976650255 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,train,0.7479452054794521,0.021171782365909737,0.7391561024111359,0.02231806717856028,0.7365207302924833,0.022099391697406656 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,test,0.6153846153846154,0.05828945523605429,0.6094688776736361,0.058735208831453375,0.61003861003861,0.05858354744498113 +flat_mae,patch,logistic,adhd200_dx,28,0.046415888336127774,train,0.821917808219178,0.019381988415753313,0.8176248568287865,0.01990928231688418,0.8156866336935946,0.019938843025727144 +flat_mae,patch,logistic,adhd200_dx,28,0.046415888336127774,test,0.6,0.05451080778781831,0.570630081300813,0.06023385353325281,0.5748069498069498,0.05612406970014136 +flat_mae,patch,logistic,adhd200_dx,29,0.3593813663804626,train,0.9452054794520548,0.011830413620525976,0.9440252729726414,0.012144570720807732,0.94212920559321,0.012534939288070509 +flat_mae,patch,logistic,adhd200_dx,29,0.3593813663804626,test,0.47692307692307695,0.06292126384448252,0.47078544061302685,0.06329844267154572,0.471042471042471,0.06368218243962279 +flat_mae,patch,logistic,adhd200_dx,30,0.005994842503189409,train,0.7287671232876712,0.022251527217285447,0.7171406429795928,0.023898462736138604,0.7145081516761311,0.023344107171702714 +flat_mae,patch,logistic,adhd200_dx,30,0.005994842503189409,test,0.5692307692307692,0.0612076028846709,0.5565302144249512,0.06299081442112053,0.5564671814671815,0.06194880306863747 +flat_mae,patch,logistic,adhd200_dx,31,0.046415888336127774,train,0.8575342465753425,0.017618103417338073,0.8528682170542636,0.018514678437198544,0.8486749709959089,0.018723164615642968 +flat_mae,patch,logistic,adhd200_dx,31,0.046415888336127774,test,0.5538461538461539,0.05976313007177863,0.5469838981014179,0.0608617816216368,0.5472972972972974,0.061121624792276306 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,train,0.7424657534246575,0.02314345351920595,0.7350660983444527,0.023955830850629932,0.7331013006045063,0.023749910794440388 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,test,0.5846153846153846,0.062457832402368615,0.5745454545454545,0.0640948831351513,0.5743243243243243,0.06359579063513054 +flat_mae,patch,logistic,adhd200_dx,33,0.005994842503189409,train,0.7643835616438356,0.02147490866122325,0.754566210045662,0.022873048881266494,0.7510838370885998,0.022502130992692243 +flat_mae,patch,logistic,adhd200_dx,33,0.005994842503189409,test,0.5692307692307692,0.05965006435465759,0.5565302144249512,0.062083413444177615,0.5564671814671815,0.06083732096663037 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,train,0.8301369863013699,0.01815755486426799,0.8258909337108389,0.018791971286074276,0.8236856567136839,0.018932470226374485 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,test,0.5846153846153846,0.06009620689228428,0.5810455956075435,0.06113419226875578,0.583011583011583,0.06162055040146502 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,train,0.7342465753424657,0.022783381762123706,0.721529640320589,0.02437722348773868,0.7186450509861391,0.023695191790969426 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,test,0.5692307692307692,0.05913723683995633,0.545,0.06303908391988262,0.5477799227799228,0.060355524886203604 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,train,0.7342465753424657,0.023311263472650116,0.7263508552260378,0.024284273652324117,0.7243848079623862,0.024130409316971427 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,test,0.6615384615384615,0.052768459290166776,0.6366869918699187,0.059022972864466054,0.6375482625482626,0.05471335219672646 +flat_mae,patch,logistic,adhd200_dx,37,0.000774263682681127,train,0.6575342465753424,0.022010909115653978,0.6285244380032731,0.025336534364385196,0.6320296757647921,0.023116247355190068 +flat_mae,patch,logistic,adhd200_dx,37,0.000774263682681127,test,0.6615384615384615,0.053376190867083145,0.6299171842650104,0.06251014060350726,0.6332046332046332,0.05619269997013812 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,train,0.7506849315068493,0.022754150236260012,0.7422576414808837,0.023931826378124944,0.7396653843805336,0.02366174784681826 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,test,0.5384615384615384,0.05699900134077924,0.5125,0.06049029596656931,0.5164092664092664,0.057506542909614766 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,train,0.7534246575342466,0.02196891233997233,0.7431506849315068,0.02343352718388953,0.7399401599804604,0.02299208917010444 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,test,0.676923076923077,0.058763398824609514,0.6612062546537603,0.06241005650320301,0.6597490347490347,0.060487454132672716 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,train,0.7424657534246575,0.02185057084036743,0.7311128526645768,0.02333545498294112,0.72807901325029,0.022796452173932096 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,test,0.6461538461538462,0.05552170213784898,0.6167649320687003,0.06335678529713759,0.6196911196911197,0.058210192808682476 +flat_mae,patch,logistic,adhd200_dx,41,0.000774263682681127,train,0.663013698630137,0.023051748714230165,0.6390758025227311,0.026009187555665118,0.6404713928069854,0.02422832586183476 +flat_mae,patch,logistic,adhd200_dx,41,0.000774263682681127,test,0.6307692307692307,0.057703227684481405,0.6153846153846154,0.06086968984076391,0.6148648648648649,0.05897178269384293 +flat_mae,patch,logistic,adhd200_dx,42,0.046415888336127774,train,0.8356164383561644,0.01934828272188471,0.8291518442239281,0.020458228004716892,0.8242352079135373,0.020404212554943395 +flat_mae,patch,logistic,adhd200_dx,42,0.046415888336127774,test,0.5230769230769231,0.06222691820355903,0.521263958184842,0.062093820948733364,0.5246138996138996,0.06241504746571004 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,train,0.7506849315068493,0.02269001001984561,0.7393859504586148,0.024296061310181135,0.7360780362703792,0.023760114212049394 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,test,0.5692307692307692,0.05970816601401684,0.5512820512820513,0.06199236991425693,0.5521235521235521,0.06037225555568077 +flat_mae,patch,logistic,adhd200_dx,44,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,44,2.782559402207126,test,0.5846153846153846,0.05910476106935116,0.5842217484008528,0.05919928555119167,0.5916988416988418,0.05964899606675737 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,train,0.7616438356164383,0.021622872455995857,0.7519935020813646,0.02310885962953496,0.7486566526225804,0.022772982343979623 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,test,0.5538461538461539,0.05962705394475042,0.5321419707123356,0.0629144100468239,0.5342664092664092,0.060530687961476766 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,train,0.7232876712328767,0.02213562724703811,0.7114263125347361,0.02356386550983922,0.7089363131220614,0.022995963169717026 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,test,0.6461538461538462,0.05485659410827256,0.6289401836684041,0.057852853048648845,0.6283783783783784,0.05563775822209253 +flat_mae,patch,logistic,adhd200_dx,47,0.046415888336127774,train,0.8493150684931506,0.01936689520190785,0.8442216514444901,0.020417455167664526,0.8399584783537889,0.020560503091810275 +flat_mae,patch,logistic,adhd200_dx,47,0.046415888336127774,test,0.5230769230769231,0.06275821170249096,0.5189782764382908,0.06317672602671416,0.5202702702702703,0.06336708796443521 +flat_mae,patch,logistic,adhd200_dx,48,0.005994842503189409,train,0.7287671232876712,0.0228509647576347,0.7135984275059643,0.0250474890055396,0.7109208035659766,0.024077072493946455 +flat_mae,patch,logistic,adhd200_dx,48,0.005994842503189409,test,0.7384615384615385,0.053027481989566944,0.7344388368180725,0.05423895658743122,0.7355212355212355,0.05441812256991714 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,train,0.7671232876712328,0.02141449501470551,0.7576948008840918,0.02277006960429562,0.7542284911766501,0.022413114556494886 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,test,0.49230769230769234,0.05654875678112759,0.4501409894898744,0.06028820941715123,0.46283783783783783,0.056554902526187556 +flat_mae,patch,logistic,adhd200_dx,50,0.046415888336127774,train,0.8465753424657534,0.019209739038703055,0.8421670373115888,0.019978045532709016,0.8389662331318313,0.020143031801479857 +flat_mae,patch,logistic,adhd200_dx,50,0.046415888336127774,test,0.6,0.059825473982895505,0.5976190476190476,0.06025613412850478,0.6008687258687259,0.06048451140510875 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,train,0.7506849315068493,0.02155124359710415,0.7432776064491695,0.022563865718769158,0.7411003236245954,0.022500300554364708 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,test,0.5846153846153846,0.05874144328975339,0.5699583435432491,0.062218887434453855,0.5699806949806949,0.06068139553918578 +flat_mae,patch,logistic,adhd200_dx,52,0.005994842503189409,train,0.7534246575342466,0.0217136768087639,0.7425548589341693,0.023230002284353117,0.7392226903584295,0.022768232444406688 +flat_mae,patch,logistic,adhd200_dx,52,0.005994842503189409,test,0.6923076923076923,0.05674482694145242,0.6794871794871795,0.060549092788397464,0.6776061776061776,0.0593738798569782 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,train,0.7589041095890411,0.021850482899180478,0.7499688628720886,0.023042593663167972,0.7469469377785919,0.022727550537533208 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,test,0.6461538461538462,0.05253197166809254,0.6167649320687003,0.05937010174667164,0.6196911196911197,0.0546975101019956 +flat_mae,patch,logistic,adhd200_dx,54,0.005994842503189409,train,0.7424657534246575,0.019817907026704957,0.734031007751938,0.020982051754771556,0.7316663613604445,0.020835529703625424 +flat_mae,patch,logistic,adhd200_dx,54,0.005994842503189409,test,0.5846153846153846,0.06169705717378524,0.578226387887527,0.06223693936725381,0.5786679536679536,0.06207129875489975 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,train,0.7534246575342466,0.022258499402395993,0.7458531905675558,0.02321621667059037,0.7435275080906149,0.023080598199159228 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,test,0.5538461538461539,0.057887211288584546,0.5381034060279344,0.06057810343287101,0.5386100386100386,0.05897648973194783 +flat_mae,patch,logistic,adhd200_dx,56,0.046415888336127774,train,0.8191780821917808,0.019246409788202322,0.8116438356164384,0.020505163719735302,0.8068022226292972,0.020408177928851384 +flat_mae,patch,logistic,adhd200_dx,56,0.046415888336127774,test,0.7538461538461538,0.05233479840660854,0.7509578544061303,0.05313976491880676,0.7533783783783784,0.05320164539581843 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,train,0.8410958904109589,0.018121119575426815,0.835891472868217,0.018942342106432787,0.8319594553336997,0.01897058145702966 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,test,0.5384615384615384,0.05774528029587962,0.5045731707317074,0.06321053563268794,0.5120656370656371,0.058965146575908595 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,train,0.7424657534246575,0.023014742311624568,0.7345577768150028,0.02404687329373701,0.7323838309824754,0.02388292219392031 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,test,0.6153846153846154,0.05724131416500916,0.606060606060606,0.05901454963378071,0.6056949806949807,0.058321873125786565 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,train,0.7589041095890411,0.021619187630088004,0.7504971414367387,0.022653873330374455,0.7476644074006228,0.022433525149971618 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,test,0.6615384615384615,0.05669389205593231,0.6366869918699187,0.06378295710161025,0.6375482625482626,0.059447634303988264 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,train,0.7534246575342466,0.022066005829063495,0.7463398813936248,0.02293685034763986,0.7442449777126457,0.02276671202171798 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,test,0.6307692307692307,0.06007410807942521,0.6198830409356726,0.06184944458845069,0.6192084942084942,0.061082140773454204 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,train,0.7397260273972602,0.021588468110051547,0.7291883068704555,0.022751685014646972,0.7263692984063015,0.022333899965338095 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,test,0.5692307692307692,0.06167777308161905,0.545,0.06620059621119585,0.5477799227799228,0.06323938450827297 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,train,0.8356164383561644,0.018683777733228567,0.8308932542624166,0.01943951568270076,0.8278225560236918,0.019538910174881397 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,test,0.5692307692307692,0.060286778163223934,0.5683111954459203,0.06047453442615229,0.5738416988416988,0.061319564028101285 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,train,0.7506849315068493,0.022724266737184907,0.7411650107149814,0.024138599739345304,0.7382304451364718,0.023781754169756607 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,test,0.6153846153846154,0.06007367468810731,0.6018132810585641,0.062244594401222535,0.6013513513513513,0.06118141847270091 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,train,0.7232876712328767,0.022093395301972598,0.7127216052990454,0.02322949705878362,0.7103712523661232,0.022829821686176296 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,test,0.6153846153846154,0.05654845542232686,0.5905769715293525,0.06174464329941378,0.5926640926640927,0.05818497650337081 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,train,0.7424657534246575,0.02193251219107029,0.7329212853406402,0.023187573529102543,0.7302314221163827,0.022900468888927314 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,test,0.7076923076923077,0.05372712999073578,0.6973780936045086,0.05677095606741868,0.6954633204633205,0.05577347769373145 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,train,0.7616438356164383,0.022127443352007548,0.752542372881356,0.02332796069057829,0.7493741222446113,0.02298825031769191 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,test,0.6461538461538462,0.05731027562582204,0.6289401836684041,0.06098848105498682,0.6283783783783784,0.05892914750112902 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,train,0.7506849315068493,0.022041584744742684,0.7427766032417196,0.023048558803261926,0.7403828540025645,0.02284364577333313 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,test,0.6461538461538462,0.05467472399369733,0.6167649320687003,0.06253478227277709,0.6196911196911197,0.05722143470212512 +flat_mae,patch,logistic,adhd200_dx,68,0.046415888336127774,train,0.8465753424657534,0.018679583053262266,0.8398119122257053,0.020088786546601258,0.833943945777615,0.020181886749113125 +flat_mae,patch,logistic,adhd200_dx,68,0.046415888336127774,test,0.6307692307692307,0.05761304093004553,0.6036585365853658,0.06399693786369566,0.6061776061776062,0.05966462344669754 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,train,0.7561643835616438,0.022324498121196253,0.7484298647089345,0.023480641753839758,0.7459546925566343,0.023323018986221446 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,test,0.6,0.056987697208573836,0.570630081300813,0.06374024422218129,0.5748069498069498,0.05910656159145559 +flat_mae,patch,logistic,adhd200_dx,70,0.3593813663804626,train,0.947945205479452,0.011813114073015751,0.9466903427653375,0.012199187198473484,0.9438389204371985,0.012799274094652735 +flat_mae,patch,logistic,adhd200_dx,70,0.3593813663804626,test,0.49230769230769234,0.060457396608965285,0.49230769230769234,0.060908906677803465,0.5019305019305019,0.060779167044177074 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,train,0.7561643835616438,0.02144642134607076,0.7457122952038764,0.02277311489687733,0.7423673444464798,0.02231645334126513 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,test,0.6153846153846154,0.06090799347080031,0.606060606060606,0.06264367802770703,0.6056949806949807,0.0619435055393028 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,train,0.7397260273972602,0.022323683092105844,0.7324975891996143,0.023026141894869526,0.7306741161384869,0.022833435933715413 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,test,0.6,0.05979983178619155,0.5775,0.06280078641948833,0.5791505791505791,0.060354285442884986 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,train,0.6684931506849315,0.02327075937085806,0.6470329670329671,0.02569852477778862,0.6474781706051169,0.024236738580213368 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,test,0.49230769230769234,0.05854416024015419,0.4501409894898744,0.06238481306720599,0.46283783783783783,0.05874415015152394 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,train,0.736986301369863,0.021364737003755328,0.7294292068198666,0.02214123204744714,0.7275294620504366,0.021995434608671465 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,test,0.6615384615384615,0.0601251869561241,0.6575670498084292,0.060683155479074914,0.6592664092664093,0.060530003883176364 +flat_mae,patch,logistic,adhd200_dx,75,0.046415888336127774,train,0.8356164383561644,0.0187975547692528,0.8312060673325934,0.019517899197198485,0.8285400256457227,0.01967217447947788 +flat_mae,patch,logistic,adhd200_dx,75,0.046415888336127774,test,0.5384615384615384,0.06559170823624821,0.5357142857142857,0.0655577421551361,0.5381274131274132,0.06585580695114068 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,train,0.7589041095890411,0.02161522298092061,0.7482758620689656,0.023052427336516832,0.7447945289124992,0.02257442040525606 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,test,0.6153846153846154,0.05015987693802557,0.5656241646618552,0.06210304332696292,0.5796332046332047,0.052747948203469626 +flat_mae,patch,logistic,adhd200_dx,77,0.005994842503189409,train,0.726027397260274,0.02215901521590508,0.7146118721461188,0.02356327397922557,0.7120809672101117,0.023075284726873434 +flat_mae,patch,logistic,adhd200_dx,77,0.005994842503189409,test,0.7384615384615385,0.050707421582548415,0.7215923406399596,0.05608497366383407,0.7181467181467182,0.05339228804921405 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,train,0.8410958904109589,0.018654304228282573,0.8374039938556068,0.019236168958501918,0.8355468034438542,0.019415207698777328 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,test,0.5538461538461539,0.057827329179203836,0.5250692869740489,0.06237134210509991,0.5299227799227799,0.05894936734425199 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,train,0.7506849315068493,0.02245900266096932,0.7427766032417196,0.02343508618239155,0.7403828540025645,0.023189142652729644 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,test,0.6,0.058415560867625754,0.5833333333333333,0.06195675162001897,0.5834942084942085,0.06015766089632744 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,train,0.7643835616438356,0.020767897480780038,0.7539968652037617,0.02234411271031427,0.7503663674665689,0.021969166358799766 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,test,0.5384615384615384,0.06237784464822891,0.5192307692307693,0.06526552807160202,0.5207528957528957,0.06351746882891292 +flat_mae,patch,logistic,adhd200_dx,81,0.046415888336127774,train,0.8493150684931506,0.018277647655760034,0.8448381137879596,0.019048771628532572,0.8413934175978507,0.019181339680681125 +flat_mae,patch,logistic,adhd200_dx,81,0.046415888336127774,test,0.49230769230769234,0.05495694710865003,0.4595616024187452,0.05824943651176344,0.4671814671814672,0.055216052033539215 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,train,0.7424657534246575,0.022065994943757848,0.7329212853406402,0.023202708038595178,0.7302314221163827,0.02286743886026846 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,test,0.7692307692307693,0.05188356559353092,0.7656813266041816,0.05288807825370034,0.7668918918918919,0.0529630247037585 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,train,0.7315068493150685,0.02309889708894051,0.7209480122324159,0.024324737978741018,0.7183702753862123,0.023832184347815887 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,test,0.6923076923076923,0.05847449249193489,0.6832358674463938,0.06080740401958112,0.6819498069498069,0.06004606977797315 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,train,0.7506849315068493,0.021398544854710166,0.7427766032417196,0.022260594646318823,0.7403828540025645,0.022037922082425904 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,test,0.5846153846153846,0.06064427269454954,0.5810455956075435,0.06126680584118421,0.583011583011583,0.06185370389306462 +flat_mae,patch,logistic,adhd200_dx,85,0.005994842503189409,train,0.7287671232876712,0.021685820904406457,0.7164748252242072,0.023409236910441958,0.7137906820541002,0.022863723379934035 +flat_mae,patch,logistic,adhd200_dx,85,0.005994842503189409,test,0.676923076923077,0.05743490475332634,0.6612062546537603,0.0608267053812746,0.6597490347490347,0.05894010251967703 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,train,0.7479452054794521,0.02233158667801934,0.7386038111844564,0.02350338436213555,0.7358032606704524,0.023180344990583135 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,test,0.5384615384615384,0.05973995719120579,0.5045731707317074,0.06330649658459518,0.5120656370656371,0.06006229361922094 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,train,0.7452054794520548,0.02257307077278902,0.7360440432033966,0.02373974042757286,0.733376076204433,0.023452813014775673 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,test,0.5846153846153846,0.05637628440919978,0.5578231292517006,0.06151290247814395,0.5612934362934363,0.05776044326027158 +flat_mae,patch,logistic,adhd200_dx,88,0.005994842503189409,train,0.7698630136986301,0.020592894033350274,0.7618381804623415,0.02172649361056642,0.7588080845087622,0.02158838378554111 +flat_mae,patch,logistic,adhd200_dx,88,0.005994842503189409,test,0.6153846153846154,0.05828653157731732,0.6018132810585641,0.061490834311492375,0.6013513513513513,0.060082193232379136 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,train,0.7452054794520548,0.022619912778765783,0.738129218900675,0.023538214417911405,0.7362459546925566,0.023385022033306307 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,test,0.6461538461538462,0.05532142549088188,0.6233308138070043,0.06064604720224832,0.6240347490347491,0.05746748386931857 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,train,0.7534246575342466,0.02238560712305102,0.7431506849315068,0.023755621733604508,0.7399401599804604,0.023316067252116303 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,test,0.6307692307692307,0.060799338208793186,0.6153846153846154,0.0634751284117089,0.6148648648648649,0.061764248229450223 +flat_mae,patch,logistic,adhd200_dx,91,0.3593813663804626,train,0.9643835616438357,0.009419055359516733,0.9635249713657572,0.009731834868732999,0.9605544360994077,0.010464877223956233 +flat_mae,patch,logistic,adhd200_dx,91,0.3593813663804626,test,0.5538461538461539,0.05730598035646353,0.5250692869740489,0.06152637058732103,0.5299227799227799,0.05818658417496716 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,train,0.7506849315068493,0.022059630882976897,0.7417205153925708,0.023182906376220377,0.7389479147585027,0.022926217399385326 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,test,0.6461538461538462,0.05379690055082858,0.6091503267973856,0.06195626606112066,0.6153474903474904,0.055739907503528925 +flat_mae,patch,logistic,adhd200_dx,93,0.3593813663804626,train,0.9424657534246575,0.010980082276240885,0.9408678881388621,0.011421200576404646,0.936832142639067,0.012031315920383472 +flat_mae,patch,logistic,adhd200_dx,93,0.3593813663804626,test,0.5230769230769231,0.058071573874897614,0.49987589972697943,0.06133805185791141,0.502895752895753,0.05884179815352537 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,train,0.7561643835616438,0.02183015005225994,0.7462922032786373,0.023187904413277113,0.7430848140685107,0.022796153407053585 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,test,0.6461538461538462,0.05226187133337527,0.6091503267973856,0.06131125846901579,0.6153474903474904,0.05481560009683048 +flat_mae,patch,logistic,adhd200_dx,95,0.046415888336127774,train,0.8328767123287671,0.019571605613588883,0.8288479425623996,0.02010449489551578,0.8268303108017341,0.020127369932810772 +flat_mae,patch,logistic,adhd200_dx,95,0.046415888336127774,test,0.5230769230769231,0.06010845424887707,0.5062484685126194,0.06254345740724739,0.5072393822393823,0.0611168562389969 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,train,0.7342465753424657,0.02271639073348723,0.7228549734244495,0.024256189430376475,0.7200799902302009,0.02368912171057044 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,test,0.676923076923077,0.056408484820666065,0.6719538572458543,0.057595203979358835,0.6727799227799228,0.05803797795375002 +flat_mae,patch,logistic,adhd200_dx,97,0.046415888336127774,train,0.852054794520548,0.018127713558915397,0.847803928836175,0.01894668701166732,0.844538071685901,0.019170019577358668 +flat_mae,patch,logistic,adhd200_dx,97,0.046415888336127774,test,0.6153846153846154,0.05511093921629149,0.5905769715293525,0.06005817685748905,0.5926640926640927,0.056744227992317334 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,train,0.7342465753424657,0.022900690575251636,0.7228549734244495,0.024160410351601512,0.7200799902302009,0.023620589999319257 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,test,0.676923076923077,0.05636072324110431,0.656084656084656,0.061064255959187304,0.6554054054054055,0.057836017847194346 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7589041095890411,0.023529009575669238,0.7494227048617612,0.024958256879617297,0.746229468156561,0.02457382783249579 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.5692307692307692,0.05856738596031798,0.5512820512820513,0.06137668079152661,0.5521235521235521,0.05972475766137966 +flat_mae,patch,logistic,adhd200_dx,100,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,100,2.782559402207126,test,0.5692307692307692,0.062359287279916624,0.564176245210728,0.06330336097680288,0.5651544401544402,0.06338963080034446 diff --git a/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9ae3b43387d98e0a138a83d4fb7c0704761a483f --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:52:18 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:12:58 time: 5.1535 data: 3.9844 max mem: 2698 +extract (train) [ 20/151] eta: 0:01:00 time: 0.2287 data: 0.0896 max mem: 3005 +extract (train) [ 40/151] eta: 0:00:37 time: 0.2090 data: 0.0762 max mem: 3005 +extract (train) [ 60/151] eta: 0:00:26 time: 0.2001 data: 0.0713 max mem: 3005 +extract (train) [ 80/151] eta: 0:00:19 time: 0.2022 data: 0.0723 max mem: 3005 +extract (train) [100/151] eta: 0:00:13 time: 0.1917 data: 0.0657 max mem: 3005 +extract (train) [120/151] eta: 0:00:07 time: 0.2170 data: 0.0793 max mem: 3005 +extract (train) [140/151] eta: 0:00:02 time: 0.1595 data: 0.0499 max mem: 3005 +extract (train) [150/151] eta: 0:00:00 time: 0.1594 data: 0.0519 max mem: 3005 +extract (train) Total time: 0:00:35 (0.2342 s / it) +extract (validation) [ 0/32] eta: 0:02:17 time: 4.2931 data: 4.1599 max mem: 3005 +extract (validation) [20/32] eta: 0:00:04 time: 0.1965 data: 0.0660 max mem: 3005 +extract (validation) [31/32] eta: 0:00:00 time: 0.1497 data: 0.0433 max mem: 3005 +extract (validation) Total time: 0:00:10 (0.3205 s / it) +extract (test) [ 0/33] eta: 0:02:18 time: 4.1971 data: 4.0558 max mem: 3005 +extract (test) [20/33] eta: 0:00:04 time: 0.1912 data: 0.0628 max mem: 3005 +extract (test) [32/33] eta: 0:00:00 time: 0.1481 data: 0.0456 max mem: 3005 +extract (test) Total time: 0:00:10 (0.3080 s / it) +feature extraction time: 0:00:55 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | train | 0.74795 | 0.021555 | 0.73683 | 0.022947 | 0.73365 | 0.022463 | +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | test | 0.58462 | 0.057619 | 0.55782 | 0.063091 | 0.56129 | 0.059238 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05873252176116769, "f1": 0.6198830409356726, "f1_std": 0.0604753092760223, "bacc": 0.6192084942084942, "bacc_std": 0.059827196234437253} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.056576441455880304, "f1": 0.675, "f1_std": 0.06118661314295948, "bacc": 0.6732625482625483, "bacc_std": 0.058832053743652954} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05784798687766771, "f1": 0.545, "f1_std": 0.06155549411738206, "bacc": 0.5477799227799228, "bacc_std": 0.058897270770354206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 2.782559402207126, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.060534041476798764, "f1": 0.5534233593935086, "f1_std": 0.06083958887332807, "bacc": 0.5603281853281853, "bacc_std": 0.061099826610612175} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.060089117249584545, "f1": 0.5666666666666667, "f1_std": 0.06056517305285745, "bacc": 0.5694980694980695, "bacc_std": 0.06136539284002246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.060120305423877834, "f1": 0.606060606060606, "f1_std": 0.06230327968320969, "bacc": 0.6056949806949807, "bacc_std": 0.06162951877812147} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05855477585259745, "f1": 0.564176245210728, "f1_std": 0.05865634040864323, "bacc": 0.5651544401544402, "bacc_std": 0.05874587396824529} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.06202615214807266, "f1": 0.6655231560891939, "f1_std": 0.06490580932633228, "bacc": 0.6640926640926641, "bacc_std": 0.06364224900139301} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.0630233994302088, "f1": 0.4871794871794872, "f1_std": 0.0655077073783622, "bacc": 0.48938223938223935, "bacc_std": 0.06383980599356427} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06049186947176743, "f1": 0.543030303030303, "f1_std": 0.062475758780398905, "bacc": 0.542953667953668, "bacc_std": 0.061779515437967246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 2.782559402207126, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.0589652389826754, "f1": 0.5115151515151515, "f1_std": 0.059912447310021565, "bacc": 0.5115830115830116, "bacc_std": 0.059191974499188865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05774769853760868, "f1": 0.5512820512820513, "f1_std": 0.06166578475310059, "bacc": 0.5521235521235521, "bacc_std": 0.05950401726232643} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.060958334785996715, "f1": 0.5381034060279344, "f1_std": 0.06311755309246836, "bacc": 0.5386100386100386, "bacc_std": 0.061702897673304856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05748756284325162, "f1": 0.5745454545454545, "f1_std": 0.05913027863317107, "bacc": 0.5743243243243243, "bacc_std": 0.05871047961912486} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.060827049417412994, "f1": 0.6094688776736361, "f1_std": 0.06168376931762646, "bacc": 0.61003861003861, "bacc_std": 0.061707489511910345} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06259662542655874, "f1": 0.5921814671814671, "f1_std": 0.06370679138636012, "bacc": 0.5921814671814671, "bacc_std": 0.06364347873222252} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.056666690799391545, "f1": 0.6167649320687003, "f1_std": 0.06421648132229565, "bacc": 0.6196911196911197, "bacc_std": 0.059051750880579186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.056027043343360686, "f1": 0.6289401836684041, "f1_std": 0.05987107176905926, "bacc": 0.6283783783783784, "bacc_std": 0.05781904282814363} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.052647259800459145, "f1": 0.622093023255814, "f1_std": 0.06303717798741677, "bacc": 0.6288610038610039, "bacc_std": 0.05540749457101387} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05653903715039275, "f1": 0.6474358974358974, "f1_std": 0.060137525582981174, "bacc": 0.6462355212355213, "bacc_std": 0.0585041538352041} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05909463676743057, "f1": 0.588206627680312, "f1_std": 0.060896915091185506, "bacc": 0.5878378378378378, "bacc_std": 0.06015666300951957} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06057241741485385, "f1": 0.6264367816091954, "f1_std": 0.06120814236301764, "bacc": 0.627895752895753, "bacc_std": 0.06154412674398376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05077016316160134, "f1": 0.49612403100775193, "f1_std": 0.05964757569749845, "bacc": 0.5168918918918919, "bacc_std": 0.05181038564798979} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05729141534461642, "f1": 0.5501153550371699, "f1_std": 0.06366781323401839, "bacc": 0.556949806949807, "bacc_std": 0.0585820995320762} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06211709499907622, "f1": 0.578226387887527, "f1_std": 0.06292712554499699, "bacc": 0.5786679536679536, "bacc_std": 0.0627717337817736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05656368040384574, "f1": 0.5512820512820513, "f1_std": 0.05897713463376735, "bacc": 0.5521235521235521, "bacc_std": 0.05741034976650255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05828945523605429, "f1": 0.6094688776736361, "f1_std": 0.058735208831453375, "bacc": 0.61003861003861, "bacc_std": 0.05858354744498113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05451080778781831, "f1": 0.570630081300813, "f1_std": 0.06023385353325281, "bacc": 0.5748069498069498, "bacc_std": 0.05612406970014136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.47692307692307695, "acc_std": 0.06292126384448252, "f1": 0.47078544061302685, "f1_std": 0.06329844267154572, "bacc": 0.471042471042471, "bacc_std": 0.06368218243962279} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0612076028846709, "f1": 0.5565302144249512, "f1_std": 0.06299081442112053, "bacc": 0.5564671814671815, "bacc_std": 0.06194880306863747} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05976313007177863, "f1": 0.5469838981014179, "f1_std": 0.0608617816216368, "bacc": 0.5472972972972974, "bacc_std": 0.061121624792276306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.062457832402368615, "f1": 0.5745454545454545, "f1_std": 0.0640948831351513, "bacc": 0.5743243243243243, "bacc_std": 0.06359579063513054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05965006435465759, "f1": 0.5565302144249512, "f1_std": 0.062083413444177615, "bacc": 0.5564671814671815, "bacc_std": 0.06083732096663037} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06009620689228428, "f1": 0.5810455956075435, "f1_std": 0.06113419226875578, "bacc": 0.583011583011583, "bacc_std": 0.06162055040146502} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05913723683995633, "f1": 0.545, "f1_std": 0.06303908391988262, "bacc": 0.5477799227799228, "bacc_std": 0.060355524886203604} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.052768459290166776, "f1": 0.6366869918699187, "f1_std": 0.059022972864466054, "bacc": 0.6375482625482626, "bacc_std": 0.05471335219672646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.000774263682681127, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.053376190867083145, "f1": 0.6299171842650104, "f1_std": 0.06251014060350726, "bacc": 0.6332046332046332, "bacc_std": 0.05619269997013812} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05699900134077924, "f1": 0.5125, "f1_std": 0.06049029596656931, "bacc": 0.5164092664092664, "bacc_std": 0.057506542909614766} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.058763398824609514, "f1": 0.6612062546537603, "f1_std": 0.06241005650320301, "bacc": 0.6597490347490347, "bacc_std": 0.060487454132672716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05552170213784898, "f1": 0.6167649320687003, "f1_std": 0.06335678529713759, "bacc": 0.6196911196911197, "bacc_std": 0.058210192808682476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.057703227684481405, "f1": 0.6153846153846154, "f1_std": 0.06086968984076391, "bacc": 0.6148648648648649, "bacc_std": 0.05897178269384293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06222691820355903, "f1": 0.521263958184842, "f1_std": 0.062093820948733364, "bacc": 0.5246138996138996, "bacc_std": 0.06241504746571004} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05970816601401684, "f1": 0.5512820512820513, "f1_std": 0.06199236991425693, "bacc": 0.5521235521235521, "bacc_std": 0.06037225555568077} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 2.782559402207126, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05910476106935116, "f1": 0.5842217484008528, "f1_std": 0.05919928555119167, "bacc": 0.5916988416988418, "bacc_std": 0.05964899606675737} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05962705394475042, "f1": 0.5321419707123356, "f1_std": 0.0629144100468239, "bacc": 0.5342664092664092, "bacc_std": 0.060530687961476766} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05485659410827256, "f1": 0.6289401836684041, "f1_std": 0.057852853048648845, "bacc": 0.6283783783783784, "bacc_std": 0.05563775822209253} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06275821170249096, "f1": 0.5189782764382908, "f1_std": 0.06317672602671416, "bacc": 0.5202702702702703, "bacc_std": 0.06336708796443521} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.053027481989566944, "f1": 0.7344388368180725, "f1_std": 0.05423895658743122, "bacc": 0.7355212355212355, "bacc_std": 0.05441812256991714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.05654875678112759, "f1": 0.4501409894898744, "f1_std": 0.06028820941715123, "bacc": 0.46283783783783783, "bacc_std": 0.056554902526187556} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.059825473982895505, "f1": 0.5976190476190476, "f1_std": 0.06025613412850478, "bacc": 0.6008687258687259, "bacc_std": 0.06048451140510875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05874144328975339, "f1": 0.5699583435432491, "f1_std": 0.062218887434453855, "bacc": 0.5699806949806949, "bacc_std": 0.06068139553918578} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05674482694145242, "f1": 0.6794871794871795, "f1_std": 0.060549092788397464, "bacc": 0.6776061776061776, "bacc_std": 0.0593738798569782} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05253197166809254, "f1": 0.6167649320687003, "f1_std": 0.05937010174667164, "bacc": 0.6196911196911197, "bacc_std": 0.0546975101019956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06169705717378524, "f1": 0.578226387887527, "f1_std": 0.06223693936725381, "bacc": 0.5786679536679536, "bacc_std": 0.06207129875489975} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.057887211288584546, "f1": 0.5381034060279344, "f1_std": 0.06057810343287101, "bacc": 0.5386100386100386, "bacc_std": 0.05897648973194783} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.7538461538461538, "acc_std": 0.05233479840660854, "f1": 0.7509578544061303, "f1_std": 0.05313976491880676, "bacc": 0.7533783783783784, "bacc_std": 0.05320164539581843} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05774528029587962, "f1": 0.5045731707317074, "f1_std": 0.06321053563268794, "bacc": 0.5120656370656371, "bacc_std": 0.058965146575908595} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05724131416500916, "f1": 0.606060606060606, "f1_std": 0.05901454963378071, "bacc": 0.6056949806949807, "bacc_std": 0.058321873125786565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05669389205593231, "f1": 0.6366869918699187, "f1_std": 0.06378295710161025, "bacc": 0.6375482625482626, "bacc_std": 0.059447634303988264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06007410807942521, "f1": 0.6198830409356726, "f1_std": 0.06184944458845069, "bacc": 0.6192084942084942, "bacc_std": 0.061082140773454204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06167777308161905, "f1": 0.545, "f1_std": 0.06620059621119585, "bacc": 0.5477799227799228, "bacc_std": 0.06323938450827297} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.060286778163223934, "f1": 0.5683111954459203, "f1_std": 0.06047453442615229, "bacc": 0.5738416988416988, "bacc_std": 0.061319564028101285} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06007367468810731, "f1": 0.6018132810585641, "f1_std": 0.062244594401222535, "bacc": 0.6013513513513513, "bacc_std": 0.06118141847270091} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05654845542232686, "f1": 0.5905769715293525, "f1_std": 0.06174464329941378, "bacc": 0.5926640926640927, "bacc_std": 0.05818497650337081} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05372712999073578, "f1": 0.6973780936045086, "f1_std": 0.05677095606741868, "bacc": 0.6954633204633205, "bacc_std": 0.05577347769373145} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05731027562582204, "f1": 0.6289401836684041, "f1_std": 0.06098848105498682, "bacc": 0.6283783783783784, "bacc_std": 0.05892914750112902} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05467472399369733, "f1": 0.6167649320687003, "f1_std": 0.06253478227277709, "bacc": 0.6196911196911197, "bacc_std": 0.05722143470212512} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05761304093004553, "f1": 0.6036585365853658, "f1_std": 0.06399693786369566, "bacc": 0.6061776061776062, "bacc_std": 0.05966462344669754} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.056987697208573836, "f1": 0.570630081300813, "f1_std": 0.06374024422218129, "bacc": 0.5748069498069498, "bacc_std": 0.05910656159145559} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.3593813663804626, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.060457396608965285, "f1": 0.49230769230769234, "f1_std": 0.060908906677803465, "bacc": 0.5019305019305019, "bacc_std": 0.060779167044177074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06090799347080031, "f1": 0.606060606060606, "f1_std": 0.06264367802770703, "bacc": 0.6056949806949807, "bacc_std": 0.0619435055393028} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05979983178619155, "f1": 0.5775, "f1_std": 0.06280078641948833, "bacc": 0.5791505791505791, "bacc_std": 0.060354285442884986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.05854416024015419, "f1": 0.4501409894898744, "f1_std": 0.06238481306720599, "bacc": 0.46283783783783783, "bacc_std": 0.05874415015152394} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.0601251869561241, "f1": 0.6575670498084292, "f1_std": 0.060683155479074914, "bacc": 0.6592664092664093, "bacc_std": 0.060530003883176364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06559170823624821, "f1": 0.5357142857142857, "f1_std": 0.0655577421551361, "bacc": 0.5381274131274132, "bacc_std": 0.06585580695114068} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05015987693802557, "f1": 0.5656241646618552, "f1_std": 0.06210304332696292, "bacc": 0.5796332046332047, "bacc_std": 0.052747948203469626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.050707421582548415, "f1": 0.7215923406399596, "f1_std": 0.05608497366383407, "bacc": 0.7181467181467182, "bacc_std": 0.05339228804921405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.057827329179203836, "f1": 0.5250692869740489, "f1_std": 0.06237134210509991, "bacc": 0.5299227799227799, "bacc_std": 0.05894936734425199} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.058415560867625754, "f1": 0.5833333333333333, "f1_std": 0.06195675162001897, "bacc": 0.5834942084942085, "bacc_std": 0.06015766089632744} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06237784464822891, "f1": 0.5192307692307693, "f1_std": 0.06526552807160202, "bacc": 0.5207528957528957, "bacc_std": 0.06351746882891292} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.05495694710865003, "f1": 0.4595616024187452, "f1_std": 0.05824943651176344, "bacc": 0.4671814671814672, "bacc_std": 0.055216052033539215} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.7692307692307693, "acc_std": 0.05188356559353092, "f1": 0.7656813266041816, "f1_std": 0.05288807825370034, "bacc": 0.7668918918918919, "bacc_std": 0.0529630247037585} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05847449249193489, "f1": 0.6832358674463938, "f1_std": 0.06080740401958112, "bacc": 0.6819498069498069, "bacc_std": 0.06004606977797315} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06064427269454954, "f1": 0.5810455956075435, "f1_std": 0.06126680584118421, "bacc": 0.583011583011583, "bacc_std": 0.06185370389306462} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05743490475332634, "f1": 0.6612062546537603, "f1_std": 0.0608267053812746, "bacc": 0.6597490347490347, "bacc_std": 0.05894010251967703} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05973995719120579, "f1": 0.5045731707317074, "f1_std": 0.06330649658459518, "bacc": 0.5120656370656371, "bacc_std": 0.06006229361922094} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05637628440919978, "f1": 0.5578231292517006, "f1_std": 0.06151290247814395, "bacc": 0.5612934362934363, "bacc_std": 0.05776044326027158} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05828653157731732, "f1": 0.6018132810585641, "f1_std": 0.061490834311492375, "bacc": 0.6013513513513513, "bacc_std": 0.060082193232379136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05532142549088188, "f1": 0.6233308138070043, "f1_std": 0.06064604720224832, "bacc": 0.6240347490347491, "bacc_std": 0.05746748386931857} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.060799338208793186, "f1": 0.6153846153846154, "f1_std": 0.0634751284117089, "bacc": 0.6148648648648649, "bacc_std": 0.061764248229450223} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05730598035646353, "f1": 0.5250692869740489, "f1_std": 0.06152637058732103, "bacc": 0.5299227799227799, "bacc_std": 0.05818658417496716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05379690055082858, "f1": 0.6091503267973856, "f1_std": 0.06195626606112066, "bacc": 0.6153474903474904, "bacc_std": 0.055739907503528925} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.058071573874897614, "f1": 0.49987589972697943, "f1_std": 0.06133805185791141, "bacc": 0.502895752895753, "bacc_std": 0.05884179815352537} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05226187133337527, "f1": 0.6091503267973856, "f1_std": 0.06131125846901579, "bacc": 0.6153474903474904, "bacc_std": 0.05481560009683048} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06010845424887707, "f1": 0.5062484685126194, "f1_std": 0.06254345740724739, "bacc": 0.5072393822393823, "bacc_std": 0.0611168562389969} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.056408484820666065, "f1": 0.6719538572458543, "f1_std": 0.057595203979358835, "bacc": 0.6727799227799228, "bacc_std": 0.05803797795375002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05511093921629149, "f1": 0.5905769715293525, "f1_std": 0.06005817685748905, "bacc": 0.5926640926640927, "bacc_std": 0.056744227992317334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05636072324110431, "f1": 0.656084656084656, "f1_std": 0.061064255959187304, "bacc": 0.6554054054054055, "bacc_std": 0.057836017847194346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05856738596031798, "f1": 0.5512820512820513, "f1_std": 0.06137668079152661, "bacc": 0.5521235521235521, "bacc_std": 0.05972475766137966} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 2.782559402207126, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.062359287279916624, "f1": 0.564176245210728, "f1_std": 0.06330336097680288, "bacc": 0.5651544401544402, "bacc_std": 0.06338963080034446} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|--------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 0.14427 | 0.5467 | 0.78701 | 0.073089 | 0.77896 | 0.076754 | 0.7763 | 0.076699 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 0.14427 | 0.5467 | 0.60231 | 0.060416 | 0.58507 | 0.062693 | 0.58726 | 0.060922 | + + +done! total time: 0:04:51 diff --git a/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ff80ad328cf4f18850dd74e15182478c7f205fb --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (adhd200_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic +model: flat_mae +representation: reg +dataset: adhd200_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..9e074c287f411af2e0f51b7b7d270f4f2ccf713e --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,adhd200_dx,,0.005994842503189409,train,0.7780821917808219,0.02092838748900278,0.7719189971070395,0.021767325222481277,0.769676986016975,0.021743206728533457 +flat_mae,reg,logistic,adhd200_dx,,0.005994842503189409,test,0.5692307692307692,0.060014128514057206,0.545,0.06438226546373221,0.5477799227799228,0.061310360992910665 +flat_mae,reg,logistic,adhd200_dx,1,0.000774263682681127,train,0.7095890410958904,0.0226372200265637,0.6937531661600811,0.02468721773836641,0.691778103437748,0.023695768830358094 +flat_mae,reg,logistic,adhd200_dx,1,0.000774263682681127,test,0.6307692307692307,0.057172642110691015,0.6153846153846154,0.05984184823940859,0.6148648648648649,0.05812623880969185 +flat_mae,reg,logistic,adhd200_dx,2,0.005994842503189409,train,0.7780821917808219,0.02001525858171899,0.7690973984895464,0.021378490870497147,0.7653721682847896,0.02113515993023484 +flat_mae,reg,logistic,adhd200_dx,2,0.005994842503189409,test,0.676923076923077,0.05597946369008537,0.6612062546537603,0.06011226479461027,0.6597490347490347,0.05818371710824218 +flat_mae,reg,logistic,adhd200_dx,3,0.046415888336127774,train,0.8986301369863013,0.015509597510481577,0.8963617248233047,0.01597366674260702,0.8944098430726019,0.01631016527008006 +flat_mae,reg,logistic,adhd200_dx,3,0.046415888336127774,test,0.5846153846153846,0.06065029061331273,0.5699583435432491,0.0630938930630196,0.5699806949806949,0.06184774936815931 +flat_mae,reg,logistic,adhd200_dx,4,0.046415888336127774,train,0.8958904109589041,0.015620435921892252,0.8932879916292238,0.016192885336324625,0.8905477193625206,0.01657448851114052 +flat_mae,reg,logistic,adhd200_dx,4,0.046415888336127774,test,0.6153846153846154,0.059035009169918044,0.61207925519217,0.05967975321520279,0.6143822393822393,0.06056849238501182 +flat_mae,reg,logistic,adhd200_dx,5,0.046415888336127774,train,0.8986301369863013,0.016272975385709952,0.895814850530376,0.016973763006547682,0.8922574342065092,0.017477164592056133 +flat_mae,reg,logistic,adhd200_dx,5,0.046415888336127774,test,0.49230769230769234,0.06278626465705535,0.4879446168536643,0.06310919876910452,0.4888996138996139,0.06361574897763696 +flat_mae,reg,logistic,adhd200_dx,6,0.005994842503189409,train,0.7753424657534247,0.02032769705118527,0.7659817351598173,0.021669657038197836,0.7622275141967393,0.02131828484139543 +flat_mae,reg,logistic,adhd200_dx,6,0.005994842503189409,test,0.7230769230769231,0.05542395184277121,0.7149122807017544,0.05795997958953131,0.7133204633204633,0.05734892618338016 +flat_mae,reg,logistic,adhd200_dx,7,0.046415888336127774,train,0.8849315068493151,0.01648967696545995,0.8822580645161291,0.016978174875101094,0.8801215118764121,0.017201666309485886 +flat_mae,reg,logistic,adhd200_dx,7,0.046415888336127774,test,0.6,0.05840706774124325,0.5953065134099617,0.058913469546957024,0.5965250965250966,0.05918727228023486 +flat_mae,reg,logistic,adhd200_dx,8,0.005994842503189409,train,0.7835616438356164,0.021151873768846006,0.7757793485276164,0.022415161246070018,0.7723789460829211,0.02228635125239973 +flat_mae,reg,logistic,adhd200_dx,8,0.005994842503189409,test,0.6307692307692307,0.06239481089029362,0.6198830409356726,0.06461685118035697,0.6192084942084942,0.06366404275950352 +flat_mae,reg,logistic,adhd200_dx,9,0.005994842503189409,train,0.7835616438356164,0.021839930135431077,0.7771311088954329,0.02277782676477445,0.7745313549490138,0.022794751867958152 +flat_mae,reg,logistic,adhd200_dx,9,0.005994842503189409,test,0.5692307692307692,0.0592323196600276,0.5565302144249512,0.0611675996049126,0.5564671814671815,0.060281715260637185 +flat_mae,reg,logistic,adhd200_dx,10,0.046415888336127774,train,0.8821917808219178,0.01668622169247508,0.8789199614271939,0.017293239138348047,0.8755419185443,0.01746628336096159 +flat_mae,reg,logistic,adhd200_dx,10,0.046415888336127774,test,0.49230769230769234,0.061203117064913756,0.46760982874162327,0.06426508848347219,0.47152509652509655,0.0619916560933398 +flat_mae,reg,logistic,adhd200_dx,11,0.046415888336127774,train,0.9013698630136986,0.015462903992698411,0.8985359525574499,0.016101353492422664,0.8946846186725286,0.016500126382918014 +flat_mae,reg,logistic,adhd200_dx,11,0.046415888336127774,test,0.5692307692307692,0.05908922948744651,0.564176245210728,0.05977182609900448,0.5651544401544402,0.05979890891748785 +flat_mae,reg,logistic,adhd200_dx,12,0.005994842503189409,train,0.7698630136986301,0.022243875941227554,0.7627963111963855,0.023103498469373783,0.760243023752824,0.02293055745293161 +flat_mae,reg,logistic,adhd200_dx,12,0.005994842503189409,test,0.5230769230769231,0.0580633891515412,0.49987589972697943,0.0626428022644842,0.502895752895753,0.05972849257185143 +flat_mae,reg,logistic,adhd200_dx,13,0.005994842503189409,train,0.7753424657534247,0.021599819296867894,0.767984496124031,0.022698929490937096,0.7650973926848629,0.022583546890284335 +flat_mae,reg,logistic,adhd200_dx,13,0.005994842503189409,test,0.5846153846153846,0.05661618728527994,0.5699583435432491,0.05976078518236698,0.5699806949806949,0.05822846393412014 +flat_mae,reg,logistic,adhd200_dx,14,0.005994842503189409,train,0.7753424657534247,0.02017174969684679,0.7665075204393684,0.02140498894891449,0.7629449838187702,0.02119051208850139 +flat_mae,reg,logistic,adhd200_dx,14,0.005994842503189409,test,0.6461538461538462,0.05847964092200691,0.6336682185738789,0.06086890338230038,0.6327220077220077,0.05980931848061576 +flat_mae,reg,logistic,adhd200_dx,15,0.046415888336127774,train,0.9013698630136986,0.015480882465125915,0.898723640399556,0.016087545495781865,0.8954020882945595,0.01646770098382883 +flat_mae,reg,logistic,adhd200_dx,15,0.046415888336127774,test,0.46153846153846156,0.062105083689810305,0.4532564287430906,0.06247504159417956,0.4531853281853282,0.06228698584668029 +flat_mae,reg,logistic,adhd200_dx,16,0.005994842503189409,train,0.7945205479452054,0.02028260164383929,0.7888138862102217,0.02109124342289423,0.7863925016791842,0.021060697414578052 +flat_mae,reg,logistic,adhd200_dx,16,0.005994842503189409,test,0.6461538461538462,0.058756140306087494,0.6375757575757576,0.06042837884786278,0.6370656370656371,0.05994891019903007 +flat_mae,reg,logistic,adhd200_dx,17,0.005994842503189409,train,0.7835616438356164,0.019454949427690698,0.7752970972141049,0.0206945672933047,0.7716614764608902,0.020544291199130395 +flat_mae,reg,logistic,adhd200_dx,17,0.005994842503189409,test,0.6153846153846154,0.05050279739161682,0.5656241646618552,0.060335661740390796,0.5796332046332047,0.05227453553691125 +flat_mae,reg,logistic,adhd200_dx,18,0.000774263682681127,train,0.7150684931506849,0.02291189747652635,0.6995314083080041,0.025156064513766285,0.6973499419918178,0.024101517649875025 +flat_mae,reg,logistic,adhd200_dx,18,0.000774263682681127,test,0.6615384615384615,0.058008112062786166,0.6515594541910331,0.05987334723990986,0.6505791505791505,0.05914313341993746 +flat_mae,reg,logistic,adhd200_dx,19,0.005994842503189409,train,0.7643835616438356,0.023169779917921622,0.7534093765711413,0.025070453870558243,0.749648897844538,0.024500802541735953 +flat_mae,reg,logistic,adhd200_dx,19,0.005994842503189409,test,0.676923076923077,0.058165167803094414,0.656084656084656,0.06431018422438634,0.6554054054054055,0.060847868917686135 +flat_mae,reg,logistic,adhd200_dx,20,0.046415888336127774,train,0.8904109589041096,0.016179499781303598,0.8872621695082777,0.016859369744539256,0.8835409415643891,0.017176839170438815 +flat_mae,reg,logistic,adhd200_dx,20,0.046415888336127774,test,0.5538461538461539,0.05847205573412951,0.5469838981014179,0.05951227554826074,0.5472972972972974,0.0593758141534984 +flat_mae,reg,logistic,adhd200_dx,21,0.046415888336127774,train,0.8931506849315068,0.014885252978684793,0.8893087923111018,0.015754638490655604,0.8838157171643158,0.016213867041090915 +flat_mae,reg,logistic,adhd200_dx,21,0.046415888336127774,test,0.6307692307692307,0.05638658617607309,0.6036585365853658,0.06170716998295667,0.6061776061776062,0.05789264366777445 +flat_mae,reg,logistic,adhd200_dx,22,0.005994842503189409,train,0.7616438356164383,0.021955857384413534,0.7535869759212843,0.02303473554265503,0.7508090614886731,0.022822513389828988 +flat_mae,reg,logistic,adhd200_dx,22,0.005994842503189409,test,0.6,0.06092745904985854,0.588206627680312,0.06296706803230769,0.5878378378378378,0.06223981261886896 +flat_mae,reg,logistic,adhd200_dx,23,0.005994842503189409,train,0.7945205479452054,0.02013447081483475,0.786201294897728,0.02139270339942508,0.7820876839469988,0.02113344957020735 +flat_mae,reg,logistic,adhd200_dx,23,0.005994842503189409,test,0.5384615384615384,0.051360427287204885,0.4846723044397463,0.05804526318367178,0.5033783783783784,0.05171267936695095 +flat_mae,reg,logistic,adhd200_dx,24,0.046415888336127774,train,0.8821917808219178,0.01644089843355004,0.8789199614271939,0.01707795814656599,0.8755419185443,0.01737608646649045 +flat_mae,reg,logistic,adhd200_dx,24,0.046415888336127774,test,0.6153846153846154,0.05656198150241282,0.5905769715293525,0.06227807523761868,0.5926640926640927,0.0584752015579182 +flat_mae,reg,logistic,adhd200_dx,25,0.046415888336127774,train,0.8904109589041096,0.01587749971453985,0.8872621695082777,0.01652510318181784,0.8835409415643891,0.016820087492890688 +flat_mae,reg,logistic,adhd200_dx,25,0.046415888336127774,test,0.6307692307692307,0.06176710600809613,0.6198830409356726,0.06394610704045328,0.6192084942084942,0.06297576838069932 +flat_mae,reg,logistic,adhd200_dx,26,0.005994842503189409,train,0.7972602739726027,0.01982294229159462,0.7901907780263485,0.02083498568599695,0.7866672772791109,0.020690402095183625 +flat_mae,reg,logistic,adhd200_dx,26,0.005994842503189409,test,0.6307692307692307,0.059294662840934674,0.6153846153846154,0.06276757554879729,0.6148648648648649,0.06106095634061859 +flat_mae,reg,logistic,adhd200_dx,27,0.005994842503189409,train,0.7589041095890411,0.020505377643224142,0.7499688628720886,0.02167657399421666,0.7469469377785919,0.021441634142087652 +flat_mae,reg,logistic,adhd200_dx,27,0.005994842503189409,test,0.6615384615384615,0.05840381765912023,0.6474358974358974,0.062314038928973685,0.6462355212355213,0.0605297935593882 +flat_mae,reg,logistic,adhd200_dx,28,0.000774263682681127,train,0.6931506849315069,0.023502832622947983,0.6810834425513325,0.02482097707980784,0.6793674055077242,0.024311923250778442 +flat_mae,reg,logistic,adhd200_dx,28,0.000774263682681127,test,0.5538461538461539,0.05669392545442547,0.5250692869740489,0.06162957732152183,0.5299227799227799,0.05796323024724622 +flat_mae,reg,logistic,adhd200_dx,29,0.046415888336127774,train,0.8849315068493151,0.015742338922575545,0.8828196661163089,0.016091110459123835,0.8822739207425048,0.016326552200560538 +flat_mae,reg,logistic,adhd200_dx,29,0.046415888336127774,test,0.5846153846153846,0.06307455155016513,0.5745454545454545,0.06493459621404897,0.5743243243243243,0.064432121210835 +flat_mae,reg,logistic,adhd200_dx,30,0.005994842503189409,train,0.7561643835616438,0.021282667912228204,0.7493924783027965,0.022357639040159322,0.7473896318006961,0.022340442045325686 +flat_mae,reg,logistic,adhd200_dx,30,0.005994842503189409,test,0.5846153846153846,0.061930283383817966,0.5810455956075435,0.06234718242833867,0.583011583011583,0.06275353663962097 +flat_mae,reg,logistic,adhd200_dx,31,0.005994842503189409,train,0.7863013698630137,0.021674714231220363,0.7797394318252151,0.02244747393111517,0.7769585394150332,0.022255630459270155 +flat_mae,reg,logistic,adhd200_dx,31,0.005994842503189409,test,0.5846153846153846,0.05803507611067291,0.5699583435432491,0.060638787521142296,0.5699806949806949,0.05925936028283022 +flat_mae,reg,logistic,adhd200_dx,32,0.005994842503189409,train,0.7643835616438356,0.022105106571467494,0.7566666666666667,0.023120743898050102,0.7539537155767234,0.022942388705704544 +flat_mae,reg,logistic,adhd200_dx,32,0.005994842503189409,test,0.5846153846153846,0.06129703410877678,0.5745454545454545,0.06334327113622078,0.5743243243243243,0.06271738691414352 +flat_mae,reg,logistic,adhd200_dx,33,0.000774263682681127,train,0.7178082191780822,0.022680443603447547,0.7063831116595465,0.024157030525655297,0.7040819441900226,0.023717987210493858 +flat_mae,reg,logistic,adhd200_dx,33,0.000774263682681127,test,0.5692307692307692,0.05694948244098324,0.545,0.06145905010361006,0.5477799227799228,0.05841277130468106 +flat_mae,reg,logistic,adhd200_dx,34,0.046415888336127774,train,0.8958904109589041,0.015857964985487683,0.8938173689367956,0.016250904891291388,0.8927001282286133,0.016531462793252315 +flat_mae,reg,logistic,adhd200_dx,34,0.046415888336127774,test,0.5538461538461539,0.06047276466536146,0.5469838981014179,0.0611391183904927,0.5472972972972974,0.06115832915148681 +flat_mae,reg,logistic,adhd200_dx,35,0.005994842503189409,train,0.7808219178082192,0.020968685335132613,0.7722024589652374,0.02217111317537531,0.7685168223728399,0.021885141323562833 +flat_mae,reg,logistic,adhd200_dx,35,0.005994842503189409,test,0.6153846153846154,0.05907752403540492,0.606060606060606,0.060693186407199014,0.6056949806949807,0.06021909228100301 +flat_mae,reg,logistic,adhd200_dx,36,0.000774263682681127,train,0.6958904109589041,0.023550493698108698,0.6821081373725961,0.025078571725731767,0.6803596507296819,0.024338935054703643 +flat_mae,reg,logistic,adhd200_dx,36,0.000774263682681127,test,0.6153846153846154,0.05965918192672173,0.606060606060606,0.061829558863395986,0.6056949806949807,0.061087242413563045 +flat_mae,reg,logistic,adhd200_dx,37,0.005994842503189409,train,0.7671232876712328,0.021414895298933628,0.7587499319600937,0.022464438240328604,0.7556634304207119,0.022191076505851563 +flat_mae,reg,logistic,adhd200_dx,37,0.005994842503189409,test,0.676923076923077,0.05788349040562829,0.6655231560891939,0.0614274429143358,0.6640926640926641,0.06008256984795916 +flat_mae,reg,logistic,adhd200_dx,38,0.005994842503189409,train,0.7835616438356164,0.02027382747078746,0.7766961720450092,0.021157233765407332,0.7738138853269829,0.02102905050459336 +flat_mae,reg,logistic,adhd200_dx,38,0.005994842503189409,test,0.5076923076923077,0.059654738375512856,0.49317738791423005,0.061397688026523796,0.49372586872586877,0.06011611847379033 +flat_mae,reg,logistic,adhd200_dx,39,0.005994842503189409,train,0.7808219178082192,0.022051376575107428,0.77061337355455,0.023800033234025457,0.7663644135067473,0.02334041429912324 +flat_mae,reg,logistic,adhd200_dx,39,0.005994842503189409,test,0.6615384615384615,0.05967339906703001,0.6474358974358974,0.06222094237691313,0.6462355212355213,0.0607245986293766 +flat_mae,reg,logistic,adhd200_dx,40,0.000774263682681127,train,0.6958904109589041,0.023290027922654326,0.6771224328782843,0.02520775155594769,0.6760548329974965,0.02404657763339245 +flat_mae,reg,logistic,adhd200_dx,40,0.000774263682681127,test,0.6615384615384615,0.04971793575858389,0.6130952380952381,0.06234615955266387,0.6245173745173745,0.05286369882432733 +flat_mae,reg,logistic,adhd200_dx,41,0.005994842503189409,train,0.7753424657534247,0.02190526421862782,0.7670164404035372,0.023203246249879433,0.7636624534408011,0.02295788515076225 +flat_mae,reg,logistic,adhd200_dx,41,0.005994842503189409,test,0.5692307692307692,0.06003589458859804,0.5565302144249512,0.061499943457259676,0.5564671814671815,0.06070543860598501 +flat_mae,reg,logistic,adhd200_dx,42,0.005994842503189409,train,0.7808219178082192,0.020200071902352207,0.7711598746081505,0.02154602898767743,0.7670818831287781,0.021169421662351043 +flat_mae,reg,logistic,adhd200_dx,42,0.005994842503189409,test,0.5846153846153846,0.061550952578441946,0.5846153846153846,0.06198822959954095,0.596042471042471,0.06136644691682726 +flat_mae,reg,logistic,adhd200_dx,43,0.046415888336127774,train,0.8876712328767123,0.015159377228578394,0.8849633712304653,0.015652349707119914,0.8825486963424315,0.016007738232204187 +flat_mae,reg,logistic,adhd200_dx,43,0.046415888336127774,test,0.5846153846153846,0.060427009126322505,0.578226387887527,0.06092313802050427,0.5786679536679536,0.06092382838313274 +flat_mae,reg,logistic,adhd200_dx,44,0.000774263682681127,train,0.7013698630136986,0.023815942124303106,0.6885689907553092,0.025470646349214882,0.6866489589057825,0.02479939831625817 +flat_mae,reg,logistic,adhd200_dx,44,0.000774263682681127,test,0.5692307692307692,0.05466655024762703,0.5289855072463768,0.06221471197612961,0.5390926640926641,0.056347407176051086 +flat_mae,reg,logistic,adhd200_dx,45,0.005994842503189409,train,0.7917808219178082,0.021415406333948325,0.7835923360169756,0.0227624808699415,0.7796604994809794,0.022566470646107662 +flat_mae,reg,logistic,adhd200_dx,45,0.005994842503189409,test,0.49230769230769234,0.06400943717403924,0.4743935309973046,0.06645361445807867,0.47586872586872586,0.06484192618020329 +flat_mae,reg,logistic,adhd200_dx,46,0.005994842503189409,train,0.7534246575342466,0.0221106068241915,0.7448266219239373,0.023326413269260857,0.7420925688465531,0.023111648216609274 +flat_mae,reg,logistic,adhd200_dx,46,0.005994842503189409,test,0.6461538461538462,0.06116331034651721,0.6375757575757576,0.06250338335770013,0.6370656370656371,0.0616676916484754 +flat_mae,reg,logistic,adhd200_dx,47,0.005994842503189409,train,0.7808219178082192,0.02128135870158202,0.7726989662473533,0.02248992088484418,0.7692342919948708,0.0222805633239481 +flat_mae,reg,logistic,adhd200_dx,47,0.005994842503189409,test,0.5230769230769231,0.059477806511804225,0.5062484685126194,0.06109247575823652,0.5072393822393823,0.06003562428949995 +flat_mae,reg,logistic,adhd200_dx,48,0.3593813663804626,train,0.9972602739726028,0.0028588407683119606,0.9972160568687123,0.002900758100551366,0.9975728155339806,0.0025327108748394883 +flat_mae,reg,logistic,adhd200_dx,48,0.3593813663804626,test,0.5692307692307692,0.05873196563186225,0.5683111954459203,0.059114768425959426,0.5738416988416988,0.05978393373131858 +flat_mae,reg,logistic,adhd200_dx,49,0.046415888336127774,train,0.9013698630136986,0.014355197081288528,0.8979306487695748,0.015146674058234173,0.8925322098064359,0.015595886563797519 +flat_mae,reg,logistic,adhd200_dx,49,0.046415888336127774,test,0.5230769230769231,0.06347872774197388,0.5157414083153088,0.06415790725129597,0.515926640926641,0.06409194371920374 +flat_mae,reg,logistic,adhd200_dx,50,0.005994842503189409,train,0.7835616438356164,0.021205699620109385,0.7771311088954329,0.0220435771135872,0.7745313549490138,0.02195722672960203 +flat_mae,reg,logistic,adhd200_dx,50,0.005994842503189409,test,0.6307692307692307,0.05642686841647285,0.6153846153846154,0.059627103727383536,0.6148648648648649,0.05786440039192632 +flat_mae,reg,logistic,adhd200_dx,51,0.005994842503189409,train,0.7726027397260274,0.02027019587184729,0.7649163103616852,0.02134461386092565,0.7619527385968126,0.021245371790844746 +flat_mae,reg,logistic,adhd200_dx,51,0.005994842503189409,test,0.5076923076923077,0.06151688852635913,0.49317738791423005,0.06351757147044909,0.49372586872586877,0.06257331202133987 +flat_mae,reg,logistic,adhd200_dx,52,0.005994842503189409,train,0.7616438356164383,0.021673168262988835,0.7496156120638675,0.023452443275299162,0.7457867741344568,0.02282098945349416 +flat_mae,reg,logistic,adhd200_dx,52,0.005994842503189409,test,0.7230769230769231,0.0520638297368135,0.7176640926640927,0.05319306048470526,0.7176640926640927,0.053169392841835444 +flat_mae,reg,logistic,adhd200_dx,53,0.005994842503189409,train,0.7726027397260274,0.0209103988815344,0.7644264041492679,0.021984617427365793,0.7612352689747817,0.021761176040786027 +flat_mae,reg,logistic,adhd200_dx,53,0.005994842503189409,test,0.6,0.056433352847491416,0.570630081300813,0.06199964107663033,0.5748069498069498,0.057883640730489606 +flat_mae,reg,logistic,adhd200_dx,54,0.000774263682681127,train,0.7041095890410959,0.021866380761769656,0.6972350230414747,0.022530331789517202,0.6962508395921109,0.022483631678237952 +flat_mae,reg,logistic,adhd200_dx,54,0.000774263682681127,test,0.47692307692307695,0.06380985807981146,0.4738095238095238,0.06398367148119169,0.4753861003861004,0.06460305030743448 +flat_mae,reg,logistic,adhd200_dx,55,0.005994842503189409,train,0.7671232876712328,0.022006235319660363,0.7597363876433645,0.022977158908380594,0.7570983696647737,0.022834540672742935 +flat_mae,reg,logistic,adhd200_dx,55,0.005994842503189409,test,0.5538461538461539,0.06305079383759014,0.5500119360229172,0.06349606593475027,0.5516409266409266,0.06368202021516883 +flat_mae,reg,logistic,adhd200_dx,56,0.005994842503189409,train,0.7616438356164383,0.022534914592255283,0.7530734597709193,0.023542318997326055,0.7500915918666422,0.023231719509450396 +flat_mae,reg,logistic,adhd200_dx,56,0.005994842503189409,test,0.6923076923076923,0.05964251690266942,0.6886973180076628,0.060579049843737034,0.6906370656370657,0.06059347846291427 +flat_mae,reg,logistic,adhd200_dx,57,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adhd200_dx,57,21.54434690031882,test,0.5230769230769231,0.05520569390065896,0.47320261437908495,0.06178663705802395,0.48986486486486486,0.05590210553397819 +flat_mae,reg,logistic,adhd200_dx,58,0.005994842503189409,train,0.7972602739726027,0.020379828056946405,0.7901907780263485,0.02148956787156199,0.7866672772791109,0.021445259841381955 +flat_mae,reg,logistic,adhd200_dx,58,0.005994842503189409,test,0.6153846153846154,0.05744748875709922,0.6094688776736361,0.05842925576971971,0.61003861003861,0.05829385297118714 +flat_mae,reg,logistic,adhd200_dx,59,0.046415888336127774,train,0.8904109589041096,0.01598991894836337,0.8874707115550623,0.016612781319413676,0.88425841118642,0.016989500070454376 +flat_mae,reg,logistic,adhd200_dx,59,0.046415888336127774,test,0.6,0.06135159319989727,0.5953065134099617,0.06197942833546812,0.5965250965250966,0.062063924689805025 +flat_mae,reg,logistic,adhd200_dx,60,0.005994842503189409,train,0.7726027397260274,0.022127704550818396,0.7662873674059788,0.022900786504719082,0.7641051474629053,0.022801719077502385 +flat_mae,reg,logistic,adhd200_dx,60,0.005994842503189409,test,0.6461538461538462,0.0573175520686982,0.6289401836684041,0.06142311800179273,0.6283783783783784,0.059143122746962304 +flat_mae,reg,logistic,adhd200_dx,61,0.046415888336127774,train,0.8904109589041096,0.015607784191822198,0.8876715701360252,0.016112169239702923,0.8849758808084509,0.01632566496219063 +flat_mae,reg,logistic,adhd200_dx,61,0.046415888336127774,test,0.5384615384615384,0.06566545298691186,0.5294401544401545,0.06658049502901836,0.5294401544401545,0.06654200875288914 +flat_mae,reg,logistic,adhd200_dx,62,0.005994842503189409,train,0.7835616438356164,0.021434745198979345,0.7766961720450092,0.022460488394839297,0.7738138853269829,0.02235785401145052 +flat_mae,reg,logistic,adhd200_dx,62,0.005994842503189409,test,0.5076923076923077,0.06136494768785945,0.5047619047619047,0.06167758575062951,0.5067567567567568,0.06248987264589987 +flat_mae,reg,logistic,adhd200_dx,63,0.046415888336127774,train,0.8958904109589041,0.016242778091204026,0.8926935693507458,0.016974763388423905,0.888395310496428,0.017397560322402306 +flat_mae,reg,logistic,adhd200_dx,63,0.046415888336127774,test,0.5076923076923077,0.061060451179342355,0.4980694980694981,0.061281325091251275,0.4980694980694981,0.06100754756314708 +flat_mae,reg,logistic,adhd200_dx,64,0.005994842503189409,train,0.7780821917808219,0.021064473314784633,0.7714888584877223,0.02186280490564398,0.7689595163949441,0.021734258261506007 +flat_mae,reg,logistic,adhd200_dx,64,0.005994842503189409,test,0.6,0.059245552600668515,0.588206627680312,0.06101636705953768,0.5878378378378378,0.060128896245772605 +flat_mae,reg,logistic,adhd200_dx,65,0.005994842503189409,train,0.7698630136986301,0.021315324856717992,0.761333914559721,0.02257044311254767,0.7580906148867314,0.022349039338986812 +flat_mae,reg,logistic,adhd200_dx,65,0.005994842503189409,test,0.6923076923076923,0.05439488315650166,0.6794871794871795,0.057461499800194354,0.6776061776061776,0.05597209786828866 +flat_mae,reg,logistic,adhd200_dx,66,0.005994842503189409,train,0.7726027397260274,0.021853782512318155,0.7662873674059788,0.022721212563271972,0.7641051474629053,0.022689395714540064 +flat_mae,reg,logistic,adhd200_dx,66,0.005994842503189409,test,0.6461538461538462,0.05768615351791371,0.6375757575757576,0.05874082412249993,0.6370656370656371,0.05832647526841794 +flat_mae,reg,logistic,adhd200_dx,67,0.005994842503189409,train,0.7643835616438356,0.020934860234859597,0.758493875792454,0.021673347288814107,0.756823594064847,0.021685649132735733 +flat_mae,reg,logistic,adhd200_dx,67,0.005994842503189409,test,0.6307692307692307,0.054476408226446414,0.5962732919254659,0.06265111599275523,0.6018339768339769,0.05684187793277736 +flat_mae,reg,logistic,adhd200_dx,68,0.000774263682681127,train,0.6958904109589041,0.022365966156399627,0.6701297009469065,0.026103500016443716,0.6710325456432802,0.023823319245484323 +flat_mae,reg,logistic,adhd200_dx,68,0.000774263682681127,test,0.6153846153846154,0.05095194162178303,0.5751633986928104,0.05902791259799706,0.583976833976834,0.052633892100958334 +flat_mae,reg,logistic,adhd200_dx,69,0.005994842503189409,train,0.7780821917808219,0.021176699878685618,0.7719189971070395,0.022142330737227833,0.769676986016975,0.02219028946326998 +flat_mae,reg,logistic,adhd200_dx,69,0.005994842503189409,test,0.5846153846153846,0.055998943354359225,0.5501153550371699,0.06276121237733259,0.556949806949807,0.05763804209834488 +flat_mae,reg,logistic,adhd200_dx,70,0.005994842503189409,train,0.7863013698630137,0.02216125077340224,0.7788497390007457,0.023276849765748297,0.7755236001709714,0.02310376915943539 +flat_mae,reg,logistic,adhd200_dx,70,0.005994842503189409,test,0.5384615384615384,0.061087548000472724,0.5248538011695907,0.06283738590094717,0.525096525096525,0.06206668934539732 +flat_mae,reg,logistic,adhd200_dx,71,0.3593813663804626,train,0.9945205479452055,0.0038078683193059513,0.9944199841007766,0.003886081537267776,0.9937106918238994,0.004370666467127903 +flat_mae,reg,logistic,adhd200_dx,71,0.3593813663804626,test,0.5538461538461539,0.062218359511924046,0.5534233593935086,0.06229829946329207,0.5603281853281853,0.06256442140617618 +flat_mae,reg,logistic,adhd200_dx,72,0.046415888336127774,train,0.8958904109589041,0.015920655204811684,0.8932879916292238,0.016484513714671595,0.8905477193625206,0.01684501050398125 +flat_mae,reg,logistic,adhd200_dx,72,0.046415888336127774,test,0.6461538461538462,0.061241583892263404,0.6375757575757576,0.06299769774599755,0.6370656370656371,0.06234659188143713 +flat_mae,reg,logistic,adhd200_dx,73,0.000774263682681127,train,0.6767123287671233,0.023906087605094925,0.6599665256110655,0.02564294881629478,0.6590645417353606,0.024669476045290604 +flat_mae,reg,logistic,adhd200_dx,73,0.000774263682681127,test,0.5076923076923077,0.05935571237645097,0.4871794871794872,0.06156570913796003,0.48938223938223935,0.05994890677985065 +flat_mae,reg,logistic,adhd200_dx,74,0.005994842503189409,train,0.7643835616438356,0.02150410662208061,0.7566666666666667,0.022544375840454534,0.7539537155767234,0.022389099902188368 +flat_mae,reg,logistic,adhd200_dx,74,0.005994842503189409,test,0.6461538461538462,0.05947521984110635,0.6407113674597452,0.060098261819259424,0.6414092664092663,0.059847853136717984 +flat_mae,reg,logistic,adhd200_dx,75,0.046415888336127774,train,0.8849315068493151,0.017291535073930987,0.8820551486428263,0.017813001068554216,0.8794040422543812,0.017968741686789448 +flat_mae,reg,logistic,adhd200_dx,75,0.046415888336127774,test,0.5076923076923077,0.06144589961813282,0.5066413662239089,0.061500729382083896,0.5111003861003861,0.061566645045623446 +flat_mae,reg,logistic,adhd200_dx,76,0.005994842503189409,train,0.7780821917808219,0.020435499451462825,0.7701028763384421,0.02159493172481372,0.7668071075288514,0.02136150186382243 +flat_mae,reg,logistic,adhd200_dx,76,0.005994842503189409,test,0.6,0.052595362812220554,0.5533826638477801,0.06199270514300019,0.5661196911196912,0.05427549271617841 +flat_mae,reg,logistic,adhd200_dx,77,0.005994842503189409,train,0.7671232876712328,0.02118646345700421,0.7576948008840918,0.022502267934332226,0.7542284911766501,0.022155072910633882 +flat_mae,reg,logistic,adhd200_dx,77,0.005994842503189409,test,0.7538461538461538,0.05110726624191411,0.74,0.05658345607835071,0.7360038610038611,0.05405684990011642 +flat_mae,reg,logistic,adhd200_dx,78,0.046415888336127774,train,0.9041095890410958,0.014431527589039199,0.9014464802314368,0.01502927789969934,0.8978292727605789,0.01551433136735463 +flat_mae,reg,logistic,adhd200_dx,78,0.046415888336127774,test,0.5230769230769231,0.0631329657227248,0.5115151515151515,0.06445650326299723,0.5115830115830116,0.0638621042317883 +flat_mae,reg,logistic,adhd200_dx,79,0.005994842503189409,train,0.7726027397260274,0.02100426213796185,0.7649163103616852,0.022091044556901657,0.7619527385968126,0.021914799221642904 +flat_mae,reg,logistic,adhd200_dx,79,0.005994842503189409,test,0.5538461538461539,0.058813805085532876,0.5469838981014179,0.060046464553516685,0.5472972972972974,0.060120358712774304 +flat_mae,reg,logistic,adhd200_dx,80,0.000774263682681127,train,0.6931506849315069,0.022087737839859064,0.6780696780696781,0.023866600598260363,0.6764975270196006,0.023045833024638994 +flat_mae,reg,logistic,adhd200_dx,80,0.000774263682681127,test,0.5538461538461539,0.06156457907311398,0.5321419707123356,0.06452988246739617,0.5342664092664092,0.0625566874003427 +flat_mae,reg,logistic,adhd200_dx,81,0.005994842503189409,train,0.7726027397260274,0.02181706367083146,0.7662873674059788,0.02257975232276199,0.7641051474629053,0.02251490522341259 +flat_mae,reg,logistic,adhd200_dx,81,0.005994842503189409,test,0.6,0.0553704169834676,0.570630081300813,0.06167183301434378,0.5748069498069498,0.057183263556338085 +flat_mae,reg,logistic,adhd200_dx,82,0.005994842503189409,train,0.7726027397260274,0.021493520693183988,0.7622970756930223,0.022929691878242356,0.7583653904866581,0.022453724827332083 +flat_mae,reg,logistic,adhd200_dx,82,0.005994842503189409,test,0.7230769230769231,0.05498422796988048,0.7214285714285714,0.05520623222150543,0.7263513513513513,0.05506270725948215 +flat_mae,reg,logistic,adhd200_dx,83,0.046415888336127774,train,0.8849315068493151,0.015997495581816026,0.8820551486428263,0.01647510832894035,0.8794040422543812,0.016601058694674577 +flat_mae,reg,logistic,adhd200_dx,83,0.046415888336127774,test,0.6923076923076923,0.05641666630248894,0.6862934362934363,0.05765242551437509,0.6862934362934363,0.05754977390225277 +flat_mae,reg,logistic,adhd200_dx,84,0.046415888336127774,train,0.8821917808219178,0.016993016790094927,0.8793518283636588,0.01751772592579188,0.8769768577883618,0.017712880521345502 +flat_mae,reg,logistic,adhd200_dx,84,0.046415888336127774,test,0.6153846153846154,0.06062097592330143,0.6139225469232596,0.060631773544137964,0.6187258687258688,0.06083088510821498 +flat_mae,reg,logistic,adhd200_dx,85,0.005994842503189409,train,0.7698630136986301,0.02120145514183771,0.7597178683385579,0.0228260845013062,0.7559382060206387,0.022453909610883366 +flat_mae,reg,logistic,adhd200_dx,85,0.005994842503189409,test,0.6153846153846154,0.06032775176596667,0.606060606060606,0.06170095191614852,0.6056949806949807,0.06137105791708423 +flat_mae,reg,logistic,adhd200_dx,86,0.005994842503189409,train,0.7726027397260274,0.021670539448234362,0.7662873674059788,0.022533327049758918,0.7641051474629053,0.022495265263239785 +flat_mae,reg,logistic,adhd200_dx,86,0.005994842503189409,test,0.5692307692307692,0.059883745361991855,0.5376016260162602,0.06476251530827347,0.5434362934362934,0.06090471736687394 +flat_mae,reg,logistic,adhd200_dx,87,0.005994842503189409,train,0.7753424657534247,0.021441484475682258,0.767984496124031,0.022454420551927954,0.7650973926848629,0.02230922220942852 +flat_mae,reg,logistic,adhd200_dx,87,0.005994842503189409,test,0.6,0.0587482765704663,0.5775,0.06298394984043722,0.5791505791505791,0.060251711406062655 +flat_mae,reg,logistic,adhd200_dx,88,0.005994842503189409,train,0.7890410958904109,0.02023796209544562,0.7814558207167907,0.021386298653713567,0.7779507846369909,0.021311093767166462 +flat_mae,reg,logistic,adhd200_dx,88,0.005994842503189409,test,0.5846153846153846,0.05857972668678324,0.5699583435432491,0.061322622018507175,0.5699806949806949,0.05974689777928845 +flat_mae,reg,logistic,adhd200_dx,89,0.005994842503189409,train,0.7890410958904109,0.021098136155149872,0.7827733593031434,0.021980701798512057,0.7801031935030835,0.0218994327086109 +flat_mae,reg,logistic,adhd200_dx,89,0.005994842503189409,test,0.6,0.058888320739571336,0.5775,0.062879523232886,0.5791505791505791,0.06021417214067847 +flat_mae,reg,logistic,adhd200_dx,90,0.005994842503189409,train,0.7835616438356164,0.021039140177375716,0.7747986972922736,0.022229894949174545,0.7709440068388593,0.021919065429817173 +flat_mae,reg,logistic,adhd200_dx,90,0.005994842503189409,test,0.6153846153846154,0.059374857707708666,0.6018132810585641,0.061792820736669495,0.6013513513513513,0.06034872400188347 +flat_mae,reg,logistic,adhd200_dx,91,0.046415888336127774,train,0.8904109589041096,0.016115435524691763,0.8872621695082777,0.016785826494530504,0.8835409415643891,0.017134890950303665 +flat_mae,reg,logistic,adhd200_dx,91,0.046415888336127774,test,0.5692307692307692,0.05102651822464646,0.5190274841437632,0.05956599456532367,0.5347490347490347,0.05222639739371201 +flat_mae,reg,logistic,adhd200_dx,92,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adhd200_dx,92,166.81005372000556,test,0.5538461538461539,0.061476615075843524,0.543030303030303,0.06309245461888256,0.542953667953668,0.06231543529542896 +flat_mae,reg,logistic,adhd200_dx,93,0.005994842503189409,train,0.7780821917808219,0.020602401629403078,0.7701028763384421,0.021699944514068592,0.7668071075288514,0.021530304626817445 +flat_mae,reg,logistic,adhd200_dx,93,0.005994842503189409,test,0.6307692307692307,0.055721462831824,0.61,0.060653507586662735,0.6105212355212355,0.057679393521572704 +flat_mae,reg,logistic,adhd200_dx,94,0.005994842503189409,train,0.7808219178082192,0.02130432591919028,0.7726989662473533,0.022597835127036853,0.7692342919948708,0.022403660990515578 +flat_mae,reg,logistic,adhd200_dx,94,0.005994842503189409,test,0.6461538461538462,0.05031757136593221,0.6003742314889067,0.06207904624610059,0.6110038610038611,0.053311566018655734 +flat_mae,reg,logistic,adhd200_dx,95,0.005994842503189409,train,0.7917808219178082,0.021275330779004398,0.7861943519546184,0.022015780514183653,0.7839653172131648,0.02196597985283447 +flat_mae,reg,logistic,adhd200_dx,95,0.005994842503189409,test,0.5384615384615384,0.06100702064555633,0.5192307692307693,0.06362253148263441,0.5207528957528957,0.06190621252267073 +flat_mae,reg,logistic,adhd200_dx,96,0.046415888336127774,train,0.8876712328767123,0.017080543691299528,0.8849633712304653,0.01763880064957491,0.8825486963424315,0.017949701145123086 +flat_mae,reg,logistic,adhd200_dx,96,0.046415888336127774,test,0.6461538461538462,0.059122578512280984,0.6375757575757576,0.060636669916125555,0.6370656370656371,0.06015086412013059 +flat_mae,reg,logistic,adhd200_dx,97,0.046415888336127774,train,0.8931506849315068,0.01573236376570908,0.8907596558948345,0.01614445068518777,0.8888380045185321,0.01632595366321203 +flat_mae,reg,logistic,adhd200_dx,97,0.046415888336127774,test,0.5230769230769231,0.05387318881756287,0.47320261437908495,0.05907622153254185,0.48986486486486486,0.053843848818292614 +flat_mae,reg,logistic,adhd200_dx,98,0.000774263682681127,train,0.6876712328767123,0.022870789160190957,0.6706401975683891,0.02445731877228245,0.6694907492214691,0.023498647445655323 +flat_mae,reg,logistic,adhd200_dx,98,0.000774263682681127,test,0.6461538461538462,0.05867551932490532,0.6233308138070043,0.06411295425940841,0.6240347490347491,0.060536521349576045 +flat_mae,reg,logistic,adhd200_dx,99,0.005994842503189409,train,0.7780821917808219,0.022126797465345814,0.7710429105777943,0.02332175645164587,0.7682420467729132,0.0232708123289197 +flat_mae,reg,logistic,adhd200_dx,99,0.005994842503189409,test,0.5230769230769231,0.058034480670938735,0.49987589972697943,0.060056309558018356,0.502895752895753,0.058367649916049126 +flat_mae,reg,logistic,adhd200_dx,100,0.3593813663804626,train,0.9972602739726028,0.0027793031820078896,0.9972160568687123,0.0028201265714039095,0.9975728155339806,0.0024622467510506865 +flat_mae,reg,logistic,adhd200_dx,100,0.3593813663804626,test,0.5846153846153846,0.06286848111064687,0.5644080416976918,0.06681009938441267,0.5656370656370656,0.06451856818847286 diff --git a/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..8c4dcd7a92da477fe6c4c69c906ad269659fe48b --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:24:57 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (adhd200_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic +model: flat_mae +representation: reg +dataset: adhd200_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/adhd200_dx__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:10:11 time: 4.0519 data: 3.2357 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:48 time: 0.1867 data: 0.0599 max mem: 3005 +extract (train) [ 40/151] eta: 0:00:30 time: 0.1718 data: 0.0527 max mem: 3005 +extract (train) [ 60/151] eta: 0:00:21 time: 0.1734 data: 0.0551 max mem: 3005 +extract (train) [ 80/151] eta: 0:00:15 time: 0.1695 data: 0.0533 max mem: 3005 +extract (train) [100/151] eta: 0:00:10 time: 0.1564 data: 0.0460 max mem: 3005 +extract (train) [120/151] eta: 0:00:06 time: 0.1718 data: 0.0556 max mem: 3005 +extract (train) [140/151] eta: 0:00:02 time: 0.1500 data: 0.0435 max mem: 3005 +extract (train) [150/151] eta: 0:00:00 time: 0.1462 data: 0.0425 max mem: 3005 +extract (train) Total time: 0:00:29 (0.1944 s / it) +extract (validation) [ 0/32] eta: 0:01:55 time: 3.6165 data: 3.4372 max mem: 3005 +extract (validation) [20/32] eta: 0:00:04 time: 0.1765 data: 0.0530 max mem: 3005 +extract (validation) [31/32] eta: 0:00:00 time: 0.1413 data: 0.0360 max mem: 3005 +extract (validation) Total time: 0:00:09 (0.2830 s / it) +extract (test) [ 0/33] eta: 0:01:53 time: 3.4310 data: 3.2328 max mem: 3005 +extract (test) [20/33] eta: 0:00:04 time: 0.1758 data: 0.0525 max mem: 3005 +extract (test) [32/33] eta: 0:00:00 time: 0.1435 data: 0.0374 max mem: 3005 +extract (test) Total time: 0:00:08 (0.2718 s / it) +feature extraction time: 0:00:47 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | adhd200_dx | | 0.0059948 | train | 0.77808 | 0.020928 | 0.77192 | 0.021767 | 0.76968 | 0.021743 | +| flat_mae | reg | logistic | adhd200_dx | | 0.0059948 | test | 0.56923 | 0.060014 | 0.545 | 0.064382 | 0.54778 | 0.06131 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.057172642110691015, "f1": 0.6153846153846154, "f1_std": 0.05984184823940859, "bacc": 0.6148648648648649, "bacc_std": 0.05812623880969185} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05597946369008537, "f1": 0.6612062546537603, "f1_std": 0.06011226479461027, "bacc": 0.6597490347490347, "bacc_std": 0.05818371710824218} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06065029061331273, "f1": 0.5699583435432491, "f1_std": 0.0630938930630196, "bacc": 0.5699806949806949, "bacc_std": 0.06184774936815931} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.059035009169918044, "f1": 0.61207925519217, "f1_std": 0.05967975321520279, "bacc": 0.6143822393822393, "bacc_std": 0.06056849238501182} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.06278626465705535, "f1": 0.4879446168536643, "f1_std": 0.06310919876910452, "bacc": 0.4888996138996139, "bacc_std": 0.06361574897763696} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05542395184277121, "f1": 0.7149122807017544, "f1_std": 0.05795997958953131, "bacc": 0.7133204633204633, "bacc_std": 0.05734892618338016} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05840706774124325, "f1": 0.5953065134099617, "f1_std": 0.058913469546957024, "bacc": 0.5965250965250966, "bacc_std": 0.05918727228023486} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06239481089029362, "f1": 0.6198830409356726, "f1_std": 0.06461685118035697, "bacc": 0.6192084942084942, "bacc_std": 0.06366404275950352} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0592323196600276, "f1": 0.5565302144249512, "f1_std": 0.0611675996049126, "bacc": 0.5564671814671815, "bacc_std": 0.060281715260637185} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.061203117064913756, "f1": 0.46760982874162327, "f1_std": 0.06426508848347219, "bacc": 0.47152509652509655, "bacc_std": 0.0619916560933398} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05908922948744651, "f1": 0.564176245210728, "f1_std": 0.05977182609900448, "bacc": 0.5651544401544402, "bacc_std": 0.05979890891748785} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.0580633891515412, "f1": 0.49987589972697943, "f1_std": 0.0626428022644842, "bacc": 0.502895752895753, "bacc_std": 0.05972849257185143} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05661618728527994, "f1": 0.5699583435432491, "f1_std": 0.05976078518236698, "bacc": 0.5699806949806949, "bacc_std": 0.05822846393412014} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05847964092200691, "f1": 0.6336682185738789, "f1_std": 0.06086890338230038, "bacc": 0.6327220077220077, "bacc_std": 0.05980931848061576} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.062105083689810305, "f1": 0.4532564287430906, "f1_std": 0.06247504159417956, "bacc": 0.4531853281853282, "bacc_std": 0.06228698584668029} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.058756140306087494, "f1": 0.6375757575757576, "f1_std": 0.06042837884786278, "bacc": 0.6370656370656371, "bacc_std": 0.05994891019903007} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05050279739161682, "f1": 0.5656241646618552, "f1_std": 0.060335661740390796, "bacc": 0.5796332046332047, "bacc_std": 0.05227453553691125} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.000774263682681127, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.058008112062786166, "f1": 0.6515594541910331, "f1_std": 0.05987334723990986, "bacc": 0.6505791505791505, "bacc_std": 0.05914313341993746} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.058165167803094414, "f1": 0.656084656084656, "f1_std": 0.06431018422438634, "bacc": 0.6554054054054055, "bacc_std": 0.060847868917686135} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05847205573412951, "f1": 0.5469838981014179, "f1_std": 0.05951227554826074, "bacc": 0.5472972972972974, "bacc_std": 0.0593758141534984} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05638658617607309, "f1": 0.6036585365853658, "f1_std": 0.06170716998295667, "bacc": 0.6061776061776062, "bacc_std": 0.05789264366777445} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06092745904985854, "f1": 0.588206627680312, "f1_std": 0.06296706803230769, "bacc": 0.5878378378378378, "bacc_std": 0.06223981261886896} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.051360427287204885, "f1": 0.4846723044397463, "f1_std": 0.05804526318367178, "bacc": 0.5033783783783784, "bacc_std": 0.05171267936695095} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05656198150241282, "f1": 0.5905769715293525, "f1_std": 0.06227807523761868, "bacc": 0.5926640926640927, "bacc_std": 0.0584752015579182} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06176710600809613, "f1": 0.6198830409356726, "f1_std": 0.06394610704045328, "bacc": 0.6192084942084942, "bacc_std": 0.06297576838069932} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.059294662840934674, "f1": 0.6153846153846154, "f1_std": 0.06276757554879729, "bacc": 0.6148648648648649, "bacc_std": 0.06106095634061859} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05840381765912023, "f1": 0.6474358974358974, "f1_std": 0.062314038928973685, "bacc": 0.6462355212355213, "bacc_std": 0.0605297935593882} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.000774263682681127, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05669392545442547, "f1": 0.5250692869740489, "f1_std": 0.06162957732152183, "bacc": 0.5299227799227799, "bacc_std": 0.05796323024724622} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06307455155016513, "f1": 0.5745454545454545, "f1_std": 0.06493459621404897, "bacc": 0.5743243243243243, "bacc_std": 0.064432121210835} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.061930283383817966, "f1": 0.5810455956075435, "f1_std": 0.06234718242833867, "bacc": 0.583011583011583, "bacc_std": 0.06275353663962097} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05803507611067291, "f1": 0.5699583435432491, "f1_std": 0.060638787521142296, "bacc": 0.5699806949806949, "bacc_std": 0.05925936028283022} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06129703410877678, "f1": 0.5745454545454545, "f1_std": 0.06334327113622078, "bacc": 0.5743243243243243, "bacc_std": 0.06271738691414352} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05694948244098324, "f1": 0.545, "f1_std": 0.06145905010361006, "bacc": 0.5477799227799228, "bacc_std": 0.05841277130468106} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06047276466536146, "f1": 0.5469838981014179, "f1_std": 0.0611391183904927, "bacc": 0.5472972972972974, "bacc_std": 0.06115832915148681} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05907752403540492, "f1": 0.606060606060606, "f1_std": 0.060693186407199014, "bacc": 0.6056949806949807, "bacc_std": 0.06021909228100301} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05965918192672173, "f1": 0.606060606060606, "f1_std": 0.061829558863395986, "bacc": 0.6056949806949807, "bacc_std": 0.061087242413563045} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05788349040562829, "f1": 0.6655231560891939, "f1_std": 0.0614274429143358, "bacc": 0.6640926640926641, "bacc_std": 0.06008256984795916} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.059654738375512856, "f1": 0.49317738791423005, "f1_std": 0.061397688026523796, "bacc": 0.49372586872586877, "bacc_std": 0.06011611847379033} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05967339906703001, "f1": 0.6474358974358974, "f1_std": 0.06222094237691313, "bacc": 0.6462355212355213, "bacc_std": 0.0607245986293766} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.000774263682681127, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.04971793575858389, "f1": 0.6130952380952381, "f1_std": 0.06234615955266387, "bacc": 0.6245173745173745, "bacc_std": 0.05286369882432733} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06003589458859804, "f1": 0.5565302144249512, "f1_std": 0.061499943457259676, "bacc": 0.5564671814671815, "bacc_std": 0.06070543860598501} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.061550952578441946, "f1": 0.5846153846153846, "f1_std": 0.06198822959954095, "bacc": 0.596042471042471, "bacc_std": 0.06136644691682726} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.060427009126322505, "f1": 0.578226387887527, "f1_std": 0.06092313802050427, "bacc": 0.5786679536679536, "bacc_std": 0.06092382838313274} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05466655024762703, "f1": 0.5289855072463768, "f1_std": 0.06221471197612961, "bacc": 0.5390926640926641, "bacc_std": 0.056347407176051086} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.06400943717403924, "f1": 0.4743935309973046, "f1_std": 0.06645361445807867, "bacc": 0.47586872586872586, "bacc_std": 0.06484192618020329} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.06116331034651721, "f1": 0.6375757575757576, "f1_std": 0.06250338335770013, "bacc": 0.6370656370656371, "bacc_std": 0.0616676916484754} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.059477806511804225, "f1": 0.5062484685126194, "f1_std": 0.06109247575823652, "bacc": 0.5072393822393823, "bacc_std": 0.06003562428949995} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05873196563186225, "f1": 0.5683111954459203, "f1_std": 0.059114768425959426, "bacc": 0.5738416988416988, "bacc_std": 0.05978393373131858} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06347872774197388, "f1": 0.5157414083153088, "f1_std": 0.06415790725129597, "bacc": 0.515926640926641, "bacc_std": 0.06409194371920374} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05642686841647285, "f1": 0.6153846153846154, "f1_std": 0.059627103727383536, "bacc": 0.6148648648648649, "bacc_std": 0.05786440039192632} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06151688852635913, "f1": 0.49317738791423005, "f1_std": 0.06351757147044909, "bacc": 0.49372586872586877, "bacc_std": 0.06257331202133987} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.0520638297368135, "f1": 0.7176640926640927, "f1_std": 0.05319306048470526, "bacc": 0.7176640926640927, "bacc_std": 0.053169392841835444} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.056433352847491416, "f1": 0.570630081300813, "f1_std": 0.06199964107663033, "bacc": 0.5748069498069498, "bacc_std": 0.057883640730489606} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.000774263682681127, "split": "test", "acc": 0.47692307692307695, "acc_std": 0.06380985807981146, "f1": 0.4738095238095238, "f1_std": 0.06398367148119169, "bacc": 0.4753861003861004, "bacc_std": 0.06460305030743448} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06305079383759014, "f1": 0.5500119360229172, "f1_std": 0.06349606593475027, "bacc": 0.5516409266409266, "bacc_std": 0.06368202021516883} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05964251690266942, "f1": 0.6886973180076628, "f1_std": 0.060579049843737034, "bacc": 0.6906370656370657, "bacc_std": 0.06059347846291427} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 21.54434690031882, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05520569390065896, "f1": 0.47320261437908495, "f1_std": 0.06178663705802395, "bacc": 0.48986486486486486, "bacc_std": 0.05590210553397819} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05744748875709922, "f1": 0.6094688776736361, "f1_std": 0.05842925576971971, "bacc": 0.61003861003861, "bacc_std": 0.05829385297118714} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.06135159319989727, "f1": 0.5953065134099617, "f1_std": 0.06197942833546812, "bacc": 0.5965250965250966, "bacc_std": 0.062063924689805025} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.0573175520686982, "f1": 0.6289401836684041, "f1_std": 0.06142311800179273, "bacc": 0.6283783783783784, "bacc_std": 0.059143122746962304} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06566545298691186, "f1": 0.5294401544401545, "f1_std": 0.06658049502901836, "bacc": 0.5294401544401545, "bacc_std": 0.06654200875288914} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06136494768785945, "f1": 0.5047619047619047, "f1_std": 0.06167758575062951, "bacc": 0.5067567567567568, "bacc_std": 0.06248987264589987} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.061060451179342355, "f1": 0.4980694980694981, "f1_std": 0.061281325091251275, "bacc": 0.4980694980694981, "bacc_std": 0.06100754756314708} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.059245552600668515, "f1": 0.588206627680312, "f1_std": 0.06101636705953768, "bacc": 0.5878378378378378, "bacc_std": 0.060128896245772605} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05439488315650166, "f1": 0.6794871794871795, "f1_std": 0.057461499800194354, "bacc": 0.6776061776061776, "bacc_std": 0.05597209786828866} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05768615351791371, "f1": 0.6375757575757576, "f1_std": 0.05874082412249993, "bacc": 0.6370656370656371, "bacc_std": 0.05832647526841794} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.054476408226446414, "f1": 0.5962732919254659, "f1_std": 0.06265111599275523, "bacc": 0.6018339768339769, "bacc_std": 0.05684187793277736} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05095194162178303, "f1": 0.5751633986928104, "f1_std": 0.05902791259799706, "bacc": 0.583976833976834, "bacc_std": 0.052633892100958334} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.055998943354359225, "f1": 0.5501153550371699, "f1_std": 0.06276121237733259, "bacc": 0.556949806949807, "bacc_std": 0.05763804209834488} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.061087548000472724, "f1": 0.5248538011695907, "f1_std": 0.06283738590094717, "bacc": 0.525096525096525, "bacc_std": 0.06206668934539732} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.062218359511924046, "f1": 0.5534233593935086, "f1_std": 0.06229829946329207, "bacc": 0.5603281853281853, "bacc_std": 0.06256442140617618} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.061241583892263404, "f1": 0.6375757575757576, "f1_std": 0.06299769774599755, "bacc": 0.6370656370656371, "bacc_std": 0.06234659188143713} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.05935571237645097, "f1": 0.4871794871794872, "f1_std": 0.06156570913796003, "bacc": 0.48938223938223935, "bacc_std": 0.05994890677985065} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05947521984110635, "f1": 0.6407113674597452, "f1_std": 0.060098261819259424, "bacc": 0.6414092664092663, "bacc_std": 0.059847853136717984} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06144589961813282, "f1": 0.5066413662239089, "f1_std": 0.061500729382083896, "bacc": 0.5111003861003861, "bacc_std": 0.061566645045623446} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.052595362812220554, "f1": 0.5533826638477801, "f1_std": 0.06199270514300019, "bacc": 0.5661196911196912, "bacc_std": 0.05427549271617841} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.7538461538461538, "acc_std": 0.05110726624191411, "f1": 0.74, "f1_std": 0.05658345607835071, "bacc": 0.7360038610038611, "bacc_std": 0.05405684990011642} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.0631329657227248, "f1": 0.5115151515151515, "f1_std": 0.06445650326299723, "bacc": 0.5115830115830116, "bacc_std": 0.0638621042317883} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.058813805085532876, "f1": 0.5469838981014179, "f1_std": 0.060046464553516685, "bacc": 0.5472972972972974, "bacc_std": 0.060120358712774304} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.000774263682681127, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06156457907311398, "f1": 0.5321419707123356, "f1_std": 0.06452988246739617, "bacc": 0.5342664092664092, "bacc_std": 0.0625566874003427} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.0553704169834676, "f1": 0.570630081300813, "f1_std": 0.06167183301434378, "bacc": 0.5748069498069498, "bacc_std": 0.057183263556338085} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05498422796988048, "f1": 0.7214285714285714, "f1_std": 0.05520623222150543, "bacc": 0.7263513513513513, "bacc_std": 0.05506270725948215} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05641666630248894, "f1": 0.6862934362934363, "f1_std": 0.05765242551437509, "bacc": 0.6862934362934363, "bacc_std": 0.05754977390225277} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06062097592330143, "f1": 0.6139225469232596, "f1_std": 0.060631773544137964, "bacc": 0.6187258687258688, "bacc_std": 0.06083088510821498} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06032775176596667, "f1": 0.606060606060606, "f1_std": 0.06170095191614852, "bacc": 0.6056949806949807, "bacc_std": 0.06137105791708423} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.059883745361991855, "f1": 0.5376016260162602, "f1_std": 0.06476251530827347, "bacc": 0.5434362934362934, "bacc_std": 0.06090471736687394} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.0587482765704663, "f1": 0.5775, "f1_std": 0.06298394984043722, "bacc": 0.5791505791505791, "bacc_std": 0.060251711406062655} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05857972668678324, "f1": 0.5699583435432491, "f1_std": 0.061322622018507175, "bacc": 0.5699806949806949, "bacc_std": 0.05974689777928845} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.058888320739571336, "f1": 0.5775, "f1_std": 0.062879523232886, "bacc": 0.5791505791505791, "bacc_std": 0.06021417214067847} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.059374857707708666, "f1": 0.6018132810585641, "f1_std": 0.061792820736669495, "bacc": 0.6013513513513513, "bacc_std": 0.06034872400188347} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05102651822464646, "f1": 0.5190274841437632, "f1_std": 0.05956599456532367, "bacc": 0.5347490347490347, "bacc_std": 0.05222639739371201} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 166.81005372000556, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.061476615075843524, "f1": 0.543030303030303, "f1_std": 0.06309245461888256, "bacc": 0.542953667953668, "bacc_std": 0.06231543529542896} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.055721462831824, "f1": 0.61, "f1_std": 0.060653507586662735, "bacc": 0.6105212355212355, "bacc_std": 0.057679393521572704} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05031757136593221, "f1": 0.6003742314889067, "f1_std": 0.06207904624610059, "bacc": 0.6110038610038611, "bacc_std": 0.053311566018655734} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06100702064555633, "f1": 0.5192307692307693, "f1_std": 0.06362253148263441, "bacc": 0.5207528957528957, "bacc_std": 0.06190621252267073} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.059122578512280984, "f1": 0.6375757575757576, "f1_std": 0.060636669916125555, "bacc": 0.6370656370656371, "bacc_std": 0.06015086412013059} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05387318881756287, "f1": 0.47320261437908495, "f1_std": 0.05907622153254185, "bacc": 0.48986486486486486, "bacc_std": 0.053843848818292614} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.000774263682681127, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05867551932490532, "f1": 0.6233308138070043, "f1_std": 0.06411295425940841, "bacc": 0.6240347490347491, "bacc_std": 0.060536521349576045} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.058034480670938735, "f1": 0.49987589972697943, "f1_std": 0.060056309558018356, "bacc": 0.502895752895753, "bacc_std": 0.058367649916049126} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.3593813663804626, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06286848111064687, "f1": 0.5644080416976918, "f1_std": 0.06681009938441267, "bacc": 0.5656370656370656, "bacc_std": 0.06451856818847286} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | adhd200_dx | train | 100 | 1.9099 | 16.795 | 0.80819 | 0.07598 | 0.80116 | 0.079774 | 0.79845 | 0.079852 | +| flat_mae | reg | logistic | adhd200_dx | test | 100 | 1.9099 | 16.795 | 0.59431 | 0.060269 | 0.57791 | 0.062569 | 0.58023 | 0.061029 | + + +done! total time: 0:04:35 diff --git a/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8264f74842c3afbeb87764ef696edd22c03aaa46 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..f7b5103d32de5838e3c9500c3e1e3d1b97ae3ce4 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,0.005994842503189409,train,0.8292682926829268,0.013834836720735145,0.6926829268292682,0.03197450495629825,0.6617754952311079,0.026413272598752157 +flat_mae,patch,logistic,adni_ad_vs_cn,,0.005994842503189409,test,0.7560975609756098,0.04289400075087913,0.5119047619047619,0.08025601408111671,0.5243055555555556,0.05812561147295518 +flat_mae,patch,logistic,adni_ad_vs_cn,1,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,1,2.782559402207126,test,0.8292682926829268,0.054798620025439004,0.7602339181286549,0.08148565270844796,0.7516129032258064,0.08305078721495619 +flat_mae,patch,logistic,adni_ad_vs_cn,2,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,2,21.54434690031882,test,0.7073170731707317,0.06829747369955802,0.6272727272727273,0.08178495592030202,0.6370967741935484,0.08654047737773271 +flat_mae,patch,logistic,adni_ad_vs_cn,3,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,3,2.782559402207126,test,0.7073170731707317,0.05549005087922894,0.5340909090909092,0.0814746391265176,0.535483870967742,0.0689808026499777 +flat_mae,patch,logistic,adni_ad_vs_cn,4,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,4,166.81005372000556,test,0.7560975609756098,0.0694244032684834,0.6893939393939394,0.08205299901527323,0.7032258064516128,0.08600440155229477 +flat_mae,patch,logistic,adni_ad_vs_cn,5,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,5,2.782559402207126,test,0.6585365853658537,0.05452364696647398,0.4564393939393939,0.06387806372753559,0.4693548387096774,0.056487883647375364 +flat_mae,patch,logistic,adni_ad_vs_cn,6,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,6,166.81005372000556,test,0.7317073170731707,0.07249238405915077,0.6676492262343405,0.0850006211023934,0.6870967741935483,0.09163028975180641 +flat_mae,patch,logistic,adni_ad_vs_cn,7,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,7,21.54434690031882,test,0.8292682926829268,0.05609143126637976,0.7602339181286549,0.08228711043403408,0.7516129032258064,0.08493487809592484 +flat_mae,patch,logistic,adni_ad_vs_cn,8,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,8,166.81005372000556,test,0.8048780487804879,0.057286052840889924,0.7354838709677419,0.07782989464521667,0.7354838709677419,0.0810617073952138 +flat_mae,patch,logistic,adni_ad_vs_cn,9,0.3593813663804626,train,0.989159891598916,0.005164273605587754,0.9847141673570836,0.007352515644698873,0.9807913550825869,0.00968710233912399 +flat_mae,patch,logistic,adni_ad_vs_cn,9,0.3593813663804626,test,0.7804878048780488,0.0653618309646794,0.7280766396462786,0.07794944616381128,0.7532258064516129,0.0839188575434913 +flat_mae,patch,logistic,adni_ad_vs_cn,10,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,10,1291.5496650148827,test,0.8536585365853658,0.04948669293970117,0.7864583333333333,0.07815493864420038,0.7677419354838709,0.0798101203292659 +flat_mae,patch,logistic,adni_ad_vs_cn,11,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,11,21.54434690031882,test,0.7073170731707317,0.057820086504011305,0.5729166666666666,0.08010344052964839,0.5693548387096774,0.07402129063466567 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.005994842503189409,train,0.8455284552845529,0.013165063966136011,0.7255073409461664,0.029595352647757825,0.688840496343167,0.02597410805567982 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.005994842503189409,test,0.7560975609756098,0.04338272326822923,0.569327731092437,0.08711651868919099,0.567741935483871,0.06584883984275529 +flat_mae,patch,logistic,adni_ad_vs_cn,13,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,13,166.81005372000556,test,0.7560975609756098,0.061513316946889204,0.6693548387096775,0.08127446917101135,0.6693548387096775,0.08331805908578943 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.046415888336127774,train,0.9051490514905149,0.013486326724204118,0.851341551849166,0.023710755518247112,0.8167474730873532,0.02600231190432193 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.046415888336127774,test,0.7560975609756098,0.0663289084246221,0.6693548387096775,0.08658015641460742,0.6693548387096775,0.08749034874675801 +flat_mae,patch,logistic,adni_ad_vs_cn,15,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,15,21.54434690031882,test,0.7317073170731707,0.06863112999768452,0.6676492262343405,0.08046309973026088,0.6870967741935483,0.08657934058677744 +flat_mae,patch,logistic,adni_ad_vs_cn,16,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,16,21.54434690031882,test,0.7317073170731707,0.06920950786260639,0.6479313036690086,0.08982947807253898,0.6532258064516129,0.09225673109796602 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.3593813663804626,train,0.989159891598916,0.005538274398239835,0.9845864661654136,0.008055156438600702,0.9767441860465116,0.011881530540409921 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.3593813663804626,test,0.7560975609756098,0.06771490784749518,0.6893939393939394,0.08269700774727287,0.7032258064516128,0.08742503701328465 +flat_mae,patch,logistic,adni_ad_vs_cn,18,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,18,1291.5496650148827,test,0.8048780487804879,0.060809216872386676,0.764367816091954,0.0693746483591815,0.8032258064516129,0.07358462915774625 +flat_mae,patch,logistic,adni_ad_vs_cn,19,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,19,166.81005372000556,test,0.7560975609756098,0.06599623229380591,0.6693548387096775,0.08318698917912322,0.6693548387096775,0.08408662868630983 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.3593813663804626,train,0.991869918699187,0.0042773088409873705,0.9884880564885973,0.006140708961348227,0.9825581395348837,0.009176319548397382 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.3593813663804626,test,0.7804878048780488,0.06495319323269896,0.7280766396462786,0.074956668928657,0.7532258064516129,0.08040476809599191 +flat_mae,patch,logistic,adni_ad_vs_cn,21,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,21,166.81005372000556,test,0.7804878048780488,0.05971215905761432,0.6917293233082706,0.08852130247194998,0.685483870967742,0.0873993584438672 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.046415888336127774,train,0.9159891598915989,0.01191302267614704,0.8696573648887318,0.020515475338223267,0.8359561180047662,0.02346422440564635 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.046415888336127774,test,0.7560975609756098,0.054481035455204656,0.6117424242424243,0.09133591889222313,0.6016129032258064,0.07802357464120625 +flat_mae,patch,logistic,adni_ad_vs_cn,23,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,23,2.782559402207126,test,0.7560975609756098,0.05768147783463413,0.6440972222222222,0.0912979639651208,0.635483870967742,0.08632321977679397 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.3593813663804626,train,0.994579945799458,0.003894975548931391,0.9923570836785418,0.00557697828804509,0.9883720930232558,0.008356081264858654 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.3593813663804626,test,0.7560975609756098,0.05199038944731995,0.6117424242424243,0.09169021795973548,0.6016129032258064,0.07822618895816026 +flat_mae,patch,logistic,adni_ad_vs_cn,25,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,25,166.81005372000556,test,0.7560975609756098,0.06532352105735996,0.6893939393939394,0.07907651446097673,0.7032258064516128,0.08393278236955762 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.046415888336127774,train,0.9105691056910569,0.01283079094674461,0.8583822759783684,0.02309825585451819,0.8202810419919468,0.0258575191300464 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.046415888336127774,test,0.7317073170731707,0.06292529776922495,0.6232247284878863,0.0862055033688093,0.6193548387096774,0.08347693413619331 +flat_mae,patch,logistic,adni_ad_vs_cn,27,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,27,166.81005372000556,test,0.8292682926829268,0.049542799624628035,0.7402714932126697,0.08719899863835033,0.717741935483871,0.08315568257325992 +flat_mae,patch,logistic,adni_ad_vs_cn,28,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,28,21.54434690031882,test,0.7317073170731707,0.05888788892654301,0.6232247284878863,0.0804593081423439,0.6193548387096774,0.07831726612144178 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,test,0.6829268292682927,0.06802036297972042,0.5547201336675021,0.08833130180933463,0.5532258064516129,0.08614386793818048 +flat_mae,patch,logistic,adni_ad_vs_cn,30,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,30,10000.0,test,0.7073170731707317,0.062081503106477275,0.603225806451613,0.07966252711156596,0.603225806451613,0.07945837698604863 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,train,0.926829268292683,0.012835461963061602,0.887597730067579,0.021889512715921697,0.8551647629221792,0.02581026144783025 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,test,0.7804878048780488,0.0386098408393135,0.5886287625418061,0.09135257181367867,0.5838709677419355,0.06512752646488214 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.005994842503189409,train,0.8401084010840109,0.014334889631394657,0.7121951219512195,0.03233886975577647,0.6772125893664229,0.027255130797568337 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.005994842503189409,test,0.6585365853658537,0.06290260452602141,0.5017361111111112,0.07850950358360588,0.5032258064516129,0.0729852672117379 +flat_mae,patch,logistic,adni_ad_vs_cn,33,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,33,166.81005372000556,test,0.8292682926829268,0.056822463740010695,0.7985964912280701,0.0610264218912561,0.853225806451613,0.05891495298940685 +flat_mae,patch,logistic,adni_ad_vs_cn,34,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,34,10000.0,test,0.7317073170731707,0.06592716764483694,0.6676492262343405,0.07889582712015417,0.6870967741935483,0.08643145022461002 +flat_mae,patch,logistic,adni_ad_vs_cn,35,0.3593813663804626,train,0.991869918699187,0.0047073131727270985,0.9885825675299359,0.006634316256224777,0.986605308570959,0.008166174879082303 +flat_mae,patch,logistic,adni_ad_vs_cn,35,0.3593813663804626,test,0.8536585365853658,0.05257441399144382,0.7864583333333333,0.08298509790258964,0.7677419354838709,0.08407180668012278 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,test,0.6097560975609756,0.06854439674259787,0.47096774193548385,0.07388529204187261,0.47096774193548385,0.07389521055928061 +flat_mae,patch,logistic,adni_ad_vs_cn,37,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,37,166.81005372000556,test,0.6585365853658537,0.06861906329665027,0.5651515151515152,0.08171658089932742,0.5709677419354839,0.08726112593759 +flat_mae,patch,logistic,adni_ad_vs_cn,38,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,38,21.54434690031882,test,0.7073170731707317,0.07246047149041966,0.646551724137931,0.08223853051606658,0.6709677419354838,0.08954285395892439 +flat_mae,patch,logistic,adni_ad_vs_cn,39,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,39,21.54434690031882,test,0.6585365853658537,0.06313429592410327,0.5017361111111112,0.08174630542670099,0.5032258064516129,0.07559669291171636 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.3593813663804626,train,0.991869918699187,0.004390524933291687,0.9885825675299359,0.006198820270259955,0.986605308570959,0.007895555584875834 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.3593813663804626,test,0.6585365853658537,0.06661178784265949,0.5370967741935484,0.08187956836000952,0.5370967741935484,0.08180360379697668 +flat_mae,patch,logistic,adni_ad_vs_cn,41,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,41,166.81005372000556,test,0.7317073170731707,0.06915972254059245,0.6676492262343405,0.08060199453465082,0.6870967741935483,0.08880292483300863 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.3593813663804626,train,0.994579945799458,0.003625996005792871,0.9923570836785418,0.005169770884433684,0.9883720930232558,0.007779026314753337 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.3593813663804626,test,0.6829268292682927,0.05844454145544254,0.5176470588235295,0.07802454966968896,0.5193548387096775,0.07039928304371941 +flat_mae,patch,logistic,adni_ad_vs_cn,43,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,43,21.54434690031882,test,0.8048780487804879,0.05945417003310034,0.7354838709677419,0.07914969659955917,0.7354838709677419,0.08239298353435191 +flat_mae,patch,logistic,adni_ad_vs_cn,44,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,44,2.782559402207126,test,0.8536585365853658,0.04988913348115296,0.7864583333333333,0.0793622321393813,0.7677419354838709,0.08111424247927408 +flat_mae,patch,logistic,adni_ad_vs_cn,45,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,45,166.81005372000556,test,0.7804878048780488,0.0605377252698244,0.7119437939110069,0.07946924053532653,0.7193548387096774,0.08497718712045783 +flat_mae,patch,logistic,adni_ad_vs_cn,46,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,46,21.54434690031882,test,0.7317073170731707,0.06467314056123717,0.6232247284878863,0.08803529240777497,0.6193548387096774,0.08626547577156027 +flat_mae,patch,logistic,adni_ad_vs_cn,47,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,47,21.54434690031882,test,0.7560975609756098,0.053203301727370596,0.6117424242424243,0.08968084875942044,0.6016129032258064,0.07753094270007646 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.3593813663804626,train,0.991869918699187,0.005039248573903079,0.9884880564885973,0.007257974129974514,0.9825581395348837,0.01081094606843166 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.3593813663804626,test,0.6585365853658537,0.06422512087260354,0.5370967741935484,0.08218491986612324,0.5370967741935484,0.0827064062560818 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,test,0.7804878048780488,0.060747201471467886,0.6917293233082706,0.08600022059563367,0.685483870967742,0.08421284440850745 +flat_mae,patch,logistic,adni_ad_vs_cn,50,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,50,166.81005372000556,test,0.7804878048780488,0.05710443171380253,0.6917293233082706,0.08307724176257154,0.685483870967742,0.08053784155908872 +flat_mae,patch,logistic,adni_ad_vs_cn,51,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,51,166.81005372000556,test,0.6097560975609756,0.0702495085838757,0.5287356321839081,0.07650893600106642,0.5387096774193548,0.08478468161910417 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.046415888336127774,train,0.9159891598915989,0.013674460541503017,0.8696573648887318,0.023478043265817698,0.8359561180047662,0.02683064741845025 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.046415888336127774,test,0.7317073170731707,0.04995074313797236,0.5512437810945273,0.0867133336704071,0.5516129032258065,0.06897484943309713 +flat_mae,patch,logistic,adni_ad_vs_cn,53,0.3593813663804626,train,0.989159891598916,0.005411578739218375,0.9845864661654136,0.007872723206427264,0.9767441860465116,0.011609724155648768 +flat_mae,patch,logistic,adni_ad_vs_cn,53,0.3593813663804626,test,0.8536585365853658,0.04640416810342163,0.7670454545454546,0.08796551978663088,0.7338709677419355,0.08012979137087828 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.005994842503189409,train,0.8482384823848238,0.012307939467365374,0.7250372578241431,0.028751757655824595,0.6865601117593887,0.024638357848104583 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.005994842503189409,test,0.7317073170731707,0.057402510272668454,0.5918552036199095,0.0866448842013915,0.5854838709677419,0.07827414369560307 +flat_mae,patch,logistic,adni_ad_vs_cn,55,0.3593813663804626,train,0.994579945799458,0.0037816444889585627,0.9923570836785418,0.005400815072513409,0.9883720930232558,0.008112946607126251 +flat_mae,patch,logistic,adni_ad_vs_cn,55,0.3593813663804626,test,0.7317073170731707,0.06795265524720683,0.6479313036690086,0.08487008256246929,0.6532258064516129,0.08836056912955609 +flat_mae,patch,logistic,adni_ad_vs_cn,56,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,56,21.54434690031882,test,0.7560975609756098,0.06185186963636091,0.6440972222222222,0.09106825371084105,0.635483870967742,0.08594542977415073 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,train,0.9105691056910569,0.013660364655915672,0.8612481626234888,0.02375787941274375,0.8283753800640973,0.02683898846670145 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,test,0.7804878048780488,0.062206426519270844,0.6917293233082706,0.09149701510828176,0.685483870967742,0.08962201096335376 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,test,0.6585365853658537,0.0713686377263996,0.5651515151515152,0.08213550580363621,0.5709677419354839,0.08697436864530089 +flat_mae,patch,logistic,adni_ad_vs_cn,59,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,59,21.54434690031882,test,0.7560975609756098,0.05533631961819983,0.6117424242424243,0.093732406755215,0.6016129032258064,0.08050176929435361 +flat_mae,patch,logistic,adni_ad_vs_cn,60,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,60,2.782559402207126,test,0.8048780487804879,0.056979681611933064,0.7354838709677419,0.07978659566087543,0.7354838709677419,0.08190426405470835 +flat_mae,patch,logistic,adni_ad_vs_cn,61,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,61,166.81005372000556,test,0.6829268292682927,0.072226937617132,0.6259649122807017,0.07883462075722913,0.6548387096774193,0.08874544000537818 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,train,0.9132791327913279,0.01315117711490096,0.8647732478240953,0.0228954569332724,0.8301421645163941,0.026092595537559136 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,test,0.7317073170731707,0.06616783232434982,0.6232247284878863,0.08880106871907206,0.6193548387096774,0.08776678229787142 +flat_mae,patch,logistic,adni_ad_vs_cn,63,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,63,21.54434690031882,test,0.7317073170731707,0.06576145306409922,0.6479313036690086,0.08466427643853076,0.6532258064516129,0.08824678496824713 +flat_mae,patch,logistic,adni_ad_vs_cn,64,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,64,21.54434690031882,test,0.6585365853658537,0.0721152491586458,0.5651515151515152,0.0835022837242658,0.5709677419354839,0.08887420396894379 +flat_mae,patch,logistic,adni_ad_vs_cn,65,0.046415888336127774,train,0.9105691056910569,0.012949065317986634,0.859836320314928,0.02285055036891575,0.824328211028022,0.025363179208550104 +flat_mae,patch,logistic,adni_ad_vs_cn,65,0.046415888336127774,test,0.7073170731707317,0.0644881738670673,0.5729166666666666,0.09085050858208621,0.5693548387096774,0.08554439269612112 +flat_mae,patch,logistic,adni_ad_vs_cn,66,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,66,21.54434690031882,test,0.7317073170731707,0.06278668455403047,0.6232247284878863,0.08961138413190642,0.6193548387096774,0.08635920675325236 +flat_mae,patch,logistic,adni_ad_vs_cn,67,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,67,21.54434690031882,test,0.7317073170731707,0.06195043210561631,0.6232247284878863,0.08593375397506382,0.6193548387096774,0.08381334614914214 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.046415888336127774,train,0.924119241192412,0.012004410633724698,0.88400395185917,0.020105320858296423,0.8533979784698824,0.023419156188322427 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.046415888336127774,test,0.6829268292682927,0.06934689803089991,0.5839188134270101,0.08492574911593297,0.5870967741935484,0.08767208533704736 +flat_mae,patch,logistic,adni_ad_vs_cn,69,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,69,2.782559402207126,test,0.8048780487804879,0.0558935846220622,0.7152777777777778,0.08551549303885568,0.7016129032258065,0.08382059895737658 +flat_mae,patch,logistic,adni_ad_vs_cn,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,70,2.782559402207126,test,0.7560975609756098,0.04808506739235483,0.569327731092437,0.09088713192558485,0.567741935483871,0.06924347330408347 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.3593813663804626,train,0.994579945799458,0.003731045004036313,0.9923570836785418,0.005325319569052921,0.9883720930232558,0.008004393060984905 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.3593813663804626,test,0.7317073170731707,0.05089268549840672,0.5512437810945273,0.08692904998286015,0.5516129032258065,0.06989022342119622 +flat_mae,patch,logistic,adni_ad_vs_cn,72,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,72,166.81005372000556,test,0.7073170731707317,0.06553389394202999,0.603225806451613,0.08510141540813855,0.603225806451613,0.08593462867684518 +flat_mae,patch,logistic,adni_ad_vs_cn,73,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,73,21.54434690031882,test,0.7804878048780488,0.06016008784025382,0.6917293233082706,0.08510281239307298,0.685483870967742,0.08501887921574204 +flat_mae,patch,logistic,adni_ad_vs_cn,74,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,74,2.782559402207126,test,0.7073170731707317,0.047388530403477465,0.4831932773109243,0.07325283586154382,0.5016129032258064,0.05654444891794352 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.005994842503189409,train,0.8346883468834688,0.013387406602205984,0.6985011452375531,0.03109126638981704,0.6655846823896787,0.025819342870959962 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.005994842503189409,test,0.7560975609756098,0.03414285696506755,0.5119047619047619,0.0804767123926464,0.5338709677419355,0.05223709017556784 +flat_mae,patch,logistic,adni_ad_vs_cn,76,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,76,166.81005372000556,test,0.6585365853658537,0.06368153962487595,0.5370967741935484,0.08146514534488593,0.5370967741935484,0.08067339660554235 +flat_mae,patch,logistic,adni_ad_vs_cn,77,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,77,166.81005372000556,test,0.7804878048780488,0.058037219590730944,0.6917293233082706,0.08464949506529064,0.685483870967742,0.08547121050495364 +flat_mae,patch,logistic,adni_ad_vs_cn,78,0.046415888336127774,train,0.9295392953929539,0.011582516329704659,0.8922893838692294,0.01952568591645006,0.8609787164105513,0.02342290217219926 +flat_mae,patch,logistic,adni_ad_vs_cn,78,0.046415888336127774,test,0.7317073170731707,0.058678101295828594,0.5918552036199095,0.08822331111619984,0.5854838709677419,0.07977073933014434 +flat_mae,patch,logistic,adni_ad_vs_cn,79,0.046415888336127774,train,0.9214092140921409,0.012417838616381583,0.8804424333836098,0.02044900159745716,0.8516311940175856,0.023639487174320774 +flat_mae,patch,logistic,adni_ad_vs_cn,79,0.046415888336127774,test,0.8780487804878049,0.048301920772032436,0.8287385129490392,0.07029209597466063,0.8177419354838709,0.07489215909744354 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,test,0.6829268292682927,0.07357318589222533,0.6259649122807017,0.07965129561349939,0.6548387096774193,0.08963525673963588 +flat_mae,patch,logistic,adni_ad_vs_cn,81,0.3593813663804626,train,0.994579945799458,0.003919654889139166,0.9924192620593311,0.005490712376085334,0.9924192620593311,0.006410008743562609 +flat_mae,patch,logistic,adni_ad_vs_cn,81,0.3593813663804626,test,0.7804878048780488,0.04858445978193234,0.6328358208955224,0.09154629324621057,0.6177419354838709,0.07559072744008988 +flat_mae,patch,logistic,adni_ad_vs_cn,82,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,82,166.81005372000556,test,0.8048780487804879,0.05656793255419784,0.7354838709677419,0.07733130630273204,0.7354838709677419,0.07991865244963306 +flat_mae,patch,logistic,adni_ad_vs_cn,83,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,21.54434690031882,test,0.7317073170731707,0.06362673722163155,0.6232247284878863,0.08790959282009606,0.6193548387096774,0.0844204641415154 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,train,0.9159891598915989,0.012590722910945812,0.8669651683433157,0.022517890185990576,0.8278617799326156,0.02545257302826754 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,test,0.7073170731707317,0.05639657319530776,0.5340909090909092,0.08348941445768271,0.535483870967742,0.07093184546698865 +flat_mae,patch,logistic,adni_ad_vs_cn,85,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,85,10000.0,test,0.7560975609756098,0.06712016092337336,0.7054597701149425,0.07612082117471725,0.7370967741935484,0.08222591471664242 +flat_mae,patch,logistic,adni_ad_vs_cn,86,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,86,166.81005372000556,test,0.6829268292682927,0.05891542047528722,0.5176470588235295,0.07971942808161085,0.5193548387096775,0.07137160590201301 +flat_mae,patch,logistic,adni_ad_vs_cn,87,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,87,166.81005372000556,test,0.8292682926829268,0.05540439187379836,0.7602339181286549,0.0834844677655417,0.7516129032258064,0.08603364441230932 +flat_mae,patch,logistic,adni_ad_vs_cn,88,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,88,2.782559402207126,test,0.7317073170731707,0.06527850005858644,0.6676492262343405,0.07894146058796256,0.6870967741935483,0.08541755644486351 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.046415888336127774,train,0.9105691056910569,0.012612151678404938,0.8583822759783684,0.02285434898890752,0.8202810419919468,0.02526045652491505 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.046415888336127774,test,0.8292682926829268,0.05144165532366765,0.7402714932126697,0.08705471972316535,0.717741935483871,0.08480580859644078 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.046415888336127774,train,0.9105691056910569,0.01340737368875086,0.8612481626234888,0.02305700549073709,0.8283753800640973,0.025926056535656973 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.046415888336127774,test,0.7804878048780488,0.060853712302630335,0.6917293233082706,0.08841701427795176,0.685483870967742,0.08776885995816441 +flat_mae,patch,logistic,adni_ad_vs_cn,91,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,91,21.54434690031882,test,0.7073170731707317,0.06824072936712218,0.646551724137931,0.07775655194726162,0.6709677419354838,0.0867063785854821 +flat_mae,patch,logistic,adni_ad_vs_cn,92,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,92,166.81005372000556,test,0.6829268292682927,0.07330920231026282,0.5839188134270101,0.09022288303651897,0.5870967741935484,0.09443619674120071 +flat_mae,patch,logistic,adni_ad_vs_cn,93,0.3593813663804626,train,0.989159891598916,0.0053222503656391714,0.9845864661654136,0.007727862051919511,0.9767441860465116,0.011418083633260828 +flat_mae,patch,logistic,adni_ad_vs_cn,93,0.3593813663804626,test,0.5609756097560976,0.07055378800450512,0.4409090909090909,0.0720413207493076,0.43870967741935485,0.07644827024133914 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.3593813663804626,train,0.994579945799458,0.0036556246566758197,0.9924192620593311,0.005110902444489312,0.9924192620593311,0.00582593571624772 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.3593813663804626,test,0.7073170731707317,0.04773524046895948,0.4831932773109243,0.07255433978787183,0.5016129032258064,0.05599748247648706 +flat_mae,patch,logistic,adni_ad_vs_cn,95,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,95,166.81005372000556,test,0.7804878048780488,0.06380554933636577,0.7119437939110069,0.08133768450153317,0.7193548387096774,0.0862443637030937 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,test,0.8292682926829268,0.04326614782815962,0.7144278606965174,0.08863239292078348,0.6838709677419355,0.0765210108326815 +flat_mae,patch,logistic,adni_ad_vs_cn,97,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,97,166.81005372000556,test,0.7073170731707317,0.06722007858446644,0.603225806451613,0.08929294477663584,0.603225806451613,0.08999094942352683 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.046415888336127774,train,0.9159891598915989,0.012873513020546375,0.8709455419294425,0.0217397391190504,0.8400032870408415,0.02481357976547943 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.046415888336127774,test,0.7317073170731707,0.06190511055374208,0.6232247284878863,0.08719083339266026,0.6193548387096774,0.08517416441056819 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.046415888336127774,train,0.9186991869918699,0.01251666319819646,0.8732249198350893,0.02180687702883564,0.837722902457063,0.02485504756847341 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.046415888336127774,test,0.7317073170731707,0.053761564061834896,0.5918552036199095,0.08504583232429394,0.5854838709677419,0.07536456486889055 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.046415888336127774,train,0.9132791327913279,0.01293276610275063,0.8633839318770826,0.023013357973592264,0.8260949954803188,0.025988379237115184 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.046415888336127774,test,0.6585365853658537,0.053036303550950854,0.4564393939393939,0.06543667812830219,0.4693548387096774,0.057379768430886005 diff --git a/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a529ca450b304b325ee4d4b630fe27f626ac0c1 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:52:31 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:10:36 time: 3.8836 data: 3.0770 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:51 time: 0.1792 data: 0.0575 max mem: 3005 +extract (train) [ 40/164] eta: 0:00:32 time: 0.1626 data: 0.0466 max mem: 3005 +extract (train) [ 60/164] eta: 0:00:23 time: 0.1610 data: 0.0491 max mem: 3005 +extract (train) [ 80/164] eta: 0:00:17 time: 0.1654 data: 0.0514 max mem: 3005 +extract (train) [100/164] eta: 0:00:12 time: 0.1614 data: 0.0497 max mem: 3005 +extract (train) [120/164] eta: 0:00:08 time: 0.1777 data: 0.0591 max mem: 3005 +extract (train) [140/164] eta: 0:00:04 time: 0.1650 data: 0.0533 max mem: 3005 +extract (train) [160/164] eta: 0:00:00 time: 0.1469 data: 0.0428 max mem: 3005 +extract (train) [163/164] eta: 0:00:00 time: 0.1484 data: 0.0435 max mem: 3005 +extract (train) Total time: 0:00:30 (0.1890 s / it) +extract (validation) [ 0/21] eta: 0:01:08 time: 3.2747 data: 3.1513 max mem: 3005 +extract (validation) [20/21] eta: 0:00:00 time: 0.1276 data: 0.0318 max mem: 3005 +extract (validation) Total time: 0:00:06 (0.2933 s / it) +extract (test) [ 0/21] eta: 0:01:08 time: 3.2573 data: 3.1348 max mem: 3005 +extract (test) [20/21] eta: 0:00:00 time: 0.1249 data: 0.0308 max mem: 3005 +extract (test) Total time: 0:00:06 (0.2886 s / it) +feature extraction time: 0:00:43 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 0.0059948 | train | 0.82927 | 0.013835 | 0.69268 | 0.031975 | 0.66178 | 0.026413 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 0.0059948 | test | 0.7561 | 0.042894 | 0.5119 | 0.080256 | 0.52431 | 0.058126 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 2.782559402207126, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.054798620025439004, "f1": 0.7602339181286549, "f1_std": 0.08148565270844796, "bacc": 0.7516129032258064, "bacc_std": 0.08305078721495619} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06829747369955802, "f1": 0.6272727272727273, "f1_std": 0.08178495592030202, "bacc": 0.6370967741935484, "bacc_std": 0.08654047737773271} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05549005087922894, "f1": 0.5340909090909092, "f1_std": 0.0814746391265176, "bacc": 0.535483870967742, "bacc_std": 0.0689808026499777} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0694244032684834, "f1": 0.6893939393939394, "f1_std": 0.08205299901527323, "bacc": 0.7032258064516128, "bacc_std": 0.08600440155229477} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05452364696647398, "f1": 0.4564393939393939, "f1_std": 0.06387806372753559, "bacc": 0.4693548387096774, "bacc_std": 0.056487883647375364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.07249238405915077, "f1": 0.6676492262343405, "f1_std": 0.0850006211023934, "bacc": 0.6870967741935483, "bacc_std": 0.09163028975180641} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05609143126637976, "f1": 0.7602339181286549, "f1_std": 0.08228711043403408, "bacc": 0.7516129032258064, "bacc_std": 0.08493487809592484} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.057286052840889924, "f1": 0.7354838709677419, "f1_std": 0.07782989464521667, "bacc": 0.7354838709677419, "bacc_std": 0.0810617073952138} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0653618309646794, "f1": 0.7280766396462786, "f1_std": 0.07794944616381128, "bacc": 0.7532258064516129, "bacc_std": 0.0839188575434913} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 1291.5496650148827, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04948669293970117, "f1": 0.7864583333333333, "f1_std": 0.07815493864420038, "bacc": 0.7677419354838709, "bacc_std": 0.0798101203292659} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.057820086504011305, "f1": 0.5729166666666666, "f1_std": 0.08010344052964839, "bacc": 0.5693548387096774, "bacc_std": 0.07402129063466567} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04338272326822923, "f1": 0.569327731092437, "f1_std": 0.08711651868919099, "bacc": 0.567741935483871, "bacc_std": 0.06584883984275529} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.061513316946889204, "f1": 0.6693548387096775, "f1_std": 0.08127446917101135, "bacc": 0.6693548387096775, "bacc_std": 0.08331805908578943} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0663289084246221, "f1": 0.6693548387096775, "f1_std": 0.08658015641460742, "bacc": 0.6693548387096775, "bacc_std": 0.08749034874675801} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06863112999768452, "f1": 0.6676492262343405, "f1_std": 0.08046309973026088, "bacc": 0.6870967741935483, "bacc_std": 0.08657934058677744} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06920950786260639, "f1": 0.6479313036690086, "f1_std": 0.08982947807253898, "bacc": 0.6532258064516129, "bacc_std": 0.09225673109796602} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06771490784749518, "f1": 0.6893939393939394, "f1_std": 0.08269700774727287, "bacc": 0.7032258064516128, "bacc_std": 0.08742503701328465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 1291.5496650148827, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.060809216872386676, "f1": 0.764367816091954, "f1_std": 0.0693746483591815, "bacc": 0.8032258064516129, "bacc_std": 0.07358462915774625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06599623229380591, "f1": 0.6693548387096775, "f1_std": 0.08318698917912322, "bacc": 0.6693548387096775, "bacc_std": 0.08408662868630983} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06495319323269896, "f1": 0.7280766396462786, "f1_std": 0.074956668928657, "bacc": 0.7532258064516129, "bacc_std": 0.08040476809599191} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05971215905761432, "f1": 0.6917293233082706, "f1_std": 0.08852130247194998, "bacc": 0.685483870967742, "bacc_std": 0.0873993584438672} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.054481035455204656, "f1": 0.6117424242424243, "f1_std": 0.09133591889222313, "bacc": 0.6016129032258064, "bacc_std": 0.07802357464120625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05768147783463413, "f1": 0.6440972222222222, "f1_std": 0.0912979639651208, "bacc": 0.635483870967742, "bacc_std": 0.08632321977679397} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05199038944731995, "f1": 0.6117424242424243, "f1_std": 0.09169021795973548, "bacc": 0.6016129032258064, "bacc_std": 0.07822618895816026} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06532352105735996, "f1": 0.6893939393939394, "f1_std": 0.07907651446097673, "bacc": 0.7032258064516128, "bacc_std": 0.08393278236955762} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06292529776922495, "f1": 0.6232247284878863, "f1_std": 0.0862055033688093, "bacc": 0.6193548387096774, "bacc_std": 0.08347693413619331} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.049542799624628035, "f1": 0.7402714932126697, "f1_std": 0.08719899863835033, "bacc": 0.717741935483871, "bacc_std": 0.08315568257325992} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05888788892654301, "f1": 0.6232247284878863, "f1_std": 0.0804593081423439, "bacc": 0.6193548387096774, "bacc_std": 0.07831726612144178} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06802036297972042, "f1": 0.5547201336675021, "f1_std": 0.08833130180933463, "bacc": 0.5532258064516129, "bacc_std": 0.08614386793818048} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 10000.0, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.062081503106477275, "f1": 0.603225806451613, "f1_std": 0.07966252711156596, "bacc": 0.603225806451613, "bacc_std": 0.07945837698604863} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0386098408393135, "f1": 0.5886287625418061, "f1_std": 0.09135257181367867, "bacc": 0.5838709677419355, "bacc_std": 0.06512752646488214} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06290260452602141, "f1": 0.5017361111111112, "f1_std": 0.07850950358360588, "bacc": 0.5032258064516129, "bacc_std": 0.0729852672117379} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.056822463740010695, "f1": 0.7985964912280701, "f1_std": 0.0610264218912561, "bacc": 0.853225806451613, "bacc_std": 0.05891495298940685} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 10000.0, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06592716764483694, "f1": 0.6676492262343405, "f1_std": 0.07889582712015417, "bacc": 0.6870967741935483, "bacc_std": 0.08643145022461002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.05257441399144382, "f1": 0.7864583333333333, "f1_std": 0.08298509790258964, "bacc": 0.7677419354838709, "bacc_std": 0.08407180668012278} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 166.81005372000556, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06854439674259787, "f1": 0.47096774193548385, "f1_std": 0.07388529204187261, "bacc": 0.47096774193548385, "bacc_std": 0.07389521055928061} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06861906329665027, "f1": 0.5651515151515152, "f1_std": 0.08171658089932742, "bacc": 0.5709677419354839, "bacc_std": 0.08726112593759} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07246047149041966, "f1": 0.646551724137931, "f1_std": 0.08223853051606658, "bacc": 0.6709677419354838, "bacc_std": 0.08954285395892439} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06313429592410327, "f1": 0.5017361111111112, "f1_std": 0.08174630542670099, "bacc": 0.5032258064516129, "bacc_std": 0.07559669291171636} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06661178784265949, "f1": 0.5370967741935484, "f1_std": 0.08187956836000952, "bacc": 0.5370967741935484, "bacc_std": 0.08180360379697668} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06915972254059245, "f1": 0.6676492262343405, "f1_std": 0.08060199453465082, "bacc": 0.6870967741935483, "bacc_std": 0.08880292483300863} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05844454145544254, "f1": 0.5176470588235295, "f1_std": 0.07802454966968896, "bacc": 0.5193548387096775, "bacc_std": 0.07039928304371941} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05945417003310034, "f1": 0.7354838709677419, "f1_std": 0.07914969659955917, "bacc": 0.7354838709677419, "bacc_std": 0.08239298353435191} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 2.782559402207126, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04988913348115296, "f1": 0.7864583333333333, "f1_std": 0.0793622321393813, "bacc": 0.7677419354838709, "bacc_std": 0.08111424247927408} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0605377252698244, "f1": 0.7119437939110069, "f1_std": 0.07946924053532653, "bacc": 0.7193548387096774, "bacc_std": 0.08497718712045783} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06467314056123717, "f1": 0.6232247284878863, "f1_std": 0.08803529240777497, "bacc": 0.6193548387096774, "bacc_std": 0.08626547577156027} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.053203301727370596, "f1": 0.6117424242424243, "f1_std": 0.08968084875942044, "bacc": 0.6016129032258064, "bacc_std": 0.07753094270007646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06422512087260354, "f1": 0.5370967741935484, "f1_std": 0.08218491986612324, "bacc": 0.5370967741935484, "bacc_std": 0.0827064062560818} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.060747201471467886, "f1": 0.6917293233082706, "f1_std": 0.08600022059563367, "bacc": 0.685483870967742, "bacc_std": 0.08421284440850745} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05710443171380253, "f1": 0.6917293233082706, "f1_std": 0.08307724176257154, "bacc": 0.685483870967742, "bacc_std": 0.08053784155908872} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 166.81005372000556, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.0702495085838757, "f1": 0.5287356321839081, "f1_std": 0.07650893600106642, "bacc": 0.5387096774193548, "bacc_std": 0.08478468161910417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04995074313797236, "f1": 0.5512437810945273, "f1_std": 0.0867133336704071, "bacc": 0.5516129032258065, "bacc_std": 0.06897484943309713} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04640416810342163, "f1": 0.7670454545454546, "f1_std": 0.08796551978663088, "bacc": 0.7338709677419355, "bacc_std": 0.08012979137087828} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.057402510272668454, "f1": 0.5918552036199095, "f1_std": 0.0866448842013915, "bacc": 0.5854838709677419, "bacc_std": 0.07827414369560307} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06795265524720683, "f1": 0.6479313036690086, "f1_std": 0.08487008256246929, "bacc": 0.6532258064516129, "bacc_std": 0.08836056912955609} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06185186963636091, "f1": 0.6440972222222222, "f1_std": 0.09106825371084105, "bacc": 0.635483870967742, "bacc_std": 0.08594542977415073} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.062206426519270844, "f1": 0.6917293233082706, "f1_std": 0.09149701510828176, "bacc": 0.685483870967742, "bacc_std": 0.08962201096335376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0713686377263996, "f1": 0.5651515151515152, "f1_std": 0.08213550580363621, "bacc": 0.5709677419354839, "bacc_std": 0.08697436864530089} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05533631961819983, "f1": 0.6117424242424243, "f1_std": 0.093732406755215, "bacc": 0.6016129032258064, "bacc_std": 0.08050176929435361} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.056979681611933064, "f1": 0.7354838709677419, "f1_std": 0.07978659566087543, "bacc": 0.7354838709677419, "bacc_std": 0.08190426405470835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.072226937617132, "f1": 0.6259649122807017, "f1_std": 0.07883462075722913, "bacc": 0.6548387096774193, "bacc_std": 0.08874544000537818} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06616783232434982, "f1": 0.6232247284878863, "f1_std": 0.08880106871907206, "bacc": 0.6193548387096774, "bacc_std": 0.08776678229787142} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06576145306409922, "f1": 0.6479313036690086, "f1_std": 0.08466427643853076, "bacc": 0.6532258064516129, "bacc_std": 0.08824678496824713} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0721152491586458, "f1": 0.5651515151515152, "f1_std": 0.0835022837242658, "bacc": 0.5709677419354839, "bacc_std": 0.08887420396894379} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0644881738670673, "f1": 0.5729166666666666, "f1_std": 0.09085050858208621, "bacc": 0.5693548387096774, "bacc_std": 0.08554439269612112} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06278668455403047, "f1": 0.6232247284878863, "f1_std": 0.08961138413190642, "bacc": 0.6193548387096774, "bacc_std": 0.08635920675325236} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06195043210561631, "f1": 0.6232247284878863, "f1_std": 0.08593375397506382, "bacc": 0.6193548387096774, "bacc_std": 0.08381334614914214} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06934689803089991, "f1": 0.5839188134270101, "f1_std": 0.08492574911593297, "bacc": 0.5870967741935484, "bacc_std": 0.08767208533704736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.0558935846220622, "f1": 0.7152777777777778, "f1_std": 0.08551549303885568, "bacc": 0.7016129032258065, "bacc_std": 0.08382059895737658} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04808506739235483, "f1": 0.569327731092437, "f1_std": 0.09088713192558485, "bacc": 0.567741935483871, "bacc_std": 0.06924347330408347} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05089268549840672, "f1": 0.5512437810945273, "f1_std": 0.08692904998286015, "bacc": 0.5516129032258065, "bacc_std": 0.06989022342119622} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06553389394202999, "f1": 0.603225806451613, "f1_std": 0.08510141540813855, "bacc": 0.603225806451613, "bacc_std": 0.08593462867684518} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06016008784025382, "f1": 0.6917293233082706, "f1_std": 0.08510281239307298, "bacc": 0.685483870967742, "bacc_std": 0.08501887921574204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.047388530403477465, "f1": 0.4831932773109243, "f1_std": 0.07325283586154382, "bacc": 0.5016129032258064, "bacc_std": 0.05654444891794352} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.03414285696506755, "f1": 0.5119047619047619, "f1_std": 0.0804767123926464, "bacc": 0.5338709677419355, "bacc_std": 0.05223709017556784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06368153962487595, "f1": 0.5370967741935484, "f1_std": 0.08146514534488593, "bacc": 0.5370967741935484, "bacc_std": 0.08067339660554235} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.058037219590730944, "f1": 0.6917293233082706, "f1_std": 0.08464949506529064, "bacc": 0.685483870967742, "bacc_std": 0.08547121050495364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.058678101295828594, "f1": 0.5918552036199095, "f1_std": 0.08822331111619984, "bacc": 0.5854838709677419, "bacc_std": 0.07977073933014434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.048301920772032436, "f1": 0.8287385129490392, "f1_std": 0.07029209597466063, "bacc": 0.8177419354838709, "bacc_std": 0.07489215909744354} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07357318589222533, "f1": 0.6259649122807017, "f1_std": 0.07965129561349939, "bacc": 0.6548387096774193, "bacc_std": 0.08963525673963588} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04858445978193234, "f1": 0.6328358208955224, "f1_std": 0.09154629324621057, "bacc": 0.6177419354838709, "bacc_std": 0.07559072744008988} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05656793255419784, "f1": 0.7354838709677419, "f1_std": 0.07733130630273204, "bacc": 0.7354838709677419, "bacc_std": 0.07991865244963306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06362673722163155, "f1": 0.6232247284878863, "f1_std": 0.08790959282009606, "bacc": 0.6193548387096774, "bacc_std": 0.0844204641415154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05639657319530776, "f1": 0.5340909090909092, "f1_std": 0.08348941445768271, "bacc": 0.535483870967742, "bacc_std": 0.07093184546698865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 10000.0, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06712016092337336, "f1": 0.7054597701149425, "f1_std": 0.07612082117471725, "bacc": 0.7370967741935484, "bacc_std": 0.08222591471664242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05891542047528722, "f1": 0.5176470588235295, "f1_std": 0.07971942808161085, "bacc": 0.5193548387096775, "bacc_std": 0.07137160590201301} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05540439187379836, "f1": 0.7602339181286549, "f1_std": 0.0834844677655417, "bacc": 0.7516129032258064, "bacc_std": 0.08603364441230932} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06527850005858644, "f1": 0.6676492262343405, "f1_std": 0.07894146058796256, "bacc": 0.6870967741935483, "bacc_std": 0.08541755644486351} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05144165532366765, "f1": 0.7402714932126697, "f1_std": 0.08705471972316535, "bacc": 0.717741935483871, "bacc_std": 0.08480580859644078} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.060853712302630335, "f1": 0.6917293233082706, "f1_std": 0.08841701427795176, "bacc": 0.685483870967742, "bacc_std": 0.08776885995816441} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06824072936712218, "f1": 0.646551724137931, "f1_std": 0.07775655194726162, "bacc": 0.6709677419354838, "bacc_std": 0.0867063785854821} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07330920231026282, "f1": 0.5839188134270101, "f1_std": 0.09022288303651897, "bacc": 0.5870967741935484, "bacc_std": 0.09443619674120071} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.5609756097560976, "acc_std": 0.07055378800450512, "f1": 0.4409090909090909, "f1_std": 0.0720413207493076, "bacc": 0.43870967741935485, "bacc_std": 0.07644827024133914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.04773524046895948, "f1": 0.4831932773109243, "f1_std": 0.07255433978787183, "bacc": 0.5016129032258064, "bacc_std": 0.05599748247648706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06380554933636577, "f1": 0.7119437939110069, "f1_std": 0.08133768450153317, "bacc": 0.7193548387096774, "bacc_std": 0.0862443637030937} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.04326614782815962, "f1": 0.7144278606965174, "f1_std": 0.08863239292078348, "bacc": 0.6838709677419355, "bacc_std": 0.0765210108326815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06722007858446644, "f1": 0.603225806451613, "f1_std": 0.08929294477663584, "bacc": 0.603225806451613, "bacc_std": 0.08999094942352683} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06190511055374208, "f1": 0.6232247284878863, "f1_std": 0.08719083339266026, "bacc": 0.6193548387096774, "bacc_std": 0.08517416441056819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.053761564061834896, "f1": 0.5918552036199095, "f1_std": 0.08504583232429394, "bacc": 0.5854838709677419, "bacc_std": 0.07536456486889055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.053036303550950854, "f1": 0.4564393939393939, "f1_std": 0.06543667812830219, "bacc": 0.4693548387096774, "bacc_std": 0.057379768430886005} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 377.24 | 1711.2 | 0.97829 | 0.042007 | 0.96482 | 0.070811 | 0.95699 | 0.083845 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 377.24 | 1711.2 | 0.74171 | 0.059147 | 0.63492 | 0.086979 | 0.63681 | 0.086429 | + + +done! total time: 0:04:37 diff --git a/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04ff67b09bc79b29b0ee16f9c476965e8aa4c662 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (adni_ad_vs_cn reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic +model: flat_mae +representation: reg +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..a72b51a6ec652d12d11a9378935863f89bf28fe8 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,adni_ad_vs_cn,,0.046415888336127774,train,0.924119241192412,0.011738390960547695,0.8851183063511832,0.019490739512148764,0.8549767669356811,0.023093692891588826 +flat_mae,reg,logistic,adni_ad_vs_cn,,0.046415888336127774,test,0.6829268292682927,0.06140258313738023,0.5176470588235295,0.08156945135302258,0.5173611111111112,0.07808156778842629 +flat_mae,reg,logistic,adni_ad_vs_cn,1,0.3593813663804626,train,0.994579945799458,0.0038129609402626627,0.9924192620593311,0.005331382527575832,0.9924192620593311,0.005977251715628222 +flat_mae,reg,logistic,adni_ad_vs_cn,1,0.3593813663804626,test,0.7804878048780488,0.049318110529245694,0.6328358208955224,0.09452044022079005,0.6177419354838709,0.07822401221753426 +flat_mae,reg,logistic,adni_ad_vs_cn,2,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,2,21.54434690031882,test,0.6585365853658537,0.06894246624967623,0.5651515151515152,0.08281958457496735,0.5709677419354839,0.08792482930743109 +flat_mae,reg,logistic,adni_ad_vs_cn,3,0.005994842503189409,train,0.8428184281842819,0.01255045237916734,0.7114077669902913,0.030503745921045884,0.6749322047826445,0.025436733797319984 +flat_mae,reg,logistic,adni_ad_vs_cn,3,0.005994842503189409,test,0.7317073170731707,0.02680992650920385,0.4225352112676056,0.009132940949010071,0.4838709677419355,0.01772914494963481 +flat_mae,reg,logistic,adni_ad_vs_cn,4,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,4,166.81005372000556,test,0.7560975609756098,0.06438793417882557,0.6693548387096775,0.0842186965123577,0.6693548387096775,0.08417202938445281 +flat_mae,reg,logistic,adni_ad_vs_cn,5,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,5,166.81005372000556,test,0.6341463414634146,0.05015915182297482,0.3880597014925373,0.01911015433665028,0.41935483870967744,0.03316976168938657 +flat_mae,reg,logistic,adni_ad_vs_cn,6,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,6,0.3593813663804626,test,0.6585365853658537,0.06256806406854018,0.5017361111111112,0.07635510608717559,0.5032258064516129,0.07123233161730617 +flat_mae,reg,logistic,adni_ad_vs_cn,7,0.3593813663804626,train,0.997289972899729,0.0025863599072987733,0.9961941891766453,0.003657832647427604,0.9941860465116279,0.005548644219728174 +flat_mae,reg,logistic,adni_ad_vs_cn,7,0.3593813663804626,test,0.7317073170731707,0.06336679246004644,0.6232247284878863,0.08913294173020493,0.6193548387096774,0.0878555087415302 +flat_mae,reg,logistic,adni_ad_vs_cn,8,0.046415888336127774,train,0.9159891598915989,0.012704218572082692,0.8683310887806899,0.02254722112965222,0.8319089489686909,0.02551346069508748 +flat_mae,reg,logistic,adni_ad_vs_cn,8,0.046415888336127774,test,0.7804878048780488,0.04839447267124521,0.6328358208955224,0.09068955641570942,0.6177419354838709,0.07507402458596327 +flat_mae,reg,logistic,adni_ad_vs_cn,9,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,9,0.3593813663804626,test,0.6829268292682927,0.06811983352309922,0.5839188134270101,0.0812889187114294,0.5870967741935484,0.08250962791863499 +flat_mae,reg,logistic,adni_ad_vs_cn,10,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,10,0.3593813663804626,test,0.6585365853658537,0.0720832851637604,0.5651515151515152,0.08507831633987775,0.5709677419354839,0.08945574249160446 +flat_mae,reg,logistic,adni_ad_vs_cn,11,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,11,166.81005372000556,test,0.7317073170731707,0.04804348119842374,0.5512437810945273,0.08020620483668531,0.5516129032258065,0.06441920550162798 +flat_mae,reg,logistic,adni_ad_vs_cn,12,0.046415888336127774,train,0.9105691056910569,0.013286059804910213,0.8626194478603744,0.022457804040536145,0.8324225491001725,0.02526515064844968 +flat_mae,reg,logistic,adni_ad_vs_cn,12,0.046415888336127774,test,0.7804878048780488,0.052219825473821334,0.6660633484162897,0.08949552868840517,0.6516129032258065,0.081677713588116 +flat_mae,reg,logistic,adni_ad_vs_cn,13,0.046415888336127774,train,0.924119241192412,0.012248598199081967,0.88400395185917,0.020392501550530684,0.8533979784698824,0.02381044665599888 +flat_mae,reg,logistic,adni_ad_vs_cn,13,0.046415888336127774,test,0.7560975609756098,0.051889485821753185,0.6117424242424243,0.08821110183564165,0.6016129032258064,0.07762874908629813 +flat_mae,reg,logistic,adni_ad_vs_cn,14,0.046415888336127774,train,0.9214092140921409,0.012340924253855499,0.8768258572464519,0.021679024416673607,0.8394896869093598,0.024980388505198604 +flat_mae,reg,logistic,adni_ad_vs_cn,14,0.046415888336127774,test,0.7560975609756098,0.04414919812883801,0.569327731092437,0.08812386527532683,0.567741935483871,0.06645211399748431 +flat_mae,reg,logistic,adni_ad_vs_cn,15,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,15,0.3593813663804626,test,0.7804878048780488,0.05689979921322791,0.6917293233082706,0.08189692871094378,0.685483870967742,0.08190800885549725 +flat_mae,reg,logistic,adni_ad_vs_cn,16,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,16,2.782559402207126,test,0.6829268292682927,0.07111136125112988,0.5839188134270101,0.08684225326398431,0.5870967741935484,0.08975490247451777 +flat_mae,reg,logistic,adni_ad_vs_cn,17,0.3593813663804626,train,0.997289972899729,0.002603183815454055,0.996224870837383,0.00359984353753281,0.9982332155477032,0.0016971286712059183 +flat_mae,reg,logistic,adni_ad_vs_cn,17,0.3593813663804626,test,0.8048780487804879,0.051815900715170914,0.6893939393939394,0.09412733976746422,0.667741935483871,0.08260936616226716 +flat_mae,reg,logistic,adni_ad_vs_cn,18,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,18,21.54434690031882,test,0.7317073170731707,0.06919123161085003,0.6479313036690086,0.08723671016746243,0.6532258064516129,0.09141587877836091 +flat_mae,reg,logistic,adni_ad_vs_cn,19,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,19,0.3593813663804626,test,0.7560975609756098,0.05255639728801149,0.6117424242424243,0.08866717976964096,0.6016129032258064,0.07618514027246434 +flat_mae,reg,logistic,adni_ad_vs_cn,20,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,20,0.3593813663804626,test,0.7317073170731707,0.06781173855448829,0.6479313036690086,0.08463774636068129,0.6532258064516129,0.08789740235223421 +flat_mae,reg,logistic,adni_ad_vs_cn,21,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,21,2.782559402207126,test,0.7560975609756098,0.06376831950332709,0.6693548387096775,0.08765688769037329,0.6693548387096775,0.0887581579548493 +flat_mae,reg,logistic,adni_ad_vs_cn,22,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,22,0.3593813663804626,test,0.7560975609756098,0.045739635085203116,0.569327731092437,0.09079749628260578,0.567741935483871,0.06930265823996876 +flat_mae,reg,logistic,adni_ad_vs_cn,23,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,23,166.81005372000556,test,0.7317073170731707,0.06673789373691841,0.6479313036690086,0.08773488436860939,0.6532258064516129,0.09214356412679939 +flat_mae,reg,logistic,adni_ad_vs_cn,24,0.005994842503189409,train,0.8373983739837398,0.012992103948969198,0.7014563106796117,0.03102940659784173,0.6673514668419755,0.02539231691328091 +flat_mae,reg,logistic,adni_ad_vs_cn,24,0.005994842503189409,test,0.7317073170731707,0.0409968295809937,0.4972129319955407,0.07637354001625225,0.5177419354838709,0.05463827993005345 +flat_mae,reg,logistic,adni_ad_vs_cn,25,0.046415888336127774,train,0.9349593495934959,0.010849990075954944,0.9005748158792886,0.018055840343081973,0.8685594543512203,0.02203438821897497 +flat_mae,reg,logistic,adni_ad_vs_cn,25,0.046415888336127774,test,0.7073170731707317,0.05600057363520254,0.5340909090909092,0.08192646001331498,0.535483870967742,0.07019488257745314 +flat_mae,reg,logistic,adni_ad_vs_cn,26,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,26,166.81005372000556,test,0.7317073170731707,0.06154589896940664,0.6232247284878863,0.08426645717443598,0.6193548387096774,0.08255656774752582 +flat_mae,reg,logistic,adni_ad_vs_cn,27,0.005994842503189409,train,0.8292682926829268,0.01325517888175689,0.6844055877601445,0.03166500126705484,0.6539567754129345,0.025442722932830578 +flat_mae,reg,logistic,adni_ad_vs_cn,27,0.005994842503189409,test,0.7560975609756098,1.1102230246251565e-16,0.4305555555555556,1.6653345369377348e-16,0.5,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,28,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,28,166.81005372000556,test,0.7317073170731707,0.054647123894224915,0.5918552036199095,0.0849188158949194,0.5854838709677419,0.07553217478452147 +flat_mae,reg,logistic,adni_ad_vs_cn,29,0.046415888336127774,train,0.9159891598915989,0.012666582014274973,0.8696573648887318,0.02181588026193274,0.8359561180047662,0.024948342083328037 +flat_mae,reg,logistic,adni_ad_vs_cn,29,0.046415888336127774,test,0.7560975609756098,0.06060570727998967,0.6440972222222222,0.09183846297249933,0.635483870967742,0.08628848330870674 +flat_mae,reg,logistic,adni_ad_vs_cn,30,0.046415888336127774,train,0.926829268292683,0.012117829132702507,0.887597730067579,0.020542193333641642,0.8551647629221792,0.024149614469781595 +flat_mae,reg,logistic,adni_ad_vs_cn,30,0.046415888336127774,test,0.7560975609756098,0.05250509734184729,0.6117424242424243,0.09077262545076793,0.6016129032258064,0.07725088356417945 +flat_mae,reg,logistic,adni_ad_vs_cn,31,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,31,2.782559402207126,test,0.6829268292682927,0.06408165151334794,0.5547201336675021,0.08401543596915236,0.5532258064516129,0.08062792454504338 +flat_mae,reg,logistic,adni_ad_vs_cn,32,0.046415888336127774,train,0.9159891598915989,0.013167916547031438,0.8709455419294425,0.02256776446108774,0.8400032870408415,0.026162713283682807 +flat_mae,reg,logistic,adni_ad_vs_cn,32,0.046415888336127774,test,0.7073170731707317,0.0598203934833438,0.5729166666666666,0.08525605507072617,0.5693548387096774,0.07867132983086608 +flat_mae,reg,logistic,adni_ad_vs_cn,33,0.046415888336127774,train,0.9295392953929539,0.011901920701910358,0.893324141611813,0.019446435983267868,0.8650258854466266,0.022898876623182466 +flat_mae,reg,logistic,adni_ad_vs_cn,33,0.046415888336127774,test,0.7317073170731707,0.06726648859428354,0.6676492262343405,0.0804246735772784,0.6870967741935483,0.08818206003943867 +flat_mae,reg,logistic,adni_ad_vs_cn,34,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,34,166.81005372000556,test,0.7560975609756098,0.06053625125200146,0.6693548387096775,0.08113963712960158,0.6693548387096775,0.08288517909512913 +flat_mae,reg,logistic,adni_ad_vs_cn,35,0.046415888336127774,train,0.924119241192412,0.012375819964490421,0.88400395185917,0.02059009256789467,0.8533979784698824,0.024071680593747806 +flat_mae,reg,logistic,adni_ad_vs_cn,35,0.046415888336127774,test,0.8292682926829268,0.04806823921940326,0.7144278606965174,0.09581154900137538,0.6838709677419355,0.08420286754752362 +flat_mae,reg,logistic,adni_ad_vs_cn,36,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,36,0.3593813663804626,test,0.6585365853658537,0.05393584661676961,0.4564393939393939,0.0640070227304991,0.4693548387096774,0.05601308045538986 +flat_mae,reg,logistic,adni_ad_vs_cn,37,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,37,166.81005372000556,test,0.6585365853658537,0.07435264943690126,0.5651515151515152,0.08622416630061724,0.5709677419354839,0.09096853688739819 +flat_mae,reg,logistic,adni_ad_vs_cn,38,0.046415888336127774,train,0.9214092140921409,0.01224442427907192,0.8768258572464519,0.02143226656423496,0.8394896869093598,0.024782254880034212 +flat_mae,reg,logistic,adni_ad_vs_cn,38,0.046415888336127774,test,0.7073170731707317,0.06548403949750933,0.603225806451613,0.08614792577928539,0.603225806451613,0.08638997869147935 +flat_mae,reg,logistic,adni_ad_vs_cn,39,0.046415888336127774,train,0.9322493224932249,0.011727217603886347,0.8969331322272499,0.019464472633308205,0.8667926698989235,0.023588793491242217 +flat_mae,reg,logistic,adni_ad_vs_cn,39,0.046415888336127774,test,0.6829268292682927,0.05012688243500868,0.4696517412935323,0.06550062528785579,0.4854838709677419,0.053599108799493075 +flat_mae,reg,logistic,adni_ad_vs_cn,40,0.046415888336127774,train,0.9349593495934959,0.011984307323983244,0.9015299768724425,0.019774031565752085,0.8726066233872956,0.0238379194899873 +flat_mae,reg,logistic,adni_ad_vs_cn,40,0.046415888336127774,test,0.7073170731707317,0.05444217142752666,0.5340909090909092,0.08207845991675787,0.535483870967742,0.07003204402576466 +flat_mae,reg,logistic,adni_ad_vs_cn,41,0.046415888336127774,train,0.924119241192412,0.01184645315161511,0.8828571428571428,0.020193472461065568,0.8493508094338073,0.02377897102942531 +flat_mae,reg,logistic,adni_ad_vs_cn,41,0.046415888336127774,test,0.7804878048780488,0.06035950200631978,0.6917293233082706,0.08600459291349043,0.685483870967742,0.08702527644825324 +flat_mae,reg,logistic,adni_ad_vs_cn,42,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,42,0.3593813663804626,test,0.6829268292682927,0.06472448357877385,0.5547201336675021,0.08205930827950869,0.5532258064516129,0.07938135539134432 +flat_mae,reg,logistic,adni_ad_vs_cn,43,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,43,0.3593813663804626,test,0.7804878048780488,0.06109732242784841,0.6917293233082706,0.08952848509961356,0.685483870967742,0.08848076382535124 +flat_mae,reg,logistic,adni_ad_vs_cn,44,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,44,166.81005372000556,test,0.8536585365853658,0.03619430016817414,0.7415966386554622,0.08701391017309015,0.7,0.07419831534475699 +flat_mae,reg,logistic,adni_ad_vs_cn,45,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,45,1291.5496650148827,test,0.7804878048780488,0.055519374562529034,0.6660633484162897,0.08779198042967831,0.6516129032258065,0.08188164823950671 +flat_mae,reg,logistic,adni_ad_vs_cn,46,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,46,166.81005372000556,test,0.8048780487804879,0.05548132366329493,0.7152777777777778,0.08472572518414193,0.7016129032258065,0.08400080520896465 +flat_mae,reg,logistic,adni_ad_vs_cn,47,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,47,0.3593813663804626,test,0.8048780487804879,0.06330607969502898,0.7354838709677419,0.0871432864885108,0.7354838709677419,0.0898680755850777 +flat_mae,reg,logistic,adni_ad_vs_cn,48,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,48,21.54434690031882,test,0.7073170731707317,0.05822148933465231,0.5729166666666666,0.08577293827416362,0.5693548387096774,0.07987640661037822 +flat_mae,reg,logistic,adni_ad_vs_cn,49,0.046415888336127774,train,0.9132791327913279,0.012723761654810404,0.8647732478240953,0.022139095343333613,0.8301421645163941,0.024943200743245457 +flat_mae,reg,logistic,adni_ad_vs_cn,49,0.046415888336127774,test,0.7560975609756098,0.0456586147591576,0.569327731092437,0.0929207908577623,0.567741935483871,0.07005997690696132 +flat_mae,reg,logistic,adni_ad_vs_cn,50,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,50,1291.5496650148827,test,0.8292682926829268,0.05953090469596682,0.7759562841530054,0.07701618233726301,0.7854838709677419,0.08059918044010785 +flat_mae,reg,logistic,adni_ad_vs_cn,51,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,51,2.782559402207126,test,0.7317073170731707,0.06454158120884192,0.6479313036690086,0.08214992073233642,0.6532258064516129,0.08570454025664881 +flat_mae,reg,logistic,adni_ad_vs_cn,52,0.3593813663804626,train,0.997289972899729,0.002620056667259768,0.9961941891766453,0.003709120822807379,0.9941860465116279,0.005620935524528217 +flat_mae,reg,logistic,adni_ad_vs_cn,52,0.3593813663804626,test,0.7804878048780488,0.055212530800591866,0.6660633484162897,0.0902975315807485,0.6516129032258065,0.08266304961149087 +flat_mae,reg,logistic,adni_ad_vs_cn,53,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,53,166.81005372000556,test,0.7317073170731707,0.07078860621441767,0.6676492262343405,0.08257272367233344,0.6870967741935483,0.08808893339861079 +flat_mae,reg,logistic,adni_ad_vs_cn,54,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,54,2.782559402207126,test,0.6585365853658537,0.06345175273890785,0.5370967741935484,0.07843050047658243,0.5370967741935484,0.07862973696954674 +flat_mae,reg,logistic,adni_ad_vs_cn,55,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,55,0.3593813663804626,test,0.7804878048780488,0.05228622093309674,0.6660633484162897,0.08691037742594464,0.6516129032258065,0.08020655566273817 +flat_mae,reg,logistic,adni_ad_vs_cn,56,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,56,21.54434690031882,test,0.7073170731707317,0.0661582836906485,0.603225806451613,0.0855117480071142,0.603225806451613,0.08669549735243615 +flat_mae,reg,logistic,adni_ad_vs_cn,57,0.046415888336127774,train,0.9159891598915989,0.013084576633036034,0.8709455419294425,0.022215747225991952,0.8400032870408415,0.025765990670113495 +flat_mae,reg,logistic,adni_ad_vs_cn,57,0.046415888336127774,test,0.7317073170731707,0.056614353382541255,0.5918552036199095,0.08890666977246504,0.5854838709677419,0.07794857265651724 +flat_mae,reg,logistic,adni_ad_vs_cn,58,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,58,21.54434690031882,test,0.7073170731707317,0.0663008483996456,0.603225806451613,0.08847824812709901,0.603225806451613,0.08885960936724324 +flat_mae,reg,logistic,adni_ad_vs_cn,59,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,59,166.81005372000556,test,0.6585365853658537,0.06445619313019965,0.5017361111111112,0.0807528570369315,0.5032258064516129,0.07536294000585927 +flat_mae,reg,logistic,adni_ad_vs_cn,60,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,60,10000.0,test,0.8780487804878049,0.04786566568240238,0.8287385129490392,0.07273561119783759,0.8177419354838709,0.07774845133984352 +flat_mae,reg,logistic,adni_ad_vs_cn,61,0.3593813663804626,train,0.997289972899729,0.0028782274019665587,0.9961941891766453,0.004076538373209165,0.9941860465116279,0.006174801810032905 +flat_mae,reg,logistic,adni_ad_vs_cn,61,0.3593813663804626,test,0.7073170731707317,0.06463977886180301,0.5729166666666666,0.09091953175202928,0.5693548387096774,0.08433776420052523 +flat_mae,reg,logistic,adni_ad_vs_cn,62,0.046415888336127774,train,0.9159891598915989,0.013363109282032248,0.8696573648887318,0.022859469024497158,0.8359561180047662,0.02601963563023946 +flat_mae,reg,logistic,adni_ad_vs_cn,62,0.046415888336127774,test,0.6829268292682927,0.06056674628459645,0.5176470588235295,0.08039419130446027,0.5193548387096775,0.07257490299866304 +flat_mae,reg,logistic,adni_ad_vs_cn,63,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,63,2.782559402207126,test,0.7560975609756098,0.05987373189942769,0.6440972222222222,0.08967527169052836,0.635483870967742,0.08444695636389692 +flat_mae,reg,logistic,adni_ad_vs_cn,64,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,64,21.54434690031882,test,0.6341463414634146,0.06665455174474345,0.48621553884711777,0.07986309629141092,0.48709677419354835,0.07687897018165285 +flat_mae,reg,logistic,adni_ad_vs_cn,65,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,65,0.3593813663804626,test,0.7560975609756098,0.0619333755749921,0.6693548387096775,0.08486918368441576,0.6693548387096775,0.08914935824178408 +flat_mae,reg,logistic,adni_ad_vs_cn,66,0.046415888336127774,train,0.9159891598915989,0.012689294952361454,0.8696573648887318,0.02175702777244918,0.8359561180047662,0.024274825667337283 +flat_mae,reg,logistic,adni_ad_vs_cn,66,0.046415888336127774,test,0.8536585365853658,0.04437177316065455,0.7670454545454546,0.08397566903251061,0.7338709677419355,0.07923723379550145 +flat_mae,reg,logistic,adni_ad_vs_cn,67,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,67,0.3593813663804626,test,0.8292682926829268,0.05697617356887757,0.7759562841530054,0.07338519126041977,0.7854838709677419,0.0782436228463958 +flat_mae,reg,logistic,adni_ad_vs_cn,68,0.046415888336127774,train,0.924119241192412,0.012789334280355249,0.88400395185917,0.021505037060396845,0.8533979784698824,0.025026972260539897 +flat_mae,reg,logistic,adni_ad_vs_cn,68,0.046415888336127774,test,0.7073170731707317,0.046467581914137246,0.4831932773109243,0.0713735589892931,0.5016129032258064,0.054718261936816455 +flat_mae,reg,logistic,adni_ad_vs_cn,69,0.3593813663804626,train,0.997289972899729,0.002674119841971101,0.9961941891766453,0.0037838572736313843,0.9941860465116279,0.005736919893531013 +flat_mae,reg,logistic,adni_ad_vs_cn,69,0.3593813663804626,test,0.8048780487804879,0.05421114557524685,0.6893939393939394,0.09439795952001184,0.667741935483871,0.08451777756333621 +flat_mae,reg,logistic,adni_ad_vs_cn,70,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,70,21.54434690031882,test,0.7317073170731707,0.057022095136270844,0.5918552036199095,0.08667194612139598,0.5854838709677419,0.07785355151739044 +flat_mae,reg,logistic,adni_ad_vs_cn,71,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,71,21.54434690031882,test,0.6829268292682927,0.05993998584942102,0.5547201336675021,0.08037830553517925,0.5532258064516129,0.0781105865122402 +flat_mae,reg,logistic,adni_ad_vs_cn,72,0.046415888336127774,train,0.9349593495934959,0.01117333886728334,0.9015299768724425,0.01827213707464206,0.8726066233872956,0.021952024761447315 +flat_mae,reg,logistic,adni_ad_vs_cn,72,0.046415888336127774,test,0.7317073170731707,0.03999512165374124,0.4972129319955407,0.0719958327686336,0.5177419354838709,0.051018908065823286 +flat_mae,reg,logistic,adni_ad_vs_cn,73,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,73,166.81005372000556,test,0.7073170731707317,0.07230933615503214,0.6272727272727273,0.08478423970313026,0.6370967741935484,0.09070430718743026 +flat_mae,reg,logistic,adni_ad_vs_cn,74,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,74,166.81005372000556,test,0.6829268292682927,0.05157239645532434,0.4696517412935323,0.06904660937558106,0.4854838709677419,0.05717631374334861 +flat_mae,reg,logistic,adni_ad_vs_cn,75,0.046415888336127774,train,0.9186991869918699,0.013241638344199958,0.8757185198491109,0.022082009250893923,0.8458172405292136,0.025422464601075615 +flat_mae,reg,logistic,adni_ad_vs_cn,75,0.046415888336127774,test,0.7804878048780488,0.04089434450207767,0.5886287625418061,0.09710428287772914,0.5838709677419355,0.06970419557195238 +flat_mae,reg,logistic,adni_ad_vs_cn,76,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,76,166.81005372000556,test,0.5609756097560976,0.0679591156794012,0.4409090909090909,0.07093716542307071,0.43870967741935485,0.07517401657454557 +flat_mae,reg,logistic,adni_ad_vs_cn,77,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,77,0.3593813663804626,test,0.6585365853658537,0.06619848293856169,0.5370967741935484,0.08423062940666688,0.5370967741935484,0.0848830358261991 +flat_mae,reg,logistic,adni_ad_vs_cn,78,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,78,166.81005372000556,test,0.6585365853658537,0.07537972877249947,0.5651515151515152,0.0874929670752916,0.5709677419354839,0.09245607326655707 +flat_mae,reg,logistic,adni_ad_vs_cn,79,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,79,2.782559402207126,test,0.8048780487804879,0.06311667335721316,0.764367816091954,0.07121047801743477,0.8032258064516129,0.07528641087773542 +flat_mae,reg,logistic,adni_ad_vs_cn,80,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,80,1291.5496650148827,test,0.5365853658536586,0.07674638967649206,0.49316851008458035,0.07664905634533943,0.5241935483870968,0.09216529121804462 +flat_mae,reg,logistic,adni_ad_vs_cn,81,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,81,0.3593813663804626,test,0.7317073170731707,0.05083207707234085,0.5512437810945273,0.08797747365537119,0.5516129032258065,0.07021280118733564 +flat_mae,reg,logistic,adni_ad_vs_cn,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,82,2.782559402207126,test,0.7073170731707317,0.06386099941033722,0.5729166666666666,0.08674898198222204,0.5693548387096774,0.08119864928163086 +flat_mae,reg,logistic,adni_ad_vs_cn,83,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,83,2.782559402207126,test,0.7804878048780488,0.05989910215541123,0.6917293233082706,0.08304646809621834,0.685483870967742,0.08210139888108489 +flat_mae,reg,logistic,adni_ad_vs_cn,84,0.046415888336127774,train,0.9186991869918699,0.012874311685735,0.8744897959183674,0.02177639577942819,0.8417700714931383,0.024833233613757978 +flat_mae,reg,logistic,adni_ad_vs_cn,84,0.046415888336127774,test,0.6829268292682927,0.06007332422269291,0.5176470588235295,0.0805219436895911,0.5193548387096775,0.0720982518431241 +flat_mae,reg,logistic,adni_ad_vs_cn,85,0.3593813663804626,train,0.997289972899729,0.0026130283104582475,0.9961941891766453,0.0036965715345039585,0.9941860465116279,0.005605857247436579 +flat_mae,reg,logistic,adni_ad_vs_cn,85,0.3593813663804626,test,0.8780487804878049,0.0459443576255068,0.8144796380090498,0.08309751656660334,0.7838709677419355,0.08307495324343904 +flat_mae,reg,logistic,adni_ad_vs_cn,86,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,86,2.782559402207126,test,0.5365853658536586,0.07250520094578032,0.42593957258658804,0.06911039541625488,0.42258064516129035,0.07581430872116322 +flat_mae,reg,logistic,adni_ad_vs_cn,87,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,87,0.3593813663804626,test,0.8292682926829268,0.05115161585793795,0.7402714932126697,0.08816658716060931,0.717741935483871,0.08317206635805359 +flat_mae,reg,logistic,adni_ad_vs_cn,88,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,88,166.81005372000556,test,0.6829268292682927,0.06255193677868832,0.5547201336675021,0.08315616629039728,0.5532258064516129,0.0800031583663746 +flat_mae,reg,logistic,adni_ad_vs_cn,89,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,89,0.3593813663804626,test,0.7804878048780488,0.055901566417230736,0.6660633484162897,0.09269236883133694,0.6516129032258065,0.08573966816585206 +flat_mae,reg,logistic,adni_ad_vs_cn,90,0.046415888336127774,train,0.9295392953929539,0.011865432075802719,0.8922893838692294,0.02012415112417176,0.8609787164105513,0.024197082164386256 +flat_mae,reg,logistic,adni_ad_vs_cn,90,0.046415888336127774,test,0.7560975609756098,0.058428273778031024,0.6440972222222222,0.08538213763898575,0.635483870967742,0.08055841134156763 +flat_mae,reg,logistic,adni_ad_vs_cn,91,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,91,0.3593813663804626,test,0.7317073170731707,0.06671792397947401,0.6232247284878863,0.08817952445985863,0.6193548387096774,0.087813628857435 +flat_mae,reg,logistic,adni_ad_vs_cn,92,0.046415888336127774,train,0.924119241192412,0.012672122653596463,0.88400395185917,0.021041396657425524,0.8533979784698824,0.02401522595120034 +flat_mae,reg,logistic,adni_ad_vs_cn,92,0.046415888336127774,test,0.7317073170731707,0.041199475574652804,0.4972129319955407,0.0780543515464708,0.5177419354838709,0.05512781440358137 +flat_mae,reg,logistic,adni_ad_vs_cn,93,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,93,0.3593813663804626,test,0.6097560975609756,0.07581247411481751,0.5287356321839081,0.08293064773941654,0.5387096774193548,0.09251281515412843 +flat_mae,reg,logistic,adni_ad_vs_cn,94,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,94,21.54434690031882,test,0.6585365853658537,0.06096780437847264,0.5017361111111112,0.07962288718350703,0.5032258064516129,0.07332873673829188 +flat_mae,reg,logistic,adni_ad_vs_cn,95,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,95,21.54434690031882,test,0.6097560975609756,0.06781145783130181,0.5030303030303029,0.07411082604164797,0.5048387096774194,0.07808989263628098 +flat_mae,reg,logistic,adni_ad_vs_cn,96,0.046415888336127774,train,0.9186991869918699,0.011975021925201721,0.8732249198350893,0.020988380181489904,0.837722902457063,0.024267197349392134 +flat_mae,reg,logistic,adni_ad_vs_cn,96,0.046415888336127774,test,0.8292682926829268,0.05344638771058278,0.7602339181286549,0.07824501895851142,0.7516129032258064,0.08147078401023557 +flat_mae,reg,logistic,adni_ad_vs_cn,97,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,97,21.54434690031882,test,0.6097560975609756,0.05794848889025399,0.4305555555555556,0.05931869251857854,0.43709677419354837,0.05743325991472957 +flat_mae,reg,logistic,adni_ad_vs_cn,98,0.046415888336127774,train,0.924119241192412,0.012333957082312776,0.8851183063511832,0.02029901216506481,0.8574451475059577,0.02381395213390482 +flat_mae,reg,logistic,adni_ad_vs_cn,98,0.046415888336127774,test,0.7073170731707317,0.06014891296700575,0.5729166666666666,0.08297146473528375,0.5693548387096774,0.07820883879831654 +flat_mae,reg,logistic,adni_ad_vs_cn,99,0.3593813663804626,train,0.997289972899729,0.0026999215378224035,0.9961941891766453,0.003823658841817042,0.9941860465116279,0.005792273531723638 +flat_mae,reg,logistic,adni_ad_vs_cn,99,0.3593813663804626,test,0.7073170731707317,0.06518962410550663,0.603225806451613,0.08336727147123128,0.603225806451613,0.08348537641360884 +flat_mae,reg,logistic,adni_ad_vs_cn,100,0.046415888336127774,train,0.9132791327913279,0.013676474434325335,0.8661224489795918,0.02335400825267269,0.8341893335524694,0.026276126011441154 +flat_mae,reg,logistic,adni_ad_vs_cn,100,0.046415888336127774,test,0.7317073170731707,0.05110426057715167,0.5512437810945273,0.08783032988154861,0.5516129032258065,0.07084018000413106 diff --git a/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e026e2e37a475c5df22796afc9fd8cb0ba27ddc8 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:25:56 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (adni_ad_vs_cn reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic +model: flat_mae +representation: reg +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:12:18 time: 4.5029 data: 3.6235 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:59 time: 0.2111 data: 0.0717 max mem: 3005 +extract (train) [ 40/164] eta: 0:00:37 time: 0.1779 data: 0.0577 max mem: 3005 +extract (train) [ 60/164] eta: 0:00:26 time: 0.1765 data: 0.0599 max mem: 3005 +extract (train) [ 80/164] eta: 0:00:19 time: 0.1649 data: 0.0512 max mem: 3005 +extract (train) [100/164] eta: 0:00:14 time: 0.1681 data: 0.0539 max mem: 3005 +extract (train) [120/164] eta: 0:00:09 time: 0.1751 data: 0.0561 max mem: 3005 +extract (train) [140/164] eta: 0:00:04 time: 0.1670 data: 0.0528 max mem: 3005 +extract (train) [160/164] eta: 0:00:00 time: 0.1513 data: 0.0449 max mem: 3005 +extract (train) [163/164] eta: 0:00:00 time: 0.1534 data: 0.0461 max mem: 3005 +extract (train) Total time: 0:00:33 (0.2029 s / it) +extract (validation) [ 0/21] eta: 0:01:16 time: 3.6216 data: 3.5162 max mem: 3005 +extract (validation) [20/21] eta: 0:00:00 time: 0.1408 data: 0.0400 max mem: 3005 +extract (validation) Total time: 0:00:06 (0.3228 s / it) +extract (test) [ 0/21] eta: 0:01:11 time: 3.4179 data: 3.3143 max mem: 3005 +extract (test) [20/21] eta: 0:00:00 time: 0.1453 data: 0.0443 max mem: 3005 +extract (test) Total time: 0:00:06 (0.3154 s / it) +feature extraction time: 0:00:46 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | adni_ad_vs_cn | | 0.046416 | train | 0.92412 | 0.011738 | 0.88512 | 0.019491 | 0.85498 | 0.023094 | +| flat_mae | reg | logistic | adni_ad_vs_cn | | 0.046416 | test | 0.68293 | 0.061403 | 0.51765 | 0.081569 | 0.51736 | 0.078082 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.049318110529245694, "f1": 0.6328358208955224, "f1_std": 0.09452044022079005, "bacc": 0.6177419354838709, "bacc_std": 0.07822401221753426} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06894246624967623, "f1": 0.5651515151515152, "f1_std": 0.08281958457496735, "bacc": 0.5709677419354839, "bacc_std": 0.08792482930743109} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.02680992650920385, "f1": 0.4225352112676056, "f1_std": 0.009132940949010071, "bacc": 0.4838709677419355, "bacc_std": 0.01772914494963481} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06438793417882557, "f1": 0.6693548387096775, "f1_std": 0.0842186965123577, "bacc": 0.6693548387096775, "bacc_std": 0.08417202938445281} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.05015915182297482, "f1": 0.3880597014925373, "f1_std": 0.01911015433665028, "bacc": 0.41935483870967744, "bacc_std": 0.03316976168938657} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06256806406854018, "f1": 0.5017361111111112, "f1_std": 0.07635510608717559, "bacc": 0.5032258064516129, "bacc_std": 0.07123233161730617} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06336679246004644, "f1": 0.6232247284878863, "f1_std": 0.08913294173020493, "bacc": 0.6193548387096774, "bacc_std": 0.0878555087415302} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04839447267124521, "f1": 0.6328358208955224, "f1_std": 0.09068955641570942, "bacc": 0.6177419354838709, "bacc_std": 0.07507402458596327} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06811983352309922, "f1": 0.5839188134270101, "f1_std": 0.0812889187114294, "bacc": 0.5870967741935484, "bacc_std": 0.08250962791863499} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0720832851637604, "f1": 0.5651515151515152, "f1_std": 0.08507831633987775, "bacc": 0.5709677419354839, "bacc_std": 0.08945574249160446} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04804348119842374, "f1": 0.5512437810945273, "f1_std": 0.08020620483668531, "bacc": 0.5516129032258065, "bacc_std": 0.06441920550162798} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.052219825473821334, "f1": 0.6660633484162897, "f1_std": 0.08949552868840517, "bacc": 0.6516129032258065, "bacc_std": 0.081677713588116} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.051889485821753185, "f1": 0.6117424242424243, "f1_std": 0.08821110183564165, "bacc": 0.6016129032258064, "bacc_std": 0.07762874908629813} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04414919812883801, "f1": 0.569327731092437, "f1_std": 0.08812386527532683, "bacc": 0.567741935483871, "bacc_std": 0.06645211399748431} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05689979921322791, "f1": 0.6917293233082706, "f1_std": 0.08189692871094378, "bacc": 0.685483870967742, "bacc_std": 0.08190800885549725} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07111136125112988, "f1": 0.5839188134270101, "f1_std": 0.08684225326398431, "bacc": 0.5870967741935484, "bacc_std": 0.08975490247451777} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.051815900715170914, "f1": 0.6893939393939394, "f1_std": 0.09412733976746422, "bacc": 0.667741935483871, "bacc_std": 0.08260936616226716} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06919123161085003, "f1": 0.6479313036690086, "f1_std": 0.08723671016746243, "bacc": 0.6532258064516129, "bacc_std": 0.09141587877836091} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05255639728801149, "f1": 0.6117424242424243, "f1_std": 0.08866717976964096, "bacc": 0.6016129032258064, "bacc_std": 0.07618514027246434} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06781173855448829, "f1": 0.6479313036690086, "f1_std": 0.08463774636068129, "bacc": 0.6532258064516129, "bacc_std": 0.08789740235223421} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06376831950332709, "f1": 0.6693548387096775, "f1_std": 0.08765688769037329, "bacc": 0.6693548387096775, "bacc_std": 0.0887581579548493} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.045739635085203116, "f1": 0.569327731092437, "f1_std": 0.09079749628260578, "bacc": 0.567741935483871, "bacc_std": 0.06930265823996876} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06673789373691841, "f1": 0.6479313036690086, "f1_std": 0.08773488436860939, "bacc": 0.6532258064516129, "bacc_std": 0.09214356412679939} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.0409968295809937, "f1": 0.4972129319955407, "f1_std": 0.07637354001625225, "bacc": 0.5177419354838709, "bacc_std": 0.05463827993005345} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05600057363520254, "f1": 0.5340909090909092, "f1_std": 0.08192646001331498, "bacc": 0.535483870967742, "bacc_std": 0.07019488257745314} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06154589896940664, "f1": 0.6232247284878863, "f1_std": 0.08426645717443598, "bacc": 0.6193548387096774, "bacc_std": 0.08255656774752582} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 1.1102230246251565e-16, "f1": 0.4305555555555556, "f1_std": 1.6653345369377348e-16, "bacc": 0.5, "bacc_std": 0.0} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.054647123894224915, "f1": 0.5918552036199095, "f1_std": 0.0849188158949194, "bacc": 0.5854838709677419, "bacc_std": 0.07553217478452147} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06060570727998967, "f1": 0.6440972222222222, "f1_std": 0.09183846297249933, "bacc": 0.635483870967742, "bacc_std": 0.08628848330870674} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05250509734184729, "f1": 0.6117424242424243, "f1_std": 0.09077262545076793, "bacc": 0.6016129032258064, "bacc_std": 0.07725088356417945} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06408165151334794, "f1": 0.5547201336675021, "f1_std": 0.08401543596915236, "bacc": 0.5532258064516129, "bacc_std": 0.08062792454504338} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0598203934833438, "f1": 0.5729166666666666, "f1_std": 0.08525605507072617, "bacc": 0.5693548387096774, "bacc_std": 0.07867132983086608} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06726648859428354, "f1": 0.6676492262343405, "f1_std": 0.0804246735772784, "bacc": 0.6870967741935483, "bacc_std": 0.08818206003943867} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06053625125200146, "f1": 0.6693548387096775, "f1_std": 0.08113963712960158, "bacc": 0.6693548387096775, "bacc_std": 0.08288517909512913} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.04806823921940326, "f1": 0.7144278606965174, "f1_std": 0.09581154900137538, "bacc": 0.6838709677419355, "bacc_std": 0.08420286754752362} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05393584661676961, "f1": 0.4564393939393939, "f1_std": 0.0640070227304991, "bacc": 0.4693548387096774, "bacc_std": 0.05601308045538986} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07435264943690126, "f1": 0.5651515151515152, "f1_std": 0.08622416630061724, "bacc": 0.5709677419354839, "bacc_std": 0.09096853688739819} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06548403949750933, "f1": 0.603225806451613, "f1_std": 0.08614792577928539, "bacc": 0.603225806451613, "bacc_std": 0.08638997869147935} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05012688243500868, "f1": 0.4696517412935323, "f1_std": 0.06550062528785579, "bacc": 0.4854838709677419, "bacc_std": 0.053599108799493075} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05444217142752666, "f1": 0.5340909090909092, "f1_std": 0.08207845991675787, "bacc": 0.535483870967742, "bacc_std": 0.07003204402576466} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06035950200631978, "f1": 0.6917293233082706, "f1_std": 0.08600459291349043, "bacc": 0.685483870967742, "bacc_std": 0.08702527644825324} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06472448357877385, "f1": 0.5547201336675021, "f1_std": 0.08205930827950869, "bacc": 0.5532258064516129, "bacc_std": 0.07938135539134432} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06109732242784841, "f1": 0.6917293233082706, "f1_std": 0.08952848509961356, "bacc": 0.685483870967742, "bacc_std": 0.08848076382535124} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 166.81005372000556, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.03619430016817414, "f1": 0.7415966386554622, "f1_std": 0.08701391017309015, "bacc": 0.7, "bacc_std": 0.07419831534475699} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 1291.5496650148827, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.055519374562529034, "f1": 0.6660633484162897, "f1_std": 0.08779198042967831, "bacc": 0.6516129032258065, "bacc_std": 0.08188164823950671} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05548132366329493, "f1": 0.7152777777777778, "f1_std": 0.08472572518414193, "bacc": 0.7016129032258065, "bacc_std": 0.08400080520896465} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06330607969502898, "f1": 0.7354838709677419, "f1_std": 0.0871432864885108, "bacc": 0.7354838709677419, "bacc_std": 0.0898680755850777} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05822148933465231, "f1": 0.5729166666666666, "f1_std": 0.08577293827416362, "bacc": 0.5693548387096774, "bacc_std": 0.07987640661037822} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0456586147591576, "f1": 0.569327731092437, "f1_std": 0.0929207908577623, "bacc": 0.567741935483871, "bacc_std": 0.07005997690696132} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 1291.5496650148827, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05953090469596682, "f1": 0.7759562841530054, "f1_std": 0.07701618233726301, "bacc": 0.7854838709677419, "bacc_std": 0.08059918044010785} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06454158120884192, "f1": 0.6479313036690086, "f1_std": 0.08214992073233642, "bacc": 0.6532258064516129, "bacc_std": 0.08570454025664881} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.055212530800591866, "f1": 0.6660633484162897, "f1_std": 0.0902975315807485, "bacc": 0.6516129032258065, "bacc_std": 0.08266304961149087} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.07078860621441767, "f1": 0.6676492262343405, "f1_std": 0.08257272367233344, "bacc": 0.6870967741935483, "bacc_std": 0.08808893339861079} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06345175273890785, "f1": 0.5370967741935484, "f1_std": 0.07843050047658243, "bacc": 0.5370967741935484, "bacc_std": 0.07862973696954674} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05228622093309674, "f1": 0.6660633484162897, "f1_std": 0.08691037742594464, "bacc": 0.6516129032258065, "bacc_std": 0.08020655566273817} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0661582836906485, "f1": 0.603225806451613, "f1_std": 0.0855117480071142, "bacc": 0.603225806451613, "bacc_std": 0.08669549735243615} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.056614353382541255, "f1": 0.5918552036199095, "f1_std": 0.08890666977246504, "bacc": 0.5854838709677419, "bacc_std": 0.07794857265651724} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0663008483996456, "f1": 0.603225806451613, "f1_std": 0.08847824812709901, "bacc": 0.603225806451613, "bacc_std": 0.08885960936724324} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06445619313019965, "f1": 0.5017361111111112, "f1_std": 0.0807528570369315, "bacc": 0.5032258064516129, "bacc_std": 0.07536294000585927} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 10000.0, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.04786566568240238, "f1": 0.8287385129490392, "f1_std": 0.07273561119783759, "bacc": 0.8177419354838709, "bacc_std": 0.07774845133984352} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06463977886180301, "f1": 0.5729166666666666, "f1_std": 0.09091953175202928, "bacc": 0.5693548387096774, "bacc_std": 0.08433776420052523} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06056674628459645, "f1": 0.5176470588235295, "f1_std": 0.08039419130446027, "bacc": 0.5193548387096775, "bacc_std": 0.07257490299866304} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05987373189942769, "f1": 0.6440972222222222, "f1_std": 0.08967527169052836, "bacc": 0.635483870967742, "bacc_std": 0.08444695636389692} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 21.54434690031882, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06665455174474345, "f1": 0.48621553884711777, "f1_std": 0.07986309629141092, "bacc": 0.48709677419354835, "bacc_std": 0.07687897018165285} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0619333755749921, "f1": 0.6693548387096775, "f1_std": 0.08486918368441576, "bacc": 0.6693548387096775, "bacc_std": 0.08914935824178408} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04437177316065455, "f1": 0.7670454545454546, "f1_std": 0.08397566903251061, "bacc": 0.7338709677419355, "bacc_std": 0.07923723379550145} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05697617356887757, "f1": 0.7759562841530054, "f1_std": 0.07338519126041977, "bacc": 0.7854838709677419, "bacc_std": 0.0782436228463958} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.046467581914137246, "f1": 0.4831932773109243, "f1_std": 0.0713735589892931, "bacc": 0.5016129032258064, "bacc_std": 0.054718261936816455} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05421114557524685, "f1": 0.6893939393939394, "f1_std": 0.09439795952001184, "bacc": 0.667741935483871, "bacc_std": 0.08451777756333621} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.057022095136270844, "f1": 0.5918552036199095, "f1_std": 0.08667194612139598, "bacc": 0.5854838709677419, "bacc_std": 0.07785355151739044} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 21.54434690031882, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05993998584942102, "f1": 0.5547201336675021, "f1_std": 0.08037830553517925, "bacc": 0.5532258064516129, "bacc_std": 0.0781105865122402} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.03999512165374124, "f1": 0.4972129319955407, "f1_std": 0.0719958327686336, "bacc": 0.5177419354838709, "bacc_std": 0.051018908065823286} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07230933615503214, "f1": 0.6272727272727273, "f1_std": 0.08478423970313026, "bacc": 0.6370967741935484, "bacc_std": 0.09070430718743026} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05157239645532434, "f1": 0.4696517412935323, "f1_std": 0.06904660937558106, "bacc": 0.4854838709677419, "bacc_std": 0.05717631374334861} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04089434450207767, "f1": 0.5886287625418061, "f1_std": 0.09710428287772914, "bacc": 0.5838709677419355, "bacc_std": 0.06970419557195238} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 166.81005372000556, "split": "test", "acc": 0.5609756097560976, "acc_std": 0.0679591156794012, "f1": 0.4409090909090909, "f1_std": 0.07093716542307071, "bacc": 0.43870967741935485, "bacc_std": 0.07517401657454557} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06619848293856169, "f1": 0.5370967741935484, "f1_std": 0.08423062940666688, "bacc": 0.5370967741935484, "bacc_std": 0.0848830358261991} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07537972877249947, "f1": 0.5651515151515152, "f1_std": 0.0874929670752916, "bacc": 0.5709677419354839, "bacc_std": 0.09245607326655707} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06311667335721316, "f1": 0.764367816091954, "f1_std": 0.07121047801743477, "bacc": 0.8032258064516129, "bacc_std": 0.07528641087773542} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 1291.5496650148827, "split": "test", "acc": 0.5365853658536586, "acc_std": 0.07674638967649206, "f1": 0.49316851008458035, "f1_std": 0.07664905634533943, "bacc": 0.5241935483870968, "bacc_std": 0.09216529121804462} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05083207707234085, "f1": 0.5512437810945273, "f1_std": 0.08797747365537119, "bacc": 0.5516129032258065, "bacc_std": 0.07021280118733564} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06386099941033722, "f1": 0.5729166666666666, "f1_std": 0.08674898198222204, "bacc": 0.5693548387096774, "bacc_std": 0.08119864928163086} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05989910215541123, "f1": 0.6917293233082706, "f1_std": 0.08304646809621834, "bacc": 0.685483870967742, "bacc_std": 0.08210139888108489} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06007332422269291, "f1": 0.5176470588235295, "f1_std": 0.0805219436895911, "bacc": 0.5193548387096775, "bacc_std": 0.0720982518431241} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.0459443576255068, "f1": 0.8144796380090498, "f1_std": 0.08309751656660334, "bacc": 0.7838709677419355, "bacc_std": 0.08307495324343904} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 2.782559402207126, "split": "test", "acc": 0.5365853658536586, "acc_std": 0.07250520094578032, "f1": 0.42593957258658804, "f1_std": 0.06911039541625488, "bacc": 0.42258064516129035, "bacc_std": 0.07581430872116322} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05115161585793795, "f1": 0.7402714932126697, "f1_std": 0.08816658716060931, "bacc": 0.717741935483871, "bacc_std": 0.08317206635805359} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06255193677868832, "f1": 0.5547201336675021, "f1_std": 0.08315616629039728, "bacc": 0.5532258064516129, "bacc_std": 0.0800031583663746} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.055901566417230736, "f1": 0.6660633484162897, "f1_std": 0.09269236883133694, "bacc": 0.6516129032258065, "bacc_std": 0.08573966816585206} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.058428273778031024, "f1": 0.6440972222222222, "f1_std": 0.08538213763898575, "bacc": 0.635483870967742, "bacc_std": 0.08055841134156763} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06671792397947401, "f1": 0.6232247284878863, "f1_std": 0.08817952445985863, "bacc": 0.6193548387096774, "bacc_std": 0.087813628857435} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.041199475574652804, "f1": 0.4972129319955407, "f1_std": 0.0780543515464708, "bacc": 0.5177419354838709, "bacc_std": 0.05512781440358137} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07581247411481751, "f1": 0.5287356321839081, "f1_std": 0.08293064773941654, "bacc": 0.5387096774193548, "bacc_std": 0.09251281515412843} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06096780437847264, "f1": 0.5017361111111112, "f1_std": 0.07962288718350703, "bacc": 0.5032258064516129, "bacc_std": 0.07332873673829188} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 21.54434690031882, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06781145783130181, "f1": 0.5030303030303029, "f1_std": 0.07411082604164797, "bacc": 0.5048387096774194, "bacc_std": 0.07808989263628098} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05344638771058278, "f1": 0.7602339181286549, "f1_std": 0.07824501895851142, "bacc": 0.7516129032258064, "bacc_std": 0.08147078401023557} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 21.54434690031882, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.05794848889025399, "f1": 0.4305555555555556, "f1_std": 0.05931869251857854, "bacc": 0.43709677419354837, "bacc_std": 0.05743325991472957} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06014891296700575, "f1": 0.5729166666666666, "f1_std": 0.08297146473528375, "bacc": 0.5693548387096774, "bacc_std": 0.07820883879831654} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06518962410550663, "f1": 0.603225806451613, "f1_std": 0.08336727147123128, "bacc": 0.603225806451613, "bacc_std": 0.08348537641360884} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05110426057715167, "f1": 0.5512437810945273, "f1_std": 0.08783032988154861, "bacc": 0.5516129032258065, "bacc_std": 0.07084018000413106} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | adni_ad_vs_cn | train | 100 | 169.87 | 1018 | 0.97385 | 0.042243 | 0.95831 | 0.070274 | 0.94855 | 0.08396 | +| flat_mae | reg | logistic | adni_ad_vs_cn | test | 100 | 169.87 | 1018 | 0.72829 | 0.066414 | 0.59998 | 0.093205 | 0.60016 | 0.085664 | + + +done! total time: 0:04:51 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/config.yaml b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b00e871a22619b3a2397292d495f25efba6b07bd --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..0ed542204240989a0c24e72b6ce9962b5ad80069 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 15, "eval/id_best": 34, "eval/lr_best": 0.0015299999999999997, "eval/wd_best": 0.05, "eval/train/loss": 6.865380419185385e-05, "eval/train/acc": 1.0, "eval/train/acc_std": 0.0, "eval/train/f1": 1.0, "eval/train/f1_std": 0.0, "eval/validation/loss": 0.0340086854994297, "eval/validation/acc": 0.9937996031746031, "eval/validation/acc_std": 0.0012946579660676415, "eval/validation/f1": 0.9929089573488176, "eval/validation/f1_std": 0.0016491498428427972, "eval/test/loss": 0.04517523571848869, "eval/test/acc": 0.9908730158730159, "eval/test/acc_std": 0.0012564808061108444, "eval/test/f1": 0.9883338200112077, "eval/test/f1_std": 0.0018212925829520544} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..e042b781605088f2fe235802b44711e7e76eb1e3 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 15, "eval/best/id_best": 34, "eval/best/lr_best": 0.0015299999999999997, "eval/best/wd_best": 0.05, "eval/best/train/loss": 6.865380419185385e-05, "eval/best/train/acc": 1.0, "eval/best/train/acc_std": 0.0, "eval/best/train/f1": 1.0, "eval/best/train/f1_std": 0.0, "eval/best/validation/loss": 0.0340086854994297, "eval/best/validation/acc": 0.9937996031746031, "eval/best/validation/acc_std": 0.0012946579660676415, "eval/best/validation/f1": 0.9929089573488176, "eval/best/validation/f1_std": 0.0016491498428427972, "eval/best/test/loss": 0.04517523571848869, "eval/best/test/acc": 0.9908730158730159, "eval/best/test/acc_std": 0.0012564808061108444, "eval/best/test/f1": 0.9883338200112077, "eval/best/test/f1_std": 0.0018212925829520544} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..3c15bc0027d7cbedccd4ea7c9b853e1ee11aada5 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 34, "eval/last/lr_best": 0.0015299999999999997, "eval/last/wd_best": 0.05, "eval/last/train/loss": 6.615406164200976e-05, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.03395787626504898, "eval/last/validation/acc": 0.9935515873015873, "eval/last/validation/acc_std": 0.0012990421779304128, "eval/last/validation/f1": 0.992730247533712, "eval/last/validation/f1_std": 0.0016488955659978133, "eval/last/test/loss": 0.045090653002262115, "eval/last/test/acc": 0.9908730158730159, "eval/last/test/acc_std": 0.0012564808061108444, "eval/last/test/f1": 0.9883338200112077, "eval/last/test/f1_std": 0.0018212925829520544} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..9df24bc8c01fa4a57b3654af4cf039bab1f5f108 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",train,6.865380419185385e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",validation,0.0340086854994297,0.9937996031746031,0.0012946579660676415,0.9929089573488176,0.0016491498428427972 +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",test,0.04517523571848869,0.9908730158730159,0.0012564808061108444,0.9883338200112077,0.0018212925829520544 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..9df24bc8c01fa4a57b3654af4cf039bab1f5f108 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",train,6.865380419185385e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",validation,0.0340086854994297,0.9937996031746031,0.0012946579660676415,0.9929089573488176,0.0016491498428427972 +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",test,0.04517523571848869,0.9908730158730159,0.0012564808061108444,0.9883338200112077,0.0018212925829520544 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..57b25ade26baee759674bca01d836421742bfd8b --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.0015299999999999997,0.05,34,"[5.1, 1.0]",train,6.615406164200976e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.0015299999999999997,0.05,34,"[5.1, 1.0]",validation,0.03395787626504898,0.9935515873015873,0.0012990421779304128,0.992730247533712,0.0016488955659978133 +flat_mae,patch,attn,hcpya_task21,last,19,0.0015299999999999997,0.05,34,"[5.1, 1.0]",test,0.045090653002262115,0.9908730158730159,0.0012564808061108444,0.9883338200112077,0.0018212925829520544 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/log.txt b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..30647b53275405c12bad55b89959f2011d040e06 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,887 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 22:50:12 +config: +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:21:44 lr: nan time: 3.2621 data: 2.7855 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:47 lr: 0.000003 loss: 3.0512 (3.0529) grad: 0.2440 (0.2504) time: 0.4665 data: 0.0022 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:13 lr: 0.000006 loss: 3.0107 (3.0046) grad: 0.2440 (0.2499) time: 0.4699 data: 0.0042 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:53 lr: 0.000009 loss: 2.8706 (2.9427) grad: 0.2425 (0.2468) time: 0.4567 data: 0.0043 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:39 lr: 0.000012 loss: 2.7332 (2.8749) grad: 0.2339 (0.2411) time: 0.4615 data: 0.0039 max mem: 22446 +train: [0] [100/400] eta: 0:02:27 lr: 0.000015 loss: 2.5713 (2.8023) grad: 0.2175 (0.2373) time: 0.4659 data: 0.0045 max mem: 22446 +train: [0] [120/400] eta: 0:02:16 lr: 0.000018 loss: 2.4622 (2.7298) grad: 0.2178 (0.2333) time: 0.4551 data: 0.0040 max mem: 22446 +train: [0] [140/400] eta: 0:02:04 lr: 0.000021 loss: 2.2976 (2.6610) grad: 0.2163 (0.2316) time: 0.4448 data: 0.0042 max mem: 22446 +train: [0] [160/400] eta: 0:01:54 lr: 0.000024 loss: 2.1840 (2.5985) grad: 0.2062 (0.2272) time: 0.4525 data: 0.0040 max mem: 22446 +train: [0] [180/400] eta: 0:01:44 lr: 0.000027 loss: 2.0941 (2.5368) grad: 0.1891 (0.2234) time: 0.4475 data: 0.0042 max mem: 22446 +train: [0] [200/400] eta: 0:01:34 lr: 0.000030 loss: 2.0184 (2.4786) grad: 0.1936 (0.2205) time: 0.4507 data: 0.0041 max mem: 22446 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 1.8963 (2.4239) grad: 0.1898 (0.2172) time: 0.4495 data: 0.0042 max mem: 22446 +train: [0] [240/400] eta: 0:01:14 lr: 0.000036 loss: 1.8188 (2.3695) grad: 0.1898 (0.2152) time: 0.4457 data: 0.0043 max mem: 22446 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 1.7450 (2.3211) grad: 0.1856 (0.2130) time: 0.4492 data: 0.0043 max mem: 22446 +train: [0] [280/400] eta: 0:00:55 lr: 0.000042 loss: 1.7201 (2.2772) grad: 0.1747 (0.2101) time: 0.4434 data: 0.0041 max mem: 22446 +train: [0] [300/400] eta: 0:00:47 lr: 0.000045 loss: 1.6779 (2.2346) grad: 0.1646 (0.2071) time: 0.5962 data: 0.1674 max mem: 22446 +train: [0] [320/400] eta: 0:00:37 lr: 0.000048 loss: 1.6162 (2.1932) grad: 0.1638 (0.2046) time: 0.4533 data: 0.0116 max mem: 22446 +train: [0] [340/400] eta: 0:00:28 lr: 0.000051 loss: 1.5285 (2.1539) grad: 0.1682 (0.2028) time: 0.4368 data: 0.0040 max mem: 22446 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 1.5171 (2.1176) grad: 0.1682 (0.2006) time: 0.4644 data: 0.0043 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.4840 (2.0829) grad: 0.1622 (0.1985) time: 0.4584 data: 0.0042 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.4280 (2.0483) grad: 0.1622 (0.1969) time: 0.4537 data: 0.0043 max mem: 22446 +train: [0] Total time: 0:03:07 (0.4687 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.4280 (2.0483) grad: 0.1622 (0.1969) +eval (validation): [0] [ 0/63] eta: 0:03:21 time: 3.2063 data: 2.9668 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:26 time: 0.4759 data: 0.1486 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:10 time: 0.3391 data: 0.0030 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3322 data: 0.0035 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3290 data: 0.0032 max mem: 22446 +eval (validation): [0] Total time: 0:00:27 (0.4316 s / it) +cv: [0] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.045 acc: 0.986 f1: 0.984 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:58 lr: nan time: 3.4462 data: 3.0477 max mem: 22446 +train: [1] [ 20/400] eta: 0:04:09 lr: 0.000063 loss: 1.3651 (1.3752) grad: 0.1558 (0.1604) time: 0.5164 data: 0.0598 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:20 lr: 0.000066 loss: 1.3512 (1.3541) grad: 0.1588 (0.1592) time: 0.4548 data: 0.0035 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:58 lr: 0.000069 loss: 1.3102 (1.3316) grad: 0.1526 (0.1562) time: 0.4618 data: 0.0043 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:41 lr: 0.000072 loss: 1.2727 (1.3152) grad: 0.1461 (0.1542) time: 0.4434 data: 0.0041 max mem: 22446 +train: [1] [100/400] eta: 0:02:28 lr: 0.000075 loss: 1.2627 (1.3042) grad: 0.1455 (0.1537) time: 0.4441 data: 0.0042 max mem: 22446 +train: [1] [120/400] eta: 0:02:15 lr: 0.000078 loss: 1.2375 (1.2862) grad: 0.1439 (0.1528) time: 0.4443 data: 0.0043 max mem: 22446 +train: [1] [140/400] eta: 0:02:04 lr: 0.000081 loss: 1.1842 (1.2715) grad: 0.1424 (0.1513) time: 0.4468 data: 0.0043 max mem: 22446 +train: [1] [160/400] eta: 0:01:54 lr: 0.000084 loss: 1.1645 (1.2558) grad: 0.1373 (0.1502) time: 0.4435 data: 0.0042 max mem: 22446 +train: [1] [180/400] eta: 0:01:44 lr: 0.000087 loss: 1.1383 (1.2426) grad: 0.1410 (0.1494) time: 0.4652 data: 0.0037 max mem: 22446 +train: [1] [200/400] eta: 0:01:34 lr: 0.000090 loss: 1.1094 (1.2286) grad: 0.1410 (0.1486) time: 0.4571 data: 0.0043 max mem: 22446 +train: [1] [220/400] eta: 0:01:24 lr: 0.000093 loss: 1.0755 (1.2134) grad: 0.1404 (0.1485) time: 0.4431 data: 0.0043 max mem: 22446 +train: [1] [240/400] eta: 0:01:15 lr: 0.000096 loss: 1.0563 (1.2003) grad: 0.1416 (0.1476) time: 0.4600 data: 0.0042 max mem: 22446 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 1.0545 (1.1883) grad: 0.1369 (0.1467) time: 0.4468 data: 0.0042 max mem: 22446 +train: [1] [280/400] eta: 0:00:55 lr: 0.000102 loss: 1.0140 (1.1751) grad: 0.1322 (0.1462) time: 0.4554 data: 0.0044 max mem: 22446 +train: [1] [300/400] eta: 0:00:48 lr: 0.000105 loss: 0.9912 (1.1626) grad: 0.1319 (0.1451) time: 0.6920 data: 0.2393 max mem: 22446 +train: [1] [320/400] eta: 0:00:38 lr: 0.000108 loss: 0.9830 (1.1510) grad: 0.1300 (0.1443) time: 0.4527 data: 0.0033 max mem: 22446 +train: [1] [340/400] eta: 0:00:28 lr: 0.000111 loss: 0.9460 (1.1387) grad: 0.1296 (0.1436) time: 0.4567 data: 0.0042 max mem: 22446 +train: [1] [360/400] eta: 0:00:19 lr: 0.000114 loss: 0.9419 (1.1282) grad: 0.1279 (0.1428) time: 0.4651 data: 0.0043 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 0.9179 (1.1179) grad: 0.1291 (0.1423) time: 0.4517 data: 0.0039 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 0.9025 (1.1070) grad: 0.1282 (0.1415) time: 0.4511 data: 0.0042 max mem: 22446 +train: [1] Total time: 0:03:10 (0.4756 s / it) +train: [1] Summary: lr: 0.000120 loss: 0.9025 (1.1070) grad: 0.1282 (0.1415) +eval (validation): [1] [ 0/63] eta: 0:04:26 time: 4.2326 data: 3.9499 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:25 time: 0.4002 data: 0.0160 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:11 time: 0.3712 data: 0.0037 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3493 data: 0.0036 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3459 data: 0.0035 max mem: 22446 +eval (validation): [1] Total time: 0:00:27 (0.4407 s / it) +cv: [1] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.029 acc: 0.989 f1: 0.986 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:22:44 lr: nan time: 3.4104 data: 3.0068 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:54 lr: 0.000123 loss: 0.8555 (0.8741) grad: 0.1386 (0.1472) time: 0.4771 data: 0.0038 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:16 lr: 0.000126 loss: 0.8763 (0.8790) grad: 0.1442 (0.1508) time: 0.4687 data: 0.0043 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:56 lr: 0.000129 loss: 0.8668 (0.8671) grad: 0.1559 (0.1514) time: 0.4664 data: 0.0046 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:40 lr: 0.000132 loss: 0.8515 (0.8676) grad: 0.1493 (0.1595) time: 0.4453 data: 0.0041 max mem: 22446 +train: [2] [100/400] eta: 0:02:28 lr: 0.000135 loss: 0.8515 (0.8627) grad: 0.1453 (0.1580) time: 0.4646 data: 0.0045 max mem: 22446 +train: [2] [120/400] eta: 0:02:16 lr: 0.000138 loss: 0.8132 (0.8582) grad: 0.1566 (0.1594) time: 0.4595 data: 0.0044 max mem: 22446 +train: [2] [140/400] eta: 0:02:05 lr: 0.000141 loss: 0.8047 (0.8528) grad: 0.1582 (0.1600) time: 0.4518 data: 0.0042 max mem: 22446 +train: [2] [160/400] eta: 0:01:55 lr: 0.000144 loss: 0.7925 (0.8517) grad: 0.1715 (0.1650) time: 0.4649 data: 0.0041 max mem: 22446 +train: [2] [180/400] eta: 0:01:45 lr: 0.000147 loss: 0.8085 (0.8474) grad: 0.1774 (0.1672) time: 0.4549 data: 0.0044 max mem: 22446 +train: [2] [200/400] eta: 0:01:35 lr: 0.000150 loss: 0.8033 (0.8417) grad: 0.1683 (0.1684) time: 0.4543 data: 0.0043 max mem: 22446 +train: [2] [220/400] eta: 0:01:25 lr: 0.000153 loss: 0.8033 (0.8409) grad: 0.1823 (0.1716) time: 0.4566 data: 0.0045 max mem: 22446 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 0.8073 (0.8377) grad: 0.2005 (0.1744) time: 0.4587 data: 0.0045 max mem: 22446 +train: [2] [260/400] eta: 0:01:05 lr: 0.000159 loss: 0.7794 (0.8323) grad: 0.2025 (0.1765) time: 0.4550 data: 0.0042 max mem: 22446 +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 0.8098 (0.8302) grad: 0.2025 (0.1806) time: 0.4630 data: 0.0045 max mem: 22446 +train: [2] [300/400] eta: 0:00:48 lr: 0.000165 loss: 0.7807 (0.8260) grad: 0.1999 (0.1840) time: 0.6193 data: 0.1809 max mem: 22446 +train: [2] [320/400] eta: 0:00:38 lr: 0.000168 loss: 0.7279 (0.8264) grad: 0.2182 (0.1893) time: 0.4368 data: 0.0035 max mem: 22446 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 0.8127 (0.8251) grad: 0.2339 (0.1919) time: 0.4748 data: 0.0041 max mem: 22446 +train: [2] [360/400] eta: 0:00:19 lr: 0.000174 loss: 0.8385 (0.8255) grad: 0.2082 (0.1930) time: 0.4569 data: 0.0044 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 0.7596 (0.8185) grad: 0.1990 (0.1962) time: 0.4480 data: 0.0042 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.6721 (0.8117) grad: 0.1957 (0.1975) time: 0.4507 data: 0.0045 max mem: 22446 +train: [2] Total time: 0:03:09 (0.4743 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.6721 (0.8117) grad: 0.1957 (0.1975) +eval (validation): [2] [ 0/63] eta: 0:03:14 time: 3.0888 data: 2.8164 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:20 time: 0.3549 data: 0.0034 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3381 data: 0.0034 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3850 data: 0.0038 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3770 data: 0.0037 max mem: 22446 +eval (validation): [2] Total time: 0:00:25 (0.4081 s / it) +cv: [2] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.035 acc: 0.990 f1: 0.988 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:24 lr: nan time: 3.3616 data: 3.0176 max mem: 22446 +train: [3] [ 20/400] eta: 0:03:41 lr: 0.000183 loss: 0.5961 (0.6439) grad: 0.1983 (0.2129) time: 0.4450 data: 0.0031 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:11 lr: 0.000186 loss: 0.6544 (0.6768) grad: 0.2035 (0.2126) time: 0.4752 data: 0.0042 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:54 lr: 0.000189 loss: 0.6808 (0.7023) grad: 0.2298 (0.2387) time: 0.4756 data: 0.0044 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:39 lr: 0.000192 loss: 0.6715 (0.7086) grad: 0.2587 (0.2463) time: 0.4517 data: 0.0042 max mem: 22446 +train: [3] [100/400] eta: 0:02:27 lr: 0.000195 loss: 0.6715 (0.7099) grad: 0.2759 (0.2546) time: 0.4668 data: 0.0043 max mem: 22446 +train: [3] [120/400] eta: 0:02:15 lr: 0.000198 loss: 0.6925 (0.7204) grad: 0.2528 (0.2549) time: 0.4488 data: 0.0041 max mem: 22446 +train: [3] [140/400] eta: 0:02:04 lr: 0.000201 loss: 0.7815 (0.7335) grad: 0.2379 (0.2559) time: 0.4388 data: 0.0040 max mem: 22446 +train: [3] [160/400] eta: 0:01:54 lr: 0.000204 loss: 0.6273 (0.7284) grad: 0.2586 (0.2569) time: 0.4607 data: 0.0041 max mem: 22446 +train: [3] [180/400] eta: 0:01:44 lr: 0.000207 loss: 0.6292 (0.7355) grad: 0.2936 (0.2656) time: 0.4643 data: 0.0044 max mem: 22446 +train: [3] [200/400] eta: 0:01:34 lr: 0.000210 loss: 0.6648 (0.7426) grad: 0.3044 (0.2695) time: 0.4567 data: 0.0045 max mem: 22446 +train: [3] [220/400] eta: 0:01:24 lr: 0.000213 loss: 0.6648 (0.7372) grad: 0.2915 (0.2709) time: 0.4524 data: 0.0044 max mem: 22446 +train: [3] [240/400] eta: 0:01:15 lr: 0.000216 loss: 0.6667 (0.7449) grad: 0.2897 (0.2738) time: 0.4612 data: 0.0043 max mem: 22446 +train: [3] [260/400] eta: 0:01:05 lr: 0.000219 loss: 0.7313 (0.7464) grad: 0.3176 (0.2830) time: 0.4507 data: 0.0044 max mem: 22446 +train: [3] [280/400] eta: 0:00:56 lr: 0.000222 loss: 0.7313 (0.7463) grad: 0.3432 (0.2892) time: 0.4776 data: 0.0045 max mem: 22446 +train: [3] [300/400] eta: 0:00:47 lr: 0.000225 loss: 0.7755 (0.7507) grad: 0.3351 (0.2931) time: 0.6253 data: 0.1748 max mem: 22446 +train: [3] [320/400] eta: 0:00:38 lr: 0.000228 loss: 0.6833 (0.7491) grad: 0.3427 (0.2979) time: 0.4515 data: 0.0035 max mem: 22446 +train: [3] [340/400] eta: 0:00:28 lr: 0.000231 loss: 0.5920 (0.7402) grad: 0.3427 (0.3022) time: 0.4711 data: 0.0045 max mem: 22446 +train: [3] [360/400] eta: 0:00:19 lr: 0.000234 loss: 0.5244 (0.7313) grad: 0.2859 (0.3034) time: 0.4694 data: 0.0044 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 0.5470 (0.7317) grad: 0.3294 (0.3044) time: 0.4542 data: 0.0043 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 0.6868 (0.7325) grad: 0.3362 (0.3071) time: 0.4724 data: 0.0042 max mem: 22446 +train: [3] Total time: 0:03:10 (0.4764 s / it) +train: [3] Summary: lr: 0.000240 loss: 0.6868 (0.7325) grad: 0.3362 (0.3071) +eval (validation): [3] [ 0/63] eta: 0:03:21 time: 3.2046 data: 2.9621 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:21 time: 0.3570 data: 0.0036 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3550 data: 0.0026 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3524 data: 0.0033 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3501 data: 0.0032 max mem: 22446 +eval (validation): [3] Total time: 0:00:25 (0.4042 s / it) +cv: [3] best hparam: (1.4, 1.0) (026) ('026_lr1.4e+00_wd1.0e+00') loss: 0.035 acc: 0.989 f1: 0.986 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:23:30 lr: nan time: 3.5250 data: 3.1286 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:47 lr: 0.000243 loss: 0.6560 (0.7492) grad: 0.3187 (0.3179) time: 0.4528 data: 0.0030 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:07 lr: 0.000246 loss: 0.7351 (0.7556) grad: 0.3413 (0.3391) time: 0.4366 data: 0.0040 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:52 lr: 0.000249 loss: 0.6528 (0.7180) grad: 0.3653 (0.3576) time: 0.4815 data: 0.0041 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:38 lr: 0.000252 loss: 0.5786 (0.6916) grad: 0.3490 (0.3509) time: 0.4594 data: 0.0041 max mem: 22446 +train: [4] [100/400] eta: 0:02:25 lr: 0.000255 loss: 0.5648 (0.7057) grad: 0.3357 (0.3642) time: 0.4428 data: 0.0041 max mem: 22446 +train: [4] [120/400] eta: 0:02:14 lr: 0.000258 loss: 0.6388 (0.7319) grad: 0.4134 (0.3853) time: 0.4603 data: 0.0042 max mem: 22446 +train: [4] [140/400] eta: 0:02:03 lr: 0.000261 loss: 0.6114 (0.7259) grad: 0.3985 (0.3798) time: 0.4501 data: 0.0041 max mem: 22446 +train: [4] [160/400] eta: 0:01:53 lr: 0.000264 loss: 0.6505 (0.7547) grad: 0.3817 (0.3892) time: 0.4600 data: 0.0038 max mem: 22446 +train: [4] [180/400] eta: 0:01:43 lr: 0.000267 loss: 0.7817 (0.7533) grad: 0.4132 (0.3947) time: 0.4457 data: 0.0042 max mem: 22446 +train: [4] [200/400] eta: 0:01:34 lr: 0.000270 loss: 0.7858 (0.7634) grad: 0.4132 (0.4070) time: 0.4689 data: 0.0042 max mem: 22446 +train: [4] [220/400] eta: 0:01:24 lr: 0.000273 loss: 0.7858 (0.7698) grad: 0.4433 (0.4100) time: 0.4595 data: 0.0044 max mem: 22446 +train: [4] [240/400] eta: 0:01:14 lr: 0.000276 loss: 0.7352 (0.7754) grad: 0.4058 (0.4185) time: 0.4417 data: 0.0042 max mem: 22446 +train: [4] [260/400] eta: 0:01:05 lr: 0.000279 loss: 0.7968 (0.8092) grad: 0.4438 (0.4238) time: 0.4487 data: 0.0042 max mem: 22446 +train: [4] [280/400] eta: 0:00:55 lr: 0.000282 loss: 1.0903 (0.8227) grad: 0.4482 (0.4381) time: 0.4659 data: 0.0044 max mem: 22446 +train: [4] [300/400] eta: 0:00:47 lr: 0.000285 loss: 0.6949 (0.8255) grad: 0.4825 (0.4418) time: 0.5907 data: 0.1647 max mem: 22446 +train: [4] [320/400] eta: 0:00:37 lr: 0.000288 loss: 0.4794 (0.8053) grad: 0.4565 (0.4429) time: 0.4478 data: 0.0038 max mem: 22446 +train: [4] [340/400] eta: 0:00:28 lr: 0.000291 loss: 0.5317 (0.7924) grad: 0.4565 (0.4423) time: 0.4563 data: 0.0039 max mem: 22446 +train: [4] [360/400] eta: 0:00:18 lr: 0.000294 loss: 0.5999 (0.8018) grad: 0.4578 (0.4478) time: 0.4459 data: 0.0040 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.8872 (0.8195) grad: 0.5145 (0.4549) time: 0.4417 data: 0.0040 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 0.9483 (0.8276) grad: 0.5446 (0.4620) time: 0.4423 data: 0.0042 max mem: 22446 +train: [4] Total time: 0:03:07 (0.4682 s / it) +train: [4] Summary: lr: 0.000300 loss: 0.9483 (0.8276) grad: 0.5446 (0.4620) +eval (validation): [4] [ 0/63] eta: 0:03:06 time: 2.9584 data: 2.7347 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:19 time: 0.3254 data: 0.0037 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:08 time: 0.3202 data: 0.0028 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3296 data: 0.0032 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3307 data: 0.0033 max mem: 22446 +eval (validation): [4] Total time: 0:00:23 (0.3714 s / it) +cv: [4] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.032 acc: 0.990 f1: 0.988 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:21:15 lr: nan time: 3.1891 data: 2.8044 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:41 lr: 0.000300 loss: 0.6817 (0.8095) grad: 0.3928 (0.4367) time: 0.4513 data: 0.0031 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:06 lr: 0.000300 loss: 0.7400 (0.8703) grad: 0.4680 (0.4878) time: 0.4526 data: 0.0037 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:48 lr: 0.000300 loss: 0.7091 (0.8534) grad: 0.5050 (0.4785) time: 0.4493 data: 0.0043 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:36 lr: 0.000300 loss: 0.6523 (0.8209) grad: 0.4799 (0.4834) time: 0.4705 data: 0.0044 max mem: 22446 +train: [5] [100/400] eta: 0:02:24 lr: 0.000300 loss: 0.9056 (0.8351) grad: 0.4660 (0.4718) time: 0.4556 data: 0.0041 max mem: 22446 +train: [5] [120/400] eta: 0:02:13 lr: 0.000300 loss: 0.9498 (0.8459) grad: 0.4329 (0.4715) time: 0.4508 data: 0.0041 max mem: 22446 +train: [5] [140/400] eta: 0:02:02 lr: 0.000300 loss: 0.7513 (0.8481) grad: 0.4569 (0.4799) time: 0.4372 data: 0.0042 max mem: 22446 +train: [5] [160/400] eta: 0:01:52 lr: 0.000299 loss: 0.7513 (0.8720) grad: 0.5032 (0.4870) time: 0.4548 data: 0.0043 max mem: 22446 +train: [5] [180/400] eta: 0:01:43 lr: 0.000299 loss: 0.7393 (0.8491) grad: 0.5017 (0.4943) time: 0.4575 data: 0.0042 max mem: 22446 +train: [5] [200/400] eta: 0:01:33 lr: 0.000299 loss: 0.5803 (0.8434) grad: 0.4875 (0.4896) time: 0.4521 data: 0.0043 max mem: 22446 +train: [5] [220/400] eta: 0:01:23 lr: 0.000299 loss: 0.6336 (0.8562) grad: 0.4143 (0.4858) time: 0.4516 data: 0.0043 max mem: 22446 +train: [5] [240/400] eta: 0:01:14 lr: 0.000299 loss: 0.7344 (0.8592) grad: 0.5410 (0.4998) time: 0.4489 data: 0.0040 max mem: 22446 +train: [5] [260/400] eta: 0:01:04 lr: 0.000299 loss: 0.7344 (0.8774) grad: 0.5427 (0.4992) time: 0.4425 data: 0.0041 max mem: 22446 +train: [5] [280/400] eta: 0:00:55 lr: 0.000298 loss: 0.8026 (0.8780) grad: 0.4691 (0.5035) time: 0.4620 data: 0.0045 max mem: 22446 +train: [5] [300/400] eta: 0:00:47 lr: 0.000298 loss: 0.6392 (0.8724) grad: 0.4691 (0.5040) time: 0.6204 data: 0.1802 max mem: 22446 +train: [5] [320/400] eta: 0:00:37 lr: 0.000298 loss: 0.5783 (0.8656) grad: 0.4430 (0.5016) time: 0.4380 data: 0.0039 max mem: 22446 +train: [5] [340/400] eta: 0:00:28 lr: 0.000298 loss: 0.6173 (0.8650) grad: 0.4430 (0.4990) time: 0.4682 data: 0.0045 max mem: 22446 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 0.6173 (0.8624) grad: 0.4822 (0.5074) time: 0.4500 data: 0.0044 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.6990 (0.8584) grad: 0.5054 (0.5075) time: 0.4467 data: 0.0045 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 0.6982 (0.8509) grad: 0.4420 (0.5044) time: 0.4447 data: 0.0042 max mem: 22446 +train: [5] Total time: 0:03:07 (0.4676 s / it) +train: [5] Summary: lr: 0.000297 loss: 0.6982 (0.8509) grad: 0.4420 (0.5044) +eval (validation): [5] [ 0/63] eta: 0:03:17 time: 3.1415 data: 2.8533 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:20 time: 0.3343 data: 0.0040 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3541 data: 0.0032 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3350 data: 0.0035 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3318 data: 0.0035 max mem: 22446 +eval (validation): [5] Total time: 0:00:24 (0.3897 s / it) +cv: [5] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.028 acc: 0.991 f1: 0.989 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:21:47 lr: nan time: 3.2691 data: 2.8803 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:50 lr: 0.000296 loss: 0.4753 (0.5211) grad: 0.4855 (0.5189) time: 0.4728 data: 0.0029 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:13 lr: 0.000296 loss: 0.5450 (0.6494) grad: 0.4734 (0.4791) time: 0.4677 data: 0.0040 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:54 lr: 0.000296 loss: 0.5904 (0.6202) grad: 0.4072 (0.4568) time: 0.4591 data: 0.0043 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:40 lr: 0.000295 loss: 0.5670 (0.6539) grad: 0.3831 (0.4389) time: 0.4707 data: 0.0041 max mem: 22446 +train: [6] [100/400] eta: 0:02:28 lr: 0.000295 loss: 0.5670 (0.6289) grad: 0.3998 (0.4439) time: 0.4599 data: 0.0042 max mem: 22446 +train: [6] [120/400] eta: 0:02:16 lr: 0.000295 loss: 0.3412 (0.5992) grad: 0.4012 (0.4347) time: 0.4458 data: 0.0040 max mem: 22446 +train: [6] [140/400] eta: 0:02:04 lr: 0.000294 loss: 0.3877 (0.5931) grad: 0.3537 (0.4243) time: 0.4490 data: 0.0042 max mem: 22446 +train: [6] [160/400] eta: 0:01:54 lr: 0.000294 loss: 0.3510 (0.5766) grad: 0.3447 (0.4124) time: 0.4644 data: 0.0043 max mem: 22446 +train: [6] [180/400] eta: 0:01:44 lr: 0.000293 loss: 0.5024 (0.5955) grad: 0.3602 (0.4150) time: 0.4514 data: 0.0041 max mem: 22446 +train: [6] [200/400] eta: 0:01:34 lr: 0.000293 loss: 0.5809 (0.6271) grad: 0.4059 (0.4158) time: 0.4552 data: 0.0042 max mem: 22446 +train: [6] [220/400] eta: 0:01:24 lr: 0.000292 loss: 0.4281 (0.6147) grad: 0.4260 (0.4142) time: 0.4530 data: 0.0040 max mem: 22446 +train: [6] [240/400] eta: 0:01:15 lr: 0.000292 loss: 0.3623 (0.5967) grad: 0.3780 (0.4119) time: 0.4480 data: 0.0044 max mem: 22446 +train: [6] [260/400] eta: 0:01:05 lr: 0.000291 loss: 0.3807 (0.5910) grad: 0.3637 (0.4107) time: 0.4543 data: 0.0042 max mem: 22446 +train: [6] [280/400] eta: 0:00:56 lr: 0.000291 loss: 0.6093 (0.6045) grad: 0.3463 (0.4094) time: 0.4670 data: 0.0043 max mem: 22446 +train: [6] [300/400] eta: 0:00:47 lr: 0.000290 loss: 0.5864 (0.6028) grad: 0.3515 (0.4068) time: 0.5988 data: 0.1672 max mem: 22446 +train: [6] [320/400] eta: 0:00:38 lr: 0.000290 loss: 0.4215 (0.5922) grad: 0.3549 (0.4048) time: 0.4537 data: 0.0033 max mem: 22446 +train: [6] [340/400] eta: 0:00:28 lr: 0.000289 loss: 0.3905 (0.5856) grad: 0.3547 (0.3992) time: 0.4467 data: 0.0044 max mem: 22446 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 0.3571 (0.5732) grad: 0.3140 (0.3936) time: 0.4493 data: 0.0043 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.2499 (0.5590) grad: 0.2822 (0.3884) time: 0.4452 data: 0.0042 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.2333 (0.5494) grad: 0.2667 (0.3846) time: 0.4563 data: 0.0043 max mem: 22446 +train: [6] Total time: 0:03:08 (0.4711 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.2333 (0.5494) grad: 0.2667 (0.3846) +eval (validation): [6] [ 0/63] eta: 0:03:22 time: 3.2188 data: 2.9361 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:20 time: 0.3405 data: 0.0037 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3431 data: 0.0028 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3386 data: 0.0038 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3364 data: 0.0039 max mem: 22446 +eval (validation): [6] Total time: 0:00:24 (0.3909 s / it) +cv: [6] best hparam: (1.9, 1.0) (028) ('028_lr1.9e+00_wd1.0e+00') loss: 0.024 acc: 0.992 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:23:46 lr: nan time: 3.5665 data: 3.2217 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:47 lr: 0.000286 loss: 0.2761 (0.3254) grad: 0.2300 (0.2544) time: 0.4490 data: 0.0027 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:10 lr: 0.000286 loss: 0.2761 (0.3370) grad: 0.2297 (0.2521) time: 0.4555 data: 0.0042 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:52 lr: 0.000285 loss: 0.3003 (0.3831) grad: 0.2662 (0.2866) time: 0.4604 data: 0.0044 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:38 lr: 0.000284 loss: 0.3430 (0.3641) grad: 0.2785 (0.2857) time: 0.4643 data: 0.0043 max mem: 22446 +train: [7] [100/400] eta: 0:02:25 lr: 0.000284 loss: 0.2885 (0.3535) grad: 0.2999 (0.2893) time: 0.4488 data: 0.0041 max mem: 22446 +train: [7] [120/400] eta: 0:02:14 lr: 0.000283 loss: 0.3346 (0.3551) grad: 0.3117 (0.3025) time: 0.4428 data: 0.0042 max mem: 22446 +train: [7] [140/400] eta: 0:02:04 lr: 0.000282 loss: 0.3543 (0.3678) grad: 0.3116 (0.3041) time: 0.4634 data: 0.0043 max mem: 22446 +train: [7] [160/400] eta: 0:01:53 lr: 0.000282 loss: 0.3543 (0.3706) grad: 0.3001 (0.3021) time: 0.4462 data: 0.0044 max mem: 22446 +train: [7] [180/400] eta: 0:01:43 lr: 0.000281 loss: 0.3258 (0.3760) grad: 0.2853 (0.3045) time: 0.4498 data: 0.0041 max mem: 22446 +train: [7] [200/400] eta: 0:01:33 lr: 0.000280 loss: 0.3589 (0.3737) grad: 0.3408 (0.3098) time: 0.4566 data: 0.0043 max mem: 22446 +train: [7] [220/400] eta: 0:01:24 lr: 0.000279 loss: 0.2563 (0.3748) grad: 0.2950 (0.3054) time: 0.4533 data: 0.0041 max mem: 22446 +train: [7] [240/400] eta: 0:01:14 lr: 0.000278 loss: 0.2358 (0.3708) grad: 0.2697 (0.3068) time: 0.4580 data: 0.0042 max mem: 22446 +train: [7] [260/400] eta: 0:01:05 lr: 0.000278 loss: 0.2805 (0.3667) grad: 0.3166 (0.3081) time: 0.4620 data: 0.0043 max mem: 22446 +train: [7] [280/400] eta: 0:00:55 lr: 0.000277 loss: 0.3254 (0.3756) grad: 0.2938 (0.3059) time: 0.4560 data: 0.0043 max mem: 22446 +train: [7] [300/400] eta: 0:00:47 lr: 0.000276 loss: 0.3895 (0.3779) grad: 0.2860 (0.3069) time: 0.6299 data: 0.1741 max mem: 22446 +train: [7] [320/400] eta: 0:00:37 lr: 0.000275 loss: 0.3895 (0.3802) grad: 0.2860 (0.3049) time: 0.4469 data: 0.0031 max mem: 22446 +train: [7] [340/400] eta: 0:00:28 lr: 0.000274 loss: 0.2374 (0.3724) grad: 0.2472 (0.3014) time: 0.4445 data: 0.0042 max mem: 22446 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 0.2051 (0.3649) grad: 0.2472 (0.3000) time: 0.4514 data: 0.0042 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.1994 (0.3577) grad: 0.2769 (0.2993) time: 0.4348 data: 0.0041 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.1966 (0.3504) grad: 0.2597 (0.2985) time: 0.4520 data: 0.0043 max mem: 22446 +train: [7] Total time: 0:03:07 (0.4697 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.1966 (0.3504) grad: 0.2597 (0.2985) +eval (validation): [7] [ 0/63] eta: 0:03:21 time: 3.1929 data: 2.9200 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:20 time: 0.3290 data: 0.0028 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3457 data: 0.0029 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3324 data: 0.0032 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3350 data: 0.0033 max mem: 22446 +eval (validation): [7] Total time: 0:00:24 (0.3863 s / it) +cv: [7] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.030 acc: 0.992 f1: 0.990 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:22:41 lr: nan time: 3.4032 data: 3.0342 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:47 lr: 0.000270 loss: 0.2088 (0.1928) grad: 0.1752 (0.2073) time: 0.4592 data: 0.0029 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:10 lr: 0.000270 loss: 0.2092 (0.2224) grad: 0.2111 (0.2266) time: 0.4540 data: 0.0041 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:50 lr: 0.000269 loss: 0.2096 (0.2336) grad: 0.2259 (0.2299) time: 0.4489 data: 0.0041 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:37 lr: 0.000268 loss: 0.1968 (0.2273) grad: 0.2085 (0.2324) time: 0.4553 data: 0.0042 max mem: 22446 +train: [8] [100/400] eta: 0:02:25 lr: 0.000267 loss: 0.1717 (0.2236) grad: 0.2030 (0.2335) time: 0.4568 data: 0.0045 max mem: 22446 +train: [8] [120/400] eta: 0:02:13 lr: 0.000266 loss: 0.2345 (0.2391) grad: 0.2398 (0.2398) time: 0.4422 data: 0.0042 max mem: 22446 +train: [8] [140/400] eta: 0:02:03 lr: 0.000265 loss: 0.2586 (0.2531) grad: 0.2791 (0.2471) time: 0.4610 data: 0.0042 max mem: 22446 +train: [8] [160/400] eta: 0:01:53 lr: 0.000264 loss: 0.2272 (0.2498) grad: 0.2482 (0.2434) time: 0.4569 data: 0.0043 max mem: 22446 +train: [8] [180/400] eta: 0:01:43 lr: 0.000263 loss: 0.1967 (0.2534) grad: 0.2051 (0.2454) time: 0.4497 data: 0.0043 max mem: 22446 +train: [8] [200/400] eta: 0:01:33 lr: 0.000262 loss: 0.2291 (0.2487) grad: 0.2430 (0.2432) time: 0.4495 data: 0.0043 max mem: 22446 +train: [8] [220/400] eta: 0:01:23 lr: 0.000260 loss: 0.1734 (0.2457) grad: 0.2233 (0.2405) time: 0.4434 data: 0.0041 max mem: 22446 +train: [8] [240/400] eta: 0:01:14 lr: 0.000259 loss: 0.1813 (0.2462) grad: 0.2050 (0.2372) time: 0.4452 data: 0.0042 max mem: 22446 +train: [8] [260/400] eta: 0:01:05 lr: 0.000258 loss: 0.1813 (0.2424) grad: 0.2016 (0.2337) time: 0.4732 data: 0.0044 max mem: 22446 +train: [8] [280/400] eta: 0:00:55 lr: 0.000257 loss: 0.1806 (0.2478) grad: 0.1954 (0.2338) time: 0.4603 data: 0.0045 max mem: 22446 +train: [8] [300/400] eta: 0:00:47 lr: 0.000256 loss: 0.2062 (0.2573) grad: 0.2593 (0.2381) time: 0.6074 data: 0.1663 max mem: 22446 +train: [8] [320/400] eta: 0:00:37 lr: 0.000255 loss: 0.2062 (0.2542) grad: 0.2557 (0.2336) time: 0.4580 data: 0.0037 max mem: 22446 +train: [8] [340/400] eta: 0:00:28 lr: 0.000254 loss: 0.1579 (0.2520) grad: 0.1795 (0.2319) time: 0.4482 data: 0.0044 max mem: 22446 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 0.1646 (0.2471) grad: 0.1988 (0.2283) time: 0.4466 data: 0.0044 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.1148 (0.2429) grad: 0.1462 (0.2248) time: 0.4558 data: 0.0041 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.1565 (0.2410) grad: 0.1547 (0.2240) time: 0.4618 data: 0.0044 max mem: 22446 +train: [8] Total time: 0:03:07 (0.4697 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.1565 (0.2410) grad: 0.1547 (0.2240) +eval (validation): [8] [ 0/63] eta: 0:03:19 time: 3.1678 data: 2.8833 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:21 time: 0.3729 data: 0.0042 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3315 data: 0.0034 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3263 data: 0.0034 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3235 data: 0.0033 max mem: 22446 +eval (validation): [8] Total time: 0:00:24 (0.3923 s / it) +cv: [8] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 0.026 acc: 0.992 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:23:06 lr: nan time: 3.4659 data: 3.0513 max mem: 22446 +train: [9] [ 20/400] eta: 0:03:59 lr: 0.000249 loss: 0.1798 (0.2317) grad: 0.1580 (0.2074) time: 0.4891 data: 0.0036 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:17 lr: 0.000248 loss: 0.1738 (0.1998) grad: 0.1604 (0.1920) time: 0.4646 data: 0.0043 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:56 lr: 0.000247 loss: 0.1738 (0.2042) grad: 0.1652 (0.1908) time: 0.4587 data: 0.0042 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:40 lr: 0.000246 loss: 0.1721 (0.1991) grad: 0.1756 (0.1841) time: 0.4436 data: 0.0042 max mem: 22446 +train: [9] [100/400] eta: 0:02:28 lr: 0.000244 loss: 0.1721 (0.1950) grad: 0.1858 (0.1851) time: 0.4667 data: 0.0043 max mem: 22446 +train: [9] [120/400] eta: 0:02:16 lr: 0.000243 loss: 0.1796 (0.1941) grad: 0.1910 (0.1836) time: 0.4555 data: 0.0043 max mem: 22446 +train: [9] [140/400] eta: 0:02:05 lr: 0.000242 loss: 0.1481 (0.1919) grad: 0.1861 (0.1846) time: 0.4462 data: 0.0039 max mem: 22446 +train: [9] [160/400] eta: 0:01:55 lr: 0.000241 loss: 0.1534 (0.1883) grad: 0.1766 (0.1840) time: 0.4668 data: 0.0042 max mem: 22446 +train: [9] [180/400] eta: 0:01:44 lr: 0.000240 loss: 0.1456 (0.1840) grad: 0.1557 (0.1821) time: 0.4498 data: 0.0040 max mem: 22446 +train: [9] [200/400] eta: 0:01:34 lr: 0.000238 loss: 0.1232 (0.1820) grad: 0.1557 (0.1807) time: 0.4449 data: 0.0041 max mem: 22446 +train: [9] [220/400] eta: 0:01:24 lr: 0.000237 loss: 0.1385 (0.1817) grad: 0.1850 (0.1826) time: 0.4462 data: 0.0044 max mem: 22446 +train: [9] [240/400] eta: 0:01:15 lr: 0.000236 loss: 0.1309 (0.1772) grad: 0.1703 (0.1794) time: 0.4555 data: 0.0043 max mem: 22446 +train: [9] [260/400] eta: 0:01:05 lr: 0.000234 loss: 0.1140 (0.1781) grad: 0.1546 (0.1797) time: 0.4636 data: 0.0044 max mem: 22446 +train: [9] [280/400] eta: 0:00:56 lr: 0.000233 loss: 0.1683 (0.1769) grad: 0.1572 (0.1781) time: 0.4679 data: 0.0044 max mem: 22446 +train: [9] [300/400] eta: 0:00:47 lr: 0.000232 loss: 0.1270 (0.1757) grad: 0.1353 (0.1784) time: 0.5965 data: 0.1655 max mem: 22446 +train: [9] [320/400] eta: 0:00:38 lr: 0.000230 loss: 0.1116 (0.1725) grad: 0.1591 (0.1781) time: 0.4444 data: 0.0044 max mem: 22446 +train: [9] [340/400] eta: 0:00:28 lr: 0.000229 loss: 0.1265 (0.1724) grad: 0.1740 (0.1791) time: 0.4662 data: 0.0043 max mem: 22446 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 0.1213 (0.1705) grad: 0.1653 (0.1774) time: 0.4540 data: 0.0044 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.1102 (0.1684) grad: 0.1428 (0.1764) time: 0.4580 data: 0.0042 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.1102 (0.1679) grad: 0.1688 (0.1753) time: 0.4559 data: 0.0042 max mem: 22446 +train: [9] Total time: 0:03:09 (0.4728 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.1102 (0.1679) grad: 0.1688 (0.1753) +eval (validation): [9] [ 0/63] eta: 0:03:18 time: 3.1458 data: 2.8604 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:20 time: 0.3455 data: 0.0034 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3508 data: 0.0034 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3350 data: 0.0034 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3344 data: 0.0034 max mem: 22446 +eval (validation): [9] Total time: 0:00:24 (0.3927 s / it) +cv: [9] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.033 acc: 0.992 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:21:36 lr: nan time: 3.2418 data: 2.8540 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:50 lr: 0.000224 loss: 0.1145 (0.1779) grad: 0.1031 (0.1273) time: 0.4756 data: 0.0044 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:14 lr: 0.000222 loss: 0.1071 (0.1495) grad: 0.1168 (0.1367) time: 0.4713 data: 0.0043 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:54 lr: 0.000221 loss: 0.1205 (0.1462) grad: 0.1541 (0.1375) time: 0.4608 data: 0.0046 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:39 lr: 0.000220 loss: 0.1230 (0.1379) grad: 0.1541 (0.1449) time: 0.4484 data: 0.0042 max mem: 22446 +train: [10] [100/400] eta: 0:02:27 lr: 0.000218 loss: 0.1082 (0.1364) grad: 0.1591 (0.1496) time: 0.4570 data: 0.0041 max mem: 22446 +train: [10] [120/400] eta: 0:02:15 lr: 0.000217 loss: 0.1128 (0.1355) grad: 0.1600 (0.1548) time: 0.4578 data: 0.0041 max mem: 22446 +train: [10] [140/400] eta: 0:02:04 lr: 0.000215 loss: 0.1398 (0.1387) grad: 0.1559 (0.1578) time: 0.4394 data: 0.0041 max mem: 22446 +train: [10] [160/400] eta: 0:01:54 lr: 0.000214 loss: 0.1329 (0.1350) grad: 0.1412 (0.1548) time: 0.4582 data: 0.0043 max mem: 22446 +train: [10] [180/400] eta: 0:01:43 lr: 0.000213 loss: 0.1057 (0.1336) grad: 0.1334 (0.1539) time: 0.4329 data: 0.0040 max mem: 22446 +train: [10] [200/400] eta: 0:01:33 lr: 0.000211 loss: 0.1029 (0.1315) grad: 0.1334 (0.1526) time: 0.4407 data: 0.0042 max mem: 22446 +train: [10] [220/400] eta: 0:01:23 lr: 0.000210 loss: 0.1058 (0.1310) grad: 0.1332 (0.1514) time: 0.4419 data: 0.0043 max mem: 22446 +train: [10] [240/400] eta: 0:01:14 lr: 0.000208 loss: 0.1076 (0.1300) grad: 0.1450 (0.1508) time: 0.4413 data: 0.0043 max mem: 22446 +train: [10] [260/400] eta: 0:01:04 lr: 0.000207 loss: 0.1076 (0.1312) grad: 0.1450 (0.1510) time: 0.4333 data: 0.0043 max mem: 22446 +train: [10] [280/400] eta: 0:00:55 lr: 0.000205 loss: 0.0955 (0.1306) grad: 0.1378 (0.1482) time: 0.4547 data: 0.0045 max mem: 22446 +train: [10] [300/400] eta: 0:00:47 lr: 0.000204 loss: 0.0934 (0.1304) grad: 0.1302 (0.1471) time: 0.6052 data: 0.1719 max mem: 22446 +train: [10] [320/400] eta: 0:00:37 lr: 0.000202 loss: 0.0810 (0.1284) grad: 0.1056 (0.1447) time: 0.4382 data: 0.0031 max mem: 22446 +train: [10] [340/400] eta: 0:00:28 lr: 0.000201 loss: 0.0836 (0.1267) grad: 0.1036 (0.1433) time: 0.4486 data: 0.0045 max mem: 22446 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 0.0842 (0.1270) grad: 0.1099 (0.1418) time: 0.4440 data: 0.0043 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.0865 (0.1261) grad: 0.1011 (0.1399) time: 0.4576 data: 0.0043 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.0957 (0.1253) grad: 0.1011 (0.1382) time: 0.4460 data: 0.0043 max mem: 22446 +train: [10] Total time: 0:03:06 (0.4651 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.0957 (0.1253) grad: 0.1011 (0.1382) +eval (validation): [10] [ 0/63] eta: 0:03:32 time: 3.3776 data: 3.0940 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:22 time: 0.3687 data: 0.0025 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3409 data: 0.0035 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3286 data: 0.0035 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3265 data: 0.0035 max mem: 22446 +eval (validation): [10] Total time: 0:00:25 (0.3987 s / it) +cv: [10] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 0.025 acc: 0.993 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [11] [ 0/400] eta: 0:21:40 lr: nan time: 3.2517 data: 2.8623 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:50 lr: 0.000195 loss: 0.0877 (0.1122) grad: 0.0992 (0.1071) time: 0.4744 data: 0.0036 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:16 lr: 0.000193 loss: 0.0824 (0.1079) grad: 0.0929 (0.0984) time: 0.4792 data: 0.0045 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:56 lr: 0.000192 loss: 0.0775 (0.1043) grad: 0.0929 (0.1030) time: 0.4701 data: 0.0042 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:40 lr: 0.000190 loss: 0.0865 (0.1014) grad: 0.0929 (0.1003) time: 0.4458 data: 0.0041 max mem: 22446 +train: [11] [100/400] eta: 0:02:27 lr: 0.000189 loss: 0.0808 (0.0984) grad: 0.0929 (0.0996) time: 0.4549 data: 0.0042 max mem: 22446 +train: [11] [120/400] eta: 0:02:16 lr: 0.000187 loss: 0.0710 (0.0971) grad: 0.1130 (0.1008) time: 0.4716 data: 0.0043 max mem: 22446 +train: [11] [140/400] eta: 0:02:06 lr: 0.000186 loss: 0.0788 (0.0979) grad: 0.0959 (0.1010) time: 0.4663 data: 0.0042 max mem: 22446 +train: [11] [160/400] eta: 0:01:55 lr: 0.000184 loss: 0.0975 (0.0989) grad: 0.0906 (0.1016) time: 0.4486 data: 0.0043 max mem: 22446 +train: [11] [180/400] eta: 0:01:45 lr: 0.000183 loss: 0.0801 (0.0977) grad: 0.0950 (0.1012) time: 0.4458 data: 0.0041 max mem: 22446 +train: [11] [200/400] eta: 0:01:34 lr: 0.000181 loss: 0.0736 (0.0953) grad: 0.0885 (0.0997) time: 0.4424 data: 0.0043 max mem: 22446 +train: [11] [220/400] eta: 0:01:24 lr: 0.000180 loss: 0.0706 (0.0947) grad: 0.0898 (0.1009) time: 0.4442 data: 0.0042 max mem: 22446 +train: [11] [240/400] eta: 0:01:15 lr: 0.000178 loss: 0.0737 (0.0936) grad: 0.1064 (0.1003) time: 0.4461 data: 0.0041 max mem: 22446 +train: [11] [260/400] eta: 0:01:05 lr: 0.000177 loss: 0.0801 (0.0943) grad: 0.0893 (0.0997) time: 0.4652 data: 0.0042 max mem: 22446 +train: [11] [280/400] eta: 0:00:56 lr: 0.000175 loss: 0.0866 (0.0948) grad: 0.0945 (0.1006) time: 0.4721 data: 0.0045 max mem: 22446 +train: [11] [300/400] eta: 0:00:47 lr: 0.000174 loss: 0.0790 (0.0944) grad: 0.1063 (0.1011) time: 0.5920 data: 0.1669 max mem: 22446 +train: [11] [320/400] eta: 0:00:38 lr: 0.000172 loss: 0.0790 (0.0944) grad: 0.0919 (0.0993) time: 0.4424 data: 0.0046 max mem: 22446 +train: [11] [340/400] eta: 0:00:28 lr: 0.000170 loss: 0.0785 (0.0937) grad: 0.0726 (0.0977) time: 0.4467 data: 0.0038 max mem: 22446 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 0.0780 (0.0932) grad: 0.0710 (0.0960) time: 0.4445 data: 0.0043 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.0732 (0.0921) grad: 0.0710 (0.0945) time: 0.4500 data: 0.0044 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.0742 (0.0917) grad: 0.0645 (0.0937) time: 0.4506 data: 0.0044 max mem: 22446 +train: [11] Total time: 0:03:08 (0.4702 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.0742 (0.0917) grad: 0.0645 (0.0937) +eval (validation): [11] [ 0/63] eta: 0:03:19 time: 3.1611 data: 2.9243 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:20 time: 0.3451 data: 0.0098 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3316 data: 0.0030 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3261 data: 0.0034 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3241 data: 0.0034 max mem: 22446 +eval (validation): [11] Total time: 0:00:24 (0.3837 s / it) +cv: [11] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.036 acc: 0.994 f1: 0.992 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [12] [ 0/400] eta: 0:21:24 lr: nan time: 3.2120 data: 2.8496 max mem: 22446 +train: [12] [ 20/400] eta: 0:03:47 lr: 0.000164 loss: 0.0709 (0.0778) grad: 0.0689 (0.0766) time: 0.4691 data: 0.0036 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:13 lr: 0.000163 loss: 0.0709 (0.0775) grad: 0.0896 (0.0823) time: 0.4748 data: 0.0043 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:54 lr: 0.000161 loss: 0.0642 (0.0752) grad: 0.0809 (0.0786) time: 0.4647 data: 0.0043 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:40 lr: 0.000160 loss: 0.0642 (0.0737) grad: 0.0716 (0.0782) time: 0.4635 data: 0.0041 max mem: 22446 +train: [12] [100/400] eta: 0:02:28 lr: 0.000158 loss: 0.0742 (0.0747) grad: 0.0681 (0.0733) time: 0.4680 data: 0.0041 max mem: 22446 +train: [12] [120/400] eta: 0:02:17 lr: 0.000156 loss: 0.0778 (0.0785) grad: 0.0681 (0.0763) time: 0.4695 data: 0.0042 max mem: 22446 +train: [12] [140/400] eta: 0:02:06 lr: 0.000155 loss: 0.0774 (0.0785) grad: 0.0812 (0.0766) time: 0.4618 data: 0.0043 max mem: 22446 +train: [12] [160/400] eta: 0:01:56 lr: 0.000153 loss: 0.0665 (0.0782) grad: 0.0710 (0.0755) time: 0.4706 data: 0.0041 max mem: 22446 +train: [12] [180/400] eta: 0:01:45 lr: 0.000152 loss: 0.0622 (0.0774) grad: 0.0392 (0.0724) time: 0.4534 data: 0.0044 max mem: 22446 +train: [12] [200/400] eta: 0:01:35 lr: 0.000150 loss: 0.0644 (0.0767) grad: 0.0478 (0.0709) time: 0.4585 data: 0.0043 max mem: 22446 +train: [12] [220/400] eta: 0:01:25 lr: 0.000149 loss: 0.0670 (0.0774) grad: 0.0584 (0.0719) time: 0.4610 data: 0.0043 max mem: 22446 +train: [12] [240/400] eta: 0:01:16 lr: 0.000147 loss: 0.0765 (0.0782) grad: 0.0841 (0.0735) time: 0.4520 data: 0.0043 max mem: 22446 +train: [12] [260/400] eta: 0:01:06 lr: 0.000145 loss: 0.0758 (0.0776) grad: 0.0792 (0.0732) time: 0.4780 data: 0.0044 max mem: 22446 +train: [12] [280/400] eta: 0:00:56 lr: 0.000144 loss: 0.0661 (0.0771) grad: 0.0717 (0.0746) time: 0.4571 data: 0.0046 max mem: 22446 +train: [12] [300/400] eta: 0:00:48 lr: 0.000142 loss: 0.0678 (0.0776) grad: 0.0735 (0.0742) time: 0.6174 data: 0.1659 max mem: 22446 +train: [12] [320/400] eta: 0:00:38 lr: 0.000141 loss: 0.0695 (0.0771) grad: 0.0626 (0.0735) time: 0.4467 data: 0.0040 max mem: 22446 +train: [12] [340/400] eta: 0:00:28 lr: 0.000139 loss: 0.0673 (0.0769) grad: 0.0626 (0.0728) time: 0.4419 data: 0.0044 max mem: 22446 +train: [12] [360/400] eta: 0:00:19 lr: 0.000138 loss: 0.0710 (0.0765) grad: 0.0662 (0.0724) time: 0.4424 data: 0.0042 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.0657 (0.0768) grad: 0.0691 (0.0724) time: 0.4432 data: 0.0042 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.0697 (0.0768) grad: 0.0594 (0.0716) time: 0.4448 data: 0.0041 max mem: 22446 +train: [12] Total time: 0:03:09 (0.4745 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.0697 (0.0768) grad: 0.0594 (0.0716) +eval (validation): [12] [ 0/63] eta: 0:03:14 time: 3.0887 data: 2.8492 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:20 time: 0.3485 data: 0.0041 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3345 data: 0.0031 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3268 data: 0.0038 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3257 data: 0.0037 max mem: 22446 +eval (validation): [12] Total time: 0:00:24 (0.3844 s / it) +cv: [12] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.035 acc: 0.994 f1: 0.993 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:21:40 lr: nan time: 3.2516 data: 2.8779 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:40 lr: 0.000133 loss: 0.0667 (0.0674) grad: 0.0394 (0.0449) time: 0.4478 data: 0.0033 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:11 lr: 0.000131 loss: 0.0582 (0.0632) grad: 0.0368 (0.0405) time: 0.4775 data: 0.0041 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:52 lr: 0.000130 loss: 0.0544 (0.0624) grad: 0.0370 (0.0432) time: 0.4626 data: 0.0043 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:39 lr: 0.000128 loss: 0.0621 (0.0637) grad: 0.0439 (0.0449) time: 0.4692 data: 0.0044 max mem: 22446 +train: [13] [100/400] eta: 0:02:26 lr: 0.000127 loss: 0.0597 (0.0637) grad: 0.0362 (0.0440) time: 0.4481 data: 0.0042 max mem: 22446 +train: [13] [120/400] eta: 0:02:15 lr: 0.000125 loss: 0.0572 (0.0637) grad: 0.0360 (0.0447) time: 0.4594 data: 0.0042 max mem: 22446 +train: [13] [140/400] eta: 0:02:05 lr: 0.000124 loss: 0.0607 (0.0646) grad: 0.0414 (0.0456) time: 0.4638 data: 0.0042 max mem: 22446 +train: [13] [160/400] eta: 0:01:54 lr: 0.000122 loss: 0.0607 (0.0646) grad: 0.0436 (0.0457) time: 0.4614 data: 0.0044 max mem: 22446 +train: [13] [180/400] eta: 0:01:44 lr: 0.000120 loss: 0.0730 (0.0659) grad: 0.0369 (0.0459) time: 0.4403 data: 0.0042 max mem: 22446 +train: [13] [200/400] eta: 0:01:34 lr: 0.000119 loss: 0.0696 (0.0670) grad: 0.0369 (0.0464) time: 0.4508 data: 0.0046 max mem: 22446 +train: [13] [220/400] eta: 0:01:24 lr: 0.000117 loss: 0.0636 (0.0673) grad: 0.0392 (0.0469) time: 0.4478 data: 0.0044 max mem: 22446 +train: [13] [240/400] eta: 0:01:14 lr: 0.000116 loss: 0.0614 (0.0670) grad: 0.0394 (0.0470) time: 0.4485 data: 0.0044 max mem: 22446 +train: [13] [260/400] eta: 0:01:05 lr: 0.000114 loss: 0.0612 (0.0667) grad: 0.0379 (0.0470) time: 0.4657 data: 0.0043 max mem: 22446 +train: [13] [280/400] eta: 0:00:56 lr: 0.000113 loss: 0.0586 (0.0661) grad: 0.0395 (0.0471) time: 0.4538 data: 0.0043 max mem: 22446 +train: [13] [300/400] eta: 0:00:47 lr: 0.000111 loss: 0.0601 (0.0662) grad: 0.0454 (0.0478) time: 0.5894 data: 0.1622 max mem: 22446 +train: [13] [320/400] eta: 0:00:37 lr: 0.000110 loss: 0.0652 (0.0662) grad: 0.0388 (0.0471) time: 0.4499 data: 0.0032 max mem: 22446 +train: [13] [340/400] eta: 0:00:28 lr: 0.000108 loss: 0.0638 (0.0659) grad: 0.0332 (0.0466) time: 0.4514 data: 0.0044 max mem: 22446 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 0.0545 (0.0654) grad: 0.0332 (0.0464) time: 0.4482 data: 0.0042 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.0603 (0.0653) grad: 0.0344 (0.0461) time: 0.4467 data: 0.0041 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.0661 (0.0654) grad: 0.0380 (0.0462) time: 0.4470 data: 0.0044 max mem: 22446 +train: [13] Total time: 0:03:07 (0.4691 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.0661 (0.0654) grad: 0.0380 (0.0462) +eval (validation): [13] [ 0/63] eta: 0:03:13 time: 3.0778 data: 2.8044 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:20 time: 0.3362 data: 0.0044 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3410 data: 0.0027 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3245 data: 0.0036 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3214 data: 0.0036 max mem: 22446 +eval (validation): [13] Total time: 0:00:24 (0.3812 s / it) +cv: [13] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.034 acc: 0.993 f1: 0.992 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:21:26 lr: nan time: 3.2167 data: 2.8791 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:40 lr: 0.000102 loss: 0.0604 (0.0616) grad: 0.0318 (0.0338) time: 0.4496 data: 0.0029 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:09 lr: 0.000101 loss: 0.0604 (0.0617) grad: 0.0348 (0.0420) time: 0.4684 data: 0.0043 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:53 lr: 0.000099 loss: 0.0570 (0.0603) grad: 0.0378 (0.0409) time: 0.4747 data: 0.0045 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:40 lr: 0.000098 loss: 0.0518 (0.0601) grad: 0.0374 (0.0411) time: 0.4719 data: 0.0044 max mem: 22446 +train: [14] [100/400] eta: 0:02:27 lr: 0.000096 loss: 0.0522 (0.0598) grad: 0.0332 (0.0396) time: 0.4595 data: 0.0043 max mem: 22446 +train: [14] [120/400] eta: 0:02:16 lr: 0.000095 loss: 0.0571 (0.0615) grad: 0.0327 (0.0389) time: 0.4605 data: 0.0041 max mem: 22446 +train: [14] [140/400] eta: 0:02:05 lr: 0.000093 loss: 0.0613 (0.0627) grad: 0.0366 (0.0388) time: 0.4552 data: 0.0041 max mem: 22446 +train: [14] [160/400] eta: 0:01:54 lr: 0.000092 loss: 0.0511 (0.0621) grad: 0.0351 (0.0386) time: 0.4528 data: 0.0042 max mem: 22446 +train: [14] [180/400] eta: 0:01:44 lr: 0.000090 loss: 0.0538 (0.0613) grad: 0.0337 (0.0391) time: 0.4545 data: 0.0042 max mem: 22446 +train: [14] [200/400] eta: 0:01:34 lr: 0.000089 loss: 0.0562 (0.0618) grad: 0.0407 (0.0396) time: 0.4541 data: 0.0034 max mem: 22446 +train: [14] [220/400] eta: 0:01:24 lr: 0.000088 loss: 0.0587 (0.0615) grad: 0.0400 (0.0392) time: 0.4513 data: 0.0044 max mem: 22446 +train: [14] [240/400] eta: 0:01:15 lr: 0.000086 loss: 0.0571 (0.0610) grad: 0.0327 (0.0391) time: 0.4491 data: 0.0042 max mem: 22446 +train: [14] [260/400] eta: 0:01:05 lr: 0.000085 loss: 0.0529 (0.0603) grad: 0.0342 (0.0392) time: 0.4657 data: 0.0044 max mem: 22446 +train: [14] [280/400] eta: 0:00:56 lr: 0.000083 loss: 0.0608 (0.0610) grad: 0.0377 (0.0394) time: 0.4549 data: 0.0043 max mem: 22446 +train: [14] [300/400] eta: 0:00:47 lr: 0.000082 loss: 0.0647 (0.0609) grad: 0.0427 (0.0396) time: 0.6003 data: 0.1661 max mem: 22446 +train: [14] [320/400] eta: 0:00:38 lr: 0.000081 loss: 0.0571 (0.0608) grad: 0.0353 (0.0392) time: 0.4551 data: 0.0041 max mem: 22446 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 0.0632 (0.0614) grad: 0.0320 (0.0389) time: 0.4527 data: 0.0043 max mem: 22446 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 0.0644 (0.0615) grad: 0.0363 (0.0392) time: 0.4374 data: 0.0042 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.0640 (0.0617) grad: 0.0363 (0.0393) time: 0.4425 data: 0.0043 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.0572 (0.0616) grad: 0.0352 (0.0393) time: 0.4485 data: 0.0043 max mem: 22446 +train: [14] Total time: 0:03:08 (0.4704 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.0572 (0.0616) grad: 0.0352 (0.0393) +eval (validation): [14] [ 0/63] eta: 0:03:23 time: 3.2267 data: 2.9359 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:20 time: 0.3498 data: 0.0033 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3369 data: 0.0028 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3284 data: 0.0032 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3277 data: 0.0032 max mem: 22446 +eval (validation): [14] Total time: 0:00:24 (0.3884 s / it) +cv: [14] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.034 acc: 0.994 f1: 0.992 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:21:14 lr: nan time: 3.1857 data: 2.8439 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:39 lr: 0.000074 loss: 0.0568 (0.0638) grad: 0.0339 (0.0374) time: 0.4473 data: 0.0037 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:08 lr: 0.000072 loss: 0.0601 (0.0620) grad: 0.0352 (0.0367) time: 0.4683 data: 0.0042 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:51 lr: 0.000071 loss: 0.0504 (0.0580) grad: 0.0331 (0.0367) time: 0.4656 data: 0.0042 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:38 lr: 0.000070 loss: 0.0479 (0.0575) grad: 0.0321 (0.0364) time: 0.4640 data: 0.0042 max mem: 22446 +train: [15] [100/400] eta: 0:02:25 lr: 0.000068 loss: 0.0475 (0.0568) grad: 0.0312 (0.0361) time: 0.4482 data: 0.0041 max mem: 22446 +train: [15] [120/400] eta: 0:02:14 lr: 0.000067 loss: 0.0511 (0.0563) grad: 0.0338 (0.0365) time: 0.4530 data: 0.0044 max mem: 22446 +train: [15] [140/400] eta: 0:02:04 lr: 0.000066 loss: 0.0537 (0.0566) grad: 0.0354 (0.0360) time: 0.4724 data: 0.0045 max mem: 22446 +train: [15] [160/400] eta: 0:01:54 lr: 0.000064 loss: 0.0570 (0.0567) grad: 0.0331 (0.0365) time: 0.4594 data: 0.0042 max mem: 22446 +train: [15] [180/400] eta: 0:01:44 lr: 0.000063 loss: 0.0530 (0.0570) grad: 0.0341 (0.0362) time: 0.4596 data: 0.0044 max mem: 22446 +train: [15] [200/400] eta: 0:01:34 lr: 0.000062 loss: 0.0595 (0.0577) grad: 0.0341 (0.0360) time: 0.4563 data: 0.0043 max mem: 22446 +train: [15] [220/400] eta: 0:01:24 lr: 0.000061 loss: 0.0569 (0.0572) grad: 0.0354 (0.0359) time: 0.4519 data: 0.0043 max mem: 22446 +train: [15] [240/400] eta: 0:01:15 lr: 0.000059 loss: 0.0506 (0.0569) grad: 0.0337 (0.0359) time: 0.4460 data: 0.0044 max mem: 22446 +train: [15] [260/400] eta: 0:01:05 lr: 0.000058 loss: 0.0499 (0.0569) grad: 0.0327 (0.0357) time: 0.4583 data: 0.0045 max mem: 22446 +train: [15] [280/400] eta: 0:00:56 lr: 0.000057 loss: 0.0487 (0.0570) grad: 0.0325 (0.0358) time: 0.4646 data: 0.0046 max mem: 22446 +train: [15] [300/400] eta: 0:00:47 lr: 0.000056 loss: 0.0504 (0.0570) grad: 0.0321 (0.0356) time: 0.6130 data: 0.1620 max mem: 22446 +train: [15] [320/400] eta: 0:00:38 lr: 0.000054 loss: 0.0506 (0.0566) grad: 0.0321 (0.0357) time: 0.4522 data: 0.0038 max mem: 22446 +train: [15] [340/400] eta: 0:00:28 lr: 0.000053 loss: 0.0506 (0.0566) grad: 0.0346 (0.0357) time: 0.4478 data: 0.0042 max mem: 22446 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 0.0507 (0.0567) grad: 0.0363 (0.0358) time: 0.4459 data: 0.0040 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.0460 (0.0563) grad: 0.0343 (0.0356) time: 0.4425 data: 0.0044 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.0517 (0.0566) grad: 0.0323 (0.0355) time: 0.4437 data: 0.0043 max mem: 22446 +train: [15] Total time: 0:03:08 (0.4704 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.0517 (0.0566) grad: 0.0323 (0.0355) +eval (validation): [15] [ 0/63] eta: 0:03:18 time: 3.1578 data: 2.9192 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:19 time: 0.3285 data: 0.0032 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:09 time: 0.3392 data: 0.0027 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3210 data: 0.0034 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3158 data: 0.0034 max mem: 22446 +eval (validation): [15] Total time: 0:00:23 (0.3785 s / it) +cv: [15] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.034 acc: 0.994 f1: 0.993 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [16] [ 0/400] eta: 0:21:25 lr: nan time: 3.2146 data: 2.8718 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:43 lr: 0.000048 loss: 0.0506 (0.0569) grad: 0.0308 (0.0332) time: 0.4576 data: 0.0034 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:11 lr: 0.000047 loss: 0.0541 (0.0551) grad: 0.0308 (0.0332) time: 0.4706 data: 0.0041 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:54 lr: 0.000046 loss: 0.0550 (0.0550) grad: 0.0300 (0.0325) time: 0.4808 data: 0.0043 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:40 lr: 0.000045 loss: 0.0561 (0.0564) grad: 0.0304 (0.0325) time: 0.4595 data: 0.0043 max mem: 22446 +train: [16] [100/400] eta: 0:02:28 lr: 0.000044 loss: 0.0556 (0.0568) grad: 0.0319 (0.0332) time: 0.4653 data: 0.0042 max mem: 22446 +train: [16] [120/400] eta: 0:02:16 lr: 0.000043 loss: 0.0484 (0.0563) grad: 0.0352 (0.0334) time: 0.4605 data: 0.0040 max mem: 22446 +train: [16] [140/400] eta: 0:02:06 lr: 0.000042 loss: 0.0484 (0.0564) grad: 0.0347 (0.0339) time: 0.4818 data: 0.0045 max mem: 22446 +train: [16] [160/400] eta: 0:01:55 lr: 0.000041 loss: 0.0543 (0.0564) grad: 0.0340 (0.0338) time: 0.4521 data: 0.0044 max mem: 22446 +train: [16] [180/400] eta: 0:01:45 lr: 0.000040 loss: 0.0546 (0.0563) grad: 0.0315 (0.0336) time: 0.4551 data: 0.0042 max mem: 22446 +train: [16] [200/400] eta: 0:01:35 lr: 0.000039 loss: 0.0548 (0.0572) grad: 0.0334 (0.0341) time: 0.4496 data: 0.0044 max mem: 22446 +train: [16] [220/400] eta: 0:01:25 lr: 0.000038 loss: 0.0510 (0.0568) grad: 0.0345 (0.0339) time: 0.4589 data: 0.0045 max mem: 22446 +train: [16] [240/400] eta: 0:01:15 lr: 0.000036 loss: 0.0481 (0.0564) grad: 0.0294 (0.0335) time: 0.4607 data: 0.0043 max mem: 22446 +train: [16] [260/400] eta: 0:01:06 lr: 0.000035 loss: 0.0537 (0.0566) grad: 0.0305 (0.0336) time: 0.4637 data: 0.0043 max mem: 22446 +train: [16] [280/400] eta: 0:00:56 lr: 0.000034 loss: 0.0492 (0.0563) grad: 0.0344 (0.0339) time: 0.4489 data: 0.0044 max mem: 22446 +train: [16] [300/400] eta: 0:00:48 lr: 0.000033 loss: 0.0474 (0.0563) grad: 0.0339 (0.0340) time: 0.6163 data: 0.1790 max mem: 22446 +train: [16] [320/400] eta: 0:00:38 lr: 0.000032 loss: 0.0546 (0.0562) grad: 0.0341 (0.0341) time: 0.4649 data: 0.0032 max mem: 22446 +train: [16] [340/400] eta: 0:00:28 lr: 0.000031 loss: 0.0577 (0.0566) grad: 0.0342 (0.0343) time: 0.4480 data: 0.0041 max mem: 22446 +train: [16] [360/400] eta: 0:00:19 lr: 0.000031 loss: 0.0575 (0.0565) grad: 0.0320 (0.0342) time: 0.4488 data: 0.0042 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.0488 (0.0561) grad: 0.0332 (0.0341) time: 0.4447 data: 0.0042 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.0465 (0.0560) grad: 0.0345 (0.0342) time: 0.4472 data: 0.0041 max mem: 22446 +train: [16] Total time: 0:03:09 (0.4743 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.0465 (0.0560) grad: 0.0345 (0.0342) +eval (validation): [16] [ 0/63] eta: 0:03:23 time: 3.2275 data: 2.9359 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:21 time: 0.3697 data: 0.0031 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:09 time: 0.3419 data: 0.0034 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3254 data: 0.0034 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3219 data: 0.0033 max mem: 22446 +eval (validation): [16] Total time: 0:00:24 (0.3956 s / it) +cv: [16] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.034 acc: 0.994 f1: 0.993 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:00 lr: nan time: 3.3011 data: 2.9561 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:46 lr: 0.000028 loss: 0.0462 (0.0522) grad: 0.0327 (0.0327) time: 0.4614 data: 0.0034 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:13 lr: 0.000027 loss: 0.0517 (0.0553) grad: 0.0327 (0.0340) time: 0.4762 data: 0.0042 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:54 lr: 0.000026 loss: 0.0570 (0.0577) grad: 0.0345 (0.0340) time: 0.4603 data: 0.0044 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:40 lr: 0.000025 loss: 0.0610 (0.0580) grad: 0.0332 (0.0338) time: 0.4634 data: 0.0046 max mem: 22446 +train: [17] [100/400] eta: 0:02:26 lr: 0.000024 loss: 0.0545 (0.0572) grad: 0.0324 (0.0337) time: 0.4472 data: 0.0043 max mem: 22446 +train: [17] [120/400] eta: 0:02:15 lr: 0.000023 loss: 0.0522 (0.0570) grad: 0.0311 (0.0335) time: 0.4486 data: 0.0042 max mem: 22446 +train: [17] [140/400] eta: 0:02:06 lr: 0.000023 loss: 0.0504 (0.0568) grad: 0.0302 (0.0336) time: 0.4980 data: 0.0045 max mem: 22446 +train: [17] [160/400] eta: 0:01:55 lr: 0.000022 loss: 0.0541 (0.0565) grad: 0.0329 (0.0337) time: 0.4597 data: 0.0045 max mem: 22446 +train: [17] [180/400] eta: 0:01:45 lr: 0.000021 loss: 0.0532 (0.0558) grad: 0.0324 (0.0335) time: 0.4679 data: 0.0042 max mem: 22446 +train: [17] [200/400] eta: 0:01:35 lr: 0.000020 loss: 0.0446 (0.0554) grad: 0.0317 (0.0334) time: 0.4584 data: 0.0044 max mem: 22446 +train: [17] [220/400] eta: 0:01:26 lr: 0.000019 loss: 0.0467 (0.0553) grad: 0.0290 (0.0334) time: 0.4747 data: 0.0044 max mem: 22446 +train: [17] [240/400] eta: 0:01:16 lr: 0.000019 loss: 0.0478 (0.0551) grad: 0.0295 (0.0334) time: 0.4751 data: 0.0043 max mem: 22446 +train: [17] [260/400] eta: 0:01:06 lr: 0.000018 loss: 0.0478 (0.0553) grad: 0.0331 (0.0334) time: 0.4769 data: 0.0045 max mem: 22446 +train: [17] [280/400] eta: 0:00:57 lr: 0.000017 loss: 0.0537 (0.0554) grad: 0.0336 (0.0332) time: 0.4638 data: 0.0044 max mem: 22446 +train: [17] [300/400] eta: 0:00:48 lr: 0.000016 loss: 0.0537 (0.0555) grad: 0.0336 (0.0335) time: 0.6186 data: 0.1776 max mem: 22446 +train: [17] [320/400] eta: 0:00:38 lr: 0.000016 loss: 0.0562 (0.0556) grad: 0.0355 (0.0337) time: 0.4664 data: 0.0034 max mem: 22446 +train: [17] [340/400] eta: 0:00:28 lr: 0.000015 loss: 0.0570 (0.0557) grad: 0.0332 (0.0335) time: 0.4588 data: 0.0042 max mem: 22446 +train: [17] [360/400] eta: 0:00:19 lr: 0.000014 loss: 0.0497 (0.0554) grad: 0.0295 (0.0333) time: 0.4661 data: 0.0045 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.0473 (0.0556) grad: 0.0303 (0.0332) time: 0.4644 data: 0.0044 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.0462 (0.0554) grad: 0.0316 (0.0332) time: 0.4544 data: 0.0043 max mem: 22446 +train: [17] Total time: 0:03:12 (0.4807 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.0462 (0.0554) grad: 0.0316 (0.0332) +eval (validation): [17] [ 0/63] eta: 0:03:18 time: 3.1536 data: 2.8615 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3623 data: 0.0033 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3424 data: 0.0030 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3285 data: 0.0033 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3273 data: 0.0033 max mem: 22446 +eval (validation): [17] Total time: 0:00:24 (0.3940 s / it) +cv: [17] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.034 acc: 0.993 f1: 0.993 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:21:56 lr: nan time: 3.2924 data: 2.9418 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:41 lr: 0.000012 loss: 0.0515 (0.0534) grad: 0.0279 (0.0315) time: 0.4485 data: 0.0027 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:07 lr: 0.000012 loss: 0.0515 (0.0562) grad: 0.0326 (0.0329) time: 0.4544 data: 0.0039 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:51 lr: 0.000011 loss: 0.0499 (0.0546) grad: 0.0334 (0.0327) time: 0.4688 data: 0.0045 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:40 lr: 0.000011 loss: 0.0535 (0.0556) grad: 0.0319 (0.0328) time: 0.4896 data: 0.0044 max mem: 22446 +train: [18] [100/400] eta: 0:02:27 lr: 0.000010 loss: 0.0535 (0.0558) grad: 0.0323 (0.0329) time: 0.4523 data: 0.0043 max mem: 22446 +train: [18] [120/400] eta: 0:02:16 lr: 0.000009 loss: 0.0444 (0.0540) grad: 0.0314 (0.0327) time: 0.4608 data: 0.0043 max mem: 22446 +train: [18] [140/400] eta: 0:02:05 lr: 0.000009 loss: 0.0506 (0.0547) grad: 0.0318 (0.0329) time: 0.4685 data: 0.0044 max mem: 22446 +train: [18] [160/400] eta: 0:01:55 lr: 0.000008 loss: 0.0541 (0.0545) grad: 0.0325 (0.0330) time: 0.4574 data: 0.0043 max mem: 22446 +train: [18] [180/400] eta: 0:01:45 lr: 0.000008 loss: 0.0476 (0.0539) grad: 0.0308 (0.0332) time: 0.4625 data: 0.0044 max mem: 22446 +train: [18] [200/400] eta: 0:01:35 lr: 0.000007 loss: 0.0476 (0.0538) grad: 0.0315 (0.0330) time: 0.4527 data: 0.0043 max mem: 22446 +train: [18] [220/400] eta: 0:01:25 lr: 0.000007 loss: 0.0487 (0.0537) grad: 0.0304 (0.0329) time: 0.4502 data: 0.0044 max mem: 22446 +train: [18] [240/400] eta: 0:01:15 lr: 0.000006 loss: 0.0498 (0.0536) grad: 0.0317 (0.0329) time: 0.4588 data: 0.0044 max mem: 22446 +train: [18] [260/400] eta: 0:01:06 lr: 0.000006 loss: 0.0498 (0.0540) grad: 0.0310 (0.0327) time: 0.4744 data: 0.0044 max mem: 22446 +train: [18] [280/400] eta: 0:00:56 lr: 0.000006 loss: 0.0504 (0.0540) grad: 0.0324 (0.0328) time: 0.4528 data: 0.0041 max mem: 22446 +train: [18] [300/400] eta: 0:00:48 lr: 0.000005 loss: 0.0486 (0.0536) grad: 0.0329 (0.0329) time: 0.6284 data: 0.1871 max mem: 22446 +train: [18] [320/400] eta: 0:00:38 lr: 0.000005 loss: 0.0490 (0.0536) grad: 0.0328 (0.0329) time: 0.4625 data: 0.0035 max mem: 22446 +train: [18] [340/400] eta: 0:00:28 lr: 0.000004 loss: 0.0524 (0.0540) grad: 0.0335 (0.0330) time: 0.4544 data: 0.0037 max mem: 22446 +train: [18] [360/400] eta: 0:00:19 lr: 0.000004 loss: 0.0499 (0.0537) grad: 0.0320 (0.0331) time: 0.4698 data: 0.0042 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.0479 (0.0533) grad: 0.0314 (0.0330) time: 0.4522 data: 0.0042 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.0456 (0.0531) grad: 0.0314 (0.0329) time: 0.4688 data: 0.0043 max mem: 22446 +train: [18] Total time: 0:03:10 (0.4771 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.0456 (0.0531) grad: 0.0314 (0.0329) +eval (validation): [18] [ 0/63] eta: 0:03:22 time: 3.2072 data: 2.9159 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:20 time: 0.3399 data: 0.0071 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:09 time: 0.3465 data: 0.0027 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3448 data: 0.0035 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3438 data: 0.0035 max mem: 22446 +eval (validation): [18] Total time: 0:00:24 (0.3940 s / it) +cv: [18] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.034 acc: 0.994 f1: 0.993 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:22 lr: nan time: 3.3571 data: 3.0048 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:45 lr: 0.000003 loss: 0.0524 (0.0521) grad: 0.0301 (0.0314) time: 0.4565 data: 0.0029 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:14 lr: 0.000003 loss: 0.0524 (0.0520) grad: 0.0310 (0.0320) time: 0.4806 data: 0.0037 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:55 lr: 0.000002 loss: 0.0523 (0.0523) grad: 0.0302 (0.0315) time: 0.4711 data: 0.0043 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:43 lr: 0.000002 loss: 0.0512 (0.0533) grad: 0.0322 (0.0325) time: 0.4930 data: 0.0044 max mem: 22446 +train: [19] [100/400] eta: 0:02:30 lr: 0.000002 loss: 0.0512 (0.0548) grad: 0.0337 (0.0325) time: 0.4639 data: 0.0043 max mem: 22446 +train: [19] [120/400] eta: 0:02:18 lr: 0.000002 loss: 0.0506 (0.0539) grad: 0.0307 (0.0321) time: 0.4666 data: 0.0043 max mem: 22446 +train: [19] [140/400] eta: 0:02:07 lr: 0.000001 loss: 0.0482 (0.0535) grad: 0.0314 (0.0324) time: 0.4655 data: 0.0041 max mem: 22446 +train: [19] [160/400] eta: 0:01:57 lr: 0.000001 loss: 0.0512 (0.0539) grad: 0.0316 (0.0323) time: 0.4706 data: 0.0043 max mem: 22446 +train: [19] [180/400] eta: 0:01:47 lr: 0.000001 loss: 0.0532 (0.0542) grad: 0.0323 (0.0324) time: 0.4733 data: 0.0041 max mem: 22446 +train: [19] [200/400] eta: 0:01:36 lr: 0.000001 loss: 0.0524 (0.0543) grad: 0.0329 (0.0325) time: 0.4603 data: 0.0043 max mem: 22446 +train: [19] [220/400] eta: 0:01:26 lr: 0.000001 loss: 0.0559 (0.0549) grad: 0.0304 (0.0326) time: 0.4698 data: 0.0045 max mem: 22446 +train: [19] [240/400] eta: 0:01:17 lr: 0.000001 loss: 0.0559 (0.0545) grad: 0.0312 (0.0325) time: 0.4906 data: 0.0044 max mem: 22446 +train: [19] [260/400] eta: 0:01:07 lr: 0.000000 loss: 0.0488 (0.0544) grad: 0.0310 (0.0324) time: 0.4799 data: 0.0042 max mem: 22446 +train: [19] [280/400] eta: 0:00:57 lr: 0.000000 loss: 0.0531 (0.0548) grad: 0.0310 (0.0326) time: 0.4592 data: 0.0043 max mem: 22446 +train: [19] [300/400] eta: 0:00:49 lr: 0.000000 loss: 0.0504 (0.0544) grad: 0.0304 (0.0327) time: 0.6370 data: 0.1920 max mem: 22446 +train: [19] [320/400] eta: 0:00:39 lr: 0.000000 loss: 0.0494 (0.0544) grad: 0.0343 (0.0328) time: 0.4658 data: 0.0035 max mem: 22446 +train: [19] [340/400] eta: 0:00:29 lr: 0.000000 loss: 0.0483 (0.0541) grad: 0.0343 (0.0328) time: 0.4573 data: 0.0043 max mem: 22446 +train: [19] [360/400] eta: 0:00:19 lr: 0.000000 loss: 0.0486 (0.0540) grad: 0.0309 (0.0328) time: 0.4765 data: 0.0044 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.0491 (0.0538) grad: 0.0306 (0.0327) time: 0.4714 data: 0.0045 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.0483 (0.0537) grad: 0.0306 (0.0328) time: 0.4730 data: 0.0043 max mem: 22446 +train: [19] Total time: 0:03:14 (0.4869 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.0483 (0.0537) grad: 0.0306 (0.0328) +eval (validation): [19] [ 0/63] eta: 0:03:28 time: 3.3029 data: 3.0054 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:20 time: 0.3423 data: 0.0032 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3484 data: 0.0033 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3453 data: 0.0037 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3409 data: 0.0038 max mem: 22446 +eval (validation): [19] Total time: 0:00:25 (0.3978 s / it) +cv: [19] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.034 acc: 0.994 f1: 0.993 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9935515873015873, "hparam": [5.1, 1.0], "hparam_id": 34, "epoch": 19, "is_best": false, "best_score": 0.9937996031746031} +eval (train): [20] [ 0/297] eta: 0:14:56 time: 3.0200 data: 2.7879 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:18 time: 0.3748 data: 0.0168 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:47 time: 0.3331 data: 0.0028 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:35 time: 0.3694 data: 0.0036 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:25 time: 0.3588 data: 0.0037 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:16 time: 0.3661 data: 0.0040 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:08 time: 0.3714 data: 0.0038 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:59 time: 0.3356 data: 0.0035 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:51 time: 0.3842 data: 0.0035 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:43 time: 0.3586 data: 0.0034 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:36 time: 0.3539 data: 0.0034 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3933 data: 0.0041 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3471 data: 0.0032 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3931 data: 0.0039 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3677 data: 0.0034 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3353 data: 0.0036 max mem: 22446 +eval (train): [20] Total time: 0:01:51 (0.3741 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:01 time: 2.8846 data: 2.6453 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:19 time: 0.3431 data: 0.0040 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3848 data: 0.0039 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3441 data: 0.0035 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3424 data: 0.0035 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4018 s / it) +eval (test): [20] [ 0/79] eta: 0:03:42 time: 2.8201 data: 2.5739 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:30 time: 0.3992 data: 0.0042 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3887 data: 0.0035 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:08 time: 0.3605 data: 0.0035 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3467 data: 0.0033 max mem: 22446 +eval (test): [20] Total time: 0:00:32 (0.4101 s / it) +evaluating best checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9937996031746031, "hparam": [5.1, 1.0], "hparam_id": 34, "epoch": 15, "is_best": true, "best_score": 0.9937996031746031} +eval (train): [20] [ 0/297] eta: 0:15:32 time: 3.1392 data: 2.8290 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:24 time: 0.3903 data: 0.0049 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:55 time: 0.3702 data: 0.0035 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:41 time: 0.3842 data: 0.0036 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:29 time: 0.3640 data: 0.0036 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:19 time: 0.3816 data: 0.0038 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:10 time: 0.3748 data: 0.0036 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:02 time: 0.3694 data: 0.0034 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:54 time: 0.3821 data: 0.0034 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:45 time: 0.3690 data: 0.0036 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:37 time: 0.3704 data: 0.0033 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:29 time: 0.3533 data: 0.0032 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3759 data: 0.0035 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:14 time: 0.3675 data: 0.0033 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3694 data: 0.0035 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3455 data: 0.0035 max mem: 22446 +eval (train): [20] Total time: 0:01:53 (0.3833 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:28 time: 3.3121 data: 3.0079 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:22 time: 0.3814 data: 0.0030 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:10 time: 0.3534 data: 0.0032 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3358 data: 0.0035 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3324 data: 0.0035 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4069 s / it) +eval (test): [20] [ 0/79] eta: 0:04:12 time: 3.1918 data: 2.8760 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:32 time: 0.4116 data: 0.0047 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3621 data: 0.0033 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:08 time: 0.3873 data: 0.0038 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3564 data: 0.0032 max mem: 22446 +eval (test): [20] Total time: 0:00:32 (0.4175 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|-----------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 15 | 0.00153 | 0.05 | 34 | [5.1, 1.0] | train | 6.8654e-05 | 1 | 0 | 1 | 0 | +| flat_mae | patch | attn | hcpya_task21 | best | 15 | 0.00153 | 0.05 | 34 | [5.1, 1.0] | validation | 0.034009 | 0.9938 | 0.0012947 | 0.99291 | 0.0016491 | +| flat_mae | patch | attn | hcpya_task21 | best | 15 | 0.00153 | 0.05 | 34 | [5.1, 1.0] | test | 0.045175 | 0.99087 | 0.0012565 | 0.98833 | 0.0018213 | + + +done! total time: 1:20:04 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/train_log.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..01bf49664ae83a9d75c3c9251994986af2d26165 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.0483060079813002, "train/grad": 0.19685757294297218, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.038499755859375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.0352783203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.030074462890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.024801025390625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.019603271484375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.01248779296875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.004234619140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.99521484375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.983165283203125, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.970406494140625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.9577685546875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.938641357421875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9199334716796876, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.892684326171875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.8662506103515626, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.8406549072265626, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.80768798828125, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.768271789550781, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.723161315917969, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.68023193359375, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.625579071044922, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.5674642944335937, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.5003880310058593, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.4260704803466795, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.346519546508789, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.2495885467529297, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.161968650817871, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.082419376373291, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.975806016921997, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.852665467262268, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7465280437469481, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6539199006557466, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5357002928853034, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.438015016168356, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.3324224194139243, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.2387801353447139, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.1490446001756935, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0720165504701435, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.9964348784461617, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.9118931731302291, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.85272156576626, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.8046208908595145, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.7472582204546779, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.7027806222718209, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6564384748786688, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.6115100010391324, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.5755094556044787, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5368222897592932, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.5070586631260813, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.036637258129194376, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.036585622038692235, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.036500292364507914, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03641427550464869, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03632976407185197, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.036211030203849075, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03607940813526511, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03593247586861253, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03574049239978194, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03553342562168837, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03533080133609474, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.035025333808735015, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03472563948482275, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.034295030273497104, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03388000170700252, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.033486925372853873, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03299752905033529, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03244221525266767, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0318579235766083, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03135189078748226, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.030767465233802794, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03020145292393863, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02959699566476047, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.028960252003744243, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.028296997640281915, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.027493035439401865, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.026766283004544675, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02610327214933932, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.025222625024616717, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.024210990038700402, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02333552232943475, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.022547005815431474, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.021481703175231813, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.020540233156643807, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.01948118855827488, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.018544794124318288, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.017642307347850873, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01686647375463508, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01623819700151216, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.015541298532043583, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.014968850598088466, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.014384823623113335, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.013802003494347445, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.013617456255014987, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.013488773409626446, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.013195491540827789, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.012814214668469503, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01280971547064837, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.012751622894429602, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9972732067108154, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.9877376556396484, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.9721648693084717, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.9566569328308105, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.9412758350372314, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.920069694519043, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.8961827754974365, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.8695292472839355, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.8349690437316895, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7983336448669434, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.762611150741577, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7092483043670654, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6580522060394287, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5845093727111816, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5150041580200195, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4490511417388916, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.365623950958252, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.2677199840545654, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.157414197921753, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.0533103942871094, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.9226953983306885, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.7860255241394043, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.6327686309814453, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.4701001644134521, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.3058335781097412, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.121551275253296, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.9696838855743408, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.8425169587135315, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.6836550235748291, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.5100052952766418, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.3691115379333496, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.26047736406326294, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.15533925592899323, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10088218003511429, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07081970572471619, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.059287864714860916, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.053679004311561584, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.051211532205343246, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.050268664956092834, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0542348213493824, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06152607500553131, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.05666319653391838, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.0463535450398922, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.04465016350150108, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.044681746512651443, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.042430371046066284, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.044659584760665894, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.056001607328653336, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.06447494775056839, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.11904761904761904, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.14285714285714285, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.21378968253968253, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2750496031746032, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.30654761904761907, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.32043650793650796, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3211805555555556, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.3189484126984127, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.31200396825396826, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.30357142857142855, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.2961309523809524, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.28621031746031744, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2847222222222222, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.29464285714285715, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.31646825396825395, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.3402777777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.36830357142857145, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.38318452380952384, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.4097222222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.45634920634920634, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.5337301587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.6001984126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.6364087301587301, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.716765873015873, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.7901785714285714, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8447420634920635, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8824404761904762, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9131944444444444, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9260912698412699, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9221230158730159, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9377480158730159, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9598214285714286, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.982390873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9828869047619048, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9796626984126984, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.053281646685734665, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06280424096832406, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.08618634804263961, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0972726608583294, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.10068465946509754, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.10059532809755495, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.10279686042376171, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.10421989123811162, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.10759492129799214, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.10834664209697221, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10828846873958675, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1072589458541773, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11008999159926072, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12655614723207295, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.14822015779766154, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1636779679677438, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17361128821399543, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1690575247989494, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.188619198882382, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.23298809751421723, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.3006631316329881, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.34685494517602083, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.402149505584754, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.5809755548517034, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7008432706867449, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7888883174731951, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8503151343327583, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8976117262477585, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9164216191556198, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9093773598009339, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9282400567733301, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9543970098944069, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9746254916030584, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9773156487812732, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9796490489413724, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9802482237889372, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.980316880358605, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9820482938862324, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9825778436269532, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9811792776236766, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9799576970252742, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.980825335093624, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9825167994589898, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9835420454497503, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9838057627525743, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9827188877664218, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9814516845613749, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9775624886959077, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9754361230797758, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 0.6564384748786688, "validation/loss_best": 0.044681746512651443, "validation/acc_best": 0.9856150793650794, "validation/f1_best": 0.9838057627525743} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.1069584104418755, "train/grad": 0.14146876960992813, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.9162060546875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.895966796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.862818603515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.830450439453125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.7989251708984373, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.7558587646484374, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.7085015869140623, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.6571636962890626, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.5916732788085937, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.5241644287109377, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.4596566772460937, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.365672302246094, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.276938629150391, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.15237060546875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.03668342590332, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.9288072967529297, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.796214828491211, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.646766357421875, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.488411808013916, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.3491657638549805, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.1874270153045654, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.031459903717041, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.8708540606498718, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.7169397866725922, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5787656351923942, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.4434636849164963, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.34752754975110295, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.2782236306928098, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.20619393084198237, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.14661045433022082, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.11179163310676814, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.09152498601935805, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.07613935743458569, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.06895605629310012, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.06428767175413669, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06141941752284765, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05934002484194934, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05755842102691531, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.05528609961271286, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05373046048916876, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.054609308429062364, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.055089235091581944, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.05457553061656654, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.054059945428743955, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.05780266623012722, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.06696829987689852, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.07420049479231239, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.10273987696506083, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.17100147595629095, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.034408235661685466, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.034090699832886456, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0335767852049321, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.033080152068287134, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03260330111719668, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.031962944231927394, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03127582456916571, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.030561111066490412, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02970414074137807, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02889809257350862, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0282035006955266, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.027304657455533742, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02656165275722742, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.025636242413893343, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.024852824686095117, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0241507613286376, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023289031917229294, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022287375261075795, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021175810229033233, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020158540466800334, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.018937590480782092, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.017711542476899923, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.016334178098477422, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.014828810719773173, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.013297921379562467, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.011639035468688235, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01041047653183341, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00953794397879392, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.008597315469523891, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.007738250926486216, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.007159468706813641, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006764129756484181, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006496012140996754, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006364811795065179, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006326051543001085, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006373757139663212, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0064326357748359445, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00644443306606263, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006355278238770552, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006334951933240518, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0064860753488028425, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.006592395543120802, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006639759290555958, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.006663247652468272, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.007437744427734288, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.008798140778962989, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.009168777760060039, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.012110602035536431, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01842541679739952, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.818793535232544, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.7855117321014404, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.7314460277557373, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.6796698570251465, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.62943696975708, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.5625147819519043, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.4895687103271484, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.4118385314941406, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.314274311065674, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.214702844619751, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.1203773021698, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.9839762449264526, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.8566794395446777, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.6810424327850342, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.523164987564087, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.3821632862091064, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.2177708148956299, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.0448033809661865, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.8736198544502258, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.7298360466957092, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.5658119320869446, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.41122713685035706, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.26984450221061707, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.16788147389888763, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.10865160077810287, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.07794969528913498, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.0657864362001419, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.059606656432151794, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.05423179641366005, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.049848511815071106, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.046404726803302765, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04361605644226074, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04040807858109474, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03823133558034897, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.036028485745191574, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.034134794026613235, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03173569589853287, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.03026304952800274, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.02877349965274334, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.028898201882839203, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.032034002244472504, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.03912128508090973, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.03475973382592201, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.031326569616794586, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.04679388180375099, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06207795813679695, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.08456912636756897, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.19530035555362701, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.44968631863594055, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.30753968253968256, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.30084325396825395, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2884424603174603, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.283234126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.28943452380952384, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3090277777777778, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3382936507936508, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.36433531746031744, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.3878968253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.4087301587301587, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.44816468253968256, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5347222222222222, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5967261904761905, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6257440476190477, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6941964285714286, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7678571428571429, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8276289682539683, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8658234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8921130952380952, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9151785714285714, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.941468253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9719742063492064, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9818948412698413, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9831349206349206, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9680059523809523, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.1113102325883951, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.11092237283976294, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.10871274435530298, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.10914933932249687, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.12172773377757846, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.14686512007876273, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.16902771572390285, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.1774868430207896, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.17556915465937153, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.18523999590731938, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.223867693960505, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.30344610653109383, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.3430731904813016, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.3829185373578449, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5392995611257051, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6831311089501715, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.7696174747066715, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8260078139908832, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8625119941291425, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8975010685052118, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9339014794840133, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9563879204200855, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9685131696093737, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9727371054998001, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9753896424351157, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9766140575862501, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9773556880502932, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9783984393356081, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9790911420478068, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9802205239303168, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.981353755418445, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9812717837638609, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9825888720985276, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9828549570722083, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9823111465520995, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9834015723391455, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9839775101245368, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9853405798323451, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9857755529502316, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.985790931681332, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9856209422650486, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9798916302831246, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9845867612469671, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9854010727732969, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.984903311992524, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9778400132249194, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9811833932281996, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9703634244297022, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9507786095388986, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.05373046048916876, "validation/loss_best": 0.028898201882839203, "validation/acc_best": 0.9890873015873016, "validation/f1_best": 0.985790931681332} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 0.8116681677103043, "train/grad": 0.19748358722776174, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.6927154541015623, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6454437255859373, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5702703857421874, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.4989962768554688, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.431130065917969, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.3410494995117186, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.2440426635742186, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.1409130859375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.0122528076171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.8828203582763672, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.7627595520019532, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5949251937866211, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.4460373687744141, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.2531883430480957, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.0903627586364746, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9505947780609131, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.7911679244041443, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.6262716162204742, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.47143893122673036, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.3558303260803223, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2487492646276951, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.17376712795346974, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.12266153583303094, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.09320543667301535, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0778901223372668, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.06879984254017472, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.06397872731089592, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.06099152684211731, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.05803264270536602, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.05550887143239379, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.05361942105926573, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.052304100887849925, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.051206470504403113, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.05074637828394771, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.05011293255724013, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.049170402977615595, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04991981151513755, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04960010072216391, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.049418254727497696, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.055154436575248836, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.061906101927161214, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06713889568112791, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.08110998868942261, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.09790191499516368, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.16275661276653408, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.2303908825945109, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6160559101495892, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.0642747736163438, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.053157421667129, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03142589760944247, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03073926749639213, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0296966736484319, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.028784739803522825, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02799917214550078, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.027086658840999006, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.026255530370399356, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.025504537615925072, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02468053959310055, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02390173020772636, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023168469481170177, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02207798993214965, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021016400288790465, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01952395778615028, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018194955452345313, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.017033578865230083, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.015698342537507414, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.014241451499983668, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.012666856173891574, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.011260095699690283, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.009700792393414304, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008372988703195005, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007299891944276169, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006563554279273376, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006149930831743405, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005966926426044666, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005918056172668003, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00592741524160374, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005978732613148168, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006039609079307411, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006084095234400593, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006139492597430945, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006258218804141506, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006339157582697225, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00636545652159839, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006350442034599837, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006434293322527083, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006340891844956786, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00632038073563308, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006994617432210362, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007997233769783634, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.008225085591548123, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.010490965122007765, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.013347493000037503, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.021101517342613077, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.024316106957121518, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04712030460287796, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07366894901351874, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1100267270632321, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.560605764389038, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.498767375946045, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.401575803756714, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3103950023651123, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.224036931991577, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.1098732948303223, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.9873130321502686, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.8581759929656982, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.699463963508606, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.5437699556350708, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.404122233390808, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.2177557945251465, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.0604404211044312, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.865856945514679, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.7046571373939514, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.5652289986610413, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4080595076084137, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.26382187008857727, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.16202732920646667, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11062288284301758, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.08099495619535446, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06650447845458984, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.057722028344869614, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.05200118198990822, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04810738563537598, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.044690150767564774, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.042514387518167496, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.041292402893304825, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04165792092680931, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.043528102338314056, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0430874228477478, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04117657616734505, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03975015878677368, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03968953713774681, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.041887201368808746, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04563124105334282, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0497983917593956, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.050297241657972336, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04155244305729866, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.035125039517879486, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06203475221991539, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.05211903154850006, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.12656931579113007, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.1898907870054245, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3370762765407562, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.43587881326675415, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.8317976593971252, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.303600549697876, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.169114589691162, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.29836309523809523, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.3271329365079365, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3715277777777778, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3990575396825397, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.41195436507936506, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.44742063492063494, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5171130952380952, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6021825396825397, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6413690476190477, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6932043650793651, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7609126984126984, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8345734126984127, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8638392857142857, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8918650793650794, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9166666666666666, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.935515873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.957093253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9776785714285714, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.982390873015873, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.982390873015873, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9828869047619048, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9900793650793651, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9784226190476191, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9476686507936508, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9771825396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.13305971521716065, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.1615405824919952, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.18859286921113577, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.19830144716148934, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.20090388734657744, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.23501565650641973, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3011080557234709, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.367224663222967, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4022253919024406, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.49651058443469265, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.643186601367693, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7677605050711902, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8122245615412327, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8584611497992368, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8989559164676099, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9249430104877033, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9527410825319313, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9684176129106532, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9750740049250759, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9770671058674936, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9791142747642985, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9811631238809482, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9819291893417993, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9821134560839027, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9822978456945538, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9831854810416616, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9832702386216701, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9834988466656572, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9835001417157782, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9830027105207721, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9832725024572307, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9831060753678564, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9840100692977084, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9849021616710648, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9806397779797597, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9806552120191236, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9791517910743079, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9796123088666856, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9846808343900116, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9880734725832616, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9798025488198112, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9836740237948118, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9748555954753916, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9746210547416054, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9699943221623075, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9701189842041619, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9722208265116459, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9425819605377047, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9694090710976523, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.055154436575248836, "validation/loss_best": 0.035125039517879486, "validation/acc_best": 0.9900793650793651, "validation/f1_best": 0.9880734725832616} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 0.7324917700886726, "train/grad": 0.30712298542261124, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.4272445678710937, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.35230224609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.234385681152344, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.123594055175781, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.018888397216797, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.881539306640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.7366580963134766, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.5882081604003906, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.4126478576660155, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.2481994819641113, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.1062182235717772, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.921539306640625, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.7666501426696777, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5750039649009705, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.42543438851833343, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3137354239821434, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.21355066046118737, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.14348131716251372, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.1035539667122066, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.08533924095332622, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.07400546865537763, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.06749756726436316, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.06280664990656078, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0591629843134433, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.05603384006768465, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.052835738239809873, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.05035987300798297, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0484253042191267, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.04608366128057242, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.043598320614546535, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.04130389870144427, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.039366484740749004, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.03723182012327016, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.03599851203151047, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.03521875746548176, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03490969368256629, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.035890587670728565, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0366417345777154, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.040346570974215866, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.06280541142448783, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06369446469470859, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.09219924787990749, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.22759539536200463, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.6060245383996516, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6266146323271096, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.1143667276669293, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.108514211336151, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.4847253371123226, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.9296652541588992, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.027520826570689678, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02679114487953484, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.025844086334109305, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.025117938732728363, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.024509361777454615, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023743932815268636, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022902307091280816, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02195297379978001, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02069572316482663, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019393193600699306, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.018207034915685653, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01664982382208109, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01536546358373016, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013714127554558218, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01216105543076992, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010679304376244545, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.00897164322435856, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0074832654546480625, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00650661422056146, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006090055538224988, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005925380396656692, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0058966601820429785, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00590778128884267, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005916745859431103, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005883851313847117, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00580629417207092, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005724662497232203, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00568598995479988, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005659620195510797, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005573493490810506, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.005406660645094235, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005252333854150493, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.005072968368476722, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0049621686962200325, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004875048956600949, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0049444367324758785, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0050757985273594385, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0053253820592362895, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006445233573758742, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008479914937342983, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.008900261078542826, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.012240062642958946, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.023726307855395135, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.053526790251025885, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05159957856578359, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.072605329170999, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.11323542109335337, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1378619809309096, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.16446408401359805, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.2656466960906982, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.1757211685180664, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.03489089012146, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.90358304977417, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.7811371088027954, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.6232625246047974, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.4618821144104004, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.3022706508636475, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.121208667755127, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.9576585292816162, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.8189734816551208, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6374678611755371, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.48365318775177, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.30591827630996704, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.1951569765806198, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.13318020105361938, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.09411139786243439, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.07398917526006699, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.06307779997587204, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.05719705671072006, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.052461668848991394, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04896315559744835, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04594369977712631, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04330083355307579, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.040748193860054016, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03749825805425644, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03546595573425293, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03501209244132042, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03619864210486412, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.038417521864175797, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03963173180818558, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04193376004695892, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.048394907265901566, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.0563868023455143, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05825703963637352, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.060436345636844635, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0693405494093895, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06748882681131363, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04528766870498657, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.08070141822099686, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.18287044763565063, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.19056329131126404, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.45491987466812134, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6237845420837402, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.1149754524230957, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.081350088119507, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.4606685638427734, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.675424098968506, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.491014003753662, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.3998015873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.4260912698412698, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.49206349206349204, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5778769841269841, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6103670634920635, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6455853174603174, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7234623015873016, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8077876984126984, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8497023809523809, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8764880952380952, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8988095238095238, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9260912698412699, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9409722222222222, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9642857142857143, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.972718253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9811507936507936, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.982390873015873, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.984375, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.982390873015873, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9737103174603174, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9675099206349206, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9747023809523809, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9747023809523809, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.18679562300834748, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2056668767848119, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.2648325039204912, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.3370934575612481, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3568602443425387, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.4262020415452676, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.593098905757761, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7342517614865743, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7960519974532471, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8386028017002418, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.873138107840285, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9143316106733008, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9337220472640873, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9608037295849627, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9700087040275546, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9735931329978447, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9765614850346919, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9782111211269945, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9796055038006204, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9806194249904608, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9817351681721033, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9817870121535907, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9835912922731264, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9845020548794335, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9846893795475986, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.985206598491053, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.986153149552077, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9852659687205881, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.986593891647073, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.984955715140143, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.985820142230296, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9853388384650268, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9830821338523431, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9822217278642159, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9814322912813206, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.980472190831545, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9792911560970972, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9832980114630684, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9845888976918228, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9841421283039573, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9743246090593475, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9654076750513173, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9629867974354925, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9790109978146587, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9652579453695088, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9605034568055769, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9680435301410709, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9749391042562385, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9695755577922958, "id_best": 26, "lr_best": 0.00041999999999999996, "wd_best": 0.05, "train/loss_best": 0.05035987300798297, "validation/loss_best": 0.03546595573425293, "validation/acc_best": 0.9890873015873016, "validation/f1_best": 0.986153149552077} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 0.8275506693124771, "train/grad": 0.46195437215268614, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.116669006347656, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.012712860107422, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.8514471435546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.7038371276855468, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.569080810546875, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.400736961364746, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.2342314147949218, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.0744929504394531, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.8961410903930664, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.7343138837814331, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5962105917930604, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4239950680732727, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.29871147096157075, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.18389993369579316, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.12685299262404442, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.09926955867558718, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.08199954561889172, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07183221746236086, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06532891125418246, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06121394008398056, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.057454230273142455, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.054409763012081384, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.051525197084993125, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.048900659251958134, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04662762211635709, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.044390855450183155, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.04260321751236915, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.04091363816522062, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.03895718263462186, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.037095543164759874, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.035150285679847, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.03336455468088388, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.03230776019394398, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.03161325197666884, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.030871322695165872, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0348591975774616, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0435296780243516, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.048502230988815426, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.06957253096625209, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.10668283608742059, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.17856605916284024, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.39039574231021107, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.9489217358455062, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.9089818736817687, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.1877382549177855, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.258957502730191, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.631422154279426, "train/loss_047_lr4.3e+01_wd1.0e+00": 6.081938017494976, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.430753593845293, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.025149492435157298, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.024552791053429245, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023664559172466397, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022806945210322737, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02194294622167945, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020737666431814433, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01942228868138045, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018085823231376707, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016579085066914558, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015243682912550866, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014069640934467316, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012323184898123144, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.010604907725937664, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008415167187340557, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007008935011690483, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006288430681452155, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005925283711403609, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005784785365685821, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005723277197103016, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0056913226941833275, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005666837157914415, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005646395290386863, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005632181479013525, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005638306112959981, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005684660893748515, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0057730546680977565, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005831989084836096, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005833471594960429, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0057982858223840595, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005767949252331163, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.005726438444398809, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005639771496353205, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.005623526174385916, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0056094150098942915, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005610518577741459, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006368249671722879, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007447823696893465, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008358077423254144, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010716980602410331, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.014823327671620064, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.021441175366635434, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03984283703156549, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07798761175164827, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06968677754753735, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.11271388404154332, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.12199602083569502, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.16572775169144735, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.22616810414999106, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.23524815827749823, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.9475303888320923, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.8296458721160889, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.6494404077529907, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.4884862899780273, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.3455991744995117, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.172605037689209, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.006911277770996, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.8505282998085022, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.675147294998169, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5141380429267883, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.38097402453422546, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.23633885383605957, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.15350575745105743, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.09718862175941467, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.07548559457063675, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.06489726155996323, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.05719279125332832, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.05171731114387512, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04781116545200348, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.045282892882823944, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04280739277601242, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.041032176464796066, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.039485130459070206, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03808774799108505, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03661783039569855, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03544812276959419, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03518449887633324, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03581751510500908, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03747135400772095, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.037359725683927536, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03535393625497818, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03242592141032219, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03239377588033676, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.033595699816942215, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03911634534597397, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04210515320301056, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.04388881102204323, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1019376739859581, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.11960773169994354, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.20058287680149078, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.7483669519424438, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.007622241973877, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.3500571250915527, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5733963847160339, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.2803454399108887, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.831301689147949, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.095106601715088, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.058079242706299, "validation/loss_048_lr5.0e+01_wd1.0e+00": 10.830695152282715, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5510912698412699, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6049107142857143, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.640625, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7165178571428571, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.796875, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8534226190476191, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8799603174603174, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8980654761904762, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9139384920634921, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9350198412698413, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9558531746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9744543650793651, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9779265873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9811507936507936, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9828869047619048, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.988343253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.988343253968254, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.988343253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.988343253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9836309523809523, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9672619047619048, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9640376984126984, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.31923197896505673, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3552044834681123, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.4068664489544497, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5762413769504665, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7226536232901639, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.807085738281086, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.847538949636679, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8721118277702729, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8956644566333716, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9265542150545291, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9520778176403524, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9718041932455345, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9749521512531005, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9770898910938777, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.97856485998245, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9801965674782087, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9813254516273627, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9820548243494732, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9830079656453873, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9842655447179529, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9848133010636615, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9842709749828461, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9845135296381645, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.985676206094053, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9864926992954902, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9862551392660405, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9867892037761756, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.986074895985127, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9850118280434119, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9868985165238511, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9869994529531877, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9879478066309849, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9880816794778932, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9871780967415837, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9866294127716129, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9854892872358376, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9851196605074506, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9763566627345638, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.983273316826161, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9765039922940828, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9590524951025566, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9043721577139956, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9666936617484881, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9829565269646137, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9749648106144688, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9612448158714545, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9684681609144238, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9629419714686608, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9549210623667292, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.03230776019394398, "validation/loss_best": 0.03239377588033676, "validation/acc_best": 0.9900793650793651, "validation/f1_best": 0.9880816794778932} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 0.8509109271317721, "train/grad": 0.5044459333270788, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.8045951843261718, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.6762208557128906, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4845181274414063, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.3183617401123047, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.1745804595947265, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.00381685256958, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8411118030548096, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6862393188476562, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5132847881317139, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.36647446155548097, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.26142329454422, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.16530179277062415, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.119196318089962, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.09044664356857539, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0784776484966278, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.07191204813309014, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.06651318714953959, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06207883638329804, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05825614651665092, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.055390174528583885, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.05228010285645723, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.04949557512998581, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.04673281104303897, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.04397533245384693, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.041333021223545076, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.038059073658660056, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0353226771671325, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.03317823311313987, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.03075423183850944, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.028517050361260773, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.02653273183852434, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.02581875273026526, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.02529551318846643, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.02639571925625205, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.02939297609962523, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.035642155511304736, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06411888981238008, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0727231671474874, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1466756491921842, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.34620968022383747, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.584107419103384, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.7674317733105273, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.4317487347126008, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.080402827905491, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.94048949342221, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.0118104344327, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.8396283492911607, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.256116505004465, "train/loss_048_lr5.0e+01_wd1.0e+00": 8.68624768308364, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0232896560896188, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022532970029860735, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02125021180137992, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01998375618364662, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01878831140231341, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.017306742961518466, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01590746207628399, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.014629205167293548, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.013140649260021746, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.011515789441764354, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009913026969879866, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007884879461489618, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006683202543063089, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005956798047991469, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005736098100896925, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005647812130046077, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005605467649875209, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005582203803351149, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005559155319351703, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0055452761152992025, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005519548017764464, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0054907150572398675, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005470411027199589, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005433208885369823, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005393871386477258, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005271656031254679, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005139813649875578, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005039173734840006, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.004906847015081439, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.004851892478909576, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004799862352228956, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004808385737851495, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004818352673610206, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0048956069725682025, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005500052281859098, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006785265480466478, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009991906471113907, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010993077275579709, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.017453613244124425, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03269189089056454, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0469178468070811, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05672638549964644, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.09110958461712297, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0731162547576939, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.13587175766349452, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.18606032763142138, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.16847029515259557, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.21362503711134195, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.2409200034747964, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.6503517627716064, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.5141891241073608, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3152811527252197, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1475716829299927, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0052645206451416, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8376169204711914, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.6765441298484802, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5218820571899414, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.35550636053085327, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.23195801675319672, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.15690656006336212, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.10100778192281723, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.07828339189291, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.06322626024484634, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.05575447902083397, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.051258910447359085, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04729113727807999, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.043989285826683044, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04123574495315552, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.0392792783677578, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.037373002618551254, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03579896688461304, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.034330204129219055, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.032649338245391846, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03089671954512596, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.029524007812142372, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02876255102455616, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.028538336977362633, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.02844606526196003, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.029288485646247864, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03022153675556183, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.02990235947072506, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.02886628359556198, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.027162136510014534, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.028202243149280548, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04519319534301758, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.057846423238515854, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09522081911563873, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.17517071962356567, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2628805339336395, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.6288601160049438, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5919026732444763, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.0808534622192383, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6204254627227783, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.7553151845932007, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.6196439266204834, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.4966304302215576, "validation/loss_047_lr4.3e+01_wd1.0e+00": 4.0134148597717285, "validation/loss_048_lr5.0e+01_wd1.0e+00": 11.362578392028809, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6364087301587301, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.699156746031746, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.816468253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8578869047619048, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8819444444444444, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9015376984126984, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9184027777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9392361111111112, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9620535714285714, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9744543650793651, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9821428571428571, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.984375, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.988343253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.988343253968254, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.988343253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.988343253968254, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9818948412698413, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.96875, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9580853174603174, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.39754727425952424, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5450130827790806, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7588872104613269, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8205009087011329, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8531585679878406, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8819989395476353, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9061491571978693, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9332379533182743, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9582735887009645, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9716145426524236, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9751496603121456, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9768516372793477, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9791853686569751, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9813788525568153, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9827952174474512, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9839214088228504, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9835610518583974, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9850065673198052, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9857795412688216, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9852374328562602, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9852350896249542, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9853224416937753, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9853026706595649, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9858917206785576, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9870304326803309, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9876647996704645, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9885834703433828, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9871058164134102, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9869810171714826, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9853110245322515, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.985333124611134, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9865600877819234, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9877766389114127, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9861080848795973, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9889379082680778, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9876645698123961, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9789935692437876, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.973280243060856, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9816160761747296, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9763424652300865, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9750227453084376, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9727843353190073, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.980728097614533, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9798431914158111, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9762208775579101, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9625231361369205, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9737004302419162, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9737146554401297, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.950272289565775, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.02939297609962523, "validation/loss_best": 0.028202243149280548, "validation/acc_best": 0.9910714285714286, "validation/f1_best": 0.9889379082680778} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.549410625398159, "train/grad": 0.3845806806907058, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.533233642578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.392828140258789, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.1920084381103515, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.025610408782959, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8851127243041992, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7187068748474121, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5585882449150086, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.41108171820640566, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.26971122026443484, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.17746174246072768, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1279183616489172, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.09401289213448763, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.07967949589714408, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06937210805714131, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0636645938642323, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.05988322038203478, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05620601562783122, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05278628500178456, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04948847921565175, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.046710064029321076, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.043680960601195695, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.04076375679112971, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.03778849263675511, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.03473424122668803, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.03183109417557716, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.028565809968858956, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.025823746295645832, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.02344967388547957, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.02065700801089406, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.017938914075493813, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.016257205186411737, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.015455284034833311, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.014451774368062616, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.014343940829858183, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.022787743490189315, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.024158448120579122, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.03530785027891398, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05964720455929637, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.11132458162494004, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.222735934862867, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.33298689568415285, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.3295820016972721, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.6338027107995003, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.5096768156066537, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.496927855182439, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.268910955199972, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.8436758659407495, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.438571955244988, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.361217789500952, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021571477083489297, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02054053345695138, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018911485518328844, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.017483108630403876, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.016283637890592218, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014919669390656054, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.013607059447094798, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01215371123282239, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010148516381159425, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008260835197288543, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.006973698489600793, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.006089516191277653, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005801608320325613, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005678199817775748, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005640777906519361, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00563112722884398, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005625291627948173, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005609408011077903, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005562899141805247, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005497426590300165, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005418765631038696, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005321973935351707, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005212454944557976, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005076720114157069, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004939295770600438, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004754328017443185, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004573096817912301, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00438582175920601, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00414184972687508, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0038570179452653976, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00367082134456723, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0035817414786288282, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0034454906008249964, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.003484754935889214, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00485027847418678, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005887866823070453, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006834299054244184, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010811063013114647, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014140881573112836, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.024889790277123194, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.030430276071700915, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03513265611059548, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.055627721815856485, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05048656130282406, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0883198793581274, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.1575837987533653, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.14131217274492258, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.15276860981857873, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1833818972701837, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.4076131582260132, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2654906511306763, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0661897659301758, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.903020441532135, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7651411890983582, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6002605557441711, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.4424079954624176, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.30516499280929565, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.18773633241653442, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.12132279574871063, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.09060084819793701, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.07000044733285904, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.06036921963095665, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05261985957622528, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04806361347436905, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04508782923221588, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04218081757426262, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.039595577865839005, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03721579164266586, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03542836382985115, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.033435601741075516, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.031821511685848236, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0302142221480608, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.02889612317085266, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.027749761939048767, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.026548828929662704, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.025440700352191925, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02447354421019554, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.023566655814647675, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.02349616214632988, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.02476244606077671, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.026188408955931664, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.027387898415327072, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.02874644100666046, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03313729912042618, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05245273932814598, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.051049936562776566, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09787176549434662, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13587360084056854, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.27892446517944336, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2051524817943573, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3009692132472992, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5304763913154602, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5980694890022278, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.9009662866592407, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.8318095207214355, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.212761163711548, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.887282371520996, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.540504455566406, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7658730158730159, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8308531746031746, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.875, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9032738095238095, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9248511904761905, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9454365079365079, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9590773809523809, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9694940476190477, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9766865079365079, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9794146825396826, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.988343253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9756944444444444, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6760934721758035, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7825263482376482, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8477419397038849, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8881258698824261, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9152839553501916, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9402917133182035, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9559201418432662, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9668728384819599, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9739804357122873, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9763119516478247, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9765833072769667, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.980464327672943, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9809291838891331, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9826431828994828, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9837315640643864, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9840924987905953, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9846541589094376, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9847891979139115, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.985193282481339, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9853706057378696, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9876046183440612, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9879323439601577, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.988017215982489, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9881995881694662, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9885398486683602, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9897069147727868, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.989663374443041, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9899213735865744, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.990666767503356, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9905891224167672, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9902213306394323, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9898691885076416, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9908468070688392, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9903683906629114, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9885238374843539, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.985183201713941, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9888428901143759, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9824112438978875, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9824083513489106, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9818372008015478, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9840846983680636, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.984811866671093, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9846166228383783, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9829707237834848, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9777028878687537, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9786658920476764, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9806485903028151, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9795893029143, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9749295783667191, "id_best": 28, "lr_best": 0.00057, "wd_best": 0.05, "train/loss_best": 0.02065700801089406, "validation/loss_best": 0.023566655814647675, "validation/acc_best": 0.9920634920634921, "validation/f1_best": 0.990666767503356} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.3504068684950471, "train/grad": 0.2985163371451199, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3111087799072265, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1693596267700195, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.9730197334289551, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8128208065032959, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.6768868827819824, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5152409744262695, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.367372487783432, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.24983222305774688, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.15835615128278732, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.11199909843504428, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.09121075682342053, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.07661149002611638, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06923696782439948, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06282473005354404, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.058620071839541196, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.055357324974611405, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05198798430152238, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04849598785862327, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04507992343045771, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04227389480918646, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03906982731074095, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03601042686030269, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.03276912591420114, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.029476064844056963, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.02619894684292376, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.022476364849135278, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.01935801456682384, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.016754204416647555, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.013583022272214293, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.011328234132379293, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.009365491149947048, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.006582444170489908, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.007475015828385949, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.006050209421664477, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.008784326687455178, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.02218454336747527, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.03808478634804487, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.08925472554750741, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0458389662578702, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.15324042840860785, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.1290037545748055, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.14387080227956175, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.25733237240463497, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.26132312178611755, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6967778086196631, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.4377482924517244, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.6074221739172936, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.187851163391024, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.86702625178732, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020124468822032213, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018932521222159267, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.017209564577788116, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01582157135475427, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014690572144463659, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013299221796914935, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011682638488709927, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.00984210376162082, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.007807223533745855, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.006576880101347342, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0060846437269356105, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005854731010040268, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005791416000574827, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005754369180649519, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005721403021016158, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005669356758589856, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005603356274077669, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005515728599275463, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0054139471385860815, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005316322651924566, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005213624493044335, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005104080709861592, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004971150293567916, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004805549388838699, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004644849442702253, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004440932481811615, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004228764054714702, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0040092992522841086, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0036572431986860465, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0033022872359651954, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0028702537896606374, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.002400470492802924, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0028255458243415885, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0020704753158656787, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.002529159474795506, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0056402269774105205, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006491533507457916, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01370047202590456, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010494894472457812, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01976272829912384, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.018573707557668885, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02014118278289021, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.034544867172236336, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.029840990548958345, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07405712339258874, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.10705839383988099, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0988784717947339, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.11714494376963755, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.14912141348944066, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2152044773101807, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.0743505954742432, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.880777895450592, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.7223645448684692, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5867838859558105, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.4268077611923218, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.28826937079429626, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.1871827095746994, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.11644010990858078, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.08441483229398727, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.06977780908346176, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05858975648880005, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.05265621095895767, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.047351524233818054, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04416347295045853, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04203398898243904, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03998314589262009, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03847942873835564, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03703567385673523, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03583509847521782, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03473111242055893, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03416335955262184, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03335811570286751, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.031762026250362396, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.029811229556798935, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02781144343316555, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02678786590695381, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.026189541444182396, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.025774167850613594, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.027012770995497704, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.028231538832187653, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.029409298673272133, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03033837489783764, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.030772481113672256, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.029628243297338486, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06116217374801636, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05026598647236824, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1255602389574051, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1025296002626419, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.19345375895500183, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.23730464279651642, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3034532070159912, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5280949473381042, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5642185807228088, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.407148838043213, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.0524377822875977, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.6319880485534668, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.632814884185791, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.447181224822998, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8454861111111112, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8752480158730159, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9069940476190477, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9295634920634921, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9437003968253969, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9613095238095238, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9717261904761905, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9764384920634921, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9794146825396826, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.980406746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9828869047619048, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.986359126984127, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.982390873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9828869047619048, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8008728769024894, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8458230549046458, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8914752841101738, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9214482443652633, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9392940532146449, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9587489000867274, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9686625935703287, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9736192021231707, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9763089287920119, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9774774939113529, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9800168401377792, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9818899272498043, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9831065876539591, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9841048441334225, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9841060006947101, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.984103127266095, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9844758209636177, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9842257982837297, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.985584815423646, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9854040095852787, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9856281955276665, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9849737007611616, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9854653580451289, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9865061101412168, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9883996232206466, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9889318485254179, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9888493364202423, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9892109834029771, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9901197246449495, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9899910395146815, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9888348953674735, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9888354566525486, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9895093740193962, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9908282346619666, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9904739784847164, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9873247557404752, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9891742600018831, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9822153037314316, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9875487429688173, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9878135054818084, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9840636659944101, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.985037354491204, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9857638516077264, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9846318649946249, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9829642498721701, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.980480955222058, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.979550130025974, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9701737751935563, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.980251076038986, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.008784326687455178, "validation/loss_best": 0.029628243297338486, "validation/acc_best": 0.9920634920634921, "validation/f1_best": 0.9904739784847164} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.24099206663668155, "train/grad": 0.22400768974795937, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.130923309326172, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9924004554748536, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.8025584602355957, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6468374395370483, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.514510817527771, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3643400156497955, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.24364136278629303, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.16169595390558242, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.10951918587088585, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.0866209227219224, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.07571358744055033, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06684021081775426, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06164230750873685, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05648874142207205, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0528551593516022, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04994909415952861, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.046807268857955935, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.043518618047237397, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04017842536792159, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03722086919471621, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03378875902853906, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03042977581731975, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.02690805584192276, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.023276096684858204, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.019710618862882258, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.015759400594979524, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.012429368859156966, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.009776210505515337, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.006781172640621662, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.004030127981677651, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0027464884519577027, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0022445797733962537, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0035410579014569523, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0029505471140146254, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.004828823078423738, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.01146933065727353, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.028288381369784475, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04231454168446362, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.026031839903444053, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.044443248827010395, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06781736029312015, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0716405045799911, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.14869927380234002, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.1558726490009576, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.3705349596124142, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.6355473684798926, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.0645276431553066, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.2879269738961012, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.070034203575924, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01842606961261481, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017172173419967292, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015465174955315887, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014128642627038062, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01297162365168333, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.011363620669580996, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.009496192797087133, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.007700910496059805, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006274922413285822, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005713658857857808, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005521351961651817, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005414518854231574, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005361071887309663, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00529495429364033, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005239283748087473, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005184378483099863, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005104675736511126, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005010829856910277, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004902796429814771, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0047907649635453705, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004652271542727249, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0044994078540185, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0043085904275358185, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0040826306738017595, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0038211139738268687, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0035106010286835953, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.003167115000396734, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0028003359238573467, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0022457618931912295, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0015615624813381146, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0011982271762281016, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0010804926223829626, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0012950737552591817, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0012998521879694636, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0016805432869358584, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.003785190131969216, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005703192478581514, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0064904828839541205, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006475336264671736, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.009703465669079587, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01269205754815584, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.013618629654339917, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02161978574787047, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02122311027925096, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.036939677566215294, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.06324092096586705, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07623163957973259, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.08487422259176389, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.12514620660680095, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.0650488138198853, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9281550049781799, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7398309707641602, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5839774012565613, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.4519701600074768, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3066830635070801, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.1965792030096054, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.12743079662322998, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.08723144233226776, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06939529627561569, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.06034082546830177, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05283765867352486, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.048489004373550415, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04438130557537079, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.041612908244132996, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03960723802447319, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.037539396435022354, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03561180830001831, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03399530425667763, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03244416415691376, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.030739329755306244, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.029157977551221848, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.027856778353452682, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.026748474687337875, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.02591833285987377, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02535627782344818, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.025875244289636612, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02761690691113472, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.030261097475886345, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03143986687064171, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.033911969512701035, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03171606734395027, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.030238758772611618, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03513834998011589, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03603251650929451, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08619855344295502, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06173443794250488, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11351725459098816, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1358373910188675, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.18114981055259705, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.24691462516784668, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.34982168674468994, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5945239663124084, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.9776468276977539, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.4647008180618286, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.9080822467803955, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.8257540464401245, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.385469675064087, "validation/loss_048_lr5.0e+01_wd1.0e+00": 6.205268859863281, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8687996031746031, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8888888888888888, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9092261904761905, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9310515873015873, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9471726190476191, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.96875, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9756944444444444, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9786706349206349, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9806547619047619, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9918154761904762, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.988343253968254, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9660218253968254, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8352537887586647, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8610348514866518, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8904215800217499, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9219931011033541, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.943106382521636, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9653841843472836, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9726686912561172, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9754091298902248, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.977294408582983, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9793820457341974, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.980388422106711, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.982560976572591, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9837247963706336, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9842774353417244, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9847706416493257, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9856256760993515, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9857943575676019, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9861083345486563, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9868756374902451, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9876123392208177, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9879720591608345, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.988130490031735, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9884960176515039, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9899762530093655, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9905192480297199, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9898071470192518, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9898036174628185, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9888009818535275, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9892883506220326, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9885353059380961, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9881171694467263, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891579491540087, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9902079155374557, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9901470575964488, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.98910747695311, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9806579404899614, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9872980156908111, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9877885977889223, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9860642215547635, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9859529308781657, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9864762707510872, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9844285364895642, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9865726477312183, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9754605637867148, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9819246851675849, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9800409078540401, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9777756550491569, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9802844952971267, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9681578464335726, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 0.019710618862882258, "validation/loss_best": 0.02591833285987377, "validation/acc_best": 0.9920634920634921, "validation/f1_best": 0.9905192480297199} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.16786778211593628, "train/grad": 0.1753005841933191, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.0098736190795898, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8743164253234863, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6878387022018433, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5338308644294739, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4059515404701233, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.27204577922821044, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.17643764555454255, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.12051796570420265, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.08975234232842923, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07589481081813573, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06863051002845168, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06208853416144848, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05790099045261741, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.053397236187011005, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.050018666945397854, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04722320795990527, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04414981259964407, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.040830562449991704, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.037467206576839086, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03456113899126649, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03113019368611276, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.027726430743932724, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.024116336768493056, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.020500325057655575, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.016929285004734992, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.013053961861878634, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.010143212014809251, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.00796673921868205, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.005381843019276858, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0033754278533160687, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0024284997303038835, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.002212163573130965, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0013684152346104383, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0020513090677559374, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00417592098005116, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00745364673435688, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.018866849411278964, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.007998575614765286, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.022918813042342664, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.011199034554883837, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.03054211251437664, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.05694790177047253, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1491358015872538, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0994509419053793, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.22771210549399257, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.3451662153750658, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.5002586934529245, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5738505198992789, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.2587326874956488, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01741598393768072, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.016210479941219092, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0146122459275648, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01328556947875768, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0119797141244635, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.010085107730701566, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00815214672824368, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006688131074188277, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005905028445413336, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005649766369024292, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005575787071138621, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005537537532509304, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005515243660775013, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005479304775362835, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0054546279343776405, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005417813629319426, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005368567073892336, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005299209965160117, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005188480239303317, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00505599516996881, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0048785651972866615, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0046964603092055765, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004449066109154956, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004168360923504224, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0038379309242009187, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.003382885434984928, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002906507357765804, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0024327918980270626, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0018599867810371506, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0013179361733273254, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.000995883270388731, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0008203936531754152, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0005163699613842709, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0008631957785644318, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.001646208819136632, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0024489068353428676, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004341088128983728, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00243838658534289, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005710718773055893, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0042309747225338055, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.008431435825153369, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010825051339025019, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02043282183376518, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.017994302648609396, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03024375111463566, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.04188802347649589, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05351940031166721, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.05872750937817723, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.09197170855616718, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.948814332485199, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8150815367698669, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6303992867469788, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.4778691828250885, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.3533552885055542, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.22842179238796234, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.14393067359924316, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.09831204265356064, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07330328971147537, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06136170029640198, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.054863013327121735, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04916667565703392, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04561249166727066, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04230714961886406, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.039827264845371246, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.037785645574331284, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03563658148050308, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03343477100133896, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03161684051156044, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03023289144039154, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.02879190631210804, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.027546165511012077, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.026493223384022713, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.026122448965907097, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.026625296100974083, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.028171570971608162, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02913013845682144, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.027952760457992554, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.027052374556660652, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.027337530627846718, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.02853696420788765, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.030765388160943985, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.032531462609767914, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.033245913684368134, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.04418211057782173, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05183982104063034, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07475907355546951, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06963217258453369, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09426116198301315, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.16944991052150726, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.21610531210899353, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.278523713350296, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5524146556854248, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.47658732533454895, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.4021536111831665, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.2774722576141357, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1839563846588135, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.1149860620498657, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.2741241455078125, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8911210317460317, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9117063492063492, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9362599206349206, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9528769841269841, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9645337301587301, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9739583333333334, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9771825396825397, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9806547619047619, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9831349206349206, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.988343253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9900793650793651, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9779265873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8665074506920393, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8963903564243048, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.929939300593694, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9492523802623761, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9617198412071549, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.971290318121067, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9745101504033417, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9781093986962204, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9797444173860363, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9801020945312483, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.982612810824209, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9842689300329207, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9840906191317451, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9848262125910734, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.984948214677115, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9854830267305958, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9852102455872666, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9869382586469472, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9872746717227291, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9874533132535076, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9876319355475522, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9879032647474529, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9887190868611966, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9883522407914596, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9883516756830731, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9876668789878726, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9891284746855246, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.989187104039677, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9898566408979226, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9897633634290726, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9898083436118351, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9904881614945176, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9908342177374966, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9906514565462478, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.989100121387882, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9899754316336403, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9878807722921499, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.989426816643352, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9902566001687136, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9884553150770659, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9855372460524178, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9862031317029608, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9845132542928422, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9835085561382468, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9778211021792943, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9828357295752045, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9849635321690694, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9861401830927169, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9772390770303794, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.0013684152346104383, "validation/loss_best": 0.032531462609767914, "validation/acc_best": 0.9920634920634921, "validation/f1_best": 0.9908342177374966} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.1252639682404697, "train/grad": 0.1382300734706223, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9079253959655762, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.7760495281219483, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.5933807897567749, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.4438522958755493, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.3258651053905487, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.2119574549794197, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.13879826292395592, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.10145067252218723, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.08109350837767124, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07109884148463606, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06550159057602287, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0601369100343436, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05650972885079682, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05237186030484736, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04911759262904525, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04638030282221735, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.043179933419451116, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03973478622734547, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0361827203631401, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03307716572657227, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02943271806463599, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02577222403138876, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.021753764245659113, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.017527240216732024, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.013472923701629043, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.009375113481655716, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.006474662236869335, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.004339055875316262, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.002494316715747118, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0011519766505807638, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007319987658411264, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005809960141777992, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0005100593622773885, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0007154036033898592, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.001418996136635542, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0012197137344628572, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.004342762501910329, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0006557915359735489, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.02582065745256841, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.015211670780554415, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.016907078782096505, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.02318283502012491, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.041782900746911766, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.07672616519033909, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.08908732693642378, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.13020069050602615, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.3865269573777914, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.36182158697396516, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.6950325141847133, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.016420903727412225, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015300256623886526, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.013812140068039299, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012463754555210472, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011057736100628972, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009103553902823478, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.007299242434091866, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006244166559772566, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005763651303714141, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005599557920941151, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005532843634719029, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005475950848776847, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005428560695727356, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00535528747481294, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005285564218065701, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005218189639272168, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0051289293082663785, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005013392155815382, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004880426490854006, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004743631526071113, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004565716782963136, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004329842522856779, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004034944484010339, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003665054797747871, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.003214447098580422, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0026257244025327964, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002051683617028175, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0015675479298442952, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0010470448873911664, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0005126527683023596, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00032065360773231077, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0002635283670906574, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00021315154732974407, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0002197411087877299, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0007345189771103832, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0007454863611190987, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0019513348939649555, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0005321461763288515, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007312382804705099, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.003460463315493226, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005593290875979683, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0064015587019931545, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.009921163741357595, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.013844231082066435, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.017464841360761606, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.025461031001851365, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04423772678840129, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.045016992787194024, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.06656202854328529, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.8585020303726196, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.7276748418807983, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.5456864833831787, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.39807623624801636, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.2843031883239746, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.178348109126091, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.11424867063760757, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.08313776552677155, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.06572064012289047, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.056683916598558426, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05154016613960266, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04674714803695679, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04369029030203819, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.040506817400455475, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03826771676540375, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.036506809294223785, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03474131599068642, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.032829947769641876, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.031068651005625725, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.029618671163916588, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028139743953943253, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.02675134502351284, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.025468718260526657, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.024606822058558464, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.024377241730690002, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.024607086554169655, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02495778538286686, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02526412531733513, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.025964859873056412, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.027231672778725624, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.02859247289597988, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.02922845259308815, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.030912956222891808, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.032380905002355576, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.04277843236923218, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.052938349545001984, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05135083571076393, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.0741511732339859, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1188373938202858, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1783287078142166, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1566922962665558, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.19836461544036865, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.36281922459602356, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.36843982338905334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.1081211566925049, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.0427204370498657, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1432163715362549, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9299052357673645, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.7340102195739746, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9012896825396826, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9191468253968254, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9419642857142857, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9608134920634921, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9717261904761905, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9761904761904762, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9791666666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9818948412698413, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9833829365079365, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.984375, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.986359126984127, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9920634920634921, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9923115079365079, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9920634920634921, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9925595238095238, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9910714285714286, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9908234126984127, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9900793650793651, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9786706349206349, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8816703840403286, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9079613758784201, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.936693489179336, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9579103737578852, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9688191690887119, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9731551015704992, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9761317853003865, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.978935613383559, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9803711378726747, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9816963529476839, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9829167509883173, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9837256122558764, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9840850714262414, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9842821349041642, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.985775998398043, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9868385186114741, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9873923020453258, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9874837143530564, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9885463201549353, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9889490113598515, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9895341539539215, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.989910516332066, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9900587583573804, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9907973125553179, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9907109639831614, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9914329113531027, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9920060693136189, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.990969227978321, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9911821036329517, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9906359685648083, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9899034616946389, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9907180141628473, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9909251680267747, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913889924364098, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9889733000601582, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9892033844542228, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9907925545132447, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9893941161394754, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9893932025798969, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9871532701772152, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9886215413387699, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9877145353743156, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.98928063778522, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9867903477118399, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9829464129025383, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9858348960747518, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9874934092356579, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9881897127665953, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.979733229555138, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 0.017527240216732024, "validation/loss_best": 0.024606822058558464, "validation/acc_best": 0.9925595238095238, "validation/f1_best": 0.9907973125553179} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.09166541656479239, "train/grad": 0.09369022644124925, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.8249999332427979, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.6962064647674561, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.5175401043891906, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.3750172781944275, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.26829422891139987, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.17184569776058198, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.11614918023347855, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.08983999907970429, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07490605339407921, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06696667088195682, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06221868763677776, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05735644519329071, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05388157977722585, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04980698809027672, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04658307279460132, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04373485030606389, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04052055613137782, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.036969327442348, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03331202819012105, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.030022112810984256, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.026158007690683008, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02220893549732864, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01795603536069393, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.013585856379941107, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.009488646583631634, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.005662126038223505, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0034058185759931804, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.002151598194614053, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0012437278032302857, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0007466456480324269, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005169267673045397, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.000369595717638731, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0002547175064682961, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001628472935408354, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0002902074530720711, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.8598935827612876e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.002464078487828374, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0001957316044718027, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.008495516097173094, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.002938770605251193, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.008214682284742593, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.007038489999249577, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.013050716295838356, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.02502332947216928, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.01161295204423368, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05298614734783769, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.21414246881380677, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.10307848760858178, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.28194257787428795, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015634099720045924, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014493050398305058, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01289714346639812, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011348777408711612, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.009772102588322014, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.007844120559748263, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0064117203315254305, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.005774514937074855, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005542570387478918, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005486829219153151, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005474764740793035, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005451670253532939, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005422901743440889, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0053636929311323914, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005298325833282433, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005237139445962384, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005148646236630156, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005027260492788628, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004883751322631724, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0047233630885602906, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004498110899003222, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004243903895985568, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003890218272426864, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003380236372031504, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0027549810853088274, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.001985858256884967, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0013742437629116467, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0009516614540189039, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005700574119327939, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00032490723460796287, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002247190230355045, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00016161231151727408, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00011881576574069186, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.227030262332847e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00023192542959236563, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.039833706504737e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.001276186944642852, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.000184446497606289, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0037246864326628444, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0017958860602377885, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0026940947336358744, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.003535859912395935, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.004336334772628076, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.006730862258840512, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.007451996646168275, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.013322907459130508, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.030677722797122074, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.022161163144955795, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03743807763454918, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7889665365219116, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6599464416503906, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.48104384541511536, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.33959585428237915, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.2365187108516693, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.14624609053134918, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.0971330925822258, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.07413863390684128, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.060559552162885666, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.05329436808824539, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04898454248905182, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.044778354465961456, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.041998084634542465, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03908528387546539, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.0371096208691597, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03543688729405403, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03371882066130638, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03201417252421379, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.030488062649965286, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.029222186654806137, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028115971013903618, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.027339212596416473, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.02683943696320057, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.02652871049940586, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.026555782184004784, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.026930205523967743, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.027265988290309906, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02764740213751793, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.02803313173353672, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.028934117406606674, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.029826324433088303, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0303107388317585, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.031929101794958115, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03237201273441315, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03551496937870979, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04571276530623436, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.04664352908730507, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06819206476211548, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13015086948871613, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14451511204242706, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.15205799043178558, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2134608030319214, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.4223965108394623, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3952799141407013, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9108005166053772, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8577089905738831, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.2033202648162842, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9669463038444519, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.435888409614563, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9097222222222222, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9241071428571429, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9471726190476191, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9642857142857143, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9737103174603174, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9779265873015873, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9811507936507936, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9838789682539683, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9853670634920635, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9918154761904762, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9918154761904762, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9935515873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9900793650793651, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9898313492063492, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.984375, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8933493554426077, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9147651185419324, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9431766640059197, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9612027858682243, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9708847698996327, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9748678127712728, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9784715561409053, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9798341270445106, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9810142488519318, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9825678516685117, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9831506826087952, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9837335398000262, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9844621657840117, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9860346280249607, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9863827546586152, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9873409049129739, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.988293822346116, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9887232128452332, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9894443523215317, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9893540697016202, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9894370582191927, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9896222867430366, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9903710337038325, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9897252656851685, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9902207429154951, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9903107385314432, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9904934274180429, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9904907801614913, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9906262557977189, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9900340060738737, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9900809999695429, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9908499765412644, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9907464277053327, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913889924364098, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9924595543837679, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910569175698934, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9913758375639775, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9907444298952641, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9875281146256405, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9894584588845429, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9875863847425272, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9887628443500599, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9854069683601002, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9864576607174156, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9854246113822938, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9843913969579906, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.981165071223272, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9853156512743036, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9841891050953095, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.0002902074530720711, "validation/loss_best": 0.03551496937870979, "validation/acc_best": 0.9935515873015873, "validation/f1_best": 0.9924595543837679} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.07678796652704477, "train/grad": 0.07162260564975441, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7725107765197754, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.644645380973816, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.46779690027236936, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.33091811299324037, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.23294628143310547, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.1489872020483017, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.10510064944624901, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.08482851393520832, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07261696007102728, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06578092849813402, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06141342686489224, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05669597261585295, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05321124631911516, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.049105303091928364, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.045731117986142635, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04277509724721312, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03943325862288475, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03574130233377218, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03194311460480094, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.028628442510962485, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.024644827628508212, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.020715722870081662, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01648325664922595, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.012110232580453158, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.008133763261139394, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004700279133394361, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0028287175856530665, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0018163507990539075, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0010437145456671714, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006615092977881431, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0004638601373881102, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003463324438780546, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00022817214950919152, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001594857033342123, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00010224378667771816, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.311494365334511e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00035125142894685266, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.00030426202341914176, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.004772460162639618, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.002356778411194682, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0010138598270714283, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0016697114519774914, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0009405363071709872, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.008716458799317478, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0119680236838758, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.009499865341931582, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.03970248748548329, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.09648642241954804, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.1195467637758702, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015177982808090747, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014110833806917071, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012546737059019507, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0109320113575086, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.009304034491069615, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.00738546691602096, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.006210316040087491, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.005784584552748129, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005633614196558483, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005591927832574584, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005565083807450719, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005517942039296031, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00545496147533413, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005359136484912597, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005259456500934903, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005151265281892847, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005014882109244354, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004834454584633932, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004637783616781235, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004449105155654252, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004205875104817096, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003923173517541727, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0035566331780864857, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003046091224096017, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0024412990177370376, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0017001431965036318, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0011291774084384088, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0007532241856279142, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00042018622664272696, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.000259364268013087, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00018114050493750255, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00013254465730028642, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.22658802346632e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.887967061175004e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.219971721999172e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.8224534226168883e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00020217381632326324, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00022324396421558924, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0020646829350793334, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.001030268751962024, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.000966446971154582, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0010952593973873812, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00043491923534028516, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0039781291692336145, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.004728345966535812, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.008211375527181153, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.010506806670927165, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.017849362751072018, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02503292648457228, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7356380820274353, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.60822594165802, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.432411789894104, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.2978976368904114, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.2036655694246292, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.12609033286571503, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.08683816343545914, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.06849599629640579, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.05727799981832504, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.05103309452533722, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04731167107820511, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.043572790920734406, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04105321317911148, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.038434941321611404, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.036457359790802, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03484078869223595, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.033295534551143646, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03177376836538315, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03037405200302601, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.029329780489206314, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028762618079781532, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.02848888747394085, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.02952718921005726, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030114220455288887, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.029828855767846107, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.028799770399928093, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02798677422106266, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02759464457631111, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.0276021845638752, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.028670495375990868, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.02970282733440399, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.030282238498330116, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.032035037875175476, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.032372988760471344, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.034584205597639084, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.0455489456653595, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.041438546031713486, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06567718833684921, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12269894778728485, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1538415402173996, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13643763959407806, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.1995689868927002, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.35537704825401306, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.36377015709877014, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.611117422580719, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6936885714530945, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.8194652795791626, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.7790157794952393, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.2933681011199951, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9208829365079365, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9389880952380952, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.957093253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9697420634920635, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9744543650793651, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9813988095238095, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9833829365079365, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.984375, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9858630952380952, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9935515873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9898313492063492, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9898313492063492, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9848710317460317, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9109645448511123, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9337223384762141, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9547794173416347, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9668596100827911, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9716695052914227, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9759477731492618, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.979018575419652, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9803741574769811, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9815150724683384, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9829253442209136, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9835095001231409, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9839169188406661, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.98483286686465, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.985861678076427, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9862512213828916, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9869273187895404, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9882168629645985, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9884917178452433, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9882002093771858, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9882002093771858, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9886963481807317, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9890182725992619, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9891326455934585, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9899442314898986, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9899432582020369, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9903811323798237, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9907871718975789, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9915552695680281, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.990082259511641, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9902609998330829, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9902634817631576, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9908059121354518, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9911093626892237, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913889924364098, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.992730247533712, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9912365651668321, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9917342795001423, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.991512663739069, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9884616164651086, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9880521045429413, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9867317376934946, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9883617833950202, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.987060550239948, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9867625952941059, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9860122371968982, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9864205933064161, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9858486599260821, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9876714307184938, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9839585830884965, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.00010224378667771816, "validation/loss_best": 0.034584205597639084, "validation/acc_best": 0.9935515873015873, "validation/f1_best": 0.992730247533712} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.06539772806689143, "train/grad": 0.04623968239873648, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7154096794128418, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5886169767379761, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.4154253649711609, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.2859305155277252, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.19720248639583587, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.12596339970827103, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.09153429925441742, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.07580261006951332, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.06608119752258063, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06037208119407296, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.056649437434971334, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05252518384717405, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.049322700295597315, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04549594547599554, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04237763477489352, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03965035457164049, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.036421346003189685, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03293600655160844, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.02923680911771953, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0258897662255913, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02177367345429957, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.01767563891597092, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.013364735236391425, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00924512291327119, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.005852599171921611, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0032740925159305333, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0019422501884400845, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0012667974550276994, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0007755029294639826, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005026446655392647, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00036256017163395883, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00027292488142848014, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00018980401568114758, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.000136657627299428, "train/loss_034_lr5.1e+00_wd1.0e+00": 7.812559604644775e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.024929203093052e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.22761370241642e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9996250048279763e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.000521585326641798, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.00019934909418225288, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00018595961853861808, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0001101368386298418, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.2255273759365083e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00010339004918932915, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.006268643494695425, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.006692391885444522, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.011730875102803111, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.025351483933627607, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.04369422712363303, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.014640955892391503, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013570500388741493, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.011946225669234992, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.010285800790879875, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.008697490850463509, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.006921005195472389, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005945418989285827, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.005593203214230016, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005448853688139934, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005387167262961156, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005345091998751741, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0052934997221746015, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005236359950358747, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005148995544150239, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005060870371235069, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004964506281830836, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0048377966939006, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004668898779491428, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004475766327814199, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004287731671793154, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004001991611876293, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0036505606702849037, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0031823947047814726, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002597617482424539, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0019265886582434178, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.001234207132110896, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0007830612245379598, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005243148429144639, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003212768527919252, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00021161879739338473, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00015506393677469532, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00011643797567330694, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.139568805802355e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.16314405118601e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.276046231723285e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.6495176652332474e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0001456161171250792, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.600686339109759e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.000233062356135734, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00028824159872245807, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00044132689718598005, "train/grad_041_lr1.6e+01_wd1.0e+00": 8.733943170987987e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.2891421224355703e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0006601420828258641, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.001900117106917692, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0036721682475111595, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.003699354611970389, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.006607487028361823, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.007219089878616635, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6960119605064392, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5697982907295227, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.3972240090370178, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.26862362027168274, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.18150725960731506, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.11316297203302383, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.0804886743426323, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.0649261474609375, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.055123887956142426, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.04944488778710365, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04600010812282562, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.0424526110291481, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04001196473836899, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03740851208567619, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.0354820117354393, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03389225900173187, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03237128257751465, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.030708981677889824, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.029241573065519333, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.02819952555000782, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.027251729741692543, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.026502275839447975, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.025943363085389137, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.025542838498950005, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.02554180845618248, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02599327825009823, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.026823829859495163, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02718876674771309, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.027704717591404915, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.028623567894101143, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.029611030593514442, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.030111370608210564, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03159934654831886, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.031947411596775055, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03440200164914131, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.044884756207466125, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.039516136050224304, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06439617276191711, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10630466789007187, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.15217263996601105, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1275027096271515, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.18547245860099792, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3362438380718231, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3149653375148773, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5505499243736267, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6467707753181458, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6925458312034607, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.5915334820747375, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.1077555418014526, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9255952380952381, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9397321428571429, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9600694444444444, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9717261904761905, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9761904761904762, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9794146825396826, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.982390873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9833829365079365, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9848710317460317, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.986359126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9920634920634921, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9933035714285714, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9928075396825397, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9908234126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9908234126984127, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9841269841269841, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9166925962619156, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9347392521144545, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9571112475066644, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.968560809925224, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9730739787555174, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.976395269765886, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9796572508570016, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9803726528970448, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9821931631359048, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9833308170176284, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9837335398000262, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9839332389393559, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9842409751893358, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.984898115510689, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.986753426191771, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9875239014155361, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9877508941598505, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9884556795189154, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9889163359329717, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9887389808624268, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9889623548143963, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9902397579912058, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9901894981732607, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9906413962483468, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9903738920672583, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9907796768701667, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9903728387815269, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9902438502277656, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9900719685911196, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.990250749818553, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9904875460325102, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9906659634706149, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9911093626892237, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913889924364098, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9921828123795112, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910569175698934, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9918316296280865, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9914671729316142, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989559262248713, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9874612959616519, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.987283492840678, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.988594499513057, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9866526440094917, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9880844619810508, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9859406378572453, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9876722019238415, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9878159213413479, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9898760112654624, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9837151823279484, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 7.812559604644775e-05, "validation/loss_best": 0.03440200164914131, "validation/acc_best": 0.9933035714285714, "validation/f1_best": 0.9921828123795112} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.06158523050136864, "train/grad": 0.03925383468158543, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.6888574028015136, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.56396409034729, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.3943584907054901, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.2702435523271561, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.186894034743309, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.12244158200919628, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.09202463164925576, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.07736598208546638, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.06783751145005226, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06200198478065431, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.058020484447479245, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.053490967582911254, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04997060483321548, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04566001841798425, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.042145823510363695, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.039095069970935585, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03562269545160234, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03189072117209434, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.028032376179471612, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0245762390922755, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.020360272880643605, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.016216734284535052, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01198503098450601, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00809833275154233, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004986290773376823, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0027122889645397664, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0016614891309291125, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.001137619661167264, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0007361632026731968, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004972164984792471, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003673103265464306, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00028210734948515894, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00019477852620184422, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001413012482225895, "train/loss_034_lr5.1e+00_wd1.0e+00": 8.331609889864921e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.8320588171482086e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2620996460318566e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.6153549551963806e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 5.382690578699112e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.295390099287033e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.017468050122261e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9498728215694426e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.2556895166635513e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 6.68233260512352e-07, "train/loss_044_lr2.6e+01_wd1.0e+00": 5.629844963550568e-08, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0011293270997703075, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0012506145797669887, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.004766132840886712, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.006472883429378271, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.014547056714072824, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013466595313511789, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.011761944643221795, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.010006332530174404, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.008350966034922748, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.006653455928899348, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005888156146975234, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.005653192028403282, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005576203726814128, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005542420816491358, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005507327145605814, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005450217664474621, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0053806258924305435, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005267374578979797, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005152871825266629, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005033904608862941, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004880243155057542, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004691561046638526, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004474065469839843, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004250410230743001, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003934563425646047, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003562085351004498, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0030533577895039345, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0024073660896829097, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0017112743644611328, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0010286069291305466, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0006526306501837098, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00044963607707359187, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00028816392521093805, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00019194621877886676, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001409782749169608, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001069704670169358, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.710948021667718e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.7587952322819544e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.020222139615726e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.2545972776555345e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.845072834913475e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.3618839523405627e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.1822635905637991e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.0874656640047181e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.791484383836867e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 3.843466465578414e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.3597346769347915e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 6.743058298064248e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0002735664191579342, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0010489385344585821, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0002730000862818138, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0025939100007322854, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.004066217745723721, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6675612926483154, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5424003601074219, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.37235191464424133, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.24861815571784973, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.1666349172592163, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.10503821074962616, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.0763649269938469, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.06262625753879547, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.05367622524499893, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.04845977574586868, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04512748494744301, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.041747402399778366, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.03946801275014877, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.036914147436618805, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03509746864438057, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.033616818487644196, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03207729756832123, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.030521417036652565, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.029051147401332855, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.028018435463309288, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.02692396566271782, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.02612273395061493, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.02558210678398609, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.025352265685796738, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.025562100112438202, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02601020596921444, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.026565780863165855, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.026830455288290977, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.027418965473771095, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.028381280601024628, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.029369046911597252, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03001016564667225, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.031558793038129807, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.031726375222206116, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0341368205845356, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.0444919727742672, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03947898745536804, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06344332545995712, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10344163328409195, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1466970294713974, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.12215378135442734, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.1781894415616989, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3221927881240845, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.29578423500061035, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5123298168182373, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.608644425868988, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6117250919342041, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.5131025910377502, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.099457859992981, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9305555555555556, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9446924603174603, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9623015873015873, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9729662698412699, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9761904761904762, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9799107142857143, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9841269841269841, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9848710317460317, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.986359126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9935515873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9908234126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9836309523809523, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9237316315551056, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9402496954498203, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9590033550201469, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.970080234097234, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9735157330121686, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9767519341687629, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9798341270445106, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9814691907845488, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9825449670992848, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9836814487654528, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9842656142434646, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9844172822976541, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9854968636390291, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9850764537562601, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.987120141418338, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9880251090206759, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9882949009571343, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9884556795189154, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9890542361573539, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9888739581903685, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9886936784153371, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9897795036392251, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.989736279300685, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9903248976101913, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9901895543100535, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9903683224031622, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9903742836774745, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9909478891250456, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9908560260938447, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9902621739177578, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9900834818996176, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9900819159858338, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9911093626892237, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913889924364098, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9923615221946168, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910569175698934, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9916527941099467, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9914671729316142, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989559262248713, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9876400441984924, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9875098148732105, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.988234078625683, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9863820702831645, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9880844619810508, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9867913496394491, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9873146578409452, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9867343886304217, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.988994397398711, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9838400858556999, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 8.331609889864921e-05, "validation/loss_best": 0.0341368205845356, "validation/acc_best": 0.9935515873015873, "validation/f1_best": 0.9923615221946168} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.05659168303012848, "train/grad": 0.03552909482270479, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.6576949548721314, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5331157398223877, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.365487288236618, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.24553970932960512, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.16712780237197877, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.10916472427546978, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.08276893950998783, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.0699886948429048, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.06153381740674377, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.05617588066495955, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.05247761527076364, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.04825994096696377, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.044931668313220145, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04096010277979076, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.03771832825616002, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03492435564287007, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03168106091208756, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.02823750610463321, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.024589003417640923, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.021360453767701983, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.017509072730317713, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.01374970082193613, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.009972697971388698, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.006545311501249671, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0039998318161815405, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002223547343164682, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.00138246507383883, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0009485980495810509, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006130407471209764, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004214298352599144, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00030709578655660155, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00023622470907866955, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00016575848683714867, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00012134207412600517, "train/loss_034_lr5.1e+00_wd1.0e+00": 7.139141671359539e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.794620580971241e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7293132841587067e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.902698539197445e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 6.6222529858350755e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.640763461589813e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 8.284477517008781e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9137375056743623e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.7521513402462005e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.9600149244070055e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 9.160581976175308e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.5066961795091626e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.9408372938632963e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.4229211956262588e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0008403070364147425, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.014255562159232795, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013190430989488959, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.011475218469277024, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.00970727799460292, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.008083724449388683, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.006535899438895285, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005870148569811135, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0056557152199093255, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005574810210382566, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005527515403809957, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005487850823556073, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005421477724157739, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005340206651599146, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005221068541286514, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00510530390922213, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004989952392934356, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004840001187403686, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004660473336989526, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004446215000643861, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0042218599857005755, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0038939311771537177, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0034776041282020744, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0029252459538111, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002237695091680507, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0015477828055009014, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0009322114982387575, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005964800908714096, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00041125111542896777, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002647340630028339, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00018084293392803375, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00013398538671708594, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00010311187189245175, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.335570081636434e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.5726969292209106e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 3.793896300138044e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.2910517077043833e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.7393248782242577e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.2959134835908028e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 9.96633394013264e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 6.927409303019805e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.1941524117820104e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 2.778285168961918e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.423605253156583e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 4.43111585655791e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.6540497295472834e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00010855027942811884, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.8315702844515354e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 2.0820326473346943e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.001990115248230878, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6480397582054138, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5237325429916382, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.355881005525589, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.2353973537683487, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.15700876712799072, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.09984647482633591, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.07366419583559036, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.060951925814151764, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.05260245501995087, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.04773256927728653, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04455165937542915, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.041354332119226456, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.03913460671901703, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03669365122914314, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.034829072654247284, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03334110602736473, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.031714100390672684, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03009122423827648, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.028645366430282593, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.027599463239312172, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.02664046920835972, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.025893472135066986, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.02542455494403839, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.025311259552836418, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.025567444041371346, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0262680035084486, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.026862064376473427, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.0273625236004591, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.027861086651682854, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.02892342023551464, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.02979615144431591, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.030296804383397102, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03169482201337814, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03173555061221123, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0340086854994297, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04424084722995758, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03931613266468048, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06252343207597733, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10124792158603668, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1429048329591751, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1188388392329216, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.17238132655620575, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3117527365684509, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.28286832571029663, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.49060192704200745, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5745646357536316, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5788513422012329, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.48468104004859924, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.9606846570968628, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9305555555555556, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9454365079365079, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9635416666666666, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9732142857142857, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9766865079365079, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.980406746031746, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9841269841269841, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9856150793650794, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9866071428571429, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9920634920634921, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9937996031746031, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.988343253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9861111111111112, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9232850538009966, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9408730642271663, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9603693927597929, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.970315042079408, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9739674991332191, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.97757042013545, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9798357767972627, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9815617428714942, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9827336947407143, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9835097485500202, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9837372239843509, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9842424484694716, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9844224953908522, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9847674775149502, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9871728476968303, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9880302754027006, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9880303598400137, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9881062196090893, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.98828619137646, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9888743927611455, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9901238428712178, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9905731700731574, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.99027699053076, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9905506228314152, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9908682318097115, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9906855276751438, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9910860690168116, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9906806558270285, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9908091864175664, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9903045233131574, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9902604589073449, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9906228894160991, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9911063398334108, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913889924364098, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9929089573488176, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910569175698934, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9916527941099467, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9914671729316142, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989559262248713, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9876424155972152, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9875040067828055, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9884144864390876, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9865621951648984, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9876746680235333, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9867427474558317, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9874953053551865, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9869138923383322, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.988994397398711, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9854701871476712, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 7.139141671359539e-05, "validation/loss_best": 0.0340086854994297, "validation/acc_best": 0.9937996031746031, "validation/f1_best": 0.9929089573488176} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.05595062915235758, "train/grad": 0.03422842931933701, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.6457495355606079, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5219242000579833, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.35600186228752134, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.23862125635147094, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.16296158879995346, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.10845148794353009, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.0838605634495616, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.07177561571821571, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.06355516855604947, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.05818635650910437, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.054408499943092464, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.049957186793908474, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04646673077717423, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04222066186368465, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.038764490466564896, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03575419824570417, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03231440832838416, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.028636216763406993, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.024719134755432606, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.021205677231773735, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.017078151190653444, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.013072042344138026, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.009165725708007812, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.005784546630457044, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.003463960709050298, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.001938007203862071, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0012383992411196232, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0008706974051892757, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0005782126914709807, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0003965987730771303, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00029188863933086393, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00022675039246678353, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00015910223126411438, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00011930513195693493, "train/loss_034_lr5.1e+00_wd1.0e+00": 7.303972728550434e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9641231521964072e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.725264824926853e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.92311592400074e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 5.986448377370834e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.549717366695404e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 6.5268296748399736e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.09941528737545e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.926969900727272e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.7878955006599426e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.3614615201950073e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.904305726289749e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.917461097240448e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.0476989448070527e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0014982986636459827, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01421364039182663, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01314237886108458, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.011401466811075806, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.009615673467051238, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.007999858174007385, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.006488762375665829, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005878217990975827, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.005683770973118954, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005606959764845669, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005562833603471517, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005524959209142252, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005450495092663914, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00536892655887641, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00523161290329881, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005092706318537239, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004941585152701009, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004750662404403556, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004520145867863903, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00424311085982481, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00396568520773144, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0035787657151377062, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003119718785637815, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0025484416565450373, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0018942619112931425, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001281438048590644, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0007667332126311522, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0004984968782810029, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003560011284844222, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00023667679788559327, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00016153966469801162, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00012090410539940422, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.367867090759319e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 6.637001596118352e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.2686380388422546e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 3.6552632553394916e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.2402405025729877e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.7120627529916277e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.114976158462345e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 9.869166922705979e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 6.673423778645542e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 9.461408877170087e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.220091423664585e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.61020885614841e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 7.461728104697479e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 3.0141038681737524e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 3.470130085468309e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.422291255796623e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 2.8488955377215956e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.000295649524395942, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6360960602760315, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5123327970504761, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.34584230184555054, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.22752231359481812, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.15124747157096863, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.09679799526929855, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.07216525822877884, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.0600164569914341, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.05201354995369911, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.047296371310949326, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04421255737543106, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04108690842986107, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.03891149163246155, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03648456931114197, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.03470005467534065, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03322838991880417, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03166046738624573, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03021308407187462, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.028806893154978752, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.027928663417696953, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.027060117572546005, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.026382019743323326, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.025884879752993584, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.025711311027407646, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0260576531291008, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.026665104553103447, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.027294578030705452, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02768353372812271, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.028021058067679405, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.029030505567789078, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.029885968193411827, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.030425289645791054, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0318414531648159, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.031874995678663254, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03407261148095131, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04408291354775429, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03909565880894661, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06208932027220726, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10021425783634186, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14047029614448547, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.11667000502347946, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.17033198475837708, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3069627285003662, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2748708128929138, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.47892704606056213, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5550817251205444, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.56328284740448, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4667965769767761, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.9605987668037415, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9330357142857143, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9461805555555556, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9640376984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9737103174603174, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9766865079365079, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9806547619047619, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9833829365079365, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.984375, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9856150793650794, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9866071428571429, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.986359126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9918154761904762, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9920634920634921, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9937996031746031, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9868551587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9258571553537436, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9417770302624704, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9608747300411538, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9710367942672422, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9739674991332191, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9777488702760946, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9803741574769811, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9817405440800671, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9828233588193378, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9835095001231409, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.983376383668204, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9842424484694716, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9849618350410464, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9853082321809604, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9871728476968303, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9883854834925152, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9885654571392648, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9890971169944175, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9891400023306648, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9894094376033077, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9897625654406168, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9904195454500182, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9905935285944681, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9903736782151888, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9905525179765785, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9903683224031622, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9909512294789101, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9903237657301608, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9910357326169938, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9900804590438048, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9902604589073449, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.990258892993561, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9911063398334108, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913889924364098, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9929089573488176, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910598554126842, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9916527941099467, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9914671729316142, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989559262248713, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9876424155972152, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9875040067828055, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.988234078625683, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9865621951648984, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9876746680235333, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9867427474558317, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9876739845478462, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9869138923383322, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9885911964938, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9857915248147077, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 7.303972728550434e-05, "validation/loss_best": 0.03407261148095131, "validation/acc_best": 0.9937996031746031, "validation/f1_best": 0.9929089573488176} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.055352324126288294, "train/grad": 0.03317376932129264, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.6391911888122559, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5158950138092041, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.3510043442249298, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.23459716618061066, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.15969102442264557, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.10640993684530259, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.08242829281836749, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.07057969491928816, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.06246701526455581, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.057282028142362836, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.053666198290884495, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0495319137442857, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04634353906847537, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04244719076901674, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.03923705636523664, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03645039292983711, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.033138612573966386, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.029464279264211655, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.025537974536418914, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.021941118892282247, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.01759087703190744, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.013339153127744793, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.009174635745584964, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00568388044834137, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0033722131140530107, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0018812731560319661, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0011957520805299282, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0008383544534444809, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0005600871704518795, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.00038779065944254397, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0002886050846427679, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0002229795791208744, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00015558761544525623, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00011213087476789951, "train/loss_034_lr5.1e+00_wd1.0e+00": 6.891237571835518e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.5120219215750695e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9797030836343767e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9130492582917213e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 5.337512120604515e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.0781218558549884e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 6.662085652351379e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.6646366566419602e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.7467962354421615e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.3158656656742097e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 9.183771908283234e-08, "train/loss_045_lr3.1e+01_wd1.0e+00": 8.70823860168457e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.779718488454819e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.334243178367615e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 9.407289326190949e-08, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013746897969394923, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012712533073499798, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01103485689498484, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.009298477505799383, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.007721013010013849, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0063231350900605325, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005789016893832013, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0056260685715824365, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00555342334555462, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005505480180727318, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005454031023546122, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005368083912762813, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005277049754804466, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005138175122556277, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005002186546043959, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004868004501913674, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0046934542659437285, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004479536764556542, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004227150640217587, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.003970749153231736, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0036039642967807596, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0031507018322736256, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0025575050174666105, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0018515022427163786, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0012309416324205813, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0007404570874132333, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00048620312327329884, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003452385443415551, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002310286868669209, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00015972158958902583, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00011886126642821182, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.339749177115663e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 6.751351251523375e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.253627289505403e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 3.6752205097627666e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.1227913668013797e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.6189747488519758e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.002652786507131e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 8.721218406526437e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 6.576289135330179e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.0420226874240186e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.330849346194919e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.6099696467974014e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 4.197533065299762e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.866221565444753e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 5.223943932963355e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 3.1410887833730222e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 2.3612416648506713e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.1373683556088799e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.62982177734375, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.506462037563324, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.34050172567367554, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.22337907552719116, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.14830216765403748, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.09527871757745743, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.07144708186388016, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.05959213897585869, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.05173635855317116, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.04707280173897743, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04404827952384949, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04089882969856262, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.03867866098880768, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.036275748163461685, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.034454911947250366, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03298702836036682, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03148166462779045, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.029895242303609848, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.028539596125483513, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.02754286304116249, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.026603126898407936, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.025860365480184555, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.02537298947572708, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.025294169783592224, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.025636158883571625, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02633371576666832, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.026882177218794823, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.027312008664011955, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.027783066034317017, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.02889709547162056, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.029753638431429863, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.030298156663775444, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03175533562898636, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03172729164361954, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.033985935151576996, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.043979790061712265, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0389568991959095, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06172247976064682, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09942011535167694, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13944289088249207, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.11544345319271088, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.16865915060043335, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3030748665332794, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2701088488101959, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.47348544001579285, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5451961755752563, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5531560778617859, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4568554759025574, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.9436362385749817, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.933531746031746, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9479166666666666, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9645337301587301, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9734623015873016, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9764384920634921, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9811507936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.984375, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9858630952380952, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9866071428571429, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9918154761904762, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9918154761904762, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9933035714285714, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9868551587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9264614481018505, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.943928735259251, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9615878738696683, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9708565899820192, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9737872056984622, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9781056694194312, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9798341270445106, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9817405440800671, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9829253442209136, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.98350846323121, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9835549729327011, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.98406082639529, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9840568078993835, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9860233642059432, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.987343111286969, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9880244444855389, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9882045022964567, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9885548861959056, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9884659574962726, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.988638772368129, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9895794474537127, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9903141706608422, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9904101116297157, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9903704316632216, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9903668775072146, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9901862360898053, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.990768525344342, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9910316908503349, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.990853003238032, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9903075461689701, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9902604589073449, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.990258892993561, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9911063398334108, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913889924364098, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9925502438534889, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910598554126842, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9916527941099467, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9914671729316142, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989559262248713, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9876424155972152, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9875040067828055, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9884144864390876, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9867899765136208, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9876746680235333, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9867899701312124, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9876739845478462, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9869138923383322, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9885911964938, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9857915248147077, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 6.891237571835518e-05, "validation/loss_best": 0.033985935151576996, "validation/acc_best": 0.9933035714285714, "validation/f1_best": 0.9925502438534889} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.05305300997570157, "train/grad": 0.03292848724871874, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.6256843376159668, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5037898063659668, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.34107529997825625, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.22635802924633025, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.1530492776632309, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.10117817312479019, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.07797418657690286, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.06647740241140127, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.058618642520159484, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.05358169239014387, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.05000240017659962, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.04592353156767785, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.042768025463446976, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.03888367702253163, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.03571635846048594, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.0329460090957582, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.029737142166122794, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.026170793576166033, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.022469009384512902, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.019175673881545664, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.015309442030265928, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.011583573035895824, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.008039907747879625, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.005039853481575846, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0029763878043740986, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0016487245541065931, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0010430235508829355, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0007315503526479005, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.00048648337833583354, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0003333904221653938, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0002475049067288637, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00019362052902579307, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00013491406105458736, "train/loss_033_lr4.3e+00_wd1.0e+00": 9.75899025797844e-05, "train/loss_034_lr5.1e+00_wd1.0e+00": 6.378645077347755e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.4596676230430603e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.6515962779521943e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.87650416046381e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 5.905935540795326e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.257392138242722e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 6.2421336770057674e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 8.135195821523666e-07, "train/loss_042_lr1.9e+01_wd1.0e+00": 6.560459733009338e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.1216290295124053e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.105545088648796e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 9.044911712408066e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 7.299426943063736e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.1101906895637513e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.900301039218902e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013912942870520055, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012852994548156858, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.011125986601691694, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.009342798409052193, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0076981859910301865, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0062192919105291365, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005644129598513245, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0054508847102988514, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005364441981073469, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.00530989865103038, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005256495720241219, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005169241721159778, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0050814331127912735, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004949200544069754, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004817594021733385, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004691513367579319, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004527266554359813, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004316772390448023, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004072014539997326, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.003813073913770495, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0034311032948608046, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.002966204993863357, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0023791605939186412, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.001706304556428222, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001122846442995069, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0006729463778538048, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0004425081018689525, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003164982218640944, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00021257855986277717, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0001466296701369174, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00011000502306842464, "train/grad_031_lr3.1e+00_wd1.0e+00": 8.623083550219235e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 6.35359856443074e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 4.9881683542025715e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 3.372889379477329e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.1120282638449338e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.5527583474879146e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.0376445716498994e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 8.518547062674434e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 8.016687955107253e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.0514002065626551e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.4762361437233415e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.5039961405579269e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 4.433015322503751e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 3.3939798436493912e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 2.803173680465476e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.915367715004646e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 8.432800601637162e-07, "train/grad_048_lr5.0e+01_wd1.0e+00": 2.651806094218201e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6275039911270142, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.5040450692176819, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.3386420011520386, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.22189852595329285, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.14721430838108063, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.09474709630012512, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.07111994922161102, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.05937142297625542, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.051584914326667786, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.04694924131035805, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.04392760992050171, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.04083193093538284, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.038604021072387695, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03623637929558754, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.034388571977615356, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.032899290323257446, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03134651854634285, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.029802627861499786, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.028474651277065277, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.027509598061442375, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.026600634679198265, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.02594885230064392, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.025456087663769722, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.02549389936029911, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.02574087493121624, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02639504335820675, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.026962554082274437, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.027428142726421356, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.027838585898280144, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.028879813849925995, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.029839474707841873, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.030276942998170853, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03175261616706848, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03172720968723297, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03398554027080536, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04394935071468353, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.038950640708208084, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.0615856759250164, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09939806908369064, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13916930556297302, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.11539628356695175, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.16838164627552032, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3026024401187897, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.26903218030929565, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4700538218021393, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5411741137504578, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5487013459205627, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4527030289173126, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.9353314638137817, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.933531746031746, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9476686507936508, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9640376984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9737103174603174, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9764384920634921, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9811507936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9833829365079365, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.984375, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9858630952380952, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9866071428571429, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9918154761904762, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9935515873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.988343253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9868551587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9265704035176422, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9435701771426152, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9608717574003571, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9710367942672422, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9737872056984622, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9781056694194312, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9803741574769811, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9817405440800671, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9830486972072194, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.98350846323121, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9841029956831406, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.98406082639529, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9851356744877293, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9860233642059432, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9873479027391354, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9883854834925152, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9885654571392648, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9882856415816287, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9886462626253125, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9885119960801086, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9895794474537127, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9903141706608422, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9900494905860319, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9902771066114, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9903220792206562, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9901862360898053, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.990768525344342, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.990853003238032, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9910357326169938, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9903075461689701, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9902604589073449, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9902619158493738, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9911063398334108, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9915689542526859, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.992730247533712, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910598554126842, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9914728715997353, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9914671729316142, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9897381436976971, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9876424155972152, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9875040067828055, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9884144864390876, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9865621951648984, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9876746680235333, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9870617102380139, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.987494884168572, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9869138923383322, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.988994397398711, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9857915248147077, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 6.378645077347755e-05, "validation/loss_best": 0.03398554027080536, "validation/acc_best": 0.9935515873015873, "validation/f1_best": 0.992730247533712} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.05367026899941266, "train/grad": 0.032753631817176936, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.6287737798690796, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5061137318611145, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.3424182677268982, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.22725335955619813, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.15396258488297462, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.10246070109307766, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.07959562249481678, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.06825422180816532, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.060524895982816816, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.055528213335201144, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.051978123476728796, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.04782750127837062, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.044505617283284664, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.040480190655216575, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.03707261491566896, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03414773061871529, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.030733782956376673, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.027053896440193057, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.023158429078757762, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.01968301453627646, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.015594719992950559, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.011723109940066933, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.007978005837649106, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0049366404581815005, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0029445304069668056, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0016630258038640023, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0010575604625046252, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0007437868509441615, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.000492275208234787, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0003421464003622532, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00025575189851224423, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0002003265079110861, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00014291912317276, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00010181726887822152, "train/loss_034_lr5.1e+00_wd1.0e+00": 6.087154150009155e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.5604795664548873e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.371556892991066e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.608552411198616e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 5.721813067793846e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.16777266561985e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 7.036095485091209e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.330329105257988e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.164280205965042e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 8.442346006631851e-07, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.2274086475372315e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 6.317067891359329e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.3498589396476745e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.388461381196976e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.560927093029022e-08, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013645504233427346, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012588672186248005, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0108852894930169, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.009140564049594103, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0075740603730082515, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.006186459677992389, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005660182102583349, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.005502902699518018, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005448690580087714, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005420475485152565, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005388099613483064, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005318992536049336, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005244787763804198, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005115804474626202, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004978417045786046, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004838574387540575, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004648946098895977, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004409164575627074, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004142461970041041, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.003856675709103001, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0034533411495795007, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.002976965433481382, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0023742082984244918, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0016910430470306892, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001107525627285213, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0006646360038030252, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.000435056361820898, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00031170148768978835, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00021018205210566522, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00014626451272533813, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00010966357268557658, "train/grad_031_lr3.1e+00_wd1.0e+00": 8.690399318425079e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 6.270023926958856e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 4.868722127298497e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 3.424050800731493e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.162310374007603e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.4870564537725895e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.9549903467590558e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 8.945756225451103e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 7.554145586471796e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.053818535284792e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 3.837262098114172e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.4692369978014472e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 4.373134165531982e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 3.583383922095264e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 3.7370279094909063e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.2996042762594753e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 2.7112696817622325e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.5467330836095916e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.6271306276321411, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.503764271736145, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.3383199870586395, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.2216528207063675, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.14705511927604675, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.09463325142860413, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.07105933129787445, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.05933767184615135, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.051582396030426025, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.04692915081977844, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.043921809643507004, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.040821898728609085, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.03858384117484093, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.03620211035013199, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.034372761845588684, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03292976692318916, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.031338900327682495, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.02980748564004898, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.028473462909460068, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.027538830414414406, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.026625443249940872, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.02596616931259632, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.025455845519900322, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.025457508862018585, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.025741558521986008, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.026426654309034348, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.026984110474586487, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.027431359514594078, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.027911627665162086, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.02895386703312397, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.029828641563653946, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03025158680975437, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0317494235932827, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03175153583288193, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03395787626504898, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04389012232422829, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03891115263104439, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.061566486954689026, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09933748841285706, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13904260098934174, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.11530046910047531, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.16786687076091766, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.30194374918937683, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.26881536841392517, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.47005122900009155, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5408380627632141, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5475835800170898, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.45245638489723206, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.9327917098999023, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.9330357142857143, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9481646825396826, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9645337301587301, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9737103174603174, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9764384920634921, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9811507936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9831349206349206, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.984375, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9861111111111112, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9866071428571429, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9935515873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9868551587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.9259853749551561, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9439280472381203, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9615879861770693, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9710367942672422, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9737872056984622, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9781056694194312, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9801942051748095, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9817405440800671, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9831506826087952, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9835075979690755, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9841029956831406, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9840608285789559, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9851356744877293, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9860233642059432, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9873479027391354, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9883854834925152, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9885654571392648, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9885561977637591, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9886462626253125, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9885119960801086, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9897582184735219, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9901338655318025, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9902298065006759, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9900983031288326, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9903220792206562, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9901862360898053, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.990768525344342, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.990853003238032, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9910357326169938, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9903075461689701, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9902604589073449, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9902619158493738, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9911063398334108, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913889924364098, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.992730247533712, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9910598554126842, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9914728715997353, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9914671729316142, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989559262248713, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9876424155972152, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9875040067828055, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9884144864390876, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9865621951648984, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9876746680235333, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9867903831843448, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9876739845478462, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9869138923383322, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9885911964938, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9857915248147077, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 6.087154150009155e-05, "validation/loss_best": 0.03395787626504898, "validation/acc_best": 0.9935515873015873, "validation/f1_best": 0.992730247533712} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/config.yaml b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e46a28959c1ed044f4fac2bcdae69fb16ac4a0a --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (hcpya_task21 patch linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear +model: flat_mae +representation: patch +classifier: linear +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..70e6c3759a51d14054cbad6a5c79501a6464abb5 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 13, "eval/id_best": 47, "eval/lr_best": 0.012899999999999998, "eval/wd_best": 0.05, "eval/train/loss": 0.036383695900440216, "eval/train/acc": 0.991894310226854, "eval/train/acc_std": 0.0006661475345740407, "eval/train/f1": 0.9925895534308935, "eval/train/f1_std": 0.0006528365194599703, "eval/validation/loss": 0.059256426990032196, "eval/validation/acc": 0.9831349206349206, "eval/validation/acc_std": 0.002080417637980938, "eval/validation/f1": 0.9813722934361702, "eval/validation/f1_std": 0.002585499726480732, "eval/test/loss": 0.08121136575937271, "eval/test/acc": 0.9744047619047619, "eval/test/acc_std": 0.0021155065234699723, "eval/test/f1": 0.9682842215901492, "eval/test/f1_std": 0.0029572592015188015} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_best.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..848a1c779bdcf8828d5992d9d59757fbfdb5fc4a --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 13, "eval/best/id_best": 47, "eval/best/lr_best": 0.012899999999999998, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.036383695900440216, "eval/best/train/acc": 0.991894310226854, "eval/best/train/acc_std": 0.0006661475345740407, "eval/best/train/f1": 0.9925895534308935, "eval/best/train/f1_std": 0.0006528365194599703, "eval/best/validation/loss": 0.059256426990032196, "eval/best/validation/acc": 0.9831349206349206, "eval/best/validation/acc_std": 0.002080417637980938, "eval/best/validation/f1": 0.9813722934361702, "eval/best/validation/f1_std": 0.002585499726480732, "eval/best/test/loss": 0.08121136575937271, "eval/best/test/acc": 0.9744047619047619, "eval/best/test/acc_std": 0.0021155065234699723, "eval/best/test/f1": 0.9682842215901492, "eval/best/test/f1_std": 0.0029572592015188015} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_last.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..4e8bee44a17cb2fb07507acc68d42ab32e26d3e0 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 48, "eval/last/lr_best": 0.015, "eval/last/wd_best": 0.05, "eval/last/train/loss": 0.03115411475300789, "eval/last/train/acc": 0.9939470498447287, "eval/last/train/acc_std": 0.0005514373031612011, "eval/last/train/f1": 0.9953065675349857, "eval/last/train/f1_std": 0.00043996382283450683, "eval/last/validation/loss": 0.05707860738039017, "eval/last/validation/acc": 0.9826388888888888, "eval/last/validation/acc_std": 0.0020888185419408413, "eval/last/validation/f1": 0.980498942978973, "eval/last/validation/f1_std": 0.002683513019208485, "eval/last/test/loss": 0.07701271772384644, "eval/last/test/acc": 0.9755952380952381, "eval/last/test/acc_std": 0.002092228779269176, "eval/last/test/f1": 0.969662202077634, "eval/last/test/f1_std": 0.002929163066630943} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..4bde86356a959185d63d150fbebc15d26a2e4135 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,linear,hcpya_task21,best,13,0.012899999999999998,0.05,47,"[43, 1.0]",train,0.036383695900440216,0.991894310226854,0.0006661475345740407,0.9925895534308935,0.0006528365194599703 +flat_mae,patch,linear,hcpya_task21,best,13,0.012899999999999998,0.05,47,"[43, 1.0]",validation,0.059256426990032196,0.9831349206349206,0.002080417637980938,0.9813722934361702,0.002585499726480732 +flat_mae,patch,linear,hcpya_task21,best,13,0.012899999999999998,0.05,47,"[43, 1.0]",test,0.08121136575937271,0.9744047619047619,0.0021155065234699723,0.9682842215901492,0.0029572592015188015 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_best.csv b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..4bde86356a959185d63d150fbebc15d26a2e4135 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,linear,hcpya_task21,best,13,0.012899999999999998,0.05,47,"[43, 1.0]",train,0.036383695900440216,0.991894310226854,0.0006661475345740407,0.9925895534308935,0.0006528365194599703 +flat_mae,patch,linear,hcpya_task21,best,13,0.012899999999999998,0.05,47,"[43, 1.0]",validation,0.059256426990032196,0.9831349206349206,0.002080417637980938,0.9813722934361702,0.002585499726480732 +flat_mae,patch,linear,hcpya_task21,best,13,0.012899999999999998,0.05,47,"[43, 1.0]",test,0.08121136575937271,0.9744047619047619,0.0021155065234699723,0.9682842215901492,0.0029572592015188015 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_last.csv b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..fe87584ffda865caf219aace450edca8ddd26f9d --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,linear,hcpya_task21,last,19,0.015,0.05,48,"[50, 1.0]",train,0.03115411475300789,0.9939470498447287,0.0005514373031612011,0.9953065675349857,0.00043996382283450683 +flat_mae,patch,linear,hcpya_task21,last,19,0.015,0.05,48,"[50, 1.0]",validation,0.05707860738039017,0.9826388888888888,0.0020888185419408413,0.980498942978973,0.002683513019208485 +flat_mae,patch,linear,hcpya_task21,last,19,0.015,0.05,48,"[50, 1.0]",test,0.07701271772384644,0.9755952380952381,0.002092228779269176,0.969662202077634,0.002929163066630943 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/log.txt b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b4dc07906e0ac5ba8a403b3d0d414369aeb4d5d0 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/log.txt @@ -0,0 +1,886 @@ +fMRI foundation model probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 23:06:35 +config: +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (hcpya_task21 patch linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear +model: flat_mae +representation: patch +classifier: linear +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x LinearClassifier( + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 0.8M (0.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:24:16 lr: nan time: 3.6409 data: 3.2013 max mem: 3910 +train: [0] [ 20/400] eta: 0:03:33 lr: 0.000003 loss: 3.0958 (3.0964) grad: 0.3068 (0.3221) time: 0.4072 data: 0.0054 max mem: 3951 +train: [0] [ 40/400] eta: 0:02:46 lr: 0.000006 loss: 3.0719 (3.0836) grad: 0.3194 (0.3230) time: 0.3587 data: 0.0029 max mem: 3951 +train: [0] [ 60/400] eta: 0:02:25 lr: 0.000009 loss: 3.0409 (3.0586) grad: 0.3276 (0.3246) time: 0.3596 data: 0.0034 max mem: 3951 +train: [0] [ 80/400] eta: 0:02:11 lr: 0.000012 loss: 2.9853 (3.0340) grad: 0.3225 (0.3197) time: 0.3545 data: 0.0033 max mem: 3951 +train: [0] [100/400] eta: 0:01:59 lr: 0.000015 loss: 2.9198 (3.0066) grad: 0.2994 (0.3188) time: 0.3416 data: 0.0031 max mem: 3951 +train: [0] [120/400] eta: 0:01:48 lr: 0.000018 loss: 2.8658 (2.9762) grad: 0.2956 (0.3145) time: 0.3513 data: 0.0033 max mem: 3951 +train: [0] [140/400] eta: 0:01:40 lr: 0.000021 loss: 2.7894 (2.9463) grad: 0.2826 (0.3105) time: 0.3789 data: 0.0032 max mem: 3951 +train: [0] [160/400] eta: 0:01:33 lr: 0.000024 loss: 2.7091 (2.9113) grad: 0.2737 (0.3065) time: 0.3874 data: 0.0033 max mem: 3951 +train: [0] [180/400] eta: 0:01:25 lr: 0.000027 loss: 2.6103 (2.8745) grad: 0.2804 (0.3043) time: 0.3781 data: 0.0033 max mem: 3951 +train: [0] [200/400] eta: 0:01:16 lr: 0.000030 loss: 2.5491 (2.8385) grad: 0.2817 (0.3027) time: 0.3617 data: 0.0032 max mem: 3951 +train: [0] [220/400] eta: 0:01:08 lr: 0.000033 loss: 2.4825 (2.8045) grad: 0.2755 (0.3006) time: 0.3513 data: 0.0032 max mem: 3951 +train: [0] [240/400] eta: 0:01:00 lr: 0.000036 loss: 2.4397 (2.7725) grad: 0.2754 (0.2988) time: 0.3631 data: 0.0033 max mem: 3951 +train: [0] [260/400] eta: 0:00:53 lr: 0.000039 loss: 2.3763 (2.7401) grad: 0.2773 (0.2969) time: 0.3879 data: 0.0034 max mem: 3951 +train: [0] [280/400] eta: 0:00:45 lr: 0.000042 loss: 2.3375 (2.7097) grad: 0.2693 (0.2944) time: 0.3709 data: 0.0034 max mem: 3951 +train: [0] [300/400] eta: 0:00:39 lr: 0.000045 loss: 2.2952 (2.6800) grad: 0.2587 (0.2916) time: 0.5707 data: 0.1954 max mem: 3951 +train: [0] [320/400] eta: 0:00:31 lr: 0.000048 loss: 2.2375 (2.6500) grad: 0.2467 (0.2897) time: 0.3960 data: 0.0041 max mem: 3951 +train: [0] [340/400] eta: 0:00:23 lr: 0.000051 loss: 2.1757 (2.6221) grad: 0.2495 (0.2876) time: 0.3806 data: 0.0034 max mem: 3951 +train: [0] [360/400] eta: 0:00:15 lr: 0.000054 loss: 2.1533 (2.5959) grad: 0.2495 (0.2852) time: 0.3833 data: 0.0034 max mem: 3951 +train: [0] [380/400] eta: 0:00:07 lr: 0.000057 loss: 2.1255 (2.5699) grad: 0.2426 (0.2835) time: 0.3921 data: 0.0033 max mem: 3951 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 2.0743 (2.5437) grad: 0.2463 (0.2819) time: 0.3856 data: 0.0034 max mem: 3951 +train: [0] Total time: 0:02:36 (0.3915 s / it) +train: [0] Summary: lr: 0.000060 loss: 2.0743 (2.5437) grad: 0.2463 (0.2819) +eval (validation): [0] [ 0/63] eta: 0:03:37 time: 3.4571 data: 3.1640 max mem: 3951 +eval (validation): [0] [20/63] eta: 0:00:22 time: 0.3724 data: 0.0044 max mem: 3951 +eval (validation): [0] [40/63] eta: 0:00:10 time: 0.3804 data: 0.0035 max mem: 3951 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3469 data: 0.0032 max mem: 3951 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3475 data: 0.0031 max mem: 3951 +eval (validation): [0] Total time: 0:00:26 (0.4202 s / it) +cv: [0] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.329 acc: 0.918 f1: 0.902 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:24:21 lr: nan time: 3.6537 data: 3.3482 max mem: 3951 +train: [1] [ 20/400] eta: 0:03:24 lr: 0.000063 loss: 2.0243 (2.0257) grad: 0.2344 (0.2390) time: 0.3831 data: 0.0040 max mem: 3951 +train: [1] [ 40/400] eta: 0:02:43 lr: 0.000066 loss: 2.0167 (2.0131) grad: 0.2344 (0.2401) time: 0.3647 data: 0.0035 max mem: 3951 +train: [1] [ 60/400] eta: 0:02:24 lr: 0.000069 loss: 1.9833 (1.9954) grad: 0.2369 (0.2385) time: 0.3685 data: 0.0035 max mem: 3951 +train: [1] [ 80/400] eta: 0:02:11 lr: 0.000072 loss: 1.9477 (1.9761) grad: 0.2358 (0.2376) time: 0.3710 data: 0.0035 max mem: 3951 +train: [1] [100/400] eta: 0:02:00 lr: 0.000075 loss: 1.9154 (1.9645) grad: 0.2243 (0.2355) time: 0.3576 data: 0.0034 max mem: 3951 +train: [1] [120/400] eta: 0:01:51 lr: 0.000078 loss: 1.8981 (1.9518) grad: 0.2217 (0.2340) time: 0.3898 data: 0.0036 max mem: 3951 +train: [1] [140/400] eta: 0:01:42 lr: 0.000081 loss: 1.8616 (1.9364) grad: 0.2226 (0.2336) time: 0.3630 data: 0.0036 max mem: 3951 +train: [1] [160/400] eta: 0:01:33 lr: 0.000084 loss: 1.8258 (1.9203) grad: 0.2213 (0.2328) time: 0.3608 data: 0.0035 max mem: 3951 +train: [1] [180/400] eta: 0:01:24 lr: 0.000087 loss: 1.8011 (1.9071) grad: 0.2158 (0.2323) time: 0.3397 data: 0.0033 max mem: 3951 +train: [1] [200/400] eta: 0:01:16 lr: 0.000090 loss: 1.7776 (1.8935) grad: 0.2135 (0.2303) time: 0.3493 data: 0.0036 max mem: 3951 +train: [1] [220/400] eta: 0:01:08 lr: 0.000093 loss: 1.7489 (1.8791) grad: 0.2135 (0.2288) time: 0.3626 data: 0.0037 max mem: 3951 +train: [1] [240/400] eta: 0:01:00 lr: 0.000096 loss: 1.7261 (1.8641) grad: 0.2154 (0.2281) time: 0.3663 data: 0.0034 max mem: 3951 +train: [1] [260/400] eta: 0:00:52 lr: 0.000099 loss: 1.6916 (1.8516) grad: 0.2078 (0.2263) time: 0.3590 data: 0.0034 max mem: 3951 +train: [1] [280/400] eta: 0:00:45 lr: 0.000102 loss: 1.6841 (1.8388) grad: 0.2110 (0.2260) time: 0.3537 data: 0.0033 max mem: 3951 +train: [1] [300/400] eta: 0:00:38 lr: 0.000105 loss: 1.6567 (1.8274) grad: 0.2149 (0.2252) time: 0.5597 data: 0.2048 max mem: 3951 +train: [1] [320/400] eta: 0:00:30 lr: 0.000108 loss: 1.6581 (1.8164) grad: 0.2026 (0.2236) time: 0.3511 data: 0.0035 max mem: 3951 +train: [1] [340/400] eta: 0:00:22 lr: 0.000111 loss: 1.6408 (1.8048) grad: 0.1965 (0.2221) time: 0.3481 data: 0.0023 max mem: 3951 +train: [1] [360/400] eta: 0:00:15 lr: 0.000114 loss: 1.6238 (1.7938) grad: 0.1992 (0.2211) time: 0.3571 data: 0.0035 max mem: 3951 +train: [1] [380/400] eta: 0:00:07 lr: 0.000117 loss: 1.5862 (1.7815) grad: 0.1942 (0.2199) time: 0.3602 data: 0.0034 max mem: 3951 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.5558 (1.7700) grad: 0.2097 (0.2195) time: 0.3681 data: 0.0034 max mem: 3951 +train: [1] Total time: 0:02:32 (0.3802 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.5558 (1.7700) grad: 0.2097 (0.2195) +eval (validation): [1] [ 0/63] eta: 0:03:40 time: 3.4974 data: 3.2363 max mem: 3951 +eval (validation): [1] [20/63] eta: 0:00:22 time: 0.3801 data: 0.0048 max mem: 3951 +eval (validation): [1] [40/63] eta: 0:00:10 time: 0.3605 data: 0.0036 max mem: 3951 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3115 data: 0.0033 max mem: 3951 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3135 data: 0.0033 max mem: 3951 +eval (validation): [1] Total time: 0:00:25 (0.4048 s / it) +cv: [1] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.162 acc: 0.962 f1: 0.955 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:24:09 lr: nan time: 3.6249 data: 3.3167 max mem: 3951 +train: [2] [ 20/400] eta: 0:03:27 lr: 0.000123 loss: 1.5143 (1.5324) grad: 0.2157 (0.2148) time: 0.3924 data: 0.0037 max mem: 3951 +train: [2] [ 40/400] eta: 0:02:41 lr: 0.000126 loss: 1.5164 (1.5335) grad: 0.2028 (0.2060) time: 0.3483 data: 0.0031 max mem: 3951 +train: [2] [ 60/400] eta: 0:02:21 lr: 0.000129 loss: 1.5165 (1.5268) grad: 0.1957 (0.2031) time: 0.3471 data: 0.0036 max mem: 3951 +train: [2] [ 80/400] eta: 0:02:07 lr: 0.000132 loss: 1.4943 (1.5152) grad: 0.1940 (0.2013) time: 0.3403 data: 0.0034 max mem: 3951 +train: [2] [100/400] eta: 0:01:55 lr: 0.000135 loss: 1.4815 (1.5094) grad: 0.1946 (0.2006) time: 0.3432 data: 0.0034 max mem: 3951 +train: [2] [120/400] eta: 0:01:46 lr: 0.000138 loss: 1.4708 (1.4994) grad: 0.1960 (0.1994) time: 0.3507 data: 0.0034 max mem: 3951 +train: [2] [140/400] eta: 0:01:37 lr: 0.000141 loss: 1.4473 (1.4911) grad: 0.1851 (0.1978) time: 0.3469 data: 0.0033 max mem: 3951 +train: [2] [160/400] eta: 0:01:29 lr: 0.000144 loss: 1.4188 (1.4823) grad: 0.1851 (0.1972) time: 0.3409 data: 0.0033 max mem: 3951 +train: [2] [180/400] eta: 0:01:21 lr: 0.000147 loss: 1.4088 (1.4749) grad: 0.1920 (0.1969) time: 0.3413 data: 0.0033 max mem: 3951 +train: [2] [200/400] eta: 0:01:13 lr: 0.000150 loss: 1.3834 (1.4662) grad: 0.1889 (0.1964) time: 0.3557 data: 0.0034 max mem: 3951 +train: [2] [220/400] eta: 0:01:05 lr: 0.000153 loss: 1.3955 (1.4601) grad: 0.1812 (0.1954) time: 0.3607 data: 0.0032 max mem: 3951 +train: [2] [240/400] eta: 0:00:58 lr: 0.000156 loss: 1.3962 (1.4528) grad: 0.1784 (0.1940) time: 0.3603 data: 0.0033 max mem: 3951 +train: [2] [260/400] eta: 0:00:51 lr: 0.000159 loss: 1.3509 (1.4440) grad: 0.1784 (0.1935) time: 0.3566 data: 0.0033 max mem: 3951 +train: [2] [280/400] eta: 0:00:43 lr: 0.000162 loss: 1.3439 (1.4369) grad: 0.1748 (0.1922) time: 0.3578 data: 0.0034 max mem: 3951 +train: [2] [300/400] eta: 0:00:37 lr: 0.000165 loss: 1.3249 (1.4275) grad: 0.1742 (0.1916) time: 0.5355 data: 0.1944 max mem: 3951 +train: [2] [320/400] eta: 0:00:30 lr: 0.000168 loss: 1.3049 (1.4211) grad: 0.1753 (0.1906) time: 0.3631 data: 0.0041 max mem: 3951 +train: [2] [340/400] eta: 0:00:22 lr: 0.000171 loss: 1.3113 (1.4136) grad: 0.1771 (0.1903) time: 0.3515 data: 0.0035 max mem: 3951 +train: [2] [360/400] eta: 0:00:14 lr: 0.000174 loss: 1.2832 (1.4066) grad: 0.1771 (0.1899) time: 0.3605 data: 0.0034 max mem: 3951 +train: [2] [380/400] eta: 0:00:07 lr: 0.000177 loss: 1.2802 (1.4003) grad: 0.1730 (0.1891) time: 0.3480 data: 0.0034 max mem: 3951 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 1.2711 (1.3927) grad: 0.1698 (0.1884) time: 0.3594 data: 0.0037 max mem: 3951 +train: [2] Total time: 0:02:28 (0.3715 s / it) +train: [2] Summary: lr: 0.000180 loss: 1.2711 (1.3927) grad: 0.1698 (0.1884) +eval (validation): [2] [ 0/63] eta: 0:03:47 time: 3.6126 data: 3.3162 max mem: 3951 +eval (validation): [2] [20/63] eta: 0:00:22 time: 0.3743 data: 0.0038 max mem: 3951 +eval (validation): [2] [40/63] eta: 0:00:10 time: 0.3444 data: 0.0032 max mem: 3951 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3346 data: 0.0031 max mem: 3951 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3289 data: 0.0031 max mem: 3951 +eval (validation): [2] Total time: 0:00:25 (0.4065 s / it) +cv: [2] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.101 acc: 0.971 f1: 0.966 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:24:13 lr: nan time: 3.6348 data: 3.3343 max mem: 3951 +train: [3] [ 20/400] eta: 0:03:23 lr: 0.000183 loss: 1.2293 (1.2414) grad: 0.1675 (0.1750) time: 0.3819 data: 0.0039 max mem: 3951 +train: [3] [ 40/400] eta: 0:02:41 lr: 0.000186 loss: 1.2232 (1.2284) grad: 0.1665 (0.1708) time: 0.3544 data: 0.0030 max mem: 3951 +train: [3] [ 60/400] eta: 0:02:24 lr: 0.000189 loss: 1.2371 (1.2369) grad: 0.1653 (0.1696) time: 0.3796 data: 0.0035 max mem: 3951 +train: [3] [ 80/400] eta: 0:02:10 lr: 0.000192 loss: 1.2372 (1.2348) grad: 0.1653 (0.1699) time: 0.3563 data: 0.0034 max mem: 3951 +train: [3] [100/400] eta: 0:02:01 lr: 0.000195 loss: 1.2189 (1.2307) grad: 0.1666 (0.1700) time: 0.3967 data: 0.0034 max mem: 3951 +train: [3] [120/400] eta: 0:01:52 lr: 0.000198 loss: 1.2189 (1.2285) grad: 0.1657 (0.1682) time: 0.3733 data: 0.0034 max mem: 3951 +train: [3] [140/400] eta: 0:01:43 lr: 0.000201 loss: 1.1931 (1.2200) grad: 0.1616 (0.1692) time: 0.3735 data: 0.0035 max mem: 3951 +train: [3] [160/400] eta: 0:01:33 lr: 0.000204 loss: 1.1646 (1.2136) grad: 0.1723 (0.1699) time: 0.3491 data: 0.0033 max mem: 3951 +train: [3] [180/400] eta: 0:01:24 lr: 0.000207 loss: 1.1710 (1.2113) grad: 0.1703 (0.1699) time: 0.3432 data: 0.0033 max mem: 3951 +train: [3] [200/400] eta: 0:01:16 lr: 0.000210 loss: 1.1558 (1.2044) grad: 0.1643 (0.1695) time: 0.3570 data: 0.0032 max mem: 3951 +train: [3] [220/400] eta: 0:01:08 lr: 0.000213 loss: 1.1502 (1.1985) grad: 0.1565 (0.1687) time: 0.3540 data: 0.0034 max mem: 3951 +train: [3] [240/400] eta: 0:01:00 lr: 0.000216 loss: 1.1415 (1.1931) grad: 0.1542 (0.1679) time: 0.3719 data: 0.0037 max mem: 3951 +train: [3] [260/400] eta: 0:00:53 lr: 0.000219 loss: 1.1306 (1.1896) grad: 0.1542 (0.1674) time: 0.3703 data: 0.0034 max mem: 3951 +train: [3] [280/400] eta: 0:00:45 lr: 0.000222 loss: 1.1334 (1.1851) grad: 0.1539 (0.1663) time: 0.3477 data: 0.0036 max mem: 3951 +train: [3] [300/400] eta: 0:00:38 lr: 0.000225 loss: 1.1316 (1.1801) grad: 0.1539 (0.1658) time: 0.5388 data: 0.1983 max mem: 3951 +train: [3] [320/400] eta: 0:00:30 lr: 0.000228 loss: 1.0943 (1.1742) grad: 0.1598 (0.1654) time: 0.3666 data: 0.0035 max mem: 3951 +train: [3] [340/400] eta: 0:00:23 lr: 0.000231 loss: 1.0878 (1.1692) grad: 0.1569 (0.1649) time: 0.3484 data: 0.0036 max mem: 3951 +train: [3] [360/400] eta: 0:00:15 lr: 0.000234 loss: 1.0902 (1.1654) grad: 0.1503 (0.1640) time: 0.3525 data: 0.0027 max mem: 3951 +train: [3] [380/400] eta: 0:00:07 lr: 0.000237 loss: 1.0744 (1.1597) grad: 0.1484 (0.1634) time: 0.3525 data: 0.0034 max mem: 3951 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 1.0684 (1.1561) grad: 0.1551 (0.1630) time: 0.3613 data: 0.0035 max mem: 3951 +train: [3] Total time: 0:02:31 (0.3799 s / it) +train: [3] Summary: lr: 0.000240 loss: 1.0684 (1.1561) grad: 0.1551 (0.1630) +eval (validation): [3] [ 0/63] eta: 0:03:41 time: 3.5184 data: 3.2392 max mem: 3951 +eval (validation): [3] [20/63] eta: 0:00:21 time: 0.3479 data: 0.0036 max mem: 3951 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3484 data: 0.0034 max mem: 3951 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3221 data: 0.0033 max mem: 3951 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3190 data: 0.0032 max mem: 3951 +eval (validation): [3] Total time: 0:00:24 (0.3946 s / it) +cv: [3] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 0.086 acc: 0.978 f1: 0.973 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:24:13 lr: nan time: 3.6328 data: 3.3353 max mem: 3951 +train: [4] [ 20/400] eta: 0:03:24 lr: 0.000243 loss: 1.0538 (1.0450) grad: 0.1465 (0.1502) time: 0.3844 data: 0.0033 max mem: 3951 +train: [4] [ 40/400] eta: 0:02:44 lr: 0.000246 loss: 1.0465 (1.0418) grad: 0.1463 (0.1500) time: 0.3687 data: 0.0031 max mem: 3951 +train: [4] [ 60/400] eta: 0:02:24 lr: 0.000249 loss: 1.0328 (1.0397) grad: 0.1439 (0.1489) time: 0.3617 data: 0.0035 max mem: 3951 +train: [4] [ 80/400] eta: 0:02:10 lr: 0.000252 loss: 1.0191 (1.0343) grad: 0.1424 (0.1477) time: 0.3528 data: 0.0033 max mem: 3951 +train: [4] [100/400] eta: 0:01:58 lr: 0.000255 loss: 1.0108 (1.0284) grad: 0.1431 (0.1477) time: 0.3511 data: 0.0035 max mem: 3951 +train: [4] [120/400] eta: 0:01:49 lr: 0.000258 loss: 1.0057 (1.0235) grad: 0.1510 (0.1488) time: 0.3712 data: 0.0034 max mem: 3951 +train: [4] [140/400] eta: 0:01:41 lr: 0.000261 loss: 0.9825 (1.0154) grad: 0.1530 (0.1489) time: 0.3681 data: 0.0035 max mem: 3951 +train: [4] [160/400] eta: 0:01:32 lr: 0.000264 loss: 0.9825 (1.0122) grad: 0.1459 (0.1485) time: 0.3656 data: 0.0034 max mem: 3951 +train: [4] [180/400] eta: 0:01:24 lr: 0.000267 loss: 0.9881 (1.0086) grad: 0.1459 (0.1481) time: 0.3769 data: 0.0031 max mem: 3951 +train: [4] [200/400] eta: 0:01:16 lr: 0.000270 loss: 0.9527 (1.0024) grad: 0.1471 (0.1478) time: 0.3618 data: 0.0033 max mem: 3951 +train: [4] [220/400] eta: 0:01:08 lr: 0.000273 loss: 0.9555 (1.0003) grad: 0.1479 (0.1478) time: 0.3640 data: 0.0033 max mem: 3951 +train: [4] [240/400] eta: 0:01:00 lr: 0.000276 loss: 0.9691 (0.9981) grad: 0.1372 (0.1469) time: 0.3621 data: 0.0036 max mem: 3951 +train: [4] [260/400] eta: 0:00:52 lr: 0.000279 loss: 0.9583 (0.9957) grad: 0.1350 (0.1464) time: 0.3551 data: 0.0033 max mem: 3951 +train: [4] [280/400] eta: 0:00:45 lr: 0.000282 loss: 0.9479 (0.9922) grad: 0.1397 (0.1460) time: 0.3544 data: 0.0035 max mem: 3951 +train: [4] [300/400] eta: 0:00:38 lr: 0.000285 loss: 0.9289 (0.9872) grad: 0.1396 (0.1456) time: 0.5479 data: 0.2012 max mem: 3951 +train: [4] [320/400] eta: 0:00:30 lr: 0.000288 loss: 0.9075 (0.9815) grad: 0.1396 (0.1451) time: 0.3734 data: 0.0039 max mem: 3951 +train: [4] [340/400] eta: 0:00:23 lr: 0.000291 loss: 0.8828 (0.9770) grad: 0.1378 (0.1444) time: 0.3621 data: 0.0033 max mem: 3951 +train: [4] [360/400] eta: 0:00:15 lr: 0.000294 loss: 0.8888 (0.9743) grad: 0.1333 (0.1439) time: 0.3518 data: 0.0032 max mem: 3951 +train: [4] [380/400] eta: 0:00:07 lr: 0.000297 loss: 0.9082 (0.9709) grad: 0.1333 (0.1435) time: 0.3485 data: 0.0035 max mem: 3951 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 0.9082 (0.9683) grad: 0.1365 (0.1432) time: 0.3548 data: 0.0033 max mem: 3951 +train: [4] Total time: 0:02:32 (0.3803 s / it) +train: [4] Summary: lr: 0.000300 loss: 0.9082 (0.9683) grad: 0.1365 (0.1432) +eval (validation): [4] [ 0/63] eta: 0:03:38 time: 3.4664 data: 3.2435 max mem: 3951 +eval (validation): [4] [20/63] eta: 0:00:22 time: 0.3673 data: 0.0045 max mem: 3951 +eval (validation): [4] [40/63] eta: 0:00:10 time: 0.3559 data: 0.0029 max mem: 3951 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3239 data: 0.0033 max mem: 3951 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3178 data: 0.0033 max mem: 3951 +eval (validation): [4] Total time: 0:00:25 (0.4036 s / it) +cv: [4] best hparam: (9.8, 1.0) (038) ('038_lr9.8e+00_wd1.0e+00') loss: 0.123 acc: 0.971 f1: 0.966 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:23:32 lr: nan time: 3.5309 data: 3.2465 max mem: 3951 +train: [5] [ 20/400] eta: 0:03:22 lr: 0.000300 loss: 0.8552 (0.8643) grad: 0.1407 (0.1375) time: 0.3836 data: 0.0037 max mem: 3951 +train: [5] [ 40/400] eta: 0:02:41 lr: 0.000300 loss: 0.8598 (0.8710) grad: 0.1324 (0.1370) time: 0.3583 data: 0.0030 max mem: 3951 +train: [5] [ 60/400] eta: 0:02:23 lr: 0.000300 loss: 0.8799 (0.8738) grad: 0.1324 (0.1352) time: 0.3713 data: 0.0036 max mem: 3951 +train: [5] [ 80/400] eta: 0:02:10 lr: 0.000300 loss: 0.8750 (0.8734) grad: 0.1335 (0.1350) time: 0.3576 data: 0.0034 max mem: 3951 +train: [5] [100/400] eta: 0:01:58 lr: 0.000300 loss: 0.8593 (0.8708) grad: 0.1292 (0.1336) time: 0.3524 data: 0.0035 max mem: 3951 +train: [5] [120/400] eta: 0:01:48 lr: 0.000300 loss: 0.8513 (0.8664) grad: 0.1292 (0.1328) time: 0.3509 data: 0.0034 max mem: 3951 +train: [5] [140/400] eta: 0:01:39 lr: 0.000300 loss: 0.8506 (0.8646) grad: 0.1299 (0.1331) time: 0.3584 data: 0.0036 max mem: 3951 +train: [5] [160/400] eta: 0:01:31 lr: 0.000299 loss: 0.8613 (0.8648) grad: 0.1315 (0.1328) time: 0.3506 data: 0.0034 max mem: 3951 +train: [5] [180/400] eta: 0:01:22 lr: 0.000299 loss: 0.8453 (0.8626) grad: 0.1345 (0.1328) time: 0.3469 data: 0.0033 max mem: 3951 +train: [5] [200/400] eta: 0:01:14 lr: 0.000299 loss: 0.8421 (0.8619) grad: 0.1309 (0.1320) time: 0.3535 data: 0.0037 max mem: 3951 +train: [5] [220/400] eta: 0:01:07 lr: 0.000299 loss: 0.8380 (0.8585) grad: 0.1356 (0.1326) time: 0.3587 data: 0.0034 max mem: 3951 +train: [5] [240/400] eta: 0:00:59 lr: 0.000299 loss: 0.8253 (0.8554) grad: 0.1333 (0.1321) time: 0.3635 data: 0.0034 max mem: 3951 +train: [5] [260/400] eta: 0:00:51 lr: 0.000299 loss: 0.8275 (0.8547) grad: 0.1267 (0.1314) time: 0.3636 data: 0.0033 max mem: 3951 +train: [5] [280/400] eta: 0:00:44 lr: 0.000298 loss: 0.8262 (0.8508) grad: 0.1311 (0.1317) time: 0.3646 data: 0.0033 max mem: 3951 +train: [5] [300/400] eta: 0:00:38 lr: 0.000298 loss: 0.8183 (0.8486) grad: 0.1320 (0.1316) time: 0.5839 data: 0.2076 max mem: 3951 +train: [5] [320/400] eta: 0:00:30 lr: 0.000298 loss: 0.8106 (0.8454) grad: 0.1282 (0.1312) time: 0.3825 data: 0.0031 max mem: 3951 +train: [5] [340/400] eta: 0:00:23 lr: 0.000298 loss: 0.7963 (0.8417) grad: 0.1266 (0.1313) time: 0.3813 data: 0.0034 max mem: 3951 +train: [5] [360/400] eta: 0:00:15 lr: 0.000297 loss: 0.7807 (0.8391) grad: 0.1253 (0.1308) time: 0.3892 data: 0.0034 max mem: 3951 +train: [5] [380/400] eta: 0:00:07 lr: 0.000297 loss: 0.7849 (0.8377) grad: 0.1250 (0.1308) time: 0.3863 data: 0.0035 max mem: 3951 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 0.7867 (0.8353) grad: 0.1272 (0.1307) time: 0.3940 data: 0.0034 max mem: 3951 +train: [5] Total time: 0:02:34 (0.3857 s / it) +train: [5] Summary: lr: 0.000297 loss: 0.7867 (0.8353) grad: 0.1272 (0.1307) +eval (validation): [5] [ 0/63] eta: 0:04:11 time: 3.9875 data: 3.6715 max mem: 3951 +eval (validation): [5] [20/63] eta: 0:00:23 time: 0.3730 data: 0.0066 max mem: 3951 +eval (validation): [5] [40/63] eta: 0:00:10 time: 0.3610 data: 0.0031 max mem: 3951 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3226 data: 0.0033 max mem: 3951 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3222 data: 0.0034 max mem: 3951 +eval (validation): [5] Total time: 0:00:26 (0.4147 s / it) +cv: [5] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.087 acc: 0.976 f1: 0.970 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [6] [ 0/400] eta: 0:23:51 lr: nan time: 3.5797 data: 3.3259 max mem: 3951 +train: [6] [ 20/400] eta: 0:03:25 lr: 0.000296 loss: 0.7555 (0.7764) grad: 0.1257 (0.1240) time: 0.3895 data: 0.0038 max mem: 3951 +train: [6] [ 40/400] eta: 0:02:44 lr: 0.000296 loss: 0.7555 (0.7702) grad: 0.1248 (0.1254) time: 0.3667 data: 0.0031 max mem: 3951 +train: [6] [ 60/400] eta: 0:02:29 lr: 0.000296 loss: 0.7770 (0.7744) grad: 0.1231 (0.1241) time: 0.4052 data: 0.0031 max mem: 3951 +train: [6] [ 80/400] eta: 0:02:14 lr: 0.000295 loss: 0.7628 (0.7701) grad: 0.1231 (0.1242) time: 0.3631 data: 0.0033 max mem: 3951 +train: [6] [100/400] eta: 0:02:04 lr: 0.000295 loss: 0.7397 (0.7651) grad: 0.1226 (0.1235) time: 0.3908 data: 0.0034 max mem: 3951 +train: [6] [120/400] eta: 0:01:54 lr: 0.000295 loss: 0.7392 (0.7672) grad: 0.1165 (0.1221) time: 0.3743 data: 0.0033 max mem: 3951 +train: [6] [140/400] eta: 0:01:45 lr: 0.000294 loss: 0.7342 (0.7621) grad: 0.1161 (0.1223) time: 0.3962 data: 0.0034 max mem: 3951 +train: [6] [160/400] eta: 0:01:36 lr: 0.000294 loss: 0.7327 (0.7637) grad: 0.1161 (0.1214) time: 0.3838 data: 0.0038 max mem: 3951 +train: [6] [180/400] eta: 0:01:27 lr: 0.000293 loss: 0.7459 (0.7607) grad: 0.1171 (0.1214) time: 0.3673 data: 0.0033 max mem: 3951 +train: [6] [200/400] eta: 0:01:19 lr: 0.000293 loss: 0.7352 (0.7592) grad: 0.1192 (0.1208) time: 0.3871 data: 0.0035 max mem: 3951 +train: [6] [220/400] eta: 0:01:11 lr: 0.000292 loss: 0.7348 (0.7580) grad: 0.1163 (0.1205) time: 0.3748 data: 0.0032 max mem: 3951 +train: [6] [240/400] eta: 0:01:03 lr: 0.000292 loss: 0.7397 (0.7567) grad: 0.1150 (0.1204) time: 0.3771 data: 0.0036 max mem: 3951 +train: [6] [260/400] eta: 0:00:55 lr: 0.000291 loss: 0.7447 (0.7548) grad: 0.1141 (0.1199) time: 0.3827 data: 0.0034 max mem: 3951 +train: [6] [280/400] eta: 0:00:47 lr: 0.000291 loss: 0.7460 (0.7531) grad: 0.1158 (0.1203) time: 0.3814 data: 0.0033 max mem: 3951 +train: [6] [300/400] eta: 0:00:40 lr: 0.000290 loss: 0.7288 (0.7519) grad: 0.1158 (0.1199) time: 0.5645 data: 0.2096 max mem: 3951 +train: [6] [320/400] eta: 0:00:32 lr: 0.000290 loss: 0.7222 (0.7500) grad: 0.1143 (0.1199) time: 0.4039 data: 0.0036 max mem: 3951 +train: [6] [340/400] eta: 0:00:24 lr: 0.000289 loss: 0.7131 (0.7481) grad: 0.1162 (0.1195) time: 0.3977 data: 0.0045 max mem: 3951 +train: [6] [360/400] eta: 0:00:16 lr: 0.000288 loss: 0.7133 (0.7469) grad: 0.1122 (0.1193) time: 0.4125 data: 0.0036 max mem: 3951 +train: [6] [380/400] eta: 0:00:08 lr: 0.000288 loss: 0.7026 (0.7440) grad: 0.1122 (0.1190) time: 0.3701 data: 0.0035 max mem: 3951 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.7026 (0.7428) grad: 0.1122 (0.1186) time: 0.3825 data: 0.0032 max mem: 3951 +train: [6] Total time: 0:02:40 (0.4018 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.7026 (0.7428) grad: 0.1122 (0.1186) +eval (validation): [6] [ 0/63] eta: 0:03:44 time: 3.5696 data: 3.3062 max mem: 3951 +eval (validation): [6] [20/63] eta: 0:00:22 time: 0.3621 data: 0.0032 max mem: 3951 +eval (validation): [6] [40/63] eta: 0:00:10 time: 0.3623 data: 0.0027 max mem: 3951 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3246 data: 0.0032 max mem: 3951 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3233 data: 0.0032 max mem: 3951 +eval (validation): [6] Total time: 0:00:25 (0.4046 s / it) +cv: [6] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 0.074 acc: 0.980 f1: 0.976 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:23:24 lr: nan time: 3.5122 data: 3.2105 max mem: 3951 +train: [7] [ 20/400] eta: 0:03:26 lr: 0.000286 loss: 0.7079 (0.7212) grad: 0.1097 (0.1123) time: 0.3946 data: 0.0137 max mem: 3951 +train: [7] [ 40/400] eta: 0:02:50 lr: 0.000286 loss: 0.7127 (0.7180) grad: 0.1103 (0.1116) time: 0.4007 data: 0.0023 max mem: 3951 +train: [7] [ 60/400] eta: 0:02:29 lr: 0.000285 loss: 0.7135 (0.7130) grad: 0.1156 (0.1138) time: 0.3703 data: 0.0033 max mem: 3951 +train: [7] [ 80/400] eta: 0:02:13 lr: 0.000284 loss: 0.6957 (0.7065) grad: 0.1140 (0.1138) time: 0.3527 data: 0.0033 max mem: 3951 +train: [7] [100/400] eta: 0:02:01 lr: 0.000284 loss: 0.6894 (0.7015) grad: 0.1098 (0.1128) time: 0.3585 data: 0.0033 max mem: 3951 +train: [7] [120/400] eta: 0:01:52 lr: 0.000283 loss: 0.6958 (0.6991) grad: 0.1050 (0.1115) time: 0.3694 data: 0.0034 max mem: 3951 +train: [7] [140/400] eta: 0:01:42 lr: 0.000282 loss: 0.6872 (0.6971) grad: 0.1088 (0.1117) time: 0.3695 data: 0.0034 max mem: 3951 +train: [7] [160/400] eta: 0:01:35 lr: 0.000282 loss: 0.6818 (0.6969) grad: 0.1098 (0.1118) time: 0.3986 data: 0.0034 max mem: 3951 +train: [7] [180/400] eta: 0:01:27 lr: 0.000281 loss: 0.6760 (0.6913) grad: 0.1083 (0.1120) time: 0.3935 data: 0.0034 max mem: 3951 +train: [7] [200/400] eta: 0:01:18 lr: 0.000280 loss: 0.6467 (0.6894) grad: 0.1083 (0.1118) time: 0.3680 data: 0.0034 max mem: 3951 +train: [7] [220/400] eta: 0:01:10 lr: 0.000279 loss: 0.6382 (0.6857) grad: 0.1114 (0.1119) time: 0.3706 data: 0.0034 max mem: 3951 +train: [7] [240/400] eta: 0:01:02 lr: 0.000278 loss: 0.6519 (0.6830) grad: 0.1144 (0.1122) time: 0.3657 data: 0.0035 max mem: 3951 +train: [7] [260/400] eta: 0:00:54 lr: 0.000278 loss: 0.6555 (0.6815) grad: 0.1091 (0.1118) time: 0.3610 data: 0.0035 max mem: 3951 +train: [7] [280/400] eta: 0:00:46 lr: 0.000277 loss: 0.6515 (0.6794) grad: 0.1067 (0.1115) time: 0.3386 data: 0.0033 max mem: 3951 +train: [7] [300/400] eta: 0:00:39 lr: 0.000276 loss: 0.6333 (0.6764) grad: 0.1099 (0.1118) time: 0.5396 data: 0.1935 max mem: 3951 +train: [7] [320/400] eta: 0:00:31 lr: 0.000275 loss: 0.6333 (0.6745) grad: 0.1108 (0.1117) time: 0.3600 data: 0.0037 max mem: 3951 +train: [7] [340/400] eta: 0:00:23 lr: 0.000274 loss: 0.6448 (0.6735) grad: 0.1088 (0.1115) time: 0.3464 data: 0.0028 max mem: 3951 +train: [7] [360/400] eta: 0:00:15 lr: 0.000273 loss: 0.6448 (0.6722) grad: 0.1047 (0.1112) time: 0.3497 data: 0.0034 max mem: 3951 +train: [7] [380/400] eta: 0:00:07 lr: 0.000272 loss: 0.6465 (0.6714) grad: 0.1015 (0.1108) time: 0.3517 data: 0.0033 max mem: 3951 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.6506 (0.6700) grad: 0.1015 (0.1106) time: 0.3598 data: 0.0033 max mem: 3951 +train: [7] Total time: 0:02:33 (0.3841 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.6506 (0.6700) grad: 0.1015 (0.1106) +eval (validation): [7] [ 0/63] eta: 0:03:57 time: 3.7711 data: 3.4918 max mem: 3951 +eval (validation): [7] [20/63] eta: 0:00:23 time: 0.3766 data: 0.0030 max mem: 3951 +eval (validation): [7] [40/63] eta: 0:00:10 time: 0.3547 data: 0.0031 max mem: 3951 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3375 data: 0.0035 max mem: 3951 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3361 data: 0.0034 max mem: 3951 +eval (validation): [7] Total time: 0:00:26 (0.4141 s / it) +cv: [7] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.078 acc: 0.981 f1: 0.978 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:22:20 lr: nan time: 3.3509 data: 3.1225 max mem: 3951 +train: [8] [ 20/400] eta: 0:03:11 lr: 0.000270 loss: 0.6516 (0.6478) grad: 0.1056 (0.1068) time: 0.3614 data: 0.0045 max mem: 3951 +train: [8] [ 40/400] eta: 0:02:36 lr: 0.000270 loss: 0.6516 (0.6474) grad: 0.1077 (0.1088) time: 0.3631 data: 0.0029 max mem: 3951 +train: [8] [ 60/400] eta: 0:02:19 lr: 0.000269 loss: 0.6426 (0.6427) grad: 0.1074 (0.1077) time: 0.3553 data: 0.0034 max mem: 3951 +train: [8] [ 80/400] eta: 0:02:05 lr: 0.000268 loss: 0.6254 (0.6395) grad: 0.1074 (0.1085) time: 0.3451 data: 0.0032 max mem: 3951 +train: [8] [100/400] eta: 0:01:55 lr: 0.000267 loss: 0.6390 (0.6431) grad: 0.1086 (0.1076) time: 0.3516 data: 0.0033 max mem: 3951 +train: [8] [120/400] eta: 0:01:46 lr: 0.000266 loss: 0.6390 (0.6424) grad: 0.1026 (0.1074) time: 0.3522 data: 0.0035 max mem: 3951 +train: [8] [140/400] eta: 0:01:38 lr: 0.000265 loss: 0.6085 (0.6377) grad: 0.1039 (0.1074) time: 0.3732 data: 0.0036 max mem: 3951 +train: [8] [160/400] eta: 0:01:30 lr: 0.000264 loss: 0.5999 (0.6380) grad: 0.1039 (0.1073) time: 0.3643 data: 0.0036 max mem: 3951 +train: [8] [180/400] eta: 0:01:22 lr: 0.000263 loss: 0.6169 (0.6363) grad: 0.0984 (0.1064) time: 0.3653 data: 0.0033 max mem: 3951 +train: [8] [200/400] eta: 0:01:14 lr: 0.000262 loss: 0.6177 (0.6354) grad: 0.0979 (0.1058) time: 0.3527 data: 0.0032 max mem: 3951 +train: [8] [220/400] eta: 0:01:06 lr: 0.000260 loss: 0.6108 (0.6335) grad: 0.1011 (0.1056) time: 0.3503 data: 0.0037 max mem: 3951 +train: [8] [240/400] eta: 0:00:59 lr: 0.000259 loss: 0.5928 (0.6322) grad: 0.1027 (0.1055) time: 0.3705 data: 0.0035 max mem: 3951 +train: [8] [260/400] eta: 0:00:51 lr: 0.000258 loss: 0.5897 (0.6296) grad: 0.1027 (0.1052) time: 0.3731 data: 0.0033 max mem: 3951 +train: [8] [280/400] eta: 0:00:44 lr: 0.000257 loss: 0.6139 (0.6302) grad: 0.0994 (0.1048) time: 0.3671 data: 0.0031 max mem: 3951 +train: [8] [300/400] eta: 0:00:38 lr: 0.000256 loss: 0.6149 (0.6296) grad: 0.1015 (0.1049) time: 0.5433 data: 0.2003 max mem: 3951 +train: [8] [320/400] eta: 0:00:30 lr: 0.000255 loss: 0.6149 (0.6290) grad: 0.1036 (0.1049) time: 0.4417 data: 0.0032 max mem: 3951 +train: [8] [340/400] eta: 0:00:23 lr: 0.000254 loss: 0.5988 (0.6272) grad: 0.1012 (0.1047) time: 0.3972 data: 0.0034 max mem: 3951 +train: [8] [360/400] eta: 0:00:15 lr: 0.000253 loss: 0.5988 (0.6261) grad: 0.1005 (0.1045) time: 0.3883 data: 0.0034 max mem: 3951 +train: [8] [380/400] eta: 0:00:07 lr: 0.000252 loss: 0.5972 (0.6237) grad: 0.0981 (0.1042) time: 0.3926 data: 0.0033 max mem: 3951 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.5906 (0.6227) grad: 0.0976 (0.1040) time: 0.4056 data: 0.0035 max mem: 3951 +train: [8] Total time: 0:02:35 (0.3883 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.5906 (0.6227) grad: 0.0976 (0.1040) +eval (validation): [8] [ 0/63] eta: 0:03:55 time: 3.7315 data: 3.4623 max mem: 3951 +eval (validation): [8] [20/63] eta: 0:00:24 time: 0.4167 data: 0.0125 max mem: 3951 +eval (validation): [8] [40/63] eta: 0:00:11 time: 0.3979 data: 0.0028 max mem: 3951 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3795 data: 0.0034 max mem: 3951 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3817 data: 0.0034 max mem: 3951 +eval (validation): [8] Total time: 0:00:28 (0.4548 s / it) +cv: [8] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.069 acc: 0.981 f1: 0.978 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:23:00 lr: nan time: 3.4501 data: 3.2063 max mem: 3951 +train: [9] [ 20/400] eta: 0:03:32 lr: 0.000249 loss: 0.5838 (0.6091) grad: 0.1012 (0.1043) time: 0.4150 data: 0.0163 max mem: 3951 +train: [9] [ 40/400] eta: 0:02:53 lr: 0.000248 loss: 0.5830 (0.5942) grad: 0.1012 (0.1020) time: 0.4009 data: 0.0028 max mem: 3951 +train: [9] [ 60/400] eta: 0:02:35 lr: 0.000247 loss: 0.5738 (0.5894) grad: 0.1002 (0.1026) time: 0.4047 data: 0.0036 max mem: 3951 +train: [9] [ 80/400] eta: 0:02:20 lr: 0.000246 loss: 0.5854 (0.5918) grad: 0.0981 (0.1016) time: 0.3798 data: 0.0034 max mem: 3951 +train: [9] [100/400] eta: 0:02:07 lr: 0.000244 loss: 0.5796 (0.5879) grad: 0.0951 (0.1010) time: 0.3746 data: 0.0033 max mem: 3951 +train: [9] [120/400] eta: 0:01:56 lr: 0.000243 loss: 0.5781 (0.5866) grad: 0.0960 (0.1004) time: 0.3782 data: 0.0037 max mem: 3951 +train: [9] [140/400] eta: 0:01:46 lr: 0.000242 loss: 0.5842 (0.5864) grad: 0.0981 (0.1001) time: 0.3618 data: 0.0034 max mem: 3951 +train: [9] [160/400] eta: 0:01:38 lr: 0.000241 loss: 0.5727 (0.5849) grad: 0.0993 (0.1002) time: 0.4020 data: 0.0033 max mem: 3951 +train: [9] [180/400] eta: 0:01:29 lr: 0.000240 loss: 0.5727 (0.5873) grad: 0.0995 (0.1006) time: 0.3799 data: 0.0034 max mem: 3951 +train: [9] [200/400] eta: 0:01:20 lr: 0.000238 loss: 0.6009 (0.5884) grad: 0.0982 (0.1004) time: 0.3730 data: 0.0034 max mem: 3951 +train: [9] [220/400] eta: 0:01:11 lr: 0.000237 loss: 0.5781 (0.5863) grad: 0.0976 (0.1006) time: 0.3616 data: 0.0035 max mem: 3951 +train: [9] [240/400] eta: 0:01:03 lr: 0.000236 loss: 0.5600 (0.5858) grad: 0.0976 (0.1004) time: 0.3808 data: 0.0034 max mem: 3951 +train: [9] [260/400] eta: 0:00:55 lr: 0.000234 loss: 0.5858 (0.5863) grad: 0.0970 (0.1002) time: 0.3898 data: 0.0033 max mem: 3951 +train: [9] [280/400] eta: 0:00:47 lr: 0.000233 loss: 0.5956 (0.5865) grad: 0.0997 (0.1002) time: 0.3693 data: 0.0033 max mem: 3951 +train: [9] [300/400] eta: 0:00:40 lr: 0.000232 loss: 0.5728 (0.5852) grad: 0.0999 (0.1000) time: 0.5808 data: 0.2087 max mem: 3951 +train: [9] [320/400] eta: 0:00:32 lr: 0.000230 loss: 0.5634 (0.5840) grad: 0.0973 (0.0998) time: 0.3841 data: 0.0044 max mem: 3951 +train: [9] [340/400] eta: 0:00:24 lr: 0.000229 loss: 0.5765 (0.5835) grad: 0.0934 (0.0995) time: 0.3728 data: 0.0027 max mem: 3951 +train: [9] [360/400] eta: 0:00:16 lr: 0.000228 loss: 0.5522 (0.5821) grad: 0.0932 (0.0993) time: 0.3524 data: 0.0033 max mem: 3951 +train: [9] [380/400] eta: 0:00:07 lr: 0.000226 loss: 0.5496 (0.5809) grad: 0.0959 (0.0994) time: 0.3576 data: 0.0035 max mem: 3951 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.5583 (0.5812) grad: 0.0980 (0.0994) time: 0.3691 data: 0.0033 max mem: 3951 +train: [9] Total time: 0:02:38 (0.3974 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.5583 (0.5812) grad: 0.0980 (0.0994) +eval (validation): [9] [ 0/63] eta: 0:03:39 time: 3.4898 data: 3.2688 max mem: 3951 +eval (validation): [9] [20/63] eta: 0:00:22 time: 0.3669 data: 0.0159 max mem: 3951 +eval (validation): [9] [40/63] eta: 0:00:10 time: 0.3592 data: 0.0031 max mem: 3951 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3393 data: 0.0033 max mem: 3951 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3363 data: 0.0032 max mem: 3951 +eval (validation): [9] Total time: 0:00:25 (0.4096 s / it) +cv: [9] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.069 acc: 0.979 f1: 0.976 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:22:47 lr: nan time: 3.4181 data: 3.1818 max mem: 3951 +train: [10] [ 20/400] eta: 0:03:19 lr: 0.000224 loss: 0.5467 (0.5598) grad: 0.0916 (0.0969) time: 0.3813 data: 0.0030 max mem: 3951 +train: [10] [ 40/400] eta: 0:02:41 lr: 0.000222 loss: 0.5503 (0.5577) grad: 0.0920 (0.0959) time: 0.3680 data: 0.0030 max mem: 3951 +train: [10] [ 60/400] eta: 0:02:25 lr: 0.000221 loss: 0.5512 (0.5539) grad: 0.0958 (0.0969) time: 0.3819 data: 0.0034 max mem: 3951 +train: [10] [ 80/400] eta: 0:02:12 lr: 0.000220 loss: 0.5383 (0.5479) grad: 0.0933 (0.0961) time: 0.3782 data: 0.0034 max mem: 3951 +train: [10] [100/400] eta: 0:02:01 lr: 0.000218 loss: 0.5445 (0.5540) grad: 0.0967 (0.0976) time: 0.3611 data: 0.0033 max mem: 3951 +train: [10] [120/400] eta: 0:01:50 lr: 0.000217 loss: 0.5544 (0.5545) grad: 0.0976 (0.0972) time: 0.3503 data: 0.0033 max mem: 3951 +train: [10] [140/400] eta: 0:01:41 lr: 0.000215 loss: 0.5629 (0.5564) grad: 0.0976 (0.0976) time: 0.3613 data: 0.0037 max mem: 3951 +train: [10] [160/400] eta: 0:01:33 lr: 0.000214 loss: 0.5553 (0.5540) grad: 0.1002 (0.0979) time: 0.3673 data: 0.0035 max mem: 3951 +train: [10] [180/400] eta: 0:01:24 lr: 0.000213 loss: 0.5282 (0.5519) grad: 0.0965 (0.0974) time: 0.3566 data: 0.0037 max mem: 3951 +train: [10] [200/400] eta: 0:01:16 lr: 0.000211 loss: 0.5326 (0.5518) grad: 0.0920 (0.0972) time: 0.3547 data: 0.0035 max mem: 3951 +train: [10] [220/400] eta: 0:01:08 lr: 0.000210 loss: 0.5465 (0.5501) grad: 0.0974 (0.0972) time: 0.3643 data: 0.0035 max mem: 3951 +train: [10] [240/400] eta: 0:01:00 lr: 0.000208 loss: 0.5238 (0.5491) grad: 0.0980 (0.0972) time: 0.3905 data: 0.0037 max mem: 3951 +train: [10] [260/400] eta: 0:00:52 lr: 0.000207 loss: 0.5395 (0.5492) grad: 0.0936 (0.0967) time: 0.3536 data: 0.0036 max mem: 3951 +train: [10] [280/400] eta: 0:00:45 lr: 0.000205 loss: 0.5379 (0.5486) grad: 0.0908 (0.0962) time: 0.3500 data: 0.0032 max mem: 3951 +train: [10] [300/400] eta: 0:00:38 lr: 0.000204 loss: 0.5344 (0.5476) grad: 0.0898 (0.0959) time: 0.5304 data: 0.1931 max mem: 3951 +train: [10] [320/400] eta: 0:00:30 lr: 0.000202 loss: 0.5380 (0.5483) grad: 0.0900 (0.0957) time: 0.3821 data: 0.0038 max mem: 3951 +train: [10] [340/400] eta: 0:00:23 lr: 0.000201 loss: 0.5465 (0.5481) grad: 0.0945 (0.0959) time: 0.3522 data: 0.0032 max mem: 3951 +train: [10] [360/400] eta: 0:00:15 lr: 0.000199 loss: 0.5219 (0.5457) grad: 0.0971 (0.0959) time: 0.3660 data: 0.0037 max mem: 3951 +train: [10] [380/400] eta: 0:00:07 lr: 0.000198 loss: 0.5219 (0.5456) grad: 0.0959 (0.0960) time: 0.3560 data: 0.0033 max mem: 3951 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.5340 (0.5453) grad: 0.0935 (0.0958) time: 0.3572 data: 0.0033 max mem: 3951 +train: [10] Total time: 0:02:32 (0.3810 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.5340 (0.5453) grad: 0.0935 (0.0958) +eval (validation): [10] [ 0/63] eta: 0:03:36 time: 3.4308 data: 3.2115 max mem: 3951 +eval (validation): [10] [20/63] eta: 0:00:21 time: 0.3646 data: 0.0046 max mem: 3951 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3430 data: 0.0036 max mem: 3951 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3372 data: 0.0034 max mem: 3951 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3333 data: 0.0033 max mem: 3951 +eval (validation): [10] Total time: 0:00:25 (0.4009 s / it) +cv: [10] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.071 acc: 0.979 f1: 0.975 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:22:41 lr: nan time: 3.4027 data: 3.1299 max mem: 3951 +train: [11] [ 20/400] eta: 0:03:27 lr: 0.000195 loss: 0.5227 (0.5219) grad: 0.0884 (0.0913) time: 0.4029 data: 0.0036 max mem: 3951 +train: [11] [ 40/400] eta: 0:02:43 lr: 0.000193 loss: 0.5103 (0.5181) grad: 0.0898 (0.0931) time: 0.3590 data: 0.0031 max mem: 3951 +train: [11] [ 60/400] eta: 0:02:24 lr: 0.000192 loss: 0.5067 (0.5165) grad: 0.0912 (0.0926) time: 0.3665 data: 0.0032 max mem: 3951 +train: [11] [ 80/400] eta: 0:02:10 lr: 0.000190 loss: 0.5199 (0.5208) grad: 0.0898 (0.0921) time: 0.3506 data: 0.0034 max mem: 3951 +train: [11] [100/400] eta: 0:01:59 lr: 0.000189 loss: 0.5395 (0.5240) grad: 0.0936 (0.0929) time: 0.3612 data: 0.0034 max mem: 3951 +train: [11] [120/400] eta: 0:01:49 lr: 0.000187 loss: 0.5301 (0.5246) grad: 0.0933 (0.0930) time: 0.3453 data: 0.0033 max mem: 3951 +train: [11] [140/400] eta: 0:01:40 lr: 0.000186 loss: 0.5175 (0.5265) grad: 0.0925 (0.0929) time: 0.3641 data: 0.0035 max mem: 3951 +train: [11] [160/400] eta: 0:01:31 lr: 0.000184 loss: 0.5148 (0.5249) grad: 0.0899 (0.0927) time: 0.3594 data: 0.0034 max mem: 3951 +train: [11] [180/400] eta: 0:01:23 lr: 0.000183 loss: 0.5178 (0.5245) grad: 0.0898 (0.0923) time: 0.3587 data: 0.0035 max mem: 3951 +train: [11] [200/400] eta: 0:01:15 lr: 0.000181 loss: 0.5052 (0.5221) grad: 0.0906 (0.0923) time: 0.3419 data: 0.0033 max mem: 3951 +train: [11] [220/400] eta: 0:01:07 lr: 0.000180 loss: 0.5157 (0.5225) grad: 0.0906 (0.0921) time: 0.3621 data: 0.0035 max mem: 3951 +train: [11] [240/400] eta: 0:00:59 lr: 0.000178 loss: 0.5230 (0.5232) grad: 0.0972 (0.0925) time: 0.3638 data: 0.0034 max mem: 3951 +train: [11] [260/400] eta: 0:00:52 lr: 0.000177 loss: 0.5176 (0.5225) grad: 0.0883 (0.0919) time: 0.3604 data: 0.0033 max mem: 3951 +train: [11] [280/400] eta: 0:00:44 lr: 0.000175 loss: 0.5133 (0.5219) grad: 0.0836 (0.0918) time: 0.3556 data: 0.0030 max mem: 3951 +train: [11] [300/400] eta: 0:00:38 lr: 0.000174 loss: 0.4994 (0.5193) grad: 0.0865 (0.0916) time: 0.6238 data: 0.2446 max mem: 3951 +train: [11] [320/400] eta: 0:00:31 lr: 0.000172 loss: 0.5133 (0.5188) grad: 0.0865 (0.0915) time: 0.3772 data: 0.0040 max mem: 3951 +train: [11] [340/400] eta: 0:00:23 lr: 0.000170 loss: 0.5207 (0.5192) grad: 0.0891 (0.0914) time: 0.3642 data: 0.0028 max mem: 3951 +train: [11] [360/400] eta: 0:00:15 lr: 0.000169 loss: 0.5207 (0.5192) grad: 0.0893 (0.0912) time: 0.3754 data: 0.0033 max mem: 3951 +train: [11] [380/400] eta: 0:00:07 lr: 0.000167 loss: 0.5088 (0.5179) grad: 0.0891 (0.0911) time: 0.3665 data: 0.0035 max mem: 3951 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.5068 (0.5174) grad: 0.0863 (0.0910) time: 0.3826 data: 0.0036 max mem: 3951 +train: [11] Total time: 0:02:33 (0.3848 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.5068 (0.5174) grad: 0.0863 (0.0910) +eval (validation): [11] [ 0/63] eta: 0:03:35 time: 3.4257 data: 3.2041 max mem: 3951 +eval (validation): [11] [20/63] eta: 0:00:20 time: 0.3347 data: 0.0044 max mem: 3951 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3482 data: 0.0032 max mem: 3951 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3272 data: 0.0029 max mem: 3951 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3249 data: 0.0031 max mem: 3951 +eval (validation): [11] Total time: 0:00:24 (0.3899 s / it) +cv: [11] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.072 acc: 0.981 f1: 0.978 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:22:54 lr: nan time: 3.4365 data: 3.1819 max mem: 3951 +train: [12] [ 20/400] eta: 0:03:22 lr: 0.000164 loss: 0.5258 (0.5286) grad: 0.0920 (0.0894) time: 0.3873 data: 0.0041 max mem: 3951 +train: [12] [ 40/400] eta: 0:02:42 lr: 0.000163 loss: 0.5120 (0.5103) grad: 0.0899 (0.0903) time: 0.3675 data: 0.0034 max mem: 3951 +train: [12] [ 60/400] eta: 0:02:25 lr: 0.000161 loss: 0.4900 (0.5039) grad: 0.0849 (0.0888) time: 0.3755 data: 0.0035 max mem: 3951 +train: [12] [ 80/400] eta: 0:02:11 lr: 0.000160 loss: 0.4900 (0.5016) grad: 0.0878 (0.0899) time: 0.3645 data: 0.0037 max mem: 3951 +train: [12] [100/400] eta: 0:02:03 lr: 0.000158 loss: 0.4806 (0.4994) grad: 0.0903 (0.0892) time: 0.4166 data: 0.0038 max mem: 3951 +train: [12] [120/400] eta: 0:01:53 lr: 0.000156 loss: 0.4831 (0.4984) grad: 0.0890 (0.0891) time: 0.3586 data: 0.0035 max mem: 3951 +train: [12] [140/400] eta: 0:01:43 lr: 0.000155 loss: 0.4909 (0.4979) grad: 0.0888 (0.0897) time: 0.3630 data: 0.0033 max mem: 3951 +train: [12] [160/400] eta: 0:01:34 lr: 0.000153 loss: 0.4841 (0.4965) grad: 0.0888 (0.0900) time: 0.3644 data: 0.0034 max mem: 3951 +train: [12] [180/400] eta: 0:01:26 lr: 0.000152 loss: 0.4930 (0.4985) grad: 0.0896 (0.0899) time: 0.3694 data: 0.0036 max mem: 3951 +train: [12] [200/400] eta: 0:01:17 lr: 0.000150 loss: 0.5136 (0.5010) grad: 0.0896 (0.0902) time: 0.3442 data: 0.0033 max mem: 3951 +train: [12] [220/400] eta: 0:01:09 lr: 0.000149 loss: 0.4969 (0.4992) grad: 0.0856 (0.0897) time: 0.3759 data: 0.0034 max mem: 3951 +train: [12] [240/400] eta: 0:01:01 lr: 0.000147 loss: 0.4910 (0.5004) grad: 0.0841 (0.0895) time: 0.3752 data: 0.0034 max mem: 3951 +train: [12] [260/400] eta: 0:00:53 lr: 0.000145 loss: 0.4922 (0.4998) grad: 0.0876 (0.0894) time: 0.3519 data: 0.0036 max mem: 3951 +train: [12] [280/400] eta: 0:00:45 lr: 0.000144 loss: 0.4922 (0.5003) grad: 0.0872 (0.0892) time: 0.3472 data: 0.0034 max mem: 3951 +train: [12] [300/400] eta: 0:00:39 lr: 0.000142 loss: 0.5019 (0.5011) grad: 0.0860 (0.0889) time: 0.5628 data: 0.1968 max mem: 3951 +train: [12] [320/400] eta: 0:00:31 lr: 0.000141 loss: 0.4911 (0.4996) grad: 0.0876 (0.0888) time: 0.3856 data: 0.0038 max mem: 3951 +train: [12] [340/400] eta: 0:00:23 lr: 0.000139 loss: 0.4911 (0.4996) grad: 0.0887 (0.0888) time: 0.3503 data: 0.0035 max mem: 3951 +train: [12] [360/400] eta: 0:00:15 lr: 0.000138 loss: 0.4952 (0.4996) grad: 0.0859 (0.0888) time: 0.3637 data: 0.0037 max mem: 3951 +train: [12] [380/400] eta: 0:00:07 lr: 0.000136 loss: 0.4912 (0.4996) grad: 0.0835 (0.0885) time: 0.3568 data: 0.0033 max mem: 3951 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.4912 (0.4996) grad: 0.0815 (0.0884) time: 0.3593 data: 0.0034 max mem: 3951 +train: [12] Total time: 0:02:33 (0.3848 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.4912 (0.4996) grad: 0.0815 (0.0884) +eval (validation): [12] [ 0/63] eta: 0:03:38 time: 3.4712 data: 3.2483 max mem: 3951 +eval (validation): [12] [20/63] eta: 0:00:22 time: 0.3653 data: 0.0129 max mem: 3951 +eval (validation): [12] [40/63] eta: 0:00:10 time: 0.3766 data: 0.0036 max mem: 3951 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3261 data: 0.0028 max mem: 3951 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3260 data: 0.0031 max mem: 3951 +eval (validation): [12] Total time: 0:00:25 (0.4098 s / it) +cv: [12] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.063 acc: 0.981 f1: 0.980 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [13] [ 0/400] eta: 0:21:51 lr: nan time: 3.2782 data: 3.0532 max mem: 3951 +train: [13] [ 20/400] eta: 0:03:19 lr: 0.000133 loss: 0.4840 (0.4904) grad: 0.0803 (0.0859) time: 0.3873 data: 0.0265 max mem: 3951 +train: [13] [ 40/400] eta: 0:02:40 lr: 0.000131 loss: 0.4950 (0.4942) grad: 0.0818 (0.0860) time: 0.3610 data: 0.0028 max mem: 3951 +train: [13] [ 60/400] eta: 0:02:21 lr: 0.000130 loss: 0.4967 (0.4924) grad: 0.0868 (0.0881) time: 0.3575 data: 0.0032 max mem: 3951 +train: [13] [ 80/400] eta: 0:02:07 lr: 0.000128 loss: 0.4830 (0.4946) grad: 0.0867 (0.0870) time: 0.3474 data: 0.0032 max mem: 3951 +train: [13] [100/400] eta: 0:01:57 lr: 0.000127 loss: 0.4924 (0.4977) grad: 0.0821 (0.0872) time: 0.3670 data: 0.0032 max mem: 3951 +train: [13] [120/400] eta: 0:01:49 lr: 0.000125 loss: 0.4872 (0.4927) grad: 0.0867 (0.0873) time: 0.3778 data: 0.0034 max mem: 3951 +train: [13] [140/400] eta: 0:01:39 lr: 0.000124 loss: 0.4747 (0.4902) grad: 0.0842 (0.0871) time: 0.3476 data: 0.0033 max mem: 3951 +train: [13] [160/400] eta: 0:01:32 lr: 0.000122 loss: 0.4820 (0.4898) grad: 0.0885 (0.0878) time: 0.3857 data: 0.0034 max mem: 3951 +train: [13] [180/400] eta: 0:01:23 lr: 0.000120 loss: 0.4820 (0.4885) grad: 0.0863 (0.0874) time: 0.3551 data: 0.0035 max mem: 3951 +train: [13] [200/400] eta: 0:01:15 lr: 0.000119 loss: 0.4878 (0.4889) grad: 0.0823 (0.0870) time: 0.3486 data: 0.0033 max mem: 3951 +train: [13] [220/400] eta: 0:01:07 lr: 0.000117 loss: 0.4919 (0.4879) grad: 0.0820 (0.0867) time: 0.3662 data: 0.0034 max mem: 3951 +train: [13] [240/400] eta: 0:01:00 lr: 0.000116 loss: 0.4619 (0.4859) grad: 0.0845 (0.0868) time: 0.3735 data: 0.0038 max mem: 3951 +train: [13] [260/400] eta: 0:00:52 lr: 0.000114 loss: 0.4554 (0.4854) grad: 0.0875 (0.0870) time: 0.3703 data: 0.0033 max mem: 3951 +train: [13] [280/400] eta: 0:00:44 lr: 0.000113 loss: 0.4899 (0.4867) grad: 0.0865 (0.0868) time: 0.3502 data: 0.0033 max mem: 3951 +train: [13] [300/400] eta: 0:00:38 lr: 0.000111 loss: 0.4899 (0.4869) grad: 0.0830 (0.0866) time: 0.5815 data: 0.2056 max mem: 3951 +train: [13] [320/400] eta: 0:00:31 lr: 0.000110 loss: 0.4724 (0.4858) grad: 0.0830 (0.0865) time: 0.3870 data: 0.0037 max mem: 3951 +train: [13] [340/400] eta: 0:00:23 lr: 0.000108 loss: 0.4695 (0.4848) grad: 0.0857 (0.0866) time: 0.3614 data: 0.0033 max mem: 3951 +train: [13] [360/400] eta: 0:00:15 lr: 0.000107 loss: 0.4677 (0.4843) grad: 0.0876 (0.0865) time: 0.3600 data: 0.0034 max mem: 3951 +train: [13] [380/400] eta: 0:00:07 lr: 0.000105 loss: 0.4677 (0.4835) grad: 0.0833 (0.0862) time: 0.3744 data: 0.0035 max mem: 3951 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.4625 (0.4824) grad: 0.0825 (0.0862) time: 0.3718 data: 0.0035 max mem: 3951 +train: [13] Total time: 0:02:33 (0.3841 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.4625 (0.4824) grad: 0.0825 (0.0862) +eval (validation): [13] [ 0/63] eta: 0:03:49 time: 3.6384 data: 3.3582 max mem: 3951 +eval (validation): [13] [20/63] eta: 0:00:22 time: 0.3715 data: 0.0033 max mem: 3951 +eval (validation): [13] [40/63] eta: 0:00:10 time: 0.3580 data: 0.0032 max mem: 3951 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3413 data: 0.0034 max mem: 3951 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3376 data: 0.0034 max mem: 3951 +eval (validation): [13] Total time: 0:00:26 (0.4135 s / it) +cv: [13] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 0.059 acc: 0.983 f1: 0.981 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [14] [ 0/400] eta: 0:23:24 lr: nan time: 3.5107 data: 3.2863 max mem: 3951 +train: [14] [ 20/400] eta: 0:03:24 lr: 0.000102 loss: 0.4543 (0.4564) grad: 0.0850 (0.0880) time: 0.3901 data: 0.0191 max mem: 3951 +train: [14] [ 40/400] eta: 0:02:46 lr: 0.000101 loss: 0.4554 (0.4589) grad: 0.0835 (0.0862) time: 0.3824 data: 0.0027 max mem: 3951 +train: [14] [ 60/400] eta: 0:02:26 lr: 0.000099 loss: 0.4571 (0.4621) grad: 0.0803 (0.0863) time: 0.3681 data: 0.0037 max mem: 3951 +train: [14] [ 80/400] eta: 0:02:12 lr: 0.000098 loss: 0.4698 (0.4642) grad: 0.0877 (0.0873) time: 0.3651 data: 0.0032 max mem: 3951 +train: [14] [100/400] eta: 0:02:01 lr: 0.000096 loss: 0.4623 (0.4645) grad: 0.0907 (0.0878) time: 0.3660 data: 0.0033 max mem: 3951 +train: [14] [120/400] eta: 0:01:52 lr: 0.000095 loss: 0.4672 (0.4668) grad: 0.0912 (0.0879) time: 0.3785 data: 0.0037 max mem: 3951 +train: [14] [140/400] eta: 0:01:42 lr: 0.000093 loss: 0.4751 (0.4672) grad: 0.0822 (0.0869) time: 0.3657 data: 0.0034 max mem: 3951 +train: [14] [160/400] eta: 0:01:34 lr: 0.000092 loss: 0.4669 (0.4687) grad: 0.0849 (0.0871) time: 0.3761 data: 0.0034 max mem: 3951 +train: [14] [180/400] eta: 0:01:25 lr: 0.000090 loss: 0.4663 (0.4681) grad: 0.0857 (0.0870) time: 0.3690 data: 0.0037 max mem: 3951 +train: [14] [200/400] eta: 0:01:17 lr: 0.000089 loss: 0.4730 (0.4696) grad: 0.0837 (0.0866) time: 0.3551 data: 0.0032 max mem: 3951 +train: [14] [220/400] eta: 0:01:09 lr: 0.000088 loss: 0.4803 (0.4702) grad: 0.0814 (0.0864) time: 0.3827 data: 0.0033 max mem: 3951 +train: [14] [240/400] eta: 0:01:02 lr: 0.000086 loss: 0.4623 (0.4692) grad: 0.0825 (0.0860) time: 0.3968 data: 0.0034 max mem: 3951 +train: [14] [260/400] eta: 0:00:54 lr: 0.000085 loss: 0.4623 (0.4680) grad: 0.0824 (0.0856) time: 0.3712 data: 0.0035 max mem: 3951 +train: [14] [280/400] eta: 0:00:46 lr: 0.000083 loss: 0.4648 (0.4680) grad: 0.0782 (0.0853) time: 0.3621 data: 0.0035 max mem: 3951 +train: [14] [300/400] eta: 0:00:39 lr: 0.000082 loss: 0.4688 (0.4687) grad: 0.0822 (0.0854) time: 0.5616 data: 0.2065 max mem: 3951 +train: [14] [320/400] eta: 0:00:31 lr: 0.000081 loss: 0.4732 (0.4688) grad: 0.0863 (0.0854) time: 0.4011 data: 0.0035 max mem: 3951 +train: [14] [340/400] eta: 0:00:23 lr: 0.000079 loss: 0.4676 (0.4679) grad: 0.0813 (0.0853) time: 0.3762 data: 0.0032 max mem: 3951 +train: [14] [360/400] eta: 0:00:15 lr: 0.000078 loss: 0.4517 (0.4664) grad: 0.0796 (0.0851) time: 0.3803 data: 0.0033 max mem: 3951 +train: [14] [380/400] eta: 0:00:07 lr: 0.000076 loss: 0.4519 (0.4655) grad: 0.0829 (0.0851) time: 0.3736 data: 0.0035 max mem: 3951 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.4584 (0.4657) grad: 0.0816 (0.0849) time: 0.3803 data: 0.0035 max mem: 3951 +train: [14] Total time: 0:02:37 (0.3930 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.4584 (0.4657) grad: 0.0816 (0.0849) +eval (validation): [14] [ 0/63] eta: 0:03:47 time: 3.6077 data: 3.3251 max mem: 3951 +eval (validation): [14] [20/63] eta: 0:00:24 time: 0.4219 data: 0.0050 max mem: 3951 +eval (validation): [14] [40/63] eta: 0:00:10 time: 0.3484 data: 0.0034 max mem: 3951 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3559 data: 0.0034 max mem: 3951 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3572 data: 0.0034 max mem: 3951 +eval (validation): [14] Total time: 0:00:27 (0.4312 s / it) +cv: [14] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.069 acc: 0.981 f1: 0.977 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:23:14 lr: nan time: 3.4864 data: 3.2080 max mem: 3951 +train: [15] [ 20/400] eta: 0:03:34 lr: 0.000074 loss: 0.4641 (0.4513) grad: 0.0806 (0.0819) time: 0.4196 data: 0.0036 max mem: 3951 +train: [15] [ 40/400] eta: 0:02:50 lr: 0.000072 loss: 0.4641 (0.4569) grad: 0.0809 (0.0824) time: 0.3744 data: 0.0034 max mem: 3951 +train: [15] [ 60/400] eta: 0:02:30 lr: 0.000071 loss: 0.4484 (0.4516) grad: 0.0818 (0.0830) time: 0.3841 data: 0.0035 max mem: 3951 +train: [15] [ 80/400] eta: 0:02:14 lr: 0.000070 loss: 0.4487 (0.4548) grad: 0.0839 (0.0834) time: 0.3486 data: 0.0033 max mem: 3951 +train: [15] [100/400] eta: 0:02:03 lr: 0.000068 loss: 0.4708 (0.4604) grad: 0.0812 (0.0836) time: 0.3699 data: 0.0034 max mem: 3951 +train: [15] [120/400] eta: 0:01:54 lr: 0.000067 loss: 0.4635 (0.4599) grad: 0.0851 (0.0840) time: 0.3951 data: 0.0035 max mem: 3951 +train: [15] [140/400] eta: 0:01:44 lr: 0.000066 loss: 0.4585 (0.4591) grad: 0.0885 (0.0848) time: 0.3733 data: 0.0033 max mem: 3951 +train: [15] [160/400] eta: 0:01:35 lr: 0.000064 loss: 0.4605 (0.4608) grad: 0.0849 (0.0849) time: 0.3553 data: 0.0034 max mem: 3951 +train: [15] [180/400] eta: 0:01:26 lr: 0.000063 loss: 0.4577 (0.4594) grad: 0.0814 (0.0845) time: 0.3670 data: 0.0034 max mem: 3951 +train: [15] [200/400] eta: 0:01:17 lr: 0.000062 loss: 0.4384 (0.4576) grad: 0.0823 (0.0846) time: 0.3457 data: 0.0035 max mem: 3951 +train: [15] [220/400] eta: 0:01:10 lr: 0.000061 loss: 0.4475 (0.4582) grad: 0.0843 (0.0846) time: 0.4009 data: 0.0030 max mem: 3951 +train: [15] [240/400] eta: 0:01:02 lr: 0.000059 loss: 0.4703 (0.4602) grad: 0.0843 (0.0847) time: 0.3721 data: 0.0032 max mem: 3951 +train: [15] [260/400] eta: 0:00:54 lr: 0.000058 loss: 0.4635 (0.4597) grad: 0.0817 (0.0846) time: 0.3676 data: 0.0034 max mem: 3951 +train: [15] [280/400] eta: 0:00:46 lr: 0.000057 loss: 0.4552 (0.4585) grad: 0.0783 (0.0844) time: 0.3447 data: 0.0035 max mem: 3951 +train: [15] [300/400] eta: 0:00:39 lr: 0.000056 loss: 0.4444 (0.4580) grad: 0.0811 (0.0844) time: 0.5556 data: 0.1993 max mem: 3951 +train: [15] [320/400] eta: 0:00:31 lr: 0.000054 loss: 0.4674 (0.4585) grad: 0.0842 (0.0845) time: 0.4097 data: 0.0275 max mem: 3951 +train: [15] [340/400] eta: 0:00:23 lr: 0.000053 loss: 0.4694 (0.4587) grad: 0.0815 (0.0843) time: 0.3897 data: 0.0024 max mem: 3951 +train: [15] [360/400] eta: 0:00:15 lr: 0.000052 loss: 0.4568 (0.4590) grad: 0.0822 (0.0843) time: 0.3754 data: 0.0033 max mem: 3951 +train: [15] [380/400] eta: 0:00:07 lr: 0.000051 loss: 0.4563 (0.4579) grad: 0.0836 (0.0843) time: 0.3703 data: 0.0034 max mem: 3951 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.4565 (0.4588) grad: 0.0825 (0.0842) time: 0.3793 data: 0.0037 max mem: 3951 +train: [15] Total time: 0:02:37 (0.3929 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.4565 (0.4588) grad: 0.0825 (0.0842) +eval (validation): [15] [ 0/63] eta: 0:03:45 time: 3.5850 data: 3.3448 max mem: 3951 +eval (validation): [15] [20/63] eta: 0:00:23 time: 0.3903 data: 0.0033 max mem: 3951 +eval (validation): [15] [40/63] eta: 0:00:10 time: 0.3624 data: 0.0031 max mem: 3951 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3339 data: 0.0034 max mem: 3951 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3325 data: 0.0032 max mem: 3951 +eval (validation): [15] Total time: 0:00:26 (0.4172 s / it) +cv: [15] best hparam: (9.8, 1.0) (038) ('038_lr9.8e+00_wd1.0e+00') loss: 0.071 acc: 0.979 f1: 0.976 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:22:20 lr: nan time: 3.3511 data: 3.1290 max mem: 3951 +train: [16] [ 20/400] eta: 0:03:18 lr: 0.000048 loss: 0.4410 (0.4479) grad: 0.0856 (0.0869) time: 0.3809 data: 0.0035 max mem: 3951 +train: [16] [ 40/400] eta: 0:02:42 lr: 0.000047 loss: 0.4410 (0.4425) grad: 0.0878 (0.0890) time: 0.3743 data: 0.0033 max mem: 3951 +train: [16] [ 60/400] eta: 0:02:24 lr: 0.000046 loss: 0.4435 (0.4486) grad: 0.0849 (0.0867) time: 0.3731 data: 0.0033 max mem: 3951 +train: [16] [ 80/400] eta: 0:02:10 lr: 0.000045 loss: 0.4699 (0.4522) grad: 0.0817 (0.0858) time: 0.3567 data: 0.0033 max mem: 3951 +train: [16] [100/400] eta: 0:01:59 lr: 0.000044 loss: 0.4398 (0.4493) grad: 0.0810 (0.0853) time: 0.3609 data: 0.0034 max mem: 3951 +train: [16] [120/400] eta: 0:01:49 lr: 0.000043 loss: 0.4388 (0.4497) grad: 0.0865 (0.0859) time: 0.3627 data: 0.0037 max mem: 3951 +train: [16] [140/400] eta: 0:01:40 lr: 0.000042 loss: 0.4670 (0.4538) grad: 0.0853 (0.0857) time: 0.3591 data: 0.0034 max mem: 3951 +train: [16] [160/400] eta: 0:01:32 lr: 0.000041 loss: 0.4682 (0.4535) grad: 0.0838 (0.0853) time: 0.3525 data: 0.0034 max mem: 3951 +train: [16] [180/400] eta: 0:01:23 lr: 0.000040 loss: 0.4526 (0.4556) grad: 0.0836 (0.0852) time: 0.3491 data: 0.0033 max mem: 3951 +train: [16] [200/400] eta: 0:01:15 lr: 0.000039 loss: 0.4599 (0.4570) grad: 0.0824 (0.0849) time: 0.3417 data: 0.0033 max mem: 3951 +train: [16] [220/400] eta: 0:01:07 lr: 0.000038 loss: 0.4661 (0.4589) grad: 0.0787 (0.0848) time: 0.3636 data: 0.0033 max mem: 3951 +train: [16] [240/400] eta: 0:00:59 lr: 0.000036 loss: 0.4773 (0.4589) grad: 0.0846 (0.0850) time: 0.3641 data: 0.0035 max mem: 3951 +train: [16] [260/400] eta: 0:00:52 lr: 0.000035 loss: 0.4773 (0.4609) grad: 0.0835 (0.0847) time: 0.3518 data: 0.0036 max mem: 3951 +train: [16] [280/400] eta: 0:00:44 lr: 0.000034 loss: 0.4492 (0.4596) grad: 0.0798 (0.0844) time: 0.3444 data: 0.0034 max mem: 3951 +train: [16] [300/400] eta: 0:00:38 lr: 0.000033 loss: 0.4257 (0.4582) grad: 0.0812 (0.0846) time: 0.6024 data: 0.1913 max mem: 3951 +train: [16] [320/400] eta: 0:00:30 lr: 0.000032 loss: 0.4298 (0.4574) grad: 0.0833 (0.0846) time: 0.3693 data: 0.0037 max mem: 3951 +train: [16] [340/400] eta: 0:00:23 lr: 0.000031 loss: 0.4429 (0.4576) grad: 0.0832 (0.0844) time: 0.3667 data: 0.0033 max mem: 3951 +train: [16] [360/400] eta: 0:00:15 lr: 0.000031 loss: 0.4396 (0.4569) grad: 0.0804 (0.0844) time: 0.3565 data: 0.0035 max mem: 3951 +train: [16] [380/400] eta: 0:00:07 lr: 0.000030 loss: 0.4332 (0.4562) grad: 0.0838 (0.0843) time: 0.3534 data: 0.0035 max mem: 3951 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.4249 (0.4548) grad: 0.0819 (0.0841) time: 0.3675 data: 0.0034 max mem: 3951 +train: [16] Total time: 0:02:32 (0.3804 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.4249 (0.4548) grad: 0.0819 (0.0841) +eval (validation): [16] [ 0/63] eta: 0:03:43 time: 3.5479 data: 3.2563 max mem: 3951 +eval (validation): [16] [20/63] eta: 0:00:21 time: 0.3577 data: 0.0045 max mem: 3951 +eval (validation): [16] [40/63] eta: 0:00:10 time: 0.3607 data: 0.0033 max mem: 3951 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3320 data: 0.0031 max mem: 3951 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3304 data: 0.0031 max mem: 3951 +eval (validation): [16] Total time: 0:00:25 (0.4061 s / it) +cv: [16] best hparam: (9.8, 1.0) (038) ('038_lr9.8e+00_wd1.0e+00') loss: 0.071 acc: 0.980 f1: 0.977 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:23:29 lr: nan time: 3.5229 data: 3.2494 max mem: 3951 +train: [17] [ 20/400] eta: 0:03:29 lr: 0.000028 loss: 0.4358 (0.4484) grad: 0.0905 (0.0910) time: 0.4023 data: 0.0176 max mem: 3951 +train: [17] [ 40/400] eta: 0:02:51 lr: 0.000027 loss: 0.4479 (0.4463) grad: 0.0843 (0.0858) time: 0.4001 data: 0.0033 max mem: 3951 +train: [17] [ 60/400] eta: 0:02:30 lr: 0.000026 loss: 0.4592 (0.4518) grad: 0.0819 (0.0863) time: 0.3733 data: 0.0035 max mem: 3951 +train: [17] [ 80/400] eta: 0:02:14 lr: 0.000025 loss: 0.4597 (0.4546) grad: 0.0819 (0.0851) time: 0.3448 data: 0.0034 max mem: 3951 +train: [17] [100/400] eta: 0:02:03 lr: 0.000024 loss: 0.4397 (0.4521) grad: 0.0787 (0.0846) time: 0.3788 data: 0.0035 max mem: 3951 +train: [17] [120/400] eta: 0:01:53 lr: 0.000023 loss: 0.4362 (0.4507) grad: 0.0805 (0.0843) time: 0.3722 data: 0.0033 max mem: 3951 +train: [17] [140/400] eta: 0:01:44 lr: 0.000023 loss: 0.4456 (0.4503) grad: 0.0828 (0.0839) time: 0.3957 data: 0.0036 max mem: 3951 +train: [17] [160/400] eta: 0:01:36 lr: 0.000022 loss: 0.4467 (0.4503) grad: 0.0808 (0.0835) time: 0.3821 data: 0.0036 max mem: 3951 +train: [17] [180/400] eta: 0:01:26 lr: 0.000021 loss: 0.4481 (0.4509) grad: 0.0807 (0.0834) time: 0.3462 data: 0.0034 max mem: 3951 +train: [17] [200/400] eta: 0:01:17 lr: 0.000020 loss: 0.4550 (0.4522) grad: 0.0796 (0.0831) time: 0.3479 data: 0.0034 max mem: 3951 +train: [17] [220/400] eta: 0:01:10 lr: 0.000019 loss: 0.4510 (0.4526) grad: 0.0828 (0.0834) time: 0.3845 data: 0.0035 max mem: 3951 +train: [17] [240/400] eta: 0:01:02 lr: 0.000019 loss: 0.4427 (0.4518) grad: 0.0843 (0.0834) time: 0.3767 data: 0.0032 max mem: 3951 +train: [17] [260/400] eta: 0:00:54 lr: 0.000018 loss: 0.4394 (0.4516) grad: 0.0843 (0.0837) time: 0.3663 data: 0.0034 max mem: 3951 +train: [17] [280/400] eta: 0:00:46 lr: 0.000017 loss: 0.4394 (0.4525) grad: 0.0831 (0.0837) time: 0.3618 data: 0.0034 max mem: 3951 +train: [17] [300/400] eta: 0:00:39 lr: 0.000016 loss: 0.4342 (0.4514) grad: 0.0831 (0.0837) time: 0.5698 data: 0.2035 max mem: 3951 +train: [17] [320/400] eta: 0:00:31 lr: 0.000016 loss: 0.4353 (0.4510) grad: 0.0815 (0.0835) time: 0.3912 data: 0.0037 max mem: 3951 +train: [17] [340/400] eta: 0:00:23 lr: 0.000015 loss: 0.4441 (0.4513) grad: 0.0806 (0.0834) time: 0.3795 data: 0.0033 max mem: 3951 +train: [17] [360/400] eta: 0:00:15 lr: 0.000014 loss: 0.4455 (0.4516) grad: 0.0790 (0.0831) time: 0.3861 data: 0.0036 max mem: 3951 +train: [17] [380/400] eta: 0:00:07 lr: 0.000014 loss: 0.4497 (0.4517) grad: 0.0794 (0.0829) time: 0.3667 data: 0.0034 max mem: 3951 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.4486 (0.4514) grad: 0.0818 (0.0827) time: 0.3715 data: 0.0033 max mem: 3951 +train: [17] Total time: 0:02:37 (0.3930 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.4486 (0.4514) grad: 0.0818 (0.0827) +eval (validation): [17] [ 0/63] eta: 0:03:48 time: 3.6303 data: 3.4042 max mem: 3951 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3531 data: 0.0037 max mem: 3951 +eval (validation): [17] [40/63] eta: 0:00:10 time: 0.3600 data: 0.0031 max mem: 3951 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3438 data: 0.0034 max mem: 3951 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3375 data: 0.0033 max mem: 3951 +eval (validation): [17] Total time: 0:00:25 (0.4090 s / it) +cv: [17] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 0.058 acc: 0.982 f1: 0.980 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:32 lr: nan time: 3.3804 data: 3.1288 max mem: 3951 +train: [18] [ 20/400] eta: 0:03:17 lr: 0.000012 loss: 0.4613 (0.4570) grad: 0.0785 (0.0801) time: 0.3754 data: 0.0042 max mem: 3951 +train: [18] [ 40/400] eta: 0:02:42 lr: 0.000012 loss: 0.4613 (0.4498) grad: 0.0805 (0.0832) time: 0.3819 data: 0.0028 max mem: 3951 +train: [18] [ 60/400] eta: 0:02:25 lr: 0.000011 loss: 0.4495 (0.4514) grad: 0.0824 (0.0829) time: 0.3748 data: 0.0041 max mem: 3951 +train: [18] [ 80/400] eta: 0:02:10 lr: 0.000011 loss: 0.4526 (0.4561) grad: 0.0816 (0.0832) time: 0.3552 data: 0.0036 max mem: 3951 +train: [18] [100/400] eta: 0:02:00 lr: 0.000010 loss: 0.4489 (0.4529) grad: 0.0811 (0.0834) time: 0.3648 data: 0.0034 max mem: 3951 +train: [18] [120/400] eta: 0:01:50 lr: 0.000009 loss: 0.4464 (0.4518) grad: 0.0797 (0.0837) time: 0.3745 data: 0.0036 max mem: 3951 +train: [18] [140/400] eta: 0:01:42 lr: 0.000009 loss: 0.4594 (0.4528) grad: 0.0810 (0.0838) time: 0.3707 data: 0.0035 max mem: 3951 +train: [18] [160/400] eta: 0:01:33 lr: 0.000008 loss: 0.4568 (0.4533) grad: 0.0840 (0.0840) time: 0.3665 data: 0.0036 max mem: 3951 +train: [18] [180/400] eta: 0:01:24 lr: 0.000008 loss: 0.4482 (0.4525) grad: 0.0853 (0.0841) time: 0.3602 data: 0.0034 max mem: 3951 +train: [18] [200/400] eta: 0:01:16 lr: 0.000007 loss: 0.4498 (0.4531) grad: 0.0828 (0.0841) time: 0.3415 data: 0.0033 max mem: 3951 +train: [18] [220/400] eta: 0:01:08 lr: 0.000007 loss: 0.4511 (0.4534) grad: 0.0803 (0.0838) time: 0.3788 data: 0.0034 max mem: 3951 +train: [18] [240/400] eta: 0:01:00 lr: 0.000006 loss: 0.4633 (0.4538) grad: 0.0807 (0.0838) time: 0.3762 data: 0.0033 max mem: 3951 +train: [18] [260/400] eta: 0:00:53 lr: 0.000006 loss: 0.4637 (0.4538) grad: 0.0806 (0.0836) time: 0.3611 data: 0.0036 max mem: 3951 +train: [18] [280/400] eta: 0:00:45 lr: 0.000006 loss: 0.4447 (0.4536) grad: 0.0809 (0.0837) time: 0.3463 data: 0.0033 max mem: 3951 +train: [18] [300/400] eta: 0:00:38 lr: 0.000005 loss: 0.4447 (0.4534) grad: 0.0825 (0.0836) time: 0.5367 data: 0.1955 max mem: 3951 +train: [18] [320/400] eta: 0:00:30 lr: 0.000005 loss: 0.4334 (0.4519) grad: 0.0799 (0.0835) time: 0.3756 data: 0.0140 max mem: 3951 +train: [18] [340/400] eta: 0:00:23 lr: 0.000004 loss: 0.4346 (0.4519) grad: 0.0777 (0.0833) time: 0.3540 data: 0.0034 max mem: 3951 +train: [18] [360/400] eta: 0:00:15 lr: 0.000004 loss: 0.4352 (0.4509) grad: 0.0778 (0.0832) time: 0.3582 data: 0.0033 max mem: 3951 +train: [18] [380/400] eta: 0:00:07 lr: 0.000004 loss: 0.4352 (0.4512) grad: 0.0780 (0.0830) time: 0.3549 data: 0.0037 max mem: 3951 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.4530 (0.4515) grad: 0.0789 (0.0831) time: 0.3572 data: 0.0033 max mem: 3951 +train: [18] Total time: 0:02:32 (0.3810 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.4530 (0.4515) grad: 0.0789 (0.0831) +eval (validation): [18] [ 0/63] eta: 0:03:41 time: 3.5234 data: 3.2408 max mem: 3951 +eval (validation): [18] [20/63] eta: 0:00:23 time: 0.3917 data: 0.0043 max mem: 3951 +eval (validation): [18] [40/63] eta: 0:00:10 time: 0.3688 data: 0.0035 max mem: 3951 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3409 data: 0.0035 max mem: 3951 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3408 data: 0.0034 max mem: 3951 +eval (validation): [18] Total time: 0:00:26 (0.4219 s / it) +cv: [18] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 0.059 acc: 0.982 f1: 0.980 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:15 lr: nan time: 3.3388 data: 3.1226 max mem: 3951 +train: [19] [ 20/400] eta: 0:03:13 lr: 0.000003 loss: 0.4673 (0.4686) grad: 0.0799 (0.0805) time: 0.3671 data: 0.0037 max mem: 3951 +train: [19] [ 40/400] eta: 0:02:38 lr: 0.000003 loss: 0.4652 (0.4623) grad: 0.0802 (0.0806) time: 0.3680 data: 0.0029 max mem: 3951 +train: [19] [ 60/400] eta: 0:02:20 lr: 0.000002 loss: 0.4527 (0.4534) grad: 0.0798 (0.0808) time: 0.3550 data: 0.0033 max mem: 3951 +train: [19] [ 80/400] eta: 0:02:06 lr: 0.000002 loss: 0.4443 (0.4514) grad: 0.0774 (0.0805) time: 0.3497 data: 0.0033 max mem: 3951 +train: [19] [100/400] eta: 0:01:57 lr: 0.000002 loss: 0.4426 (0.4483) grad: 0.0788 (0.0806) time: 0.3650 data: 0.0034 max mem: 3951 +train: [19] [120/400] eta: 0:01:47 lr: 0.000002 loss: 0.4328 (0.4473) grad: 0.0794 (0.0803) time: 0.3591 data: 0.0033 max mem: 3951 +train: [19] [140/400] eta: 0:01:39 lr: 0.000001 loss: 0.4544 (0.4483) grad: 0.0810 (0.0809) time: 0.3596 data: 0.0034 max mem: 3951 +train: [19] [160/400] eta: 0:01:30 lr: 0.000001 loss: 0.4426 (0.4474) grad: 0.0828 (0.0817) time: 0.3593 data: 0.0032 max mem: 3951 +train: [19] [180/400] eta: 0:01:22 lr: 0.000001 loss: 0.4339 (0.4472) grad: 0.0835 (0.0822) time: 0.3574 data: 0.0033 max mem: 3951 +train: [19] [200/400] eta: 0:01:14 lr: 0.000001 loss: 0.4339 (0.4466) grad: 0.0844 (0.0825) time: 0.3542 data: 0.0031 max mem: 3951 +train: [19] [220/400] eta: 0:01:07 lr: 0.000001 loss: 0.4297 (0.4447) grad: 0.0785 (0.0820) time: 0.3709 data: 0.0033 max mem: 3951 +train: [19] [240/400] eta: 0:00:59 lr: 0.000001 loss: 0.4373 (0.4454) grad: 0.0785 (0.0820) time: 0.3621 data: 0.0034 max mem: 3951 +train: [19] [260/400] eta: 0:00:52 lr: 0.000000 loss: 0.4415 (0.4455) grad: 0.0826 (0.0821) time: 0.3635 data: 0.0034 max mem: 3951 +train: [19] [280/400] eta: 0:00:44 lr: 0.000000 loss: 0.4415 (0.4467) grad: 0.0787 (0.0816) time: 0.3447 data: 0.0035 max mem: 3951 +train: [19] [300/400] eta: 0:00:38 lr: 0.000000 loss: 0.4613 (0.4477) grad: 0.0787 (0.0818) time: 0.5604 data: 0.1969 max mem: 3951 +train: [19] [320/400] eta: 0:00:30 lr: 0.000000 loss: 0.4623 (0.4483) grad: 0.0789 (0.0815) time: 0.3780 data: 0.0041 max mem: 3951 +train: [19] [340/400] eta: 0:00:22 lr: 0.000000 loss: 0.4435 (0.4475) grad: 0.0782 (0.0816) time: 0.3697 data: 0.0035 max mem: 3951 +train: [19] [360/400] eta: 0:00:15 lr: 0.000000 loss: 0.4367 (0.4469) grad: 0.0788 (0.0815) time: 0.3684 data: 0.0035 max mem: 3951 +train: [19] [380/400] eta: 0:00:07 lr: 0.000000 loss: 0.4346 (0.4467) grad: 0.0789 (0.0816) time: 0.3730 data: 0.0036 max mem: 3951 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.4504 (0.4477) grad: 0.0835 (0.0817) time: 0.3655 data: 0.0036 max mem: 3951 +train: [19] Total time: 0:02:32 (0.3802 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.4504 (0.4477) grad: 0.0835 (0.0817) +eval (validation): [19] [ 0/63] eta: 0:03:49 time: 3.6358 data: 3.3313 max mem: 3951 +eval (validation): [19] [20/63] eta: 0:00:21 time: 0.3471 data: 0.0042 max mem: 3951 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3599 data: 0.0030 max mem: 3951 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3476 data: 0.0034 max mem: 3951 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3466 data: 0.0033 max mem: 3951 +eval (validation): [19] Total time: 0:00:25 (0.4079 s / it) +cv: [19] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.057 acc: 0.983 f1: 0.980 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +evaluating last checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +eval model info: +{"score": 0.9826388888888888, "hparam": [50, 1.0], "hparam_id": 48, "epoch": 19, "is_best": false, "best_score": 0.9831349206349206} +eval (train): [20] [ 0/297] eta: 0:17:22 time: 3.5098 data: 3.2884 max mem: 3951 +eval (train): [20] [ 20/297] eta: 0:02:14 time: 0.3361 data: 0.0031 max mem: 3951 +eval (train): [20] [ 40/297] eta: 0:01:52 time: 0.3883 data: 0.0038 max mem: 3951 +eval (train): [20] [ 60/297] eta: 0:01:38 time: 0.3645 data: 0.0034 max mem: 3951 +eval (train): [20] [ 80/297] eta: 0:01:25 time: 0.3394 data: 0.0035 max mem: 3951 +eval (train): [20] [100/297] eta: 0:01:15 time: 0.3397 data: 0.0032 max mem: 3951 +eval (train): [20] [120/297] eta: 0:01:08 time: 0.3860 data: 0.0039 max mem: 3951 +eval (train): [20] [140/297] eta: 0:00:59 time: 0.3642 data: 0.0037 max mem: 3951 +eval (train): [20] [160/297] eta: 0:00:51 time: 0.3469 data: 0.0038 max mem: 3951 +eval (train): [20] [180/297] eta: 0:00:43 time: 0.3365 data: 0.0034 max mem: 3951 +eval (train): [20] [200/297] eta: 0:00:36 time: 0.3582 data: 0.0036 max mem: 3951 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3295 data: 0.0036 max mem: 3951 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3354 data: 0.0033 max mem: 3951 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3439 data: 0.0033 max mem: 3951 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3647 data: 0.0038 max mem: 3951 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3215 data: 0.0033 max mem: 3951 +eval (train): [20] Total time: 0:01:47 (0.3635 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:39 time: 3.4778 data: 3.2511 max mem: 3951 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3378 data: 0.0034 max mem: 3951 +eval (validation): [20] [40/63] eta: 0:00:10 time: 0.3801 data: 0.0033 max mem: 3951 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3433 data: 0.0032 max mem: 3951 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3411 data: 0.0027 max mem: 3951 +eval (validation): [20] Total time: 0:00:25 (0.4093 s / it) +eval (test): [20] [ 0/79] eta: 0:04:52 time: 3.6962 data: 3.4058 max mem: 3951 +eval (test): [20] [20/79] eta: 0:00:32 time: 0.3850 data: 0.0030 max mem: 3951 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3354 data: 0.0033 max mem: 3951 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3537 data: 0.0035 max mem: 3951 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3307 data: 0.0034 max mem: 3951 +eval (test): [20] Total time: 0:00:31 (0.3997 s / it) +evaluating best checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +eval model info: +{"score": 0.9831349206349206, "hparam": [43, 1.0], "hparam_id": 47, "epoch": 13, "is_best": true, "best_score": 0.9831349206349206} +eval (train): [20] [ 0/297] eta: 0:16:59 time: 3.4331 data: 3.1605 max mem: 3951 +eval (train): [20] [ 20/297] eta: 0:02:18 time: 0.3521 data: 0.0140 max mem: 3951 +eval (train): [20] [ 40/297] eta: 0:01:48 time: 0.3431 data: 0.0039 max mem: 3951 +eval (train): [20] [ 60/297] eta: 0:01:33 time: 0.3387 data: 0.0030 max mem: 3951 +eval (train): [20] [ 80/297] eta: 0:01:22 time: 0.3387 data: 0.0036 max mem: 3951 +eval (train): [20] [100/297] eta: 0:01:13 time: 0.3465 data: 0.0035 max mem: 3951 +eval (train): [20] [120/297] eta: 0:01:05 time: 0.3551 data: 0.0034 max mem: 3951 +eval (train): [20] [140/297] eta: 0:00:58 time: 0.3850 data: 0.0037 max mem: 3951 +eval (train): [20] [160/297] eta: 0:00:50 time: 0.3427 data: 0.0033 max mem: 3951 +eval (train): [20] [180/297] eta: 0:00:42 time: 0.3248 data: 0.0035 max mem: 3951 +eval (train): [20] [200/297] eta: 0:00:34 time: 0.3233 data: 0.0034 max mem: 3951 +eval (train): [20] [220/297] eta: 0:00:27 time: 0.3201 data: 0.0035 max mem: 3951 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3197 data: 0.0034 max mem: 3951 +eval (train): [20] [260/297] eta: 0:00:12 time: 0.3166 data: 0.0032 max mem: 3951 +eval (train): [20] [280/297] eta: 0:00:05 time: 0.3254 data: 0.0033 max mem: 3951 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3019 data: 0.0032 max mem: 3951 +eval (train): [20] Total time: 0:01:43 (0.3488 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:23 time: 3.2251 data: 2.9758 max mem: 3951 +eval (validation): [20] [20/63] eta: 0:00:19 time: 0.3245 data: 0.0137 max mem: 3951 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3252 data: 0.0037 max mem: 3951 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3123 data: 0.0030 max mem: 3951 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3115 data: 0.0028 max mem: 3951 +eval (validation): [20] Total time: 0:00:23 (0.3715 s / it) +eval (test): [20] [ 0/79] eta: 0:04:15 time: 3.2331 data: 3.0279 max mem: 3951 +eval (test): [20] [20/79] eta: 0:00:28 time: 0.3448 data: 0.0083 max mem: 3951 +eval (test): [20] [40/79] eta: 0:00:15 time: 0.3264 data: 0.0028 max mem: 3951 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3184 data: 0.0032 max mem: 3951 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3038 data: 0.0030 max mem: 3951 +eval (test): [20] Total time: 0:00:28 (0.3636 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:-------|:-------------|:-------|--------:|-------:|-----:|------------:|:----------|:-----------|---------:|--------:|-----------:|--------:|-----------:| +| flat_mae | patch | linear | hcpya_task21 | best | 13 | 0.0129 | 0.05 | 47 | [43, 1.0] | train | 0.036384 | 0.99189 | 0.00066615 | 0.99259 | 0.00065284 | +| flat_mae | patch | linear | hcpya_task21 | best | 13 | 0.0129 | 0.05 | 47 | [43, 1.0] | validation | 0.059256 | 0.98313 | 0.0020804 | 0.98137 | 0.0025855 | +| flat_mae | patch | linear | hcpya_task21 | best | 13 | 0.0129 | 0.05 | 47 | [43, 1.0] | test | 0.081211 | 0.9744 | 0.0021155 | 0.96828 | 0.0029573 | + + +done! total time: 1:06:45 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/train_log.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..c4f86763b7689b7d8fe4058e1f020921ad7b3169 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__patch__linear/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.5437028187513353, "train/grad": 0.2819386228173971, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.079501953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.0787548828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.077481689453125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.076265869140625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.0750146484375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.07331298828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.071329345703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.06910888671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0662060546875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.063040771484375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.059892578125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.055103759765625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.050218505859375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.043165283203125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.036082763671875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.029033203125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.019805908203125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.008380126953125, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.9948876953125, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.981597900390625, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.964248046875, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.94521240234375, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.922396240234375, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.896226806640625, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.8671533203125, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.8298626708984376, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.7939434814453126, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.7595648193359374, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.7104364013671876, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.6488192749023436, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.591294860839844, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.5372500610351563, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.4620274353027343, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.3929779052734377, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3092694091796875, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.224924850463867, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.133794174194336, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.0468006896972657, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9527017593383789, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8372017669677734, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.7499240684509276, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.6753618812561035, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.5811051654815673, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.502964038848877, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.4167426061630248, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.3296569299697876, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.2589216709136963, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.178660625219345, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.1138127434253693, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.047475336268544194, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04745998343452811, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04743484599515796, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04740838883444667, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.047381768692284824, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.047344272695481776, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.047303255815058946, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.047256477270275356, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04719372207298875, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04712577305734158, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04705887135118246, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04695662073791027, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04685484942048788, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04670426778495312, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04655281452462077, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04640420842915773, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.046209282465279104, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.045967273972928525, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04568173423409462, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04540114217437804, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.045037322817370294, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04463702864944935, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04416229189373553, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04362379688769579, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.043031710851937535, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04228814424946904, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04158878218382597, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.040937812076881526, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04004037538543344, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03897630398161709, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03803945366293192, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.037202558936551215, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03609740528278053, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03512401808053255, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03398490719497204, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.032866300055757164, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03168318301904947, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.030568774878047406, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02937799929175526, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.027937839338555932, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02685942707117647, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0259429419785738, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.024790432923473417, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.023837983249686657, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02278935662470758, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.021749154769349844, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.020924212248064577, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.020004603639245035, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.019250994115136564, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.069312334060669, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.067159414291382, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0633068084716797, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.059629440307617, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.055917978286743, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.050849676132202, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0448381900787354, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0384926795959473, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0297272205352783, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.020442008972168, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0111169815063477, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.9968454837799072, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.98290753364563, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.962132692337036, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.9418137073516846, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.921562671661377, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.8950374126434326, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.862764358520508, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.82499098777771, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.7879464626312256, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.7404162883758545, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.688739538192749, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6279947757720947, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.559471845626831, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.484433889389038, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.3902387619018555, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.30196475982666, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.219071626663208, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.103013753890991, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.9623113870620728, "validation/loss_030_lr2.7e+00_wd1.0e+00": 1.8354140520095825, "validation/loss_031_lr3.1e+00_wd1.0e+00": 1.720797061920166, "validation/loss_032_lr3.7e+00_wd1.0e+00": 1.5688954591751099, "validation/loss_033_lr4.3e+00_wd1.0e+00": 1.4381697177886963, "validation/loss_034_lr5.1e+00_wd1.0e+00": 1.2915865182876587, "validation/loss_035_lr6.0e+00_wd1.0e+00": 1.1571966409683228, "validation/loss_036_lr7.1e+00_wd1.0e+00": 1.0265485048294067, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.9152950048446655, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.8086269497871399, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.6949895620346069, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.6201463937759399, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5626293420791626, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.4977971017360687, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.45004281401634216, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4025147557258606, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3598903715610504, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3293780982494354, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.29978394508361816, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.2780390679836273, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.025049603174603176, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.02554563492063492, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.02703373015873016, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.030009920634920636, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.032242063492063495, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.03571428571428571, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.04141865079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.047867063492063495, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.05555555555555555, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06696428571428571, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.08283730158730158, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10639880952380952, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.13839285714285715, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.18501984126984128, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.21899801587301587, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24702380952380953, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2663690476190476, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2854662698412698, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2993551587301587, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.31225198412698413, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.32514880952380953, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.3343253968253968, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.3464781746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.35714285714285715, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.37375992063492064, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.3948412698412698, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.4164186507936508, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.4417162698412698, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.4851190476190476, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.5486111111111112, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.6140873015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.6569940476190477, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.7150297619047619, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.7849702380952381, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.830109126984127, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8474702380952381, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8663194444444444, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8745039682539683, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8809523809523809, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.886656746031746, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8916170634920635, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8973214285714286, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9027777777777778, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9060019841269841, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9139384920634921, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9176587301587301, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9184027777777778, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9169146825396826, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9174107142857143, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008940962545397986, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009111247990444393, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009769334080160536, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011160125773508498, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012190266425703158, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013853491864649332, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.016941403565739126, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.019850216224383807, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.022876962617182146, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02776629462865525, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.03383966327612716, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04139451366699578, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.05056803226152637, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.062205717777386274, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.07226173985188064, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.07926761273676343, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.08655692850887796, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.09887137107648544, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.10954626571925359, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.12119094017843637, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1319326308548981, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.140232864364642, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1506804136048214, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.15976685056444045, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.17115230295144407, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.187627946415106, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20543081107505956, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2298321845428352, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.27734520441493116, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.34469344343180236, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.39998753102019535, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.45236130050366696, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.563174292007569, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.7008925141415465, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.7690014432345285, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.7960373388975474, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8281424846247967, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8423591649209632, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8536850993677826, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8626557448667762, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8700577060160074, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8796742906017397, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.887117624965079, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8912227161632484, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9010399177010138, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.903937353310104, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9015066946444472, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8936865809126404, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8934370290699879, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 1.2589216709136963, "validation/loss_best": 0.3293780982494354, "validation/acc_best": 0.9184027777777778, "validation/f1_best": 0.9015066946444472} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.769964628815651, "train/grad": 0.21950162135064602, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.047862548828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.04276611328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.034405517578125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.02610107421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.017838134765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0063232421875, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.993369140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.978995361328125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.95997314453125, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.939735107421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.919720458984375, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.889725341796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.860384521484375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.8176165771484376, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.7762860107421874, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.7363128662109375, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.6846942138671874, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.6230328369140623, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.5527117919921873, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.4859368896484373, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.401980285644531, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.313359375, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.2127740478515623, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.1034207153320312, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9885134887695313, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8516880798339843, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.730914306640625, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6238570404052735, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4848423767089844, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3317454910278321, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.207079963684082, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1041839218139649, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9800985717773437, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.882818717956543, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.782156629562378, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.696284475326538, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.6175371408462524, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.5533096289634705, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.4934717893600464, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.4312592375278473, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.3907847535610199, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.3598363870382309, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.324835125207901, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.2988504827022552, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.2728439584374428, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.24922832868993283, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.2318970075249672, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.2140890061110258, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.20081744708120822, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.046654192507266996, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04654451558366418, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0463661483488977, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04618662007153034, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0460091676749289, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.045764781031757594, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.045488205086439845, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.045180120896548034, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04477881111204624, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04435070501640439, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.043932189084589485, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.043302545323967934, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04269137008115649, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0418117661587894, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04096944806165993, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0401693263091147, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.039160584844648835, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03798512242734432, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03669870958663523, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03552887422963977, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03413931293413043, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03276173194870353, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0313025117199868, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02982038010843098, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.028356045233085752, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.026699185213074086, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.025294161243364213, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.024079559175297617, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02252860678359866, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.020835267840884627, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.019455694248899816, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.018307808474637567, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01691109379287809, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01580407977104187, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.014653883231803775, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.013673204549122601, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.012782939565367997, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.012070229891687632, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.011412900539580733, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.010737095407675952, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010286665975581855, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.009917632637079805, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.009481214340776206, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00915920509956777, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.008850069383624941, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.008641787462402135, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.008557720885146409, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.008530589448055253, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.008467493632342666, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.0254786014556885, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.017016649246216, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0029120445251465, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.9887197017669678, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.9749796390533447, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.955583095550537, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.933969736099243, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.910216808319092, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.878757953643799, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.845608711242676, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.8132195472717285, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7649190425872803, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.718319892883301, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.65103816986084, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5868799686431885, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.525552988052368, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4478065967559814, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.356135368347168, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.25370192527771, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.1579384803771973, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.0400474071502686, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.9190623760223389, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.7858330011367798, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.6465204954147339, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.5069886445999146, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.3509784936904907, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.2227728366851807, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.1164166927337646, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.9885556697845459, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.860055685043335, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.7643270492553711, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.6903931498527527, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.6070466041564941, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.5452026128768921, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.48410823941230774, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.4339013993740082, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.38893091678619385, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.3527051508426666, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.31891578435897827, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2829778790473938, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2593173682689667, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.24079330265522003, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.21952830255031586, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.20421519875526428, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.1888832151889801, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.1734732985496521, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.16156005859375, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.15156443417072296, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.14729894697666168, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06076388888888889, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07415674603174603, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.09499007936507936, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.12202380952380952, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.15674603174603174, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.19518849206349206, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.2316468253968254, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.25669642857142855, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.279265873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.296875, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.31026785714285715, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.32862103174603174, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.34151785714285715, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.3568948412698413, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.3707837301587302, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.3841765873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.40773809523809523, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.4347718253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.4618055555555556, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.49255952380952384, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.5342261904761905, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.5811011904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.6393849206349206, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.6867559523809523, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.751984126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8271329365079365, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.855406746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8668154761904762, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8737599206349206, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8851686507936508, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8896329365079365, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.892609126984127, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9010416666666666, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9084821428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9114583333333334, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9141865079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9188988095238095, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9223710317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9283234126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9357638888888888, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.941468253968254, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9464285714285714, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9516369047619048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9600694444444444, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9623015873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9615575396825397, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9563492063492064, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.02519766231765125, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0309084905702091, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.03821407681811821, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.045956849326865085, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.05556974377329728, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.06393772859076167, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.07555129625924181, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0830203913459759, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.09409479095557587, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.10527879328790066, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11711112080061785, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13329864317229534, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14535257786008166, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15832457269396025, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.17025778347207948, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.180914404263961, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19931433939016974, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22196985680519413, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.24478883034993504, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2773705808934802, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.32141121814293144, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.3700310955065967, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.42645287777311097, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.5062938046603912, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.6445190928256634, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7816419190429647, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8211719560237368, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8384684021898604, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8473889678955947, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8643924253634024, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8727908950135982, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8772232186379766, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8881916041165161, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8972773326981682, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9007027809304693, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9037710284806046, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9085412737474979, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9111067202477698, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9182492353556607, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9263063959789856, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9323767359521428, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9387609529393327, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9441456677862122, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9455789022534524, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9468319384051158, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.952849598522859, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9551643020132312, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9550231623557602, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9491783626154456, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.2318970075249672, "validation/loss_best": 0.16156005859375, "validation/acc_best": 0.9623015873015873, "validation/f1_best": 0.9551643020132312} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 1.3926880192756652, "train/grad": 0.18844108343124388, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.98925048828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.97604248046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.954268798828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.933031005859375, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.911875, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.88314453125, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.850802001953125, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.8154296875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.7697119140625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.721905517578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.675665283203125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.607537841796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.54260009765625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4508258056640626, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3647283935546874, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.28384521484375, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.182967224121094, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0668905639648436, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.940416259765625, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.8262532043457032, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.6906718444824218, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.557794189453125, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.419063720703125, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.282479476928711, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.1538116073608398, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.0184891891479493, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.9131003761291504, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.8294642639160156, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.7323405075073243, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.638014669418335, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.5693365430831909, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.5170868062973022, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.4586922597885132, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.41561461210250855, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.3731205594539642, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.33809867143630984, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.3066000705957413, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.2811464148759842, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.25717897683382035, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.231952446103096, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.2151032041758299, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.20203707367181778, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1869327400624752, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.1755393612384796, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.16403918720781804, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.15342371374368668, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.14537548277527093, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.1369752012938261, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.13103988409042358, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.046026670038700104, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04574831426143646, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.045290950741618874, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04484496839344501, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0444075801409781, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04380301890894771, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.043134483750909565, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04240225585177541, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.041469727037474514, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0405056618899107, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03958718014881015, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.038270500022917986, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.037054075114429, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03540677917189896, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.033951312880963085, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03266109758988023, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.031164031187072398, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.029574668295681478, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.027976129511371253, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.026616693595424294, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.025069447392597796, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.023581281453371048, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022033546455204486, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02049525149166584, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.019027028139680623, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.017454962371848525, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.016212714058347046, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01520794234238565, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.014028768502175808, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.012864715168252588, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.012004874267149716, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0113451117859222, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.010602683783508838, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010047718761488795, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009494405388832092, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009034918283578008, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008620456433854997, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008264286234043539, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007928425257559865, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0075650588853750375, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007339844054076821, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.007159048723988235, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006936629904666916, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.006777982120402157, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00664736075210385, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0065556539769750085, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0064493454480543735, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.006316903057741002, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.00627960744430311, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9555063247680664, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.9373416900634766, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.9074745178222656, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.878046751022339, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.8493869304656982, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.8101611137390137, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.766658306121826, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.7192604541778564, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.658592462539673, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.5957040786743164, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.535440444946289, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.4476499557495117, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.364931106567383, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.249527931213379, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.142770528793335, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.0440804958343506, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.922951102256775, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.7868818044662476, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.643079400062561, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.5175946950912476, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.3747040033340454, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.241446852684021, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.1093006134033203, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.9861934185028076, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.8759868741035461, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.765507698059082, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.6828933954238892, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.6189963817596436, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.54636150598526, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.4773794114589691, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.42768433690071106, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.39005494117736816, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.34792670607566833, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3166691064834595, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.28585225343704224, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.26037806272506714, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2377920150756836, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.21930593252182007, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20174986124038696, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.18195052444934845, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.16838467121124268, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.15789127349853516, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.14624662697315216, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.1385522186756134, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.13139328360557556, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.12272289395332336, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.11522624641656876, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.10800248384475708, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.10119745135307312, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.19642857142857142, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2269345238095238, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.25818452380952384, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2767857142857143, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.28794642857142855, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.30481150793650796, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.31746031746031744, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.3298611111111111, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.3420138888888889, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.3531746031746032, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.3645833333333333, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.38665674603174605, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.41096230158730157, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.44543650793650796, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.47643849206349204, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.5138888888888888, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.5667162698412699, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.6274801587301587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.6793154761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7477678571428571, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8251488095238095, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8561507936507936, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8735119047619048, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8834325396825397, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8923611111111112, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9025297619047619, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9060019841269841, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.910218253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9159226190476191, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9223710317460317, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9260912698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9308035714285714, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9375, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9419642857142857, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9469246031746031, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9479166666666666, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9563492063492064, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9603174603174603, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9652777777777778, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9672619047619048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9645337301587301, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9689980158730159, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9714781746031746, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.06346335644189398, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.07238314510757823, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.08227144337956485, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.09191640994792596, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.09817676253845302, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.11235935740562478, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.12348645846297715, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.135637393150822, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.14790857485800044, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.15731997337409284, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.16755537994440997, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.18520692100406702, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.20510556067792432, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.23413350204292546, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.2652805699273727, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.30657122271593856, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.36320037593188836, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.42034936460465644, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.5089989796931422, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.6495121829692708, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7815737100779472, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8251780232659652, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8471258588774592, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8589662878245211, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8724320106837851, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8871389787009407, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8926634030719507, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8988689287865684, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9056500425106506, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9132022663602839, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9170839302323457, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.922456454816708, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9291202786291115, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9336212414356664, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9384242100507177, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9401481150700086, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9428575605556989, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9455675601748484, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9501935752577877, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.953522921771088, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9562625785805177, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9581927814189993, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9604377588071481, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9601890732069759, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9574788889280683, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9603531464636099, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9635988674808028, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9653783026112487, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9661229971162507, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.13103988409042358, "validation/loss_best": 0.10119745135307312, "validation/acc_best": 0.9714781746031746, "validation/f1_best": 0.9661229971162507} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 1.1560752043128013, "train/grad": 0.1629555743932724, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.910040283203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.886173095703125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.84738037109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.80969970703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.7729266357421873, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.72315673828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.6684539794921873, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.6093560791015626, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.5342803955078126, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.457354736328125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.3844732666015624, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.279267578125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.1814849853515623, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.0471038818359375, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.9255426025390625, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.8151570129394532, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.6834288024902344, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.5403610229492188, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.3951465606689453, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.2737716674804687, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.140909080505371, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.0218642807006837, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.9081597709655762, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.8054070472717285, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.7157822895050049, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.6277118587493896, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.5629119968414307, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.5131962633132935, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.45728198528289793, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.40408896923065185, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.3658812713623047, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3369374811649323, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.3044630318880081, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.28040961623191835, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.2563174393773079, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.23599958330392837, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.21745231583714486, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.202098768055439, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.18751273680478334, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.17179338082671167, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.16093562602996825, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.15246571086347102, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.142750013358891, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.13538912497460842, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.1282196755334735, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1221684550307691, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.11793399025686085, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.11441640984266996, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.11266862862743438, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04373443778604269, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04323683915659785, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04242724781855941, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04164864801801741, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.040897002341225744, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03989726632833481, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.038815535446628927, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03768238109536469, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03628904717974365, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0349274117872119, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03370213707908988, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0320559490006417, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030651290770620107, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02889343992806971, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.027436886820942162, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.026187253296375276, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.024746366450563072, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02320012429729104, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021618387484923004, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02027253369335085, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.018769480013288556, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0173970931628719, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.016057684323750435, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.014826092021539807, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.013731873310171068, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01263804611749947, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011824087500572204, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.011192413615062832, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01047454667976126, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00979131787782535, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.009302409715019166, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.008929145012516529, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.008516851381864398, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.008215875807218253, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.007897311982233077, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007611543018138036, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007337033821968362, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.007113911604974419, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006906796400435269, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0066851811471860855, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006513052051886916, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.006395827255910262, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006321005540667102, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0062925982032902535, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.006278814114630223, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.006290237366338261, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006296245536068454, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.006393467523157597, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.006476553424727171, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.863833427429199, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.8334856033325195, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.7845447063446045, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.7372591495513916, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.6914174556732178, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.6298952102661133, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.5626320838928223, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.490806818008423, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.40006422996521, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.308072328567505, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.2217037677764893, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.0987138748168945, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.98585844039917, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.833664059638977, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.6993699073791504, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.5804411172866821, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.442787528038025, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.2988386154174805, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.1587213277816772, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.0460596084594727, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.9273759722709656, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.8245553970336914, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.7290539741516113, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6446457505226135, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.572425365447998, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5024057626724243, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.45113298296928406, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.4118487536907196, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.36759158968925476, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.3253653049468994, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.29501962661743164, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2719687223434448, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.24595649540424347, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.22654351592063904, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.20685197412967682, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.19017259776592255, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1748456507921219, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1621389240026474, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14990867674350739, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13708455860614777, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.12867875397205353, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.12175891548395157, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.11419462412595749, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.10891082137823105, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.103723905980587, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.09766949713230133, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.09199947863817215, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.08597719669342041, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08308514952659607, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2837301587301587, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2943948412698413, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3134920634920635, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3253968253968254, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.33630952380952384, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.34945436507936506, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.36334325396825395, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.38244047619047616, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.4089781746031746, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.43725198412698413, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.46106150793650796, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5062003968253969, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5510912698412699, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6215277777777778, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6671626984126984, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7259424603174603, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8013392857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8524305555555556, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8752480158730159, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8861607142857143, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8968253968253969, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9027777777777778, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9087301587301587, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9117063492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9176587301587301, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9236111111111112, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9285714285714286, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9330357142857143, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9367559523809523, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9402281746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9454365079365079, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.949156746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9585813492063492, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9613095238095238, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.966765873015873, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9692460317460317, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9694940476190477, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9766865079365079, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.09692005806303686, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.10307495554993382, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.12090306170739161, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.13179184747337805, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.14232506693060423, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.15558632147593535, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.16579547877730474, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.18220982431217422, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.2018683862580503, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.22532218606202656, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.24730947232208128, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.29601743795375635, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.34265782849859816, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.41069451094133214, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.4795964894192905, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6091528753992846, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.7472348982507464, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8212825795863364, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8521630871596079, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8688333808626697, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.88235522062653, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.889017497421122, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8961807804715749, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9003542581712565, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9075644920698229, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9136514317609242, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9186977649716884, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9237921232554566, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9275138362085559, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9315588017370254, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9378612116139914, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9415042165473892, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9446680726842321, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9481777667500224, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9506351700732739, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9531005279275352, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9549291373919422, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9581716302604315, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9600275132451327, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9630717142763772, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9634263180731439, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9658535817219833, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9659102290764724, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9658594611321344, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9668735873424782, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9702118673947803, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9706495218961871, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9731036595935502, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9717295042257402, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 0.11441640984266996, "validation/loss_best": 0.08597719669342041, "validation/acc_best": 0.9781746031746031, "validation/f1_best": 0.9731036595935502} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 0.9683287939429284, "train/grad": 0.14321617379784585, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.8069677734375, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.77006591796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.7106842041015624, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.6536767578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.5989385986328126, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.525709228515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.4464404296875, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.3622735595703124, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.2574468994140626, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.152115478515625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.0544088745117186, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.9173895263671874, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.7942385864257813, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.632350616455078, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.4937629699707031, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.3748464965820313, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.2415355682373046, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.1068917846679687, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.9802597808837891, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.881061897277832, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.7791058540344238, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.6924663162231446, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6133875465393066, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5444253158569335, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.4856958150863647, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.4291037034988403, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.38777700424194333, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.35619760274887086, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.3204473602771759, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.2863433343172073, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.26150495529174805, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.2424952945113182, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.22098002701997757, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.20482126295566558, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.18854872450232507, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.17465554043650627, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.16162225902080535, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.15077929452061653, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.14029241558164357, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1289107034355402, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.12117229858413339, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.11518616126850248, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.10841617673635483, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.10347843796014786, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.09874652477912604, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.09499274572357536, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.09286618320271373, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.09150502173230052, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.09112312394194305, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04160905093885958, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04086338724941015, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03968193848617375, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03857076342217624, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03752726081758737, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.036178086549043656, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03477986735291779, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03337469276040792, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0317430298961699, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03023840286768973, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.028951863115653394, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.027282895362004636, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02587812609039247, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024093664437532424, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02257759289816022, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021263418751768768, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019767862288281323, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.018226690865121783, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.016752592865377666, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.015582440434955061, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.014360038912855089, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.013308534501120448, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.012330137635581195, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.011465456418227405, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.010713104903697968, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00997619982343167, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.009429116491228342, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.009001019150018693, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.008512773488182574, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.008041422499809414, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0076989484252408145, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.007437959433300421, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.007152816713787615, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00694336217478849, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006729466567048803, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006547506133792922, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006354276502970606, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006175575342494994, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005989501436706632, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0057726012688362974, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005613271761103533, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.005489540843991563, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.005353153985342942, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005292024913360365, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.005284810500452295, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.005365553851006553, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.005502706678817049, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.005649249914567917, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.005719069876940921, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.7535510063171387, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.710081100463867, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.6405069828033447, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.5742931365966797, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.5109376907348633, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.426828384399414, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.3365678787231445, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.241429090499878, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.123974084854126, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.0073788166046143, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.9002904891967773, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.7525008916854858, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.622326374053955, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.4558430910110474, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.3175902366638184, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.2022159099578857, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.0764793157577515, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.9529625773429871, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.8400488495826721, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.7532810568809509, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.665399432182312, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.5917510390281677, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.5249382257461548, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.4668821096420288, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.41771647334098816, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.3701229393482208, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.3352399468421936, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.3086206614971161, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.27836769819259644, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.24934236705303192, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.2279023826122284, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.21145188808441162, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.19261156022548676, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.17852677404880524, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.16415031254291534, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1518326699733734, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1404639035463333, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13122285902500153, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1227434054017067, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.11486689746379852, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.11019546538591385, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.10779222846031189, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1065334603190422, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.10567328333854675, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.10267459601163864, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.09725554287433624, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.09082888066768646, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.08688095211982727, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08706053346395493, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.3229166666666667, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.33556547619047616, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.34771825396825395, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.36259920634920634, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.37822420634920634, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.40327380952380953, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.42956349206349204, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4603174603174603, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5009920634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5471230158730159, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5935019841269841, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6500496031746031, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7043650793650794, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7934027777777778, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8442460317460317, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.861359126984127, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8740079365079365, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8869047619047619, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8958333333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9010416666666666, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9032738095238095, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9092261904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9146825396825397, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9206349206349206, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9270833333333334, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9347718253968254, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9407242063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9441964285714286, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9486607142857143, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9565972222222222, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9595734126984127, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.964781746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.970734126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9694940476190477, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9665178571428571, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9709821428571429, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.1301751849065853, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.14177875492846737, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.15439717498295924, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.16714980684875658, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.17978147935053676, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.20205648120710012, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.22341443890150078, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.25114286042484335, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.2955317408077818, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3409965268758923, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.3829158315088131, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.44146772120738187, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5483289825348112, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7221829007898831, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.7972830164796055, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8225633059487423, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8435015628883585, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.864802380111107, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8792360398226591, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8867138601283981, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8902562603554921, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8984149003485962, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9051394011841861, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9119766956534157, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.917965237146115, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9260583455896055, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.931853311224698, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9360996691898331, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9410231803227163, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9455146336032221, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9468794620806618, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9497177054417539, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9530580280057501, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9563962935319607, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9588916167837882, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9584461432519563, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9611327269597161, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9640476963368565, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9658041272649298, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9663795613857709, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9655959935581131, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9642894546489086, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9635998849963515, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9622308916994236, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9614189731101245, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9630868322205531, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9641438022060795, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9659287059697024, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9656899268825021, "id_best": 38, "lr_best": 0.00294, "wd_best": 0.05, "train/loss_best": 0.14029241558164357, "validation/loss_best": 0.1227434054017067, "validation/acc_best": 0.9714781746031746, "validation/f1_best": 0.9658041272649298} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 0.8353389406204224, "train/grad": 0.13066591568291186, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.6945440673828127, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.64478271484375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5655645751953124, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.49052978515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.419298095703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.325206298828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.224912109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.1200161743164063, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.9919158935546875, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.8661834716796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.7525784301757812, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5986248779296874, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.46635986328125, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3013787078857422, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.1684539031982422, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.0600699615478515, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.9444639205932617, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.8332139587402344, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.7331316375732422, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.6575119400024414, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.5816843891143799, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.5185523796081543, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.46172831058502195, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.41256874561309814, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.37087284326553344, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.33067272424697874, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.3011959391832352, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.2786420726776123, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.2529111221432686, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.22812414824962615, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.20984753742814063, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1957394726574421, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.17942425668239592, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.1670246072113514, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.154204146489501, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.1432939578220248, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.13304513245820998, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.12456548158079386, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1164334763213992, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.10768837384879588, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.10195189584046602, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.09760638261213898, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.09286467956379056, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.08954772105440498, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.08686834209598601, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.08524585870094598, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.08400058170780539, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.08293639614246785, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.08362763599492609, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.039812426865100864, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03884649556130171, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.037353212870657446, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03599868051707745, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0347660072427243, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.033228751067072154, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03171183543279767, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.030257222428917884, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02863254231400788, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.027162603018805385, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025898799495771528, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024221556577831507, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022779743550345302, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020946792224422098, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019436674690805376, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.018174092592671512, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.016805303124710916, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.015465517020784319, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.014236754295416177, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.013295344961807131, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01233477114699781, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.011518399477936328, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.010764392507262528, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01009841674938798, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.009519706573337316, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00894659674493596, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.008513236993458122, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.008176241545006633, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.007788115381263196, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.007396356899989769, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00710714521817863, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006871649927925319, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006599334144266322, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0063847817003261295, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006152104068314656, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005949410484172404, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005754678602097556, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.005590492894407361, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005423997387988493, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.005258288580807857, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005183205246576108, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0051499332539970055, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.005136684051249177, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005142668049084023, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.005192208305234089, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.005279672697652131, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.005288749251631089, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.005209041342604906, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.005240261172875762, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.6406588554382324, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5847671031951904, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.4964210987091064, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.4130966663360596, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.3345284461975098, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.2314646244049072, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.1221749782562256, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.009044647216797, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8721588850021362, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7398442029953003, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.6220191717147827, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.4654184579849243, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.3337596654891968, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.1735599040985107, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.0477855205535889, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9471481442451477, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.8415595889091492, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.7413635849952698, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.6524572372436523, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.5856413245201111, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.5191418528556824, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.46384063363075256, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.4141308069229126, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.370990127325058, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.33449289202690125, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.29892098903656006, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.27276378870010376, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2525516748428345, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.22942674160003662, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.2068784087896347, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1904478371143341, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1776973307132721, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.16320453584194183, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.15230590105056763, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1413406878709793, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13192957639694214, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1231074407696724, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11495867371559143, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1068684309720993, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0987207442522049, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.0934741199016571, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.08996216952800751, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08692849427461624, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.0859895870089531, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08698847144842148, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08946138620376587, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.09274034947156906, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.09544289112091064, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.09766338765621185, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.34598214285714285, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.3576388888888889, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.37822420634920634, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.4025297619047619, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.42410714285714285, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.45610119047619047, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.49181547619047616, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5364583333333334, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5974702380952381, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6495535714285714, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6919642857142857, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7862103174603174, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8432539682539683, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8720238095238095, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8851686507936508, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8911210317460317, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9015376984126984, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9064980158730159, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9099702380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9149305555555556, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9196428571428571, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.923859126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9305555555555556, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.935515873015873, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9412202380952381, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9464285714285714, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9476686507936508, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9494047619047619, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9608134920634921, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9699900793650794, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9689980158730159, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9662698412698413, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9627976190476191, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.15286786546346154, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.16250747167298896, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.1792533485229609, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.20074978454202763, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.21790902584154748, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.24596463526940396, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.28565054347976726, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.33118937690156586, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3897169692859449, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.44713487073356634, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5356763780077852, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7299374085006707, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8138625813748872, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8516614676779228, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8699887534098322, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8766565729128312, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8890347060081102, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8959586232261969, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9003027149004598, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9064841083993778, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.911173047335692, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9158046338126075, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9228052807478881, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9280174631660093, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9336359217830539, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9387499494760686, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9398770588674095, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9422745695037236, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9455662752609741, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9486551213364123, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9510273140936689, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9533629963892277, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.955093946429703, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9576793062916563, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9590353476723888, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9611557053363443, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9636679669285597, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9649421454816101, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9670650659814649, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9689254722736077, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9690187192861375, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9700268637567475, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9704639946803986, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9686501147863473, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9646408950345359, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9604196589736139, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.95619942257105, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9530874211615886, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9537513798660794, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.09286467956379056, "validation/loss_best": 0.08692849427461624, "validation/acc_best": 0.9764384920634921, "validation/f1_best": 0.9704639946803986} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.7428102669119835, "train/grad": 0.11863843526691198, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.5865521240234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.52552001953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.4293280029296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.339267578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.25462158203125, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.144062194824219, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.027887878417969, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.90838134765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.7654293823242186, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.6292684936523438, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.5096858215332032, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.3535803985595702, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.2249917602539062, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.0717237854003907, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.9534944152832031, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.8603515243530273, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.763698558807373, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.6732309341430665, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.5934805774688721, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.5340794944763183, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.4751087093353272, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.4262531757354736, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.3823414444923401, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.3443362069129944, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.3119427466392517, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.28062932431697846, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.2575239604711533, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.23961287707090378, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.21906240701675414, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.19900265365839004, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.1840194134414196, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.17229539178311826, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.15864689704030752, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.148068543151021, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.13707133483141662, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.12752618290483952, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.11861015766859055, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.11122410612180829, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.10425243690609932, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.09698915999382735, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.09233763985335827, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0888805968221277, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.08517755936831235, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.08271169437095523, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.08040552611462772, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.07858152759261429, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.07902610928751529, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.08192781365476548, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0855023244395852, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03731492918916047, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03618374580517411, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03447844217531383, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03297872351482511, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03166443131864071, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03009202839806676, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.028589252624660732, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027171771954745055, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02558359006419778, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024120217319577933, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02284076387062669, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021149715427309274, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019729147758334876, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.017999900826252996, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.016641527246683836, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.015555504565127195, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.014416283345781267, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.01333329949527979, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.012364046655129642, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.011627670640591533, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01088354418054223, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.010251041541341693, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.009668332205619664, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.009150122774299234, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.008695291769690812, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.008249000159557908, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00790735884103924, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.007640703939832747, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.007327658106805757, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.007015925064915791, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006781754096737131, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006593520400347188, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006363379758549854, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006169109626207501, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005954915197798982, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005757086945232004, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005576907565118745, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.005433344772318378, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0053001585765741765, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.005168747768620961, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005098602735670283, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.005082475040690042, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.005099873918225057, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005170812483411283, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.005274851343710907, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.005397652224055491, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.005643708716379478, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00600533377728425, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.006230451469309628, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.5383541584014893, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.4721314907073975, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.3680977821350098, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.271310806274414, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.180699110031128, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.063176155090332, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.940600037574768, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.8155643939971924, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.6677743196487427, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.5289790630340576, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.408941626548767, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.2547473907470703, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.1301363706588745, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.9838799238204956, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.8729045391082764, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.7863497734069824, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.6973017454147339, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.6143940687179565, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.5416504144668579, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.48762446641921997, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.4338415861129761, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.3894033133983612, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.34923917055130005, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.3144490420818329, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.28483667969703674, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2558099031448364, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.23430894315242767, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.21774381399154663, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1986827701330185, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1800425797700882, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1662694364786148, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.15552383661270142, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.14330443739891052, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13379429280757904, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12397840619087219, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11529206484556198, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10697261989116669, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10007442533969879, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09380842745304108, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.08745918422937393, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.08333630859851837, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0809137374162674, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.07931676506996155, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.07814407348632812, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.07608228176832199, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.07353837043046951, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.07457907497882843, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.0804683119058609, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08386744558811188, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.36433531746031744, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.38293650793650796, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.4104662698412698, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.4419642857142857, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4670138888888889, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5104166666666666, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5642361111111112, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6175595238095238, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6708829365079365, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7395833333333334, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8125, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8606150793650794, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.876984126984127, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8931051587301587, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9010416666666666, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9052579365079365, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9117063492063492, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9154265873015873, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.921875, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9253472222222222, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9285714285714286, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.933531746031746, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9379960317460317, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9412202380952381, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9476686507936508, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.951140873015873, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.953125, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9590773809523809, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9719742063492064, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.16818718547308173, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.18285717044891012, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.20522476076020485, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.23145799667191969, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.256278769930136, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.302770378204166, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3578513767280261, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.40707076147002585, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4928762517913281, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.6464340000367697, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7702852143085508, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8368283733265407, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8581023173939963, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8805846652919224, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8885494323314787, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.89352245984352, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9016708603323038, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9068314981624572, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9132717489744747, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9169484031042514, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9203394910216434, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9252896025948019, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9294288088240479, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.932354490431945, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9397120448546732, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9438043967373155, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9469087091089902, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9498124654442526, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9523807700605942, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9553720417731756, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9576222872049373, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9601550386902686, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.962413719820997, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9658083752880467, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9659208181213381, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9688427194952413, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9708861251416314, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9720524191609835, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9731297895676528, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9727521078195023, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9731843055119708, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9726773602547438, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9722456686278917, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9713428091840005, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9728691351427646, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9762357152552849, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.973733307717846, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9694861148627906, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9647617244738479, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 0.07858152759261429, "validation/loss_best": 0.07353837043046951, "validation/acc_best": 0.9796626984126984, "validation/f1_best": 0.9762357152552849} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.6700447991490364, "train/grad": 0.11064903527498245, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.49290283203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.422193603515625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.31188232421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.20938720703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.1138897705078126, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.9906439208984374, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.8627850341796874, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.7336636352539063, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.5827632141113281, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.4426930236816407, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.3232879638671875, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.1722247314453125, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.0518056488037109, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.9129773712158203, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.8087558364868164, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.7283339118957519, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.6462542724609375, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.570340633392334, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.5040874099731445, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.45494123458862307, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.4061728000640869, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.3656501936912537, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.3291750168800354, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.2973904609680176, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2701852869987488, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.2435845583677292, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.22382169932127, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.2083079406619072, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.19042909771203995, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.17279543034732342, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.159634260982275, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.14923667959868908, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.1371435284987092, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.12775151893496514, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11804449256509543, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10959701905027032, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.10170147411525249, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09505675012245775, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08876634396612644, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08192301050759852, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0774974437057972, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07410644500516356, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07017457154579461, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06753575139679015, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.06526777585968375, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.06384977650828659, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.06391097443178297, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0665596011839807, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.07111215838231147, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03583848967216909, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03459281840361655, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03276829462498426, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.031216072253882884, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02989077491685748, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.028326037637889386, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.026829299004748463, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02539318862371147, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02374813084490597, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022214622404426336, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02088486370630562, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019166134786792098, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.017766884956508876, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.016122515094466508, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.014866609568707645, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.013887900821864606, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012871849723160267, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011923767570406198, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.011079995778854936, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010442983561661094, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.009802516750060022, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.009254874351900071, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.008757762687746435, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008315197764895857, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007927492230664938, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007542730297427624, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.007254030710319057, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.007022856515832245, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006755126744974404, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006479426049627364, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006277225081576034, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006105665058130399, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.005894043755251915, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.005725817546481267, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005519621628336608, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0053304897213820365, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005162983351619914, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00503895076806657, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.004917867196490988, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.004772750824922695, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.004678061816375703, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.004619933208450675, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00453040047781542, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.004495681721600704, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.004541135503677652, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.004622974582016468, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.004738637992995791, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00504603425913956, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.005480961062712595, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.44631028175354, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.3713316917419434, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.254732608795166, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.1470487117767334, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.0471243858337402, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.9189658164978027, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.7871341705322266, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.6552019119262695, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.5028042793273926, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.3633838891983032, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.2458184957504272, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.0991250276565552, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.9837828278541565, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.8519076108932495, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.7539510726928711, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.678672194480896, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.6020612120628357, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.5315271615982056, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.4697602391242981, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.4241996109485626, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.378824383020401, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.34138235449790955, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.30741992592811584, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.2779759168624878, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.2527760863304138, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2281506061553955, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.209675133228302, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1954401582479477, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.178990438580513, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.16288156807422638, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.15088312327861786, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.14161409437656403, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1308453232049942, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12240088731050491, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11336743831634521, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.10567007958889008, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.09883131086826324, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09329497814178467, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.08840371668338776, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0834793820977211, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.08049134165048599, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.07848935574293137, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.07606242597103119, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.07422735542058945, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.07217112183570862, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.07078580558300018, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.07070083171129227, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.07316083461046219, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08016762882471085, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.3861607142857143, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.40749007936507936, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.4417162698412698, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.4756944444444444, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5128968253968254, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5714285714285714, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6287202380952381, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6753472222222222, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7569444444444444, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8311011904761905, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8603670634920635, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.878968253968254, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8911210317460317, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8983134920634921, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9045138888888888, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.908234126984127, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9109623015873016, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9169146825396826, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9216269841269841, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9265873015873016, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9310515873015873, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9362599206349206, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9469246031746031, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.949156746031746, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9573412698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9590773809523809, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9747023809523809, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9744543650793651, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.1856744599298254, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.20231731036176, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.23015204322642047, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.26667209089769606, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3060574337769732, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3648954137033864, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.4184671118052718, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.49882360894234296, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.675373433117028, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7982415318459556, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8363883606467057, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8603242538041231, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8778912604467802, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8863020089051769, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8938534041826949, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8984287990582436, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9010352081952372, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9076001466876449, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9125747877007804, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9177191395994587, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9226200503542618, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.92848234967622, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9340366518682075, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9388489938696816, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9409956361470354, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9473695975246249, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9503683010317436, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9514180871492111, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9551259697678034, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9569121577825526, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9590814831678022, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.96067861277925, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9627020766011412, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9659510210283581, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9690569417860928, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.970588494175473, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9713140746624729, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9727007582965006, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.973965508773654, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9748777897206066, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9748450348679202, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9777407593991307, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9772505097727007, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9769323046799103, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9771617797855056, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9766871240928575, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9761365142570815, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9743813139991924, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9713451384389554, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.07410644500516356, "validation/loss_best": 0.07848935574293137, "validation/acc_best": 0.9809027777777778, "validation/f1_best": 0.9777407593991307} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.6226811341941356, "train/grad": 0.10402281574904919, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.40741455078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.329249267578125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.20771484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.0959487915039063, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.992708740234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.8606072998046874, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.725711669921875, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.5918281555175782, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.43872802734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.300305633544922, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.1851229095458984, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.042848129272461, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.9323427581787109, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.807318229675293, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.7152042388916016, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.6449463081359863, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.5738815212249756, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.5085290145874023, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.4517314863204956, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.40967569351196287, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.36786662578582763, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.3332161962985992, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.30182227849960325, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.2744682425260544, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2509182173013687, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.2276536187529564, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.21030528768897055, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.19667833045125008, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.18081992357969284, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.16502684339880944, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.15312069818377494, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.14379663705825807, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.13289275290444494, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.12443530546501279, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11570106863975525, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10816102409735322, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.10103198863565922, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0949417382851243, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08903297243639827, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08286142958328128, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07879658060148359, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07581302504055203, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07264976926147938, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.07020001025870443, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.06791150443255901, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.06634524614550173, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.06591696914285422, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.06699228297919035, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.07018237609416246, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.033973284121602776, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03269001154229045, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.030864764358848332, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.029357839170843364, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02808797176927328, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.026589635415002703, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.025139420852065088, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0237222458422184, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022091278173029423, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020583827467635273, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01930382861290127, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.017687792843207716, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.016407552063465117, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.014931461443193256, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.013826342793181538, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012971901181153954, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012088967114686966, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011262663546949625, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0105286008794792, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00996749892598018, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.009397130582947284, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008913234248757363, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.008456367345061153, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008053522696718574, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007690319498069584, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007318626859923825, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0070267271948978305, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0067929259070660915, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0065067677712067964, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006223702055867761, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006002723521087319, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0058363847259897735, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.005625493198167532, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00545868856832385, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005279635360930115, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005109993437654339, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004942261873511597, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004797567288042046, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.004665236733853817, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0045662267896113915, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.004506403004634194, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0044677172123920176, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.004425684986053966, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.004394202099065297, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.004350385635043494, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0043303855293197555, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.004345782002201304, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.004461341246496886, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.00461697198508773, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.365313768386841, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.2832071781158447, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.1561625003814697, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.0398356914520264, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.932908058166504, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.7969391345977783, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.6593377590179443, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.5238919258117676, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.370763897895813, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.233778476715088, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.120772123336792, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.9828442931175232, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.8762940168380737, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.7568313479423523, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.6693261861801147, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.602769672870636, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.5354155898094177, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.47366249561309814, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.4199090898036957, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3801359534263611, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.3407384753227234, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.30797818303108215, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.2783187925815582, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.2523779571056366, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.23029103875160217, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.20837193727493286, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.19205287098884583, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.17938576638698578, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.16466407477855682, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.14991185069084167, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13891752064228058, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.13027241826057434, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12041546404361725, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11311550438404083, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.10599026829004288, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.10011500120162964, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0945509523153305, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.08970892429351807, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.08523223549127579, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.08077482879161835, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.07762796431779861, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.07441932708024979, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.07065249979496002, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06912817806005478, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.06938200443983078, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06880603730678558, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.06714212894439697, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.06640234589576721, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.0674591213464737, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.408234126984127, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.43526785714285715, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.4722222222222222, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5148809523809523, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5647321428571429, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.623015873015873, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.673859126984127, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.738343253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8249007936507936, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8608630952380952, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8745039682539683, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8888888888888888, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8938492063492064, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9030257936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9077380952380952, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9124503968253969, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9174107142857143, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9221230158730159, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9260912698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9317956349206349, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.939484126984127, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9419642857142857, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9469246031746031, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9516369047619048, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9575892857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9610615079365079, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9704861111111112, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.980406746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.980406746031746, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.980406746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9799107142857143, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2024695065552428, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.22468768107069192, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.26166445914897, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.30701006833954186, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.35835512166728084, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.4119574879812789, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.4985717608698905, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6447230188992708, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7901592764202372, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8357437076723981, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8551865838993667, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8739069528984265, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.879529554090596, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8914108901051867, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8969362597030488, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9018619142523049, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9068139497608718, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9123880416218763, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9169314744865383, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9226606048892149, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9305117292594012, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9335479972787171, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9388921288720562, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9441400272366746, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.94701676571434, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9506980809930665, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9543843428535926, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9560302235956574, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9581082235441853, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9603736593396648, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9609993531473984, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9647007520882693, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9678142550325565, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9687811965696043, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9709623456518883, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.970503410099598, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9709382623282922, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9716419634390276, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9724506333894377, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9731460923336182, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9729755747795914, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9744829537849276, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9767388572593949, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9775312637010755, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9775803547098906, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.977594964239799, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9779052117341523, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9783208163686274, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9779527478130629, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.07020001025870443, "validation/loss_best": 0.06912817806005478, "validation/acc_best": 0.9806547619047619, "validation/f1_best": 0.9775312637010755} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.5811608108878136, "train/grad": 0.09941805753856897, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.3346771240234374, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.249931640625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.119002685546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.9994784545898439, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.8899090576171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.7513937377929687, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.611803741455078, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.475670166015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.3231924438476563, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.1883294677734375, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.0780213165283203, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.9444710922241211, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.8424725723266602, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.7287173461914063, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.6459654808044434, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.5832702255249024, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.5200316905975342, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.4621045684814453, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.411742787361145, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.37435437440872193, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.33720309734344484, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.3061524736881256, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.2780111169815063, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.2532204374670982, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2317709070444107, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.2104552921652794, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.1945265609025955, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.18182990483939648, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.16700935743749143, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.15225106611847877, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.1410555088147521, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.13222317960113286, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.12188461873680353, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.11386815899983048, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.10566502856090665, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.09850593034178018, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.09192375472746789, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.08647389096207916, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08133841601200402, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07591837282292545, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07235096951946617, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0694893835298717, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06627820971421898, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06387462843209506, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.06195585220120847, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.061060950960963964, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.06125810444355011, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.06204326287843287, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.06274190595373512, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03284422029741108, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03154972014948726, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.029740176731720566, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.028259375747293234, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02700616148300469, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.025504513243213296, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02401906351558864, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022563609573990107, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020894171250984073, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019378080680035055, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01811266186181456, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.016547427284531294, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01532799490261823, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013943385421298445, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012918083011172712, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012124166314024479, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.011310239259619265, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0105470952228643, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009860180178657174, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009340078700333834, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008809503614902497, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008354617569129913, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007929590854328126, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.007550459199119359, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007217755900928751, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006885828615631908, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006629739722702652, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006422857530415058, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006179844763828442, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005926721469732002, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.005719303282676265, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005556266292696819, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.005357521520927548, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00519961636280641, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005039373897598125, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0049013982561882585, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00478536345937755, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004700307209859602, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.004632256836048327, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.004566296105040237, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.004520775896380656, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.004477690903586335, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.004438047958537936, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.004392599174752832, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.004398269457742572, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0044916000822559, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.004608774654334411, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.004741124993306585, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.004787264124606736, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.29494571685791, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.206892490386963, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.071554183959961, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.9484463930130005, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.83620023727417, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.6950578689575195, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.5539613962173462, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.4174150228500366, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.2658896446228027, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.1328091621398926, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.0249167680740356, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.8950868248939514, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.7965803146362305, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.6871368288993835, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.6077576875686646, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.5476877689361572, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.48736318945884705, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.4320546090602875, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3841986358165741, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3486602306365967, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.31340864300727844, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.2842259407043457, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.25756123661994934, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.23423133790493011, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.21416763961315155, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.19436214864253998, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.17936623096466064, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.16769744455814362, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.15411019325256348, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.14070503413677216, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1305689960718155, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12278192490339279, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.11358833312988281, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10664553940296173, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.09945330768823624, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09336672723293304, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.08786098659038544, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.08371961116790771, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.08010480552911758, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.07706006616353989, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.07472351938486099, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.07291993498802185, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.07099904119968414, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06938738375902176, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.0674552470445633, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06800085306167603, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.07214411348104477, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.08076708763837814, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08617668598890305, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.4310515873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.45709325396825395, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5047123015873016, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.558531746031746, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6106150793650794, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.660218253968254, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7264384920634921, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8050595238095238, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8596230158730159, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.876984126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8871527777777778, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8953373015873016, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9037698412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.910218253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9141865079365079, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9186507936507936, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9233630952380952, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9283234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9325396825396826, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9384920634920635, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9441964285714286, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9476686507936508, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9516369047619048, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9548611111111112, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9568452380952381, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9598214285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9682539682539683, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.22177365980534444, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.24463748639612173, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.2955752414808239, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.3519519350859308, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.39828684248252394, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.4675988254443748, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6177557680817568, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7599732841809181, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8362981624513784, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8604255955838741, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8736982515394379, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8815201158360922, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8928782431381983, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.900071169165828, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9045631212315639, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9088930656025986, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9141344924837017, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9193238129400233, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9242011201287506, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9306996124962916, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9367295464408544, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9401554297645041, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9444776316293124, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9473679296375799, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9493568605711434, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9527184805327192, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9552223720365154, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9570437054211738, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9592071628304375, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9604742758483162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9627614779590229, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9653599576616443, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9675307535994664, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9698756832624699, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9714032974906145, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.972125162218452, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9720009985244091, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9717616498888287, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9728211520739214, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9747082522756999, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9754635970429952, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.975476954728386, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9756372198332324, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9760112086107069, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9754184809586183, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9741437548962774, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.972495494044404, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.970234884118242, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.967208892108566, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.06387462843209506, "validation/loss_best": 0.06938738375902176, "validation/acc_best": 0.9794146825396826, "validation/f1_best": 0.9760112086107069} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.5453243359923363, "train/grad": 0.09578289926052093, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.2712872314453123, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.1813232421875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.0429312133789064, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.9172689819335937, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.8029531860351562, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.6596258544921876, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.5170556640625, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.3799600219726562, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.2287905883789063, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.0972523498535156, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.9913407516479492, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.8646729278564453, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.7691147804260254, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.6636537170410156, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.587570447921753, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.5301201820373536, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.4725498056411743, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.41991629123687746, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.374273042678833, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.34047683477401736, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.3068659496307373, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2789476788043976, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.253523907661438, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.23118332743644715, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.21187370762228966, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.19269620656967162, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.17819994941353798, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.16674741730093956, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.15330854378640651, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.13988934732973576, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.12969531137496232, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.12161986824125051, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.11218459805473685, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.10489761881530285, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.09737957637757062, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.09088657001033425, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.08477122152224183, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07966419798322022, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07462389817461372, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.06912704279646277, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06555627587251366, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06282288152724505, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.060039325123652815, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.05828387065790594, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.05697623712942004, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05656959630548954, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.05675203216262162, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.056767436666414144, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.05690217511728406, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03182161351665855, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.030538873923942448, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02876438930630684, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02731113027781248, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.026062555741518736, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.024543236903846265, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023033053101971745, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0215526379738003, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01987967608962208, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.018388912039808927, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01716761177405715, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.015683280085213483, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.014548602304421366, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01327218560501933, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012332143876701594, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0116090168338269, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01086767913075164, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010170540676917882, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009549176839645951, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00907199173234403, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008585909956600517, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008168655177578331, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007775911467615515, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.007419729414395988, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007102528368122875, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006775481962831691, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006520830404479056, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006312334772665054, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006059935262892395, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005804994916543365, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.005608413945883513, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005446928436867893, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0052622611611150205, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.005116037152474746, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004964536131592467, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004844365202006884, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004726041434332728, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004625456735375337, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00450594920781441, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.004360484963981435, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.004272583385463804, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.004218013205099851, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.004200775426579639, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.004225013031391427, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.004274488017545082, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.004400397485878785, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.004534503257018514, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.004596673218766227, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.004674626272171736, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.235187292098999, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.142336130142212, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.00028657913208, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.872087001800537, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.7559146881103516, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.6113383769989014, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.4685375690460205, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.332152247428894, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.1832154989242554, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.054474115371704, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.9514849185943604, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.8289033770561218, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.7366986870765686, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.6351626515388489, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5621618628501892, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.5070220828056335, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.45185211300849915, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.4013477563858032, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.35755497217178345, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3250635266304016, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2928355038166046, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.26604005694389343, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.24169929325580597, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.22043707966804504, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.20210789144039154, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.18390882015228271, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.17020228505134583, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.15939119458198547, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.14673258364200592, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.13420933485031128, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12481708079576492, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11745940148830414, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10913684219121933, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10285201668739319, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.09667813777923584, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.0913073942065239, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0863797590136528, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.0820494070649147, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.07785515487194061, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0741737112402916, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.07228299230337143, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.07113423943519592, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06966467201709747, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06811783462762833, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.06678522378206253, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06711097806692123, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.06760066002607346, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.06687898188829422, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.06714945286512375, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.4476686507936508, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.4751984126984127, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5322420634920635, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5870535714285714, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6383928571428571, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6917162698412699, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7760416666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8392857142857143, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8725198412698413, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8851686507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8923611111111112, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9010416666666666, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9074900793650794, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9134424603174603, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9176587301587301, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9223710317460317, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9273313492063492, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9300595238095238, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9360119047619048, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9397321428571429, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9474206349206349, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9499007936507936, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9541170634920635, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9593253968253969, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.970734126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9747023809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9764384920634921, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2349617887128621, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2641984423347477, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.32465277910055707, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.377761179760713, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.4302722803302406, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5366829120241426, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.713124004914612, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8109634000541093, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8534581940267458, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8713938243329064, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8785511423613441, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8895201314014387, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8974309379079338, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9036809848789874, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9081986883807421, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.913728418055607, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9192227282626233, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.921832998317754, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9276934220097877, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9313209762810537, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9402799215188542, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.942864844901015, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9479495420965772, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9515051432111201, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9526899910357913, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9551402929034005, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.956864917527058, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9579242495283135, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9601744970899011, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.962421583700189, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9649519322463271, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9686042419440183, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9693569185862773, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9694380041468013, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9710417982064906, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9718157579940891, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.972252242435738, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9724353621880899, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9722624133968009, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9739683252736482, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9750274603218979, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9749416154356015, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9750352018514169, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9741823314594407, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9747006534080892, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9746015140507286, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9749451402659675, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.974996612606645, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9734222092310467, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.06282288152724505, "validation/loss_best": 0.07113423943519592, "validation/acc_best": 0.9791666666666666, "validation/f1_best": 0.9749416154356015} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.517425209581852, "train/grad": 0.09101692482829093, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.2108612060546875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.1161602783203124, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9712591552734375, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.8408816528320313, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.7231051635742187, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.5771820068359375, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.4338618469238282, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.2979330444335937, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.150193862915039, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.0233663177490235, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.9224969100952148, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.8030852890014648, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.7137287902832031, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.6158906364440918, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.5456212139129639, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.4927487277984619, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.43977118015289307, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.39147629976272585, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.349756600856781, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.3186796534061432, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2877836501598358, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.26218618988990783, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.23873509258031844, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.2181943428516388, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2003285400569439, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.18251571275293826, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.16893386654555798, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.15820265352725982, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.14558535527437924, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.13291557505726814, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.1232770599424839, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.11566231079399586, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.10672364458441734, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.09980156812816858, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.09274378623813391, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.08664727638475597, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.08089166761375964, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07605001519434154, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07142324609681965, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.06649915914051235, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0633695274963975, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06075601805001497, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.057748599080368876, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.05540541968308389, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0535690850391984, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05280596638098359, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.05260288550518453, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.052311346679925916, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.052106341272592546, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03088543032296002, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.029553575878962873, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.027717098873108626, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.026209179954603316, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.024908217452466488, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023323178701102735, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021755436044186352, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020241139689460397, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01856218295637518, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.017096016220748425, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.015914674513041974, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014503987156786025, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013439765786752106, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012262993669137358, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011406580954790115, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010753801511600613, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010091576238628476, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0094743287935853, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00892781459260732, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008513813926838339, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008095132429152727, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007734508286230266, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007394055649638176, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00708008639048785, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006795714964391663, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006491263342322782, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006244625794934109, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006044327352428809, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005797938827890902, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005541138574481011, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0053369246493093665, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005170148051693104, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004973083239165134, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00481642386817839, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004665732862195, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004536553643993102, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004417362682288512, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00431802764069289, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00423378081235569, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0041725189855787905, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0041346023144433275, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.004089322804065887, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.003999620949325617, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.003920950167521369, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.003952251996088308, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0040992241224739705, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.004219631683081388, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.004256891090190038, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.004274476490973029, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.185168743133545, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.0886223316192627, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.941335678100586, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.8092650175094604, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.6904304027557373, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.5437054634094238, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.400390625, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.2651169300079346, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.11893892288208, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.9942655563354492, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.8952357769012451, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.7787298560142517, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.691659152507782, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.596405029296875, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5281832218170166, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4768129289150238, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4253520369529724, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3784480094909668, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3378448784351349, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.307595819234848, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2776434123516083, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.2525653839111328, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.22988489270210266, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.2098277062177658, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.19234316051006317, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.17521800100803375, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1621609926223755, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.15189388394355774, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.13994625210762024, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12804432213306427, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1191662922501564, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11220762878656387, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1040990874171257, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.09804942458868027, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.09198033064603806, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08700387924909592, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.08267316967248917, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07930092513561249, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.07590453326702118, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.07216423004865646, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06992079317569733, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06870074570178986, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06775760650634766, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06746724247932434, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.06701020151376724, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06582260131835938, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.0646277517080307, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.06370839476585388, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.06373214721679688, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.46056547619047616, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.49528769841269843, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5572916666666666, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6160714285714286, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.660218253968254, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7301587301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8127480158730159, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8588789682539683, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.878968253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8876488095238095, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8953373015873016, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.904265873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9079861111111112, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9141865079365079, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9186507936507936, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9233630952380952, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9270833333333334, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9320436507936508, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9384920634920635, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9437003968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9476686507936508, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9513888888888888, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9538690476190477, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9595734126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9791666666666666, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.24859946431266722, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.28718027452466677, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.3509772288465872, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.40476573614346245, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.46684182750876957, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6213408858228673, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7745133104612811, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8360660883751543, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8630311741863819, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8737875627788847, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8835239451846051, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8940071828518245, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8977177696971647, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9046456233363204, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9093770332734998, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9147236060223067, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9191336014473928, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9240920797910174, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9306176478416504, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9360546889298846, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9404923651647062, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9442263941243395, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9463975854525091, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9493664764162917, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9521725783183091, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9553345993008642, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9566670740968499, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9583038462216743, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9621099890273778, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9636464949306506, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9657032742736298, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9686714787867349, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.969852208711802, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9699061653918903, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9710709839226559, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9715438154699507, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9720882524529616, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9737012039980423, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9755722231363035, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9775361278519412, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9770727821379446, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9753728505836025, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9750484636001013, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.975454758702732, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9763610912957716, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9763577526262649, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.976537214176253, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9772205997049028, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9780705978011435, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.06649915914051235, "validation/loss_best": 0.07216423004865646, "validation/acc_best": 0.9806547619047619, "validation/f1_best": 0.9775361278519412} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.4996423818171024, "train/grad": 0.08840087566524744, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.1720220947265627, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.074395751953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9255694580078124, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.7921148681640624, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.6722531127929687, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.524632110595703, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.3808653259277344, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.2456124877929688, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.1002275848388672, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.9767554473876953, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.8791547393798829, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.7646480560302734, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.679399242401123, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5865400218963623, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.5201027870178223, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.4702870225906372, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.4203763866424561, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.37477067708969114, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.3351838207244873, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.3058810353279114, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.27668834030628203, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.25222492694854737, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.2299968734383583, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.210332899838686, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.1932305271923542, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.17611695736646651, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.16316628865897656, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.1529303017631173, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.14082976743578912, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.1287305796146393, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.11955253835767507, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.11221037890762091, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.10357784751802683, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.09693170823156834, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.08999370032921433, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.08401506546884775, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07835263890214264, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.073638556599617, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.06900462586432696, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0638961558137089, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06042527509853244, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.057753344653174284, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.05473417770117521, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.05251924108713865, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.05042446809820831, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.04875351442955434, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.04769414185546338, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.046953025702387095, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.047007130738347766, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03012320668436587, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02884187312796712, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.027078417735174297, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.025614618714898826, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.024338208427652715, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022768671605736018, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021212571142241357, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019715667641721665, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0180708940140903, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016645230408757924, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.015501934620551765, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014144948655739427, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013116560694761574, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011978760382626207, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011146163309458643, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0105113883363083, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009859630013816059, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009248134423978626, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008704103010240943, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008290685357060283, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007864972350653261, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0074954441259615125, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007144893057411537, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006824677782133221, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006532540995394811, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006231883866712451, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005992868347093463, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005794732087524608, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0055599454778712245, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005313639978412539, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.005121047897264361, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00496839723084122, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0047838871192652736, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004642977135954424, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004498885249486193, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004370939615764655, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004249358737724833, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004143155031488277, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.004032832123921253, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0038845333171775563, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.003770643877214752, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.003674838396254927, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.003577285794308409, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.003513029059395194, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0034560975353815594, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.003425198495970108, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.003423349793301895, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0034668598396820015, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0035563185944920404, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.1449921131134033, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.0452771186828613, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.8941431045532227, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.7591458559036255, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.6382768154144287, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.4903188943862915, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.346863865852356, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.2128945589065552, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.0693840980529785, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.9480271339416504, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.8525212407112122, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.7406962513923645, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.6575025320053101, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5672446489334106, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5025334358215332, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4540557563304901, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.40551072359085083, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.36118847131729126, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3228907287120819, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.29427826404571533, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2660196125507355, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.2424263209104538, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.22084124386310577, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.20192821323871613, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.18545183539390564, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.16914232075214386, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.15691259503364563, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1471172571182251, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.13593119382858276, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12459564208984375, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.11617515236139297, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10950099676847458, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10162093490362167, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.09560982882976532, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08941322565078735, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08435603231191635, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07980626821517944, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07623489201068878, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.07303690165281296, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.07008317857980728, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.0680856928229332, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06649894267320633, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06474759429693222, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06341595202684402, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.062455762177705765, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06184384971857071, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.06158393621444702, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.06200656294822693, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.06320661306381226, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.47197420634920634, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5094246031746031, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5768849206349206, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6354166666666666, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6778273809523809, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7596726190476191, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8360615079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8665674603174603, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8826884920634921, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8916170634920635, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8990575396825397, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9052579365079365, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9107142857142857, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9161706349206349, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9211309523809523, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9250992063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9288194444444444, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.935515873015873, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9402281746031746, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9454365079365079, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.949156746031746, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9536210317460317, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9565972222222222, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.972718253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9747023809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9784226190476191, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9813988095238095, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2603069346258128, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3012302355755902, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.37088657731883323, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.4286169714256086, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5040325105844199, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.683642252332364, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8081436097251249, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8457432304102914, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8692079834211588, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8776728499527389, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8875976346816824, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8950466813901599, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9008384437338663, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.907129801873069, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9121630464326003, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9164492710991667, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9208959841046287, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9269210384143607, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9319559578846844, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9379626154038186, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9420488463566594, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9469660817961432, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9497270960677078, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9524192698209049, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9548495077381137, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9570267984163556, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.958098875230171, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9602693963938217, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.962005627805384, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9642256033444655, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9656982343243886, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9679096407246204, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9687180491538411, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9703457385210913, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9723228119906515, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9724251303249095, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9745736878247564, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9763388034879507, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9760690855040379, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9764272837772829, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9761567000497047, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9767095924771166, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9767246575888562, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9775344364151672, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9775919689033582, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9773869943996716, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9792840475692517, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9794651166611181, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9799239067419759, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.047007130738347766, "validation/loss_best": 0.06320661306381226, "validation/acc_best": 0.9813988095238095, "validation/f1_best": 0.9799239067419759} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.4823566949367523, "train/grad": 0.08615778774023056, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.134619140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.0341339111328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.881707763671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.7456356811523437, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.6240325927734376, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.4753085327148439, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.3316993713378906, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.19796875, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.0551543426513672, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.9350187301635742, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.8406022262573242, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.7305044746398925, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.6490176391601562, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5605781745910644, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4974463367462158, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.45027636051177977, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.40297104597091676, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3597988772392273, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.3223481273651123, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.29455123484134677, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.26684451997280123, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2436372956633568, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.22237027317285538, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.20343581572175026, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.18690072610974312, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.17030437894165515, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.1576218132674694, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.14749542236328125, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.1356368024647236, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.12357663054019213, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.11447945803403854, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.10720352062955499, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.09866884941235185, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.09202785575762391, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.08510201528668404, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07909362421371043, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07335469138808548, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06845300304703414, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0637891159299761, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05870293961837888, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05527021840214729, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.052634885050356385, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04952041131444276, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.04726318358443678, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.045099699860438706, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.04311080599203706, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.041626919005066154, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.03994907623156905, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0389312893897295, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.029532228987663986, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02825991249643266, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0265082604624331, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.025042813634499908, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023757304046303034, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022171543892472983, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0206039156857878, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01910921575501561, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017475685751996935, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016076832888647915, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014966501425951718, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013650751714594662, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01266643566545099, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011578186394181102, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010783691725227982, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010175236037466675, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009553792255464941, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008970243420917541, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00844728588592261, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008045622371137142, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0076345987198874355, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007278585752937943, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00694697797880508, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00664448193507269, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006368937224615365, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006086539668031037, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00586578113026917, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005681715762475506, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005465290737338364, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005240162663394585, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.005056725216563791, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00490946619422175, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00472897358587943, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0045894991181558, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004434360685991123, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004296419397578574, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00415699198783841, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004030919131473638, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.003906840116833336, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.003761382864031475, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.003662905765522737, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.003586255587870255, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00349940886895638, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0034420472662895917, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0034012370725395157, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.003387643305468373, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0033794724679319186, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.003369508147297893, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.003385613912541885, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.11368465423584, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.011671781539917, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.8574655055999756, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.7204309701919556, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.598210096359253, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.4494221210479736, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.3063143491744995, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.1733893156051636, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.0321197509765625, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.9135104417800903, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.8207751512527466, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.7124664187431335, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.6323631405830383, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5456388592720032, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.48365336656570435, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4373348355293274, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.39089086651802063, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.34864404797554016, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.31205084919929504, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.2847703695297241, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2577355206012726, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.23506712913513184, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.2143069952726364, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.19603483378887177, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.18014194071292877, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.16430188715457916, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.15228186547756195, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.14292636513710022, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.131835475564003, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1209782287478447, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.11288243532180786, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10653377324342728, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.09916617721319199, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.09377266466617584, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08819181472063065, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08358436077833176, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07931982725858688, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07574059069156647, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.07251834869384766, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0692441388964653, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06692806631326675, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06536553800106049, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06382095068693161, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06289073079824448, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.06208403781056404, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06088884174823761, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.05993418022990227, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.059256426990032196, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.05889028310775757, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.4818948412698413, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5208333333333334, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5915178571428571, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6483134920634921, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6956845238095238, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7847222222222222, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8472222222222222, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8735119047619048, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8871527777777778, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8975694444444444, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9017857142857143, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.90625, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9139384920634921, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9186507936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9241071428571429, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9263392857142857, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9308035714285714, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9375, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9429563492063492, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9471726190476191, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9516369047619048, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9543650793650794, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9563492063492064, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9615575396825397, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.980406746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.982390873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9831349206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2724002160499942, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3131461666225561, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.3836097970081687, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.4454804227331122, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5428098931329871, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7297538740589073, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8212649007875409, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8566798966633992, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.873688849552767, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8854478971177838, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8907764197587262, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8960528551998966, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9041841338750655, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9094764571258008, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9149932625631161, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9180707545775552, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9228954967363421, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9292456676609333, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9352357934028341, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9401428625647997, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9446092835348149, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9475236816629117, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9495925700433568, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9527589412275355, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9547913004781695, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9565921555281756, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.958959156129143, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.961166573454038, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9640944332083079, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9673118211448928, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9702519580191924, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9700436775772943, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9704888685837462, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9730055070190348, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9730559233001692, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.973976264312527, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9747942998967807, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9765499289623134, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.977985852799146, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9794193553445374, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9793251666377356, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9796219864786229, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9799388658528356, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9797107174052548, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9791615231571802, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9807954673939246, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9813299116342284, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9813722934361702, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9812393566292605, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 0.03994907623156905, "validation/loss_best": 0.059256426990032196, "validation/acc_best": 0.9831349206349206, "validation/f1_best": 0.9813722934361702} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.46571480229496953, "train/grad": 0.08491282187402248, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.095301208496094, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.9921743774414062, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.8362570190429688, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.697852783203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5750852966308593, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.4257566833496094, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.2827407836914062, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1504692077636718, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.0105126953125, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8935328292846679, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.8023545455932617, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.6963163566589355, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.618344783782959, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5341050434112549, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4740532350540161, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.42921780586242675, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3843498134613037, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3434766292572021, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.30800035953521726, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.28165466666221617, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2552784502506256, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2332020714879036, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.21291894629597663, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.19478473469614982, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.17907799169421196, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.16319635823369028, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.15107250839471817, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.14143632333725692, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.1300097045302391, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.11856119718402625, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.10985561991110444, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.10294935652986169, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.09477010222151876, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.088417394105345, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0818539872765541, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07614607292227447, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07074067872948944, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.066203902522102, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.061783641539514066, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05698557890020311, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05366222560405731, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.05109705630689859, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04804734416306019, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.04570895695127547, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.043395097656175496, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.04134241033345461, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.03990174995735288, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.03850242258049548, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.037567923283204435, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02930560098029673, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02803532828576863, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.026284878412261604, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.024810778554528953, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023512884881347416, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021911484571173787, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02033270141109824, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018835126222111285, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017208337876945735, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01581972185987979, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014717181781306862, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013413934907875956, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012439096155576408, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011359648061916233, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010573177658952772, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00997084243223071, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009357237759977578, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008781935486476869, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008274916044902056, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007884791502729058, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007484994139522314, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007140803183428943, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00681213095318526, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006509187809424475, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006236704797483981, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005943704821402207, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005714875777484849, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005525451422436163, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005289967668941245, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005049428065540269, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0048578862217254935, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004705346182454377, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004518141350708902, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0043697766412515195, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004208175252424553, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004066564212553203, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.003927968127536588, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.003806032985448837, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0036878314532805233, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.003555973936454393, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0034592474024975673, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0033762360265245663, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0032807270222110674, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.003206989638856612, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.003137656967737712, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.003078296918538399, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0030464297518483364, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.003024513180134818, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0030163434907444753, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.0903470516204834, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9867933988571167, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.8304234743118286, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.6918648481369019, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.5688531398773193, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.419519305229187, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.2766339778900146, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.1448198556900024, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.0052732229232788, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8886162638664246, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7978119850158691, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6922236084938049, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.6143435835838318, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5300864577293396, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.470077246427536, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4251697361469269, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.38024380803108215, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3393271565437317, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.30386948585510254, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.27769267559051514, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.25134286284446716, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.2294502854347229, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.2093922644853592, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.19164223968982697, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.17634166777133942, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.160868838429451, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.14926768839359283, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.14006227254867554, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12929441034793854, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11852025985717773, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.11050327867269516, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10424146056175232, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.09709179401397705, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.09170477837324142, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08627066016197205, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08184930682182312, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07790239155292511, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07466831058263779, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.0717955008149147, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06900368630886078, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06726307421922684, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06594321131706238, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06454342603683472, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06348514556884766, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.0625772625207901, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06192495673894882, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.06149773299694061, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.06126272305846214, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.06130165606737137, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.4905753968253968, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5324900793650794, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6046626984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6584821428571429, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7132936507936508, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8018353174603174, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8551587301587301, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8762400793650794, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8891369047619048, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8973214285714286, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9025297619047619, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9077380952380952, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9124503968253969, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9186507936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9226190476190477, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.925843253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9392361111111112, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9434523809523809, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9481646825396826, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9508928571428571, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.955109126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9575892857142857, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9600694444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.966765873015873, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.96875, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.980406746031746, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9806547619047619, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.28234366424744456, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.32692463713531106, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.39498220627221864, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.460277039015171, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5845599527163886, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7571614675016178, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8307386425241341, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8593491753446433, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8756082351266389, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8846975860689812, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8916249824709778, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.897097711579384, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9027138844037337, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9092925316849599, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9138167729075071, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9174481954337746, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9242564964720983, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9309739081758155, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.935488714154397, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9412649703103313, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9435151013475804, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9472376120248589, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9502606670251542, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9531494345807511, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9554168646757385, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9572101771087257, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9590224324374512, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9600541861225503, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9621672548407881, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9655706573454317, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9684681388392934, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9704568387823118, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9720862891141282, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9726111258742725, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.973422145907667, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9732435368156986, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9734399683687058, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9744879653716504, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9748901879700702, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.976883980221238, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9766063833969815, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9762106134741225, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9761638646611956, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9762054919647445, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9759335190006523, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9757390081487008, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9763717648727899, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9767467576717831, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9764187692279666, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.05698557890020311, "validation/loss_best": 0.06900368630886078, "validation/acc_best": 0.9806547619047619, "validation/f1_best": 0.976883980221238} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.45882311806082726, "train/grad": 0.08417606547474861, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.079361572265625, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.9750653076171876, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.817506103515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.6780459594726562, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5544589233398438, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.4046786499023438, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.2616753387451172, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1301251983642577, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9914936828613281, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8760125350952148, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.7863301467895508, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.6825581169128419, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.6063219928741455, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5240670108795166, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4656663990020752, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.4218642044067383, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3781267476081848, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3383476209640503, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.3036768686771393, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2778790408372879, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.25208504021167755, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.230417640209198, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.21054940029978753, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.19286568641662596, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.17729882270097733, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.1616076398268342, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.1495859356969595, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.14007277406752108, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.12872898500412702, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.11730330338701606, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.10857402149587869, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.10165670996531845, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.09340809351764619, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.08701781802810729, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.08033393803983926, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07449391110800206, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06893201069906355, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06420101867988706, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.059589505540207026, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05454310374334455, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05111186589114368, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04841227171011269, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04530276264064014, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.04300745314918458, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.040721208667382595, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.03872694617137313, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.03733116221614182, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.03599692902527749, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.03519594823010266, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02903701922856271, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.027778551178053022, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.026037346003577114, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.024571883771568537, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023278474155813456, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02168646623380482, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0201235256716609, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018644004757516087, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017048986684530972, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015692164143547415, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014617693121545016, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013348504188470542, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012394475759938359, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011342333115171641, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010570463687181474, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009982367993798107, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009374848913867027, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008807956434320658, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008300122022628784, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007914226450957358, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00751339369919151, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007164772371761501, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00683531733811833, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006526976068271324, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006243858413072303, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005949430300388485, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0057124313071835785, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0055147825262974945, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005275242202333175, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005023258064175025, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004829097675392404, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004669453677488491, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004475589619833045, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004321226444444619, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00415686889726203, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004008522988879121, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.003864159098593518, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.003734770354931243, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.003604240960557945, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0034502155997324733, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0033422747030272147, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0032558678940404207, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0031517569988500325, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0030736557228374295, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0029985695716459302, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00293970533326501, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0029010873951483516, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0028722766583086923, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.002870489921187982, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.074272632598877, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9696778059005737, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.811882495880127, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.672141432762146, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.5485032796859741, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.398908257484436, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.2563155889511108, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.125179409980774, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.9868934750556946, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8718747496604919, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7821555733680725, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6784008741378784, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.6020709276199341, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5194840431213379, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.46087488532066345, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.41696855425834656, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.37307310104370117, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.33312931656837463, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.2985386550426483, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.27276331186294556, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2470913529396057, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.22566558420658112, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.20597559213638306, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.18866688013076782, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.17341971397399902, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.15838593244552612, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.14691314101219177, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.13786393404006958, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12724249064922333, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11676517128944397, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1088477224111557, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1028086319565773, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0956825390458107, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.09044654667377472, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0851336196064949, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08077193051576614, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07692770659923553, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07380332052707672, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.07119089365005493, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06860577315092087, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06706423312425613, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06621995568275452, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06564109027385712, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06558066606521606, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.06571630388498306, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06587322801351547, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.066243976354599, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.06618575751781464, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.06561074405908585, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.4955357142857143, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5384424603174603, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6130952380952381, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6646825396825397, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7232142857142857, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8125, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8598710317460317, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8759920634920635, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8916170634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8980654761904762, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9037698412698413, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9092261904761905, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9149305555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9203869047619048, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.923859126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9283234126984127, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9399801587301587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9449404761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.949156746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9528769841269841, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9553571428571429, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.964781746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.970734126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9784226190476191, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9779265873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2876361819364894, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.33260597319593826, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.40285359111044017, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.47679426844698664, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6083489387885405, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7736811637343753, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8369385766538733, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8589028834914488, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8775401181364918, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8855118651150006, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8928315305005645, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8992176075290369, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9051169032189424, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.911240456631894, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9152640453820401, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9199790645836274, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9245223363526245, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.931826944476817, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9370470478126536, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9417406050249274, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9456158640839538, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9486282911656774, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9512344937405095, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9536192433701894, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9564878208803391, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9582199542389315, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.959663190797144, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9612399749023138, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9651595133627076, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9689361244255685, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.970840819398854, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9721970312255452, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9731701535191986, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9729948145983162, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9742627982039137, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9743740267335312, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9749132696848025, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9754505873257077, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9764347081660999, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9768881788013443, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9777914944023546, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.977156531966182, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9775174700434603, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9777407448540972, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.977472600408124, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.977068896432135, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.977068896432135, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9767418981527518, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9769703747951924, "id_best": 38, "lr_best": 0.00294, "wd_best": 0.05, "train/loss_best": 0.059589505540207026, "validation/loss_best": 0.07119089365005493, "validation/acc_best": 0.9794146825396826, "validation/f1_best": 0.9764347081660999} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.45476209208369256, "train/grad": 0.08414901979267597, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.065510559082031, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.96038818359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.8018655395507812, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.6618392944335938, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5380296325683593, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.3883412170410157, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.246118621826172, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1154232788085938, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.97821044921875, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8642458724975586, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.7758566284179688, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.673682689666748, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5987975692749024, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5181086158752441, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4606725835800171, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.4177438259124756, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.37491053104400635, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.33583510875701905, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.30182281613349915, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.27646150708198547, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.25109885454177855, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.22975638449192048, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.21013849452137948, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.19260330393910408, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.17713477827608584, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.16159193575382233, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.1496489791572094, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.1400887793302536, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.12875376734882593, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.11730243016034364, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.10851386887952685, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.10153941104188562, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.09328958282247185, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.08689058161340654, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.08018038367852569, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07438265052624046, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06889243067242205, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06421816736459732, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.059795741718262435, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.054873354928568006, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05153545185923576, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04887902734801173, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04585007662884891, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.04352326677180827, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.04117454175837338, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.03907390696927905, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.037541190581396225, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0360646902769804, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0351423383038491, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.029125094125047327, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02785474739037454, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.026094931317493318, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02460121036507189, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023280625650659203, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021654034219682216, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020064405351877212, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01856410547159612, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016953609129413964, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015589340422302485, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014513772451318801, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013251333022490144, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012307462207973003, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01126539750257507, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010506832667160779, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009925991552881897, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009332408390473574, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008772079409100115, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008271113256923854, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007885335760656744, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007490786691196263, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0071449865819886325, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0068178481771610676, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006511784577742219, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006233664200408384, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005939746593357995, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0057042453379835935, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005512019681045786, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005273414632538334, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005023372543510049, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00482328089827206, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004660816566902213, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00446584694494959, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0043101302318973465, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004139663553214632, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.003990582724800334, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0038422068528598174, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0037144316342892125, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0035909257119055835, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0034481593340751713, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.003350336533912923, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0032734421294298953, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0031849826980032956, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0031158568972023204, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0030397489841561764, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0029724178317701443, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0029202938833623195, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.002889209249406122, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0028868873114697635, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.064135789871216, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9589606523513794, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.8001179695129395, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.6598691940307617, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.5358954668045044, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.38605535030365, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.2437164783477783, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.112813115119934, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.9754619002342224, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8612074851989746, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7724633812904358, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6696943640708923, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5944012403488159, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5129694938659668, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.455084890127182, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4119175672531128, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.36852553486824036, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3291952908039093, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.295086145401001, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.2697460949420929, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2443617284297943, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.22315427660942078, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.20385496318340302, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.18666808307170868, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.17164942622184753, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1567857712507248, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.14545132219791412, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.13658055663108826, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1260581612586975, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11579623818397522, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1080072745680809, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10188227146863937, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.09499417990446091, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08976998180150986, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08456980437040329, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08015494048595428, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07635615766048431, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07329603284597397, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.07050473242998123, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0678400844335556, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.0662132129073143, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06507575511932373, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.0640825554728508, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06344394385814667, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.0631491169333458, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06290826946496964, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.06248282641172409, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.061868421733379364, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.061254359781742096, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.49875992063492064, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5426587301587301, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6168154761904762, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.667906746031746, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7306547619047619, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8172123015873016, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8618551587301587, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8764880952380952, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8916170634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8988095238095238, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.904265873015873, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9097222222222222, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9156746031746031, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9206349206349206, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9248511904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9288194444444444, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9337797619047619, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9397321428571429, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9456845238095238, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9489087301587301, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9526289682539683, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.955109126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9613095238095238, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9794146825396826, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.29034542556462367, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.336862763878483, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.4075271676223682, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.4827288200048613, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6227810230004949, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7823411118350149, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8394428577690557, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8598811053818426, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8782123587141875, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8867144329767027, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8936520386952476, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8998291010978057, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9055723391719651, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9113141466562708, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9164858161364007, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9208805076150842, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9252429839287977, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9313698066290932, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9380977076411421, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9415757316458898, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9453910344704072, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9482689089505478, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9502439288540918, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9541576929841944, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9562251436014956, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9575003797854758, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9599046555028702, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.962246991188551, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9653018832845428, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9683916804509675, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9711091807312073, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9729245835325377, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9732875762441698, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9736579839574014, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9740979720171697, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9743232935503634, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.974827413489851, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9757268017827326, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9773029151637426, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9773086570446364, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9771435006736494, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9769203366334098, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9769287693681589, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9765689166702929, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9765303342479131, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9770615744869021, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.977019948156063, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.97738055112416, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.977762426115348, "id_best": 38, "lr_best": 0.00294, "wd_best": 0.05, "train/loss_best": 0.059795741718262435, "validation/loss_best": 0.07050473242998123, "validation/acc_best": 0.9801587301587301, "validation/f1_best": 0.9773029151637426} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.4513836732506752, "train/grad": 0.08268973432481289, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.0621762084960937, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.9563638305664062, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.7970230102539062, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.6562106323242187, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5318443298339843, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.381746826171875, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.2391332244873048, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1084564971923827, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9712413787841797, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8574361038208008, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.7691989517211915, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.6673916816711426, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5928080749511718, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5124314117431641, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4554241132736206, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.4127657890319824, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.37025397539138794, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3315223228931427, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2978602069616318, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2727827614545822, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2477778187394142, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.22673241823911666, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.20731026470661162, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.18996373265981675, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.17478752359747887, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.1594080686569214, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.147640445753932, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.13817384410649539, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.1270459357649088, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.11578439086675645, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.10716990131884813, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.10027811836451292, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.09209911862388254, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.08578923802822828, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.07911610282957554, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0733253558818251, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06772949596866966, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06298100246116518, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.058343496210873126, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05324726630933583, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.04976959479041398, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04699570402503014, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04381129772402346, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.04138453481718898, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.03893551543354988, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.03676491151563823, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.035216614855453375, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.03360621667467058, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.032541141379624605, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.028805973725393414, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.027542047584429383, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02578102529980242, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0242921744287014, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022974218409508466, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021356281954795123, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019775434606708588, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018288835543207826, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016691135079599916, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01534023274667561, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01427282728254795, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01301618967205286, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012073936751112341, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011030982169322669, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010271118259988726, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009688968360424042, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009089254960417747, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008531863642856479, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008032214541453869, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007647502969484776, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0072558803134597834, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006921080320607871, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006596959225134924, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0062957264843862505, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006026288926368579, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005744198418688029, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005515566470567137, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005326730840606615, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005100603268947452, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0048619213339407, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004677231385139749, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004523373702540994, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0043364874355029314, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004185727888252586, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004021344635984861, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00387362293433398, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.003725467263138853, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0035948289866792036, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0034583156212465837, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0033052720394334757, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0031996573865762912, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0031118698374484667, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0030090180612751282, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0029264425387373195, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0028403523706947453, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0027687562501523644, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0027078820028691554, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0026437275420175864, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0026030906732194127, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.059028387069702, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.953297734260559, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.794067621231079, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.653388500213623, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.529220461845398, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.3794735670089722, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.2370649576187134, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.1064305305480957, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.9693499207496643, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8556239008903503, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7671754360198975, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6651912331581116, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5902974009513855, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5094656348228455, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4519578516483307, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4090072810649872, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.3661491572856903, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3271453380584717, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.29320457577705383, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.2680400013923645, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.24288715422153473, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.22190594673156738, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.2025907039642334, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.18558162450790405, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.17072251439094543, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.15585780143737793, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.14462365210056305, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.13564947247505188, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12522612512111664, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11489441245794296, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10700567811727524, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10100235790014267, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.09406236559152603, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.0887446403503418, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08353579044342041, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07907785475254059, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07514549046754837, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07204506546258926, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.06914437562227249, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06624065339565277, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.0645224079489708, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06309719383716583, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.0617351308465004, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06076634302735329, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.059735845774412155, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.059069886803627014, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.05855630710721016, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.05814088508486748, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.057936616241931915, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.49975198412698413, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5436507936507936, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6185515873015873, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6684027777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.730406746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8199404761904762, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8628472222222222, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8777281746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8931051587301587, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8990575396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9050099206349206, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.910218253968254, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9151785714285714, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9201388888888888, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9248511904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9283234126984127, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9340277777777778, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9389880952380952, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9461805555555556, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.949156746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9536210317460317, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9553571428571429, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.957093253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.982390873015873, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9818948412698413, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2914116765002305, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.33808857982796325, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.40911067474287177, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.4848151416990629, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6257310987583283, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7864551471674216, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8401100285219394, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.861165147119322, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8795501151229975, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8870951302123099, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8946294933158176, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8999302828749143, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9056078293250299, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9108091231215583, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9159917883642988, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9199999322032572, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9253376216017328, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9303206888399542, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9385549743625302, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9420294608980665, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9462682970735423, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9484487656429538, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9505273796712452, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9545349041949385, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9562251436014956, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9584019575852045, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9603439103872583, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9624199337549536, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.964800838958585, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9683807038377193, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.970576471556923, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9725475401472556, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.973289673579921, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9739298376953287, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.973742841507207, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.973479484057959, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.975690719745397, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9767611098056996, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.977661445488987, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9779162534833855, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9782811284075974, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9787280900961859, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9783795387491129, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9783370356038652, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9792444714162811, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9792444714162811, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9793760984730584, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9804165860347548, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9800604531580972, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 0.03360621667467058, "validation/loss_best": 0.05814088508486748, "validation/acc_best": 0.982390873015873, "validation/f1_best": 0.9804165860347548} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.4515386889874935, "train/grad": 0.08313124977052212, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.06139892578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.9556552124023439, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.7963931274414062, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.655655975341797, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5312205505371095, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.3812420654296875, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.2386376190185546, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1079633331298828, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9709992599487305, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8574209976196289, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.7693580436706543, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.6676973533630371, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5931507110595703, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5127208805084229, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4555713176727295, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.41295811653137204, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.37020102977752684, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3313723635673523, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.29763261556625364, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2725054895877838, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.24745881855487822, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.22643854707479477, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.20712320864200592, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.18986236721277236, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.17471552819013594, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.15946562189608812, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.14779832441359758, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.13840709898620843, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.1274272446706891, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.11623665262013674, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.10762110607698559, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1008106108661741, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0927167686726898, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.08645178687758744, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.07981705156154931, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07403233401477337, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06848890813998879, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06377656294964254, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.059174904096871615, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.054141400195658204, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05059521908871829, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0478393382485956, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04463667121715844, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.042095840740948916, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.039571129977703096, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.03722410417161882, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.03548814522102475, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.03373117769137025, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.03249471635557711, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02887611048296094, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.027605611523613335, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.025840149093419314, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.024341631587594746, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023011816795915366, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02138194376602769, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0197903124243021, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01829930699430406, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01670650952029973, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015362142333760858, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014306903360411525, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013070545606315136, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012150291786529124, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011133377554360778, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01039259706158191, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00982961670961231, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009251752148848026, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008709657327271999, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00822971161454916, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007854733725544065, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007468966653104872, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007133228160673752, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006809268014039844, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006506667215144262, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006230963231064379, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005931230410933495, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005692549340892583, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005492066957522183, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005246504025999457, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.004983739034505561, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004780507098184898, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004610730965505354, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004404815155430697, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0042386442859424275, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004064578012912534, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.003903301090467721, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0037448412080993876, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0036053662153426556, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.003464829682779964, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0033041460771346463, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0031834416178753598, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0030866809957660734, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.002969453306286596, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0028713586734374985, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.002768685009505134, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0026760250830557197, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.002607090373057872, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.002533530729706399, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.002483805324009154, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.056997060775757, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9510763883590698, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.7916845083236694, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.650975227355957, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.5266677141189575, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.3768072128295898, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.234359622001648, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.1039109230041504, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.967144250869751, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8533564209938049, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7651948928833008, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6633985042572021, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5885918736457825, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5080448985099792, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4507259130477905, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.40794286131858826, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.3652072548866272, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3262927830219269, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.2924782633781433, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.267428994178772, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.24235723912715912, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.22139698266983032, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.2021007388830185, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.1852128505706787, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1703861504793167, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.15554624795913696, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.14432451128959656, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.13538780808448792, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12504374980926514, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11466751247644424, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10691861063241959, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10088743269443512, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.09394225478172302, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08868047595024109, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08344883471727371, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07904753088951111, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07514027506113052, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07204434275627136, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.06916004419326782, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06629844009876251, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.064463309943676, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06305365264415741, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06169695034623146, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.060671206563711166, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.05971170589327812, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.058801162987947464, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.05824858695268631, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.05762052908539772, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.057188279926776886, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5004960317460317, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5438988095238095, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6202876984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6691468253968254, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7326388888888888, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8226686507936508, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8628472222222222, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8777281746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8918650793650794, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8990575396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9052579365079365, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9097222222222222, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9144345238095238, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9203869047619048, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9246031746031746, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9288194444444444, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9345238095238095, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.939484126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9459325396825397, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.949156746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9538690476190477, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9556051587301587, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.970734126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.982390873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.982390873015873, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.982390873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2920587899590128, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.33844467759504776, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.41120557315312967, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.48690215629008166, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6294617288921484, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7902197116798393, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8401641335084937, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8613433788129861, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8781146747784625, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8869560089913995, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8947697608235161, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.899655917229967, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9048159473688584, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9110301839119643, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9160598583707656, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9206692019926915, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9259216532808342, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9308734568888175, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9383695205925542, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9419408773209235, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9464912839533978, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9487212696269355, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9507081766561495, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9539955308689834, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9562251436014956, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9586691203592143, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9606134130809503, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9632310796007089, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9651609420700499, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9686515440159272, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9701834375641779, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9723689623581128, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.973270084111701, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9739102482271087, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9734962983674462, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.973479484057959, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9760938766766057, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9760429849493574, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9780131221634827, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.978187789153826, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9781974567072947, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9785599115740927, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9789232939454483, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9798741258275732, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9801405678914018, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9803202211820574, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9801414556317919, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9803202211820574, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9803228959832928, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 0.03722410417161882, "validation/loss_best": 0.058801162987947464, "validation/acc_best": 0.982390873015873, "validation/f1_best": 0.9803202211820574} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.4476894773542881, "train/grad": 0.0817364240810275, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.0550323486328126, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.94895263671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.7893032836914062, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.6483570861816406, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5237226867675782, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.373692169189453, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.2311431884765625, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1006598663330078, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9639627456665039, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8506889724731446, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.763015422821045, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.6619093322753906, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5878998470306397, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5081365394592285, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.45149808406829833, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.4093481159210205, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.36702747821807863, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3285097694396973, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.29509936928749086, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.27007723450660703, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2450728100538254, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.22402482688426972, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.20468736067414284, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.1873593906313181, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.17220665335655214, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.15685060635209083, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.145125818438828, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.1356912650913, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.1245363749563694, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.11329052869230509, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.10467185899615288, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.09782661903649569, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.08970331519842148, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0833782716281712, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.07684172732755541, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0710677700676024, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06561244512908161, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06095727743580937, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.05641047813929617, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05147547067143023, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.048001370867714284, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.045272320685908196, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04206909849308431, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.03965219574980438, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0371486378274858, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.034873711923137304, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0331500572245568, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.03147787733003497, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.03031249813735485, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.028499796502292157, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.027243450917303562, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.025503830006346106, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02402778563089669, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022717062113806606, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021108708716928958, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01953248239122331, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018049479853361847, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016460009743459524, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015117294173687696, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014062306117266417, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012823676117695868, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.011900093457661569, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010883873992133886, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010148143463302403, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009588004967663438, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.00900871902704239, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008472217987291515, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007990695640910418, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007620052222628146, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007239115820266306, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006910797714954242, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006591350261587649, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006300456618191674, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006033552126027643, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005747948394855484, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0055231821991037575, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005336278689792379, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00510348136536777, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.004860382133629173, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004668317475588993, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004509253049618565, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00431164454028476, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004152572834282182, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.003980478078592569, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00382010827481281, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0036625886603724213, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0035194672649959103, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0033732340869028123, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0032041890561231414, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.003080153163464274, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.002980091645149514, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0028550990021904, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.002756304360809736, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0026526205000118354, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0025543122077942826, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.002478075070248451, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0024026259692618624, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0023500099543889517, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.056727647781372, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9507545232772827, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.7913316488265991, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.6506150960922241, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.5263080596923828, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.3763998746871948, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.234041452407837, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.1036078929901123, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.966801106929779, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8530623316764832, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7649586200714111, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6631960272789001, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5883839726448059, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5078599452972412, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4505597651004791, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4077795743942261, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.365009605884552, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3261600434780121, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.29234787821769714, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.2673201858997345, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2422875314950943, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.22128374874591827, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.20207752287387848, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.185215562582016, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.17032739520072937, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.15550893545150757, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.14427824318408966, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.13535380363464355, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12501280009746552, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11464223265647888, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10688091814517975, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10085205733776093, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.09389225393533707, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08866046369075775, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08338837325572968, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07898447662591934, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07514939457178116, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07200595736503601, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.0691303089261055, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06627853959798813, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06446819007396698, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06306905299425125, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06160375103354454, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.060612693428993225, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.05966072529554367, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.058792319148778915, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.058214589953422546, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.05758722871541977, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.05707860738039017, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5012400793650794, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5441468253968254, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6200396825396826, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6691468253968254, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7326388888888888, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8234126984126984, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8635912698412699, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8774801587301587, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8918650793650794, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8990575396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9050099206349206, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9097222222222222, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9144345238095238, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9203869047619048, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9250992063492064, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9288194444444444, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9342757936507936, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9392361111111112, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9459325396825397, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.949156746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9543650793650794, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9556051587301587, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.970734126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.982390873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.982390873015873, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9826388888888888, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.29286444182036964, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3388184401031974, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.41084385199650725, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.48614939037944016, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6295150814628365, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7910526016708699, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8412482968668513, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8608143861468234, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8781146747784625, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8869560089913995, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8944871655016244, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.899658659534249, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9048159473688584, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9110301839119643, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9168790902992976, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9206692019926915, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9256540501911914, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9306069192319293, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9383695205925542, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9419408773209235, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9468958755496456, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9487212696269355, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9507107799778434, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9539897494433733, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9562251436014956, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9586691203592143, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9598865754696775, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9632310796007089, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9651609420700499, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9684731319406291, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9704544212746781, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9725475401472556, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.973270084111701, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9739102482271087, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9739019092109958, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9737501338460524, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.975511076603776, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9762286992955408, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9780131221634827, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9787296877658511, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9787385496414752, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9785707467212348, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9791019320128308, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9794708724999462, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9801405678914018, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9803202211820574, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9801405678914018, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9803202211820574, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.980498942978973, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.03031249813735485, "validation/loss_best": 0.05707860738039017, "validation/acc_best": 0.9826388888888888, "validation/f1_best": 0.980498942978973} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/config.yaml b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6478715e8cc1461dc51a26a40eef190bc31c923b --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (hcpya_task21 reg linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear +model: flat_mae +representation: reg +classifier: linear +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..7b2429ce7b45f45ebeb35ccde4dc97cc6346ec5d --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 13, "eval/id_best": 33, "eval/lr_best": 0.00129, "eval/wd_best": 0.05, "eval/train/loss": 0.07881108671426773, "eval/train/acc": 0.9806831938523081, "eval/train/acc_std": 0.001004795455155122, "eval/train/f1": 0.9817011814272549, "eval/train/f1_std": 0.0009872080021991895, "eval/validation/loss": 0.13199007511138916, "eval/validation/acc": 0.9575892857142857, "eval/validation/acc_std": 0.0029500813680348823, "eval/validation/f1": 0.9476952135405334, "eval/validation/f1_std": 0.004169931962571726, "eval/test/loss": 0.1504492163658142, "eval/test/acc": 0.9519841269841269, "eval/test/acc_std": 0.0029252862321808137, "eval/test/f1": 0.9425772738954178, "eval/test/f1_std": 0.0038393499359640304} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_best.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..b8ac2af9574929b1e56405f9a2df09018974f82d --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 13, "eval/best/id_best": 33, "eval/best/lr_best": 0.00129, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.07881108671426773, "eval/best/train/acc": 0.9806831938523081, "eval/best/train/acc_std": 0.001004795455155122, "eval/best/train/f1": 0.9817011814272549, "eval/best/train/f1_std": 0.0009872080021991895, "eval/best/validation/loss": 0.13199007511138916, "eval/best/validation/acc": 0.9575892857142857, "eval/best/validation/acc_std": 0.0029500813680348823, "eval/best/validation/f1": 0.9476952135405334, "eval/best/validation/f1_std": 0.004169931962571726, "eval/best/test/loss": 0.1504492163658142, "eval/best/test/acc": 0.9519841269841269, "eval/best/test/acc_std": 0.0029252862321808137, "eval/best/test/f1": 0.9425772738954178, "eval/best/test/f1_std": 0.0038393499359640304} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_last.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..67c025aacce0eced2f3380ef2434ab6543766007 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 35, "eval/last/lr_best": 0.0018, "eval/last/wd_best": 0.05, "eval/last/train/loss": 0.060294657945632935, "eval/last/train/acc": 0.9866308753092268, "eval/last/train/acc_std": 0.0008357775871157448, "eval/last/train/f1": 0.9878552084646418, "eval/last/train/f1_std": 0.0007907689421859001, "eval/last/validation/loss": 0.12642061710357666, "eval/last/validation/acc": 0.9558531746031746, "eval/last/validation/acc_std": 0.0030896194106203047, "eval/last/validation/f1": 0.9457554712137102, "eval/last/validation/f1_std": 0.0043275502845066685, "eval/last/test/loss": 0.14610043168067932, "eval/last/test/acc": 0.9551587301587302, "eval/last/test/acc_std": 0.0027401803367486425, "eval/last/test/f1": 0.9455601598906218, "eval/last/test/f1_std": 0.0036275710526126456} diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..2a2fefde69375b1765688606d506dee9b55793a1 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,reg,linear,hcpya_task21,best,13,0.00129,0.05,33,"[4.3, 1.0]",train,0.07881108671426773,0.9806831938523081,0.001004795455155122,0.9817011814272549,0.0009872080021991895 +flat_mae,reg,linear,hcpya_task21,best,13,0.00129,0.05,33,"[4.3, 1.0]",validation,0.13199007511138916,0.9575892857142857,0.0029500813680348823,0.9476952135405334,0.004169931962571726 +flat_mae,reg,linear,hcpya_task21,best,13,0.00129,0.05,33,"[4.3, 1.0]",test,0.1504492163658142,0.9519841269841269,0.0029252862321808137,0.9425772738954178,0.0038393499359640304 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_best.csv b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..2a2fefde69375b1765688606d506dee9b55793a1 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,reg,linear,hcpya_task21,best,13,0.00129,0.05,33,"[4.3, 1.0]",train,0.07881108671426773,0.9806831938523081,0.001004795455155122,0.9817011814272549,0.0009872080021991895 +flat_mae,reg,linear,hcpya_task21,best,13,0.00129,0.05,33,"[4.3, 1.0]",validation,0.13199007511138916,0.9575892857142857,0.0029500813680348823,0.9476952135405334,0.004169931962571726 +flat_mae,reg,linear,hcpya_task21,best,13,0.00129,0.05,33,"[4.3, 1.0]",test,0.1504492163658142,0.9519841269841269,0.0029252862321808137,0.9425772738954178,0.0038393499359640304 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_last.csv b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..f6dedc373213815cb4d39bf4dcae5145bc56cd33 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,reg,linear,hcpya_task21,last,19,0.0018,0.05,35,"[6, 1.0]",train,0.060294657945632935,0.9866308753092268,0.0008357775871157448,0.9878552084646418,0.0007907689421859001 +flat_mae,reg,linear,hcpya_task21,last,19,0.0018,0.05,35,"[6, 1.0]",validation,0.12642061710357666,0.9558531746031746,0.0030896194106203047,0.9457554712137102,0.0043275502845066685 +flat_mae,reg,linear,hcpya_task21,last,19,0.0018,0.05,35,"[6, 1.0]",test,0.14610043168067932,0.9551587301587302,0.0027401803367486425,0.9455601598906218,0.0036275710526126456 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/log.txt b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..27c6dc26ed6b69ff707bd276ecdccfaacfa80074 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/log.txt @@ -0,0 +1,886 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 21:41:48 +config: +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (hcpya_task21 reg linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear +model: flat_mae +representation: reg +classifier: linear +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (reg): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x LinearClassifier( + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 0.8M (0.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:20:46 lr: nan time: 3.1162 data: 2.7798 max mem: 3910 +train: [0] [ 20/400] eta: 0:02:52 lr: 0.000003 loss: 3.1948 (3.2123) grad: 0.6661 (0.6969) time: 0.3217 data: 0.0025 max mem: 3951 +train: [0] [ 40/400] eta: 0:02:18 lr: 0.000006 loss: 3.1918 (3.1967) grad: 0.6965 (0.6961) time: 0.3110 data: 0.0031 max mem: 3951 +train: [0] [ 60/400] eta: 0:02:02 lr: 0.000009 loss: 3.1422 (3.1718) grad: 0.7146 (0.6989) time: 0.3119 data: 0.0038 max mem: 3951 +train: [0] [ 80/400] eta: 0:01:51 lr: 0.000012 loss: 3.0533 (3.1361) grad: 0.6904 (0.6861) time: 0.3156 data: 0.0039 max mem: 3951 +train: [0] [100/400] eta: 0:01:42 lr: 0.000015 loss: 2.9345 (3.0874) grad: 0.6320 (0.6801) time: 0.3066 data: 0.0036 max mem: 3951 +train: [0] [120/400] eta: 0:01:34 lr: 0.000018 loss: 2.8286 (3.0409) grad: 0.6207 (0.6692) time: 0.3278 data: 0.0042 max mem: 3951 +train: [0] [140/400] eta: 0:01:27 lr: 0.000021 loss: 2.7765 (2.9952) grad: 0.5969 (0.6591) time: 0.3271 data: 0.0042 max mem: 3951 +train: [0] [160/400] eta: 0:01:20 lr: 0.000024 loss: 2.6511 (2.9436) grad: 0.5730 (0.6502) time: 0.3176 data: 0.0041 max mem: 3951 +train: [0] [180/400] eta: 0:01:13 lr: 0.000027 loss: 2.5246 (2.8936) grad: 0.5785 (0.6434) time: 0.3294 data: 0.0042 max mem: 3951 +train: [0] [200/400] eta: 0:01:06 lr: 0.000030 loss: 2.4677 (2.8461) grad: 0.5823 (0.6380) time: 0.3164 data: 0.0039 max mem: 3951 +train: [0] [220/400] eta: 0:00:59 lr: 0.000033 loss: 2.3708 (2.8020) grad: 0.5586 (0.6319) time: 0.3244 data: 0.0038 max mem: 3951 +train: [0] [240/400] eta: 0:00:53 lr: 0.000036 loss: 2.3333 (2.7603) grad: 0.5568 (0.6261) time: 0.3296 data: 0.0045 max mem: 3951 +train: [0] [260/400] eta: 0:00:46 lr: 0.000039 loss: 2.2539 (2.7191) grad: 0.5592 (0.6208) time: 0.3207 data: 0.0040 max mem: 3951 +train: [0] [280/400] eta: 0:00:39 lr: 0.000042 loss: 2.2135 (2.6813) grad: 0.5426 (0.6141) time: 0.3200 data: 0.0043 max mem: 3951 +train: [0] [300/400] eta: 0:00:33 lr: 0.000045 loss: 2.1463 (2.6438) grad: 0.5147 (0.6075) time: 0.4536 data: 0.1530 max mem: 3951 +train: [0] [320/400] eta: 0:00:26 lr: 0.000048 loss: 2.0863 (2.6082) grad: 0.5124 (0.6028) time: 0.3267 data: 0.0038 max mem: 3951 +train: [0] [340/400] eta: 0:00:20 lr: 0.000051 loss: 2.0499 (2.5748) grad: 0.5084 (0.5975) time: 0.3128 data: 0.0029 max mem: 3951 +train: [0] [360/400] eta: 0:00:13 lr: 0.000054 loss: 2.0143 (2.5422) grad: 0.4970 (0.5917) time: 0.3322 data: 0.0042 max mem: 3951 +train: [0] [380/400] eta: 0:00:06 lr: 0.000057 loss: 1.9767 (2.5115) grad: 0.4970 (0.5869) time: 0.3257 data: 0.0042 max mem: 3951 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.9125 (2.4806) grad: 0.5012 (0.5827) time: 0.3251 data: 0.0042 max mem: 3951 +train: [0] Total time: 0:02:14 (0.3352 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.9125 (2.4806) grad: 0.5012 (0.5827) +eval (validation): [0] [ 0/63] eta: 0:03:15 time: 3.0986 data: 2.8598 max mem: 3951 +eval (validation): [0] [20/63] eta: 0:00:19 time: 0.3230 data: 0.0236 max mem: 3951 +eval (validation): [0] [40/63] eta: 0:00:08 time: 0.3018 data: 0.0075 max mem: 3951 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.2890 data: 0.0040 max mem: 3951 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.2882 data: 0.0040 max mem: 3951 +eval (validation): [0] Total time: 0:00:22 (0.3532 s / it) +cv: [0] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.241 acc: 0.925 f1: 0.913 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:20:19 lr: nan time: 3.0498 data: 2.8007 max mem: 3951 +train: [1] [ 20/400] eta: 0:02:50 lr: 0.000063 loss: 1.8808 (1.8826) grad: 0.4627 (0.4764) time: 0.3187 data: 0.0035 max mem: 3951 +train: [1] [ 40/400] eta: 0:02:20 lr: 0.000066 loss: 1.8510 (1.8586) grad: 0.4627 (0.4749) time: 0.3298 data: 0.0030 max mem: 3951 +train: [1] [ 60/400] eta: 0:02:05 lr: 0.000069 loss: 1.8308 (1.8452) grad: 0.4702 (0.4722) time: 0.3266 data: 0.0044 max mem: 3951 +train: [1] [ 80/400] eta: 0:01:54 lr: 0.000072 loss: 1.7872 (1.8216) grad: 0.4700 (0.4715) time: 0.3224 data: 0.0043 max mem: 3951 +train: [1] [100/400] eta: 0:01:45 lr: 0.000075 loss: 1.7728 (1.8082) grad: 0.4529 (0.4676) time: 0.3192 data: 0.0043 max mem: 3951 +train: [1] [120/400] eta: 0:01:37 lr: 0.000078 loss: 1.7170 (1.7947) grad: 0.4385 (0.4640) time: 0.3282 data: 0.0042 max mem: 3951 +train: [1] [140/400] eta: 0:01:29 lr: 0.000081 loss: 1.7150 (1.7789) grad: 0.4352 (0.4624) time: 0.3237 data: 0.0042 max mem: 3951 +train: [1] [160/400] eta: 0:01:22 lr: 0.000084 loss: 1.6692 (1.7627) grad: 0.4301 (0.4607) time: 0.3346 data: 0.0043 max mem: 3951 +train: [1] [180/400] eta: 0:01:15 lr: 0.000087 loss: 1.6341 (1.7491) grad: 0.4301 (0.4592) time: 0.3559 data: 0.0044 max mem: 3951 +train: [1] [200/400] eta: 0:01:08 lr: 0.000090 loss: 1.6072 (1.7328) grad: 0.4138 (0.4544) time: 0.3518 data: 0.0043 max mem: 3951 +train: [1] [220/400] eta: 0:01:02 lr: 0.000093 loss: 1.5788 (1.7192) grad: 0.4134 (0.4517) time: 0.3563 data: 0.0044 max mem: 3951 +train: [1] [240/400] eta: 0:00:55 lr: 0.000096 loss: 1.5636 (1.7035) grad: 0.4258 (0.4495) time: 0.3458 data: 0.0044 max mem: 3951 +train: [1] [260/400] eta: 0:00:48 lr: 0.000099 loss: 1.5178 (1.6899) grad: 0.4074 (0.4461) time: 0.3453 data: 0.0043 max mem: 3951 +train: [1] [280/400] eta: 0:00:41 lr: 0.000102 loss: 1.5088 (1.6762) grad: 0.4181 (0.4449) time: 0.3298 data: 0.0037 max mem: 3951 +train: [1] [300/400] eta: 0:00:35 lr: 0.000105 loss: 1.4937 (1.6640) grad: 0.4181 (0.4426) time: 0.4947 data: 0.1778 max mem: 3951 +train: [1] [320/400] eta: 0:00:28 lr: 0.000108 loss: 1.4899 (1.6525) grad: 0.3915 (0.4393) time: 0.3348 data: 0.0033 max mem: 3951 +train: [1] [340/400] eta: 0:00:21 lr: 0.000111 loss: 1.4550 (1.6402) grad: 0.3839 (0.4358) time: 0.3208 data: 0.0034 max mem: 3951 +train: [1] [360/400] eta: 0:00:14 lr: 0.000114 loss: 1.4363 (1.6284) grad: 0.3751 (0.4336) time: 0.3527 data: 0.0040 max mem: 3951 +train: [1] [380/400] eta: 0:00:07 lr: 0.000117 loss: 1.4014 (1.6156) grad: 0.3751 (0.4311) time: 0.3429 data: 0.0040 max mem: 3951 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.3769 (1.6034) grad: 0.3962 (0.4295) time: 0.3410 data: 0.0038 max mem: 3951 +train: [1] Total time: 0:02:20 (0.3509 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.3769 (1.6034) grad: 0.3962 (0.4295) +eval (validation): [1] [ 0/63] eta: 0:03:40 time: 3.5050 data: 3.2151 max mem: 3951 +eval (validation): [1] [20/63] eta: 0:00:20 time: 0.3353 data: 0.0038 max mem: 3951 +eval (validation): [1] [40/63] eta: 0:00:09 time: 0.3450 data: 0.0036 max mem: 3951 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3236 data: 0.0036 max mem: 3951 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3134 data: 0.0037 max mem: 3951 +eval (validation): [1] Total time: 0:00:24 (0.3891 s / it) +cv: [1] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.175 acc: 0.942 f1: 0.926 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:27:21 lr: nan time: 4.1039 data: 3.8174 max mem: 3951 +train: [2] [ 20/400] eta: 0:03:20 lr: 0.000123 loss: 1.3630 (1.3696) grad: 0.4135 (0.4101) time: 0.3475 data: 0.0023 max mem: 3951 +train: [2] [ 40/400] eta: 0:02:39 lr: 0.000126 loss: 1.3676 (1.3731) grad: 0.3818 (0.3937) time: 0.3531 data: 0.0035 max mem: 3951 +train: [2] [ 60/400] eta: 0:02:18 lr: 0.000129 loss: 1.3588 (1.3604) grad: 0.3773 (0.3889) time: 0.3331 data: 0.0036 max mem: 3951 +train: [2] [ 80/400] eta: 0:02:05 lr: 0.000132 loss: 1.2839 (1.3396) grad: 0.3732 (0.3854) time: 0.3534 data: 0.0040 max mem: 3951 +train: [2] [100/400] eta: 0:01:55 lr: 0.000135 loss: 1.2815 (1.3301) grad: 0.3740 (0.3844) time: 0.3443 data: 0.0042 max mem: 3951 +train: [2] [120/400] eta: 0:01:45 lr: 0.000138 loss: 1.2938 (1.3220) grad: 0.3809 (0.3812) time: 0.3367 data: 0.0044 max mem: 3951 +train: [2] [140/400] eta: 0:01:35 lr: 0.000141 loss: 1.2713 (1.3138) grad: 0.3496 (0.3773) time: 0.3177 data: 0.0041 max mem: 3951 +train: [2] [160/400] eta: 0:01:27 lr: 0.000144 loss: 1.2505 (1.3053) grad: 0.3488 (0.3748) time: 0.3411 data: 0.0043 max mem: 3951 +train: [2] [180/400] eta: 0:01:19 lr: 0.000147 loss: 1.2467 (1.2982) grad: 0.3580 (0.3737) time: 0.3554 data: 0.0043 max mem: 3951 +train: [2] [200/400] eta: 0:01:12 lr: 0.000150 loss: 1.2007 (1.2887) grad: 0.3580 (0.3726) time: 0.3456 data: 0.0041 max mem: 3951 +train: [2] [220/400] eta: 0:01:04 lr: 0.000153 loss: 1.2083 (1.2821) grad: 0.3428 (0.3708) time: 0.3405 data: 0.0042 max mem: 3951 +train: [2] [240/400] eta: 0:00:57 lr: 0.000156 loss: 1.2100 (1.2743) grad: 0.3380 (0.3680) time: 0.3395 data: 0.0041 max mem: 3951 +train: [2] [260/400] eta: 0:00:49 lr: 0.000159 loss: 1.1598 (1.2646) grad: 0.3385 (0.3666) time: 0.3381 data: 0.0039 max mem: 3951 +train: [2] [280/400] eta: 0:00:42 lr: 0.000162 loss: 1.1499 (1.2567) grad: 0.3360 (0.3641) time: 0.3339 data: 0.0042 max mem: 3951 +train: [2] [300/400] eta: 0:00:36 lr: 0.000165 loss: 1.1308 (1.2474) grad: 0.3272 (0.3626) time: 0.5111 data: 0.1897 max mem: 3951 +train: [2] [320/400] eta: 0:00:29 lr: 0.000168 loss: 1.1231 (1.2415) grad: 0.3272 (0.3604) time: 0.3246 data: 0.0123 max mem: 3951 +train: [2] [340/400] eta: 0:00:21 lr: 0.000171 loss: 1.1234 (1.2338) grad: 0.3261 (0.3593) time: 0.3186 data: 0.0039 max mem: 3951 +train: [2] [360/400] eta: 0:00:14 lr: 0.000174 loss: 1.1070 (1.2264) grad: 0.3280 (0.3583) time: 0.3479 data: 0.0039 max mem: 3951 +train: [2] [380/400] eta: 0:00:07 lr: 0.000177 loss: 1.1212 (1.2209) grad: 0.3252 (0.3566) time: 0.3409 data: 0.0039 max mem: 3951 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 1.0859 (1.2125) grad: 0.3182 (0.3550) time: 0.3445 data: 0.0040 max mem: 3951 +train: [2] Total time: 0:02:23 (0.3583 s / it) +train: [2] Summary: lr: 0.000180 loss: 1.0859 (1.2125) grad: 0.3182 (0.3550) +eval (validation): [2] [ 0/63] eta: 0:03:23 time: 3.2304 data: 3.0221 max mem: 3951 +eval (validation): [2] [20/63] eta: 0:00:20 time: 0.3344 data: 0.0040 max mem: 3951 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3625 data: 0.0033 max mem: 3951 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3177 data: 0.0034 max mem: 3951 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3138 data: 0.0031 max mem: 3951 +eval (validation): [2] Total time: 0:00:24 (0.3884 s / it) +cv: [2] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.170 acc: 0.943 f1: 0.934 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:21:48 lr: nan time: 3.2712 data: 3.0500 max mem: 3951 +train: [3] [ 20/400] eta: 0:03:10 lr: 0.000183 loss: 1.0592 (1.0591) grad: 0.3162 (0.3235) time: 0.3638 data: 0.0269 max mem: 3951 +train: [3] [ 40/400] eta: 0:02:31 lr: 0.000186 loss: 1.0278 (1.0392) grad: 0.3054 (0.3157) time: 0.3364 data: 0.0031 max mem: 3951 +train: [3] [ 60/400] eta: 0:02:15 lr: 0.000189 loss: 1.0353 (1.0517) grad: 0.3027 (0.3135) time: 0.3507 data: 0.0036 max mem: 3951 +train: [3] [ 80/400] eta: 0:02:04 lr: 0.000192 loss: 1.0612 (1.0490) grad: 0.3083 (0.3127) time: 0.3632 data: 0.0041 max mem: 3951 +train: [3] [100/400] eta: 0:01:54 lr: 0.000195 loss: 1.0261 (1.0442) grad: 0.3138 (0.3134) time: 0.3525 data: 0.0047 max mem: 3951 +train: [3] [120/400] eta: 0:01:45 lr: 0.000198 loss: 1.0138 (1.0406) grad: 0.3067 (0.3109) time: 0.3509 data: 0.0044 max mem: 3951 +train: [3] [140/400] eta: 0:01:36 lr: 0.000201 loss: 0.9868 (1.0307) grad: 0.3023 (0.3118) time: 0.3424 data: 0.0041 max mem: 3951 +train: [3] [160/400] eta: 0:01:28 lr: 0.000204 loss: 0.9771 (1.0257) grad: 0.3136 (0.3124) time: 0.3471 data: 0.0042 max mem: 3951 +train: [3] [180/400] eta: 0:01:20 lr: 0.000207 loss: 0.9973 (1.0253) grad: 0.3136 (0.3121) time: 0.3498 data: 0.0043 max mem: 3951 +train: [3] [200/400] eta: 0:01:13 lr: 0.000210 loss: 0.9747 (1.0183) grad: 0.3062 (0.3112) time: 0.3483 data: 0.0040 max mem: 3951 +train: [3] [220/400] eta: 0:01:05 lr: 0.000213 loss: 0.9456 (1.0116) grad: 0.2926 (0.3095) time: 0.3324 data: 0.0041 max mem: 3951 +train: [3] [240/400] eta: 0:00:57 lr: 0.000216 loss: 0.9457 (1.0073) grad: 0.2805 (0.3079) time: 0.3382 data: 0.0040 max mem: 3951 +train: [3] [260/400] eta: 0:00:50 lr: 0.000219 loss: 0.9723 (1.0057) grad: 0.2937 (0.3073) time: 0.3370 data: 0.0038 max mem: 3951 +train: [3] [280/400] eta: 0:00:42 lr: 0.000222 loss: 0.9671 (1.0014) grad: 0.2912 (0.3057) time: 0.3399 data: 0.0042 max mem: 3951 +train: [3] [300/400] eta: 0:00:36 lr: 0.000225 loss: 0.9333 (0.9970) grad: 0.2878 (0.3047) time: 0.5141 data: 0.1822 max mem: 3951 +train: [3] [320/400] eta: 0:00:29 lr: 0.000228 loss: 0.9164 (0.9913) grad: 0.2868 (0.3035) time: 0.3364 data: 0.0038 max mem: 3951 +train: [3] [340/400] eta: 0:00:21 lr: 0.000231 loss: 0.8957 (0.9860) grad: 0.2784 (0.3023) time: 0.3070 data: 0.0038 max mem: 3951 +train: [3] [360/400] eta: 0:00:14 lr: 0.000234 loss: 0.8957 (0.9819) grad: 0.2705 (0.3004) time: 0.3560 data: 0.0030 max mem: 3951 +train: [3] [380/400] eta: 0:00:07 lr: 0.000237 loss: 0.8806 (0.9758) grad: 0.2703 (0.2994) time: 0.3464 data: 0.0043 max mem: 3951 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 0.8858 (0.9729) grad: 0.2728 (0.2986) time: 0.3329 data: 0.0038 max mem: 3951 +train: [3] Total time: 0:02:24 (0.3602 s / it) +train: [3] Summary: lr: 0.000240 loss: 0.8858 (0.9729) grad: 0.2728 (0.2986) +eval (validation): [3] [ 0/63] eta: 0:03:32 time: 3.3707 data: 3.1150 max mem: 3951 +eval (validation): [3] [20/63] eta: 0:00:20 time: 0.3370 data: 0.0250 max mem: 3951 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3232 data: 0.0030 max mem: 3951 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3187 data: 0.0033 max mem: 3951 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3094 data: 0.0034 max mem: 3951 +eval (validation): [3] Total time: 0:00:23 (0.3794 s / it) +cv: [3] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.144 acc: 0.951 f1: 0.942 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:22:26 lr: nan time: 3.3675 data: 3.0935 max mem: 3951 +train: [4] [ 20/400] eta: 0:03:28 lr: 0.000243 loss: 0.8759 (0.8676) grad: 0.2706 (0.2755) time: 0.4069 data: 0.0044 max mem: 3951 +train: [4] [ 40/400] eta: 0:02:41 lr: 0.000246 loss: 0.8600 (0.8620) grad: 0.2681 (0.2749) time: 0.3453 data: 0.0037 max mem: 3951 +train: [4] [ 60/400] eta: 0:02:21 lr: 0.000249 loss: 0.8454 (0.8555) grad: 0.2644 (0.2723) time: 0.3461 data: 0.0040 max mem: 3951 +train: [4] [ 80/400] eta: 0:02:07 lr: 0.000252 loss: 0.8210 (0.8485) grad: 0.2542 (0.2694) time: 0.3491 data: 0.0043 max mem: 3951 +train: [4] [100/400] eta: 0:01:56 lr: 0.000255 loss: 0.8236 (0.8440) grad: 0.2593 (0.2695) time: 0.3457 data: 0.0040 max mem: 3951 +train: [4] [120/400] eta: 0:01:46 lr: 0.000258 loss: 0.8356 (0.8431) grad: 0.2710 (0.2706) time: 0.3430 data: 0.0037 max mem: 3951 +train: [4] [140/400] eta: 0:01:36 lr: 0.000261 loss: 0.8282 (0.8371) grad: 0.2684 (0.2702) time: 0.3210 data: 0.0037 max mem: 3951 +train: [4] [160/400] eta: 0:01:28 lr: 0.000264 loss: 0.8094 (0.8323) grad: 0.2594 (0.2690) time: 0.3446 data: 0.0044 max mem: 3951 +train: [4] [180/400] eta: 0:01:20 lr: 0.000267 loss: 0.8021 (0.8286) grad: 0.2561 (0.2680) time: 0.3569 data: 0.0042 max mem: 3951 +train: [4] [200/400] eta: 0:01:13 lr: 0.000270 loss: 0.7699 (0.8228) grad: 0.2564 (0.2673) time: 0.3553 data: 0.0045 max mem: 3951 +train: [4] [220/400] eta: 0:01:05 lr: 0.000273 loss: 0.7809 (0.8218) grad: 0.2623 (0.2672) time: 0.3401 data: 0.0043 max mem: 3951 +train: [4] [240/400] eta: 0:00:57 lr: 0.000276 loss: 0.7807 (0.8201) grad: 0.2512 (0.2653) time: 0.3299 data: 0.0043 max mem: 3951 +train: [4] [260/400] eta: 0:00:50 lr: 0.000279 loss: 0.7807 (0.8190) grad: 0.2418 (0.2644) time: 0.3591 data: 0.0043 max mem: 3951 +train: [4] [280/400] eta: 0:00:43 lr: 0.000282 loss: 0.7959 (0.8163) grad: 0.2553 (0.2640) time: 0.3571 data: 0.0046 max mem: 3951 +train: [4] [300/400] eta: 0:00:36 lr: 0.000285 loss: 0.7527 (0.8118) grad: 0.2546 (0.2634) time: 0.4899 data: 0.1780 max mem: 3951 +train: [4] [320/400] eta: 0:00:29 lr: 0.000288 loss: 0.7310 (0.8056) grad: 0.2460 (0.2623) time: 0.3374 data: 0.0044 max mem: 3951 +train: [4] [340/400] eta: 0:00:21 lr: 0.000291 loss: 0.7069 (0.8004) grad: 0.2407 (0.2610) time: 0.3424 data: 0.0035 max mem: 3951 +train: [4] [360/400] eta: 0:00:14 lr: 0.000294 loss: 0.7347 (0.7978) grad: 0.2349 (0.2596) time: 0.3344 data: 0.0036 max mem: 3951 +train: [4] [380/400] eta: 0:00:07 lr: 0.000297 loss: 0.7238 (0.7942) grad: 0.2372 (0.2587) time: 0.3586 data: 0.0042 max mem: 3951 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 0.7209 (0.7917) grad: 0.2437 (0.2581) time: 0.3445 data: 0.0042 max mem: 3951 +train: [4] Total time: 0:02:25 (0.3635 s / it) +train: [4] Summary: lr: 0.000300 loss: 0.7209 (0.7917) grad: 0.2437 (0.2581) +eval (validation): [4] [ 0/63] eta: 0:03:31 time: 3.3644 data: 3.1097 max mem: 3951 +eval (validation): [4] [20/63] eta: 0:00:20 time: 0.3302 data: 0.0109 max mem: 3951 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3473 data: 0.0032 max mem: 3951 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3218 data: 0.0038 max mem: 3951 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3069 data: 0.0033 max mem: 3951 +eval (validation): [4] Total time: 0:00:24 (0.3858 s / it) +cv: [4] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.173 acc: 0.943 f1: 0.932 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:21:34 lr: nan time: 3.2372 data: 2.9604 max mem: 3951 +train: [5] [ 20/400] eta: 0:03:15 lr: 0.000300 loss: 0.6879 (0.6963) grad: 0.2467 (0.2458) time: 0.3788 data: 0.0049 max mem: 3951 +train: [5] [ 40/400] eta: 0:02:34 lr: 0.000300 loss: 0.6984 (0.7052) grad: 0.2446 (0.2455) time: 0.3413 data: 0.0042 max mem: 3951 +train: [5] [ 60/400] eta: 0:02:15 lr: 0.000300 loss: 0.7045 (0.7005) grad: 0.2367 (0.2425) time: 0.3350 data: 0.0042 max mem: 3951 +train: [5] [ 80/400] eta: 0:02:04 lr: 0.000300 loss: 0.6980 (0.7029) grad: 0.2383 (0.2428) time: 0.3564 data: 0.0043 max mem: 3951 +train: [5] [100/400] eta: 0:01:54 lr: 0.000300 loss: 0.6718 (0.6985) grad: 0.2333 (0.2404) time: 0.3473 data: 0.0042 max mem: 3951 +train: [5] [120/400] eta: 0:01:43 lr: 0.000300 loss: 0.6631 (0.6942) grad: 0.2331 (0.2393) time: 0.3246 data: 0.0039 max mem: 3951 +train: [5] [140/400] eta: 0:01:34 lr: 0.000300 loss: 0.6631 (0.6931) grad: 0.2331 (0.2398) time: 0.3110 data: 0.0035 max mem: 3951 +train: [5] [160/400] eta: 0:01:27 lr: 0.000299 loss: 0.6813 (0.6936) grad: 0.2323 (0.2394) time: 0.3691 data: 0.0043 max mem: 3951 +train: [5] [180/400] eta: 0:01:19 lr: 0.000299 loss: 0.6688 (0.6900) grad: 0.2323 (0.2389) time: 0.3588 data: 0.0036 max mem: 3951 +train: [5] [200/400] eta: 0:01:12 lr: 0.000299 loss: 0.6685 (0.6898) grad: 0.2262 (0.2372) time: 0.3389 data: 0.0040 max mem: 3951 +train: [5] [220/400] eta: 0:01:04 lr: 0.000299 loss: 0.6794 (0.6884) grad: 0.2365 (0.2380) time: 0.3228 data: 0.0040 max mem: 3951 +train: [5] [240/400] eta: 0:00:57 lr: 0.000299 loss: 0.6665 (0.6861) grad: 0.2365 (0.2377) time: 0.3471 data: 0.0042 max mem: 3951 +train: [5] [260/400] eta: 0:00:49 lr: 0.000299 loss: 0.6560 (0.6847) grad: 0.2316 (0.2367) time: 0.3518 data: 0.0038 max mem: 3951 +train: [5] [280/400] eta: 0:00:42 lr: 0.000298 loss: 0.6529 (0.6822) grad: 0.2343 (0.2372) time: 0.3489 data: 0.0040 max mem: 3951 +train: [5] [300/400] eta: 0:00:36 lr: 0.000298 loss: 0.6529 (0.6814) grad: 0.2349 (0.2369) time: 0.4766 data: 0.1754 max mem: 3951 +train: [5] [320/400] eta: 0:00:28 lr: 0.000298 loss: 0.6355 (0.6780) grad: 0.2266 (0.2361) time: 0.3263 data: 0.0040 max mem: 3951 +train: [5] [340/400] eta: 0:00:21 lr: 0.000298 loss: 0.6227 (0.6746) grad: 0.2257 (0.2359) time: 0.3184 data: 0.0030 max mem: 3951 +train: [5] [360/400] eta: 0:00:14 lr: 0.000297 loss: 0.6140 (0.6714) grad: 0.2257 (0.2352) time: 0.3200 data: 0.0041 max mem: 3951 +train: [5] [380/400] eta: 0:00:07 lr: 0.000297 loss: 0.6147 (0.6698) grad: 0.2224 (0.2349) time: 0.3163 data: 0.0040 max mem: 3951 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 0.6232 (0.6676) grad: 0.2268 (0.2346) time: 0.3312 data: 0.0043 max mem: 3951 +train: [5] Total time: 0:02:21 (0.3538 s / it) +train: [5] Summary: lr: 0.000297 loss: 0.6232 (0.6676) grad: 0.2268 (0.2346) +eval (validation): [5] [ 0/63] eta: 0:03:06 time: 2.9638 data: 2.7650 max mem: 3951 +eval (validation): [5] [20/63] eta: 0:00:18 time: 0.2931 data: 0.0030 max mem: 3951 +eval (validation): [5] [40/63] eta: 0:00:08 time: 0.3305 data: 0.0028 max mem: 3951 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.2942 data: 0.0034 max mem: 3951 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.2931 data: 0.0034 max mem: 3951 +eval (validation): [5] Total time: 0:00:22 (0.3543 s / it) +cv: [5] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.153 acc: 0.950 f1: 0.940 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [6] [ 0/400] eta: 0:19:42 lr: nan time: 2.9560 data: 2.7521 max mem: 3951 +train: [6] [ 20/400] eta: 0:02:51 lr: 0.000296 loss: 0.5712 (0.5950) grad: 0.2186 (0.2189) time: 0.3269 data: 0.0041 max mem: 3951 +train: [6] [ 40/400] eta: 0:02:20 lr: 0.000296 loss: 0.5913 (0.5952) grad: 0.2267 (0.2237) time: 0.3269 data: 0.0034 max mem: 3951 +train: [6] [ 60/400] eta: 0:02:05 lr: 0.000296 loss: 0.6051 (0.6049) grad: 0.2134 (0.2205) time: 0.3277 data: 0.0040 max mem: 3951 +train: [6] [ 80/400] eta: 0:01:55 lr: 0.000295 loss: 0.6051 (0.6015) grad: 0.2132 (0.2196) time: 0.3323 data: 0.0042 max mem: 3951 +train: [6] [100/400] eta: 0:01:46 lr: 0.000295 loss: 0.5751 (0.5947) grad: 0.2115 (0.2176) time: 0.3330 data: 0.0042 max mem: 3951 +train: [6] [120/400] eta: 0:01:38 lr: 0.000295 loss: 0.5726 (0.5964) grad: 0.1999 (0.2153) time: 0.3269 data: 0.0044 max mem: 3951 +train: [6] [140/400] eta: 0:01:29 lr: 0.000294 loss: 0.6001 (0.5944) grad: 0.2058 (0.2153) time: 0.3098 data: 0.0041 max mem: 3951 +train: [6] [160/400] eta: 0:01:22 lr: 0.000294 loss: 0.5964 (0.5962) grad: 0.2126 (0.2150) time: 0.3256 data: 0.0042 max mem: 3951 +train: [6] [180/400] eta: 0:01:14 lr: 0.000293 loss: 0.5800 (0.5928) grad: 0.2130 (0.2148) time: 0.3253 data: 0.0043 max mem: 3951 +train: [6] [200/400] eta: 0:01:08 lr: 0.000293 loss: 0.5681 (0.5921) grad: 0.2121 (0.2141) time: 0.3434 data: 0.0043 max mem: 3951 +train: [6] [220/400] eta: 0:01:01 lr: 0.000292 loss: 0.5819 (0.5909) grad: 0.2101 (0.2140) time: 0.3463 data: 0.0042 max mem: 3951 +train: [6] [240/400] eta: 0:00:54 lr: 0.000292 loss: 0.5819 (0.5906) grad: 0.2060 (0.2142) time: 0.3422 data: 0.0039 max mem: 3951 +train: [6] [260/400] eta: 0:00:47 lr: 0.000291 loss: 0.5758 (0.5886) grad: 0.2060 (0.2135) time: 0.3425 data: 0.0044 max mem: 3951 +train: [6] [280/400] eta: 0:00:41 lr: 0.000291 loss: 0.5575 (0.5877) grad: 0.2115 (0.2143) time: 0.3443 data: 0.0043 max mem: 3951 +train: [6] [300/400] eta: 0:00:35 lr: 0.000290 loss: 0.5575 (0.5863) grad: 0.2163 (0.2141) time: 0.4862 data: 0.1726 max mem: 3951 +train: [6] [320/400] eta: 0:00:28 lr: 0.000290 loss: 0.5513 (0.5846) grad: 0.2088 (0.2141) time: 0.3475 data: 0.0038 max mem: 3951 +train: [6] [340/400] eta: 0:00:21 lr: 0.000289 loss: 0.5411 (0.5823) grad: 0.2032 (0.2134) time: 0.3483 data: 0.0040 max mem: 3951 +train: [6] [360/400] eta: 0:00:14 lr: 0.000288 loss: 0.5418 (0.5823) grad: 0.1992 (0.2129) time: 0.3368 data: 0.0029 max mem: 3951 +train: [6] [380/400] eta: 0:00:06 lr: 0.000288 loss: 0.5404 (0.5796) grad: 0.1992 (0.2121) time: 0.3322 data: 0.0042 max mem: 3951 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.5240 (0.5786) grad: 0.1993 (0.2115) time: 0.3302 data: 0.0036 max mem: 3951 +train: [6] Total time: 0:02:19 (0.3488 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.5240 (0.5786) grad: 0.1993 (0.2115) +eval (validation): [6] [ 0/63] eta: 0:03:34 time: 3.4040 data: 3.1247 max mem: 3951 +eval (validation): [6] [20/63] eta: 0:00:21 time: 0.3502 data: 0.0042 max mem: 3951 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3378 data: 0.0034 max mem: 3951 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3136 data: 0.0039 max mem: 3951 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3145 data: 0.0037 max mem: 3951 +eval (validation): [6] Total time: 0:00:24 (0.3878 s / it) +cv: [6] best hparam: (9.8, 1.0) (038) ('038_lr9.8e+00_wd1.0e+00') loss: 0.142 acc: 0.949 f1: 0.941 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [7] [ 0/400] eta: 0:22:35 lr: nan time: 3.3891 data: 3.1086 max mem: 3951 +train: [7] [ 20/400] eta: 0:03:25 lr: 0.000286 loss: 0.5397 (0.5605) grad: 0.1992 (0.2019) time: 0.3971 data: 0.0042 max mem: 3951 +train: [7] [ 40/400] eta: 0:02:40 lr: 0.000286 loss: 0.5540 (0.5526) grad: 0.1965 (0.1984) time: 0.3446 data: 0.0036 max mem: 3951 +train: [7] [ 60/400] eta: 0:02:22 lr: 0.000285 loss: 0.5356 (0.5469) grad: 0.1987 (0.2015) time: 0.3642 data: 0.0043 max mem: 3951 +train: [7] [ 80/400] eta: 0:02:08 lr: 0.000284 loss: 0.5256 (0.5431) grad: 0.2060 (0.2027) time: 0.3493 data: 0.0041 max mem: 3951 +train: [7] [100/400] eta: 0:01:57 lr: 0.000284 loss: 0.5173 (0.5366) grad: 0.2052 (0.2019) time: 0.3469 data: 0.0040 max mem: 3951 +train: [7] [120/400] eta: 0:01:47 lr: 0.000283 loss: 0.5216 (0.5340) grad: 0.1934 (0.2000) time: 0.3459 data: 0.0037 max mem: 3951 +train: [7] [140/400] eta: 0:01:37 lr: 0.000282 loss: 0.5338 (0.5325) grad: 0.1928 (0.1999) time: 0.3204 data: 0.0038 max mem: 3951 +train: [7] [160/400] eta: 0:01:28 lr: 0.000282 loss: 0.5117 (0.5308) grad: 0.1965 (0.2000) time: 0.3455 data: 0.0042 max mem: 3951 +train: [7] [180/400] eta: 0:01:20 lr: 0.000281 loss: 0.5054 (0.5279) grad: 0.1965 (0.1997) time: 0.3385 data: 0.0041 max mem: 3951 +train: [7] [200/400] eta: 0:01:12 lr: 0.000280 loss: 0.4938 (0.5285) grad: 0.1945 (0.1995) time: 0.3175 data: 0.0038 max mem: 3951 +train: [7] [220/400] eta: 0:01:04 lr: 0.000279 loss: 0.5007 (0.5254) grad: 0.1974 (0.1997) time: 0.3454 data: 0.0039 max mem: 3951 +train: [7] [240/400] eta: 0:00:57 lr: 0.000278 loss: 0.4951 (0.5226) grad: 0.2036 (0.1998) time: 0.3451 data: 0.0043 max mem: 3951 +train: [7] [260/400] eta: 0:00:50 lr: 0.000278 loss: 0.5019 (0.5222) grad: 0.1982 (0.1995) time: 0.3430 data: 0.0038 max mem: 3951 +train: [7] [280/400] eta: 0:00:42 lr: 0.000277 loss: 0.5089 (0.5202) grad: 0.1934 (0.1990) time: 0.3387 data: 0.0040 max mem: 3951 +train: [7] [300/400] eta: 0:00:36 lr: 0.000276 loss: 0.4856 (0.5181) grad: 0.1934 (0.1992) time: 0.4819 data: 0.1769 max mem: 3951 +train: [7] [320/400] eta: 0:00:29 lr: 0.000275 loss: 0.4814 (0.5157) grad: 0.1963 (0.1989) time: 0.3617 data: 0.0038 max mem: 3951 +train: [7] [340/400] eta: 0:00:21 lr: 0.000274 loss: 0.4956 (0.5156) grad: 0.1916 (0.1985) time: 0.3487 data: 0.0042 max mem: 3951 +train: [7] [360/400] eta: 0:00:14 lr: 0.000273 loss: 0.4956 (0.5146) grad: 0.1848 (0.1978) time: 0.3583 data: 0.0039 max mem: 3951 +train: [7] [380/400] eta: 0:00:07 lr: 0.000272 loss: 0.4758 (0.5141) grad: 0.1815 (0.1972) time: 0.3387 data: 0.0044 max mem: 3951 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.4825 (0.5122) grad: 0.1828 (0.1968) time: 0.3198 data: 0.0040 max mem: 3951 +train: [7] Total time: 0:02:24 (0.3607 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.4825 (0.5122) grad: 0.1828 (0.1968) +eval (validation): [7] [ 0/63] eta: 0:03:46 time: 3.6029 data: 3.3079 max mem: 3951 +eval (validation): [7] [20/63] eta: 0:00:22 time: 0.3597 data: 0.0028 max mem: 3951 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3315 data: 0.0033 max mem: 3951 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3091 data: 0.0036 max mem: 3951 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3086 data: 0.0033 max mem: 3951 +eval (validation): [7] Total time: 0:00:24 (0.3893 s / it) +cv: [7] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.143 acc: 0.953 f1: 0.943 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:21:58 lr: nan time: 3.2950 data: 3.0174 max mem: 3951 +train: [8] [ 20/400] eta: 0:03:19 lr: 0.000270 loss: 0.4763 (0.4929) grad: 0.1895 (0.1938) time: 0.3873 data: 0.0048 max mem: 3951 +train: [8] [ 40/400] eta: 0:02:38 lr: 0.000270 loss: 0.4793 (0.4884) grad: 0.1951 (0.1939) time: 0.3481 data: 0.0044 max mem: 3951 +train: [8] [ 60/400] eta: 0:02:17 lr: 0.000269 loss: 0.4749 (0.4823) grad: 0.1843 (0.1907) time: 0.3374 data: 0.0043 max mem: 3951 +train: [8] [ 80/400] eta: 0:02:04 lr: 0.000268 loss: 0.4800 (0.4836) grad: 0.1864 (0.1925) time: 0.3380 data: 0.0042 max mem: 3951 +train: [8] [100/400] eta: 0:01:55 lr: 0.000267 loss: 0.4874 (0.4871) grad: 0.1899 (0.1914) time: 0.3612 data: 0.0042 max mem: 3951 +train: [8] [120/400] eta: 0:01:44 lr: 0.000266 loss: 0.4969 (0.4881) grad: 0.1814 (0.1908) time: 0.3298 data: 0.0039 max mem: 3951 +train: [8] [140/400] eta: 0:01:35 lr: 0.000265 loss: 0.4586 (0.4825) grad: 0.1863 (0.1901) time: 0.3345 data: 0.0034 max mem: 3951 +train: [8] [160/400] eta: 0:01:27 lr: 0.000264 loss: 0.4568 (0.4834) grad: 0.1859 (0.1897) time: 0.3457 data: 0.0039 max mem: 3951 +train: [8] [180/400] eta: 0:01:20 lr: 0.000263 loss: 0.4676 (0.4806) grad: 0.1768 (0.1879) time: 0.3464 data: 0.0039 max mem: 3951 +train: [8] [200/400] eta: 0:01:12 lr: 0.000262 loss: 0.4602 (0.4787) grad: 0.1722 (0.1867) time: 0.3324 data: 0.0041 max mem: 3951 +train: [8] [220/400] eta: 0:01:04 lr: 0.000260 loss: 0.4602 (0.4766) grad: 0.1726 (0.1861) time: 0.3245 data: 0.0035 max mem: 3951 +train: [8] [240/400] eta: 0:00:57 lr: 0.000259 loss: 0.4416 (0.4763) grad: 0.1822 (0.1859) time: 0.3487 data: 0.0041 max mem: 3951 +train: [8] [260/400] eta: 0:00:49 lr: 0.000258 loss: 0.4267 (0.4731) grad: 0.1798 (0.1851) time: 0.3338 data: 0.0038 max mem: 3951 +train: [8] [280/400] eta: 0:00:42 lr: 0.000257 loss: 0.4484 (0.4735) grad: 0.1750 (0.1849) time: 0.3379 data: 0.0039 max mem: 3951 +train: [8] [300/400] eta: 0:00:36 lr: 0.000256 loss: 0.4687 (0.4740) grad: 0.1848 (0.1851) time: 0.4904 data: 0.1763 max mem: 3951 +train: [8] [320/400] eta: 0:00:28 lr: 0.000255 loss: 0.4560 (0.4728) grad: 0.1854 (0.1849) time: 0.3318 data: 0.0040 max mem: 3951 +train: [8] [340/400] eta: 0:00:21 lr: 0.000254 loss: 0.4368 (0.4711) grad: 0.1740 (0.1844) time: 0.3436 data: 0.0037 max mem: 3951 +train: [8] [360/400] eta: 0:00:14 lr: 0.000253 loss: 0.4435 (0.4702) grad: 0.1740 (0.1842) time: 0.3367 data: 0.0046 max mem: 3951 +train: [8] [380/400] eta: 0:00:07 lr: 0.000252 loss: 0.4320 (0.4679) grad: 0.1736 (0.1836) time: 0.3476 data: 0.0046 max mem: 3951 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.4253 (0.4669) grad: 0.1725 (0.1831) time: 0.3410 data: 0.0041 max mem: 3951 +train: [8] Total time: 0:02:23 (0.3578 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.4253 (0.4669) grad: 0.1725 (0.1831) +eval (validation): [8] [ 0/63] eta: 0:03:15 time: 3.1082 data: 2.8298 max mem: 3951 +eval (validation): [8] [20/63] eta: 0:00:21 time: 0.3737 data: 0.0049 max mem: 3951 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3249 data: 0.0035 max mem: 3951 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3129 data: 0.0043 max mem: 3951 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3122 data: 0.0041 max mem: 3951 +eval (validation): [8] Total time: 0:00:24 (0.3852 s / it) +cv: [8] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.133 acc: 0.955 f1: 0.945 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [9] [ 0/400] eta: 0:21:54 lr: nan time: 3.2864 data: 3.0170 max mem: 3951 +train: [9] [ 20/400] eta: 0:03:10 lr: 0.000249 loss: 0.4296 (0.4548) grad: 0.1845 (0.1861) time: 0.3631 data: 0.0249 max mem: 3951 +train: [9] [ 40/400] eta: 0:02:32 lr: 0.000248 loss: 0.4296 (0.4398) grad: 0.1787 (0.1803) time: 0.3395 data: 0.0038 max mem: 3951 +train: [9] [ 60/400] eta: 0:02:14 lr: 0.000247 loss: 0.4239 (0.4336) grad: 0.1730 (0.1798) time: 0.3441 data: 0.0042 max mem: 3951 +train: [9] [ 80/400] eta: 0:02:02 lr: 0.000246 loss: 0.4227 (0.4354) grad: 0.1711 (0.1782) time: 0.3372 data: 0.0042 max mem: 3951 +train: [9] [100/400] eta: 0:01:53 lr: 0.000244 loss: 0.4193 (0.4319) grad: 0.1701 (0.1775) time: 0.3592 data: 0.0045 max mem: 3951 +train: [9] [120/400] eta: 0:01:44 lr: 0.000243 loss: 0.4193 (0.4300) grad: 0.1644 (0.1758) time: 0.3505 data: 0.0039 max mem: 3951 +train: [9] [140/400] eta: 0:01:36 lr: 0.000242 loss: 0.4307 (0.4314) grad: 0.1681 (0.1749) time: 0.3507 data: 0.0037 max mem: 3951 +train: [9] [160/400] eta: 0:01:28 lr: 0.000241 loss: 0.4088 (0.4290) grad: 0.1696 (0.1744) time: 0.3446 data: 0.0045 max mem: 3951 +train: [9] [180/400] eta: 0:01:20 lr: 0.000240 loss: 0.4442 (0.4312) grad: 0.1736 (0.1761) time: 0.3604 data: 0.0042 max mem: 3951 +train: [9] [200/400] eta: 0:01:13 lr: 0.000238 loss: 0.4456 (0.4316) grad: 0.1737 (0.1760) time: 0.3669 data: 0.0041 max mem: 3951 +train: [9] [220/400] eta: 0:01:05 lr: 0.000237 loss: 0.4088 (0.4296) grad: 0.1742 (0.1766) time: 0.3390 data: 0.0038 max mem: 3951 +train: [9] [240/400] eta: 0:00:57 lr: 0.000236 loss: 0.4053 (0.4298) grad: 0.1684 (0.1762) time: 0.3293 data: 0.0044 max mem: 3951 +train: [9] [260/400] eta: 0:00:50 lr: 0.000234 loss: 0.4174 (0.4309) grad: 0.1680 (0.1759) time: 0.3544 data: 0.0044 max mem: 3951 +train: [9] [280/400] eta: 0:00:43 lr: 0.000233 loss: 0.4505 (0.4326) grad: 0.1722 (0.1761) time: 0.3452 data: 0.0042 max mem: 3951 +train: [9] [300/400] eta: 0:00:36 lr: 0.000232 loss: 0.4273 (0.4321) grad: 0.1750 (0.1762) time: 0.4959 data: 0.1797 max mem: 3951 +train: [9] [320/400] eta: 0:00:29 lr: 0.000230 loss: 0.4090 (0.4304) grad: 0.1675 (0.1757) time: 0.3544 data: 0.0122 max mem: 3951 +train: [9] [340/400] eta: 0:00:21 lr: 0.000229 loss: 0.4084 (0.4296) grad: 0.1675 (0.1751) time: 0.3447 data: 0.0027 max mem: 3951 +train: [9] [360/400] eta: 0:00:14 lr: 0.000228 loss: 0.4103 (0.4285) grad: 0.1659 (0.1747) time: 0.3530 data: 0.0042 max mem: 3951 +train: [9] [380/400] eta: 0:00:07 lr: 0.000226 loss: 0.4039 (0.4274) grad: 0.1734 (0.1750) time: 0.3553 data: 0.0044 max mem: 3951 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.4039 (0.4279) grad: 0.1793 (0.1751) time: 0.3698 data: 0.0046 max mem: 3951 +train: [9] Total time: 0:02:26 (0.3657 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.4039 (0.4279) grad: 0.1793 (0.1751) +eval (validation): [9] [ 0/63] eta: 0:03:17 time: 3.1382 data: 2.8650 max mem: 3951 +eval (validation): [9] [20/63] eta: 0:00:21 time: 0.3718 data: 0.0043 max mem: 3951 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3441 data: 0.0028 max mem: 3951 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3123 data: 0.0040 max mem: 3951 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3073 data: 0.0038 max mem: 3951 +eval (validation): [9] Total time: 0:00:24 (0.3920 s / it) +cv: [9] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.150 acc: 0.954 f1: 0.944 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:22:01 lr: nan time: 3.3042 data: 3.0747 max mem: 3951 +train: [10] [ 20/400] eta: 0:03:05 lr: 0.000224 loss: 0.4086 (0.4001) grad: 0.1674 (0.1739) time: 0.3480 data: 0.0105 max mem: 3951 +train: [10] [ 40/400] eta: 0:02:30 lr: 0.000222 loss: 0.4086 (0.4011) grad: 0.1653 (0.1696) time: 0.3451 data: 0.0041 max mem: 3951 +train: [10] [ 60/400] eta: 0:02:12 lr: 0.000221 loss: 0.4090 (0.4018) grad: 0.1651 (0.1709) time: 0.3306 data: 0.0034 max mem: 3951 +train: [10] [ 80/400] eta: 0:02:00 lr: 0.000220 loss: 0.3836 (0.3954) grad: 0.1625 (0.1682) time: 0.3405 data: 0.0041 max mem: 3951 +train: [10] [100/400] eta: 0:01:51 lr: 0.000218 loss: 0.3912 (0.4032) grad: 0.1657 (0.1719) time: 0.3453 data: 0.0044 max mem: 3951 +train: [10] [120/400] eta: 0:01:42 lr: 0.000217 loss: 0.4148 (0.4046) grad: 0.1672 (0.1710) time: 0.3427 data: 0.0044 max mem: 3951 +train: [10] [140/400] eta: 0:01:33 lr: 0.000215 loss: 0.4123 (0.4070) grad: 0.1653 (0.1713) time: 0.3249 data: 0.0040 max mem: 3951 +train: [10] [160/400] eta: 0:01:26 lr: 0.000214 loss: 0.4123 (0.4063) grad: 0.1692 (0.1720) time: 0.3448 data: 0.0038 max mem: 3951 +train: [10] [180/400] eta: 0:01:18 lr: 0.000213 loss: 0.3892 (0.4041) grad: 0.1639 (0.1708) time: 0.3580 data: 0.0047 max mem: 3951 +train: [10] [200/400] eta: 0:01:11 lr: 0.000211 loss: 0.3892 (0.4039) grad: 0.1577 (0.1702) time: 0.3463 data: 0.0038 max mem: 3951 +train: [10] [220/400] eta: 0:01:04 lr: 0.000210 loss: 0.3958 (0.4023) grad: 0.1633 (0.1702) time: 0.3547 data: 0.0039 max mem: 3951 +train: [10] [240/400] eta: 0:00:57 lr: 0.000208 loss: 0.3856 (0.4015) grad: 0.1691 (0.1702) time: 0.3534 data: 0.0041 max mem: 3951 +train: [10] [260/400] eta: 0:00:49 lr: 0.000207 loss: 0.3907 (0.4012) grad: 0.1660 (0.1697) time: 0.3362 data: 0.0043 max mem: 3951 +train: [10] [280/400] eta: 0:00:42 lr: 0.000205 loss: 0.3882 (0.4007) grad: 0.1599 (0.1688) time: 0.3515 data: 0.0044 max mem: 3951 +train: [10] [300/400] eta: 0:00:36 lr: 0.000204 loss: 0.3842 (0.4000) grad: 0.1599 (0.1683) time: 0.5035 data: 0.1845 max mem: 3951 +train: [10] [320/400] eta: 0:00:29 lr: 0.000202 loss: 0.3880 (0.4004) grad: 0.1617 (0.1680) time: 0.3694 data: 0.0041 max mem: 3951 +train: [10] [340/400] eta: 0:00:21 lr: 0.000201 loss: 0.4010 (0.3999) grad: 0.1644 (0.1684) time: 0.3319 data: 0.0036 max mem: 3951 +train: [10] [360/400] eta: 0:00:14 lr: 0.000199 loss: 0.3630 (0.3972) grad: 0.1698 (0.1685) time: 0.3564 data: 0.0043 max mem: 3951 +train: [10] [380/400] eta: 0:00:07 lr: 0.000198 loss: 0.3688 (0.3974) grad: 0.1683 (0.1685) time: 0.3478 data: 0.0035 max mem: 3951 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.3842 (0.3974) grad: 0.1639 (0.1682) time: 0.3450 data: 0.0040 max mem: 3951 +train: [10] Total time: 0:02:24 (0.3618 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.3842 (0.3974) grad: 0.1639 (0.1682) +eval (validation): [10] [ 0/63] eta: 0:03:20 time: 3.1780 data: 2.9192 max mem: 3951 +eval (validation): [10] [20/63] eta: 0:00:19 time: 0.3073 data: 0.0031 max mem: 3951 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3477 data: 0.0033 max mem: 3951 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3142 data: 0.0038 max mem: 3951 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3080 data: 0.0038 max mem: 3951 +eval (validation): [10] Total time: 0:00:23 (0.3721 s / it) +cv: [10] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.135 acc: 0.957 f1: 0.949 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [11] [ 0/400] eta: 0:22:37 lr: nan time: 3.3928 data: 3.1111 max mem: 3951 +train: [11] [ 20/400] eta: 0:03:06 lr: 0.000195 loss: 0.3816 (0.3783) grad: 0.1516 (0.1578) time: 0.3467 data: 0.0033 max mem: 3951 +train: [11] [ 40/400] eta: 0:02:30 lr: 0.000193 loss: 0.3610 (0.3745) grad: 0.1516 (0.1589) time: 0.3384 data: 0.0031 max mem: 3951 +train: [11] [ 60/400] eta: 0:02:12 lr: 0.000192 loss: 0.3610 (0.3715) grad: 0.1502 (0.1578) time: 0.3356 data: 0.0037 max mem: 3951 +train: [11] [ 80/400] eta: 0:02:04 lr: 0.000190 loss: 0.3725 (0.3753) grad: 0.1540 (0.1589) time: 0.3801 data: 0.0045 max mem: 3951 +train: [11] [100/400] eta: 0:01:54 lr: 0.000189 loss: 0.3786 (0.3771) grad: 0.1575 (0.1602) time: 0.3499 data: 0.0045 max mem: 3951 +train: [11] [120/400] eta: 0:01:44 lr: 0.000187 loss: 0.3756 (0.3770) grad: 0.1582 (0.1604) time: 0.3373 data: 0.0046 max mem: 3951 +train: [11] [140/400] eta: 0:01:35 lr: 0.000186 loss: 0.3843 (0.3807) grad: 0.1550 (0.1608) time: 0.3215 data: 0.0041 max mem: 3951 +train: [11] [160/400] eta: 0:01:27 lr: 0.000184 loss: 0.3867 (0.3813) grad: 0.1582 (0.1609) time: 0.3548 data: 0.0043 max mem: 3951 +train: [11] [180/400] eta: 0:01:20 lr: 0.000183 loss: 0.3769 (0.3795) grad: 0.1566 (0.1602) time: 0.3573 data: 0.0044 max mem: 3951 +train: [11] [200/400] eta: 0:01:12 lr: 0.000181 loss: 0.3469 (0.3761) grad: 0.1502 (0.1601) time: 0.3486 data: 0.0043 max mem: 3951 +train: [11] [220/400] eta: 0:01:05 lr: 0.000180 loss: 0.3546 (0.3761) grad: 0.1522 (0.1594) time: 0.3567 data: 0.0040 max mem: 3951 +train: [11] [240/400] eta: 0:00:57 lr: 0.000178 loss: 0.3723 (0.3763) grad: 0.1572 (0.1602) time: 0.3504 data: 0.0042 max mem: 3951 +train: [11] [260/400] eta: 0:00:50 lr: 0.000177 loss: 0.3562 (0.3759) grad: 0.1557 (0.1595) time: 0.3345 data: 0.0041 max mem: 3951 +train: [11] [280/400] eta: 0:00:43 lr: 0.000175 loss: 0.3488 (0.3746) grad: 0.1509 (0.1592) time: 0.3550 data: 0.0040 max mem: 3951 +train: [11] [300/400] eta: 0:00:36 lr: 0.000174 loss: 0.3497 (0.3726) grad: 0.1498 (0.1589) time: 0.5049 data: 0.1880 max mem: 3951 +train: [11] [320/400] eta: 0:00:29 lr: 0.000172 loss: 0.3527 (0.3722) grad: 0.1484 (0.1584) time: 0.3444 data: 0.0107 max mem: 3951 +train: [11] [340/400] eta: 0:00:21 lr: 0.000170 loss: 0.3802 (0.3728) grad: 0.1548 (0.1582) time: 0.3456 data: 0.0029 max mem: 3951 +train: [11] [360/400] eta: 0:00:14 lr: 0.000169 loss: 0.3802 (0.3727) grad: 0.1516 (0.1577) time: 0.3454 data: 0.0041 max mem: 3951 +train: [11] [380/400] eta: 0:00:07 lr: 0.000167 loss: 0.3490 (0.3712) grad: 0.1516 (0.1577) time: 0.3462 data: 0.0045 max mem: 3951 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.3452 (0.3705) grad: 0.1500 (0.1574) time: 0.3445 data: 0.0039 max mem: 3951 +train: [11] Total time: 0:02:25 (0.3630 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.3452 (0.3705) grad: 0.1500 (0.1574) +eval (validation): [11] [ 0/63] eta: 0:03:29 time: 3.3208 data: 3.0528 max mem: 3951 +eval (validation): [11] [20/63] eta: 0:00:19 time: 0.3033 data: 0.0139 max mem: 3951 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3597 data: 0.0036 max mem: 3951 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3268 data: 0.0033 max mem: 3951 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3264 data: 0.0035 max mem: 3951 +eval (validation): [11] Total time: 0:00:24 (0.3827 s / it) +cv: [11] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.169 acc: 0.956 f1: 0.946 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:22:14 lr: nan time: 3.3352 data: 3.0604 max mem: 3951 +train: [12] [ 20/400] eta: 0:03:20 lr: 0.000164 loss: 0.3700 (0.3775) grad: 0.1597 (0.1602) time: 0.3860 data: 0.0045 max mem: 3951 +train: [12] [ 40/400] eta: 0:02:36 lr: 0.000163 loss: 0.3642 (0.3627) grad: 0.1565 (0.1580) time: 0.3374 data: 0.0047 max mem: 3951 +train: [12] [ 60/400] eta: 0:02:16 lr: 0.000161 loss: 0.3459 (0.3599) grad: 0.1500 (0.1549) time: 0.3329 data: 0.0035 max mem: 3951 +train: [12] [ 80/400] eta: 0:02:04 lr: 0.000160 loss: 0.3459 (0.3566) grad: 0.1526 (0.1565) time: 0.3504 data: 0.0038 max mem: 3951 +train: [12] [100/400] eta: 0:01:53 lr: 0.000158 loss: 0.3323 (0.3544) grad: 0.1569 (0.1552) time: 0.3321 data: 0.0037 max mem: 3951 +train: [12] [120/400] eta: 0:01:44 lr: 0.000156 loss: 0.3498 (0.3545) grad: 0.1505 (0.1546) time: 0.3453 data: 0.0041 max mem: 3951 +train: [12] [140/400] eta: 0:01:34 lr: 0.000155 loss: 0.3494 (0.3526) grad: 0.1536 (0.1556) time: 0.3198 data: 0.0034 max mem: 3951 +train: [12] [160/400] eta: 0:01:26 lr: 0.000153 loss: 0.3411 (0.3521) grad: 0.1569 (0.1563) time: 0.3450 data: 0.0040 max mem: 3951 +train: [12] [180/400] eta: 0:01:19 lr: 0.000152 loss: 0.3483 (0.3528) grad: 0.1554 (0.1563) time: 0.3520 data: 0.0040 max mem: 3951 +train: [12] [200/400] eta: 0:01:11 lr: 0.000150 loss: 0.3631 (0.3541) grad: 0.1554 (0.1569) time: 0.3399 data: 0.0038 max mem: 3951 +train: [12] [220/400] eta: 0:01:04 lr: 0.000149 loss: 0.3544 (0.3534) grad: 0.1496 (0.1559) time: 0.3405 data: 0.0043 max mem: 3951 +train: [12] [240/400] eta: 0:00:56 lr: 0.000147 loss: 0.3346 (0.3533) grad: 0.1456 (0.1556) time: 0.3396 data: 0.0037 max mem: 3951 +train: [12] [260/400] eta: 0:00:49 lr: 0.000145 loss: 0.3547 (0.3527) grad: 0.1507 (0.1551) time: 0.3390 data: 0.0038 max mem: 3951 +train: [12] [280/400] eta: 0:00:42 lr: 0.000144 loss: 0.3561 (0.3534) grad: 0.1496 (0.1546) time: 0.3367 data: 0.0036 max mem: 3951 +train: [12] [300/400] eta: 0:00:36 lr: 0.000142 loss: 0.3540 (0.3533) grad: 0.1467 (0.1541) time: 0.5360 data: 0.2086 max mem: 3951 +train: [12] [320/400] eta: 0:00:29 lr: 0.000141 loss: 0.3381 (0.3518) grad: 0.1490 (0.1539) time: 0.3460 data: 0.0100 max mem: 3951 +train: [12] [340/400] eta: 0:00:21 lr: 0.000139 loss: 0.3387 (0.3515) grad: 0.1490 (0.1538) time: 0.3467 data: 0.0031 max mem: 3951 +train: [12] [360/400] eta: 0:00:14 lr: 0.000138 loss: 0.3515 (0.3513) grad: 0.1502 (0.1538) time: 0.3274 data: 0.0039 max mem: 3951 +train: [12] [380/400] eta: 0:00:07 lr: 0.000136 loss: 0.3515 (0.3514) grad: 0.1462 (0.1534) time: 0.3540 data: 0.0042 max mem: 3951 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.3437 (0.3514) grad: 0.1408 (0.1533) time: 0.3531 data: 0.0041 max mem: 3951 +train: [12] Total time: 0:02:24 (0.3610 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.3437 (0.3514) grad: 0.1408 (0.1533) +eval (validation): [12] [ 0/63] eta: 0:03:27 time: 3.2881 data: 2.9936 max mem: 3951 +eval (validation): [12] [20/63] eta: 0:00:19 time: 0.3112 data: 0.0032 max mem: 3951 +eval (validation): [12] [40/63] eta: 0:00:08 time: 0.3254 data: 0.0033 max mem: 3951 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3216 data: 0.0031 max mem: 3951 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3174 data: 0.0034 max mem: 3951 +eval (validation): [12] Total time: 0:00:23 (0.3722 s / it) +cv: [12] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.138 acc: 0.954 f1: 0.944 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:22:34 lr: nan time: 3.3868 data: 3.1112 max mem: 3951 +train: [13] [ 20/400] eta: 0:03:16 lr: 0.000133 loss: 0.3514 (0.3578) grad: 0.1370 (0.1504) time: 0.3723 data: 0.0138 max mem: 3951 +train: [13] [ 40/400] eta: 0:02:36 lr: 0.000131 loss: 0.3534 (0.3567) grad: 0.1435 (0.1504) time: 0.3483 data: 0.0032 max mem: 3951 +train: [13] [ 60/400] eta: 0:02:17 lr: 0.000130 loss: 0.3534 (0.3519) grad: 0.1517 (0.1529) time: 0.3424 data: 0.0036 max mem: 3951 +train: [13] [ 80/400] eta: 0:02:04 lr: 0.000128 loss: 0.3383 (0.3559) grad: 0.1495 (0.1505) time: 0.3456 data: 0.0041 max mem: 3951 +train: [13] [100/400] eta: 0:01:53 lr: 0.000127 loss: 0.3640 (0.3597) grad: 0.1429 (0.1502) time: 0.3388 data: 0.0039 max mem: 3951 +train: [13] [120/400] eta: 0:01:44 lr: 0.000125 loss: 0.3352 (0.3537) grad: 0.1430 (0.1495) time: 0.3514 data: 0.0039 max mem: 3951 +train: [13] [140/400] eta: 0:01:35 lr: 0.000124 loss: 0.3257 (0.3510) grad: 0.1460 (0.1494) time: 0.3269 data: 0.0036 max mem: 3951 +train: [13] [160/400] eta: 0:01:27 lr: 0.000122 loss: 0.3175 (0.3489) grad: 0.1473 (0.1504) time: 0.3558 data: 0.0039 max mem: 3951 +train: [13] [180/400] eta: 0:01:20 lr: 0.000120 loss: 0.3206 (0.3482) grad: 0.1472 (0.1500) time: 0.3514 data: 0.0039 max mem: 3951 +train: [13] [200/400] eta: 0:01:12 lr: 0.000119 loss: 0.3448 (0.3477) grad: 0.1370 (0.1493) time: 0.3465 data: 0.0044 max mem: 3951 +train: [13] [220/400] eta: 0:01:04 lr: 0.000117 loss: 0.3448 (0.3471) grad: 0.1406 (0.1489) time: 0.3359 data: 0.0038 max mem: 3951 +train: [13] [240/400] eta: 0:00:57 lr: 0.000116 loss: 0.3300 (0.3453) grad: 0.1446 (0.1491) time: 0.3426 data: 0.0043 max mem: 3951 +train: [13] [260/400] eta: 0:00:50 lr: 0.000114 loss: 0.3300 (0.3448) grad: 0.1523 (0.1493) time: 0.3378 data: 0.0039 max mem: 3951 +train: [13] [280/400] eta: 0:00:42 lr: 0.000113 loss: 0.3456 (0.3457) grad: 0.1501 (0.1492) time: 0.3455 data: 0.0043 max mem: 3951 +train: [13] [300/400] eta: 0:00:36 lr: 0.000111 loss: 0.3443 (0.3461) grad: 0.1436 (0.1490) time: 0.5225 data: 0.2000 max mem: 3951 +train: [13] [320/400] eta: 0:00:29 lr: 0.000110 loss: 0.3430 (0.3457) grad: 0.1442 (0.1488) time: 0.3440 data: 0.0038 max mem: 3951 +train: [13] [340/400] eta: 0:00:21 lr: 0.000108 loss: 0.3303 (0.3443) grad: 0.1435 (0.1489) time: 0.3676 data: 0.0037 max mem: 3951 +train: [13] [360/400] eta: 0:00:14 lr: 0.000107 loss: 0.3158 (0.3433) grad: 0.1441 (0.1487) time: 0.3413 data: 0.0041 max mem: 3951 +train: [13] [380/400] eta: 0:00:07 lr: 0.000105 loss: 0.3173 (0.3427) grad: 0.1441 (0.1483) time: 0.3555 data: 0.0043 max mem: 3951 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.3184 (0.3417) grad: 0.1418 (0.1482) time: 0.3730 data: 0.0043 max mem: 3951 +train: [13] Total time: 0:02:26 (0.3653 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.3184 (0.3417) grad: 0.1418 (0.1482) +eval (validation): [13] [ 0/63] eta: 0:03:31 time: 3.3556 data: 3.0789 max mem: 3951 +eval (validation): [13] [20/63] eta: 0:00:21 time: 0.3644 data: 0.0045 max mem: 3951 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3180 data: 0.0033 max mem: 3951 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3128 data: 0.0038 max mem: 3951 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3143 data: 0.0038 max mem: 3951 +eval (validation): [13] Total time: 0:00:24 (0.3848 s / it) +cv: [13] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 0.132 acc: 0.958 f1: 0.948 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [14] [ 0/400] eta: 0:22:09 lr: nan time: 3.3226 data: 3.0562 max mem: 3951 +train: [14] [ 20/400] eta: 0:03:04 lr: 0.000102 loss: 0.3137 (0.3207) grad: 0.1472 (0.1519) time: 0.3443 data: 0.0043 max mem: 3951 +train: [14] [ 40/400] eta: 0:02:31 lr: 0.000101 loss: 0.3163 (0.3212) grad: 0.1427 (0.1480) time: 0.3541 data: 0.0028 max mem: 3951 +train: [14] [ 60/400] eta: 0:02:14 lr: 0.000099 loss: 0.3177 (0.3207) grad: 0.1365 (0.1479) time: 0.3413 data: 0.0040 max mem: 3951 +train: [14] [ 80/400] eta: 0:02:03 lr: 0.000098 loss: 0.3233 (0.3240) grad: 0.1539 (0.1506) time: 0.3602 data: 0.0043 max mem: 3951 +train: [14] [100/400] eta: 0:01:53 lr: 0.000096 loss: 0.3418 (0.3223) grad: 0.1541 (0.1505) time: 0.3512 data: 0.0046 max mem: 3951 +train: [14] [120/400] eta: 0:01:44 lr: 0.000095 loss: 0.3293 (0.3230) grad: 0.1500 (0.1508) time: 0.3468 data: 0.0043 max mem: 3951 +train: [14] [140/400] eta: 0:01:35 lr: 0.000093 loss: 0.3293 (0.3248) grad: 0.1442 (0.1494) time: 0.3291 data: 0.0041 max mem: 3951 +train: [14] [160/400] eta: 0:01:27 lr: 0.000092 loss: 0.3253 (0.3268) grad: 0.1435 (0.1498) time: 0.3552 data: 0.0045 max mem: 3951 +train: [14] [180/400] eta: 0:01:20 lr: 0.000090 loss: 0.3248 (0.3260) grad: 0.1498 (0.1500) time: 0.3585 data: 0.0047 max mem: 3951 +train: [14] [200/400] eta: 0:01:12 lr: 0.000089 loss: 0.3246 (0.3259) grad: 0.1404 (0.1489) time: 0.3603 data: 0.0042 max mem: 3951 +train: [14] [220/400] eta: 0:01:05 lr: 0.000088 loss: 0.3251 (0.3274) grad: 0.1333 (0.1485) time: 0.3440 data: 0.0043 max mem: 3951 +train: [14] [240/400] eta: 0:00:57 lr: 0.000086 loss: 0.3323 (0.3273) grad: 0.1384 (0.1478) time: 0.3561 data: 0.0039 max mem: 3951 +train: [14] [260/400] eta: 0:00:50 lr: 0.000085 loss: 0.3214 (0.3277) grad: 0.1399 (0.1470) time: 0.3575 data: 0.0044 max mem: 3951 +train: [14] [280/400] eta: 0:00:43 lr: 0.000083 loss: 0.3140 (0.3275) grad: 0.1347 (0.1464) time: 0.3391 data: 0.0043 max mem: 3951 +train: [14] [300/400] eta: 0:00:36 lr: 0.000082 loss: 0.3329 (0.3293) grad: 0.1389 (0.1465) time: 0.4878 data: 0.1747 max mem: 3951 +train: [14] [320/400] eta: 0:00:29 lr: 0.000081 loss: 0.3335 (0.3291) grad: 0.1423 (0.1464) time: 0.3523 data: 0.0041 max mem: 3951 +train: [14] [340/400] eta: 0:00:22 lr: 0.000079 loss: 0.3288 (0.3284) grad: 0.1417 (0.1461) time: 0.3478 data: 0.0034 max mem: 3951 +train: [14] [360/400] eta: 0:00:14 lr: 0.000078 loss: 0.3123 (0.3267) grad: 0.1371 (0.1457) time: 0.3450 data: 0.0043 max mem: 3951 +train: [14] [380/400] eta: 0:00:07 lr: 0.000076 loss: 0.3129 (0.3257) grad: 0.1373 (0.1454) time: 0.3328 data: 0.0040 max mem: 3951 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.3137 (0.3259) grad: 0.1385 (0.1451) time: 0.3469 data: 0.0042 max mem: 3951 +train: [14] Total time: 0:02:25 (0.3635 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.3137 (0.3259) grad: 0.1385 (0.1451) +eval (validation): [14] [ 0/63] eta: 0:03:29 time: 3.3275 data: 3.0610 max mem: 3951 +eval (validation): [14] [20/63] eta: 0:00:21 time: 0.3700 data: 0.0048 max mem: 3951 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3450 data: 0.0038 max mem: 3951 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3218 data: 0.0035 max mem: 3951 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3206 data: 0.0036 max mem: 3951 +eval (validation): [14] Total time: 0:00:25 (0.3969 s / it) +cv: [14] best hparam: (2.3, 1.0) (029) ('029_lr2.3e+00_wd1.0e+00') loss: 0.142 acc: 0.954 f1: 0.945 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:23:28 lr: nan time: 3.5209 data: 3.2915 max mem: 3951 +train: [15] [ 20/400] eta: 0:03:10 lr: 0.000074 loss: 0.3071 (0.3172) grad: 0.1387 (0.1409) time: 0.3492 data: 0.0034 max mem: 3951 +train: [15] [ 40/400] eta: 0:02:32 lr: 0.000072 loss: 0.3225 (0.3223) grad: 0.1387 (0.1409) time: 0.3425 data: 0.0032 max mem: 3951 +train: [15] [ 60/400] eta: 0:02:16 lr: 0.000071 loss: 0.3063 (0.3184) grad: 0.1402 (0.1409) time: 0.3604 data: 0.0040 max mem: 3951 +train: [15] [ 80/400] eta: 0:02:05 lr: 0.000070 loss: 0.3063 (0.3188) grad: 0.1421 (0.1417) time: 0.3577 data: 0.0025 max mem: 3951 +train: [15] [100/400] eta: 0:01:55 lr: 0.000068 loss: 0.3208 (0.3210) grad: 0.1394 (0.1428) time: 0.3516 data: 0.0039 max mem: 3951 +train: [15] [120/400] eta: 0:01:45 lr: 0.000067 loss: 0.3240 (0.3204) grad: 0.1426 (0.1435) time: 0.3433 data: 0.0039 max mem: 3951 +train: [15] [140/400] eta: 0:01:36 lr: 0.000066 loss: 0.3086 (0.3198) grad: 0.1454 (0.1445) time: 0.3326 data: 0.0037 max mem: 3951 +train: [15] [160/400] eta: 0:01:28 lr: 0.000064 loss: 0.3142 (0.3209) grad: 0.1447 (0.1447) time: 0.3599 data: 0.0037 max mem: 3951 +train: [15] [180/400] eta: 0:01:21 lr: 0.000063 loss: 0.3135 (0.3187) grad: 0.1380 (0.1439) time: 0.3604 data: 0.0046 max mem: 3951 +train: [15] [200/400] eta: 0:01:13 lr: 0.000062 loss: 0.2965 (0.3177) grad: 0.1387 (0.1442) time: 0.3505 data: 0.0040 max mem: 3951 +train: [15] [220/400] eta: 0:01:05 lr: 0.000061 loss: 0.3073 (0.3182) grad: 0.1435 (0.1443) time: 0.3596 data: 0.0040 max mem: 3951 +train: [15] [240/400] eta: 0:00:58 lr: 0.000059 loss: 0.3241 (0.3209) grad: 0.1421 (0.1447) time: 0.3470 data: 0.0034 max mem: 3951 +train: [15] [260/400] eta: 0:00:50 lr: 0.000058 loss: 0.3241 (0.3201) grad: 0.1403 (0.1449) time: 0.3601 data: 0.0045 max mem: 3951 +train: [15] [280/400] eta: 0:00:43 lr: 0.000057 loss: 0.3067 (0.3183) grad: 0.1353 (0.1444) time: 0.3555 data: 0.0040 max mem: 3951 +train: [15] [300/400] eta: 0:00:37 lr: 0.000056 loss: 0.3093 (0.3180) grad: 0.1387 (0.1445) time: 0.5350 data: 0.2145 max mem: 3951 +train: [15] [320/400] eta: 0:00:29 lr: 0.000054 loss: 0.3241 (0.3182) grad: 0.1428 (0.1445) time: 0.3402 data: 0.0105 max mem: 3951 +train: [15] [340/400] eta: 0:00:22 lr: 0.000053 loss: 0.3397 (0.3189) grad: 0.1367 (0.1440) time: 0.3685 data: 0.0024 max mem: 3951 +train: [15] [360/400] eta: 0:00:14 lr: 0.000052 loss: 0.3327 (0.3195) grad: 0.1367 (0.1437) time: 0.3485 data: 0.0035 max mem: 3951 +train: [15] [380/400] eta: 0:00:07 lr: 0.000051 loss: 0.3066 (0.3183) grad: 0.1379 (0.1435) time: 0.3325 data: 0.0035 max mem: 3951 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.2994 (0.3189) grad: 0.1384 (0.1434) time: 0.3618 data: 0.0035 max mem: 3951 +train: [15] Total time: 0:02:27 (0.3694 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.2994 (0.3189) grad: 0.1384 (0.1434) +eval (validation): [15] [ 0/63] eta: 0:03:27 time: 3.2988 data: 3.0722 max mem: 3951 +eval (validation): [15] [20/63] eta: 0:00:21 time: 0.3547 data: 0.0073 max mem: 3951 +eval (validation): [15] [40/63] eta: 0:00:09 time: 0.3220 data: 0.0028 max mem: 3951 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3183 data: 0.0035 max mem: 3951 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3134 data: 0.0035 max mem: 3951 +eval (validation): [15] Total time: 0:00:24 (0.3819 s / it) +cv: [15] best hparam: (2.3, 1.0) (029) ('029_lr2.3e+00_wd1.0e+00') loss: 0.142 acc: 0.954 f1: 0.944 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:22:30 lr: nan time: 3.3757 data: 3.0860 max mem: 3951 +train: [16] [ 20/400] eta: 0:03:17 lr: 0.000048 loss: 0.3174 (0.3170) grad: 0.1468 (0.1481) time: 0.3783 data: 0.0041 max mem: 3951 +train: [16] [ 40/400] eta: 0:02:36 lr: 0.000047 loss: 0.3116 (0.3104) grad: 0.1504 (0.1508) time: 0.3414 data: 0.0043 max mem: 3951 +train: [16] [ 60/400] eta: 0:02:17 lr: 0.000046 loss: 0.3059 (0.3127) grad: 0.1451 (0.1466) time: 0.3492 data: 0.0046 max mem: 3951 +train: [16] [ 80/400] eta: 0:02:04 lr: 0.000045 loss: 0.3134 (0.3167) grad: 0.1416 (0.1454) time: 0.3374 data: 0.0042 max mem: 3951 +train: [16] [100/400] eta: 0:01:54 lr: 0.000044 loss: 0.3006 (0.3138) grad: 0.1387 (0.1451) time: 0.3589 data: 0.0044 max mem: 3951 +train: [16] [120/400] eta: 0:01:46 lr: 0.000043 loss: 0.3000 (0.3144) grad: 0.1400 (0.1463) time: 0.3583 data: 0.0046 max mem: 3951 +train: [16] [140/400] eta: 0:01:37 lr: 0.000042 loss: 0.3274 (0.3172) grad: 0.1463 (0.1464) time: 0.3398 data: 0.0037 max mem: 3951 +train: [16] [160/400] eta: 0:01:28 lr: 0.000041 loss: 0.3094 (0.3172) grad: 0.1423 (0.1454) time: 0.3449 data: 0.0037 max mem: 3951 +train: [16] [180/400] eta: 0:01:21 lr: 0.000040 loss: 0.3094 (0.3190) grad: 0.1380 (0.1452) time: 0.3563 data: 0.0042 max mem: 3951 +train: [16] [200/400] eta: 0:01:13 lr: 0.000039 loss: 0.3260 (0.3194) grad: 0.1380 (0.1445) time: 0.3535 data: 0.0038 max mem: 3951 +train: [16] [220/400] eta: 0:01:05 lr: 0.000038 loss: 0.3453 (0.3211) grad: 0.1345 (0.1441) time: 0.3472 data: 0.0038 max mem: 3951 +train: [16] [240/400] eta: 0:00:58 lr: 0.000036 loss: 0.3428 (0.3212) grad: 0.1374 (0.1445) time: 0.3527 data: 0.0037 max mem: 3951 +train: [16] [260/400] eta: 0:00:50 lr: 0.000035 loss: 0.3428 (0.3234) grad: 0.1384 (0.1438) time: 0.3432 data: 0.0039 max mem: 3951 +train: [16] [280/400] eta: 0:00:43 lr: 0.000034 loss: 0.3112 (0.3223) grad: 0.1343 (0.1431) time: 0.3609 data: 0.0041 max mem: 3951 +train: [16] [300/400] eta: 0:00:37 lr: 0.000033 loss: 0.2943 (0.3213) grad: 0.1417 (0.1436) time: 0.5241 data: 0.1808 max mem: 3951 +train: [16] [320/400] eta: 0:00:29 lr: 0.000032 loss: 0.2988 (0.3202) grad: 0.1409 (0.1431) time: 0.3949 data: 0.0042 max mem: 3951 +train: [16] [340/400] eta: 0:00:22 lr: 0.000031 loss: 0.3031 (0.3204) grad: 0.1368 (0.1430) time: 0.3310 data: 0.0039 max mem: 3951 +train: [16] [360/400] eta: 0:00:14 lr: 0.000031 loss: 0.3164 (0.3204) grad: 0.1368 (0.1431) time: 0.3555 data: 0.0047 max mem: 3951 +train: [16] [380/400] eta: 0:00:07 lr: 0.000030 loss: 0.3164 (0.3196) grad: 0.1409 (0.1430) time: 0.3493 data: 0.0042 max mem: 3951 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.2885 (0.3184) grad: 0.1391 (0.1426) time: 0.3368 data: 0.0041 max mem: 3951 +train: [16] Total time: 0:02:27 (0.3688 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.2885 (0.3184) grad: 0.1391 (0.1426) +eval (validation): [16] [ 0/63] eta: 0:04:33 time: 4.3357 data: 4.0351 max mem: 3951 +eval (validation): [16] [20/63] eta: 0:00:23 time: 0.3570 data: 0.0035 max mem: 3951 +eval (validation): [16] [40/63] eta: 0:00:10 time: 0.3553 data: 0.0033 max mem: 3951 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3052 data: 0.0037 max mem: 3951 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.2972 data: 0.0035 max mem: 3951 +eval (validation): [16] Total time: 0:00:25 (0.4058 s / it) +cv: [16] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 0.150 acc: 0.955 f1: 0.947 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:21:39 lr: nan time: 3.2499 data: 2.9502 max mem: 3951 +train: [17] [ 20/400] eta: 0:03:12 lr: 0.000028 loss: 0.2995 (0.3063) grad: 0.1474 (0.1520) time: 0.3702 data: 0.0035 max mem: 3951 +train: [17] [ 40/400] eta: 0:02:31 lr: 0.000027 loss: 0.3108 (0.3100) grad: 0.1382 (0.1438) time: 0.3304 data: 0.0042 max mem: 3951 +train: [17] [ 60/400] eta: 0:02:14 lr: 0.000026 loss: 0.3132 (0.3120) grad: 0.1341 (0.1439) time: 0.3450 data: 0.0044 max mem: 3951 +train: [17] [ 80/400] eta: 0:02:02 lr: 0.000025 loss: 0.3235 (0.3159) grad: 0.1358 (0.1427) time: 0.3380 data: 0.0030 max mem: 3951 +train: [17] [100/400] eta: 0:01:52 lr: 0.000024 loss: 0.3157 (0.3149) grad: 0.1352 (0.1426) time: 0.3553 data: 0.0037 max mem: 3951 +train: [17] [120/400] eta: 0:01:45 lr: 0.000023 loss: 0.3157 (0.3126) grad: 0.1358 (0.1418) time: 0.3712 data: 0.0032 max mem: 3951 +train: [17] [140/400] eta: 0:01:36 lr: 0.000023 loss: 0.3044 (0.3107) grad: 0.1387 (0.1413) time: 0.3338 data: 0.0039 max mem: 3951 +train: [17] [160/400] eta: 0:01:28 lr: 0.000022 loss: 0.3044 (0.3115) grad: 0.1387 (0.1409) time: 0.3612 data: 0.0041 max mem: 3951 +train: [17] [180/400] eta: 0:01:20 lr: 0.000021 loss: 0.3216 (0.3130) grad: 0.1374 (0.1409) time: 0.3547 data: 0.0042 max mem: 3951 +train: [17] [200/400] eta: 0:01:13 lr: 0.000020 loss: 0.3216 (0.3138) grad: 0.1344 (0.1406) time: 0.3546 data: 0.0043 max mem: 3951 +train: [17] [220/400] eta: 0:01:05 lr: 0.000019 loss: 0.3190 (0.3138) grad: 0.1408 (0.1413) time: 0.3457 data: 0.0042 max mem: 3951 +train: [17] [240/400] eta: 0:00:57 lr: 0.000019 loss: 0.3080 (0.3136) grad: 0.1443 (0.1412) time: 0.3452 data: 0.0042 max mem: 3951 +train: [17] [260/400] eta: 0:00:50 lr: 0.000018 loss: 0.3091 (0.3146) grad: 0.1443 (0.1417) time: 0.3581 data: 0.0042 max mem: 3951 +train: [17] [280/400] eta: 0:00:43 lr: 0.000017 loss: 0.3070 (0.3148) grad: 0.1355 (0.1414) time: 0.3533 data: 0.0043 max mem: 3951 +train: [17] [300/400] eta: 0:00:37 lr: 0.000016 loss: 0.2935 (0.3135) grad: 0.1353 (0.1413) time: 0.5151 data: 0.1850 max mem: 3951 +train: [17] [320/400] eta: 0:00:29 lr: 0.000016 loss: 0.2994 (0.3136) grad: 0.1402 (0.1411) time: 0.3883 data: 0.0044 max mem: 3951 +train: [17] [340/400] eta: 0:00:22 lr: 0.000015 loss: 0.3081 (0.3136) grad: 0.1394 (0.1410) time: 0.3615 data: 0.0038 max mem: 3951 +train: [17] [360/400] eta: 0:00:14 lr: 0.000014 loss: 0.3176 (0.3141) grad: 0.1349 (0.1406) time: 0.3371 data: 0.0040 max mem: 3951 +train: [17] [380/400] eta: 0:00:07 lr: 0.000014 loss: 0.3156 (0.3138) grad: 0.1340 (0.1402) time: 0.3544 data: 0.0044 max mem: 3951 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.3156 (0.3141) grad: 0.1340 (0.1399) time: 0.3412 data: 0.0043 max mem: 3951 +train: [17] Total time: 0:02:27 (0.3685 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.3156 (0.3141) grad: 0.1340 (0.1399) +eval (validation): [17] [ 0/63] eta: 0:03:36 time: 3.4311 data: 3.2230 max mem: 3951 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3476 data: 0.0038 max mem: 3951 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3248 data: 0.0034 max mem: 3951 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3128 data: 0.0037 max mem: 3951 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3094 data: 0.0036 max mem: 3951 +eval (validation): [17] Total time: 0:00:24 (0.3810 s / it) +cv: [17] best hparam: (7.1, 1.0) (036) ('036_lr7.1e+00_wd1.0e+00') loss: 0.126 acc: 0.955 f1: 0.946 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:20:08 lr: nan time: 3.0212 data: 2.8068 max mem: 3951 +train: [18] [ 20/400] eta: 0:02:59 lr: 0.000012 loss: 0.3073 (0.3048) grad: 0.1339 (0.1370) time: 0.3439 data: 0.0036 max mem: 3951 +train: [18] [ 40/400] eta: 0:02:28 lr: 0.000012 loss: 0.2996 (0.3073) grad: 0.1362 (0.1401) time: 0.3522 data: 0.0040 max mem: 3951 +train: [18] [ 60/400] eta: 0:02:14 lr: 0.000011 loss: 0.3079 (0.3131) grad: 0.1355 (0.1391) time: 0.3584 data: 0.0039 max mem: 3951 +train: [18] [ 80/400] eta: 0:02:01 lr: 0.000011 loss: 0.3302 (0.3202) grad: 0.1364 (0.1408) time: 0.3371 data: 0.0042 max mem: 3951 +train: [18] [100/400] eta: 0:01:53 lr: 0.000010 loss: 0.3126 (0.3168) grad: 0.1364 (0.1399) time: 0.3625 data: 0.0038 max mem: 3951 +train: [18] [120/400] eta: 0:01:44 lr: 0.000009 loss: 0.2958 (0.3136) grad: 0.1309 (0.1409) time: 0.3553 data: 0.0036 max mem: 3951 +train: [18] [140/400] eta: 0:01:35 lr: 0.000009 loss: 0.3164 (0.3159) grad: 0.1406 (0.1417) time: 0.3346 data: 0.0040 max mem: 3951 +train: [18] [160/400] eta: 0:01:27 lr: 0.000008 loss: 0.3164 (0.3150) grad: 0.1417 (0.1420) time: 0.3368 data: 0.0043 max mem: 3951 +train: [18] [180/400] eta: 0:01:20 lr: 0.000008 loss: 0.2996 (0.3135) grad: 0.1417 (0.1421) time: 0.3617 data: 0.0045 max mem: 3951 +train: [18] [200/400] eta: 0:01:12 lr: 0.000007 loss: 0.3102 (0.3138) grad: 0.1413 (0.1421) time: 0.3549 data: 0.0042 max mem: 3951 +train: [18] [220/400] eta: 0:01:05 lr: 0.000007 loss: 0.3240 (0.3142) grad: 0.1365 (0.1418) time: 0.3458 data: 0.0044 max mem: 3951 +train: [18] [240/400] eta: 0:00:57 lr: 0.000006 loss: 0.3241 (0.3135) grad: 0.1361 (0.1416) time: 0.3516 data: 0.0044 max mem: 3951 +train: [18] [260/400] eta: 0:00:50 lr: 0.000006 loss: 0.3237 (0.3136) grad: 0.1364 (0.1411) time: 0.3480 data: 0.0042 max mem: 3951 +train: [18] [280/400] eta: 0:00:43 lr: 0.000006 loss: 0.3069 (0.3132) grad: 0.1364 (0.1413) time: 0.3466 data: 0.0044 max mem: 3951 +train: [18] [300/400] eta: 0:00:37 lr: 0.000005 loss: 0.3081 (0.3132) grad: 0.1365 (0.1409) time: 0.5320 data: 0.1859 max mem: 3951 +train: [18] [320/400] eta: 0:00:29 lr: 0.000005 loss: 0.3088 (0.3120) grad: 0.1359 (0.1409) time: 0.3805 data: 0.0038 max mem: 3951 +train: [18] [340/400] eta: 0:00:22 lr: 0.000004 loss: 0.3033 (0.3119) grad: 0.1318 (0.1406) time: 0.3422 data: 0.0036 max mem: 3951 +train: [18] [360/400] eta: 0:00:14 lr: 0.000004 loss: 0.2989 (0.3108) grad: 0.1310 (0.1401) time: 0.3441 data: 0.0032 max mem: 3951 +train: [18] [380/400] eta: 0:00:07 lr: 0.000004 loss: 0.2961 (0.3114) grad: 0.1320 (0.1398) time: 0.3505 data: 0.0038 max mem: 3951 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.3157 (0.3114) grad: 0.1347 (0.1401) time: 0.3508 data: 0.0043 max mem: 3951 +train: [18] Total time: 0:02:26 (0.3665 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.3157 (0.3114) grad: 0.1347 (0.1401) +eval (validation): [18] [ 0/63] eta: 0:03:27 time: 3.2894 data: 3.0383 max mem: 3951 +eval (validation): [18] [20/63] eta: 0:00:21 time: 0.3494 data: 0.0088 max mem: 3951 +eval (validation): [18] [40/63] eta: 0:00:09 time: 0.3278 data: 0.0031 max mem: 3951 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3400 data: 0.0036 max mem: 3951 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3282 data: 0.0021 max mem: 3951 +eval (validation): [18] Total time: 0:00:24 (0.3900 s / it) +cv: [18] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.127 acc: 0.955 f1: 0.946 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:21:55 lr: nan time: 3.2897 data: 3.0728 max mem: 3951 +train: [19] [ 20/400] eta: 0:03:10 lr: 0.000003 loss: 0.3418 (0.3351) grad: 0.1391 (0.1367) time: 0.3618 data: 0.0172 max mem: 3951 +train: [19] [ 40/400] eta: 0:02:33 lr: 0.000003 loss: 0.3142 (0.3281) grad: 0.1391 (0.1375) time: 0.3499 data: 0.0033 max mem: 3951 +train: [19] [ 60/400] eta: 0:02:18 lr: 0.000002 loss: 0.3085 (0.3161) grad: 0.1387 (0.1378) time: 0.3629 data: 0.0040 max mem: 3951 +train: [19] [ 80/400] eta: 0:02:04 lr: 0.000002 loss: 0.3077 (0.3154) grad: 0.1334 (0.1372) time: 0.3397 data: 0.0039 max mem: 3951 +train: [19] [100/400] eta: 0:01:55 lr: 0.000002 loss: 0.3006 (0.3121) grad: 0.1334 (0.1375) time: 0.3594 data: 0.0040 max mem: 3951 +train: [19] [120/400] eta: 0:01:46 lr: 0.000002 loss: 0.2995 (0.3100) grad: 0.1320 (0.1368) time: 0.3590 data: 0.0042 max mem: 3951 +train: [19] [140/400] eta: 0:01:37 lr: 0.000001 loss: 0.3114 (0.3120) grad: 0.1361 (0.1377) time: 0.3406 data: 0.0041 max mem: 3951 +train: [19] [160/400] eta: 0:01:28 lr: 0.000001 loss: 0.3114 (0.3111) grad: 0.1410 (0.1386) time: 0.3297 data: 0.0040 max mem: 3951 +train: [19] [180/400] eta: 0:01:20 lr: 0.000001 loss: 0.2942 (0.3103) grad: 0.1410 (0.1395) time: 0.3558 data: 0.0042 max mem: 3951 +train: [19] [200/400] eta: 0:01:13 lr: 0.000001 loss: 0.3007 (0.3101) grad: 0.1435 (0.1403) time: 0.3526 data: 0.0035 max mem: 3951 +train: [19] [220/400] eta: 0:01:05 lr: 0.000001 loss: 0.2933 (0.3078) grad: 0.1329 (0.1393) time: 0.3526 data: 0.0044 max mem: 3951 +train: [19] [240/400] eta: 0:00:58 lr: 0.000001 loss: 0.2933 (0.3082) grad: 0.1322 (0.1393) time: 0.3612 data: 0.0043 max mem: 3951 +train: [19] [260/400] eta: 0:00:50 lr: 0.000000 loss: 0.3024 (0.3079) grad: 0.1404 (0.1393) time: 0.3482 data: 0.0045 max mem: 3951 +train: [19] [280/400] eta: 0:00:43 lr: 0.000000 loss: 0.3024 (0.3088) grad: 0.1289 (0.1384) time: 0.3475 data: 0.0043 max mem: 3951 +train: [19] [300/400] eta: 0:00:37 lr: 0.000000 loss: 0.3339 (0.3102) grad: 0.1316 (0.1385) time: 0.4952 data: 0.1818 max mem: 3951 +train: [19] [320/400] eta: 0:00:29 lr: 0.000000 loss: 0.3248 (0.3106) grad: 0.1322 (0.1380) time: 0.3602 data: 0.0049 max mem: 3951 +train: [19] [340/400] eta: 0:00:22 lr: 0.000000 loss: 0.3219 (0.3112) grad: 0.1345 (0.1383) time: 0.3667 data: 0.0026 max mem: 3951 +train: [19] [360/400] eta: 0:00:14 lr: 0.000000 loss: 0.3005 (0.3108) grad: 0.1351 (0.1380) time: 0.3596 data: 0.0041 max mem: 3951 +train: [19] [380/400] eta: 0:00:07 lr: 0.000000 loss: 0.2999 (0.3103) grad: 0.1349 (0.1383) time: 0.3405 data: 0.0040 max mem: 3951 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.2999 (0.3107) grad: 0.1414 (0.1385) time: 0.3572 data: 0.0041 max mem: 3951 +train: [19] Total time: 0:02:27 (0.3679 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.2999 (0.3107) grad: 0.1414 (0.1385) +eval (validation): [19] [ 0/63] eta: 0:03:32 time: 3.3787 data: 3.1027 max mem: 3951 +eval (validation): [19] [20/63] eta: 0:00:21 time: 0.3647 data: 0.0039 max mem: 3951 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3411 data: 0.0038 max mem: 3951 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3359 data: 0.0029 max mem: 3951 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3331 data: 0.0032 max mem: 3951 +eval (validation): [19] Total time: 0:00:25 (0.3992 s / it) +cv: [19] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.126 acc: 0.956 f1: 0.946 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +evaluating last checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +eval model info: +{"score": 0.9558531746031746, "hparam": [6, 1.0], "hparam_id": 35, "epoch": 19, "is_best": false, "best_score": 0.9575892857142857} +eval (train): [20] [ 0/297] eta: 0:16:43 time: 3.3802 data: 3.1034 max mem: 3951 +eval (train): [20] [ 20/297] eta: 0:02:16 time: 0.3497 data: 0.0030 max mem: 3951 +eval (train): [20] [ 40/297] eta: 0:01:50 time: 0.3629 data: 0.0038 max mem: 3951 +eval (train): [20] [ 60/297] eta: 0:01:35 time: 0.3518 data: 0.0038 max mem: 3951 +eval (train): [20] [ 80/297] eta: 0:01:23 time: 0.3332 data: 0.0036 max mem: 3951 +eval (train): [20] [100/297] eta: 0:01:15 time: 0.3597 data: 0.0040 max mem: 3951 +eval (train): [20] [120/297] eta: 0:01:07 time: 0.3804 data: 0.0041 max mem: 3951 +eval (train): [20] [140/297] eta: 0:00:58 time: 0.3371 data: 0.0034 max mem: 3951 +eval (train): [20] [160/297] eta: 0:00:50 time: 0.3319 data: 0.0033 max mem: 3951 +eval (train): [20] [180/297] eta: 0:00:43 time: 0.3590 data: 0.0038 max mem: 3951 +eval (train): [20] [200/297] eta: 0:00:35 time: 0.3439 data: 0.0036 max mem: 3951 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3551 data: 0.0038 max mem: 3951 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3513 data: 0.0040 max mem: 3951 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3231 data: 0.0037 max mem: 3951 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3415 data: 0.0040 max mem: 3951 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3041 data: 0.0033 max mem: 3951 +eval (train): [20] Total time: 0:01:46 (0.3586 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:18 time: 3.1484 data: 2.8932 max mem: 3951 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3338 data: 0.0077 max mem: 3951 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3262 data: 0.0036 max mem: 3951 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3170 data: 0.0026 max mem: 3951 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3149 data: 0.0025 max mem: 3951 +eval (validation): [20] Total time: 0:00:23 (0.3757 s / it) +eval (test): [20] [ 0/79] eta: 0:04:17 time: 3.2569 data: 3.0411 max mem: 3951 +eval (test): [20] [20/79] eta: 0:00:28 time: 0.3479 data: 0.0183 max mem: 3951 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3310 data: 0.0033 max mem: 3951 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3305 data: 0.0029 max mem: 3951 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3016 data: 0.0027 max mem: 3951 +eval (test): [20] Total time: 0:00:29 (0.3702 s / it) +evaluating best checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +eval model info: +{"score": 0.9575892857142857, "hparam": [4.3, 1.0], "hparam_id": 33, "epoch": 13, "is_best": true, "best_score": 0.9575892857142857} +eval (train): [20] [ 0/297] eta: 0:20:52 time: 4.2174 data: 3.9938 max mem: 3951 +eval (train): [20] [ 20/297] eta: 0:02:26 time: 0.3446 data: 0.0166 max mem: 3951 +eval (train): [20] [ 40/297] eta: 0:01:53 time: 0.3531 data: 0.0038 max mem: 3951 +eval (train): [20] [ 60/297] eta: 0:01:36 time: 0.3329 data: 0.0033 max mem: 3951 +eval (train): [20] [ 80/297] eta: 0:01:25 time: 0.3494 data: 0.0040 max mem: 3951 +eval (train): [20] [100/297] eta: 0:01:17 time: 0.3860 data: 0.0040 max mem: 3951 +eval (train): [20] [120/297] eta: 0:01:07 time: 0.3389 data: 0.0035 max mem: 3951 +eval (train): [20] [140/297] eta: 0:00:59 time: 0.3487 data: 0.0037 max mem: 3951 +eval (train): [20] [160/297] eta: 0:00:51 time: 0.3356 data: 0.0035 max mem: 3951 +eval (train): [20] [180/297] eta: 0:00:42 time: 0.3150 data: 0.0036 max mem: 3951 +eval (train): [20] [200/297] eta: 0:00:35 time: 0.3372 data: 0.0037 max mem: 3951 +eval (train): [20] [220/297] eta: 0:00:27 time: 0.3365 data: 0.0035 max mem: 3951 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3593 data: 0.0039 max mem: 3951 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3555 data: 0.0039 max mem: 3951 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3584 data: 0.0038 max mem: 3951 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3208 data: 0.0035 max mem: 3951 +eval (train): [20] Total time: 0:01:46 (0.3596 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:28 time: 3.3108 data: 3.0904 max mem: 3951 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3672 data: 0.0045 max mem: 3951 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3444 data: 0.0031 max mem: 3951 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3297 data: 0.0035 max mem: 3951 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3238 data: 0.0034 max mem: 3951 +eval (validation): [20] Total time: 0:00:25 (0.3988 s / it) +eval (test): [20] [ 0/79] eta: 0:04:26 time: 3.3777 data: 3.0869 max mem: 3951 +eval (test): [20] [20/79] eta: 0:00:29 time: 0.3593 data: 0.0051 max mem: 3951 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3377 data: 0.0033 max mem: 3951 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3316 data: 0.0029 max mem: 3951 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3068 data: 0.0029 max mem: 3951 +eval (test): [20] Total time: 0:00:29 (0.3775 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:-------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|---------:|--------:|----------:|--------:|-----------:| +| flat_mae | reg | linear | hcpya_task21 | best | 13 | 0.00129 | 0.05 | 33 | [4.3, 1.0] | train | 0.078811 | 0.98068 | 0.0010048 | 0.9817 | 0.00098721 | +| flat_mae | reg | linear | hcpya_task21 | best | 13 | 0.00129 | 0.05 | 33 | [4.3, 1.0] | validation | 0.13199 | 0.95759 | 0.0029501 | 0.9477 | 0.0041699 | +| flat_mae | reg | linear | hcpya_task21 | best | 13 | 0.00129 | 0.05 | 33 | [4.3, 1.0] | test | 0.15045 | 0.95198 | 0.0029253 | 0.94258 | 0.0038393 | + + +done! total time: 1:02:55 diff --git a/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/train_log.json b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..57d9456fa7064b9b2b481094fb047682dd3a3ff8 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/hcpya_task21__reg__linear/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.4805843490362167, "train/grad": 0.5826628637313843, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.24689208984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.24506591796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.2419970703125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.23900390625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.2360552978515624, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.231876220703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.22714599609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.2219195556640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.2148760986328124, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.207520751953125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.2001507568359373, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1889947509765624, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1780023193359375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.161943359375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1464239501953126, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1312554931640624, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.111580810546875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0877734375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.060460205078125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.034188232421875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0008319091796873, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.965169677734375, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.92390625, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.8777484130859374, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.827508544921875, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.7646884155273437, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.705860900878906, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.65035888671875, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.572737731933594, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.478338165283203, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.3931483459472656, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.3161431121826173, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.2134864807128904, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.124019432067871, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.021193618774414, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9230352592468263, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.822488784790039, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.7309051036834717, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.6361368274688721, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5251173973083496, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.444367926120758, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.3770570945739746, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.294116530418396, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.2265464818477632, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.1531981205940247, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.0804313716292382, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.0223258924484253, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.957828871011734, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.9068138867616653, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.10461418975144625, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.10452319629490375, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.10437388654798269, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.10422169517725706, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.10407327741384506, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.10386760786175728, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.1036314956843853, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.10337185081094503, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.10302119351923465, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.10264929845929145, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.10228102274239063, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.10172662399709224, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.10117600735276938, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.10037198808044195, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.09958798617124558, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.09882740031927824, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.09784697387367487, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.09667050037533045, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.09533474501222372, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.09407329408451914, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.09251334857195616, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.09089881230145692, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.08913622492924332, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.08729260692372919, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.08543797072023153, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.08330768262967467, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.08146030891686679, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0798160921037197, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.07762127021327615, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.07505137287080288, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.07278128176927566, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.07074686210602522, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.06806948618963361, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.06575092777609826, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.06310703681781887, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.06060960927978158, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.058066139053553345, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.055758705995976925, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05337795283645391, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05060698988847435, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.048632797319442034, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.047006525974720714, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04500194272492081, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.043320809006690976, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.04146529364399612, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.039681962793692946, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.038343206527642906, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03707735162694007, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.036201070467941465, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.2256407737731934, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.2204604148864746, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.211498260498047, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.2026865482330322, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.194033622741699, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.181886672973633, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1682472229003906, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.153280735015869, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1332693099975586, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1122992038726807, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.091921806335449, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0609099864959717, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.030869483947754, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.9879047870635986, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.946477174758911, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.906806230545044, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.856252670288086, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.796621799468994, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.7293765544891357, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.6657397747039795, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5866284370422363, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5029144287109375, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.407022714614868, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.301013946533203, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.186209201812744, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.0445008277893066, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.91409432888031, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.7950284481048584, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.6354857683181763, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.4553453922271729, "validation/loss_030_lr2.7e+00_wd1.0e+00": 1.3070409297943115, "validation/loss_031_lr3.1e+00_wd1.0e+00": 1.185137152671814, "validation/loss_032_lr3.7e+00_wd1.0e+00": 1.0405203104019165, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.929963231086731, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.8186289668083191, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.7265675663948059, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.6442983746528625, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.5784189701080322, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.5184236764907837, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.4571841061115265, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4182724952697754, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3902648389339447, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.36101725697517395, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.33921465277671814, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.31495919823646545, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.2916727364063263, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.27327263355255127, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.25328269600868225, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.2411305457353592, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.02132936507936508, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.021081349206349208, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.021577380952380952, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.021081349206349208, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.02132936507936508, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0220734126984127, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.021825396825396824, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.023809523809523808, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.027281746031746032, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.031001984126984128, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.032242063492063495, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.03869047619047619, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.04513888888888889, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.05555555555555555, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06522817460317461, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07837301587301587, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.09226190476190477, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.11879960317460317, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.14583333333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.17708333333333334, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2177579365079365, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2586805555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.30481150793650796, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.35615079365079366, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.40922619047619047, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.4724702380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.5406746031746031, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.5927579365079365, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.6614583333333334, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.7271825396825397, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.7661210317460317, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.7914186507936508, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8127480158730159, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.828125, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8435019841269841, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8536706349206349, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8643353174603174, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8759920634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8824404761904762, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8901289682539683, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8938492063492064, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8968253968253969, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8980654761904762, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9007936507936508, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.904265873015873, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9077380952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9141865079365079, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9211309523809523, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9248511904761905, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013916128550054111, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013781203592161602, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014499921595488015, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014320292378107755, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014823260357231633, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01560159366428505, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.015632811586551, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017588871737926136, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021475687760975047, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.025872325169280064, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.027392250572651964, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.03394867387102218, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.038921715109557886, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.04679311730864436, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.05380403193983823, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.06215956762966501, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.07148391841226465, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.08847148779158787, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.10249570341645253, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.11413441232883398, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.13097754666083533, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.14925766945973437, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.17811213576066606, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21820473820847275, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2613638995496903, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.3220159818560145, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.39110876750328305, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.45176450570849475, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.5475912811324741, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.6386753390168004, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.6981186147726648, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.7410950490844651, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.7677164436378899, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.7860305614904765, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8072222094900564, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8220888894507196, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8382346994766633, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8524746111580713, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8611805369177263, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8719507389674733, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8748637514256004, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8744709620285711, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8748166660082439, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8770084998266777, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8831403248324619, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8907176669726224, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9003864077996262, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9089058840565749, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9125049191385498, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.9068138867616653, "validation/loss_best": 0.2411305457353592, "validation/acc_best": 0.9248511904761905, "validation/f1_best": 0.9125049191385498} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.6034174221754074, "train/grad": 0.42953954994678495, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1753070068359377, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1636322021484373, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.14447998046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1258245849609376, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1074169921875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0824029541015623, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.054542236328125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0242718505859374, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.985400390625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.945096435546875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.906461181640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.84986083984375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.796300048828125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7205902099609376, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6496258544921876, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5824929809570314, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4981695556640626, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.399455871582031, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.2892822265625, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1872093200683596, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0620367431640627, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9345893859863281, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7959017181396484, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6531034088134766, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5122018432617188, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.3565452194213867, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.2298046112060548, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.125170021057129, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.999515905380249, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.8736282634735107, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.7798773002624512, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.7078538322448731, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.6270180904865265, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.56758713722229, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.5093906509876251, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.46179347693920136, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.4193047031760216, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.38517952620983126, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.35382086768746374, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.32143378801643846, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.30056644752621653, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.2847326759248972, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.26718224748969077, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.2542835196107626, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.24072356902062894, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.22799510657787322, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.21774995114654302, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.20795036461204292, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.2026921513862908, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.10074673261493444, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.10015554714947938, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.09919220563024282, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0982484669983387, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.09732590470463037, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.09606195904314518, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.09467509042471647, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.093167446218431, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.09125426478683948, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.08929914943873882, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.08746773071587086, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.08486485611647368, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.08251105181872845, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.07940271520987154, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.07673572670668363, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.07442685024812817, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.07179177829995752, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.06901669919490815, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.06621857097372413, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.06381068145856261, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.06101015266031027, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.05824590895324946, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.05525719730183482, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.05216105636209249, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.049047629591077564, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04551769411191344, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042564535457640884, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04008164801634848, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03704954044893384, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03398617601953447, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0317087843362242, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02998982584103942, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.028099870216101407, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02675004694610834, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02543984407559037, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.024302476420998573, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.023220181367360055, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.022287679836153984, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021408514794893564, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.020607706657610834, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.020190701331011952, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.019977895389311016, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01987654032651335, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.019882844681851565, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.019762875246815383, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.019663619240745903, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.019208050039596854, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.018697401848621665, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01880604077130556, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1243977546691895, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1053664684295654, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0742292404174805, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.0440711975097656, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.01517653465271, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.975428342819214, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.932363510131836, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.886035919189453, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.827382802963257, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.767054319381714, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7104995250701904, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6282265186309814, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5514743328094482, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.443920612335205, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.343324899673462, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.248610258102417, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.130781412124634, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.9941530227661133, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.8456177711486816, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.712655782699585, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.5572078227996826, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.4089807271957397, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.2597830295562744, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.119825005531311, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.9945263862609863, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.8695937991142273, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.7772641777992249, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.706516683101654, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.6271525025367737, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.5527330040931702, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4997555911540985, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.45973268151283264, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.4155466556549072, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3830220699310303, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.3507537245750427, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.32379597425460815, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2981888949871063, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.27813616394996643, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2611997723579407, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.24189414083957672, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.22772780060768127, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.21724507212638855, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.20787782967090607, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.20400643348693848, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.19937750697135925, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.1926395446062088, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.18274644017219543, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.17633916437625885, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.17514629662036896, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.029017857142857144, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.03249007936507937, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.037698412698412696, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.043402777777777776, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.05084325396825397, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.05952380952380952, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07390873015873016, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08878968253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.10739087301587301, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.13368055555555555, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.15922619047619047, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2021329365079365, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.24206349206349206, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.30282738095238093, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.35813492063492064, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.4079861111111111, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.4632936507936508, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.5275297619047619, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.5910218253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.6374007936507936, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7001488095238095, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.7457837301587301, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.7807539682539683, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8067956349206349, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.824156746031746, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8377976190476191, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8504464285714286, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8601190476190477, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8683035714285714, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.876984126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8831845238095238, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8871527777777778, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8943452380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8978174603174603, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9037698412698413, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9089781746031746, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9174107142857143, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9211309523809523, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9228670634920635, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9270833333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9308035714285714, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9327876984126984, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9325396825396826, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9332837301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9308035714285714, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9312996031746031, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9367559523809523, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9402281746031746, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9417162698412699, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.023656860208701554, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.027294346517243707, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.032708016407734755, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.03802456107084692, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.04323027083805096, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.04998854927224533, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.05981138348164072, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.06919838015201932, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.08297041298837966, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09767098531543943, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11150718392468023, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13137016758439884, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14744148884933836, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.184362729009707, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.22161668205313148, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2612193627406895, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.30486184085856594, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.3655426387114228, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.4353892904540085, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.4990360543899131, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.5927012095162534, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.6698768483099831, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7236481032482154, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.7613609789810541, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7840803757814033, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.803288783413559, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8209271739223232, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8346952535719806, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8453544733491986, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8566171268275967, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8647234322390654, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8686045350227974, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8760449667859699, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8805227202177813, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8871300241295236, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8959030120652484, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9047285406810566, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9081447933751698, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9086466913305228, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9138204142050717, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9179327582079209, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.920845747381703, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9210727030090033, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9230005461711418, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9205647186778693, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9207148722472447, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9228937557577055, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9253453785999495, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9259716712397069, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.2026921513862908, "validation/loss_best": 0.17514629662036896, "validation/acc_best": 0.9417162698412699, "validation/f1_best": 0.9259716712397069} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 1.2125388887524604, "train/grad": 0.354999375641346, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.0485260009765627, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.0206719970703126, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.9759283447265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.9331341552734376, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.89224365234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.8374822998046874, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.778363037109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.7155267333984376, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.6368560791015625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.5569464111328126, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.48170166015625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.3730450439453126, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.271828308105469, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.131498107910156, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.003351135253906, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.8862008666992187, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.7456919860839843, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.5923881530761719, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.4371413421630859, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.3076447296142577, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.1669181060791016, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.0421098899841308, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.9242715072631836, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.8193743133544922, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.729147367477417, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.6416932678222657, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.5782236623764038, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.5300551414489746, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.4761825382709503, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.4256130301952362, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.38961479246616365, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3624026334285736, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.3320161670446396, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.30950752839446066, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.28696075201034543, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.2680750666558743, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.25069487661123274, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.23644114047288894, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.2231554514169693, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.20876785703003406, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.19882688220590353, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.19106950797140598, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.18225692745298147, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.17597166128456593, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.1703808675520122, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.16607310601510109, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.1647502649202943, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.16665850304067134, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.17102407287806273, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.09556931544095278, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.09420148599892855, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.09201671443879604, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.08995113778859377, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0880051963403821, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.08547036446630955, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.08283509068191051, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.08017894092947245, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0770948177203536, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.07426715163514018, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.07187137858942151, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0688172222673893, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.06630723999813198, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.06316330904141068, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0604566615819931, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.05800543008372187, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.055018146727234124, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.05164360234513879, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.048069690354168415, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0449673478025943, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04149329762905836, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03833000569604337, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.035293632736429575, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03255810027010739, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.030186609299853444, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.027896580528467895, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.026225799052044748, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.024962790394201875, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.023545475797727703, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.022194357686676085, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.021235922486521303, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02051329702604562, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01970844600349665, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01912945056799799, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.018539463486522436, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.018053097003139555, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.017611334454268216, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01731191451661289, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.017073362306691705, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.016764052445068955, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01651643920689821, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.016265294838231057, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01601467461558059, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.015937637281604113, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01603927615797147, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.01643844329053536, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.016963948630727827, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01762083394918591, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01809361672028899, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9759697914123535, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.9394078254699707, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.8814573287963867, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.826720952987671, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.7743611335754395, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.705738067626953, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.6317849159240723, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.554049253463745, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.4564950466156006, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.357908248901367, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.2648072242736816, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.1314423084259033, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.00840425491333, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.8413118124008179, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.6937122344970703, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.5638867616653442, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.4152590036392212, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.2627627849578857, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.1180623769760132, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.0048540830612183, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.8885390758514404, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7904512882232666, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.7014513611793518, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6245691776275635, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.5597589015960693, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.4978896975517273, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.4529586136341095, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.41873592138290405, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.3804328441619873, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.3441762626171112, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.3182734549045563, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2987963557243347, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.2767801582813263, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.25952497124671936, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.24145260453224182, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.22668345272541046, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.21434268355369568, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.20446659624576569, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1935979425907135, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1847698986530304, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.18090957403182983, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.17605546116828918, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.17017203569412231, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.16877935826778412, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.1726120561361313, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.17244085669517517, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.17358195781707764, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.18424712121486664, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.20186421275138855, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05952380952380952, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07316468253968254, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.08978174603174603, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.1076388888888889, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.13120039682539683, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.16319444444444445, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.20089285714285715, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.24330357142857142, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.28794642857142855, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.33705357142857145, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.37996031746031744, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.44518849206349204, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5009920634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5793650793650794, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6423611111111112, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7003968253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7462797619047619, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7881944444444444, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8107638888888888, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8246527777777778, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8405257936507936, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8511904761904762, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8603670634920635, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8725198412698413, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8814484126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8883928571428571, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8953373015873016, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9010416666666666, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9072420634920635, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9117063492063492, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9171626984126984, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9196428571428571, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9226190476190477, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9250992063492064, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9273313492063492, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.933531746031746, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9384920634920635, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9384920634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9404761904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9422123015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9382440476190477, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9404761904761905, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9429563492063492, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9424603174603174, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9377480158730159, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9397321428571429, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9392361111111112, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9350198412698413, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9305555555555556, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.05019584644905072, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.059473200772010175, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.07009704381925186, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.08142070816181024, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0923767931868719, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.10989525699787543, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.1260673237322391, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.1429854769458288, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.16621164927159282, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.20183377609847272, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.2387580575675972, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.29699523582902226, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.3501390289980272, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.4386512204126205, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5230657379213071, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6025595058881994, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6712509437234416, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7325815928513937, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7646113301355305, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7848530705930022, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8080597371665765, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8191762067475012, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8328403662600582, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8498432750795967, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.86092614545144, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8702494778308262, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8781090172923515, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8835132916963027, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.890914129970746, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.895167593398798, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9019200215841408, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9056218692603254, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9087193580041474, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9121399643170385, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.915057976584138, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9226516213391204, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9280905760095296, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9286174979787182, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.931010831570188, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9316680907054762, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9295934618477311, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9327314009764196, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.934394260533936, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.933209560685221, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9266630407182054, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9281308644678121, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9263845458845749, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9203930676856228, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.911975148828999, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.18225692745298147, "validation/loss_best": 0.17017203569412231, "validation/acc_best": 0.9429563492063492, "validation/f1_best": 0.934394260533936} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 0.9728984147310257, "train/grad": 0.29857669487595556, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.88438720703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.839390869140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.7685040283203124, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.70203857421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.6390899658203124, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.5560198974609376, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.46704345703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.3728543090820313, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.25549560546875, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.1372946166992186, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.0274221801757815, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.8734706115722657, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.7362167358398437, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.558332977294922, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.4097711181640624, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.2854653930664062, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.1500465393066406, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.0177913284301758, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8975775527954102, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8064721965789795, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.7151238250732422, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.6394128656387329, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.5715537929534912, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5132777690887451, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.4644050192832947, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.4175547230243683, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.38369782269001007, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.35786464154720304, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.32870437145233156, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.3006448015570641, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.2800730769336224, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.26414983049035073, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.2459414579719305, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.23199051104485988, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.21781448654830457, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.20568679869174958, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.19444792691618204, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.18513469457626341, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.17645993266254664, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.16714219512417913, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.16176051702350377, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1589591494947672, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.15719718169420957, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.15445940613746642, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.14889185633510352, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.14897006106562913, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.15418920228257776, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.16477929273620248, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.17705111518502237, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.08629435315728187, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0841969970986247, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.08102878199890257, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.07825006194412708, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.07581362217664718, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.07289869410917163, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.07016598777845502, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.06764062421396375, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0648857999779284, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.06238180786371231, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.060163498707115654, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.05707395315170288, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.054233281631022694, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.05037448670715094, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04697108427062631, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.044012458547949794, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.040693990821018815, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0373664447106421, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03429800246842205, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.031946516772732136, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02957279549911618, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.027593090757727624, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.025809507500380278, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0242820714879781, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0229982381593436, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021781803499907257, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02090860724914819, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.020257169459946454, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01951987700536847, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0187675559008494, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01819237616378814, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.017706249658949673, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01714217069093138, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.016713812951929866, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.016322485096752645, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.016068531260825695, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.015895126801915467, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01580873429775238, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.015805669720284642, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.015660770537797363, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.015706157910171895, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.015960036790929735, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.016362373582087458, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0163429008400999, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01574740217765793, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.01610681738238782, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.016853948468342424, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.017767527645919472, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.018525248237419873, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.800203323364258, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.7463223934173584, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.6617777347564697, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.582832098007202, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.508472204208374, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.410529136657715, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.3058714866638184, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.1953675746917725, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.059034824371338, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.9237200021743774, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.8007663488388062, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.633569359779358, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.490044355392456, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.3128081560134888, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.1723618507385254, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.0599464178085327, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.9421513676643372, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.8313998579978943, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.7339283227920532, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.6613225936889648, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.5895925760269165, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.5305154919624329, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.4777654707431793, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.43239694833755493, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.3940495252609253, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.3569094240665436, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.32984665036201477, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.30886420607566833, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2852264642715454, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.26233354210853577, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.24574322998523712, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2327239066362381, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.2179410755634308, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.20716668665409088, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.19646553695201874, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.18717177212238312, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.17899122834205627, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17293250560760498, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.16619178652763367, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.15755563974380493, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.151754692196846, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.1479516327381134, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.14443251490592957, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.14506198465824127, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.15205241739749908, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.16148561239242554, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.17328770458698273, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.1841244101524353, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.2036888748407364, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.12003968253968254, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.14211309523809523, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.1817956349206349, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.22470238095238096, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.26240079365079366, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3115079365079365, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3687996031746032, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4270833333333333, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.4937996031746032, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5580357142857143, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6111111111111112, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6775793650793651, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7276785714285714, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7805059523809523, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8025793650793651, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8234126984126984, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8363095238095238, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8484623015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8608630952380952, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8717757936507936, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8794642857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8869047619047619, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.892609126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9005456349206349, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9035218253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9099702380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9139384920634921, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9193948412698413, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9248511904761905, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9290674603174603, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.933531746031746, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9357638888888888, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9397321428571429, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9412202380952381, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9429563492063492, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9446924603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9454365079365079, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9449404761904762, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9449404761904762, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9461805555555556, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9484126984126984, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9506448412698413, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9508928571428571, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9486607142857143, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9439484126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9367559523809523, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9362599206349206, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9399801587301587, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.933531746031746, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08862185916669982, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.10110057333264393, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.11913689597577715, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.13988833264483094, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.1562800260170398, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.18900473203500678, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.23109875912095834, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.28023338267875864, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.340434723082318, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.40450425355405867, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.46743548593413986, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.5601880565497342, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.6425282664463028, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7236355384306472, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.7536166227305215, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.7828174563766439, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.802720627693846, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8184797739314166, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8353763274622026, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8482728282217825, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8573857964712962, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8663631494001572, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8735652392198532, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8826609523017769, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.886585802572815, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8935217619347997, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8988480110116948, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.904767225696608, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9113027730712092, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9174365121306804, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9215582402547795, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9238526698809209, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.928932231999775, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9312650469776267, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9331669337121107, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9350860046771532, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.936834340621162, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9371834747794036, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.936336170119871, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9371619411344667, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9390981020714472, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9417370587321541, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9420507149618336, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9394876193273812, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9354151648189093, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9258864268731124, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9231750607619724, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9244260336452088, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.918835822787165, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.15719718169420957, "validation/loss_best": 0.14443251490592957, "validation/acc_best": 0.9508928571428571, "validation/f1_best": 0.9420507149618336} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 0.7917435857653617, "train/grad": 0.25811570703983305, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.694263916015625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6309796142578126, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5320208740234373, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.4398846435546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.35312744140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.2391452026367187, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.1181381225585936, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.99238525390625, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.8403350830078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.6943106079101562, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.565769805908203, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.3978514099121093, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.260131607055664, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.0976290893554688, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.9739487457275391, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.8779300689697266, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.7797339057922363, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.6890889930725098, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6103618621826172, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.5523240232467651, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.49525734663009646, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.44830491542816164, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.40646030187606813, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.37032905101776126, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.3396276932954788, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.3098687136173248, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.28793242990970613, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.2709918856620789, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.2514832490682602, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.23244481950998305, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.21800791464745997, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.20670754536986352, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.19338127799332142, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.18305240154266358, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.17232485990971327, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.16311249252408744, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.1541213233023882, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.14652447888627648, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.13968033840879798, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.13333663027733564, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.12977672973647714, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.12794797567650676, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.12655943252146243, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.12693663148209453, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.13051194491796195, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1413542968966067, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.1574987439531833, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.1827615672443062, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.20977920947596432, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.07809291193261743, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.07565212646499277, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.07222645223140717, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.06945785522460937, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.06717654144391418, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.06452358728274703, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.061964582316577435, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.059425775241106746, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.05634029433131218, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.05326552007347345, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.05043383277952671, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0465458744764328, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04321495333686471, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.039144555563107135, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03598409438505769, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03350140961818397, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03094852670095861, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.028576763002201914, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.026499999659135937, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02496229753829539, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.023432758999988437, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02215480199083686, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02099980551749468, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.019999365829862656, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01913117939606309, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.018293627179227768, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.017683652066625654, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.017217735936865212, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01669181363657117, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.016204821583814918, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.015836659572087228, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.015542866922914981, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.015163876730948687, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.014880332285538317, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.014605483666528016, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.014395171101205051, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01417253068415448, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.013974300508853049, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.013844557804986835, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.013920235608238726, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.014050777249503882, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.014317135165911168, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.014647560853045433, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014814997084904462, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.015284078735858202, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.016618214533664286, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01814634838839993, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01980223442427814, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.020614342100452633, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.609680652618408, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5383222103118896, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.4272618293762207, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.323870897293091, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2265498638153076, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.099395751953125, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.9656431674957275, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.8287688493728638, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.6672818660736084, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.5165534019470215, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.387987732887268, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.225987434387207, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.0980706214904785, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.9521542191505432, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.844489574432373, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.7624177932739258, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.679530918598175, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.6038212180137634, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.538353681564331, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.49018940329551697, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.44267737865448, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.4036770164966583, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3687850534915924, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.33864787220954895, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.3128296732902527, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.287688672542572, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.268962025642395, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.25456464290618896, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2379283905029297, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.22188667953014374, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.2100917398929596, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.20111753046512604, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.19144923985004425, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.18492484092712402, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.17871443927288055, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1733894795179367, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.16694876551628113, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.16170942783355713, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.15844863653182983, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.15738721191883087, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.15674705803394318, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.15907254815101624, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.16408544778823853, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.16927970945835114, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.1709120273590088, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.1672946661710739, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.1770820915699005, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.21556337177753448, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.23138867318630219, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.21329365079365079, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.25124007936507936, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3087797619047619, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.35887896825396826, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4099702380952381, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4732142857142857, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5362103174603174, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5974702380952381, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6607142857142857, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7152777777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7552083333333334, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7976190476190477, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8142361111111112, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8333333333333334, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8479662698412699, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.857390873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8680555555555556, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8759920634920635, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8824404761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8888888888888888, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8965773809523809, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9015376984126984, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9084821428571429, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9126984126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9159226190476191, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9223710317460317, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9265873015873016, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9305555555555556, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9350198412698413, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9389880952380952, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9402281746031746, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9422123015873016, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9424603174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9417162698412699, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9412202380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9429563492063492, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9429563492063492, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9422123015873016, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9419642857142857, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9407242063492064, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9402281746031746, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9379960317460317, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9375, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9379960317460317, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9392361111111112, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9407242063492064, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.939484126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9317956349206349, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9327876984126984, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.13548513463596987, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.1515593651497565, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.18767799989328907, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.2227184479967678, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.2664597021808762, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3240918295137821, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3800879681897532, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.4508934630245863, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.5384779681471105, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.6195157818271368, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6861070270742767, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7475058830880352, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7696905374013154, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7968481855290748, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8178876573271638, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8296088034170337, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8421886002651553, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8526736921941642, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8598150537763386, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8683827584610403, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8767466766953896, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8825424943747403, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8914762426318529, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.896157445332274, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9000588735579628, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9080142336441556, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9132695712899379, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9174722762830421, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.923105763398751, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9278897480319789, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9293426565390213, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9313155116300796, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9313836929357757, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9304306011311907, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9308425598976087, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9318045614399428, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9323510684326574, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9312775263606662, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.931352240896822, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9305740478468761, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9299099575095727, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9276819403728217, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9268819169535375, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9261953719148112, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9279767757418552, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9302642201308463, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.928776983499616, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9168761638144476, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9187514155562511, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 0.16311249252408744, "validation/loss_best": 0.1733894795179367, "validation/acc_best": 0.9429563492063492, "validation/f1_best": 0.9318045614399428} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 0.6676138544082642, "train/grad": 0.23457022227346896, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.5113214111328124, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.4312176513671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.3062591552734375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.190323486328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.0819137573242186, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.9421286010742187, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.7977873229980468, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.6538316345214843, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.4888677978515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.3400723266601562, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.2170468139648438, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.066736068725586, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.9513740348815918, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.822967414855957, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.7298878002166748, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.6596505165100097, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.5892535591125488, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.5251218318939209, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.46983798742294314, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.4291999638080597, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.3889918029308319, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.3557101398706436, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.32575608491897584, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.299525583088398, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2770585311949253, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.25485959872603414, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.23807975091040134, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.22489047802984716, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.20939726188778876, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.19392871014773846, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.1823172367364168, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1730801247432828, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.16225011747330428, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.15387335419654846, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.14520632511004805, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.13787406867370008, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.13144328370690345, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.1270484061166644, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.12403133779764175, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.12142352076247334, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.11800950598903, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.11507992269471287, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.11410536020994186, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.11708065356127918, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.12464868952520192, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.13887006654404105, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.15928470384329557, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.18232451878488065, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.21213096584193408, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.07242020756006241, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.07007469227537513, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.06692702827975154, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.06436767287552357, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.06214501366019249, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.05934326665475965, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0563995149731636, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.05332549497485161, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.049622435364872215, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04610915506258607, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04309836369007826, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.039325081380084156, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03637267917394638, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03304974097758531, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03061396679840982, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.028757444266229868, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.026872111605480312, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.025126893473789096, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.023598065776750447, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02245479430537671, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02132082235533744, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020361026409082116, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.019483783491887154, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01869962106924504, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01801145438104868, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.017321441629901527, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01678498977329582, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.016365109933540224, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.015856590317562223, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0153730649035424, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.015013096486218274, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.014724196577444672, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.014403519742190838, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.014164582504890859, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.013963120959233492, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.013865044945850969, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.013892404609359801, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.014193152654916047, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014777886357624084, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.015392790813930333, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.015218153046444059, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.014783525557722896, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.014438482751138508, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014591848098207265, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.014961433871649206, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.01560341514647007, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.016628483275417237, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.017562492962460964, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01849621162749827, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.4272258281707764, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.3397514820098877, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.2038562297821045, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.0783472061157227, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.9621189832687378, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.814063310623169, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.6641534566879272, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.5178096294403076, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.3548301458358765, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.2117223739624023, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.0964488983154297, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.9587379097938538, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.8552623987197876, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.7418538928031921, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.6605448722839355, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.5994510054588318, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.5385787487030029, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.48310601711273193, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.43550023436546326, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.4000616669654846, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.3651677072048187, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.33625248074531555, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.30977606773376465, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.2865894138813019, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.26663774251937866, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.24722032248973846, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.23284748196601868, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.22185082733631134, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.20885837078094482, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1961471438407898, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.18679578602313995, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.17969225347042084, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.17142266035079956, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.16493867337703705, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.15809132158756256, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.15299803018569946, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1492001861333847, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.14669044315814972, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1470671147108078, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.15585926175117493, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.16819746792316437, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.18223458528518677, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.19945991039276123, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2132120132446289, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.22018833458423615, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.2269916832447052, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.24776767194271088, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.2953631579875946, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.2802179455757141, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.3060515873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.3492063492063492, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.41617063492063494, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.47693452380952384, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5305059523809523, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5989583333333334, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6569940476190477, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7192460317460317, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7646329365079365, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7953869047619048, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.814484126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8315972222222222, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8469742063492064, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8571428571428571, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8692956349206349, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8782242063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8859126984126984, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8931051587301587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8978174603174603, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9030257936507936, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.908234126984127, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9154265873015873, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9184027777777778, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9236111111111112, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9278273809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9317956349206349, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.935515873015873, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.939484126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9399801587301587, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9439484126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9454365079365079, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9446924603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9459325396825397, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9469246031746031, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.949156746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9499007936507936, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9494047619047619, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9441964285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9382440476190477, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9340277777777778, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9290674603174603, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9278273809523809, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9268353174603174, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9305555555555556, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9317956349206349, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9231150793650794, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9308035714285714, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.18379155930240612, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.21533196116273334, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.27308577378617244, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.328616912981612, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3768005626508787, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.45912631665118797, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.5416247637020701, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6332518109526143, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7030507170186281, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7471159053521167, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7718219699656549, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.795249111741267, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8173165792442534, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8295741037787397, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8466276210659123, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8573846067144777, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8659881424975195, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8727605319388845, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.878378679736455, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8844582779839834, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8916169462679939, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9003452434432196, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.905397414888865, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9108701968453349, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9153584433794153, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.920082595654291, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9250219792672747, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9291250756800293, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9297445634967643, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9341044434330761, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9358509796753397, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9344190636952628, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9344106192455274, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.935617769358017, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9381191902480426, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.940302460347604, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9399189164090556, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9395255007541023, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9381458821378398, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.932242828111968, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9241263007814715, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9192791719410925, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9118169438370346, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9089230331669986, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9084897928206702, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9146928722244474, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9144926756387887, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9037909167556956, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9094824642925465, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 0.13787406867370008, "validation/loss_best": 0.15299803018569946, "validation/acc_best": 0.9503968253968254, "validation/f1_best": 0.940302460347604} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.578634535819292, "train/grad": 0.2114877736568451, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.3380657958984377, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.242786560058594, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.0953878784179687, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.9605767822265625, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.8372979736328126, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.6829368591308593, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.5301008605957032, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.3844940948486328, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.2262238311767577, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.091045799255371, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.9841509246826172, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.858849048614502, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.7659911155700684, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.6652952194213867, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.5936375617980957, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.5400257635116578, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.4866437745094299, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.4381887006759644, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.3963212633132935, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.3654154747724533, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.33460316598415374, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.3088779240846634, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.28547213912010194, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.26469293162226676, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2465035945177078, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.22807864017784596, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.21401409491896628, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.20267320856451987, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.18924795024096966, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.17545744508504868, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.16496775083243848, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.15654192358255387, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.14648856040090322, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.13873657405376436, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.1306191235780716, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.12362804088741541, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.11688827082514763, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.11131228546611965, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.10635516978800297, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.10135808989405631, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0996315498650074, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.09992978933267295, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.10203349757939577, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.10497730460017919, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.10956342719495296, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1228559107426554, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.13921817801892758, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.1565093390736729, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.18842173962853848, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.06663750438019633, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.06447248864918947, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.061437508966773746, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.05877789596095681, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.05632169345393777, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.053123166430741546, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04978061540052295, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04643831646069884, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04265110468491912, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03931364694610238, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03663251968100667, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.03344757643528282, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.031067759515717625, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.028460994390770794, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.026580722546204926, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.025151911973953247, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02371214489918202, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022377594746649264, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02121016232762486, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02033654640428722, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.019458215804770588, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.018713180148042737, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01804503449704498, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01744547157548368, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.016916351271793247, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01638564605731517, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.015963468491099773, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.015623211204074323, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.015189671395346523, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.014718152177520096, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01437547919806093, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.014109356577973813, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.013796224861871452, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01358845692826435, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.013353407057002187, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.013165277598891406, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.013017459635157137, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.012967513483017683, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.013013712256215513, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.013036166685633361, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.013320540271233767, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.013697937952820212, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.014286496562417596, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014651247127912938, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.014833318355958908, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.01587175581837073, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01651063542929478, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.017210338555742055, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.017545483901631086, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.2679131031036377, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.166776418685913, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.0111751556396484, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.8701726198196411, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.7428903579711914, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.585742712020874, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.433104157447815, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.2903664112091064, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.1384142637252808, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.0109297037124634, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.9114624261856079, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.7961872816085815, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.7114155888557434, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.6199363470077515, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5549837350845337, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.5064107179641724, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4580705463886261, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.4139399826526642, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3758075535297394, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.34743180871009827, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.31920742988586426, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.2956928014755249, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.2742612063884735, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.25545209646224976, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.23922835290431976, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.22324854135513306, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.21126432716846466, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2021266520023346, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.19128145277500153, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.18000395596027374, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.17133484780788422, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1652616560459137, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.15924206376075745, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.15563587844371796, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.15123198926448822, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.14628733694553375, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14293289184570312, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.14180542528629303, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1416253000497818, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14385254681110382, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.14585517346858978, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.149382084608078, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.15576018393039703, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.16086968779563904, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.17658501863479614, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.2107965052127838, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.23265743255615234, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.25980713963508606, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3106982409954071, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.3814484126984127, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.4332837301587302, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5064484126984127, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5724206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6287202380952381, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6929563492063492, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7423115079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.78125, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8112599206349206, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8303571428571429, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.839781746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8521825396825397, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8628472222222222, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8735119047619048, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8812003968253969, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8869047619047619, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8931051587301587, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9007936507936508, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9089781746031746, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9124503968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9184027777777778, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.921875, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9250992063492064, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9278273809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9315476190476191, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9377480158730159, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9402281746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9402281746031746, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9432043650793651, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9461805555555556, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9469246031746031, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9489087301587301, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9481646825396826, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9469246031746031, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9476686507936508, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9484126984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9489087301587301, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9489087301587301, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9494047619047619, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9476686507936508, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9476686507936508, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9479166666666666, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9466765873015873, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9444444444444444, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9399801587301587, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9332837301587301, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9263392857142857, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9255952380952381, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9250992063492064, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.24164866468548538, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.28886748816544505, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.35647106552278973, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.4312403161755014, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.4957874109494491, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5899225388544876, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6667403855932037, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7249267546003424, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7667817862189901, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7944410982316196, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8081620271015627, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8269921769951012, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8398344041309068, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8511318307430263, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8609397831407284, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8669497996270391, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8743129387803162, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8825448557227418, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8927325010122993, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.896026209866591, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9038971189664157, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9094181840363039, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9125573014062986, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9148099636610938, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9201145534063266, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9265019962718675, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9291570615196063, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9296982322707494, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9334982542078866, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9365802566375346, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9366830504136784, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9387270303473663, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9377769801096457, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9369021790647922, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9374653847525247, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9378992320149997, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9391238930487618, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9397852650499371, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9406925296933353, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9387891618036145, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9392325804895689, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.940461334753234, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9390954811559069, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9359755511171871, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9279088825168674, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9194531905319892, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.912550621880267, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9096299387534976, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9133765001645354, "id_best": 38, "lr_best": 0.00294, "wd_best": 0.05, "train/loss_best": 0.10635516978800297, "validation/loss_best": 0.1416253000497818, "validation/acc_best": 0.9494047619047619, "validation/f1_best": 0.9406925296933353} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.5122249875962734, "train/grad": 0.19677245937287807, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.1892498779296874, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.0819601440429687, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9179754638671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.77122802734375, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.6405427551269531, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.481805419921875, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.3307229614257812, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1921021270751953, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.0473787307739257, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.9279670333862304, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.8359891128540039, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.7302553749084473, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.6531680679321289, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5703079175949096, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.5115881896018982, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.4678002309799194, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.4240501832962036, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3841234767436981, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.34935591638088226, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.32338770806789396, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.297324874997139, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.27527999728918073, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.2548102293908596, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.23651328071951866, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2202607313543558, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.20387623883783818, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.19117168985307217, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.18090756233781577, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.16855265203863382, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.15578300938010214, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.14589734269306065, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1379951720125973, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.1286608281545341, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.12149201763793827, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11382647005841136, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10702404100447893, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.10051839144900442, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09504896654747426, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09007767477072776, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08563186038285493, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.08352215432561934, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.08244294688105583, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.08302981271408498, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.08518730969168246, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.09048361931927502, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.10333403795026243, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.11650654607452453, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.14340673077851535, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.16950005851686, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0638261459209025, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.061636710483580825, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.05836223809048533, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.05534860294312239, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.052533456645905974, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.048933888338506225, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04532597729936242, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04189157210290432, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.038203485338017346, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03510811927728355, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03270332955755293, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029924848787486552, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02788138037547469, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.025667786002159118, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.024084906596690416, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022890710481442512, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021690492099151016, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020592358857393266, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019634098405949773, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01890980200842023, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.018189586279913784, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01757837360724807, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01701188290026039, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01650208931416273, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.016073882393538953, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.015624326635152102, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01526717712637037, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.014938252517022193, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.014470442300662398, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.013931275580544024, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.013525110189802944, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.013223800568375736, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.012926681214012206, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01275436817202717, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0125602350034751, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.012340970560908318, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.012029889961704612, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.011743266824632883, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.011593408565968275, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.011594385325443, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.011691200160421432, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.011798651987919583, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.012155627136817202, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.012603982286527753, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.013453943061176688, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.014635921989101916, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01551246337359771, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01691239895997569, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.018022084978874774, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.127675771713257, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.015618085861206, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.846096158027649, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.6959272623062134, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.564082384109497, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.4061864614486694, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.2580047845840454, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.1244224309921265, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.9865207076072693, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8740071654319763, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.788037896156311, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6897796988487244, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.618417501449585, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5418024063110352, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.48753365874290466, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4472353458404541, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.4068916141986847, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.37007153034210205, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3381374180316925, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.31425827741622925, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.29049739241600037, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.27033552527427673, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.25198158621788025, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.23569969832897186, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.22159422934055328, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.20765821635723114, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.19737152755260468, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.18935608863830566, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1795787811279297, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.16962842643260956, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1624869853258133, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.15727411210536957, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.15132111310958862, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.14724570512771606, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.14330261945724487, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1402960866689682, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1379426121711731, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13672545552253723, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13638964295387268, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1370275765657425, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13873504102230072, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.14213776588439941, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1498403400182724, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.1599111407995224, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.16974365711212158, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.19380591809749603, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.2312571406364441, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.2721310257911682, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.36742958426475525, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.4496527777777778, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5803571428571429, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6431051587301587, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6959325396825397, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7475198412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.785218253968254, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.810515873015873, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8340773809523809, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8449900793650794, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8563988095238095, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8678075396825397, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8777281746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8851686507936508, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8911210317460317, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8953373015873016, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9012896825396826, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9079861111111112, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9136904761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9184027777777778, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9223710317460317, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9265873015873016, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9325396825396826, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9367559523809523, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9399801587301587, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9412202380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9424603174603174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9441964285714286, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9484126984126984, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.951140873015873, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.949156746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9486607142857143, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9479166666666666, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9474206349206349, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9469246031746031, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9449404761904762, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9432043650793651, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.941468253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9340277777777778, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9288194444444444, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9184027777777778, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.30411427178285555, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.35087306979070804, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.43950769259675293, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.52106066734449, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5934625614351435, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6791539454825476, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.733029054106221, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7670726582555555, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.799962602896624, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8168720580074953, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8327630207443677, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8462173116219741, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.856773950786862, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8660560996513621, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.873133125098222, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8775347991431486, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.88370221506145, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8910496813326025, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8982789345304626, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9048149064413842, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9093060074188288, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9142731301338713, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9213400082335916, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9257533550913993, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9301250379285675, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9303197065673947, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9318318264065537, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9341447707378181, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9384065714396174, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9402320371949645, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9409631974657641, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9411670341464332, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9410804725597877, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9418653571282268, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9428148689921992, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9421600724545155, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9427053435172058, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9413425167456287, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9399403367617387, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9386646816962019, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9388658043629182, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9394716812118722, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9389713830869417, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9352792148668483, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9335578720900531, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9301503747154581, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9226024215728776, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9172798258557829, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9111076079996249, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.11382647005841136, "validation/loss_best": 0.14330261945724487, "validation/acc_best": 0.9528769841269841, "validation/f1_best": 0.9428148689921992} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.4668788468837738, "train/grad": 0.18314307913184166, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.060419616699219, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.9435873413085938, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.7684904479980468, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.6154208374023438, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.4825070190429688, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.3258689117431641, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.1812003326416016, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.0526044082641601, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9216887283325196, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8161454582214356, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.7361178207397461, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.6452021121978759, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5795159482955933, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5092607498168945, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.4595151829719544, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.42245283484458923, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3853162384033203, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3513686114549637, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.32152780413627624, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.29914673149585724, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.27648444384336474, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2571932528913021, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.23910395979881285, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.22286093905568122, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2083182789385319, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.19343050640076398, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.18185961347073318, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.1724839658290148, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.16123073298484086, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.14962090218439697, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.14060172021389009, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1332360953465104, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.12430820621550083, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.11719615240581334, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.10951063283719123, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10268254111520946, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.09633363042026759, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09109504201449453, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08627947611734271, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08133200038224458, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07875301792286336, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07742061028257012, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07762220142409205, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.08020282381214201, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.08380292519927025, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.09142753859981895, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.09838361902162433, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.12835091745480895, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.13858101351186633, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.06064313156530261, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.058355132229626176, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.05482836559414864, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.05157326431944966, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04860555727034807, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04494234288111329, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04143418189138174, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03824207950383425, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.034945352477952836, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03226551079191267, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.030215046238154172, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.027856937777251005, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026127017503604293, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02423899548128247, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02288030876778066, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021850237352773548, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02078697463963181, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019804911273531615, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018918197024613618, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01823695980478078, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01752053627744317, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016889822343364357, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01627894488628954, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015717816608957946, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015224887006916106, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.014716819359455258, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.014313128942158073, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.013989202147349715, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.013596659358590842, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.013200467694550752, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.012902753402013332, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.012651907813269645, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.012369266194291413, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01214499498018995, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.01185988548444584, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.011531082796864212, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011209300309419632, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010988094538915902, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010776457245228813, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01057224350515753, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010589072052389382, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010696803347673266, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.010999509009998291, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.011478297736030072, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01156908787204884, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.011805243726121262, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.012418329300126062, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0139299818163272, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.014538413095287979, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.006070613861084, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.8859837055206299, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.707358717918396, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.5531599521636963, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.420816421508789, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.2665963172912598, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.1259632110595703, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.0021339654922485, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.8774040341377258, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.7773587107658386, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7016484141349792, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6160642504692078, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5541119575500488, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.4879301190376282, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4411468505859375, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4062105119228363, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.37119075655937195, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3391662836074829, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.31111904978752136, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.2903057634830475, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2691343128681183, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.251426637172699, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.23510590195655823, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.22060738503932953, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.20790083706378937, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.19521591067314148, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1856515109539032, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.17836694419384003, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1701589971780777, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.16251008212566376, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.15705735981464386, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.15287216007709503, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.14804650843143463, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1444603055715561, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.14042223989963531, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13643702864646912, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13369148969650269, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1332654505968094, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.135337695479393, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13996411859989166, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.14140017330646515, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.1425464004278183, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1468631476163864, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.15220950543880463, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.17133691906929016, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.1954115331172943, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.22714115679264069, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.23008491098880768, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.24497754871845245, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5069444444444444, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5632440476190477, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6416170634920635, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7043650793650794, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.746031746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7827380952380952, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8090277777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8273809523809523, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8435019841269841, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8539186507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8670634920634921, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8759920634920635, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8824404761904762, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8901289682539683, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8958333333333334, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9010416666666666, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9077380952380952, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9129464285714286, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9196428571428571, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.923859126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9278273809523809, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9315476190476191, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.935515873015873, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9402281746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9434523809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9456845238095238, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9474206349206349, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9484126984126984, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9506448412698413, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.951140873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9499007936507936, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9513888888888888, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9546130952380952, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9518849206349206, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9513888888888888, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9499007936507936, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9499007936507936, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9444444444444444, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9419642857142857, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9382440476190477, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9392361111111112, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9434523809523809, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3564687673272206, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4173103672594972, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5185490224576383, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.603838797389087, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6748549043657691, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7296463122058583, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7667810050317964, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7911701763280243, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8129282806845023, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8266589443408583, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8431903942223803, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8535829684896353, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8616072559188772, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8708935383590456, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.87784050969814, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8831147374356231, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8905093667136261, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8981251232279045, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9054685468013434, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9115708974731204, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9159621898680437, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9209272951442826, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9252126624795866, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9309639886420646, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.93342758598443, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9352960635000971, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9373350484479471, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9387459621922983, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9408380164914909, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.941100700274371, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.942068212482456, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9414444106089486, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9419239307278159, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9408268518543601, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9415535501678265, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.942655302052926, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9445649155418854, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.94539029175773, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9434383408214243, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9430164795709963, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.941571822826707, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9415905305983772, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9405048982793938, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.941186965885238, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9334168541447823, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9326505836587515, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.927772342425496, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9297421090493023, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9327387732587614, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.09109504201449453, "validation/loss_best": 0.1332654505968094, "validation/acc_best": 0.9546130952380952, "validation/f1_best": 0.94539029175773} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.42785228073596954, "train/grad": 0.1751401897519827, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9449014282226562, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.8212677001953126, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.6388847351074218, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.4834695434570313, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.3517452239990235, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.2000489044189453, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.0637065887451171, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.9449634552001953, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.8264776611328125, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.7322059631347656, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.6613462734222412, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.5813009786605835, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5236316442489624, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.46199426651000974, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.41837836146354673, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.38569276332855223, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3528623998165131, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3225408774614334, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2958291694521904, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2756653806567192, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.25502531364560127, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.23734594792127608, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.22062734074890614, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.20542007841169835, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.1917824564874172, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.17782798450440168, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.16686018921434878, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.15796580795198678, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.14728292433544993, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.13623260179534555, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.12773340007290243, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1208180770650506, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.11261226762086153, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.10607399819418788, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.09915489609353244, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.09294808616861701, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0870644081942737, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.08240941598080098, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07833966802805663, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07470230520702899, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07269312495365739, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0715500208735466, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07095820164307952, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.07248566099442541, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.07562630427069962, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.08289980840869247, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0925797472987324, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.11921928062103689, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.1416094033792615, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05870755847543478, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.05623031696304679, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.05237758800387383, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0488825791887939, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04577281745150685, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04204464772716165, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03860363978892565, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03555639246478677, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03249229566194117, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.030039513800293207, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.028172153746709228, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.026045615114271642, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02448920580558479, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02279620152898133, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021583162862807512, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020666119889356196, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01972423315513879, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.01884678707458079, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018057458335533738, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.017459732075221838, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.016832295842468738, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016286457418464125, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.015771906133741142, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015285527585074305, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.014841309725306928, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.014371355227194727, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013983566893730313, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.013670537117868663, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.013300979579798877, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.012939072924200445, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.012689709377009422, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.012513662641867995, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.012316231050062925, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.012141114671248943, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.011879417612217367, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.011593664980027825, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011251797472359612, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.011009843294741585, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010814468432217837, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01067869270569645, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010633509013568982, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010590924243442714, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.010589479360496624, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.010820419425144792, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.011213737769285217, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.011999434634344653, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01287476585013792, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.015101916709681973, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.016334143565036357, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.9021495580673218, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.7766615152359009, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.5929436683654785, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.4378083944320679, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.3074737787246704, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.1587495803833008, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.026293158531189, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.911729097366333, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7978543043136597, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.7076084017753601, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.639801025390625, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5634567737579346, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.508424699306488, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.4498443305492401, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.4081415832042694, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.37698009610176086, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.3459646701812744, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3173729479312897, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.29213911294937134, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.27334973216056824, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.25438031554222107, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.23832888901233673, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.22342275083065033, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.21022644639015198, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1985899657011032, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.18698327243328094, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.17835916578769684, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.17146576941013336, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.16354219615459442, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1559324413537979, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1504325419664383, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.14654800295829773, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.14258138835430145, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1399235874414444, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1380687952041626, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13731326162815094, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13658638298511505, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13445547223091125, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1323423981666565, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13310123980045319, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13723812997341156, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.14443065226078033, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.15896476805210114, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.17513886094093323, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.18851526081562042, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.2064431756734848, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.22306443750858307, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.26149341464042664, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3339869976043701, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5558035714285714, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6145833333333334, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6875, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7418154761904762, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7767857142857143, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8055555555555556, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8239087301587301, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.839781746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8541666666666666, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8648313492063492, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8735119047619048, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8826884920634921, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.888640873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8963293650793651, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9010416666666666, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9069940476190477, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9131944444444444, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9171626984126984, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9211309523809523, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9255952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9290674603174603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9340277777777778, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9377480158730159, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9412202380952381, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9437003968253969, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9471726190476191, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.949156746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9499007936507936, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.953125, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9516369047619048, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.949156746031746, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9471726190476191, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9444444444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9404761904761905, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9370039682539683, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9357638888888888, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.933531746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9312996031746031, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9221230158730159, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4049331922875757, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.47497496309595405, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5809719086827185, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6683488331822781, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7199818035390492, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7599259313640159, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7859201759321146, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.807487038734776, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8267075189908524, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8415402197146078, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8513020247546412, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8626696781224281, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8700203269483524, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8780768118570783, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8838953576610754, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8901010808392759, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8974234247328823, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9038238738088133, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9081347465567265, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9137787697674801, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.917025644049377, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9232937113530184, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9269776238849898, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9311151397150287, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9335746879013123, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9370030796346408, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9394362744461233, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.940517212828834, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9410079499891065, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9438826264135398, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9443805983803887, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9441490419952707, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9437371500101144, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9434111982383044, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.943558342506259, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9428211189160433, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9443272213854248, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9459363231018514, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9465559127614741, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9446270306909512, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9427165260193665, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9394003834317297, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.936598692048912, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9321673461847964, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9286643578451278, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9263478308487324, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9227566203354857, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9227026460694199, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9100219545962944, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.12773340007290243, "validation/loss_best": 0.1504325419664383, "validation/acc_best": 0.9536210317460317, "validation/f1_best": 0.9443805983803887} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.3973948623239994, "train/grad": 0.1681940573453903, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.8545330810546874, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.7259494018554689, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.5395387268066407, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.3840100860595703, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.2545215606689453, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.1084532928466797, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.9793947601318359, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.8689605331420899, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.7599541664123535, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.6740125179290771, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.6096276187896729, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.5372813940048218, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.48518479108810425, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.4296001148223877, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3901064074039459, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3604195737838745, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3305368798971176, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3028465884923935, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2782126024365425, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2595749065279961, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.24032898530364036, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.22365313805639744, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.20792552962899208, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.19345556408166886, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.18034180834889413, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.16673338577151298, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.15595228482037782, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.14727375246584415, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.13667837232351304, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.12580157775431872, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.11727722061797977, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.11041402107104659, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.10222468867897988, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.09576675744727253, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.08900332970544696, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.08326066688634455, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07796370354481041, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07368637385778129, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07011406734585762, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.06744928641244768, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06652301248162985, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06559999721124768, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06373418423347176, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06347754187881946, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.06712609690614045, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0730542050395161, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.08159437101334334, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.09042539602145552, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.10279023848474025, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05697366511449218, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.054352165888994934, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.050310131646692756, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.046729086823761465, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04362357344478369, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.040006798561662435, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0367471123021096, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03391989351250231, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03109606941230595, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.028848831448704003, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.027130466336384416, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025169411273673177, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023733715978451073, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02216144896578044, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02102115381974727, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020151761607266963, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019258386571891606, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.018411781177856027, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.017637898894026876, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.017043356685899198, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01641569091938436, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.015860920469276608, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01533279471565038, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.014836776019074023, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.014386698007583618, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.013918609807733446, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013526527353096753, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.013215292163658887, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.012822208937723189, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.012429040337447078, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01209694851655513, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.011819020628463477, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.011462301490828395, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.011177626049611717, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.010928056833799929, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.010749090127646924, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.010598570228321478, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010457463091006503, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01040149636566639, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.010566762521630153, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01082363533321768, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010870301286922767, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.010673674979479984, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0107209805236198, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.011372050787322223, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.012129953869152815, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.012648789443774148, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.013048180635087192, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.013501030300394632, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.8155392408370972, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.6862841844558716, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.5001953840255737, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.3460808992385864, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.218865990638733, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.076150894165039, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.9510208964347839, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.8441063165664673, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7389799356460571, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6561740040779114, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.594429075717926, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5249108672142029, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.4749487638473511, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.4215473234653473, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.3836507499217987, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.35536453127861023, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.3271324932575226, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.30092060565948486, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.2780308127403259, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.26090413331985474, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.24350899457931519, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.22878959774971008, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.2149084061384201, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.2025870382785797, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1919492930173874, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.18101061880588531, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.17293719947338104, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.16643112897872925, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.15898215770721436, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.15176674723625183, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.14657452702522278, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.14299757778644562, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.13968601822853088, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1379418820142746, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.13669328391551971, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.136347696185112, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13645201921463013, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13537544012069702, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13502342998981476, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13643231987953186, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13842979073524475, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.14094661176204681, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1465650200843811, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.15265344083309174, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.15994711220264435, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.1669342815876007, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.18175144493579865, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.204990416765213, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.2140072137117386, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5989583333333334, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6505456349206349, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7204861111111112, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.765625, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7931547619047619, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.816468253968254, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8323412698412699, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8469742063492064, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8618551587301587, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.873015873015873, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8797123015873016, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.886656746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8916170634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8998015873015873, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9079861111111112, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9119543650793651, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9156746031746031, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9193948412698413, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9243551587301587, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9273313492063492, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9300595238095238, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9340277777777778, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9384920634920635, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.941468253968254, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9439484126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9476686507936508, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9489087301587301, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9494047619047619, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9565972222222222, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9565972222222222, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9568452380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.955109126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.957093253968254, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9521329365079365, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.953125, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9521329365079365, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9486607142857143, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9459325396825397, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9427083333333334, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9449404761904762, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4558058307460057, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5300169997952825, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6326370760825328, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7040404200587617, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7442271259803442, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7760930070945109, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7978601710206693, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8162212162177088, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8376964474789469, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8508078946398098, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8593429633141205, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8673900613932637, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8736022422684235, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8824065346047479, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8910155487245045, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8966413441054902, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9014418670056434, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9057026420054813, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9117565886949655, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.914963446207288, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9183244728378225, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9229967960964962, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9278559471834061, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.930976519350917, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9336492936451293, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9382922440170175, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9396011566155545, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9411480613685687, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.942859643825705, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9448550573955645, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9441185353183466, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9465883547280731, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9480187923731999, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9482032582669216, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9488095646932742, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9478412066199201, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.947818394229588, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9492125521789809, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9451956725792124, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9452678708470567, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9430324787154412, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.942862166330516, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9429808885022107, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9447550169433027, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9421990368715141, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9386785486280277, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9376407507268316, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9329986670956797, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9367170578496558, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.07368637385778129, "validation/loss_best": 0.13537544012069702, "validation/acc_best": 0.957093253968254, "validation/f1_best": 0.9492125521789809} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.37046380281448366, "train/grad": 0.15742435365915297, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.7677127075195314, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.6363577270507812, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4489070129394532, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.2953836822509766, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.169655303955078, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.029940414428711, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.9083675384521485, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.8051412200927734, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.7043199443817139, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.6251738262176514, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5661274814605712, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.499807653427124, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.45212588548660276, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.4011865770816803, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3649483060836792, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3376590955257416, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3102161377668381, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.284714073240757, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.261999214887619, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.24470294252038002, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2268407154083252, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.21144006930291653, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.19670986250042916, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.18310521185398101, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.17081481907516718, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.15796358872205019, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.14789698434993626, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.1396090043336153, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.12963690059259533, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.11935480657964945, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.11135811567306518, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.10495712717063725, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.09727321779355406, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.09131160878576339, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.08486206955276429, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07914136385545134, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07343440094031393, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06858076695352792, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.06398722701705992, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05922140381298959, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05611039781011641, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.05401566630229354, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.05241455132141709, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.05225914707407355, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.05369869749993086, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05526750278659165, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.059889593552798034, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.06393300194293261, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.07319215083494783, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0546076225116849, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.05177307112142444, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.047489890791475774, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04379708467051387, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0406712406873703, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.037126924209296704, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.034013755694031714, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03135855961591005, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.028760374505072833, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026716514443978667, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02517982954159379, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02344656318426132, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0221822549123317, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02082228729967028, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019842073703184724, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.019087226684205233, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.018318664594553412, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.017591310604475437, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.016918109958060087, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.016400683084502817, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.015838946704752742, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01533993867225945, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.014839631333015859, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.014362332832533867, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.013915191891137511, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.013430697394069284, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013036049066577106, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.012712175452616065, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.012326604514382779, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011905167074874044, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011581538887694478, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.01132535660173744, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.011028389411512763, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010781484364997596, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.010511166143696755, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.010224348919000476, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009860146119026468, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.009501218732912093, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00920390894752927, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008977317526005208, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.008826380809769035, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.008676064260071144, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.008621213289443404, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.008815820236923172, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.009186100674560294, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00958353010355495, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.010101473422255368, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.010358874400844797, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.011055927985580638, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.744445562362671, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.613058090209961, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.4262605905532837, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.2742693424224854, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.1501275300979614, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.0129852294921875, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8940413594245911, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.793289840221405, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.694978654384613, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6180170774459839, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.560617983341217, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.4963332712650299, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.4501005709171295, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.40067699551582336, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.3657171130180359, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.3394218981266022, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.31296321749687195, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.2885899245738983, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.26703640818595886, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.25085097551345825, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2344578057527542, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.22046855092048645, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.20751936733722687, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.19603115320205688, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.18587666749954224, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.17594878375530243, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.16826322674751282, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.16227315366268158, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.15566042065620422, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.14932705461978912, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.14477616548538208, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.14110732078552246, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.13726262748241425, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13456855714321136, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.13273978233337402, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1320921927690506, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1332099437713623, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13545149564743042, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1381155550479889, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13875100016593933, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13863487541675568, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.13922129571437836, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.14194585382938385, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.14586715400218964, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.1520489901304245, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.1596953123807907, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.1689784973859787, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.1790078580379486, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.19441580772399902, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6254960317460317, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6760912698412699, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7420634920634921, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7819940476190477, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8067956349206349, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8256448412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8412698412698413, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8536706349206349, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8678075396825397, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8764880952380952, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8829365079365079, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8888888888888888, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8938492063492064, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9030257936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9097222222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9136904761904762, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9181547619047619, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9236111111111112, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9278273809523809, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9325396825396826, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9357638888888888, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9404761904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9409722222222222, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9434523809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9466765873015873, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9479166666666666, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9486607142857143, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.949156746031746, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9496527777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.953125, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9548611111111112, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.955109126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9546130952380952, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9513888888888888, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9499007936507936, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9516369047619048, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.955109126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9558531746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9501488095238095, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4901036009783908, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.568027190622165, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.668346333833415, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7278169696160217, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7636858315874581, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7882900971731375, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8102756011154878, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8264613673840906, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8447149617303674, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8546247236443869, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.863334485186575, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.870127824344436, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8755772940349686, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8845207512298588, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8927184975626953, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8986611188716592, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9044401384869728, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9112954444732309, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9161804739252466, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9218722222278134, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9249630469867857, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9299405843983584, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.930709394228755, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9328370146306545, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9365112542070287, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9381128000609197, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9392404153191409, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9404342588952493, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.940839885282496, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9420037448475914, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9424931049801889, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9440564748372967, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9456177994435809, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9459442310216305, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9448532764925972, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9434968632955609, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9421016459150567, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.942028212004371, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.94153575118009, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9406719111015556, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9420419340150533, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.944360469885221, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9449673865997761, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9462794802284596, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9446980523178683, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9446218618696955, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9463747058475993, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9437819499287178, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9382906781757688, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.059889593552798034, "validation/loss_best": 0.1689784973859787, "validation/acc_best": 0.9558531746031746, "validation/f1_best": 0.9463747058475993} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.35144017174839975, "train/grad": 0.1532708280533552, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.7096366882324219, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.5764630126953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.3884510040283202, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.236719512939453, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.1138685607910157, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9786677169799805, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.862185287475586, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7641970443725586, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6687489891052246, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.594197325706482, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5386518335342407, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.47629460573196414, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.43147451400756837, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.38347312808036804, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3493913596868515, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.32363686978816986, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.2975940176844597, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2733618131279945, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.25173931270837785, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.23508163914084435, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2179552885890007, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.20305110290646552, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.18876268707215785, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.1756164311617613, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.16368244472891091, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.15109241250902414, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.14126029696315526, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.1330659029632807, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.12327792940661311, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.1129814319871366, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.10491816900670528, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.09835094649344683, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.09041999691165983, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.08410457525402308, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.07734252214431762, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07141572256572544, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06573984742164612, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06104164456017316, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.05658724420703948, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05179896427318454, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0486598552018404, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04620414829812944, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.043912492739036676, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.04240382211282849, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.04112577031366527, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.041206162655726075, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.041797674540430305, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0432037911657244, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.04575523759238422, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05334051663056016, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.05045744549483061, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.046153691466897726, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04250861817970872, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03947547009214759, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03608574733138084, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03314111152663827, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.030654246816411615, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.028205913258716464, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02627646000124514, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02482304726727307, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023160907328128814, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021956173684448004, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0206329189799726, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019671231894753873, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.018935499209910632, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.018174575162120163, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.017450000857934356, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.016786702191457153, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.016264427159912883, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.015710634491406382, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.015211811573244631, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.014721051752567292, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.014262213665060699, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01382338659837842, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.013355635339394212, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.012974125863984227, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.012652801615186036, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01227424746612087, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011849828944541514, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011501799719408155, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.011190316411666573, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.010818738995585591, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010479930792935193, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.010087292599491775, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009714360849466174, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009364731477107853, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00906512095592916, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008799987366655842, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008504094979725778, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.008295461599482223, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.008139686085050925, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.008034575199708342, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.008009947203681804, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.007977559262653812, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.008088484493782744, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.008166634403751231, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.008495630222605542, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.008787915480788798, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.6879358291625977, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.5550910234451294, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.368675708770752, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.2185144424438477, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.097580075263977, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9650542140007019, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.850981593132019, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7551504373550415, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6621012091636658, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5893445014953613, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.5354206562042236, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.4748554825782776, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.4313681423664093, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3848898112773895, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.35195887088775635, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.3271978795528412, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.30231979489326477, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.2793647050857544, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.2591434419155121, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.24390241503715515, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.22836560010910034, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.21526020765304565, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.20298896729946136, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.1920471340417862, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.18240222334861755, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.17267321050167084, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.16530893743038177, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1593402475118637, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.15265460312366486, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.14600588381290436, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.14144377410411835, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.13827097415924072, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1348428875207901, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13262303173542023, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1314554661512375, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13091948628425598, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13110193610191345, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1323452889919281, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13393890857696533, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1371958702802658, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.14066120982170105, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.14434821903705597, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.14793574810028076, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.15049821138381958, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.15388116240501404, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.16105856001377106, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.16567362844944, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.17538684606552124, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.18666408956050873, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6450892857142857, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6994047619047619, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7596726190476191, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7931547619047619, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8134920634920635, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8313492063492064, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8474702380952381, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8571428571428571, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8722718253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8784722222222222, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8861607142857143, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8931051587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8973214285714286, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9055059523809523, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9107142857142857, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9154265873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9206349206349206, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9231150793650794, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9268353174603174, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9303075396825397, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9350198412698413, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9370039682539683, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9384920634920635, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9444444444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9471726190476191, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9489087301587301, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9494047619047619, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9521329365079365, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.951140873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9518849206349206, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9508928571428571, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9486607142857143, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9499007936507936, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9508928571428571, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.953125, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9503968253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9484126984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9494047619047619, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5220154954081758, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5994739995817154, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6955518124847114, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7431272248457732, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7708550859199261, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7969422099245219, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8175054912014122, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8324660574328684, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8493170519729636, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8576021115432149, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8662697027302715, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.875196632089624, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8794489445933933, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8877933539552294, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8943613626562517, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9002228493542107, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.907440233220088, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9109841716220354, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9152370730289162, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9187134856547924, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9243462567846615, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9265203902320622, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9278610773719995, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9313333631084783, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9346338456474845, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9384607842499387, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9402059088209765, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9394259355023273, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9405163719965292, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9432479402336431, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9427132013161199, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9437016417126287, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9429732069994078, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9418275273519134, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9412666283169893, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9419636331156753, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9434242659259702, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9432616787865131, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9432805472238529, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9429227289016753, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9442957587544925, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9432895537433175, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9399620451136457, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9405209418110706, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9404687496624502, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9431242132569047, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9401584343546284, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9376843955491839, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9367605728242216, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.09835094649344683, "validation/loss_best": 0.13827097415924072, "validation/acc_best": 0.9536210317460317, "validation/f1_best": 0.9437016417126287} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.3416551697254181, "train/grad": 0.14817364066839217, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.663904571533203, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.530155792236328, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.3433389282226562, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1940541076660156, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0743063735961913, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9436804962158203, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8318938064575195, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7382714176177978, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6473933601379395, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5765831661224365, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5238337421417236, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4646989393234253, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4221077001094818, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.37637564420700076, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.34371269166469576, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.31893823444843294, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.29379075437784197, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2701962050795555, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.24900595381855964, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.23273349583148956, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.215750070810318, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.20087065055966377, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.18674937460571528, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.1735609483718872, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.1615941609442234, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.14911248220130802, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.1391959958896041, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.1311030442453921, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.12121083002537489, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.11083172930404545, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.10277982426807285, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.09611915552057326, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.08808612914755941, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.08180806810967624, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.07517425863072276, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06938536188565195, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0638928035646677, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05930702889338136, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.05494412062689662, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05022651213221252, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.04704667783342302, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04471864765509963, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04239107606001198, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.04076888370327651, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.039221012052148584, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.03834316148422658, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.03840315298177302, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.037623139144852755, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.041909965090453624, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.052080266270786525, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04909847766160965, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0446873409114778, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04101081980392337, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.037994511742144826, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.034671797044575216, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.031816592598333956, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.029419960109516977, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.027087125796824693, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.025254037510603667, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02387402966618538, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02230674693826586, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021160672474652528, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019908748734742403, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019000214478000998, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.018294560718350113, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.017571755163371564, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.016869778111577034, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.016227701590396465, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.015721173705533147, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.015173438084311784, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014680018541403115, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.014203372313641011, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013736823098734021, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.013297662723343819, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012832109625451267, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.012443075596820563, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01211836507776752, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.011707406349014491, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011257236553356052, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01088702012784779, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.010577867890242487, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.010184387033805252, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.009874902677256613, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009535830044187605, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009234359735855832, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008944483189843595, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008710958623560146, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008484801977174357, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008255275642732159, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00807702038087882, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.007936825732467696, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.007807560778455808, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.007724599135108292, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.007672551667783409, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0077347320120316, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.00792338420113083, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.008186672332230955, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.008593470416381024, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.644432783126831, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.5107979774475098, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3249043226242065, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1767590045928955, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0583797693252563, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9294243454933167, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8193126916885376, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7271673679351807, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6380607485771179, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5686335563659668, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.5170567035675049, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.4593302309513092, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.41785356402397156, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3735892176628113, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.34211307764053345, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.31837621331214905, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.29463186860084534, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.2724810540676117, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.25288641452789307, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.2380618155002594, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.222965270280838, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.21006640791893005, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.19805927574634552, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.18754281103610992, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.17812258005142212, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.16910763084888458, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1622154861688614, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.15700341761112213, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.15099501609802246, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1452227234840393, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.14095498621463776, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.13771602511405945, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.134238138794899, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13199007511138916, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.13006477057933807, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1287367045879364, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1280243843793869, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12834535539150238, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12919440865516663, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13087765872478485, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1323413848876953, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.1339695006608963, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.13682910799980164, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.13932667672634125, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.14320334792137146, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.14710696041584015, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.1530371755361557, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.15795719623565674, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.1681663542985916, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6617063492063492, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7170138888888888, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7700892857142857, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8000992063492064, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8177083333333334, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8365575396825397, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8497023809523809, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8623511904761905, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8737599206349206, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8812003968253969, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8878968253968254, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8950892857142857, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9010416666666666, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9079861111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9136904761904762, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9176587301587301, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9203869047619048, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9255952380952381, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9327876984126984, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9357638888888888, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9389880952380952, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9439484126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9471726190476191, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9476686507936508, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9506448412698413, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9546130952380952, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9563492063492064, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9573412698412699, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9575892857142857, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9563492063492064, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9546130952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9546130952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9548611111111112, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9523809523809523, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.953125, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.949156746031746, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5443949790159919, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6283552788304817, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7104117820609561, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7535421632656445, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7772365875498317, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8038672830071703, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8209217002112068, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8381538905269224, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8514157303968519, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8602614293493983, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8688944241868226, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8772219791627556, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8837631118074243, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8914100757505162, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.898213678357807, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9039108270482022, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.906885954242929, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9125345605918337, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9219032290405446, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9248967088692615, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9286170041687776, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9315271945138299, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9339594376441118, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9373689792917833, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9374456318816646, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9416302327724492, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9434695352540443, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9442205802773982, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9433628249752398, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9439893370978426, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9444638838875276, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9465354774440997, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9474578457984861, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9476952135405334, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9462478667807708, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9460587573172106, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9449772192352756, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9443191283163208, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9447858273009679, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9449647392583639, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9452254798123375, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9457370621520504, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9425422868088523, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9437821851294824, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9431376794723763, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9429843015495372, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9427771712589484, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9424805041193194, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9379759673414204, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 0.08180806810967624, "validation/loss_best": 0.13199007511138916, "validation/acc_best": 0.9575892857142857, "validation/f1_best": 0.9476952135405334} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.3258525698632002, "train/grad": 0.1451337229087949, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.6073672485351562, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.472957305908203, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2867676544189453, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1396710968017578, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0226820373535157, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8960189056396485, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7883929824829101, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6988007831573486, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6122041130065918, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5450006294250488, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.49510502338409423, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4392574334144592, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.3991641902923584, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.35614891290664674, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.32550306618213654, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3022828471660614, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.2787166029214859, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2565371061861515, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.23671844184398652, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.22144657522439956, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.20558495506644248, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.19164334937930108, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.17833730813115836, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.16596082650125027, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.15475399993360042, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.1429426372051239, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.13364154435694217, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.12596095202490687, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.1165853880159557, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.10687474945560098, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.09923662154003977, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.092981768976897, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.08539526510052382, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07946632238104939, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.07316534587182105, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06757621983997524, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0623238680511713, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05781081250868738, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.053457344993948934, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.04875784273259342, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.04571878589689732, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04324974087998271, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.04055021350272, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.03844613963738084, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.03662665202282369, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.03560234377160668, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0348089059162885, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.033803986785933375, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.03476938677020371, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05145709466189146, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04843700237572193, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.044011320434510705, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04036329692229629, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03739077099598944, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03413315119221807, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03134498367086053, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02901307286694646, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026731591681018473, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024934679865837098, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02358222850598395, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022040468277409672, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02091542466543615, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01968415731098503, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01879349260125309, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01810635956004262, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01738792675547302, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0166961080301553, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.016064626541920007, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.015554757425561547, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.015022179484367371, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014527856730856001, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.014045047550462186, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013574851262383163, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01313378707272932, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012653696821071208, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01225906565086916, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.011918858173303307, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01147486178437248, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.010978891481645405, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.010550572590436786, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.01019824403570965, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.009744688132777811, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.009387995786964894, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.008999342555180192, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008652225818950683, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008338509794557468, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008042225443059579, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0077328134980052705, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007387945833615958, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007172826705500484, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.006996309235692024, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006792856877436862, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.006627738294773735, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0065108825842617075, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.006485020107938908, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006505561892990955, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.006573576002847403, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.006783260096563026, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.6125563383102417, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4785573482513428, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2930858135223389, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.146634817123413, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.030070424079895, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9037802815437317, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7966064810752869, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7071483135223389, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6206872463226318, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5535991787910461, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.5037333369255066, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.44826364517211914, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.40806129574775696, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3653546869754791, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.33506327867507935, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.31216999888420105, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.2890285849571228, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.26765769720077515, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.24867238104343414, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.23429130017757416, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.21956849098205566, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.20705650746822357, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.19525526463985443, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.18476122617721558, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.17556339502334595, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1663273274898529, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1594594269990921, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.15402060747146606, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.14790652692317963, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1420120894908905, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13782775402069092, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.13482636213302612, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.13173530995845795, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13007062673568726, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12849077582359314, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12753848731517792, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12710657715797424, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12747646868228912, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12844300270080566, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1298247128725052, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1317528337240219, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.13341748714447021, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.13622619211673737, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.1389540284872055, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.14275376498699188, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.1468086838722229, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.15061064064502716, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.15830892324447632, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.1646067053079605, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6748511904761905, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7266865079365079, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.777281746031746, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8050595238095238, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8211805555555556, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8385416666666666, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8514384920634921, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8630952380952381, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8754960317460317, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8829365079365079, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8883928571428571, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8955853174603174, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9020337301587301, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9079861111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9136904761904762, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9174107142857143, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.921875, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9255952380952381, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9303075396825397, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.933531746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9375, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9412202380952381, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9434523809523809, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9461805555555556, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9484126984126984, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9521329365079365, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.953125, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9521329365079365, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9528769841269841, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.568537633195053, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6434091460262794, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.72343862212453, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7601046095482606, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7831099757224659, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8063062714086195, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8231887429548349, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8397979909528553, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8532320510844598, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8629394377107015, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8694073079478825, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8773591461864795, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8840094950163191, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8908288214138975, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8983856901145156, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9035936430215886, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9091714861345179, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9143173071324294, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9189791621785369, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9226152024919859, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9269672261453343, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9307539952015585, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9332698053007003, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9359192189184109, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9381975184774353, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.941235143481106, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9428907215937589, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9437844544018427, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.943829695855674, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9445937003169599, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9441338837166432, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9434254126005102, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9440838453632359, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9433346554662982, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9432712935864771, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9427595038900268, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.943035717061789, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9421143489536528, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9421885455852129, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9432034371660815, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.943365318851294, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.944846915213926, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9426600733201457, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9435106045815154, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9430592208815172, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9437715246163123, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.942808964349132, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9455476336715485, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.943252767691069, "id_best": 29, "lr_best": 0.0006899999999999999, "wd_best": 0.05, "train/loss_best": 0.10687474945560098, "validation/loss_best": 0.1420120894908905, "validation/acc_best": 0.9543650793650794, "validation/f1_best": 0.9445937003169599} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.31891695015132426, "train/grad": 0.1434431741014123, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5828245544433595, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4478132629394531, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.262058639526367, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1161158752441407, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0006239700317383, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8762024307250976, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7709561347961426, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6835167694091797, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5991993808746338, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5338080883026123, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.48534220695495606, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.43114437580108644, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.392037079334259, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.350162108540535, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3203243958950043, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.29767199486494067, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.2746563819050789, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.25298321023583414, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2334947642683983, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.21850146040320395, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.20292267084121704, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.1892249244451523, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.1760495400428772, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.16379691179841757, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.15265577912330627, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.1408760941773653, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.131486728861928, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.12373964617028832, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.11430377369746565, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.10431789513677359, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.09640886422246694, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.08995861614122987, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.08215962016955018, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07596918589435518, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.06941717488691211, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0637224413920194, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05840505720116198, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05396347304806113, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04977227190509439, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.04523958130739629, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.042258494310081, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.03992803950794041, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.037331346049904825, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.03537329761311412, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.03351242399774492, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.032172619551420215, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.031286434214562175, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.030541243171319364, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.030699679516255855, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.050923903845250605, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04789962213486433, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04351734528318048, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03993309812620282, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0370313710719347, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03386620251461864, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.031155763445422053, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02887550961226225, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02664505214430392, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024870227919891477, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02353002273477614, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021998164406977595, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02087546224705875, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01964188354089856, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018751036366447805, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.018052965090610086, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.017329451479017734, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.016631340142339468, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.015984537506010383, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.015470035285688936, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.014903890744317323, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014391539634671062, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013883370934054255, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01338252873858437, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.012909562503919005, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012394270724616945, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011964002454187722, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.011597800131421537, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.011133040986023843, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01062161618610844, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.010193034771364183, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.009823244486469776, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.009361790872644634, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.008978837435133755, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.008560965405777096, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008188168527558446, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007814899686491118, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.007503939472371712, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0072137164021842184, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006882026670500636, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006680567877483554, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.006539979321532882, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006407876466982998, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0063108684634789825, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00623778693145141, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0062334418325917795, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006225386540754698, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.006221526462468319, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.006319755572476425, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5904285907745361, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4561980962753296, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2714382410049438, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1259033679962158, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0107231140136719, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8864516019821167, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7810622453689575, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6934529542922974, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6089715957641602, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5434687733650208, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.4948213994503021, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.4404766261577606, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.401571124792099, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3597487211227417, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.33012232184410095, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.3077372908592224, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.28518056869506836, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.26416200399398804, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.24558021128177643, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.23145464062690735, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2171412706375122, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.20471057295799255, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.19331735372543335, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.18301111459732056, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1740228533744812, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1650550812482834, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.15833033621311188, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.15307411551475525, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.14735546708106995, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1418607383966446, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13811388611793518, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.13539062440395355, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.13264791667461395, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1311437338590622, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1298532485961914, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1294042468070984, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12944218516349792, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1302625685930252, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1315275877714157, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13329878449440002, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13464678823947906, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.13625980913639069, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.13789325952529907, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.13963578641414642, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.14185677468776703, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.14469727873802185, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.14697058498859406, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.15095262229442596, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.1581694334745407, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.683531746031746, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7348710317460317, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7817460317460317, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8100198412698413, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8239087301587301, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.841765873015873, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8519345238095238, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8653273809523809, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8759920634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8854166666666666, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8888888888888888, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8970734126984127, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9032738095238095, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.910218253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9154265873015873, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9184027777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9221230158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9280753968253969, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9310515873015873, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9350198412698413, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9384920634920635, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9419642857142857, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9437003968253969, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9459325396825397, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9471726190476191, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9481646825396826, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.953125, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.953125, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9499007936507936, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9518849206349206, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9516369047619048, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9516369047619048, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9523809523809523, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.951140873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9518849206349206, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9513888888888888, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9521329365079365, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9523809523809523, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9518849206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5783542721723053, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6571860917466924, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7281061188148273, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7666365064117244, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7864218800670452, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8106302411272261, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8244962435645381, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8412390248296765, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8539110477360297, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8652939249049856, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8702668914744338, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8793451444246223, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8850734951287877, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.893810417932613, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.900719806669114, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9047749340339315, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9095376334965977, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9167220851564951, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9197802373630863, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9245330929388909, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.92820733851574, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9317780297489532, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9334683579075015, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9359756423822028, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.937820261303825, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9396298269540405, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.941853523299895, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9409508436285162, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9425989893853626, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9440244875113623, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9427369721299961, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9439529660511807, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9444112939473561, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9436893983848426, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9433280417071936, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9434137399199969, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9418337253781569, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9408874832991048, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.942066518577087, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9438764256807983, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9432885205187935, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9435407198633847, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9446848539343625, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9429812275223102, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9435145175466954, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9418689808696603, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9425133022514147, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9432394865662627, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9426641402763574, "id_best": 29, "lr_best": 0.0006899999999999999, "wd_best": 0.05, "train/loss_best": 0.10431789513677359, "validation/loss_best": 0.1418607383966446, "validation/acc_best": 0.9536210317460317, "validation/f1_best": 0.9440244875113623} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.3183578437566757, "train/grad": 0.1425946283340454, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5672804260253905, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4333314514160156, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2494213104248046, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1054865264892577, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9917808532714844, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8697306823730468, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7664364433288574, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6807610988616943, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5982164907455444, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.534251046180725, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.4867934250831604, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4335954785346985, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.39541664481163025, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.3542682057619095, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.32485211014747617, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.30249363511800764, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.2796014660596848, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.25802456483244895, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2384274723380804, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.22318987227976322, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.20724733665585518, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.193195785805583, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.17950926311314105, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.1667798538133502, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.15506956197321414, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.14277190033346415, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.1329103844985366, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.12481861108914018, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.11500664286315442, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.1046964399702847, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.09666180986911059, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.09006897078827024, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.08221103061921894, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07600789830088615, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0694863854907453, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06373493321239948, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05831295320764184, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05376552179455757, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04941596363671124, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.04472528016194701, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.041556944511830805, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.03919475526548922, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.036443548472598196, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.034377080481499435, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.03238379227928817, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.030518190860748293, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.029321181681007148, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.028257811265066265, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.02772558414377272, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05089451076462865, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04778316497802734, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04329156832769513, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03964789690449834, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.036716760862618686, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03354076901450753, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.030841480484232308, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.028581171538680793, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02637610159814358, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02464070844464004, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02333000998944044, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021840529409237208, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020752691403031348, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019561473480425775, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018693486573174597, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.018012634897604584, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01731304177083075, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.016620988701470198, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01598105299286544, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.015457826191559434, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.014899406097829342, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0143910927278921, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013868114952929318, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013375762328505516, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.012897384336683899, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012375144776888192, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011943873732816428, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01156500066164881, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.011095327655784786, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01057524369796738, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.010147126310039311, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.009769256713334472, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.009313958715647459, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.008930728659033776, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.008512347098439932, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008133769945707171, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007763462046859786, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.007432938455604016, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007100034003378823, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006726610789773986, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006469383733347058, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.006281252518529072, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0060919744783313945, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005948219458805398, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0058307098725344985, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00574409507738892, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.005697880069492385, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0056711390038253736, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.005674395493115298, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5768585205078125, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4424360990524292, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2578938007354736, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1132175922393799, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9988616704940796, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8755687475204468, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7715466022491455, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6850013732910156, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6017090082168579, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.537097156047821, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.4892250597476959, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.43570172786712646, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.3972170650959015, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.35602760314941406, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.32680249214172363, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.3047886788845062, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.2824842631816864, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.2616502344608307, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.24343064427375793, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.22953255474567413, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.21523422002792358, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.20311132073402405, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.19185929000377655, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.18156617879867554, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1727936714887619, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1638239473104477, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.15721474587917328, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.15191197395324707, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1460748165845871, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1405283361673355, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13664351403713226, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.13390213251113892, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1310466229915619, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1292322874069214, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12790252268314362, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1271933913230896, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1269402652978897, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12743274867534637, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12856243550777435, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13008366525173187, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13144324719905853, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.13301007449626923, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1353960931301117, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.13751526176929474, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.14017583429813385, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.14351259171962738, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.1463199406862259, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.15031962096691132, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.15750865638256073, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6897321428571429, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7390873015873016, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7834821428571429, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.810515873015873, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8251488095238095, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8430059523809523, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8541666666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8683035714285714, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8779761904761905, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8849206349206349, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.890625, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8975694444444444, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9032738095238095, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9117063492063492, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9166666666666666, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.919890873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9228670634920635, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9290674603174603, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9332837301587301, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9362599206349206, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9399801587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9429563492063492, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9456845238095238, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9481646825396826, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9499007936507936, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.951140873015873, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9546130952380952, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.953125, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9518849206349206, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.953125, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9548611111111112, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.955109126984127, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9541170634920635, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5866645848487021, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6647609989097284, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7307283643149833, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7671867515267469, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7888546076476215, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.81214647786449, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8278519768242922, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8443309986892034, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.85669785946332, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8649726836588953, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8724892824430284, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.880042671089297, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8851876845047831, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8956292423000856, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9021312081035979, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9065338246512802, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9103558170774259, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9178778354734408, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.922680173400165, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9257276249708923, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9298091674145323, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.932816552454837, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9359431153307144, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.938372141966835, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9412560027314709, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9426284680959672, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9423544382338119, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9433675943613283, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9446888975610376, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9446333374138509, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9431824613320247, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9442054351840076, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9444342881655897, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9444185963818135, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9450113241323441, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9446552685063668, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9449716522215399, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9456492933311571, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9447807199417623, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9450737428781674, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9447909643945813, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9435164572852002, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9427551505053576, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9444214196832666, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9444576630774616, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9452444200344049, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9458438258101668, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9470737367256247, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9449550969300207, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 0.028257811265066265, "validation/loss_best": 0.15031962096691132, "validation/acc_best": 0.955109126984127, "validation/f1_best": 0.9470737367256247} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.3141118373721838, "train/grad": 0.1398756754025817, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.564493408203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.42948974609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2444529724121094, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.0997686767578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9857077026367187, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8633201599121094, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7599315452575683, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6742716121673584, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5917195987701416, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5278141593933106, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.48036073207855223, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.42723863005638124, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.3889910447597504, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.3478003978729248, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3183753085136414, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.29607400119304655, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.2732754173874855, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2518085505068302, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2323978514969349, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.21742455638945102, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.2016933335363865, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.18793971952050925, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.174610391035676, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.16220824927091598, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.15073965705931186, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.13874318081885575, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.12915895778685807, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.12127823263406753, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.11168682118877768, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.10161784064024687, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.09367895629256964, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.08722962422296404, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.07943340071476995, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07333168488927186, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.06686195785179734, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.061257839063182475, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05590091912075877, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05142121990211308, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04715326650068164, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.04254406251013279, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.039422578932717445, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.037087764339521524, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.03434109316207468, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.03226333156228065, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.030099824955686926, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.02818507102318108, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.026817165780812502, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.025457063177600502, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.024601011909544467, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05024182502180338, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0471548019349575, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.042712336778640746, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03911190228536725, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0362209602072835, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0330861562397331, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.030413727685809137, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02817726825363934, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.025986398812383415, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024254064131528138, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022946763052605092, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021453794189728797, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02035988561809063, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019160054959356784, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018280192157253624, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01760279501322657, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0168950471887365, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.016202500103972853, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.015561395278200506, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01505440526176244, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01450274926610291, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.013998840944841504, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013493569402489811, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01301012521609664, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.012542146120686083, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012028017230331898, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011598621914163231, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.011236590093467384, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.010764210706111044, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.010246875854209065, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00981284599751234, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.009445009934715927, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.008968448406085372, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.008583932772744447, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.008150641629472375, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007761983388336375, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007381414792034775, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.007043194426223635, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006711114306235686, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006348273362964392, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006098973728949204, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.005913664764957503, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.005689610473345965, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005519999267999082, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.005355593324638903, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0052094853721791875, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.005113022000878118, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0050364962907042355, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.004989257611450739, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5693894624710083, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4351739883422852, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.250661849975586, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1065119504928589, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9923637509346008, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8699394464492798, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7664406895637512, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6805302500724792, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.5978487730026245, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5337413549423218, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.48621702194213867, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.43309155106544495, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.3949067294597626, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3542237877845764, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.3252226710319519, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.3033689856529236, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.2811497449874878, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.26071634888648987, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.24250063300132751, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.22865989804267883, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.21448102593421936, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.20242096483707428, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.19124482572078705, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.1810513138771057, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.17220339179039001, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.16333413124084473, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1567503958940506, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.15153442323207855, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1456378549337387, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.14009974896907806, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13624149560928345, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1333392709493637, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.130482017993927, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1287422776222229, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1274973303079605, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1266033947467804, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12641571462154388, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12680843472480774, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1278844028711319, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1293645203113556, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13118967413902283, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.13279448449611664, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1353762447834015, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.13784092664718628, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.14081786572933197, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.14433695375919342, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.14737243950366974, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.15148073434829712, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.15886396169662476, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6912202380952381, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7415674603174603, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7854662698412699, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8120039682539683, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8266369047619048, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8432539682539683, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8544146825396826, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8680555555555556, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8794642857142857, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8851686507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8908730158730159, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8983134920634921, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9035218253968254, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9114583333333334, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9164186507936508, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9193948412698413, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9231150793650794, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9283234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9325396825396826, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9357638888888888, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9409722222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9427083333333334, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9461805555555556, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9476686507936508, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9506448412698413, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.951140873015873, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9546130952380952, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.955109126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.955109126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9546130952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9533730158730159, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5887445347339825, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6679720774298679, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7340722792340373, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.768930542214644, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7902767514773911, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8123069023173144, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8276004370694219, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8451412584639741, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8582794145015888, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.865276954603576, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.872773225216329, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.880791668564208, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8854014497879009, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8953068610255398, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.901619565439857, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9058938140763656, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9108566854264029, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9171303256019859, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9218584213455481, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.925355892508252, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9308101059744118, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9326278710327449, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9363983110124511, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9377090564704138, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9414228718034217, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9422489806056481, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.942714361474438, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9429151527319442, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9432534691295222, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9441382111273162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9430256188143646, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9446797693695081, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9441901070716353, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9440151699138891, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9440080655671599, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9454336527755842, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9461529786101449, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9463297719389269, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9451661305999831, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9458025436226587, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9463409048990404, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.946177108639606, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9451563458461707, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9455890572523211, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.94528305928959, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9461217909930937, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9446100337645136, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9454328147274957, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9438221880439264, "id_best": 36, "lr_best": 0.0021299999999999995, "wd_best": 0.05, "train/loss_best": 0.05590091912075877, "validation/loss_best": 0.12641571462154388, "validation/acc_best": 0.9553571428571429, "validation/f1_best": 0.9461529786101449} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.3114487191289663, "train/grad": 0.1400942987948656, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5553523254394532, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4202557373046876, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2352743530273438, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.0908691024780273, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9770736312866211, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8550365829467773, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7521522521972657, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6669787311553955, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5850981664657593, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5216631984710693, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.47471529483795166, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.42220627307891845, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.3844580638408661, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.34396305561065676, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.31503348648548124, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.2931151500344276, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.270776709318161, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2497718572616577, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.23075376361608504, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2160589574277401, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.20069301418960095, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.1871155921369791, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.1740956747531891, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.161869400665164, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.1505854343622923, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.1386564029753208, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.12914038537070155, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.12126046506687999, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.11163205709308385, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.10155307652428747, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.09358214336447418, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.08707808658480644, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0792387170623988, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07306174113415181, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.06646229305304586, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06080761884339154, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.055312010319903494, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05076033811084926, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.046439796844497325, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.041784002585336565, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.038628913713619116, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.036179382177069785, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.03343643565662205, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.031284621208906176, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.029116527698934077, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.02719955215230584, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.025795886805281044, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.024298824677243827, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.023312519825994967, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.050326535049825906, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.047200576476752756, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04271217327564955, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.039082822240889076, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0361682637501508, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03302307150326669, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.030354586020112036, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.028131461637094617, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.025975260054692625, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024281542263925076, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02301699008792639, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021582866702228785, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020526110408827663, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019381986688822506, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01853790260385722, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.017875107903964817, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.017176339812576772, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.016479587387293576, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.015826909523457288, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.015294123897328973, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.014719312330707907, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014172798804938793, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013639115486294031, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013107768080662935, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.012610193386208265, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01206519819330424, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011616527806036175, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.011234769488219172, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.010747653190046549, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.010224081820342689, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.009787096914369613, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.009418951848056167, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00895206157816574, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.008561476826434955, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00812450040713884, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007721825306070969, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007316913248505443, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006964326340239495, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006609650588361546, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006211712383665144, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005922965877689421, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.005695847015595064, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0054317587421974164, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005220713902381249, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.005017583453445695, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.004842037042835728, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.004721611716668121, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.004616852603503503, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.004543136013380717, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5666725635528564, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4321669340133667, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2480144500732422, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1038116216659546, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9899792671203613, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8677809834480286, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7644636631011963, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6787881851196289, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.5962992310523987, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5324591994285583, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.48511067032814026, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.43215829133987427, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.39418289065361023, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3535073399543762, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.3245479464530945, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.3028048872947693, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.28074997663497925, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.2602616250514984, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.24212361872196198, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.22836866974830627, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.21426114439964294, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.20223847031593323, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.19110025465488434, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.18090280890464783, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1720307618379593, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.16324061155319214, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1565077006816864, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.15136152505874634, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.14557309448719025, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.13995563983917236, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13598455488681793, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.13339753448963165, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1304236203432083, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12860561907291412, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12721000611782074, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12656544148921967, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1263374388217926, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12685436010360718, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12767720222473145, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.12922759354114532, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13104498386383057, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.13257035613059998, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1349119395017624, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.13747432827949524, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.14010652899742126, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.14348219335079193, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.14612413942813873, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.15027515590190887, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.15772010385990143, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6922123015873016, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7423115079365079, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7857142857142857, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8115079365079365, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8273809523809523, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8435019841269841, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8559027777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8692956349206349, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8782242063492064, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8854166666666666, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8908730158730159, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8985615079365079, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9040178571428571, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9114583333333334, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9169146825396826, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9203869047619048, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9233630952380952, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9283234126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.935515873015873, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9399801587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9427083333333334, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9464285714285714, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9479166666666666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9499007936507936, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.955109126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9528769841269841, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.953125, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5894930839560627, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6690735255355492, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7347553133474575, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7689994226521212, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7910965126774844, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8127237665981859, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8300128703170804, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8468818779098365, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8572211146821639, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8653469457503279, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8727841874005268, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8808997536487827, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8861685562737852, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8954039480818269, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9022825991706547, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9068369449950084, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9110208741003654, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.917226869531754, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9224045591772971, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9250831307819821, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9297949942972226, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9326302133926258, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9364877819517552, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9380864068229494, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.940622998840014, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9428675665716061, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9433540658178741, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9430205275987815, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9430138341484781, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9431153946206194, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9435612735084562, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9439829148143253, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9436214318253524, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9452062998543804, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9455477977060082, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9451726748195942, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9462066901455108, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9450221256976199, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9459950922203982, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9459505024376927, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9451465518194093, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9448059589549311, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9433168152816304, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9450471507844014, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.944983911048367, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9438079061971304, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.944114347679209, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9455985184261441, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9439160960298739, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.06646229305304586, "validation/loss_best": 0.12721000611782074, "validation/acc_best": 0.9553571428571429, "validation/f1_best": 0.9455477977060082} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.31068887032568454, "train/grad": 0.13850200247019528, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5504986572265624, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4158444213867187, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2317731475830078, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.0881527709960936, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9751006317138672, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8538886833190918, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7517760276794434, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6671432113647461, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5859002876281738, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5228739643096924, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.47623828172683713, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.42388594150543213, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.38622155904769895, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.34573051691055295, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3167597883939743, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.29478032618761063, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.27224445015192034, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.25095060527324675, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.23163195699453354, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2167232009768486, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.20110557183623315, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.18727745555341244, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.17404627747833729, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.16165837667882443, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.15022442355751991, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.13815684797242284, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.12862473456189036, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.12074982348829508, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.11098387286067009, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.10076631197705865, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.09266167784109712, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.08605608726851642, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.07798384034074843, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07168948089703918, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.06494980131275951, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.059150826996192334, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05364498193375766, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04903614729642868, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.044671624079346654, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.039991619884967806, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.036813946813344954, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.034404700733721254, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.03160867494530976, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.029499650094658138, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.027250108243897557, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.025296945590525866, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.023825493343174457, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.02226982661522925, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.021237389305606483, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.049714539181441066, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04663171965628862, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04221046192571521, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03863559859804809, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.035771466111764315, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03267825775779784, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03005579343996942, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027871272396296262, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.025744658214971423, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024070163797587155, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022810515901073813, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021371695320121942, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02031929349526763, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01915747825987637, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018303552535362543, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01764965822454542, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.016944014062173666, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.016263254359364508, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.015621859887614846, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.015102946371771396, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.014547687554731965, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01403454716783017, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013517886041663587, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013021725579164922, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0125442695338279, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012026945329271257, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011587855753023178, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.011221205997280777, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.010742527663242072, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.010207980310078711, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00976747192442417, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00939119994174689, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.008901854250580073, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.008497420763596893, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.008043990381993353, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007626889314269647, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007222036037128419, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006862352637108415, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006502419096650555, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006098038371419534, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005811587236821652, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.005574158638482913, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.005317937908694148, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005104946165229194, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.004879933469928801, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00468901269196067, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.004556266799918376, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.004415378451230936, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.00432460770563921, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5662564039230347, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4318230152130127, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.247588872909546, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.103437900543213, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.989667534828186, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8674694895744324, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.764156699180603, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6785545349121094, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.5960818529129028, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5322688817977905, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.48492854833602905, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.43201544880867004, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.39401379227638245, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3534078299999237, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.3244880735874176, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.3026683032512665, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.2806737720966339, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.26024502515792847, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.24206826090812683, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.2283293455839157, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.21422907710075378, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.20218154788017273, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.19105477631092072, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.1808776706457138, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.17198878526687622, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.16319139301776886, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1564539670944214, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1512821614742279, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1454266607761383, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.13990025222301483, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13595561683177948, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.13323521614074707, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.13029716908931732, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12852665781974792, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1271297037601471, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12642061710357666, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12630388140678406, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12668287754058838, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12751740217208862, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1292705535888672, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1309027373790741, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.13255521655082703, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.13490338623523712, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.1371021270751953, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.13981392979621887, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.14317379891872406, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.14575664699077606, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.14980600774288177, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.15704141557216644, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6922123015873016, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7415674603174603, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7857142857142857, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8112599206349206, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8273809523809523, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.84375, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8556547619047619, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8692956349206349, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8784722222222222, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8854166666666666, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8911210317460317, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8983134920634921, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.904265873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9119543650793651, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9169146825396826, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9206349206349206, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9236111111111112, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9285714285714286, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9327876984126984, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9352678571428571, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9397321428571429, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9429563492063492, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9464285714285714, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9481646825396826, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9494047619047619, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9548611111111112, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9548611111111112, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.953125, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9536210317460317, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5893607992935279, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6681877988979112, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7347553133474575, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7683825221233339, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7910965126774844, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8128726697381329, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8298288442298437, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.846869222525445, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8574857130563888, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8654757806716971, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8730598640395383, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8806307249359774, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8864252165006835, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8957708218713915, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9022825991706547, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9073150401387221, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.911213951293913, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9174068418597469, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9222266086090478, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9249050891965643, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9296182126956707, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.933153777788701, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9364664123449666, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9380907299332731, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9403435604883321, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9432125061077159, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9433540658178741, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9426629473531941, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9429225066943349, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9434288554145553, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9433243747910931, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9439715579387961, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9438252445841933, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9451100038880673, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9452805971383592, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9457554712137102, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9461938066262903, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9456443467761483, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9458137493016843, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9456480992455385, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9445579825781378, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9450496962602001, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9442151626543731, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9445688422311865, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9448592609973748, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9441535970816206, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9444181639100199, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9452705150823161, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9445241277774279, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 0.059150826996192334, "validation/loss_best": 0.12642061710357666, "validation/acc_best": 0.9558531746031746, "validation/f1_best": 0.9457554712137102} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/config.yaml b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0bef36338bd1471c7d90156a7b1beba272fe27fb --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..5e12a30e83c522b4af85927d054085c744a88217 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 5, "eval/id_best": 26, "eval/lr_best": 0.00041999999999999996, "eval/wd_best": 0.05, "eval/train/loss": 2.051693916320801, "eval/train/acc": 0.3771166907403424, "eval/train/acc_std": 0.00244381370034404, "eval/train/f1": 0.3151314575290664, "eval/train/f1_std": 0.0025878796213352436, "eval/validation/loss": 2.3111226558685303, "eval/validation/acc": 0.29955703211517165, "eval/validation/acc_std": 0.00557199653562535, "eval/validation/f1": 0.22809156533459699, "eval/validation/f1_std": 0.0051914275587849334, "eval/test/loss": 2.2077643871307373, "eval/test/acc": 0.32040816326530613, "eval/test/acc_std": 0.005742964572796282, "eval/test/f1": 0.25615203285306026, "eval/test/f1_std": 0.005755988496212478, "eval/testid/loss": 2.260200262069702, "eval/testid/acc": 0.31289762868710236, "eval/testid/acc_std": 0.005649747157859323, "eval/testid/f1": 0.2524580398378575, "eval/testid/f1_std": 0.005408003032298818} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..8e99385f33a374a2ed4b878d826dfe277f03b815 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 5, "eval/best/id_best": 26, "eval/best/lr_best": 0.00041999999999999996, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.051693916320801, "eval/best/train/acc": 0.3771166907403424, "eval/best/train/acc_std": 0.00244381370034404, "eval/best/train/f1": 0.3151314575290664, "eval/best/train/f1_std": 0.0025878796213352436, "eval/best/validation/loss": 2.3111226558685303, "eval/best/validation/acc": 0.29955703211517165, "eval/best/validation/acc_std": 0.00557199653562535, "eval/best/validation/f1": 0.22809156533459699, "eval/best/validation/f1_std": 0.0051914275587849334, "eval/best/test/loss": 2.2077643871307373, "eval/best/test/acc": 0.32040816326530613, "eval/best/test/acc_std": 0.005742964572796282, "eval/best/test/f1": 0.25615203285306026, "eval/best/test/f1_std": 0.005755988496212478, "eval/best/testid/loss": 2.260200262069702, "eval/best/testid/acc": 0.31289762868710236, "eval/best/testid/acc_std": 0.005649747157859323, "eval/best/testid/f1": 0.2524580398378575, "eval/best/testid/f1_std": 0.005408003032298818} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..db11a33ac14d4f2162b89337bf6c1deb5b45a017 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 21, "eval/last/lr_best": 0.00018299999999999998, "eval/last/wd_best": 0.05, "eval/last/train/loss": 1.8462697267532349, "eval/last/train/acc": 0.4454961738221826, "eval/last/train/acc_std": 0.0026430052029591394, "eval/last/train/f1": 0.40051732798990813, "eval/last/train/f1_std": 0.0029712207243615626, "eval/last/validation/loss": 2.3841230869293213, "eval/last/validation/acc": 0.290328534514581, "eval/last/validation/acc_std": 0.005608880975360732, "eval/last/validation/f1": 0.2381362302924078, "eval/last/validation/f1_std": 0.00522716009559544, "eval/last/test/loss": 2.1815240383148193, "eval/last/test/acc": 0.32857142857142857, "eval/last/test/acc_std": 0.005580069685148526, "eval/last/test/f1": 0.26636787138103407, "eval/last/test/f1_std": 0.005714705380358942, "eval/last/testid/loss": 2.1871299743652344, "eval/last/testid/acc": 0.3285135916714864, "eval/last/testid/acc_std": 0.005967391686051638, "eval/last/testid/f1": 0.27284751491179066, "eval/last/testid/f1_std": 0.005656637438623792} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..188cac56b59ba504540d1ce10689da0659864f89 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",train,2.051693916320801,0.3771166907403424,0.00244381370034404,0.3151314575290664,0.0025878796213352436 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",validation,2.3111226558685303,0.29955703211517165,0.00557199653562535,0.22809156533459699,0.0051914275587849334 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",test,2.2077643871307373,0.32040816326530613,0.005742964572796282,0.25615203285306026,0.005755988496212478 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",testid,2.260200262069702,0.31289762868710236,0.005649747157859323,0.2524580398378575,0.005408003032298818 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..188cac56b59ba504540d1ce10689da0659864f89 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",train,2.051693916320801,0.3771166907403424,0.00244381370034404,0.3151314575290664,0.0025878796213352436 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",validation,2.3111226558685303,0.29955703211517165,0.00557199653562535,0.22809156533459699,0.0051914275587849334 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",test,2.2077643871307373,0.32040816326530613,0.005742964572796282,0.25615203285306026,0.005755988496212478 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00041999999999999996,0.05,26,"[1.4, 1.0]",testid,2.260200262069702,0.31289762868710236,0.005649747157859323,0.2524580398378575,0.005408003032298818 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..4fa87991740a004b06301fec1f785c71342e86a5 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,0.00018299999999999998,0.05,21,"[0.61, 1.0]",train,1.8462697267532349,0.4454961738221826,0.0026430052029591394,0.40051732798990813,0.0029712207243615626 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00018299999999999998,0.05,21,"[0.61, 1.0]",validation,2.3841230869293213,0.290328534514581,0.005608880975360732,0.2381362302924078,0.00522716009559544 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00018299999999999998,0.05,21,"[0.61, 1.0]",test,2.1815240383148193,0.32857142857142857,0.005580069685148526,0.26636787138103407,0.005714705380358942 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00018299999999999998,0.05,21,"[0.61, 1.0]",testid,2.1871299743652344,0.3285135916714864,0.005967391686051638,0.27284751491179066,0.005656637438623792 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/log.txt b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a76af4f0af4e96fab2b3f7a1d7a99e0ee03b4796 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,963 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 22:49:57 +config: +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:23:18 lr: nan time: 3.4967 data: 2.9269 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:42 lr: 0.000003 loss: 3.1690 (3.1736) grad: 0.1500 (0.1500) time: 0.4388 data: 0.0037 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:05 lr: 0.000006 loss: 3.1690 (3.1714) grad: 0.1488 (0.1496) time: 0.4439 data: 0.0038 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:50 lr: 0.000009 loss: 3.1600 (3.1683) grad: 0.1462 (0.1491) time: 0.4705 data: 0.0041 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:37 lr: 0.000012 loss: 3.1540 (3.1649) grad: 0.1421 (0.1483) time: 0.4608 data: 0.0042 max mem: 22448 +train: [0] [100/400] eta: 0:02:25 lr: 0.000015 loss: 3.1551 (3.1651) grad: 0.1381 (0.1472) time: 0.4545 data: 0.0040 max mem: 22448 +train: [0] [120/400] eta: 0:02:14 lr: 0.000018 loss: 3.1597 (3.1633) grad: 0.1339 (0.1451) time: 0.4710 data: 0.0041 max mem: 22448 +train: [0] [140/400] eta: 0:02:04 lr: 0.000021 loss: 3.1560 (3.1624) grad: 0.1348 (0.1449) time: 0.4527 data: 0.0041 max mem: 22448 +train: [0] [160/400] eta: 0:01:54 lr: 0.000024 loss: 3.1517 (3.1595) grad: 0.1509 (0.1463) time: 0.4633 data: 0.0039 max mem: 22448 +train: [0] [180/400] eta: 0:01:45 lr: 0.000027 loss: 3.1301 (3.1569) grad: 0.1472 (0.1462) time: 0.4912 data: 0.0043 max mem: 22448 +train: [0] [200/400] eta: 0:01:34 lr: 0.000030 loss: 3.1425 (3.1562) grad: 0.1363 (0.1451) time: 0.4472 data: 0.0040 max mem: 22448 +train: [0] [220/400] eta: 0:01:25 lr: 0.000033 loss: 3.1428 (3.1553) grad: 0.1417 (0.1449) time: 0.4565 data: 0.0038 max mem: 22448 +train: [0] [240/400] eta: 0:01:15 lr: 0.000036 loss: 3.1377 (3.1528) grad: 0.1436 (0.1448) time: 0.4555 data: 0.0041 max mem: 22448 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 3.1143 (3.1497) grad: 0.1412 (0.1443) time: 0.4528 data: 0.0042 max mem: 22448 +train: [0] [280/400] eta: 0:00:56 lr: 0.000042 loss: 3.0975 (3.1455) grad: 0.1389 (0.1442) time: 0.4574 data: 0.0041 max mem: 22448 +train: [0] [300/400] eta: 0:00:46 lr: 0.000045 loss: 3.0730 (3.1399) grad: 0.1426 (0.1444) time: 0.4514 data: 0.0042 max mem: 22448 +train: [0] [320/400] eta: 0:00:37 lr: 0.000048 loss: 3.0669 (3.1360) grad: 0.1507 (0.1453) time: 0.4509 data: 0.0047 max mem: 22448 +train: [0] [340/400] eta: 0:00:27 lr: 0.000051 loss: 3.0726 (3.1319) grad: 0.1506 (0.1455) time: 0.4506 data: 0.0041 max mem: 22448 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 3.0524 (3.1265) grad: 0.1542 (0.1465) time: 0.4546 data: 0.0040 max mem: 22448 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 3.0377 (3.1221) grad: 0.1608 (0.1474) time: 0.4618 data: 0.0040 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.0379 (3.1189) grad: 0.1658 (0.1483) time: 0.4556 data: 0.0041 max mem: 22448 +train: [0] Total time: 0:03:05 (0.4650 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.0379 (3.1189) grad: 0.1658 (0.1483) +eval (validation): [0] [ 0/85] eta: 0:04:37 time: 3.2630 data: 3.0068 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:31 time: 0.3412 data: 0.0033 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:18 time: 0.3366 data: 0.0038 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:09 time: 0.3197 data: 0.0036 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3395 data: 0.0041 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3346 data: 0.0039 max mem: 22448 +eval (validation): [0] Total time: 0:00:31 (0.3716 s / it) +cv: [0] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 2.542 acc: 0.251 f1: 0.171 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:06 lr: nan time: 3.3165 data: 2.9770 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:38 lr: 0.000063 loss: 2.9988 (3.0011) grad: 0.1620 (0.1584) time: 0.4376 data: 0.0038 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:06 lr: 0.000066 loss: 3.0037 (2.9989) grad: 0.1544 (0.1553) time: 0.4608 data: 0.0041 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:49 lr: 0.000069 loss: 2.9862 (2.9882) grad: 0.1533 (0.1566) time: 0.4523 data: 0.0043 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:36 lr: 0.000072 loss: 2.9862 (2.9896) grad: 0.1646 (0.1601) time: 0.4636 data: 0.0043 max mem: 22448 +train: [1] [100/400] eta: 0:02:24 lr: 0.000075 loss: 2.9706 (2.9818) grad: 0.1658 (0.1618) time: 0.4569 data: 0.0042 max mem: 22448 +train: [1] [120/400] eta: 0:02:14 lr: 0.000078 loss: 2.9594 (2.9780) grad: 0.1658 (0.1628) time: 0.4671 data: 0.0043 max mem: 22448 +train: [1] [140/400] eta: 0:02:03 lr: 0.000081 loss: 2.9717 (2.9764) grad: 0.1663 (0.1643) time: 0.4523 data: 0.0040 max mem: 22448 +train: [1] [160/400] eta: 0:01:54 lr: 0.000084 loss: 2.9584 (2.9755) grad: 0.1677 (0.1648) time: 0.4828 data: 0.0043 max mem: 22448 +train: [1] [180/400] eta: 0:01:44 lr: 0.000087 loss: 2.9613 (2.9755) grad: 0.1677 (0.1657) time: 0.4575 data: 0.0042 max mem: 22448 +train: [1] [200/400] eta: 0:01:34 lr: 0.000090 loss: 2.9522 (2.9724) grad: 0.1694 (0.1664) time: 0.4387 data: 0.0040 max mem: 22448 +train: [1] [220/400] eta: 0:01:24 lr: 0.000093 loss: 2.8990 (2.9642) grad: 0.1799 (0.1684) time: 0.4651 data: 0.0042 max mem: 22448 +train: [1] [240/400] eta: 0:01:15 lr: 0.000096 loss: 2.8958 (2.9602) grad: 0.1808 (0.1686) time: 0.4519 data: 0.0041 max mem: 22448 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 2.9093 (2.9583) grad: 0.1772 (0.1695) time: 0.4466 data: 0.0041 max mem: 22448 +train: [1] [280/400] eta: 0:00:55 lr: 0.000102 loss: 2.9184 (2.9547) grad: 0.1754 (0.1697) time: 0.4523 data: 0.0040 max mem: 22448 +train: [1] [300/400] eta: 0:00:46 lr: 0.000105 loss: 2.8987 (2.9526) grad: 0.1754 (0.1704) time: 0.4508 data: 0.0039 max mem: 22448 +train: [1] [320/400] eta: 0:00:37 lr: 0.000108 loss: 2.8889 (2.9487) grad: 0.1784 (0.1711) time: 0.4496 data: 0.0042 max mem: 22448 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 2.8746 (2.9443) grad: 0.1776 (0.1716) time: 0.4448 data: 0.0042 max mem: 22448 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 2.8795 (2.9419) grad: 0.1744 (0.1720) time: 0.4517 data: 0.0041 max mem: 22448 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 2.8792 (2.9376) grad: 0.1780 (0.1729) time: 0.4475 data: 0.0042 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.8792 (2.9360) grad: 0.1913 (0.1742) time: 0.4517 data: 0.0041 max mem: 22448 +train: [1] Total time: 0:03:04 (0.4615 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.8792 (2.9360) grad: 0.1913 (0.1742) +eval (validation): [1] [ 0/85] eta: 0:04:37 time: 3.2686 data: 3.0326 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:32 time: 0.3574 data: 0.0047 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:18 time: 0.3233 data: 0.0037 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:09 time: 0.3601 data: 0.0041 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3214 data: 0.0040 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3128 data: 0.0039 max mem: 22448 +eval (validation): [1] Total time: 0:00:31 (0.3758 s / it) +cv: [1] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 2.383 acc: 0.276 f1: 0.208 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:21:49 lr: nan time: 3.2745 data: 2.8979 max mem: 22448 +train: [2] [ 20/400] eta: 0:03:47 lr: 0.000123 loss: 2.9184 (2.8800) grad: 0.2179 (0.2181) time: 0.4638 data: 0.0032 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:13 lr: 0.000126 loss: 2.8963 (2.8823) grad: 0.2182 (0.2165) time: 0.4748 data: 0.0041 max mem: 22448 +train: [2] [ 60/400] eta: 0:02:54 lr: 0.000129 loss: 2.8850 (2.8908) grad: 0.2267 (0.2429) time: 0.4592 data: 0.0041 max mem: 22448 +train: [2] [ 80/400] eta: 0:02:38 lr: 0.000132 loss: 2.9580 (2.9458) grad: 0.3782 (0.3490) time: 0.4508 data: 0.0041 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=69.05 > 63.56) at step 448. Freezing. +train: [2] [100/400] eta: 0:02:27 lr: 0.000135 loss: 3.2077 (3.0764) grad: 0.7867 (0.5331) time: 0.4703 data: 0.0042 max mem: 22448 +train: [2] [120/400] eta: 0:02:15 lr: 0.000138 loss: 2.8862 (3.0355) grad: 0.2149 (0.4770) time: 0.4550 data: 0.0040 max mem: 22448 +train: [2] [140/400] eta: 0:02:05 lr: 0.000141 loss: 2.8497 (3.0135) grad: 0.1955 (0.4383) time: 0.4765 data: 0.0043 max mem: 22448 +train: [2] [160/400] eta: 0:01:55 lr: 0.000144 loss: 2.8214 (2.9871) grad: 0.2004 (0.4085) time: 0.4556 data: 0.0042 max mem: 22448 +train: [2] [180/400] eta: 0:01:44 lr: 0.000147 loss: 2.8055 (2.9663) grad: 0.1961 (0.3854) time: 0.4452 data: 0.0041 max mem: 22448 +train: [2] [200/400] eta: 0:01:35 lr: 0.000150 loss: 2.8050 (2.9505) grad: 0.1939 (0.3665) time: 0.4588 data: 0.0043 max mem: 22448 +train: [2] [220/400] eta: 0:01:25 lr: 0.000153 loss: 2.8186 (2.9390) grad: 0.2000 (0.3525) time: 0.4566 data: 0.0041 max mem: 22448 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 2.8410 (2.9321) grad: 0.2056 (0.3408) time: 0.4524 data: 0.0041 max mem: 22448 +train: [2] [260/400] eta: 0:01:05 lr: 0.000159 loss: 2.8451 (2.9217) grad: 0.2035 (0.3300) time: 0.4572 data: 0.0041 max mem: 22448 +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 2.7907 (2.9128) grad: 0.2140 (0.3228) time: 0.4590 data: 0.0041 max mem: 22448 +train: [2] [300/400] eta: 0:00:46 lr: 0.000165 loss: 2.7907 (2.9058) grad: 0.2381 (0.3172) time: 0.4484 data: 0.0043 max mem: 22448 +train: [2] [320/400] eta: 0:00:37 lr: 0.000168 loss: 2.8358 (2.9029) grad: 0.2483 (0.3220) time: 0.4548 data: 0.0043 max mem: 22448 +train: [2] [340/400] eta: 0:00:27 lr: 0.000171 loss: 3.0091 (2.9371) grad: 0.5157 (0.3744) time: 0.4494 data: 0.0042 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=79.17 > 63.56) at step 573. Freezing. +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 3.1265 (2.9513) grad: 0.6940 (0.3937) time: 0.4459 data: 0.0042 max mem: 22448 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 2.8510 (2.9455) grad: 0.2290 (0.3857) time: 0.4424 data: 0.0041 max mem: 22448 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 2.8365 (2.9385) grad: 0.2336 (0.3783) time: 0.4407 data: 0.0039 max mem: 22448 +train: [2] Total time: 0:03:05 (0.4633 s / it) +train: [2] Summary: lr: 0.000180 loss: 2.8365 (2.9385) grad: 0.2336 (0.3783) +eval (validation): [2] [ 0/85] eta: 0:04:41 time: 3.3160 data: 3.0353 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:32 time: 0.3593 data: 0.0039 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:19 time: 0.3585 data: 0.0043 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:10 time: 0.3423 data: 0.0042 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3330 data: 0.0041 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3271 data: 0.0040 max mem: 22448 +eval (validation): [2] Total time: 0:00:32 (0.3850 s / it) +cv: [2] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 2.362 acc: 0.286 f1: 0.212 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:44 lr: nan time: 3.4113 data: 3.0189 max mem: 22448 +train: [3] [ 20/400] eta: 0:03:45 lr: 0.000183 loss: 2.8073 (2.8051) grad: 0.2852 (0.2784) time: 0.4522 data: 0.0034 max mem: 22448 +train: [3] [ 40/400] eta: 0:03:11 lr: 0.000186 loss: 2.8290 (2.8456) grad: 0.2921 (0.3113) time: 0.4673 data: 0.0041 max mem: 22448 +train: [3] [ 60/400] eta: 0:02:51 lr: 0.000189 loss: 2.9584 (2.9784) grad: 0.5688 (0.5036) time: 0.4519 data: 0.0041 max mem: 22448 +WARNING: classifier 46 (36, 1.0) diverged (loss=91.16 > 63.56) at step 638. Freezing. +train: [3] [ 80/400] eta: 0:02:38 lr: 0.000192 loss: 3.5606 (3.2435) grad: 1.2373 (0.8640) time: 0.4622 data: 0.0041 max mem: 22448 +train: [3] [100/400] eta: 0:02:26 lr: 0.000195 loss: 3.7894 (3.3428) grad: 1.7970 (1.0081) time: 0.4572 data: 0.0042 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=72.19 > 63.56) at step 654. Freezing. +train: [3] [120/400] eta: 0:02:14 lr: 0.000198 loss: 3.7056 (3.3115) grad: 1.2837 (0.9531) time: 0.4371 data: 0.0040 max mem: 22448 +train: [3] [140/400] eta: 0:02:04 lr: 0.000201 loss: 2.7940 (3.2363) grad: 0.2097 (0.8470) time: 0.4703 data: 0.0048 max mem: 22448 +train: [3] [160/400] eta: 0:01:53 lr: 0.000204 loss: 2.7940 (3.1845) grad: 0.2051 (0.7668) time: 0.4405 data: 0.0041 max mem: 22448 +train: [3] [180/400] eta: 0:01:43 lr: 0.000207 loss: 2.7984 (3.1388) grad: 0.1988 (0.7037) time: 0.4424 data: 0.0042 max mem: 22448 +train: [3] [200/400] eta: 0:01:33 lr: 0.000210 loss: 2.7944 (3.1048) grad: 0.1983 (0.6536) time: 0.4581 data: 0.0043 max mem: 22448 +train: [3] [220/400] eta: 0:01:23 lr: 0.000213 loss: 2.7871 (3.0747) grad: 0.2011 (0.6134) time: 0.4468 data: 0.0041 max mem: 22448 +train: [3] [240/400] eta: 0:01:14 lr: 0.000216 loss: 2.7871 (3.0499) grad: 0.2111 (0.5801) time: 0.4499 data: 0.0043 max mem: 22448 +train: [3] [260/400] eta: 0:01:04 lr: 0.000219 loss: 2.7725 (3.0268) grad: 0.2089 (0.5516) time: 0.4476 data: 0.0043 max mem: 22448 +train: [3] [280/400] eta: 0:00:55 lr: 0.000222 loss: 2.7604 (3.0075) grad: 0.2066 (0.5275) time: 0.4459 data: 0.0041 max mem: 22448 +train: [3] [300/400] eta: 0:00:46 lr: 0.000225 loss: 2.7660 (2.9915) grad: 0.2039 (0.5060) time: 0.4481 data: 0.0041 max mem: 22448 +train: [3] [320/400] eta: 0:00:36 lr: 0.000228 loss: 2.7372 (2.9764) grad: 0.2078 (0.4879) time: 0.4404 data: 0.0041 max mem: 22448 +train: [3] [340/400] eta: 0:00:27 lr: 0.000231 loss: 2.7578 (2.9641) grad: 0.2178 (0.4724) time: 0.4425 data: 0.0043 max mem: 22448 +train: [3] [360/400] eta: 0:00:18 lr: 0.000234 loss: 2.7839 (2.9562) grad: 0.2238 (0.4587) time: 0.4441 data: 0.0041 max mem: 22448 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 2.7734 (2.9460) grad: 0.2253 (0.4461) time: 0.4416 data: 0.0040 max mem: 22448 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 2.7314 (2.9349) grad: 0.2144 (0.4344) time: 0.4550 data: 0.0040 max mem: 22448 +train: [3] Total time: 0:03:03 (0.4578 s / it) +train: [3] Summary: lr: 0.000240 loss: 2.7314 (2.9349) grad: 0.2144 (0.4344) +eval (validation): [3] [ 0/85] eta: 0:04:33 time: 3.2235 data: 2.9891 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:31 time: 0.3462 data: 0.0046 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:18 time: 0.3548 data: 0.0036 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3411 data: 0.0043 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3308 data: 0.0039 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3141 data: 0.0039 max mem: 22448 +eval (validation): [3] Total time: 0:00:32 (0.3780 s / it) +cv: [3] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 2.371 acc: 0.289 f1: 0.205 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:23:05 lr: nan time: 3.4638 data: 3.0934 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:51 lr: 0.000243 loss: 2.6375 (2.6839) grad: 0.2101 (0.2131) time: 0.4654 data: 0.0043 max mem: 22448 +train: [4] [ 40/400] eta: 0:03:10 lr: 0.000246 loss: 2.6795 (2.6999) grad: 0.2134 (0.2173) time: 0.4452 data: 0.0040 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:50 lr: 0.000249 loss: 2.7137 (2.7104) grad: 0.2201 (0.2182) time: 0.4416 data: 0.0042 max mem: 22448 +train: [4] [ 80/400] eta: 0:02:36 lr: 0.000252 loss: 2.7137 (2.7093) grad: 0.2168 (0.2161) time: 0.4615 data: 0.0043 max mem: 22448 +train: [4] [100/400] eta: 0:02:24 lr: 0.000255 loss: 2.7404 (2.7206) grad: 0.2158 (0.2163) time: 0.4493 data: 0.0042 max mem: 22448 +train: [4] [120/400] eta: 0:02:13 lr: 0.000258 loss: 2.7404 (2.7163) grad: 0.2158 (0.2167) time: 0.4545 data: 0.0041 max mem: 22448 +train: [4] [140/400] eta: 0:02:03 lr: 0.000261 loss: 2.6799 (2.7157) grad: 0.2206 (0.2194) time: 0.4624 data: 0.0042 max mem: 22448 +train: [4] [160/400] eta: 0:01:53 lr: 0.000264 loss: 2.7174 (2.7169) grad: 0.2389 (0.2226) time: 0.4465 data: 0.0041 max mem: 22448 +train: [4] [180/400] eta: 0:01:43 lr: 0.000267 loss: 2.7159 (2.7202) grad: 0.2435 (0.2252) time: 0.4508 data: 0.0043 max mem: 22448 +train: [4] [200/400] eta: 0:01:33 lr: 0.000270 loss: 2.7053 (2.7158) grad: 0.2385 (0.2263) time: 0.4502 data: 0.0043 max mem: 22448 +train: [4] [220/400] eta: 0:01:23 lr: 0.000273 loss: 2.7072 (2.7183) grad: 0.2385 (0.2285) time: 0.4517 data: 0.0043 max mem: 22448 +train: [4] [240/400] eta: 0:01:14 lr: 0.000276 loss: 2.7316 (2.7195) grad: 0.2542 (0.2306) time: 0.4443 data: 0.0043 max mem: 22448 +train: [4] [260/400] eta: 0:01:04 lr: 0.000279 loss: 2.7801 (2.7289) grad: 0.2657 (0.2451) time: 0.4480 data: 0.0044 max mem: 22448 +WARNING: classifier 43 (22, 1.0) diverged (loss=85.78 > 63.56) at step 940. Freezing. +train: [4] [280/400] eta: 0:00:55 lr: 0.000282 loss: 2.9901 (2.7964) grad: 0.5782 (0.3357) time: 0.4453 data: 0.0041 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=88.94 > 63.56) at step 946. Freezing. +train: [4] [300/400] eta: 0:00:46 lr: 0.000285 loss: 3.4692 (2.8364) grad: 1.4687 (0.3982) time: 0.4492 data: 0.0041 max mem: 22448 +train: [4] [320/400] eta: 0:00:36 lr: 0.000288 loss: 2.7509 (2.8288) grad: 0.2073 (0.3851) time: 0.4464 data: 0.0040 max mem: 22448 +train: [4] [340/400] eta: 0:00:27 lr: 0.000291 loss: 2.7147 (2.8224) grad: 0.1911 (0.3741) time: 0.4483 data: 0.0041 max mem: 22448 +train: [4] [360/400] eta: 0:00:18 lr: 0.000294 loss: 2.7251 (2.8185) grad: 0.1944 (0.3645) time: 0.4401 data: 0.0041 max mem: 22448 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 2.7251 (2.8136) grad: 0.2034 (0.3562) time: 0.4312 data: 0.0042 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.7217 (2.8081) grad: 0.2061 (0.3486) time: 0.4579 data: 0.0045 max mem: 22448 +train: [4] Total time: 0:03:02 (0.4573 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.7217 (2.8081) grad: 0.2061 (0.3486) +eval (validation): [4] [ 0/85] eta: 0:04:29 time: 3.1761 data: 2.9104 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:33 time: 0.3766 data: 0.0047 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:19 time: 0.3527 data: 0.0038 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:10 time: 0.3423 data: 0.0045 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3286 data: 0.0040 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3090 data: 0.0039 max mem: 22448 +eval (validation): [4] Total time: 0:00:32 (0.3844 s / it) +cv: [4] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 2.345 acc: 0.290 f1: 0.239 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:22:26 lr: nan time: 3.3668 data: 2.9941 max mem: 22448 +train: [5] [ 20/400] eta: 0:03:49 lr: 0.000300 loss: 2.6198 (2.6200) grad: 0.2113 (0.2139) time: 0.4663 data: 0.0045 max mem: 22448 +train: [5] [ 40/400] eta: 0:03:08 lr: 0.000300 loss: 2.6448 (2.6786) grad: 0.2123 (0.2167) time: 0.4386 data: 0.0042 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:50 lr: 0.000300 loss: 2.6719 (2.6769) grad: 0.2134 (0.2166) time: 0.4556 data: 0.0044 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:36 lr: 0.000300 loss: 2.6718 (2.6722) grad: 0.2118 (0.2148) time: 0.4582 data: 0.0042 max mem: 22448 +train: [5] [100/400] eta: 0:02:24 lr: 0.000300 loss: 2.6879 (2.6786) grad: 0.2184 (0.2177) time: 0.4384 data: 0.0040 max mem: 22448 +train: [5] [120/400] eta: 0:02:14 lr: 0.000300 loss: 2.6520 (2.6738) grad: 0.2196 (0.2171) time: 0.4774 data: 0.0042 max mem: 22448 +train: [5] [140/400] eta: 0:02:03 lr: 0.000300 loss: 2.6371 (2.6676) grad: 0.2099 (0.2159) time: 0.4404 data: 0.0042 max mem: 22448 +train: [5] [160/400] eta: 0:01:52 lr: 0.000299 loss: 2.6371 (2.6652) grad: 0.2083 (0.2158) time: 0.4407 data: 0.0040 max mem: 22448 +train: [5] [180/400] eta: 0:01:43 lr: 0.000299 loss: 2.6636 (2.6672) grad: 0.2099 (0.2153) time: 0.4539 data: 0.0042 max mem: 22448 +train: [5] [200/400] eta: 0:01:33 lr: 0.000299 loss: 2.6753 (2.6673) grad: 0.2121 (0.2156) time: 0.4390 data: 0.0043 max mem: 22448 +train: [5] [220/400] eta: 0:01:23 lr: 0.000299 loss: 2.6609 (2.6656) grad: 0.2145 (0.2152) time: 0.4400 data: 0.0042 max mem: 22448 +train: [5] [240/400] eta: 0:01:13 lr: 0.000299 loss: 2.6314 (2.6649) grad: 0.2168 (0.2157) time: 0.4415 data: 0.0043 max mem: 22448 +train: [5] [260/400] eta: 0:01:04 lr: 0.000299 loss: 2.6162 (2.6615) grad: 0.2194 (0.2158) time: 0.4531 data: 0.0042 max mem: 22448 +train: [5] [280/400] eta: 0:00:55 lr: 0.000298 loss: 2.6572 (2.6632) grad: 0.2246 (0.2167) time: 0.4393 data: 0.0041 max mem: 22448 +train: [5] [300/400] eta: 0:00:45 lr: 0.000298 loss: 2.6656 (2.6596) grad: 0.2228 (0.2168) time: 0.4401 data: 0.0042 max mem: 22448 +train: [5] [320/400] eta: 0:00:36 lr: 0.000298 loss: 2.6175 (2.6610) grad: 0.2228 (0.2173) time: 0.4424 data: 0.0044 max mem: 22448 +train: [5] [340/400] eta: 0:00:27 lr: 0.000298 loss: 2.6605 (2.6596) grad: 0.2218 (0.2173) time: 0.4427 data: 0.0041 max mem: 22448 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 2.6061 (2.6587) grad: 0.2202 (0.2177) time: 0.4387 data: 0.0041 max mem: 22448 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 2.6075 (2.6584) grad: 0.2230 (0.2181) time: 0.4478 data: 0.0043 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.5986 (2.6553) grad: 0.2150 (0.2175) time: 0.4483 data: 0.0044 max mem: 22448 +train: [5] Total time: 0:03:01 (0.4547 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.5986 (2.6553) grad: 0.2150 (0.2175) +eval (validation): [5] [ 0/85] eta: 0:04:34 time: 3.2337 data: 2.9672 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:33 time: 0.3741 data: 0.0050 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:19 time: 0.3549 data: 0.0038 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:10 time: 0.3313 data: 0.0042 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3296 data: 0.0039 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3289 data: 0.0039 max mem: 22448 +eval (validation): [5] Total time: 0:00:32 (0.3836 s / it) +cv: [5] best hparam: (1.4, 1.0) (026) ('026_lr1.4e+00_wd1.0e+00') loss: 2.311 acc: 0.300 f1: 0.228 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:23:09 lr: nan time: 3.4745 data: 3.1324 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:43 lr: 0.000296 loss: 2.5878 (2.5773) grad: 0.2112 (0.2088) time: 0.4431 data: 0.0033 max mem: 22448 +train: [6] [ 40/400] eta: 0:03:04 lr: 0.000296 loss: 2.5977 (2.6033) grad: 0.2149 (0.2161) time: 0.4337 data: 0.0037 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:47 lr: 0.000296 loss: 2.6000 (2.6027) grad: 0.2196 (0.2187) time: 0.4558 data: 0.0042 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:35 lr: 0.000295 loss: 2.5690 (2.5895) grad: 0.2196 (0.2192) time: 0.4622 data: 0.0044 max mem: 22448 +train: [6] [100/400] eta: 0:02:23 lr: 0.000295 loss: 2.5750 (2.5881) grad: 0.2161 (0.2192) time: 0.4473 data: 0.0042 max mem: 22448 +train: [6] [120/400] eta: 0:02:13 lr: 0.000295 loss: 2.5821 (2.5918) grad: 0.2227 (0.2208) time: 0.4675 data: 0.0043 max mem: 22448 +train: [6] [140/400] eta: 0:02:02 lr: 0.000294 loss: 2.6096 (2.5996) grad: 0.2243 (0.2220) time: 0.4295 data: 0.0042 max mem: 22448 +train: [6] [160/400] eta: 0:01:52 lr: 0.000294 loss: 2.6096 (2.6024) grad: 0.2243 (0.2220) time: 0.4439 data: 0.0042 max mem: 22448 +train: [6] [180/400] eta: 0:01:42 lr: 0.000293 loss: 2.5976 (2.6014) grad: 0.2268 (0.2229) time: 0.4509 data: 0.0043 max mem: 22448 +train: [6] [200/400] eta: 0:01:32 lr: 0.000293 loss: 2.6099 (2.6018) grad: 0.2273 (0.2227) time: 0.4414 data: 0.0042 max mem: 22448 +train: [6] [220/400] eta: 0:01:23 lr: 0.000292 loss: 2.6100 (2.5998) grad: 0.2265 (0.2232) time: 0.4503 data: 0.0043 max mem: 22448 +train: [6] [240/400] eta: 0:01:13 lr: 0.000292 loss: 2.6136 (2.6014) grad: 0.2265 (0.2236) time: 0.4495 data: 0.0041 max mem: 22448 +train: [6] [260/400] eta: 0:01:04 lr: 0.000291 loss: 2.5892 (2.5971) grad: 0.2219 (0.2233) time: 0.4339 data: 0.0041 max mem: 22448 +train: [6] [280/400] eta: 0:00:54 lr: 0.000291 loss: 2.5643 (2.5963) grad: 0.2171 (0.2235) time: 0.4346 data: 0.0042 max mem: 22448 +train: [6] [300/400] eta: 0:00:45 lr: 0.000290 loss: 2.6122 (2.5977) grad: 0.2171 (0.2233) time: 0.4389 data: 0.0041 max mem: 22448 +train: [6] [320/400] eta: 0:00:36 lr: 0.000290 loss: 2.6410 (2.5985) grad: 0.2189 (0.2234) time: 0.4360 data: 0.0041 max mem: 22448 +train: [6] [340/400] eta: 0:00:27 lr: 0.000289 loss: 2.6389 (2.6003) grad: 0.2244 (0.2237) time: 0.4369 data: 0.0040 max mem: 22448 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 2.5801 (2.5982) grad: 0.2291 (0.2241) time: 0.4348 data: 0.0041 max mem: 22448 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 2.5658 (2.5999) grad: 0.2261 (0.2242) time: 0.4376 data: 0.0041 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.6139 (2.5999) grad: 0.2209 (0.2238) time: 0.4436 data: 0.0042 max mem: 22448 +train: [6] Total time: 0:03:00 (0.4514 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.6139 (2.5999) grad: 0.2209 (0.2238) +eval (validation): [6] [ 0/85] eta: 0:04:25 time: 3.1235 data: 2.8877 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:32 time: 0.3648 data: 0.0048 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:19 time: 0.3471 data: 0.0039 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:09 time: 0.3239 data: 0.0041 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3451 data: 0.0047 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3363 data: 0.0042 max mem: 22448 +eval (validation): [6] Total time: 0:00:32 (0.3806 s / it) +cv: [6] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 2.358 acc: 0.291 f1: 0.240 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [7] [ 0/400] eta: 0:21:40 lr: nan time: 3.2515 data: 2.9162 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:42 lr: 0.000286 loss: 2.4963 (2.5277) grad: 0.2165 (0.2243) time: 0.4529 data: 0.0053 max mem: 22448 +train: [7] [ 40/400] eta: 0:03:08 lr: 0.000286 loss: 2.4963 (2.5297) grad: 0.2288 (0.2285) time: 0.4561 data: 0.0036 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:49 lr: 0.000285 loss: 2.4911 (2.5137) grad: 0.2306 (0.2300) time: 0.4473 data: 0.0041 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:36 lr: 0.000284 loss: 2.5121 (2.5284) grad: 0.2280 (0.2279) time: 0.4646 data: 0.0045 max mem: 22448 +train: [7] [100/400] eta: 0:02:24 lr: 0.000284 loss: 2.5259 (2.5229) grad: 0.2219 (0.2262) time: 0.4444 data: 0.0043 max mem: 22448 +train: [7] [120/400] eta: 0:02:13 lr: 0.000283 loss: 2.5066 (2.5250) grad: 0.2230 (0.2264) time: 0.4613 data: 0.0042 max mem: 22448 +train: [7] [140/400] eta: 0:02:03 lr: 0.000282 loss: 2.5392 (2.5303) grad: 0.2242 (0.2267) time: 0.4468 data: 0.0044 max mem: 22448 +train: [7] [160/400] eta: 0:01:52 lr: 0.000282 loss: 2.5464 (2.5311) grad: 0.2253 (0.2266) time: 0.4368 data: 0.0041 max mem: 22448 +train: [7] [180/400] eta: 0:01:42 lr: 0.000281 loss: 2.5707 (2.5360) grad: 0.2253 (0.2274) time: 0.4550 data: 0.0044 max mem: 22448 +train: [7] [200/400] eta: 0:01:32 lr: 0.000280 loss: 2.5702 (2.5350) grad: 0.2266 (0.2273) time: 0.4435 data: 0.0041 max mem: 22448 +train: [7] [220/400] eta: 0:01:23 lr: 0.000279 loss: 2.5171 (2.5311) grad: 0.2240 (0.2275) time: 0.4447 data: 0.0042 max mem: 22448 +train: [7] [240/400] eta: 0:01:13 lr: 0.000278 loss: 2.5337 (2.5343) grad: 0.2252 (0.2278) time: 0.4347 data: 0.0042 max mem: 22448 +train: [7] [260/400] eta: 0:01:04 lr: 0.000278 loss: 2.5435 (2.5333) grad: 0.2231 (0.2274) time: 0.4416 data: 0.0045 max mem: 22448 +train: [7] [280/400] eta: 0:00:54 lr: 0.000277 loss: 2.5010 (2.5311) grad: 0.2217 (0.2273) time: 0.4360 data: 0.0041 max mem: 22448 +train: [7] [300/400] eta: 0:00:45 lr: 0.000276 loss: 2.4748 (2.5289) grad: 0.2241 (0.2273) time: 0.4408 data: 0.0040 max mem: 22448 +train: [7] [320/400] eta: 0:00:36 lr: 0.000275 loss: 2.4996 (2.5298) grad: 0.2255 (0.2272) time: 0.4347 data: 0.0043 max mem: 22448 +train: [7] [340/400] eta: 0:00:27 lr: 0.000274 loss: 2.5399 (2.5287) grad: 0.2242 (0.2269) time: 0.4415 data: 0.0042 max mem: 22448 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 2.5545 (2.5304) grad: 0.2249 (0.2270) time: 0.4422 data: 0.0042 max mem: 22448 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 2.5545 (2.5295) grad: 0.2259 (0.2273) time: 0.4381 data: 0.0041 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.5485 (2.5310) grad: 0.2292 (0.2276) time: 0.4548 data: 0.0042 max mem: 22448 +train: [7] Total time: 0:03:01 (0.4532 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.5485 (2.5310) grad: 0.2292 (0.2276) +eval (validation): [7] [ 0/85] eta: 0:04:23 time: 3.1001 data: 2.8689 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:32 time: 0.3644 data: 0.0048 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:19 time: 0.3549 data: 0.0035 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:09 time: 0.3418 data: 0.0041 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3227 data: 0.0040 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3221 data: 0.0039 max mem: 22448 +eval (validation): [7] Total time: 0:00:32 (0.3809 s / it) +cv: [7] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.349 acc: 0.290 f1: 0.233 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:22:24 lr: nan time: 3.3617 data: 2.9827 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:45 lr: 0.000270 loss: 2.4117 (2.4316) grad: 0.2120 (0.2155) time: 0.4557 data: 0.0045 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:08 lr: 0.000270 loss: 2.4275 (2.4463) grad: 0.2196 (0.2190) time: 0.4488 data: 0.0043 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:49 lr: 0.000269 loss: 2.4535 (2.4578) grad: 0.2228 (0.2196) time: 0.4519 data: 0.0042 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:36 lr: 0.000268 loss: 2.5115 (2.4709) grad: 0.2230 (0.2229) time: 0.4504 data: 0.0041 max mem: 22448 +train: [8] [100/400] eta: 0:02:24 lr: 0.000267 loss: 2.5118 (2.4714) grad: 0.2309 (0.2265) time: 0.4554 data: 0.0044 max mem: 22448 +train: [8] [120/400] eta: 0:02:13 lr: 0.000266 loss: 2.4798 (2.4696) grad: 0.2425 (0.2291) time: 0.4646 data: 0.0042 max mem: 22448 +train: [8] [140/400] eta: 0:02:03 lr: 0.000265 loss: 2.4649 (2.4737) grad: 0.2394 (0.2301) time: 0.4534 data: 0.0041 max mem: 22448 +train: [8] [160/400] eta: 0:01:52 lr: 0.000264 loss: 2.4689 (2.4719) grad: 0.2410 (0.2324) time: 0.4414 data: 0.0040 max mem: 22448 +train: [8] [180/400] eta: 0:01:43 lr: 0.000263 loss: 2.4085 (2.4667) grad: 0.2394 (0.2324) time: 0.4543 data: 0.0042 max mem: 22448 +train: [8] [200/400] eta: 0:01:33 lr: 0.000262 loss: 2.4363 (2.4695) grad: 0.2327 (0.2326) time: 0.4385 data: 0.0042 max mem: 22448 +train: [8] [220/400] eta: 0:01:23 lr: 0.000260 loss: 2.5133 (2.4731) grad: 0.2327 (0.2325) time: 0.4390 data: 0.0043 max mem: 22448 +train: [8] [240/400] eta: 0:01:13 lr: 0.000259 loss: 2.5101 (2.4718) grad: 0.2346 (0.2325) time: 0.4510 data: 0.0041 max mem: 22448 +train: [8] [260/400] eta: 0:01:04 lr: 0.000258 loss: 2.4819 (2.4741) grad: 0.2284 (0.2328) time: 0.4470 data: 0.0041 max mem: 22448 +train: [8] [280/400] eta: 0:00:55 lr: 0.000257 loss: 2.4819 (2.4737) grad: 0.2260 (0.2330) time: 0.4393 data: 0.0042 max mem: 22448 +train: [8] [300/400] eta: 0:00:45 lr: 0.000256 loss: 2.4648 (2.4745) grad: 0.2293 (0.2333) time: 0.4468 data: 0.0041 max mem: 22448 +train: [8] [320/400] eta: 0:00:36 lr: 0.000255 loss: 2.4648 (2.4748) grad: 0.2272 (0.2327) time: 0.4399 data: 0.0041 max mem: 22448 +train: [8] [340/400] eta: 0:00:27 lr: 0.000254 loss: 2.4642 (2.4736) grad: 0.2250 (0.2330) time: 0.4437 data: 0.0041 max mem: 22448 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 2.4571 (2.4731) grad: 0.2376 (0.2331) time: 0.4465 data: 0.0041 max mem: 22448 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 2.4909 (2.4745) grad: 0.2324 (0.2330) time: 0.4415 data: 0.0040 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.5149 (2.4756) grad: 0.2335 (0.2335) time: 0.4374 data: 0.0040 max mem: 22448 +train: [8] Total time: 0:03:01 (0.4549 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.5149 (2.4756) grad: 0.2335 (0.2335) +eval (validation): [8] [ 0/85] eta: 0:04:32 time: 3.2033 data: 2.9706 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:32 time: 0.3653 data: 0.0040 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:19 time: 0.3434 data: 0.0034 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:09 time: 0.3275 data: 0.0041 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3319 data: 0.0041 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3351 data: 0.0040 max mem: 22448 +eval (validation): [8] Total time: 0:00:32 (0.3789 s / it) +cv: [8] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 2.437 acc: 0.279 f1: 0.225 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:22:49 lr: nan time: 3.4237 data: 3.0429 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:44 lr: 0.000249 loss: 2.4206 (2.4523) grad: 0.2375 (0.2433) time: 0.4480 data: 0.0051 max mem: 22448 +train: [9] [ 40/400] eta: 0:03:08 lr: 0.000248 loss: 2.4522 (2.4659) grad: 0.2330 (0.2366) time: 0.4547 data: 0.0043 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:51 lr: 0.000247 loss: 2.4623 (2.4553) grad: 0.2285 (0.2336) time: 0.4615 data: 0.0043 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:36 lr: 0.000246 loss: 2.4550 (2.4609) grad: 0.2299 (0.2346) time: 0.4465 data: 0.0042 max mem: 22448 +train: [9] [100/400] eta: 0:02:24 lr: 0.000244 loss: 2.4257 (2.4525) grad: 0.2356 (0.2342) time: 0.4460 data: 0.0042 max mem: 22448 +train: [9] [120/400] eta: 0:02:14 lr: 0.000243 loss: 2.3835 (2.4469) grad: 0.2255 (0.2332) time: 0.4749 data: 0.0044 max mem: 22448 +train: [9] [140/400] eta: 0:02:03 lr: 0.000242 loss: 2.3835 (2.4461) grad: 0.2306 (0.2341) time: 0.4431 data: 0.0042 max mem: 22448 +train: [9] [160/400] eta: 0:01:52 lr: 0.000241 loss: 2.4127 (2.4427) grad: 0.2322 (0.2349) time: 0.4329 data: 0.0042 max mem: 22448 +train: [9] [180/400] eta: 0:01:43 lr: 0.000240 loss: 2.4094 (2.4438) grad: 0.2382 (0.2362) time: 0.4601 data: 0.0042 max mem: 22448 +train: [9] [200/400] eta: 0:01:33 lr: 0.000238 loss: 2.3858 (2.4410) grad: 0.2451 (0.2379) time: 0.4482 data: 0.0042 max mem: 22448 +train: [9] [220/400] eta: 0:01:23 lr: 0.000237 loss: 2.3831 (2.4376) grad: 0.2443 (0.2384) time: 0.4498 data: 0.0041 max mem: 22448 +train: [9] [240/400] eta: 0:01:14 lr: 0.000236 loss: 2.4707 (2.4423) grad: 0.2410 (0.2381) time: 0.4365 data: 0.0041 max mem: 22448 +train: [9] [260/400] eta: 0:01:04 lr: 0.000234 loss: 2.4242 (2.4398) grad: 0.2324 (0.2374) time: 0.4484 data: 0.0042 max mem: 22448 +train: [9] [280/400] eta: 0:00:55 lr: 0.000233 loss: 2.4025 (2.4386) grad: 0.2295 (0.2373) time: 0.4501 data: 0.0042 max mem: 22448 +train: [9] [300/400] eta: 0:00:45 lr: 0.000232 loss: 2.4336 (2.4389) grad: 0.2410 (0.2381) time: 0.4489 data: 0.0042 max mem: 22448 +train: [9] [320/400] eta: 0:00:36 lr: 0.000230 loss: 2.4416 (2.4411) grad: 0.2412 (0.2379) time: 0.4484 data: 0.0042 max mem: 22448 +train: [9] [340/400] eta: 0:00:27 lr: 0.000229 loss: 2.4239 (2.4383) grad: 0.2396 (0.2381) time: 0.4503 data: 0.0041 max mem: 22448 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 2.4066 (2.4386) grad: 0.2439 (0.2386) time: 0.4452 data: 0.0041 max mem: 22448 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 2.4141 (2.4386) grad: 0.2439 (0.2385) time: 0.4471 data: 0.0040 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.4481 (2.4389) grad: 0.2375 (0.2388) time: 0.4242 data: 0.0038 max mem: 22448 +train: [9] Total time: 0:03:02 (0.4560 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.4481 (2.4389) grad: 0.2375 (0.2388) +eval (validation): [9] [ 0/85] eta: 0:04:46 time: 3.3678 data: 3.0808 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:32 time: 0.3643 data: 0.0104 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:19 time: 0.3491 data: 0.0039 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:10 time: 0.3395 data: 0.0034 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3291 data: 0.0040 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3239 data: 0.0039 max mem: 22448 +eval (validation): [9] Total time: 0:00:32 (0.3836 s / it) +cv: [9] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 2.387 acc: 0.290 f1: 0.242 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:23:20 lr: nan time: 3.5022 data: 3.1210 max mem: 22448 +train: [10] [ 20/400] eta: 0:03:44 lr: 0.000224 loss: 2.3580 (2.3876) grad: 0.2420 (0.2414) time: 0.4457 data: 0.0029 max mem: 22448 +train: [10] [ 40/400] eta: 0:03:06 lr: 0.000222 loss: 2.3784 (2.3926) grad: 0.2408 (0.2390) time: 0.4392 data: 0.0042 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:51 lr: 0.000221 loss: 2.3946 (2.4074) grad: 0.2292 (0.2353) time: 0.4743 data: 0.0042 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:37 lr: 0.000220 loss: 2.3591 (2.3919) grad: 0.2232 (0.2333) time: 0.4633 data: 0.0044 max mem: 22448 +train: [10] [100/400] eta: 0:02:25 lr: 0.000218 loss: 2.3591 (2.3905) grad: 0.2322 (0.2344) time: 0.4491 data: 0.0040 max mem: 22448 +train: [10] [120/400] eta: 0:02:14 lr: 0.000217 loss: 2.3683 (2.3894) grad: 0.2338 (0.2352) time: 0.4566 data: 0.0043 max mem: 22448 +train: [10] [140/400] eta: 0:02:03 lr: 0.000215 loss: 2.3871 (2.3930) grad: 0.2338 (0.2356) time: 0.4560 data: 0.0041 max mem: 22448 +train: [10] [160/400] eta: 0:01:53 lr: 0.000214 loss: 2.3871 (2.3928) grad: 0.2316 (0.2353) time: 0.4462 data: 0.0043 max mem: 22448 +train: [10] [180/400] eta: 0:01:43 lr: 0.000213 loss: 2.4028 (2.3953) grad: 0.2346 (0.2358) time: 0.4481 data: 0.0042 max mem: 22448 +train: [10] [200/400] eta: 0:01:33 lr: 0.000211 loss: 2.4033 (2.3945) grad: 0.2416 (0.2365) time: 0.4440 data: 0.0041 max mem: 22448 +train: [10] [220/400] eta: 0:01:23 lr: 0.000210 loss: 2.3793 (2.3942) grad: 0.2294 (0.2366) time: 0.4486 data: 0.0042 max mem: 22448 +train: [10] [240/400] eta: 0:01:14 lr: 0.000208 loss: 2.3780 (2.3927) grad: 0.2307 (0.2362) time: 0.4446 data: 0.0042 max mem: 22448 +train: [10] [260/400] eta: 0:01:04 lr: 0.000207 loss: 2.3807 (2.3926) grad: 0.2291 (0.2357) time: 0.4499 data: 0.0042 max mem: 22448 +train: [10] [280/400] eta: 0:00:55 lr: 0.000205 loss: 2.3887 (2.3920) grad: 0.2291 (0.2354) time: 0.4398 data: 0.0042 max mem: 22448 +train: [10] [300/400] eta: 0:00:46 lr: 0.000204 loss: 2.3576 (2.3893) grad: 0.2334 (0.2353) time: 0.4576 data: 0.0042 max mem: 22448 +train: [10] [320/400] eta: 0:00:36 lr: 0.000202 loss: 2.3576 (2.3874) grad: 0.2363 (0.2355) time: 0.4549 data: 0.0042 max mem: 22448 +train: [10] [340/400] eta: 0:00:27 lr: 0.000201 loss: 2.3641 (2.3872) grad: 0.2363 (0.2357) time: 0.4522 data: 0.0042 max mem: 22448 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 2.3517 (2.3852) grad: 0.2323 (0.2354) time: 0.4495 data: 0.0041 max mem: 22448 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 2.3305 (2.3834) grad: 0.2326 (0.2354) time: 0.4548 data: 0.0041 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.3381 (2.3828) grad: 0.2389 (0.2358) time: 0.4452 data: 0.0042 max mem: 22448 +train: [10] Total time: 0:03:03 (0.4590 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.3381 (2.3828) grad: 0.2389 (0.2358) +eval (validation): [10] [ 0/85] eta: 0:04:23 time: 3.1048 data: 2.8502 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:31 time: 0.3510 data: 0.0035 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:19 time: 0.3692 data: 0.0039 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:09 time: 0.3351 data: 0.0040 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3286 data: 0.0038 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3178 data: 0.0037 max mem: 22448 +eval (validation): [10] Total time: 0:00:32 (0.3799 s / it) +cv: [10] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.348 acc: 0.292 f1: 0.238 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:20:58 lr: nan time: 3.1468 data: 2.8241 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:31 lr: 0.000195 loss: 2.2902 (2.3218) grad: 0.2279 (0.2323) time: 0.4276 data: 0.0039 max mem: 22448 +train: [11] [ 40/400] eta: 0:02:58 lr: 0.000193 loss: 2.3414 (2.3466) grad: 0.2283 (0.2339) time: 0.4298 data: 0.0037 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:46 lr: 0.000192 loss: 2.3381 (2.3338) grad: 0.2336 (0.2356) time: 0.4791 data: 0.0043 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:34 lr: 0.000190 loss: 2.3110 (2.3395) grad: 0.2350 (0.2377) time: 0.4579 data: 0.0041 max mem: 22448 +train: [11] [100/400] eta: 0:02:22 lr: 0.000189 loss: 2.3274 (2.3378) grad: 0.2411 (0.2381) time: 0.4517 data: 0.0039 max mem: 22448 +train: [11] [120/400] eta: 0:02:12 lr: 0.000187 loss: 2.2940 (2.3258) grad: 0.2391 (0.2384) time: 0.4640 data: 0.0040 max mem: 22448 +train: [11] [140/400] eta: 0:02:02 lr: 0.000186 loss: 2.2979 (2.3293) grad: 0.2379 (0.2384) time: 0.4578 data: 0.0040 max mem: 22448 +train: [11] [160/400] eta: 0:01:52 lr: 0.000184 loss: 2.3549 (2.3357) grad: 0.2357 (0.2385) time: 0.4510 data: 0.0042 max mem: 22448 +train: [11] [180/400] eta: 0:01:42 lr: 0.000183 loss: 2.3495 (2.3366) grad: 0.2424 (0.2401) time: 0.4518 data: 0.0043 max mem: 22448 +train: [11] [200/400] eta: 0:01:33 lr: 0.000181 loss: 2.3732 (2.3427) grad: 0.2425 (0.2399) time: 0.4488 data: 0.0043 max mem: 22448 +train: [11] [220/400] eta: 0:01:23 lr: 0.000180 loss: 2.4306 (2.3515) grad: 0.2335 (0.2390) time: 0.4414 data: 0.0043 max mem: 22448 +train: [11] [240/400] eta: 0:01:13 lr: 0.000178 loss: 2.4100 (2.3520) grad: 0.2275 (0.2388) time: 0.4496 data: 0.0040 max mem: 22448 +train: [11] [260/400] eta: 0:01:04 lr: 0.000177 loss: 2.3356 (2.3504) grad: 0.2381 (0.2395) time: 0.4459 data: 0.0042 max mem: 22448 +train: [11] [280/400] eta: 0:00:55 lr: 0.000175 loss: 2.3381 (2.3501) grad: 0.2425 (0.2396) time: 0.4530 data: 0.0041 max mem: 22448 +train: [11] [300/400] eta: 0:00:45 lr: 0.000174 loss: 2.3639 (2.3547) grad: 0.2406 (0.2396) time: 0.4462 data: 0.0042 max mem: 22448 +train: [11] [320/400] eta: 0:00:36 lr: 0.000172 loss: 2.4255 (2.3572) grad: 0.2421 (0.2403) time: 0.4513 data: 0.0043 max mem: 22448 +train: [11] [340/400] eta: 0:00:27 lr: 0.000170 loss: 2.3544 (2.3571) grad: 0.2457 (0.2408) time: 0.4420 data: 0.0043 max mem: 22448 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 2.3469 (2.3553) grad: 0.2371 (0.2409) time: 0.4324 data: 0.0042 max mem: 22448 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 2.2873 (2.3515) grad: 0.2371 (0.2406) time: 0.4568 data: 0.0042 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.3216 (2.3527) grad: 0.2339 (0.2404) time: 0.4485 data: 0.0043 max mem: 22448 +train: [11] Total time: 0:03:02 (0.4564 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.3216 (2.3527) grad: 0.2339 (0.2404) +eval (validation): [11] [ 0/85] eta: 0:04:30 time: 3.1813 data: 2.9369 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:33 time: 0.3779 data: 0.0039 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:19 time: 0.3715 data: 0.0037 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:10 time: 0.3626 data: 0.0044 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3413 data: 0.0039 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3148 data: 0.0038 max mem: 22448 +eval (validation): [11] Total time: 0:00:33 (0.3964 s / it) +cv: [11] best hparam: (0.72, 1.0) (022) ('022_lr7.2e-01_wd1.0e+00') loss: 2.369 acc: 0.295 f1: 0.245 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:22:43 lr: nan time: 3.4097 data: 3.0841 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:47 lr: 0.000164 loss: 2.2434 (2.2731) grad: 0.2259 (0.2278) time: 0.4581 data: 0.0029 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:11 lr: 0.000163 loss: 2.2953 (2.2877) grad: 0.2335 (0.2351) time: 0.4604 data: 0.0038 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:54 lr: 0.000161 loss: 2.2953 (2.2907) grad: 0.2340 (0.2339) time: 0.4724 data: 0.0043 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:38 lr: 0.000160 loss: 2.2866 (2.2943) grad: 0.2337 (0.2344) time: 0.4497 data: 0.0040 max mem: 22448 +train: [12] [100/400] eta: 0:02:25 lr: 0.000158 loss: 2.3111 (2.2945) grad: 0.2423 (0.2361) time: 0.4460 data: 0.0041 max mem: 22448 +train: [12] [120/400] eta: 0:02:15 lr: 0.000156 loss: 2.3076 (2.3008) grad: 0.2383 (0.2344) time: 0.4619 data: 0.0042 max mem: 22448 +train: [12] [140/400] eta: 0:02:04 lr: 0.000155 loss: 2.3023 (2.2965) grad: 0.2321 (0.2360) time: 0.4516 data: 0.0042 max mem: 22448 +train: [12] [160/400] eta: 0:01:53 lr: 0.000153 loss: 2.3023 (2.2990) grad: 0.2454 (0.2380) time: 0.4424 data: 0.0042 max mem: 22448 +train: [12] [180/400] eta: 0:01:43 lr: 0.000152 loss: 2.3074 (2.2994) grad: 0.2454 (0.2388) time: 0.4418 data: 0.0041 max mem: 22448 +train: [12] [200/400] eta: 0:01:33 lr: 0.000150 loss: 2.3362 (2.3077) grad: 0.2431 (0.2395) time: 0.4470 data: 0.0041 max mem: 22448 +train: [12] [220/400] eta: 0:01:23 lr: 0.000149 loss: 2.3668 (2.3112) grad: 0.2431 (0.2398) time: 0.4460 data: 0.0040 max mem: 22448 +train: [12] [240/400] eta: 0:01:14 lr: 0.000147 loss: 2.2700 (2.3074) grad: 0.2438 (0.2410) time: 0.4302 data: 0.0039 max mem: 22448 +train: [12] [260/400] eta: 0:01:04 lr: 0.000145 loss: 2.2589 (2.3065) grad: 0.2441 (0.2409) time: 0.4603 data: 0.0041 max mem: 22448 +train: [12] [280/400] eta: 0:00:55 lr: 0.000144 loss: 2.2637 (2.3029) grad: 0.2351 (0.2408) time: 0.4446 data: 0.0041 max mem: 22448 +train: [12] [300/400] eta: 0:00:45 lr: 0.000142 loss: 2.2630 (2.3042) grad: 0.2391 (0.2417) time: 0.4372 data: 0.0039 max mem: 22448 +train: [12] [320/400] eta: 0:00:36 lr: 0.000141 loss: 2.2947 (2.3040) grad: 0.2436 (0.2417) time: 0.4548 data: 0.0039 max mem: 22448 +train: [12] [340/400] eta: 0:00:27 lr: 0.000139 loss: 2.2759 (2.3021) grad: 0.2416 (0.2414) time: 0.4482 data: 0.0039 max mem: 22448 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 2.2759 (2.3013) grad: 0.2396 (0.2414) time: 0.4420 data: 0.0042 max mem: 22448 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 2.2868 (2.3012) grad: 0.2384 (0.2414) time: 0.4398 data: 0.0039 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.2648 (2.2992) grad: 0.2357 (0.2414) time: 0.4380 data: 0.0038 max mem: 22448 +train: [12] Total time: 0:03:02 (0.4564 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.2648 (2.2992) grad: 0.2357 (0.2414) +eval (validation): [12] [ 0/85] eta: 0:04:56 time: 3.4833 data: 3.1812 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:33 time: 0.3692 data: 0.0030 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:19 time: 0.3367 data: 0.0041 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:09 time: 0.3307 data: 0.0040 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3296 data: 0.0038 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3229 data: 0.0037 max mem: 22448 +eval (validation): [12] Total time: 0:00:32 (0.3808 s / it) +cv: [12] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.408 acc: 0.278 f1: 0.225 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:21:41 lr: nan time: 3.2531 data: 2.8942 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:35 lr: 0.000133 loss: 2.2623 (2.2623) grad: 0.2372 (0.2407) time: 0.4326 data: 0.0038 max mem: 22448 +train: [13] [ 40/400] eta: 0:03:04 lr: 0.000131 loss: 2.2671 (2.2682) grad: 0.2419 (0.2436) time: 0.4562 data: 0.0039 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:48 lr: 0.000130 loss: 2.2795 (2.2714) grad: 0.2428 (0.2430) time: 0.4606 data: 0.0041 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:34 lr: 0.000128 loss: 2.2210 (2.2669) grad: 0.2396 (0.2436) time: 0.4465 data: 0.0043 max mem: 22448 +train: [13] [100/400] eta: 0:02:21 lr: 0.000127 loss: 2.2156 (2.2636) grad: 0.2396 (0.2422) time: 0.4288 data: 0.0038 max mem: 22448 +train: [13] [120/400] eta: 0:02:12 lr: 0.000125 loss: 2.2683 (2.2588) grad: 0.2431 (0.2422) time: 0.4692 data: 0.0042 max mem: 22448 +train: [13] [140/400] eta: 0:02:02 lr: 0.000124 loss: 2.2800 (2.2692) grad: 0.2446 (0.2432) time: 0.4527 data: 0.0041 max mem: 22448 +train: [13] [160/400] eta: 0:01:51 lr: 0.000122 loss: 2.2647 (2.2681) grad: 0.2505 (0.2445) time: 0.4376 data: 0.0042 max mem: 22448 +train: [13] [180/400] eta: 0:01:41 lr: 0.000120 loss: 2.2572 (2.2724) grad: 0.2505 (0.2444) time: 0.4365 data: 0.0042 max mem: 22448 +train: [13] [200/400] eta: 0:01:31 lr: 0.000119 loss: 2.2255 (2.2664) grad: 0.2428 (0.2443) time: 0.4336 data: 0.0042 max mem: 22448 +train: [13] [220/400] eta: 0:01:22 lr: 0.000117 loss: 2.2372 (2.2681) grad: 0.2401 (0.2446) time: 0.4432 data: 0.0041 max mem: 22448 +train: [13] [240/400] eta: 0:01:13 lr: 0.000116 loss: 2.2746 (2.2670) grad: 0.2418 (0.2446) time: 0.4426 data: 0.0041 max mem: 22448 +train: [13] [260/400] eta: 0:01:03 lr: 0.000114 loss: 2.2551 (2.2684) grad: 0.2359 (0.2436) time: 0.4360 data: 0.0042 max mem: 22448 +train: [13] [280/400] eta: 0:00:54 lr: 0.000113 loss: 2.2384 (2.2661) grad: 0.2262 (0.2429) time: 0.4415 data: 0.0041 max mem: 22448 +train: [13] [300/400] eta: 0:00:45 lr: 0.000111 loss: 2.2159 (2.2647) grad: 0.2298 (0.2421) time: 0.4401 data: 0.0041 max mem: 22448 +train: [13] [320/400] eta: 0:00:36 lr: 0.000110 loss: 2.2948 (2.2670) grad: 0.2417 (0.2428) time: 0.4404 data: 0.0043 max mem: 22448 +train: [13] [340/400] eta: 0:00:27 lr: 0.000108 loss: 2.2561 (2.2669) grad: 0.2426 (0.2424) time: 0.4473 data: 0.0042 max mem: 22448 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 2.2530 (2.2670) grad: 0.2455 (0.2427) time: 0.4407 data: 0.0042 max mem: 22448 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 2.2621 (2.2671) grad: 0.2524 (0.2429) time: 0.4435 data: 0.0043 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.2791 (2.2683) grad: 0.2439 (0.2431) time: 0.4408 data: 0.0043 max mem: 22448 +train: [13] Total time: 0:03:00 (0.4509 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.2791 (2.2683) grad: 0.2439 (0.2431) +eval (validation): [13] [ 0/85] eta: 0:04:29 time: 3.1663 data: 2.9201 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:31 time: 0.3514 data: 0.0092 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:18 time: 0.3503 data: 0.0040 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:09 time: 0.3451 data: 0.0041 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3480 data: 0.0044 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3345 data: 0.0043 max mem: 22448 +eval (validation): [13] Total time: 0:00:32 (0.3829 s / it) +cv: [13] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.389 acc: 0.290 f1: 0.236 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:22:18 lr: nan time: 3.3453 data: 3.0120 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:37 lr: 0.000102 loss: 2.1500 (2.1771) grad: 0.2324 (0.2338) time: 0.4351 data: 0.0045 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:03 lr: 0.000101 loss: 2.1756 (2.2009) grad: 0.2301 (0.2346) time: 0.4448 data: 0.0040 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:49 lr: 0.000099 loss: 2.2255 (2.2107) grad: 0.2329 (0.2364) time: 0.4698 data: 0.0043 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:36 lr: 0.000098 loss: 2.2259 (2.2200) grad: 0.2324 (0.2353) time: 0.4589 data: 0.0042 max mem: 22448 +train: [14] [100/400] eta: 0:02:23 lr: 0.000096 loss: 2.2481 (2.2262) grad: 0.2338 (0.2369) time: 0.4418 data: 0.0041 max mem: 22448 +train: [14] [120/400] eta: 0:02:13 lr: 0.000095 loss: 2.2137 (2.2190) grad: 0.2435 (0.2380) time: 0.4663 data: 0.0041 max mem: 22448 +train: [14] [140/400] eta: 0:02:03 lr: 0.000093 loss: 2.2016 (2.2174) grad: 0.2471 (0.2391) time: 0.4540 data: 0.0042 max mem: 22448 +train: [14] [160/400] eta: 0:01:52 lr: 0.000092 loss: 2.1929 (2.2172) grad: 0.2406 (0.2392) time: 0.4430 data: 0.0042 max mem: 22448 +train: [14] [180/400] eta: 0:01:42 lr: 0.000090 loss: 2.1929 (2.2165) grad: 0.2406 (0.2398) time: 0.4461 data: 0.0041 max mem: 22448 +train: [14] [200/400] eta: 0:01:33 lr: 0.000089 loss: 2.1867 (2.2180) grad: 0.2452 (0.2408) time: 0.4463 data: 0.0043 max mem: 22448 +train: [14] [220/400] eta: 0:01:23 lr: 0.000088 loss: 2.1868 (2.2174) grad: 0.2506 (0.2415) time: 0.4547 data: 0.0042 max mem: 22448 +train: [14] [240/400] eta: 0:01:14 lr: 0.000086 loss: 2.1912 (2.2206) grad: 0.2407 (0.2415) time: 0.4516 data: 0.0042 max mem: 22448 +train: [14] [260/400] eta: 0:01:04 lr: 0.000085 loss: 2.2020 (2.2204) grad: 0.2404 (0.2411) time: 0.4461 data: 0.0040 max mem: 22448 +train: [14] [280/400] eta: 0:00:55 lr: 0.000083 loss: 2.2059 (2.2206) grad: 0.2386 (0.2406) time: 0.4405 data: 0.0042 max mem: 22448 +train: [14] [300/400] eta: 0:00:45 lr: 0.000082 loss: 2.2583 (2.2255) grad: 0.2387 (0.2406) time: 0.4445 data: 0.0041 max mem: 22448 +train: [14] [320/400] eta: 0:00:36 lr: 0.000081 loss: 2.2907 (2.2273) grad: 0.2437 (0.2411) time: 0.4456 data: 0.0041 max mem: 22448 +train: [14] [340/400] eta: 0:00:27 lr: 0.000079 loss: 2.2403 (2.2282) grad: 0.2475 (0.2416) time: 0.4478 data: 0.0041 max mem: 22448 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 2.2094 (2.2272) grad: 0.2400 (0.2413) time: 0.4508 data: 0.0040 max mem: 22448 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 2.2025 (2.2262) grad: 0.2382 (0.2415) time: 0.4391 data: 0.0042 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.1765 (2.2264) grad: 0.2434 (0.2419) time: 0.4476 data: 0.0042 max mem: 22448 +train: [14] Total time: 0:03:02 (0.4562 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.1765 (2.2264) grad: 0.2434 (0.2419) +eval (validation): [14] [ 0/85] eta: 0:04:32 time: 3.2100 data: 2.9453 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:32 time: 0.3666 data: 0.0041 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:19 time: 0.3638 data: 0.0047 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:10 time: 0.3422 data: 0.0044 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3462 data: 0.0040 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3379 data: 0.0039 max mem: 22448 +eval (validation): [14] Total time: 0:00:33 (0.3908 s / it) +cv: [14] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 2.410 acc: 0.290 f1: 0.243 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:23:00 lr: nan time: 3.4521 data: 3.0611 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:42 lr: 0.000074 loss: 2.1328 (2.1777) grad: 0.2300 (0.2332) time: 0.4422 data: 0.0049 max mem: 22448 +train: [15] [ 40/400] eta: 0:03:04 lr: 0.000072 loss: 2.1856 (2.1853) grad: 0.2300 (0.2344) time: 0.4367 data: 0.0040 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:49 lr: 0.000071 loss: 2.1379 (2.1735) grad: 0.2346 (0.2361) time: 0.4658 data: 0.0045 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:37 lr: 0.000070 loss: 2.1379 (2.1679) grad: 0.2346 (0.2351) time: 0.4744 data: 0.0041 max mem: 22448 +train: [15] [100/400] eta: 0:02:24 lr: 0.000068 loss: 2.1435 (2.1645) grad: 0.2329 (0.2355) time: 0.4424 data: 0.0042 max mem: 22448 +train: [15] [120/400] eta: 0:02:13 lr: 0.000067 loss: 2.1861 (2.1719) grad: 0.2413 (0.2370) time: 0.4515 data: 0.0042 max mem: 22448 +train: [15] [140/400] eta: 0:02:03 lr: 0.000066 loss: 2.2175 (2.1771) grad: 0.2451 (0.2394) time: 0.4608 data: 0.0044 max mem: 22448 +train: [15] [160/400] eta: 0:01:53 lr: 0.000064 loss: 2.1543 (2.1785) grad: 0.2458 (0.2401) time: 0.4605 data: 0.0041 max mem: 22448 +train: [15] [180/400] eta: 0:01:43 lr: 0.000063 loss: 2.1922 (2.1871) grad: 0.2477 (0.2415) time: 0.4411 data: 0.0041 max mem: 22448 +train: [15] [200/400] eta: 0:01:33 lr: 0.000062 loss: 2.2139 (2.1857) grad: 0.2447 (0.2411) time: 0.4631 data: 0.0044 max mem: 22448 +train: [15] [220/400] eta: 0:01:24 lr: 0.000061 loss: 2.1504 (2.1836) grad: 0.2334 (0.2407) time: 0.4464 data: 0.0043 max mem: 22448 +train: [15] [240/400] eta: 0:01:14 lr: 0.000059 loss: 2.1616 (2.1840) grad: 0.2434 (0.2408) time: 0.4532 data: 0.0040 max mem: 22448 +train: [15] [260/400] eta: 0:01:04 lr: 0.000058 loss: 2.2141 (2.1888) grad: 0.2415 (0.2407) time: 0.4355 data: 0.0040 max mem: 22448 +train: [15] [280/400] eta: 0:00:55 lr: 0.000057 loss: 2.2063 (2.1870) grad: 0.2322 (0.2401) time: 0.4541 data: 0.0043 max mem: 22448 +train: [15] [300/400] eta: 0:00:46 lr: 0.000056 loss: 2.1655 (2.1859) grad: 0.2290 (0.2400) time: 0.4484 data: 0.0041 max mem: 22448 +train: [15] [320/400] eta: 0:00:36 lr: 0.000054 loss: 2.1748 (2.1865) grad: 0.2391 (0.2399) time: 0.4486 data: 0.0042 max mem: 22448 +train: [15] [340/400] eta: 0:00:27 lr: 0.000053 loss: 2.2068 (2.1873) grad: 0.2389 (0.2399) time: 0.4470 data: 0.0041 max mem: 22448 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 2.1933 (2.1875) grad: 0.2402 (0.2400) time: 0.4505 data: 0.0042 max mem: 22448 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 2.1916 (2.1853) grad: 0.2342 (0.2398) time: 0.4526 data: 0.0042 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 2.1884 (2.1864) grad: 0.2309 (0.2396) time: 0.4556 data: 0.0042 max mem: 22448 +train: [15] Total time: 0:03:03 (0.4593 s / it) +train: [15] Summary: lr: 0.000050 loss: 2.1884 (2.1864) grad: 0.2309 (0.2396) +eval (validation): [15] [ 0/85] eta: 0:04:36 time: 3.2537 data: 2.9799 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:33 time: 0.3795 data: 0.0045 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:19 time: 0.3596 data: 0.0041 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:10 time: 0.3613 data: 0.0042 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:02 time: 0.3570 data: 0.0041 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3497 data: 0.0041 max mem: 22448 +eval (validation): [15] Total time: 0:00:34 (0.4004 s / it) +cv: [15] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.378 acc: 0.290 f1: 0.236 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:23:13 lr: nan time: 3.4847 data: 3.1019 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:52 lr: 0.000048 loss: 2.0878 (2.1409) grad: 0.2194 (0.2250) time: 0.4682 data: 0.0041 max mem: 22448 +train: [16] [ 40/400] eta: 0:03:08 lr: 0.000047 loss: 2.0911 (2.1412) grad: 0.2249 (0.2272) time: 0.4327 data: 0.0039 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:52 lr: 0.000046 loss: 2.1174 (2.1403) grad: 0.2285 (0.2295) time: 0.4698 data: 0.0043 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:38 lr: 0.000045 loss: 2.1659 (2.1569) grad: 0.2379 (0.2335) time: 0.4588 data: 0.0042 max mem: 22448 +train: [16] [100/400] eta: 0:02:25 lr: 0.000044 loss: 2.1728 (2.1522) grad: 0.2464 (0.2350) time: 0.4439 data: 0.0041 max mem: 22448 +train: [16] [120/400] eta: 0:02:14 lr: 0.000043 loss: 2.1315 (2.1544) grad: 0.2397 (0.2359) time: 0.4595 data: 0.0042 max mem: 22448 +train: [16] [140/400] eta: 0:02:03 lr: 0.000042 loss: 2.1443 (2.1535) grad: 0.2342 (0.2357) time: 0.4503 data: 0.0045 max mem: 22448 +train: [16] [160/400] eta: 0:01:53 lr: 0.000041 loss: 2.1531 (2.1604) grad: 0.2348 (0.2367) time: 0.4493 data: 0.0041 max mem: 22448 +train: [16] [180/400] eta: 0:01:43 lr: 0.000040 loss: 2.1766 (2.1632) grad: 0.2397 (0.2370) time: 0.4316 data: 0.0040 max mem: 22448 +train: [16] [200/400] eta: 0:01:33 lr: 0.000039 loss: 2.1422 (2.1597) grad: 0.2380 (0.2361) time: 0.4679 data: 0.0043 max mem: 22448 +train: [16] [220/400] eta: 0:01:23 lr: 0.000038 loss: 2.1324 (2.1606) grad: 0.2278 (0.2356) time: 0.4503 data: 0.0043 max mem: 22448 +train: [16] [240/400] eta: 0:01:14 lr: 0.000036 loss: 2.1586 (2.1595) grad: 0.2302 (0.2358) time: 0.4428 data: 0.0042 max mem: 22448 +train: [16] [260/400] eta: 0:01:04 lr: 0.000035 loss: 2.1734 (2.1630) grad: 0.2428 (0.2364) time: 0.4335 data: 0.0042 max mem: 22448 +train: [16] [280/400] eta: 0:00:55 lr: 0.000034 loss: 2.1704 (2.1623) grad: 0.2427 (0.2367) time: 0.4543 data: 0.0042 max mem: 22448 +train: [16] [300/400] eta: 0:00:46 lr: 0.000033 loss: 2.1704 (2.1647) grad: 0.2341 (0.2370) time: 0.4551 data: 0.0043 max mem: 22448 +train: [16] [320/400] eta: 0:00:36 lr: 0.000032 loss: 2.2167 (2.1693) grad: 0.2360 (0.2376) time: 0.4539 data: 0.0042 max mem: 22448 +train: [16] [340/400] eta: 0:00:27 lr: 0.000031 loss: 2.1473 (2.1677) grad: 0.2360 (0.2375) time: 0.4433 data: 0.0040 max mem: 22448 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 2.1334 (2.1676) grad: 0.2360 (0.2378) time: 0.4580 data: 0.0044 max mem: 22448 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 2.1421 (2.1675) grad: 0.2409 (0.2384) time: 0.4524 data: 0.0042 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 2.1602 (2.1677) grad: 0.2474 (0.2389) time: 0.4455 data: 0.0042 max mem: 22448 +train: [16] Total time: 0:03:03 (0.4589 s / it) +train: [16] Summary: lr: 0.000029 loss: 2.1602 (2.1677) grad: 0.2474 (0.2389) +eval (validation): [16] [ 0/85] eta: 0:04:15 time: 3.0103 data: 2.7781 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:31 time: 0.3597 data: 0.0037 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:18 time: 0.3503 data: 0.0038 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:09 time: 0.3459 data: 0.0041 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3410 data: 0.0039 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3314 data: 0.0038 max mem: 22448 +eval (validation): [16] Total time: 0:00:32 (0.3818 s / it) +cv: [16] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.392 acc: 0.290 f1: 0.238 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:14 lr: nan time: 3.3369 data: 2.9558 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:50 lr: 0.000028 loss: 2.1001 (2.1033) grad: 0.2177 (0.2275) time: 0.4700 data: 0.0035 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:08 lr: 0.000027 loss: 2.1087 (2.1287) grad: 0.2284 (0.2309) time: 0.4362 data: 0.0039 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:51 lr: 0.000026 loss: 2.1381 (2.1436) grad: 0.2257 (0.2302) time: 0.4656 data: 0.0042 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:38 lr: 0.000025 loss: 2.1150 (2.1303) grad: 0.2252 (0.2300) time: 0.4617 data: 0.0043 max mem: 22448 +train: [17] [100/400] eta: 0:02:24 lr: 0.000024 loss: 2.1125 (2.1400) grad: 0.2269 (0.2300) time: 0.4341 data: 0.0041 max mem: 22448 +train: [17] [120/400] eta: 0:02:13 lr: 0.000023 loss: 2.1315 (2.1379) grad: 0.2296 (0.2302) time: 0.4451 data: 0.0041 max mem: 22448 +train: [17] [140/400] eta: 0:02:02 lr: 0.000023 loss: 2.1147 (2.1366) grad: 0.2288 (0.2302) time: 0.4520 data: 0.0043 max mem: 22448 +train: [17] [160/400] eta: 0:01:52 lr: 0.000022 loss: 2.1261 (2.1339) grad: 0.2300 (0.2308) time: 0.4424 data: 0.0043 max mem: 22448 +train: [17] [180/400] eta: 0:01:42 lr: 0.000021 loss: 2.1179 (2.1332) grad: 0.2300 (0.2302) time: 0.4449 data: 0.0042 max mem: 22448 +train: [17] [200/400] eta: 0:01:32 lr: 0.000020 loss: 2.1179 (2.1337) grad: 0.2286 (0.2301) time: 0.4525 data: 0.0042 max mem: 22448 +train: [17] [220/400] eta: 0:01:23 lr: 0.000019 loss: 2.1537 (2.1357) grad: 0.2286 (0.2309) time: 0.4407 data: 0.0045 max mem: 22448 +train: [17] [240/400] eta: 0:01:13 lr: 0.000019 loss: 2.1626 (2.1348) grad: 0.2358 (0.2318) time: 0.4432 data: 0.0043 max mem: 22448 +train: [17] [260/400] eta: 0:01:04 lr: 0.000018 loss: 2.1095 (2.1342) grad: 0.2376 (0.2320) time: 0.4481 data: 0.0044 max mem: 22448 +train: [17] [280/400] eta: 0:00:55 lr: 0.000017 loss: 2.1196 (2.1356) grad: 0.2376 (0.2322) time: 0.4489 data: 0.0041 max mem: 22448 +train: [17] [300/400] eta: 0:00:45 lr: 0.000016 loss: 2.1475 (2.1370) grad: 0.2294 (0.2320) time: 0.4506 data: 0.0043 max mem: 22448 +train: [17] [320/400] eta: 0:00:36 lr: 0.000016 loss: 2.1572 (2.1370) grad: 0.2207 (0.2313) time: 0.4394 data: 0.0044 max mem: 22448 +train: [17] [340/400] eta: 0:00:27 lr: 0.000015 loss: 2.1491 (2.1375) grad: 0.2243 (0.2316) time: 0.4336 data: 0.0041 max mem: 22448 +train: [17] [360/400] eta: 0:00:18 lr: 0.000014 loss: 2.1329 (2.1381) grad: 0.2341 (0.2319) time: 0.4374 data: 0.0040 max mem: 22448 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 2.1573 (2.1367) grad: 0.2341 (0.2325) time: 0.4586 data: 0.0043 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 2.1040 (2.1353) grad: 0.2293 (0.2321) time: 0.4413 data: 0.0040 max mem: 22448 +train: [17] Total time: 0:03:01 (0.4549 s / it) +train: [17] Summary: lr: 0.000013 loss: 2.1040 (2.1353) grad: 0.2293 (0.2321) +eval (validation): [17] [ 0/85] eta: 0:04:27 time: 3.1507 data: 2.8648 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:33 time: 0.3766 data: 0.0053 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:19 time: 0.3443 data: 0.0036 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:10 time: 0.3500 data: 0.0042 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3540 data: 0.0039 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3340 data: 0.0039 max mem: 22448 +eval (validation): [17] Total time: 0:00:33 (0.3898 s / it) +cv: [17] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.377 acc: 0.293 f1: 0.240 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:59 lr: nan time: 3.4493 data: 3.0299 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:48 lr: 0.000012 loss: 2.1430 (2.1671) grad: 0.2303 (0.2332) time: 0.4591 data: 0.0049 max mem: 22448 +train: [18] [ 40/400] eta: 0:03:09 lr: 0.000012 loss: 2.1277 (2.1326) grad: 0.2272 (0.2319) time: 0.4475 data: 0.0038 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:50 lr: 0.000011 loss: 2.0808 (2.1237) grad: 0.2347 (0.2353) time: 0.4471 data: 0.0044 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:35 lr: 0.000011 loss: 2.0942 (2.1200) grad: 0.2337 (0.2338) time: 0.4463 data: 0.0041 max mem: 22448 +train: [18] [100/400] eta: 0:02:23 lr: 0.000010 loss: 2.1140 (2.1229) grad: 0.2299 (0.2327) time: 0.4512 data: 0.0043 max mem: 22448 +train: [18] [120/400] eta: 0:02:12 lr: 0.000009 loss: 2.0945 (2.1144) grad: 0.2304 (0.2320) time: 0.4428 data: 0.0041 max mem: 22448 +train: [18] [140/400] eta: 0:02:01 lr: 0.000009 loss: 2.1111 (2.1182) grad: 0.2322 (0.2323) time: 0.4351 data: 0.0044 max mem: 22448 +train: [18] [160/400] eta: 0:01:51 lr: 0.000008 loss: 2.1151 (2.1194) grad: 0.2285 (0.2312) time: 0.4341 data: 0.0043 max mem: 22448 +train: [18] [180/400] eta: 0:01:41 lr: 0.000008 loss: 2.1087 (2.1173) grad: 0.2285 (0.2315) time: 0.4417 data: 0.0043 max mem: 22448 +train: [18] [200/400] eta: 0:01:32 lr: 0.000007 loss: 2.1105 (2.1192) grad: 0.2287 (0.2314) time: 0.4468 data: 0.0043 max mem: 22448 +train: [18] [220/400] eta: 0:01:22 lr: 0.000007 loss: 2.1333 (2.1211) grad: 0.2303 (0.2316) time: 0.4385 data: 0.0042 max mem: 22448 +train: [18] [240/400] eta: 0:01:12 lr: 0.000006 loss: 2.1462 (2.1245) grad: 0.2303 (0.2314) time: 0.4324 data: 0.0041 max mem: 22448 +train: [18] [260/400] eta: 0:01:03 lr: 0.000006 loss: 2.1544 (2.1240) grad: 0.2256 (0.2310) time: 0.4609 data: 0.0042 max mem: 22448 +train: [18] [280/400] eta: 0:00:54 lr: 0.000006 loss: 2.1352 (2.1243) grad: 0.2256 (0.2312) time: 0.4458 data: 0.0042 max mem: 22448 +train: [18] [300/400] eta: 0:00:45 lr: 0.000005 loss: 2.1199 (2.1231) grad: 0.2256 (0.2309) time: 0.4323 data: 0.0040 max mem: 22448 +train: [18] [320/400] eta: 0:00:36 lr: 0.000005 loss: 2.1237 (2.1256) grad: 0.2270 (0.2310) time: 0.4539 data: 0.0041 max mem: 22448 +train: [18] [340/400] eta: 0:00:27 lr: 0.000004 loss: 2.1237 (2.1231) grad: 0.2272 (0.2308) time: 0.4479 data: 0.0042 max mem: 22448 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 2.1039 (2.1225) grad: 0.2254 (0.2306) time: 0.4384 data: 0.0043 max mem: 22448 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 2.1049 (2.1221) grad: 0.2252 (0.2306) time: 0.4297 data: 0.0039 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 2.0860 (2.1202) grad: 0.2273 (0.2308) time: 0.4363 data: 0.0038 max mem: 22448 +train: [18] Total time: 0:03:00 (0.4512 s / it) +train: [18] Summary: lr: 0.000003 loss: 2.0860 (2.1202) grad: 0.2273 (0.2308) +eval (validation): [18] [ 0/85] eta: 0:04:26 time: 3.1353 data: 2.9031 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:30 time: 0.3314 data: 0.0036 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:17 time: 0.3274 data: 0.0037 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:09 time: 0.3373 data: 0.0041 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:01 time: 0.3414 data: 0.0040 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3401 data: 0.0039 max mem: 22448 +eval (validation): [18] Total time: 0:00:31 (0.3700 s / it) +cv: [18] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.385 acc: 0.291 f1: 0.238 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:12 lr: nan time: 3.3321 data: 2.9512 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:42 lr: 0.000003 loss: 2.0851 (2.1144) grad: 0.2151 (0.2240) time: 0.4480 data: 0.0035 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:08 lr: 0.000003 loss: 2.0851 (2.0954) grad: 0.2222 (0.2268) time: 0.4573 data: 0.0039 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:51 lr: 0.000002 loss: 2.1251 (2.1201) grad: 0.2250 (0.2278) time: 0.4622 data: 0.0043 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:37 lr: 0.000002 loss: 2.1626 (2.1237) grad: 0.2264 (0.2274) time: 0.4632 data: 0.0044 max mem: 22448 +train: [19] [100/400] eta: 0:02:24 lr: 0.000002 loss: 2.1224 (2.1247) grad: 0.2267 (0.2280) time: 0.4392 data: 0.0041 max mem: 22448 +train: [19] [120/400] eta: 0:02:13 lr: 0.000002 loss: 2.1128 (2.1250) grad: 0.2288 (0.2288) time: 0.4532 data: 0.0043 max mem: 22448 +train: [19] [140/400] eta: 0:02:03 lr: 0.000001 loss: 2.1241 (2.1238) grad: 0.2288 (0.2282) time: 0.4571 data: 0.0045 max mem: 22448 +train: [19] [160/400] eta: 0:01:53 lr: 0.000001 loss: 2.0866 (2.1163) grad: 0.2221 (0.2278) time: 0.4459 data: 0.0043 max mem: 22448 +train: [19] [180/400] eta: 0:01:43 lr: 0.000001 loss: 2.0866 (2.1137) grad: 0.2281 (0.2290) time: 0.4451 data: 0.0042 max mem: 22448 +train: [19] [200/400] eta: 0:01:33 lr: 0.000001 loss: 2.0991 (2.1113) grad: 0.2315 (0.2288) time: 0.4491 data: 0.0043 max mem: 22448 +train: [19] [220/400] eta: 0:01:23 lr: 0.000001 loss: 2.0949 (2.1091) grad: 0.2234 (0.2282) time: 0.4454 data: 0.0043 max mem: 22448 +train: [19] [240/400] eta: 0:01:13 lr: 0.000001 loss: 2.0877 (2.1071) grad: 0.2234 (0.2285) time: 0.4378 data: 0.0041 max mem: 22448 +train: [19] [260/400] eta: 0:01:04 lr: 0.000000 loss: 2.0718 (2.1057) grad: 0.2266 (0.2282) time: 0.4561 data: 0.0041 max mem: 22448 +train: [19] [280/400] eta: 0:00:55 lr: 0.000000 loss: 2.1287 (2.1087) grad: 0.2266 (0.2281) time: 0.4444 data: 0.0043 max mem: 22448 +train: [19] [300/400] eta: 0:00:45 lr: 0.000000 loss: 2.1432 (2.1094) grad: 0.2280 (0.2281) time: 0.4420 data: 0.0042 max mem: 22448 +train: [19] [320/400] eta: 0:00:36 lr: 0.000000 loss: 2.1331 (2.1112) grad: 0.2295 (0.2285) time: 0.4427 data: 0.0042 max mem: 22448 +train: [19] [340/400] eta: 0:00:27 lr: 0.000000 loss: 2.1378 (2.1142) grad: 0.2281 (0.2283) time: 0.4483 data: 0.0042 max mem: 22448 +train: [19] [360/400] eta: 0:00:18 lr: 0.000000 loss: 2.1122 (2.1142) grad: 0.2226 (0.2280) time: 0.4441 data: 0.0042 max mem: 22448 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 2.0926 (2.1133) grad: 0.2221 (0.2278) time: 0.4366 data: 0.0039 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 2.1109 (2.1152) grad: 0.2260 (0.2278) time: 0.4483 data: 0.0044 max mem: 22448 +train: [19] Total time: 0:03:02 (0.4558 s / it) +train: [19] Summary: lr: 0.000000 loss: 2.1109 (2.1152) grad: 0.2260 (0.2278) +eval (validation): [19] [ 0/85] eta: 0:04:50 time: 3.4184 data: 3.1313 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:32 time: 0.3572 data: 0.0043 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:19 time: 0.3621 data: 0.0039 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:10 time: 0.3591 data: 0.0042 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:01 time: 0.3466 data: 0.0043 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3368 data: 0.0042 max mem: 22448 +eval (validation): [19] Total time: 0:00:33 (0.3953 s / it) +cv: [19] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.384 acc: 0.290 f1: 0.238 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.290328534514581, "hparam": [0.61, 1.0], "hparam_id": 21, "epoch": 19, "is_best": false, "best_score": 0.29955703211517165} +eval (train): [20] [ 0/509] eta: 0:26:05 time: 3.0748 data: 2.7974 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:01 time: 0.3642 data: 0.0082 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:23 time: 0.3706 data: 0.0045 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:00 time: 0.3378 data: 0.0038 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:49 time: 0.3718 data: 0.0042 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:40 time: 0.3892 data: 0.0046 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:30 time: 0.3587 data: 0.0041 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:22 time: 0.3838 data: 0.0049 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:14 time: 0.3816 data: 0.0040 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:06 time: 0.3648 data: 0.0044 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:57 time: 0.3539 data: 0.0044 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:49 time: 0.3502 data: 0.0039 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:41 time: 0.3647 data: 0.0039 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:33 time: 0.3678 data: 0.0041 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:25 time: 0.3625 data: 0.0042 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:18 time: 0.3692 data: 0.0042 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:10 time: 0.3429 data: 0.0041 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:03 time: 0.3736 data: 0.0044 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:55 time: 0.3813 data: 0.0044 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:48 time: 0.3648 data: 0.0043 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:40 time: 0.3721 data: 0.0044 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:33 time: 0.3688 data: 0.0044 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:25 time: 0.3634 data: 0.0049 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3753 data: 0.0043 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3568 data: 0.0043 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3635 data: 0.0041 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3365 data: 0.0039 max mem: 22448 +eval (train): [20] Total time: 0:03:09 (0.3723 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:31 time: 3.1951 data: 2.9056 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:34 time: 0.3937 data: 0.0043 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3480 data: 0.0036 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3396 data: 0.0040 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3610 data: 0.0040 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3545 data: 0.0039 max mem: 22448 +eval (validation): [20] Total time: 0:00:33 (0.3964 s / it) +eval (test): [20] [ 0/85] eta: 0:04:34 time: 3.2276 data: 2.9798 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:34 time: 0.3944 data: 0.0284 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:19 time: 0.3551 data: 0.0039 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3366 data: 0.0041 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3440 data: 0.0044 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3298 data: 0.0039 max mem: 22448 +eval (test): [20] Total time: 0:00:33 (0.3934 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:32 time: 3.3186 data: 3.0762 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3656 data: 0.0035 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3536 data: 0.0040 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3550 data: 0.0043 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3294 data: 0.0041 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3209 data: 0.0039 max mem: 22448 +eval (testid): [20] Total time: 0:00:31 (0.3885 s / it) +evaluating best checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.29955703211517165, "hparam": [1.4, 1.0], "hparam_id": 26, "epoch": 5, "is_best": true, "best_score": 0.29955703211517165} +eval (train): [20] [ 0/509] eta: 0:27:01 time: 3.1850 data: 2.8849 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:03:59 time: 0.3560 data: 0.0044 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:23 time: 0.3720 data: 0.0041 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:01 time: 0.3453 data: 0.0045 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:47 time: 0.3529 data: 0.0042 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:37 time: 0.3571 data: 0.0045 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:29 time: 0.3770 data: 0.0045 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:19 time: 0.3486 data: 0.0043 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:11 time: 0.3761 data: 0.0045 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:04 time: 0.3728 data: 0.0047 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:56 time: 0.3573 data: 0.0043 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:48 time: 0.3576 data: 0.0043 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:39 time: 0.3430 data: 0.0041 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:32 time: 0.3662 data: 0.0044 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:24 time: 0.3562 data: 0.0042 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:17 time: 0.3651 data: 0.0040 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:09 time: 0.3612 data: 0.0043 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:02 time: 0.3854 data: 0.0047 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:54 time: 0.3500 data: 0.0040 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:47 time: 0.3538 data: 0.0043 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:40 time: 0.3844 data: 0.0045 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:32 time: 0.3425 data: 0.0040 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:25 time: 0.3737 data: 0.0044 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3572 data: 0.0043 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3544 data: 0.0041 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3534 data: 0.0042 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3327 data: 0.0039 max mem: 22448 +eval (train): [20] Total time: 0:03:06 (0.3671 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:24 time: 3.1099 data: 2.8611 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:33 time: 0.3849 data: 0.0044 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:20 time: 0.4097 data: 0.0044 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3624 data: 0.0041 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:02 time: 0.3496 data: 0.0041 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3419 data: 0.0042 max mem: 22448 +eval (validation): [20] Total time: 0:00:34 (0.4087 s / it) +eval (test): [20] [ 0/85] eta: 0:04:28 time: 3.1584 data: 2.9109 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:33 time: 0.3855 data: 0.0041 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:19 time: 0.3670 data: 0.0038 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3546 data: 0.0043 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3359 data: 0.0038 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3345 data: 0.0037 max mem: 22448 +eval (test): [20] Total time: 0:00:33 (0.3952 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:20 time: 3.1772 data: 2.9392 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3680 data: 0.0048 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3782 data: 0.0041 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3407 data: 0.0040 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3465 data: 0.0041 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3336 data: 0.0039 max mem: 22448 +eval (testid): [20] Total time: 0:00:32 (0.3932 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00042 | 0.05 | 26 | [1.4, 1.0] | train | 2.0517 | 0.37712 | 0.0024438 | 0.31513 | 0.0025879 | +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00042 | 0.05 | 26 | [1.4, 1.0] | validation | 2.3111 | 0.29956 | 0.005572 | 0.22809 | 0.0051914 | +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00042 | 0.05 | 26 | [1.4, 1.0] | test | 2.2078 | 0.32041 | 0.005743 | 0.25615 | 0.005756 | +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00042 | 0.05 | 26 | [1.4, 1.0] | testid | 2.2602 | 0.3129 | 0.0056497 | 0.25246 | 0.005408 | + + +done! total time: 1:23:52 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/train_log.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..62e5ab20c26025ed4d5363ed12a955b660114a88 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.1188801193237303, "train/grad": 0.1483418546617031, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.17439208984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.17382568359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.17285888671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.171881103515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1710009765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.169796142578125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.168543701171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.167203369140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.16548583984375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.163931884765625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.16248291015625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.160487060546875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.158759765625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.156629638671875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1548876953125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1532861328125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.151751708984375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.150064697265625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.148482666015625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.14724365234375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.14593017578125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.144698486328125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.143546142578125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.1425439453125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.14156494140625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.140601806640625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.140028076171875, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.139500732421875, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.138941650390625, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.138533935546875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.138271484375, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.1380670166015623, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.137908935546875, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.137828369140625, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.137662353515625, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.1372393798828124, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.1360333251953123, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1328399658203123, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1220223999023435, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.097703857421875, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.076588439941406, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.0553006744384765, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0269007873535156, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.0032855224609376, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.9787228012084963, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.955631122589111, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.936689758300781, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.9176777172088624, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.8998692512512205, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021726392805576325, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02168082901276648, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02160618135239929, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021535068582743407, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021466922275722028, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021376821105368437, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021278918734751642, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021177627369761465, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021053569056093693, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0209331610891968, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02082524965517223, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020680041760206224, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020557603943161668, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02040718057192862, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020289121861569583, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020194099568761885, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020093680680729448, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019998524351976812, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019915291708894074, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.019853225713595747, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.019791039316914977, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.019740232531912625, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.019696509917266666, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0196593929361552, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01962996883317828, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.019607939599081872, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01959654892794788, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01959116873797029, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.019588422193191946, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01958994774147868, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.019591636909171938, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.019586575222201644, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.019564529517665505, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.019529898059554397, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.019459913675673304, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.019366443888284266, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01924561841413379, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.019181033540517093, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.019358206260949373, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02010005971416831, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.020988689549267293, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02162655827589333, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.022365238671191036, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.023069742876105012, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.023847466222941874, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.024818798671476544, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.025665103681385516, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02672895485535264, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02755000358913094, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1609208583831787, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.159207344055176, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.15655255317688, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1540639400482178, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.15175461769104, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.148786783218384, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.145719528198242, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1427299976348877, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.139328718185425, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1362411975860596, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.133742094039917, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.130826950073242, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.128664970397949, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.126462459564209, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1251060962677, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.124246120452881, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1235384941101074, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1230502128601074, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.122713804244995, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1225414276123047, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.122443675994873, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1224777698516846, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1227598190307617, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1233160495758057, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1241917610168457, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1254169940948486, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1265172958374023, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.127361297607422, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.128051996231079, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1277894973754883, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1262223720550537, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.123807907104492, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.11985445022583, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1162469387054443, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.111642360687256, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.104916572570801, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0849180221557617, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.012044906616211, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8692808151245117, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7095601558685303, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6476473808288574, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.595111846923828, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.5826215744018555, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.565894365310669, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.53485107421875, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.541975498199463, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.5239105224609375, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.5336990356445312, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.5662646293640137, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07198228128460686, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06681432262827612, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.0636766334440753, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.059800664451827246, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.05906238464377999, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.058508674787744554, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.06109265411590993, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.06478405315614617, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07235142118863049, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.07493540051679587, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07567368032484312, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08028792912513842, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08988556662975268, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.10908084163898117, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.15134736064968624, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19029162052417867, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21373200442967885, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22535991140642303, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22277593207825766, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.23089700996677742, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.23883351790328536, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.2513842746400886, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.2440014765596161, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.22905131044665927, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.21613141380583242, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014751186960469896, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014980197708722108, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014926164545082513, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014916035035427261, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014711603607377319, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013836217099620978, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013008303266390756, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011804128757514914, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011288657143851724, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.010494539657214473, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.010270372138012076, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.010411923357009257, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.010502006762050537, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.010382902498531207, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.010857590190377705, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.010932156064769391, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.010950405999345202, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.010746898009685028, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.010921244239988715, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.010639800542202807, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.010632239848362207, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.010916427747505552, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.012582572621167928, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.014659368885990814, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.015360123554404173, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.014202058066927856, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.012842989494510246, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.011657056543847756, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.012224485112073231, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.012540865607008199, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.01279772396540943, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.012268374316395672, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.014020870225196533, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.01586530821014602, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.01840141003823031, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.02326788745443534, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.02690626838036554, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.04494929371513542, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.08120754145839594, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.11307310028247557, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.13269686387351506, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.14732767360394647, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15004936793755388, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16275275719726198, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.17187321764848296, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.17144769710225163, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.16618497676532662, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1541914902899228, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.14162965142629672, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 2.955631122589111, "validation/loss_best": 2.541975498199463, "validation/acc_best": 0.2513842746400886, "validation/f1_best": 0.17144769710225163} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.935979506969452, "train/grad": 0.1742237314581871, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.15230712890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.150045166015625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.146795654296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.143992919921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.141856689453125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.139144287109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1368017578125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.134925537109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.132955322265625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1314892578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.130399169921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.129388427734375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12871337890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.127921142578125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.127525634765625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.127200927734375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.126788330078125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.126329345703125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1258251953125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1253369140625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.124619140625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.12388427734375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.123004150390625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.122110595703125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.121044921875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.1194873046875, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.1177557373046874, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.11516357421875, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.1051904296875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0650668334960938, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0108657836914063, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.957857818603516, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.8871280670166017, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.8229386901855467, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.7518356323242186, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.686768856048584, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.6247069740295412, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.5753325176239015, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.535948085784912, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.504951763153076, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.493492851257324, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.489055471420288, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4792703437805175, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.4813204288482664, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.488153157234192, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.5147488117218018, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.537060980796814, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.573844087123871, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.6246497988700868, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019810933182016016, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019636048944666983, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019387738234363495, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019183863238431513, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01901911908760667, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01883812287822366, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01868732461705804, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01856758920010179, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018465114375576377, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.018399360477924346, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01836337064858526, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.018340161792002617, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.018338130158372223, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.018351720268838106, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018372297110036014, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01839408416301012, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01842269331216812, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.01845361042767763, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018484183610416947, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.018510504299774765, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01853723931591958, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01855972710531205, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01857834561727941, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.018584203380160032, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01856909122783691, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.018512478107586505, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.018421423602849245, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01831395340617746, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.018276467006653547, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.018932732534594834, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.020039755729958416, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.021129248943179847, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0225387921044603, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02366713476832956, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.024909179895184933, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.026098956419155002, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.027574496204033495, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.029126083166338503, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03047234270721674, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03173875896260142, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.032541967015713456, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03304867585189641, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03407716347835958, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03466094503179193, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03577252992428839, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03753940302878618, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04000925577245653, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.042157375076785686, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.04607934162020683, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1377320289611816, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1351656913757324, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1319077014923096, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.12955379486084, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1278276443481445, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1262075901031494, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1250741481781006, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.124361991882324, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.123823404312134, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.123547315597534, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1233913898468018, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1231203079223633, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.122858762741089, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.122404098510742, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.121849298477173, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.121273994445801, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1205458641052246, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.119645595550537, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1187796592712402, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.11816668510437, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.117710828781128, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1175241470336914, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1175951957702637, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.118011951446533, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1187584400177, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1192893981933594, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1167495250701904, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1031925678253174, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0041463375091553, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7674014568328857, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.655733823776245, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5876119136810303, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5333454608917236, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.4997222423553467, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.4840002059936523, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.4717657566070557, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.4281647205352783, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.4080333709716797, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.4073379039764404, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.3901636600494385, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.377760887145996, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.383488893508911, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.4568073749542236, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.47768235206604, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.4941482543945312, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.664214611053467, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.8311736583709717, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.8712809085845947, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.2217679023742676, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06275378368401624, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.062200073827980804, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06183093392395718, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06164636397194537, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06256921373200443, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06164636397194537, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06201550387596899, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06312292358803986, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06976744186046512, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07179771133259505, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07456626061277224, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08305647840531562, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.11424880029531193, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.1644518272425249, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.19472129937246216, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23385012919896642, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2351421188630491, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2349575489110373, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2368032484311554, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.25046142488002954, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2622739018087855, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2631967515688446, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.27205610926541157, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.27611664820967147, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.25341454411221853, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.25950535252860835, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.25452196382428943, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.2264673311184939, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.20505721668512367, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.17940199335548174, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.1657438169066076, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010691252901849921, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009743603144277063, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00906964142360182, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.009088495027449815, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008727192901418433, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008521409579768305, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.008418986395368956, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.008497074520940414, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.008050663098158281, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.007717523756361162, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.0075388739718152435, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.007637692743320719, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.007717745199983299, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.008173132726574139, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.008908389812745766, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.008850993928029549, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.0100379650312298, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01131328936930271, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.012343725902673216, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.013061531771721866, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.013092171936094821, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.015163738433007138, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.016308372135636382, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.01603649561844357, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.015049716992785803, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.015045092563394036, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.015485559498316356, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.018887906258890485, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04365936465738663, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.08046918236324085, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.11661286525877364, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.14153259845145807, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1640260175301557, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17041723570647296, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17011574376290686, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16833480211981044, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18109246939773668, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19068961635653772, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19673236940779185, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1968832094311639, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.20792873864130454, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.20829396917325047, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19177365421193593, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.19396061861909417, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.19311413806385747, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.1684938006018066, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.14159169961413146, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1201639668142181, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.10523877285875365, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 2.489055471420288, "validation/loss_best": 2.383488893508911, "validation/acc_best": 0.27611664820967147, "validation/f1_best": 0.20829396917325047} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 2.9384684443473814, "train/grad": 0.37830781027674676, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13684326171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.13531982421875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.133680419921875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13260986328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.131920166015625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1314013671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.130986328125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1306787109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1302880859375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12985107421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.129478759765625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.128778076171875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12819580078125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.127188720703125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.126322021484375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.125435791015625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.124276123046875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.123018798828125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.12149169921875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1200830078125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.11803955078125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.11512451171875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1087481689453127, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0835003662109375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.019012756347656, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.9162439727783203, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.8152015686035154, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.726374855041504, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.6195361709594724, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.5342426681518555, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.480011196136475, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.4449331951141358, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.4076446437835695, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.3776380252838134, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3503161239624024, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.333858246803284, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.3274797320365908, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3266774344444277, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.330559368133545, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.348008017539978, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.364404833316803, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.374817237854004, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.3976337242126466, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.44538604259491, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.4803955614566804, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.59925705909729, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.684490691423416, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.545382745265961, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.73219046831131, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018717399113811553, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018614948065951466, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01851403755135834, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018465957860462366, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018446329571306704, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01844450434669852, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018457384295761586, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01847519790288061, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01849900827743113, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.018521189861930906, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01854088975582272, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.018563213846646248, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.018582431515678764, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.018606189168058337, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018623349233530463, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.018637501238845288, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.018650138773955403, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.018656091215088964, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018648197180591523, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01862625082023442, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.018573805643245577, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0184883868182078, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.018388754846528174, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01864485928788781, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01995002478826791, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02215915684122592, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02402056146413088, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.025750249731354414, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02786275149323046, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02950821693055332, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.030265007885172963, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03096389763057232, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03178130380809307, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03228473590686917, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.032840427635237576, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.03366488302126527, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03428033024072647, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.034955765204504136, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03599008943885565, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03779476154595614, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03926625449210405, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.039735325183719396, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.041425557006150485, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04418061540462077, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.04565670667216182, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05421821370720863, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.061120103150606155, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1514384144358337, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1123844888061285, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.126514434814453, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.125551462173462, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124642848968506, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1241722106933594, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1238889694213867, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.123650312423706, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1234545707702637, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.123284339904785, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.123098134994507, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.122896909713745, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1227688789367676, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.12262225151062, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.122530937194824, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1224794387817383, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1224355697631836, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1223020553588867, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1219372749328613, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.121061325073242, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1193344593048096, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.11681866645813, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.111915111541748, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.102020025253296, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0640869140625, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.891176223754883, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.693504571914673, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5883193016052246, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5320019721984863, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4918060302734375, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.443950653076172, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.406984567642212, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.378480911254883, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.3624424934387207, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.364673614501953, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.38354754447937, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.412358045578003, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.440373182296753, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.446726083755493, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.4435017108917236, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.44787335395813, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.442030906677246, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.4589920043945312, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.4934282302856445, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.4697225093841553, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.5639498233795166, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.561851739883423, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.465120553970337, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.293067455291748, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.067921742340347, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07345884090070137, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.09062384643779993, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.14857881136950904, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.19287559985234404, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22905131044665927, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24529346622369877, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2574750830564784, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2702104097452935, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.27556293835363604, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.28460686600221485, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.28552971576227393, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.28017718715393136, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2766703580657069, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2757475083056478, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.26264304171280917, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2617201919527501, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.26135105204872644, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.26245847176079734, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2563676633444075, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.26356589147286824, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.24658545588778147, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2587670727205611, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.24141749723145073, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.24473975636766335, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.1611295681063123, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.18087855297157623, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008480081413669712, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008426045030508034, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009123955205296455, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.009576901086335049, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.00966381096487886, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009746754431042157, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009757383263122345, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.010009577837902926, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.009983365440519842, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.009716838653162705, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.010097201308283687, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.010542048548159834, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.011181828960756266, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01177670488295951, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.011835620555825375, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.011716322107047579, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01216132458682407, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.012257234848386022, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.012432962739226524, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01427730939562842, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.015328777526192286, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.01659757763431513, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.023159633399322626, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.06442571873151592, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.10609754012032845, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.14928890481856985, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.16954812732003474, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18375526647644466, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19471802203932084, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2003476495644476, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.20958792173823096, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.21174715067526717, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.21070789839494275, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.2120322403616682, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.21295369486891402, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.19666386378883968, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.2018689087103782, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.20494915656115334, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.20913637779589278, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.20114262989976361, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.19557848452262183, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18833412425938587, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1913720021779283, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18464152020699628, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.17679530250009642, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.10039446005863906, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.1438328095062067, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 2.4449331951141358, "validation/loss_best": 2.3624424934387207, "validation/acc_best": 0.28552971576227393, "validation/f1_best": 0.21174715067526717} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 2.9349246442317964, "train/grad": 0.4343560874462128, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1304345703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1296923828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.128934326171875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.128509521484375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.128018798828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.127373046875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12669921875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.126058349609375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.125155029296875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.124296875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.12344482421875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.121993408203125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12088134765625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.119151611328125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1175146484375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.115740966796875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1134814453125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1101751708984375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1037255859375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0857391357421875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0136090087890626, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.8964410400390626, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.759816131591797, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.637969207763672, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.5528033065795896, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.4853989028930665, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.4390845489501953, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.4031417083740236, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.3663289833068846, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.3304747772216796, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.297886619567871, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.2803360843658447, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.2658776092529296, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.2555255985260008, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.2507565307617186, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.2472805070877073, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.256298847198486, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.271142542362213, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.290282816886902, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3278471517562864, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3655689644813536, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4150633537769317, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.451115319728851, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.521558209657669, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.660148183107376, "train/loss_045_lr3.1e+01_wd1.0e+00": 8.080842700004577, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.937941981554031, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01837355189025402, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018374760691076516, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018390295612625777, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018407463831827044, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018422142937779426, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01843947804067284, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018454768541269005, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01846910276915878, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018483058526180686, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01849758895114064, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.018509225961752238, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.018522844538092612, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.018530849791131912, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.018534825583919882, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018528050361201167, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.018510997919365765, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.018474500961601734, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.018408427056856455, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018326670648530125, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01846518415492028, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.019729923126287758, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022240449553355576, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.025162582630291582, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.027556211147457363, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02896387156099081, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.029785008328035475, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03062201362103224, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03140668641775846, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03209137810394168, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.032977413861081, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03368556523695588, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03433136778883636, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03518806060776115, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03589741147123277, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03677205365151167, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.03746642061509192, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.038138668313622476, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.038889867272228006, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.039756138483062385, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04174096467904746, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04351837287656963, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04707535749301314, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04877059856429696, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.053726760186254975, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06381843971088529, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.2015067945420742, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.12337751153856516, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1237430572509766, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.123427629470825, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1230132579803467, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1226441860198975, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.122307777404785, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1218364238739014, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.12125301361084, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1206023693084717, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1197307109832764, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.118828773498535, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1178979873657227, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1165623664855957, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.11527681350708, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1135072708129883, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.111842155456543, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1102168560028076, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1077218055725098, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.102261543273926, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.0805864334106445, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.9801902770996094, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.7523672580718994, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.6396231651306152, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5579867362976074, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5101191997528076, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4746768474578857, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4326999187469482, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.413785219192505, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4175117015838623, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4320173263549805, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4293863773345947, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.411634683609009, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.390496015548706, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.385655641555786, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.3746373653411865, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.3708910942077637, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.401707172393799, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.477046012878418, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.472297191619873, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5179731845855713, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.4895715713500977, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.5587522983551025, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.6734421253204346, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.662572145462036, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.774533271789551, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.864711046218872, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.067921742340347, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.067921742340347, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.067921742340347, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07364341085271318, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.08324104835732743, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.12661498708010335, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.17497231450719822, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2041343669250646, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2222222222222222, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.23477297895902546, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2441860465116279, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2530454042081949, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.26135105204872644, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2589516426725729, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2596899224806202, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.26448874123292726, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.271686969361388, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2731635289774825, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.27611664820967147, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2871908453303802, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.28903654485049834, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.27500922849760057, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.26559616094499816, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2702104097452935, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.25230712440014763, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2517534145441122, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.25452196382428943, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2279438907345884, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23846437799926173, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21816168327796234, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.22000738279808046, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.00975108708770679, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009855175316092472, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009922173659619858, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.00983273214790356, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009831219546997083, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009881712858172796, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009937841285977692, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009699846863046417, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.009813662807441968, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01004945143868548, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.011035368679385914, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.011054022122289215, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.012311939119507695, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.014811111122544443, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.016244060172028733, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01780229923725826, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.0186815437559875, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02072265969124009, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.03132619836826959, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.06658442950835246, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.10162767762920095, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.13053527746782922, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.14839300117578225, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.16235943021069443, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.17194645350534846, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18664478858710107, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19852315240041085, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19689175304498321, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19415215870975477, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1983711607380194, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.20175107927523164, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.2026646207562787, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.2038088480624928, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.20875370305704674, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.20485335068640487, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1980831047564633, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1996833935846607, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.20950477526190756, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19498999269914005, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19906522332436416, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18981707890190846, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17505957228061467, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18256066075480581, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15608814618527359, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.14424912505323084, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 2.2507565307617186, "validation/loss_best": 2.3708910942077637, "validation/acc_best": 0.28903654485049834, "validation/f1_best": 0.20485335068640487} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.8080612134933474, "train/grad": 0.34856828264892104, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13111328125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.130782470703125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.130107421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12944580078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.128785400390625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.127862548828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.126983642578125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.126007080078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.12468994140625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.123333740234375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.121895751953125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.119927978515625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1180126953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.114842529296875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1113818359375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1062847900390627, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.08945068359375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.013802795410156, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.8683494567871093, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.729453430175781, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.6072486877441405, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.5305888748168943, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.4675439834594726, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.4146665573120116, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.367642402648926, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.3263950157165527, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2967320823669435, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.268328104019165, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.2457096004486083, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.2201257038116453, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.2014958381652834, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.1882096672058107, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.1844255781173705, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.187537446022034, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.1877633428573606, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.195835838317871, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2172263717651366, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2408925008773806, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2772315657138824, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3443407607078552, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4055674958229063, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.464237563610077, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5959077274799345, "train/loss_043_lr2.2e+01_wd1.0e+00": 4.620516964197159, "train/loss_044_lr2.6e+01_wd1.0e+00": 5.17881975531578, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01822125369682908, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018235304001718758, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018250980973243715, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018261410337872805, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01826919401064515, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01827798259444535, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018286329419352113, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01829523648135364, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018304608543403447, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.018310751081444324, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0183152875630185, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.018312464975751938, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.018302454352378844, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.018266412215307355, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018211540430784226, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.018143544951453806, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.018193096239119768, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.01937864672858268, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022427529534325003, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.025376301216892896, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.027897406592965125, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.029269964126870038, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.030258300723508002, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03101615102030337, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.031943555595353244, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03278225662186742, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03336559690535069, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03405317937023938, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.034486334938555956, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.035244214879348874, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.035668731862679125, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03603167643770575, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03662169556133449, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03733053189702332, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03860525775700808, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.039903823118656874, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.040688675548881295, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04120445791631937, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.043519952706992625, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.047137216925621034, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05041221082210541, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05288942409679294, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.061904000844806434, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.11790033021941781, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.148542069979012, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1235849857330322, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1234419345855713, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1231491565704346, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.122875928878784, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.122610569000244, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1221752166748047, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.121674060821533, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.121004819869995, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.120089530944824, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.118931531906128, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1177241802215576, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.115751266479492, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1136488914489746, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1099884510040283, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1045494079589844, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.0920941829681396, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.0206797122955322, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.788804769515991, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.644789218902588, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.5661134719848633, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5153486728668213, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.486243486404419, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4503376483917236, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.407193899154663, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3911867141723633, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.3835930824279785, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.382066011428833, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.366926431655884, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.3507790565490723, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.347804546356201, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.345449686050415, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.364777088165283, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.417377471923828, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.462449073791504, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.4975688457489014, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.456266403198242, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.4151811599731445, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.465264081954956, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5151405334472656, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.684753656387329, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6751537322998047, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.740661144256592, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7377312183380127, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06404577334809892, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07161314138058324, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0769656699889258, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.11498708010335917, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.16961978589885568, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.20579549649317092, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.22591362126245848, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2369878183831672, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2467700258397933, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2561830933923957, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.27353266888150607, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.27500922849760057, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2753783684016242, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.27888519748984864, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2866371354743448, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2888519748984865, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2897748246585456, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.28202288667404946, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26744186046511625, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2591362126245847, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2567368032484312, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.26947212993724623, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2705795496493171, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.25858250276854927, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23809523809523808, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23754152823920266, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23274270948689554, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23864894795127353, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007407992765654042, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.007157349155200928, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.007181637580621803, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.007108971407233299, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0075870356891600205, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.007780086980485489, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.008728368504179525, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009708044834945152, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.010716418960356008, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.011450253086396772, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012052479905391851, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013674821334837077, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014476274588516587, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.016867641787954763, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01790959172409509, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.023634215339298267, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.05380584221690734, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.09867582495984643, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.13148710948495518, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1504639933893134, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.16497819478787648, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.17727435022889418, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18836755272473682, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20280789592440707, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20719245686925813, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21097361932367675, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21590242822054462, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22103397102308464, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.23342704717910734, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2357557749118718, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.23867509158026504, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.22978649367723206, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.21924055190905034, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.21050694413538795, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19892741176307024, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.20510903883356166, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.2095221926695678, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.20311753220818632, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19756495608533056, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16883038644982798, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16726884836846603, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17359028211399022, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1776972338473105, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 2.2014958381652834, "validation/loss_best": 2.345449686050415, "validation/acc_best": 0.2897748246585456, "validation/f1_best": 0.23867509158026504} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.6552819776535035, "train/grad": 0.2175409022718668, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.123001708984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12236572265625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12146240234375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12049560546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11968994140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11856689453125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11724365234375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.115791015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.114132080078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.112176513671875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.110264892578125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.10717529296875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.10355712890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.09446533203125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.06318359375, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.967091064453125, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.7910093688964843, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.6341994476318358, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.5365332794189452, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.4735394668579103, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.412781810760498, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.361056671142578, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.3118092727661135, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.2691242408752443, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.232024049758911, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.196872491836548, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.170813956260681, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.145982265472412, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.1257283592224123, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.108560185432434, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.1017001342773436, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.1020251870155335, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.105251042842865, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.1073142766952513, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.1130005049705507, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.1294929456710814, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.1583133721351624, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.186939570903778, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2249168717861174, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.2885427832603455, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3258876675367355, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.3907185292243955, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4427692532539367, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018472434217110277, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018482406260445713, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018496095333248377, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01850779272150248, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018519572466611862, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018533501117490234, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0185462801810354, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01855693118646741, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018566845241002737, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01857199524063617, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.018569779605604707, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01855178887024522, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.018516602092422545, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.018469394268468022, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018812915766611694, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020491348407231273, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.024116671662777662, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02736457563005388, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02893409229815006, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.029930861704051494, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.031180357858538628, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03218404524959624, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03308837236836552, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.033833918310701844, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03450547831133008, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03498654399067164, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03549825659953058, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03600785129703581, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.036263314997777345, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0369369797129184, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.037709732558578256, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03865472829900682, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03981340512633324, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.040244784094393256, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0410804620012641, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0426954831648618, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04358900489285588, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0443552247993648, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04587621953338385, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.049191274922341106, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0506621678173542, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05271156730130315, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05440763931721449, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.121588706970215, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1210451126098633, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.120210886001587, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1193974018096924, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.118621587753296, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1175804138183594, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1164467334747314, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1151771545410156, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1135199069976807, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1116604804992676, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1096744537353516, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1059863567352295, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1006011962890625, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.07942271232605, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.971536874771118, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.7651448249816895, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.6321024894714355, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.541002035140991, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.483393669128418, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.444207191467285, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4084911346435547, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.382460594177246, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.365222692489624, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.348703384399414, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3277103900909424, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.3100287914276123, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.3111226558685303, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.3303606510162354, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.3901569843292236, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.3962409496307373, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.3958287239074707, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.423544406890869, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4187562465667725, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.4333407878875732, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.471142530441284, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5455448627471924, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6271119117736816, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.63381028175354, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.657787561416626, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.702852725982666, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6739814281463623, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.624652147293091, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.597550630569458, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0828719084533038, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.13528977482465854, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.17257290513104467, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2011812476928756, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.22720561092654115, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24492432631967515, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2561830933923957, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2698412698412698, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27500922849760057, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2792543373938723, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2836840162421558, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2943890734588409, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2977113325950535, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.29955703211517165, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2918050941306755, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2857142857142857, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2842377260981912, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2822074566260613, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.27740863787375414, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2739018087855297, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.27353266888150607, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25950535252860835, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24012550756736803, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23329641934293097, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23015873015873015, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21613141380583242, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2131782945736434, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23311184939091917, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2369878183831672, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007607561526396499, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.007547304356281007, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.007103378382473521, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.007131549609516406, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.007394820746101181, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0076286730280052855, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.008393195283724852, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.00925109698262931, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.009829380285193156, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01000983829114468, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.010222901293678064, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.011775905966614838, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014962195256904556, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.024571195152821374, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.061857363201797744, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.0907838525482209, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.11940493363338302, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1484798090151357, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.16962379019095516, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18278948968154626, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1972132442967195, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2035673363649889, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20752014757887052, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21015107515131523, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21920235142211175, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2215063848800415, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.22809156533459699, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22164063921598567, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21993889793702728, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.22023297956664956, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.21989480890319044, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.22082815460385463, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.2233343978809432, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.21895767946074152, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.2094049839157975, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1945471253048603, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.19131596334830492, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18998606973449864, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19122973673880214, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1858457239141439, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1835296445651473, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1924997723264221, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19106297679695103, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 26, "lr_best": 0.00041999999999999996, "wd_best": 0.05, "train/loss_best": 2.170813956260681, "validation/loss_best": 2.3111226558685303, "validation/acc_best": 0.29955703211517165, "validation/f1_best": 0.22809156533459699} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.5999049282073976, "train/grad": 0.22382767103612422, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1278515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12697265625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.125787353515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12446044921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.123323974609375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1216650390625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11989013671875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11782958984375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1151904296875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.112335205078125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.10927001953125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.102889404296875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.089942626953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0042098999023437, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.8096421813964843, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.6674240112304686, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.5648651123046875, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.485403633117676, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.4201388549804688, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.371178913116455, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.320994987487793, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.2735141563415526, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.227245864868164, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.1872510623931887, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.149040412902832, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.111389203071594, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.0841090869903565, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.058947479724884, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0434461987018584, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.0288871324062345, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.0232875108718873, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.0192525291442873, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.02393119931221, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.035082224607468, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.0483554339408876, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.0679365396499634, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.110548824071884, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.1478713804483416, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.192638738155365, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.2557944643497465, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.33870668053627, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.360635195374489, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.414956409931183, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01846360262017697, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018466634773649276, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018471032157540323, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018475246182642877, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018477721265517177, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018480757297948004, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018480593520216644, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018475177222862838, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018462379705160858, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.018438566802069546, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.018405359471216798, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01833801263011992, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.018320754240266978, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01957637322600931, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02338247970212251, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02649915107525885, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02843732889741659, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02977383411489427, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.031000516414642333, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.032041828520596026, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03295467345975339, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.033641694998368624, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03424276080913842, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03456450858153403, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03504636881873011, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03553698963485658, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.036343836430460214, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03710361173376441, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03792525035329163, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.038229047851637006, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03865202333778143, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.039254189860075714, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03988134551793337, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04037018282338977, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04170030443929136, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.043020440954715015, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.044187401682138444, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04556011600419879, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04678181242197752, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.048959259502589704, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05390402868390083, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.053598020300269125, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0547609175182879, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1208529472351074, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1202609539031982, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.119333028793335, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.118495464324951, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.117677688598633, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1165826320648193, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1153855323791504, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1139893531799316, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.112029552459717, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1095399856567383, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.106278419494629, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.096985101699829, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0661041736602783, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.8518948554992676, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.661924362182617, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.572183609008789, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4999048709869385, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.452812910079956, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.419393539428711, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.395371913909912, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3708276748657227, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.348198890686035, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.344147205352783, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.34047269821167, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3475828170776367, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.357914447784424, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.3757128715515137, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4110305309295654, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.442678451538086, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4808149337768555, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.490950345993042, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4790377616882324, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.466390609741211, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.4881772994995117, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.496004819869995, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.522003412246704, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.4924330711364746, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.5012497901916504, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.573962450027466, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.521852731704712, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6660971641540527, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.565556287765503, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.599496364593506, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.067921742340347, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07493540051679587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.09394610557401255, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.15559246954595793, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2026578073089701, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.22425249169435216, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2441860465116279, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2587670727205611, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2681801402731635, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2766703580657069, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2811000369139904, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2866371354743448, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.28645256552233295, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2857142857142857, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.28940568475452194, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.29106681432262826, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2857142857142857, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.27500922849760057, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.27703949796973054, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2739018087855297, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2768549280177187, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2722406792174234, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2691029900332226, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2705795496493171, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.27113325950535255, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.26301218161683276, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2698412698412698, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.26375046142488, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.25396825396825395, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.26116648209671467, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.24861572535991142, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2500922849760059, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2543373938722776, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.00819241056645207, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008149974578579725, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00827340281451751, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008269130143381846, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008238025967861974, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.00860941283447196, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009948577200453206, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012149137064405608, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013527196644283373, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01577400462527472, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.017645597447582196, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.021832072807084116, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.03684291966520407, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07752788615807552, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.12264395572897842, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14919152891684764, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17153333441483087, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1912043726124196, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20231510811485087, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21345990731129502, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21929628256459346, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22692843279988617, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.23391960880037777, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2356743157983755, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2415450834583778, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2400095746127833, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.22956510291210575, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2220051626523379, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.223331518927565, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.22002173813370018, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.2215888883850244, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.21986406189692376, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.22113383636752224, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.22178887384240023, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.2175023943317336, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.21243130998626478, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.22698504788478044, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.2231575934765666, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.21221619680715595, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.2110014725304402, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18929388259376348, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1894953351463713, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19748413399800788, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 2.111389203071594, "validation/loss_best": 2.357914447784424, "validation/acc_best": 0.29106681432262826, "validation/f1_best": 0.2400095746127833} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.5309578263759613, "train/grad": 0.22764851830899716, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.125128173828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124091796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.122418212890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12084228515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11927978515625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11714599609375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.114754638671875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11210693359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1084515380859377, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1037725830078124, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.097681884765625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.075506591796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9868600463867185, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.738921661376953, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6028268432617185, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5218441009521486, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4475367355346678, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.379330711364746, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.319305839538574, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.273934268951416, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.2253764152526854, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.1776374053955077, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.1332581996917725, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.0954568433761596, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.0583181619644164, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.017044129371643, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.9875875282287598, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.9614894819259643, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.9488454020023347, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.938489829301834, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.9396974194049834, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9383991861343384, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.946232996582985, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.9555056351423263, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9715234553813934, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9837428271770476, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.023424504995346, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.056024276018143, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.101637463569641, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.1494686901569366, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.1961026310920717, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.2497754323482515, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.318907424211502, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018023328562267123, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018026391505263747, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01803080700337887, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01803395999595523, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018034646473824977, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018033333797939123, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018027948499657213, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018016123250126838, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017991151222959162, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.017950964188203217, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.017905996995978057, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.017981339027173818, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019311423413455486, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024162769075483082, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.027191038485616447, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02855822562240064, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.029933313829824327, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03141700603067875, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03251001895405352, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03317662353627384, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03387236653827131, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03472588281147182, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.035431281747296456, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.035963097475469115, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.036506746197119355, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.037144453218206766, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.037755633126944306, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03857133050449193, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03922494175843894, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04014187337830663, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.040955236312001944, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04153906684368849, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.042037286646664146, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04213325422257185, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.043544425386935474, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.043736823480576274, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04494996696710587, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04572025103494525, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.047351916786283255, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04829662621021271, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04941836465150118, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05092916417866945, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05298655118793249, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1199638843536377, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.119328498840332, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1183409690856934, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1173806190490723, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.116450548171997, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.115183115005493, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1137728691101074, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1120541095733643, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1094582080841064, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.105458974838257, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0978827476501465, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.05374813079834, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.8780274391174316, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.655531644821167, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.551457166671753, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.488696813583374, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4382660388946533, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3987679481506348, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3739545345306396, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3629343509674072, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3558754920959473, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3485326766967773, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3688032627105713, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.3920040130615234, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.40608811378479, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4064645767211914, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.431155204772949, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4540798664093018, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4764058589935303, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4873576164245605, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.475576639175415, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4581875801086426, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.476931095123291, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.4897632598876953, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.544532537460327, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.598372459411621, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.5781683921813965, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.614563465118408, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6384212970733643, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.6741714477539062, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.698939085006714, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.776069164276123, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.668522596359253, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07419712070874862, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0976375046142488, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1554078995939461, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.20173495754891105, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2321889996308601, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24732373569582872, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2587670727205611, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2705795496493171, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2777777777777778, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2857142857142857, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.287375415282392, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2901439645625692, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2888519748984865, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.28460686600221485, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2827611664820967, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.28294573643410853, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.27888519748984864, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2796234772978959, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.27703949796973054, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.27722406792174237, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2783314876338132, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.28202288667404946, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.27500922849760057, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2757475083056478, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2692875599852344, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.26707272056109266, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2604282022886674, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2591362126245847, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.262827611664821, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.24455518641565152, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.24640088593576967, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22572905131044665, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.24750830564784054, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008255863759723056, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008255987106246207, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008344162436399927, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.00950384634755108, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010798967037261069, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.012822493241328031, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014400650726959932, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015745876745321467, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015909918480034153, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01651464767864985, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01765382758445087, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.030899752268010654, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.07338952771474645, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12416407640654016, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15410059271032941, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1716338392740485, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19064324466282642, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20388365053361437, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21325397835325566, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22339855156278954, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.22693038103944074, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.23285937740158735, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.23400414877252082, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.23128423682006063, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.23335679616144275, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.23680237322435396, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.23219271956619528, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2312304356184974, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.22556117612961588, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.22846777932483042, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.23242991721979891, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.23533835849777796, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.2323050482286931, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.23309072121830807, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.21789109030665924, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.21152861341662557, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.21263390260995588, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.20803479576582615, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.20385442727938688, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19794467993401157, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.19342214735345875, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16402718250396137, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18392724134899704, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 2.1776374053955077, "validation/loss_best": 2.3485326766967773, "validation/acc_best": 0.2901439645625692, "validation/f1_best": 0.23285937740158735} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.4755920755863188, "train/grad": 0.23354081951081754, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12064208984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.119366455078125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11766357421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11583740234375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.114056396484375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1116357421875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.108935546875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10571533203125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1008502197265626, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.093336181640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0775347900390626, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.978858642578125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7731204223632813, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5988735961914062, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5059537506103515, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4428660583496096, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.378121337890625, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.314328880310059, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.258832015991211, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.214416389465332, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.163698797225952, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.117391390800476, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.07146808385849, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.032388746738434, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9929914236068726, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.94816352725029, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.9170897555351258, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.891030902862549, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8740525662899017, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.8634184300899506, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.859097847342491, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8549604570865632, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8557245194911958, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.869711441397667, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.883730109333992, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9027309161424637, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9344502896070481, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9670683133602143, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0096555083990095, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.069824712276459, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.1258366787433625, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.1842315208911898, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.233103278875351, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018620937005616723, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018624624246731402, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018632129202596843, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018636175296269358, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018637978099286558, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018636389742605387, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01863022489938885, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018615472628735006, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018579846452921628, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.018535922383889555, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.018571395161561668, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019962863842956723, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023715569749474526, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.027922984715551137, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.029682769337669015, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.030918494109064342, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03245736691169441, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.033963527334854006, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03479798696935177, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.035262730186805126, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03586543412879109, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03642137943767011, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03696406650356948, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03737174628302455, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03803874955512583, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03880988226272166, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.039611798115074634, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04034824177622795, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04098380223847926, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04184444746002555, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04210570205003023, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04262722995132208, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04302157806232572, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.042978578079491854, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04385081853717566, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04442246835678816, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04474030574783683, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.045020667631179095, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0457332762144506, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04739565098658204, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04895124211907387, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.050541441328823565, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05119171725586057, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1194639205932617, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1186957359313965, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1174283027648926, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1161744594573975, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1148881912231445, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1130800247192383, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1109297275543213, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1081674098968506, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1033642292022705, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0936431884765625, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0661330223083496, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.8902931213378906, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.69206166267395, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5623695850372314, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4905855655670166, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.450263738632202, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4188811779022217, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3973827362060547, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3926193714141846, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.398491859436035, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.406477212905884, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4078128337860107, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.425445318222046, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.428964853286743, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4307706356048584, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4368233680725098, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4518744945526123, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.456726312637329, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4734673500061035, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.490457773208618, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.500955581665039, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5225167274475098, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4911770820617676, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.515176296234131, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.572004795074463, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.7009117603302, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7897462844848633, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.76749587059021, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6526706218719482, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.6430602073669434, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.5697507858276367, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.599470615386963, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6794323921203613, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0812107788851975, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.10003691399040236, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.1539313399778516, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19232188999630861, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.22831303063861202, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24234034699150978, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2530454042081949, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26005906238464377, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27205610926541157, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2766703580657069, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2753783684016242, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.27297895902547065, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27556293835363604, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.27297895902547065, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2757475083056478, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2739018087855297, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.27870062753783686, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.27260981912144705, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.27353266888150607, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.26947212993724623, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.26651901070505724, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2724252491694352, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2692875599852344, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.27500922849760057, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2724252491694352, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.27205610926541157, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.25858250276854927, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24141749723145073, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24713916574381692, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2513842746400886, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.24473975636766335, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.25692137320044295, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2561830933923957, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.24178663713547435, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008624901819164352, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008597667001082507, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00900524430592484, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010892228402993918, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01204367097527489, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014108134487023702, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.015305343549691064, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01952200236888961, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.020940294345337784, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02639473524193991, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.040415613766322855, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.07455573613658632, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11146012319100655, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15380188930236185, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1709736212586417, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18576762679039924, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19903657497091806, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.21334834292862725, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21834419993750018, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22057703698204803, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21864125132216095, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2253793369225597, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.22277934141932051, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.22704081092131156, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.22332162548602508, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.22527585282002113, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2201449516200601, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22437661336512135, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21878579890344682, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21725191813909675, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.22088142773202554, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.21515021047577043, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.22407467593569871, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.22452595598071232, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.21486381333560453, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.20011527584082053, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18898984797132803, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19967801858229106, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.20784471798943827, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.2036559102834693, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.20885113675246283, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1983207162246484, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1900786493357117, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 1.94816352725029, "validation/loss_best": 2.4368233680725098, "validation/acc_best": 0.27870062753783686, "validation/f1_best": 0.22527585282002113} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.4389058995246886, "train/grad": 0.2387809879332781, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.118988037109375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.117659912109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11556884765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.113519287109375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.111510009765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.108687744140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.105548095703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1016046142578126, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.094547119140625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0796112060546874, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0309112548828123, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.817851867675781, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.651613922119141, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5277735137939454, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4503823852539064, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3933813095092775, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.332085704803467, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2699045372009277, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.216179609298706, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1710309410095214, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.119420871734619, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.073675413131714, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.027447862625122, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9871321439743042, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9449133229255677, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8991302466392517, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8656715905666352, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.838957004547119, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.817902728319168, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.8004184317588807, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7988783478736878, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.793585746884346, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.803972887992859, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8193049442768097, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.827480435371399, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.854575606584549, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9011270886659621, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9384687089920043, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9769000452756882, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.0399194812774657, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.090472929477692, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.1291190320253373, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.198306767940521, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018528300374746322, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018530346183106304, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018532267790287733, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018532275408506393, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018531175763346255, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018523739869706332, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01851029843091965, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018484868579544126, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01844180530868471, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01845204217825085, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01902615774422884, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022617546264082192, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026546991830691695, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029092874201014637, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030587037205696107, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03188823287375271, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03334680130705237, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.034558043545112015, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.035038756662979724, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03545904661528766, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.036173870591446756, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03664660935290158, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.037329407604411245, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03799248426221311, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03894315947778523, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03992498518899083, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04085979554802179, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04160732639953494, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04213646268472075, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.042906429003924135, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04379740728065371, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04436470746994019, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.044848888255655764, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.045013755243271586, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.045167069565504786, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.045869500190019605, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.046507875379174946, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04701381020247936, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04691509712487459, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.048300865069031716, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04953472409397364, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04953494368121028, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04994542447850108, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1186363697052, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.117810010910034, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.116433620452881, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1150944232940674, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1137642860412598, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1118454933166504, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1094131469726562, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1059720516204834, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.09832501411438, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.075760841369629, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.989086389541626, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.737607717514038, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6109185218811035, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.508471727371216, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.454129934310913, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4189612865448, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3863160610198975, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3618478775024414, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3559341430664062, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.357006788253784, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.357616901397705, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3545472621917725, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3672454357147217, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.374788999557495, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3866703510284424, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.407949447631836, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4482457637786865, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5000498294830322, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5435895919799805, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5564818382263184, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5218613147735596, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.517136812210083, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.498168706893921, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5203516483306885, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6246213912963867, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.7063045501708984, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7106869220733643, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6679821014404297, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.643967628479004, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5836198329925537, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6242926120758057, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.5373425483703613, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.5674006938934326, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07511997046880768, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07678110003691399, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08877814691768181, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1301218161683278, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.18180140273163528, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21391657438169065, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2410483573274271, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.26264304171280917, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2731635289774825, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.28313030638612036, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2851605758582503, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28202288667404946, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2823920265780731, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2871908453303802, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2840531561461794, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.28940568475452194, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2899593946105574, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2847914359542267, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.27906976744186046, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2676264304171281, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25655223329641935, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.26836471022517533, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.26947212993724623, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26891842008121075, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26541159099298633, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2515688445921004, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23754152823920266, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.25101513473606496, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2526762643041713, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.26578073089701, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.25655223329641935, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.26504245108896274, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2606127722406792, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.00843492577315624, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.00877864349586357, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010115764236929227, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011608662035327908, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01345644310976006, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015697514747679026, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017174000102457523, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02087295495673785, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.023200851441855897, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.031662369367033434, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.059733591109072136, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.09879016474676877, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13170227319175207, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1614192989451489, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18134882558475618, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19035515153304916, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20086765777267712, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.21654055260123484, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22176763213926187, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21999716872545813, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2193145311611228, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.23045596908166768, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.23120434268063672, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2397390937955928, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2417429636663048, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.24108827997343715, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2377368137350815, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22968812424619775, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2225940179188591, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.22843057956449794, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.23922197034310969, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.24292803849015665, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.24325137665273697, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.2338871325369649, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.21840689662458343, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.2086780824405802, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.19930210167772497, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1996551553998478, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19940774355406998, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.21277381006798837, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1987585420601555, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.218263394088923, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.20623683493560688, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 1.9449133229255677, "validation/loss_best": 2.3866703510284424, "validation/acc_best": 0.2899593946105574, "validation/f1_best": 0.2417429636663048} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.382753324508667, "train/grad": 0.23576873362064363, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.118046875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.116573486328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1141650390625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1118603515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.109659423828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.106407470703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.102547607421875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0973541259765627, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0864117431640623, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0520184326171873, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.935804748535156, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6981765747070314, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.579146423339844, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.471644287109375, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.398769187927246, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.342496566772461, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.282244758605957, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.220730018615723, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.167957763671875, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.122780122756958, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0689463901519773, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.0207126140594482, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9694437885284424, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9244444584846496, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8750976049900054, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8250214743614197, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7884396946430205, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7556125593185425, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.731501761674881, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.710099646449089, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7004508030414582, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6952921229600906, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.703446519970894, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7220077621936798, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.725903251171112, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.7482133442163468, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7837486124038697, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8277987521886825, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8725479871034623, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.9329729604721069, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.987635253071785, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.031027241945267, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.0885051691532137, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018247272898443044, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018248932315036655, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018248609686270355, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018247466590255498, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018243930558674036, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0182354477327317, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018215402150526642, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018185167661868035, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018155188504606484, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.018418823573738335, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02014488559216261, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024855430405586958, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02763829691335559, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029768903767690064, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0313705228921026, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.032760010380297896, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03411830582655966, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03517142410390079, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03554662795737386, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.036018958650529384, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.036717395083978775, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0371004874445498, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03771918683312833, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03822598003782332, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03904799150303006, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03998392244800925, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04068857537582517, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04128583941608668, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0416773683950305, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.042528601326048375, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.042920093536376956, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.043770659640431406, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04433360174298286, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.044437951277941465, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04457384055480361, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.045106488037854435, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04489155350252986, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04549153935164213, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04573239590972662, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04589207042008638, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04651167405769229, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04706527546048164, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.047462220434099436, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.118236541748047, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.117342472076416, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.115955352783203, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1145308017730713, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.113112211227417, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.110900640487671, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1079540252685547, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1032235622406006, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0901780128479004, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.037207841873169, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.876762866973877, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6633999347686768, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.562830924987793, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.478015661239624, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.431227207183838, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4019312858581543, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.376511573791504, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3604543209075928, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3579888343811035, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3570473194122314, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3583436012268066, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3483495712280273, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3624894618988037, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.381124973297119, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.399388313293457, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4345896244049072, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4720823764801025, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5038459300994873, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.542478322982788, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.57478928565979, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5615313053131104, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5997262001037598, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6274824142456055, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.617799997329712, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.649505138397217, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.755159616470337, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7663943767547607, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.721766948699951, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7743804454803467, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7634096145629883, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.740096092224121, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.669663429260254, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7502365112304688, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.067921742340347, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07881136950904392, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1079734219269103, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.15411590992986343, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2059800664451827, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2307124400147656, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2469545957918051, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26393503137689184, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.27260981912144705, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.28313030638612036, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.28313030638612036, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2833148763381321, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2847914359542267, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.28349944629014395, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.29217423403469917, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.29051310446659284, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2847914359542267, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2807308970099668, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.27740863787375414, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2705795496493171, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26578073089701, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.26116648209671467, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.26153562200073827, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2672572905131045, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2589516426725729, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.260797342192691, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.257844222960502, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24916943521594684, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24898486526393504, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23901808785529716, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23901808785529716, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2456626061277224, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22997416020671835, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008977647158874136, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009147282770337994, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011963726799261573, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012921227629165083, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014070237435224514, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014921149074577212, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.016284484191305286, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01848037589893349, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.024392626674638873, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.04422236061494301, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07393121205954499, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12503169806350597, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1515610381279623, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17357715227475332, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.19444380332578481, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2076449125010679, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21773894026622775, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22102759010398057, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22223586993920455, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22366958793808386, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.22448234071438675, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2377000273728113, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.23771654571747902, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.23499171089907692, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.23319548017202274, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.23219164410829016, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.22744647800817086, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22707112975690724, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.22143931481045556, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.22664036640778287, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.23236946856295004, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.22516301371437378, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.22280954832862973, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.22644575676284417, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.21892872290324786, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.20738067491405396, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.20760436698092835, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.20490460894830065, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.194283934491255, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1992306085701604, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.20279679428790823, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.20779122792936447, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1888803710111213, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 2.0207126140594482, "validation/loss_best": 2.3483495712280273, "validation/acc_best": 0.29217423403469917, "validation/f1_best": 0.2377000273728113} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.3527088987827303, "train/grad": 0.2404102875292301, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.122723388671875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.121126708984375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.118714599609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.116317138671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.113941650390625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11051513671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1061334228515625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.099754638671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0824554443359373, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.012188720703125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.8410430908203126, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6443536376953123, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5438082122802737, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.446857452392578, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3761611557006836, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.320641860961914, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2602572441101074, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1986876487731934, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.143577690124512, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.096824064254761, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0424712085723877, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9936444902420043, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9395844316482544, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8920106887817383, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8396451377868652, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7831367874145507, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7374758249521256, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.706310585141182, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.683241331577301, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.6556237715482711, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.6534396702051162, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6412766379117967, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6413605529069901, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.6596285116672516, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.678666986823082, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.6990337175130845, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7379725974798204, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.7799493569135665, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8203945529460908, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8655410343408585, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.9147585368156432, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9749607062339782, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.0352781397104263, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018338027889840305, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018336885962635278, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01833484388887882, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018331770594231786, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01832826469093561, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018318234137259425, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018296960717998446, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018268518932163715, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01830311120953411, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019196672020480036, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021955217514187098, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0265907659009099, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.028702751267701387, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030769702289253475, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03250389932654798, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03392381125129759, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.035208120634779336, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03607288427650929, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03627410275861621, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03663375312462449, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03734010926447809, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03785155248828232, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03863232498057187, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.039256881922483444, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.040130877755582335, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04123328868299723, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042267304714769124, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04310411293059588, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04355303842574358, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.044353467375040055, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04513744626194239, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.045495164133608344, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04545986507087946, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04550056582316756, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04572295051068068, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04581582909449935, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.046056701187044385, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04550985608249903, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0456624916754663, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04513442965224385, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04538463160395622, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.046152041349560025, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04624044857919216, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1178810596466064, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1168811321258545, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1152358055114746, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1135761737823486, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1118204593658447, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.109107255935669, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.10526967048645, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.098599910736084, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.07619571685791, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.974410057067871, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.781851291656494, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.616356372833252, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5309836864471436, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4637012481689453, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.427337169647217, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4046638011932373, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3852598667144775, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.375908374786377, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3749442100524902, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3718273639678955, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.373189687728882, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.365142345428467, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.369314432144165, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.3748021125793457, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3909409046173096, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.408592462539673, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.454662322998047, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.492159843444824, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.50833797454834, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5664737224578857, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.584245443344116, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6059441566467285, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6469039916992188, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.6583638191223145, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.62990140914917, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.6694798469543457, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6770122051239014, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6435728073120117, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.585153579711914, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.547682046890259, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.57438063621521, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.5781538486480713, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.5846776962280273, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.067921742340347, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07604282022886674, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08933185677371724, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.13602805463270579, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17534145441122184, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21133259505352528, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23440383905500184, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26411960132890366, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2713178294573643, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.28165374677002586, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2862679955703212, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28349944629014395, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28349944629014395, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.28460686600221485, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2942045035068291, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.29457364341085274, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.29365079365079366, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2918050941306755, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2884828349944629, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2868217054263566, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26965669988925806, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2703949796973053, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2643041712809155, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.26559616094499816, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.26799557032115173, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26541159099298633, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26965669988925806, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.27611664820967147, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.26836471022517533, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.257844222960502, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2751937984496124, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.27888519748984864, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.27740863787375414, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2681801402731635, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.26744186046511625, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2574750830564784, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009145757346584358, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.00992571778817633, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0116151712099178, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012254217239192062, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013824496955171383, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01642400271483181, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.018335544102231177, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021947562435166486, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.033963155615777105, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06214397247454705, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09171482656471315, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13126351564971972, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15512791122826808, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17640383674340576, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.19267140933965657, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20229457489813973, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2142199563971229, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22166654987006884, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22305975381491963, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22556045263904514, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2284630110866974, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.24340930939077965, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2453353440047549, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.24602782110790042, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.24692786603786154, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.24573329329938917, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.24448533693405658, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22817800244193606, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.22861763569616533, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.226368018261608, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.22733543991678626, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.22804850394805629, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.22758548042111415, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.2244974443329374, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.23075149271136378, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.22440818236503177, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.21186912354939333, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.22655143131684044, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.23094417393999325, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.23324234891634119, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.22439020493105852, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.21868786054907421, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.21371032108619656, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 22, "lr_best": 0.00021599999999999996, "wd_best": 0.05, "train/loss_best": 1.9395844316482544, "validation/loss_best": 2.369314432144165, "validation/acc_best": 0.29457364341085274, "validation/f1_best": 0.2453353440047549} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.299242445230484, "train/grad": 0.24143188126385212, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1138671875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.112081298828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.10931884765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.106475830078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.103648681640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.099423828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.094012451171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.085057373046875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0563763427734374, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.93820556640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7495216369628905, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.590290985107422, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.499940872192383, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4085700607299803, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.339419136047363, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2843446731567383, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2237146949768065, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.161757583618164, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1049891567230223, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.055623245239258, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9978900480270385, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.946604859828949, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8891576838493347, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8394183385372163, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7852897191047667, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7250436377525329, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.677103123664856, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6400657570362092, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.6053673565387725, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.5633209705352784, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.5532155567407608, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.5328246831893921, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5316793090105056, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.5428684902191163, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5615492129325867, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5914043253660202, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.6243820571899414, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.676872762441635, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.725263268351555, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.7774528831243515, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8203138518333435, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.8604927557706832, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.927412696480751, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018323086597956718, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018325320365838706, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01832741281017661, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018327072304673495, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018324145483784377, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018313048710115253, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018290673452429473, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018268360192887485, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01842322756536305, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020088018369860947, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02350520155392587, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.027686309069395065, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.029595061549916864, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.031738146794959905, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03352164090611041, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03486092840321362, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03597214444540441, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03669865541160107, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03684934860095382, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037182830944657325, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03789758332073689, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03839700110256672, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0391963079571724, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03983747625723481, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.040652757529169324, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.041423448137938974, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042266007252037524, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04282236259430647, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0433683941885829, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04400952028110623, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.044427620023488996, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04497372794896364, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.045595950540155174, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04571555195376277, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04606538821011782, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.046199353616684674, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.046046676114201546, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.046003295984119175, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0456579801812768, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04535577803850174, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04483227415010333, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04472813216969371, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04468686331063509, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1177287101745605, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.116724967956543, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1150577068328857, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.113363027572632, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1115219593048096, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1085753440856934, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1040914058685303, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0953075885772705, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.060075283050537, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.913048505783081, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.726449966430664, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.588088274002075, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5125508308410645, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4549710750579834, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.423790693283081, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.404172658920288, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3898003101348877, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3832502365112305, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3875486850738525, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.390218734741211, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4047036170959473, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.408100128173828, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.437467098236084, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.461057662963867, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.47998309135437, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4988749027252197, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.538203716278076, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.580080032348633, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6381990909576416, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.705502510070801, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.6878554821014404, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7181050777435303, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.723555564880371, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7210655212402344, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.7236862182617188, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.735346794128418, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7060422897338867, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.704437255859375, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.673255205154419, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.616276264190674, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.5969655513763428, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.5847153663635254, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6223270893096924, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07493540051679587, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09819121447028424, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.150609080841639, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1821705426356589, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2188999630860096, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2353266888150609, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2591362126245847, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.266703580657069, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2731635289774825, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2742709486895533, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26965669988925806, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27297895902547065, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2718715393133998, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27759320782576596, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.27464008859357697, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.27260981912144705, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.27260981912144705, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2727943890734588, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2702104097452935, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2698412698412698, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.26411960132890366, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.26153562200073827, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.26559616094499816, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.26375046142488, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26541159099298633, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26541159099298633, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2724252491694352, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2691029900332226, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2652270210409745, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2643041712809155, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2619047619047619, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2737172388335179, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2763012181616833, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.271686969361388, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.26559616094499816, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008955057765287017, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009171000390575076, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010629958767446795, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011639057224123658, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012860525985075331, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014123497207365271, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.016454952666067193, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020917935073352645, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.0416906483153428, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.07027942783172575, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09904372482786085, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1385372429320582, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15684869221273742, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1763309587345443, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1878033042893815, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19997315579031585, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20753430763324257, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2122099907667434, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21027722506256152, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2147196000405999, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21824496448292394, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22510346793219202, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.22140379755379072, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.22051839528092568, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.22051327800916823, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21862950326555586, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21893188722580362, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2189715475851147, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21076894086631168, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21183838922678333, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.2143930368799103, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.21180653328221877, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.21787057348799568, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.21786606908813702, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.22293037489426829, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.21487929516005036, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.2196722697467436, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.2183128751759332, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.21548895180289165, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.22735342795081212, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.22877080925709511, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.230707939555327, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.21520805398486773, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 1.946604859828949, "validation/loss_best": 2.408100128173828, "validation/acc_best": 0.27759320782576596, "validation/f1_best": 0.22510346793219202} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.2682630240917208, "train/grad": 0.2431114959716797, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11820068359375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11647705078125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.113486328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.110574951171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.107762451171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1033721923828126, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.09732177734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.08661376953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0456561279296874, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.892278747558594, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.710734100341797, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5695690155029296, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.4831776428222656, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.3938661575317384, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3261655807495116, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2719879722595215, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.211794528961182, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.14867844581604, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0906620025634766, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.041316518783569, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.981698932647705, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.929660291671753, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8697003936767578, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8138809406757355, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7545191609859467, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6824614429473876, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6287353646755218, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5854361480474473, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5562229198217392, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.5154193925857544, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.499621680378914, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.4793692934513092, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.475336530804634, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.476577267050743, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4928324973583222, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5144436687231064, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.5410576105117797, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5828886669874191, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.624932302236557, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.6929464715719222, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.742240759730339, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.7825566405057907, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.8514063185453415, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018666197867132722, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01866815974470228, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018667428265325726, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018664272385649383, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01865914569236338, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01864226674195379, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01861498971004039, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018596960864961147, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018916705776937305, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021156957331113516, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02478431469760835, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028308590948581694, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030034824348986147, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03217507129535079, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03389611421152949, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03516476901248097, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03618524259887636, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03689014825038612, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037040177844464776, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0374051062669605, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03820135597139597, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03872251490131021, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03954637702554464, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.040138773564249275, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04102081067860126, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.041913276966661216, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04289424996823073, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04347266498953104, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04405595606192946, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04466530224308372, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04523116420954466, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04596568891778588, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04661605974659324, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.046523276697844264, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.046535659674555065, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04647150844335556, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0457310058362782, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04528432223945856, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04470895050093532, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04442575631663203, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04449623567983508, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04443827286362648, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.044012846704572436, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.117367744445801, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1162638664245605, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1144073009490967, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.112485647201538, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.11039662361145, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1069605350494385, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.101594924926758, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0903639793395996, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.039999008178711, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.861839771270752, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.6895439624786377, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.564669132232666, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.4951369762420654, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.440549612045288, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.409025192260742, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.388685941696167, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.373347043991089, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.366131067276001, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3711607456207275, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3757553100585938, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3902976512908936, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3891587257385254, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.411109685897827, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4211316108703613, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.442004680633545, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4654054641723633, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.514361619949341, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5485641956329346, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5774030685424805, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6238551139831543, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.626753330230713, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.658693313598633, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.7060370445251465, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.739542007446289, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.776865005493164, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.8407342433929443, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.900357484817505, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8446874618530273, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7913219928741455, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8024754524230957, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.767483949661255, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.693115472793579, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.679415702819824, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.073827980804725, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07899593946105574, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.11018826135105204, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.15762273901808785, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19324473975636766, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2277593207825766, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.24344776670358065, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.26245847176079734, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26965669988925806, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2777777777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.28017718715393136, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2836840162421558, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27851605758582504, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2809154669619786, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2807308970099668, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2901439645625692, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2862679955703212, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2871908453303802, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2851605758582503, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.28294573643410853, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.27740863787375414, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.27408637873754155, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.26965669988925806, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2661498708010336, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2727943890734588, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2678110003691399, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2646733111849391, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26116648209671467, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.26135105204872644, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2593207825765965, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2528608342561831, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.26541159099298633, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.26744186046511625, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2593207825765965, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2739018087855297, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2631967515688446, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009926913175049438, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011780435285544141, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01241480714673957, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014690656387660463, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016236571749524604, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.018279936643709166, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.020296319076039536, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.025147972843707064, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.048724552177380344, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.07523130927198955, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11080289362723712, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.14728797526797618, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.16515086532452036, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.18768108818426577, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.20071543178207651, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20989872007604848, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21841386230886073, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22201208007974735, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2209469667035744, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2240938002670684, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.22486100200937623, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2359774892614023, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.23402749870192663, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.23522931579360695, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.23379000228900346, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.23543053874166264, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.23242479551743608, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.23108086046219564, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.22880743307800522, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2270800737225128, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.23471190096442532, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.23003005833312562, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.23023862304763357, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.22573696389738437, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.21953632094375394, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.2157852548914975, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.20738728673843787, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.21317373061628295, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.21262507563736113, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.21355242567909496, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.20480450991452867, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.21128035673847742, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.20377533662488703, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 1.929660291671753, "validation/loss_best": 2.3891587257385254, "validation/acc_best": 0.2901439645625692, "validation/f1_best": 0.2359774892614023} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.2264160764217378, "train/grad": 0.24187561139464378, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.115362548828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11353515625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11048583984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.107559814453125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.104498291015625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.099862060546875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0931695556640624, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.080367431640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0256890869140625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.848011474609375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.6731959533691407, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5398289489746095, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.457300109863281, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.368134536743164, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.300327949523926, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.246037712097168, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1869429206848143, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1253505039215086, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0687541580200195, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0177570247650145, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.95439528465271, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.898910915851593, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8358135843276977, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7780028343200684, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.714245059490204, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6430331039428712, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.579343649148941, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5321153807640076, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4884680616855621, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.44145048558712, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4152454286813736, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.3886278200149536, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.3747302031517028, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.3783611446619033, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.390464483499527, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4131428188085555, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.444514029622078, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.4883245676755905, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.5400691688060761, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5961849755048751, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.6467428338527679, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.6921768277883529, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7466062426567077, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018283728784881534, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018284369981847702, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018284040950238705, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018282629139721392, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01827806399669498, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018263688934966923, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018243336211889984, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018249639216810463, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018783715781755746, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021469402760267257, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02520486026071012, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028365905517712234, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030075537515804173, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0323166902270168, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03406122260726988, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035300302812829615, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.036269765067845586, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03692805322818458, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0370271127205342, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03735389056615532, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03813055695965886, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0385523428954184, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039344965498894456, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03997661661356688, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04087082987651229, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04203212136402726, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04291587309911847, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04364895474165678, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04416175991296768, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0450949059613049, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.045354331880807876, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04602479547262192, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.045935223996639254, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0455920878611505, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.045757990516722205, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.045782336648553607, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04579426363110542, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04548492182046175, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04496083810925484, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04407676635310054, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04379178822040558, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04372334210202098, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.042448540180921555, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1167988777160645, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.115633010864258, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.113670825958252, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1116063594818115, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1093785762786865, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1056864261627197, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0996031761169434, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.086178779602051, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.021665096282959, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.826848268508911, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.668177843093872, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.5504608154296875, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.4853403568267822, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4332683086395264, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.402757167816162, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.382235288619995, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3675341606140137, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3593833446502686, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.360509157180786, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.362285852432251, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3727810382843018, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3741865158081055, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3942244052886963, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.409846544265747, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4356026649475098, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.472428798675537, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.531383991241455, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.574575901031494, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.640505075454712, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6890523433685303, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.6870970726013184, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7304136753082275, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.7815446853637695, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.808577537536621, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8030130863189697, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.8409488201141357, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.8335952758789062, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8130552768707275, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7294867038726807, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.714073419570923, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6719484329223633, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.665332555770874, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.716604471206665, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0753045404208195, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08250276854928018, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.11775562938353636, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1659283868586194, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19804355850867478, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2307124400147656, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2469545957918051, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.26116648209671467, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.268733850129199, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2766703580657069, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2836840162421558, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2871908453303802, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2847914359542267, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28608342561830935, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2836840162421558, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2899593946105574, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.287375415282392, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.290328534514581, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.28460686600221485, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.28294573643410853, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2737172388335179, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26504245108896274, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25987449243263194, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25452196382428943, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.25544481358434845, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.25083056478405313, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.24880029531192321, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.25212255444813586, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2572905131044666, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.25212255444813586, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24713916574381692, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2532299741602067, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2652270210409745, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.26744186046511625, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2589516426725729, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2591362126245847, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2532299741602067, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01018370186311227, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010632378698778403, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012304264979924423, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01360752271762363, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01409090768923769, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016989148246400052, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019928265180254433, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.027006949411885087, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.052579230622893595, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08233170087110828, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11629957430962005, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.15090121073979915, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.16846595130729425, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1871202834664515, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.19764197435130337, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.21010013353344795, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21871433247219885, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22673641424171523, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2273599351352238, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22932843988595342, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.23226544036029392, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2395880011934052, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2381094371458056, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2432386801858506, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.24004594941517246, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.23859960443672978, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2322388927349699, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22498347128215454, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.22164119212362973, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2198764271106133, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.22085601292969037, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.21691860811370775, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.21700011936303074, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.21485823686967764, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.21977382710313462, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.21190285505015413, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.20614111001628857, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.2071478810309374, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.21922414401580062, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.2212787751221477, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.21711110386512475, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.21945326305760007, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.20849875102590384, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 1.7780028343200684, "validation/loss_best": 2.409846544265747, "validation/acc_best": 0.290328534514581, "validation/f1_best": 0.2432386801858506} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.1864405244588854, "train/grad": 0.23956435576081275, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11073974609375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.10881591796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1056982421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.102764892578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.099671630859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0948834228515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.087698974609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.07324462890625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0079913330078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.8188302612304685, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.6567918395996095, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5286396026611326, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.446941909790039, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.357471237182617, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.2894450759887697, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.234821262359619, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.174328155517578, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.109137601852417, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0489184999465944, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.996762843132019, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.933051221370697, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8752087545394898, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.808423660993576, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.747201247215271, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6827388000488281, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.603946449160576, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5369921928644181, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4845335376262665, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4383131617307663, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.379943413734436, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3514643859863282, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.314044942855835, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2937620604038238, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.2875290024280548, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.2844243776798248, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.305960003733635, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.3364380565285683, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.374241042137146, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4233574360609054, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.4759509980678558, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.5166265100240708, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.561431024670601, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.6351590877771378, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018661747821606695, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018661994324065744, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01865857483353466, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018653374980203807, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018643257529474794, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01862172685097903, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018590875775553285, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01859673172235489, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01927594917360693, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022165516540408135, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025951290102675557, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028931982489302753, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030645893793553113, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03297142297960818, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03471840755082667, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0359346080198884, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03684942528605461, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.037416515834629537, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03748542051762342, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03773536576889455, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03842424946837127, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03878641903400421, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039549385588616134, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.040132131706923244, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0409505839087069, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04180486720055342, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042506150007247924, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04309247270226479, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.043576832190155984, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.044009435549378396, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04393828552216292, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04427977509796619, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04448023894801736, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.044272471256554126, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04449706083163619, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.044519809633493425, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.044441398289054634, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.044209948908537626, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.043859556596726176, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04306039964780212, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04264417571946979, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.042416619211435316, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.041807622965425256, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.116696834564209, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1155147552490234, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1135268211364746, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.111433506011963, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1091692447662354, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.105323553085327, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0987868309020996, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.083653688430786, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.008565664291382, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8055508136749268, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.655127763748169, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.540839672088623, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.4780101776123047, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.427037239074707, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.3974902629852295, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.3786354064941406, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3662493228912354, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3601834774017334, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.363162040710449, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.365492343902588, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.376621723175049, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.377915143966675, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4005415439605713, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.417401075363159, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.43886399269104, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4700510501861572, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5228610038757324, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5671446323394775, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.638400077819824, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7015795707702637, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7176995277404785, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.758171558380127, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.7808845043182373, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.800575017929077, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8335206508636475, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.8560287952423096, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.8356690406799316, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.847534656524658, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8036561012268066, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.720625162124634, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7319092750549316, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.6259875297546387, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6277055740356445, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.067921742340347, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07401255075673681, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08139534883720931, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12347729789590255, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.17035806570690293, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.2039497969730528, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.23329641934293097, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.24861572535991142, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.26393503137689184, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2713178294573643, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.27870062753783686, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.28460686600221485, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2836840162421558, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2857142857142857, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2866371354743448, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2871908453303802, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2897748246585456, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.28755998523440385, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2868217054263566, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2840531561461794, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2812846068660022, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.271686969361388, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26245847176079734, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2631967515688446, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25655223329641935, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.257844222960502, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2502768549280177, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2576596530084902, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2541528239202658, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2511997046880768, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24455518641565152, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24437061646363972, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2661498708010336, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.25950535252860835, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.26411960132890366, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2602436323366556, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010078165681939644, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010887430449927368, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012720381854220272, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014294606937469737, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01545430006300712, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017734000689345067, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021403186765972523, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.027200261617431803, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05618644932273804, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08870120049774681, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.12376469138978781, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.15332077334051894, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.17161601069352894, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.19126365534864295, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.20261214523071824, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.21368463541613383, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2205635118438168, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22586512965700567, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22876368835149682, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22973372345319487, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.23270266665723316, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2364279609693288, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.23588367361940174, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2379144736529019, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2369134828082843, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.23596314345783898, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.23309437787258225, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22263018950787447, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.22342722031627046, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2214727030306722, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.2221547144576712, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.21905430300410256, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.22963406441378872, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.22482101255894596, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.22374631983317284, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.22185732397117786, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.2189299470096997, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.21980812225530663, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.22807133594708476, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.23517434008734892, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.23277224742213565, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.2403120893197858, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.22466025979905133, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 1.8752087545394898, "validation/loss_best": 2.377915143966675, "validation/acc_best": 0.2897748246585456, "validation/f1_best": 0.2364279609693288} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 2.167706154584885, "train/grad": 0.23889113202691079, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1185107421875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11659423828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.113441162109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11026123046875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10699462890625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.101929931640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0942425537109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0782086181640627, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0049505615234375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.8093768310546876, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.655362091064453, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5335589599609376, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.455104293823242, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.3672780990600586, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.299636001586914, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.243349895477295, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.181734838485718, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.116581983566284, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.056199836730957, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.002812910079956, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9349659776687622, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.878007435798645, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.812073644399643, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7487651979923249, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6791107857227325, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5942510843276978, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5211499255895615, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4628743869066239, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4145682352781295, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3522510313987732, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3166976648569106, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2752716720104218, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2446690821647644, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.2323110511898994, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.2170187491178512, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.2365871715545653, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2608456385135651, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2861814641952514, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.330043375492096, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.389461116194725, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.4359926342964173, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.4642048954963685, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.5329219990968703, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018598049972206356, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018597142430953683, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01859187232796103, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01858461641240865, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018573052939027546, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01854997653979808, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018520630421116947, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018543155509978534, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019365424676798283, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022453042631968855, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02632714728824794, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02922779413871467, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03091440662741661, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0331992228794843, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03486312655732036, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.036022744160145524, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03686989106237888, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03742761860601604, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03747363215312362, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037734998017549516, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.038433317597955464, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03880436431616545, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03954209465533495, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04008391015231609, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04082045014947653, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04164362281560898, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042372286822646855, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0428638968616724, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.043089054133743046, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04367143658921122, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.043598967418074605, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.043883663713932035, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.044110031686723235, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.044013468436896803, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.044161060582846405, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04440049747005105, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04433012185618281, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04443190036341548, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04362129570916295, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04307949170470238, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.042840260714292526, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04190901363268495, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.041051531583070754, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.116626501083374, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.115434408187866, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.113431930541992, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1113121509552, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.109006643295288, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.105071783065796, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.098287582397461, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0820398330688477, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0001704692840576, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7932419776916504, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.647660732269287, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.5358529090881348, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.4747588634490967, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4248483180999756, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.395874500274658, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.377504587173462, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.36590313911438, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3610286712646484, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3668947219848633, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3728411197662354, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.389578104019165, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3920481204986572, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4151244163513184, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4341213703155518, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4581823348999023, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5012805461883545, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.558476209640503, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6062636375427246, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.672248125076294, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7342631816864014, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.753441095352173, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7906646728515625, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8150124549865723, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8344621658325195, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.885986328125, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.9440364837646484, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.939279556274414, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.944944143295288, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8696513175964355, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.84037184715271, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8360633850097656, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.711343288421631, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.766314744949341, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0812107788851975, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1258767072720561, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.17404946474713917, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.2056109265411591, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.23661867847914358, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2499077150239941, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.262827611664821, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.27150239940937615, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.28017718715393136, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.28294573643410853, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2862679955703212, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2836840162421558, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2833148763381321, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2840531561461794, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2901439645625692, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2868217054263566, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2840531561461794, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2809154669619786, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2792543373938723, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.26744186046511625, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2589516426725729, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25950535252860835, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25359911406423036, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.25987449243263194, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.25193798449612403, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.25193798449612403, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2532299741602067, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2571059431524548, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2502768549280177, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2456626061277224, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24178663713547435, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.25489110372831303, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.25489110372831303, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2511997046880768, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2602436323366556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.25230712440014763, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010175213322708496, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011510573851354141, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012552254656274194, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014056679353279429, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015108501525280286, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017770285701308605, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02028268936318534, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.026570057606108696, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05719604985131735, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09153682605357162, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.12548263086826447, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.15863325311646265, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.17433056348481288, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.19184863453776838, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.20433940787832583, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2146227569793878, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.21956787063551655, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22668617273975386, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22632477140538887, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22727228620166015, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.23044939415219423, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.23794395094814658, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.23700056839835404, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2352482851548875, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.23526244933960236, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.23596733187645955, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.22757123330735096, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2202238529680016, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2205997760435945, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21984428233311284, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.22322313196090326, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.21818291754929264, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.22312260721136015, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.2234440213633526, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.2229705382215905, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.21750433217205167, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.21387629936373276, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.21114169978298766, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.22182012949124097, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.22023920223473328, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.21938635417153038, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.22705776637268046, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.211647731256373, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 1.878007435798645, "validation/loss_best": 2.3920481204986572, "validation/acc_best": 0.2901439645625692, "validation/f1_best": 0.23794395094814658} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 2.135287736058235, "train/grad": 0.23208682782948017, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11385009765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.111761474609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.108466796875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1052294921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10178955078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.096644287109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.088568115234375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0715374755859375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.993060302734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.7912908935546876, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.63652587890625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.51428092956543, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.4358966064453127, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.3490653228759766, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.282560043334961, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.227814750671387, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.167450942993164, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1006156539916994, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0375004863739012, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9836079454421998, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9175310039520264, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8577780890464783, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7889254736900329, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7243751764297486, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6534634149074554, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5665178579092025, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4914543950557708, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4324982637166976, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3790596288442611, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3080893230438233, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.268241965174675, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.221303949058056, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.186943392753601, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1704845187067985, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1538255482912063, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.161946875154972, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.173912054002285, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.19050014346838, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.2377038705348968, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2869049862027169, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.3299405485391618, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.3580620414018632, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.4208717733621596, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018405608581379056, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018404053999111058, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01839901856146753, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018391107032075523, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01838074335362762, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0183594208676368, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018335078465752302, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018371019680052995, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01925768927205354, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022363115129992367, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.026086645526811482, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02886118920519948, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03051690209656954, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.032783933393657205, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03441467615775764, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03554349941201508, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03637092525139451, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036948762582615015, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037054114704951645, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03734953354112804, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03803925490006804, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038350277580320835, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039043769016861914, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.039549568016082046, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04020766664296389, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.040917583517730234, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04152162516489625, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04195099350064993, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.042161771878600124, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04243509605526924, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.042302210722118615, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04233651112765074, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04228153072297573, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.042153227273374796, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04199324082583189, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04195693396031856, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04183860071003437, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04178926907479763, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04135065482929349, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04091005289927125, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04045962039381266, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04014743009582162, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03984414486214519, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1165270805358887, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1153552532196045, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1133320331573486, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.111171007156372, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1088335514068604, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1048293113708496, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0978035926818848, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.080852746963501, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.995389461517334, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.786663293838501, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.6434166431427, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.532759666442871, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.472149133682251, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.42207407951355, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.3925342559814453, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.3732309341430664, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3605306148529053, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.354038715362549, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.358035087585449, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3620035648345947, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3764026165008545, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3770225048065186, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3991096019744873, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4167850017547607, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4430603981018066, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4799506664276123, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5402557849884033, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5844228267669678, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6430602073669434, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7149431705474854, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.732365369796753, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.782496452331543, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8141653537750244, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8488082885742188, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8811581134796143, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.953326463699341, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.96842622756958, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.979694128036499, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.930624008178711, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.872694492340088, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8334288597106934, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.747058868408203, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7690203189849854, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08213362864525656, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12846068660022147, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.17478774455518642, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.2087486157253599, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2368032484311554, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.25046142488002954, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2742709486895533, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.28202288667404946, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2857142857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2870062753783684, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2858988556662975, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28645256552233295, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2853451458102621, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2929125138427464, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.287375415282392, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2858988556662975, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2833148763381321, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.28017718715393136, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2724252491694352, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26448874123292726, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2622739018087855, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2558139534883721, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.26356589147286824, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2567368032484312, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.25950535252860835, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26005906238464377, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25544481358434845, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.25046142488002954, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2369878183831672, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24197120708748615, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2513842746400886, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2617201919527501, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2631967515688446, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2663344407530454, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.25396825396825395, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010680612967473864, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011728661871638449, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013002862851468405, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014694276934439568, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015843637375006055, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01862854735008864, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021047796835151122, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.027319017288970793, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.0587564796718151, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09250749123796355, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.12891517607610128, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1591638164790213, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.17399583422988438, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1926724059879783, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.20735862603890706, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2166128868781457, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2235797495430961, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22901788142198762, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22871575412142367, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.23089881090958064, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.233752058660719, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.24020445980653093, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2379096501610606, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.23932073513735527, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.23863435042171896, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.23819517269177992, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.23379640001424998, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22632207454010644, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.22537784928809013, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2217542909285224, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.22630042600259634, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.22148508086256066, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.22722965163308542, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.22662533385754166, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.21985481892302738, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.213901861126322, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.20467501430401813, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.20868686801621053, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.2165734848392076, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.2257596435882995, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.22803742495521048, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.23083512714297827, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.2166357360890916, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 1.8577780890464783, "validation/loss_best": 2.3770225048065186, "validation/acc_best": 0.2929125138427464, "validation/f1_best": 0.24020445980653093} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 2.120225956439972, "train/grad": 0.23077149137854577, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.115855712890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11399658203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1106982421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1075244140625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10425048828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.09897705078125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.090848388671875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.07377197265625, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9936700439453126, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.7935008239746093, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.6428317260742187, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.522164840698242, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.443543243408203, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.3544835662841797, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.2857343482971193, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.22928560256958, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.16654333114624, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0988791751861573, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.035438871383667, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9805357980728149, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9131495571136474, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8525590300559998, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7829319059848785, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.716996101140976, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6424909073114395, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5545799666643143, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4772802317142486, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4194632518291472, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3624633628129958, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2848076742887498, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2453604143857957, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.193180574476719, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1530181208252908, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1330535221099853, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1076468712091445, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1114576902985573, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.1258438965678215, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1343280911445617, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.1723137640953063, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2216977190971374, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2626760196685791, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.2870347172021865, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.3429561460018158, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018517162231728435, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018514295970089734, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01850815655197948, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01849963948596269, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018488382990472018, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018464836534112693, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018439107462763785, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018473814111202955, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019386859615333377, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02250652987509966, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.026244169771671294, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029013885306194424, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030683037992566824, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03293910847976804, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03456424140371382, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035682113682851194, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.036505441209301354, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.037077696062624454, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03712157147936523, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03735031856223941, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.038001094534993174, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03831435482949019, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.038977512437850234, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03946030210703611, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04014616159722209, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.040886793974787, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.041516217347234485, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04190571688115597, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04197029011324048, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04219359269365668, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04203812312334776, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04219945464283228, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.041924482844769954, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04159952573478222, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04142115032300353, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.041289823707193134, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04120849037542939, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.041127242743968964, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04051614698022604, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04002028807997703, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.039644141159951685, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03945038687437773, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03896551387384534, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.116525650024414, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.115330696105957, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.113290548324585, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.111116409301758, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1087756156921387, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1047239303588867, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.097649097442627, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.080451488494873, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9936554431915283, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.784365653991699, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.6420650482177734, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.5320069789886475, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.472059965133667, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4225924015045166, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.3936221599578857, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.37504506111145, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.363184928894043, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.357734441757202, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.362489938735962, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.367581605911255, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.383661985397339, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.38482666015625, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.406186103820801, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4235618114471436, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4463424682617188, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4852468967437744, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.544050693511963, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5907726287841797, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6531271934509277, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.72342848777771, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7360241413116455, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7870032787323, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8140251636505127, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.852384328842163, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8960824012756348, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.955932855606079, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.962503671646118, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.988067150115967, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.944472551345825, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8902769088745117, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.859494686126709, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.762446403503418, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.806117296218872, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.073827980804725, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08305647840531562, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12938353636028055, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.17571059431524547, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.2081949058693245, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2355112587670727, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.24787744555186417, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.26264304171280917, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2739018087855297, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.28146917681801403, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2840531561461794, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2858988556662975, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28497600590623845, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28922111480251017, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2847914359542267, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.29051310446659284, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2866371354743448, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2870062753783684, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.28349944629014395, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.28294573643410853, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2731635289774825, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26264304171280917, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.262827611664821, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25341454411221853, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.26411960132890366, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2530454042081949, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2574750830564784, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2558139534883721, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25655223329641935, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24713916574381692, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24215577703949798, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2369878183831672, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.24492432631967515, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2589516426725729, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.25544481358434845, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2619047619047619, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.24713916574381692, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010183020067456802, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011653593635540208, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012745435228514361, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01480237595698572, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015151184254940888, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.018911708508386176, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021475480809904714, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.028253536702477128, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05928940183248058, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09334401678816796, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.12823384507562977, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.15707661892940258, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.17150791588762382, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.19173679762750306, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.2070081067161532, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.21585846979369672, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.22119973696514142, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2281863012470934, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22787622282755438, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.23219840006277392, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.23284164465708793, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.23825832740766648, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.23509014289912825, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2389244996708483, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.23776545479426417, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.24097008017592514, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.23397985626123388, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22335637767141855, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2246244276863553, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21997371177355754, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.22859100872083302, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.22045677567153366, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.22956946724211372, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.2244936667835907, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.22377947708119927, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.21277809300534622, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.21066291965027287, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.20622030782726786, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.21254186007692066, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.22860772291262044, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.22495167321608248, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.23151815025725497, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.21392370198165458, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 1.8525590300559998, "validation/loss_best": 2.38482666015625, "validation/acc_best": 0.29051310446659284, "validation/f1_best": 0.23825832740766648} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 2.1151802110672, "train/grad": 0.22780755184590817, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11604248046875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.114022216796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.110758056640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1075634765625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10419921875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.09897216796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.090819091796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0735089111328127, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9932611083984373, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.793118133544922, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.6425294494628906, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.523326110839844, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.445402297973633, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.356720962524414, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.2880388641357423, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.231616859436035, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.169484758377075, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.103161573410034, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.041432991027832, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9877764463424683, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9196225142478942, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.858443796634674, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7870002090930939, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7195350563526153, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6468162977695464, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.559521621465683, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4811823016405106, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4190863406658172, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3598858523368835, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.277499839067459, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.236228231191635, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1837035673856735, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1411625814437867, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1171361082792282, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0931111669540405, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1033304500579835, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.109754878282547, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1097684270143509, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.148158237040043, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.1912781584262848, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2259044295549393, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.2437776336073876, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.2889200031757355, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018183022774755954, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018179856943897902, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018172013680450617, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01816268641501665, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018150806059129537, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01812557104974985, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0180992685072124, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018136485530994832, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019048643526621164, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022152960821986197, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025868282355368137, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028634588001295924, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03031562360934913, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.032631874550133944, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0342866416554898, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03541947677731514, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03623414086177945, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036784290056675675, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03686299758031964, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037110981857404114, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03779631949029863, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038135649291798475, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.038831170629709956, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03928743173368275, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03989241486415267, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0406345668900758, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.041186668593436476, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04151716496795416, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04166538804769516, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04187836546450854, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.041618978530168535, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04157117316499352, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04124343600124121, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.040910676009953025, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04078900862485171, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04070757728070021, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04047711269930005, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04021835034713149, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.039731261190027, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03913399359211325, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03866022542119026, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03846984280273318, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03777591859921813, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.116511106491089, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.115324020385742, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1132800579071045, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1110923290252686, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1087541580200195, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1047022342681885, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0976152420043945, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0803792476654053, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9933879375457764, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7840285301208496, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.6418814659118652, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.5317625999450684, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.471825122833252, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.422351360321045, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.3932175636291504, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.3745832443237305, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3625967502593994, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.356886625289917, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3614115715026855, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3663547039031982, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3825607299804688, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.3841230869293213, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4061763286590576, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.423837900161743, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4470126628875732, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4849748611450195, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5433902740478516, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5891523361206055, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6522252559661865, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7261784076690674, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.739858865737915, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7907862663269043, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8192050457000732, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8557679653167725, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.899226427078247, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.962967872619629, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9685089588165283, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9872045516967773, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9485440254211426, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8936054706573486, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8585939407348633, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7697834968566895, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8088696002960205, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07364341085271318, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08305647840531562, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12993724621631597, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1758951642672573, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.2081949058693245, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.23606496862310816, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2484311554078996, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.27445551864156514, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.28202288667404946, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.28349944629014395, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2857142857142857, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2858988556662975, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2877445551864157, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2857142857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.290328534514581, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.28755998523440385, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.284422296050203, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2827611664820967, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2809154669619786, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.27260981912144705, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26208933185677374, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2619047619047619, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2528608342561831, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.26301218161683276, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2530454042081949, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2561830933923957, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2532299741602067, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25378368401624213, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2368032484311554, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23938722775932078, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.24603174603174602, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.25839793281653745, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2567368032484312, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.25950535252860835, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2515688445921004, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01018405132601963, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011722920851191651, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012730154068310717, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014668183969349812, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015172646663897024, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01891460212242548, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021328630920889758, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02819561484515022, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05949708877374942, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09356495357849141, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.12832588482141763, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.15777424722031164, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1725703157449424, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.19212708214273014, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.20752452747666464, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.21666904998463885, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.22141875527596958, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.22805955292418303, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22840250968471543, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.23046397430758095, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2335937883738678, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2381362302924078, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2357756086452273, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.23670286396082765, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.23734090956958798, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.23818865403117825, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2326777453815536, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.22269783613220318, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.22365400636857902, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.21902166602230647, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.22769096919003975, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.21972625560695958, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.22922289204654933, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.22316855021724571, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.2203879644401586, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.21149554552627595, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.20594823506664695, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.2083791030044212, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.2130283839105005, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.225227618006542, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.22575458165875326, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.22798508023801697, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.21660002502420025, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 1.858443796634674, "validation/loss_best": 2.3841230869293213, "validation/acc_best": 0.290328534514581, "validation/f1_best": 0.2381362302924078} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/config.yaml b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..70e5f66f671cc20d5b8bb55518a323aff0f9a03f --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (nsd_cococlip patch linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear +model: flat_mae +representation: patch +classifier: linear +dataset: nsd_cococlip +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..4deb75dd782f8a280539d119125ca4a00c004b42 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 9, "eval/id_best": 42, "eval/lr_best": 0.005699999999999999, "eval/wd_best": 0.05, "eval/train/loss": 3.0125246047973633, "eval/train/acc": 0.11555364332032331, "eval/train/acc_std": 0.0015183232110664866, "eval/train/f1": 0.0694388717688838, "eval/train/f1_std": 0.001292369967701993, "eval/validation/loss": 3.0744924545288086, "eval/validation/acc": 0.11332595053525286, "eval/validation/acc_std": 0.00365135032283621, "eval/validation/f1": 0.06398156835832226, "eval/validation/f1_std": 0.002658004700935807, "eval/test/loss": 3.0849170684814453, "eval/test/acc": 0.10742115027829313, "eval/test/acc_std": 0.0036072229828714836, "eval/test/f1": 0.05510529801947945, "eval/test/f1_std": 0.002394285839874631, "eval/testid/loss": 3.120675802230835, "eval/testid/acc": 0.09118951224214382, "eval/testid/acc_std": 0.003488809078670962, "eval/testid/f1": 0.0527984605930589, "eval/testid/f1_std": 0.002616201533975763} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log_best.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..e8316c0635d9493734a6b7cac865018234fa653f --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 9, "eval/best/id_best": 42, "eval/best/lr_best": 0.005699999999999999, "eval/best/wd_best": 0.05, "eval/best/train/loss": 3.0125246047973633, "eval/best/train/acc": 0.11555364332032331, "eval/best/train/acc_std": 0.0015183232110664866, "eval/best/train/f1": 0.0694388717688838, "eval/best/train/f1_std": 0.001292369967701993, "eval/best/validation/loss": 3.0744924545288086, "eval/best/validation/acc": 0.11332595053525286, "eval/best/validation/acc_std": 0.00365135032283621, "eval/best/validation/f1": 0.06398156835832226, "eval/best/validation/f1_std": 0.002658004700935807, "eval/best/test/loss": 3.0849170684814453, "eval/best/test/acc": 0.10742115027829313, "eval/best/test/acc_std": 0.0036072229828714836, "eval/best/test/f1": 0.05510529801947945, "eval/best/test/f1_std": 0.002394285839874631, "eval/best/testid/loss": 3.120675802230835, "eval/best/testid/acc": 0.09118951224214382, "eval/best/testid/acc_std": 0.003488809078670962, "eval/best/testid/f1": 0.0527984605930589, "eval/best/testid/f1_std": 0.002616201533975763} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log_last.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..78562d5d2dececb21924b4aadf0242e3707349ad --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 45, "eval/last/lr_best": 0.0093, "eval/last/wd_best": 0.05, "eval/last/train/loss": 2.8881781101226807, "eval/last/train/acc": 0.15864040074986938, "eval/last/train/acc_std": 0.0018108131174768393, "eval/last/train/f1": 0.10797583243671188, "eval/last/train/f1_std": 0.0015984771075117261, "eval/last/validation/loss": 3.009662628173828, "eval/last/validation/acc": 0.11037283130306386, "eval/last/validation/acc_std": 0.003654327304372387, "eval/last/validation/f1": 0.0635803202356986, "eval/last/validation/f1_std": 0.002931298599406075, "eval/last/test/loss": 2.9948039054870605, "eval/last/test/acc": 0.12523191094619665, "eval/last/test/acc_std": 0.0038503292097281437, "eval/last/test/f1": 0.06348908295563634, "eval/last/test/f1_std": 0.002491365399721176, "eval/last/testid/loss": 3.0109305381774902, "eval/last/testid/acc": 0.11875843454790823, "eval/last/testid/acc_std": 0.00413957052437481, "eval/last/testid/f1": 0.07884182744119443, "eval/last/testid/f1_std": 0.0032993581963108925} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..93ec4ff85599616f1bbde177770608146e3e9128 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,linear,nsd_cococlip,best,9,0.005699999999999999,0.05,42,"[19, 1.0]",train,3.0125246047973633,0.11555364332032331,0.0015183232110664866,0.0694388717688838,0.001292369967701993 +flat_mae,patch,linear,nsd_cococlip,best,9,0.005699999999999999,0.05,42,"[19, 1.0]",validation,3.0744924545288086,0.11332595053525286,0.00365135032283621,0.06398156835832226,0.002658004700935807 +flat_mae,patch,linear,nsd_cococlip,best,9,0.005699999999999999,0.05,42,"[19, 1.0]",test,3.0849170684814453,0.10742115027829313,0.0036072229828714836,0.05510529801947945,0.002394285839874631 +flat_mae,patch,linear,nsd_cococlip,best,9,0.005699999999999999,0.05,42,"[19, 1.0]",testid,3.120675802230835,0.09118951224214382,0.003488809078670962,0.0527984605930589,0.002616201533975763 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_table_best.csv b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..93ec4ff85599616f1bbde177770608146e3e9128 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,linear,nsd_cococlip,best,9,0.005699999999999999,0.05,42,"[19, 1.0]",train,3.0125246047973633,0.11555364332032331,0.0015183232110664866,0.0694388717688838,0.001292369967701993 +flat_mae,patch,linear,nsd_cococlip,best,9,0.005699999999999999,0.05,42,"[19, 1.0]",validation,3.0744924545288086,0.11332595053525286,0.00365135032283621,0.06398156835832226,0.002658004700935807 +flat_mae,patch,linear,nsd_cococlip,best,9,0.005699999999999999,0.05,42,"[19, 1.0]",test,3.0849170684814453,0.10742115027829313,0.0036072229828714836,0.05510529801947945,0.002394285839874631 +flat_mae,patch,linear,nsd_cococlip,best,9,0.005699999999999999,0.05,42,"[19, 1.0]",testid,3.120675802230835,0.09118951224214382,0.003488809078670962,0.0527984605930589,0.002616201533975763 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_table_last.csv b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..d4c5b2c3a68fb06ae729c913f1db6a51e5ff36f6 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,linear,nsd_cococlip,last,19,0.0093,0.05,45,"[31, 1.0]",train,2.8881781101226807,0.15864040074986938,0.0018108131174768393,0.10797583243671188,0.0015984771075117261 +flat_mae,patch,linear,nsd_cococlip,last,19,0.0093,0.05,45,"[31, 1.0]",validation,3.009662628173828,0.11037283130306386,0.003654327304372387,0.0635803202356986,0.002931298599406075 +flat_mae,patch,linear,nsd_cococlip,last,19,0.0093,0.05,45,"[31, 1.0]",test,2.9948039054870605,0.12523191094619665,0.0038503292097281437,0.06348908295563634,0.002491365399721176 +flat_mae,patch,linear,nsd_cococlip,last,19,0.0093,0.05,45,"[31, 1.0]",testid,3.0109305381774902,0.11875843454790823,0.00413957052437481,0.07884182744119443,0.0032993581963108925 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/log.txt b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e13b7986c18b2ff225eb253affc751e8c33abf4 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/log.txt @@ -0,0 +1,959 @@ +fMRI foundation model probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 23:07:28 +config: +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (nsd_cococlip patch linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear +model: flat_mae +representation: patch +classifier: linear +dataset: nsd_cococlip +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x LinearClassifier( + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 0.9M (0.9M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:23:46 lr: nan time: 3.5674 data: 3.1054 max mem: 3910 +train: [0] [ 20/400] eta: 0:03:12 lr: 0.000003 loss: 3.1981 (3.2034) grad: 0.2070 (0.2090) time: 0.3541 data: 0.0050 max mem: 3953 +train: [0] [ 40/400] eta: 0:02:33 lr: 0.000006 loss: 3.1932 (3.1966) grad: 0.2116 (0.2156) time: 0.3431 data: 0.0037 max mem: 3953 +train: [0] [ 60/400] eta: 0:02:14 lr: 0.000009 loss: 3.1818 (3.1929) grad: 0.2006 (0.2074) time: 0.3307 data: 0.0039 max mem: 3953 +train: [0] [ 80/400] eta: 0:02:03 lr: 0.000012 loss: 3.1909 (3.1945) grad: 0.1822 (0.2028) time: 0.3572 data: 0.0041 max mem: 3953 +train: [0] [100/400] eta: 0:01:54 lr: 0.000015 loss: 3.1909 (3.1869) grad: 0.1814 (0.1977) time: 0.3563 data: 0.0041 max mem: 3953 +train: [0] [120/400] eta: 0:01:44 lr: 0.000018 loss: 3.1683 (3.1834) grad: 0.1818 (0.1963) time: 0.3395 data: 0.0039 max mem: 3953 +train: [0] [140/400] eta: 0:01:36 lr: 0.000021 loss: 3.1700 (3.1836) grad: 0.1833 (0.1944) time: 0.3499 data: 0.0041 max mem: 3953 +train: [0] [160/400] eta: 0:01:28 lr: 0.000024 loss: 3.1784 (3.1820) grad: 0.1798 (0.1949) time: 0.3510 data: 0.0040 max mem: 3953 +train: [0] [180/400] eta: 0:01:20 lr: 0.000027 loss: 3.1571 (3.1797) grad: 0.1857 (0.1942) time: 0.3481 data: 0.0041 max mem: 3953 +train: [0] [200/400] eta: 0:01:12 lr: 0.000030 loss: 3.1683 (3.1799) grad: 0.1844 (0.1931) time: 0.3546 data: 0.0042 max mem: 3953 +train: [0] [220/400] eta: 0:01:05 lr: 0.000033 loss: 3.1718 (3.1789) grad: 0.1905 (0.1936) time: 0.3553 data: 0.0040 max mem: 3953 +train: [0] [240/400] eta: 0:00:57 lr: 0.000036 loss: 3.1708 (3.1781) grad: 0.1905 (0.1932) time: 0.3389 data: 0.0041 max mem: 3953 +train: [0] [260/400] eta: 0:00:50 lr: 0.000039 loss: 3.1610 (3.1769) grad: 0.1799 (0.1923) time: 0.3512 data: 0.0041 max mem: 3953 +train: [0] [280/400] eta: 0:00:43 lr: 0.000042 loss: 3.1577 (3.1760) grad: 0.1778 (0.1920) time: 0.3635 data: 0.0037 max mem: 3953 +train: [0] [300/400] eta: 0:00:36 lr: 0.000045 loss: 3.1480 (3.1744) grad: 0.1907 (0.1918) time: 0.3514 data: 0.0039 max mem: 3953 +train: [0] [320/400] eta: 0:00:28 lr: 0.000048 loss: 3.1549 (3.1737) grad: 0.1808 (0.1908) time: 0.3432 data: 0.0038 max mem: 3953 +train: [0] [340/400] eta: 0:00:21 lr: 0.000051 loss: 3.1612 (3.1731) grad: 0.1800 (0.1902) time: 0.3459 data: 0.0038 max mem: 3953 +train: [0] [360/400] eta: 0:00:14 lr: 0.000054 loss: 3.1443 (3.1709) grad: 0.1720 (0.1890) time: 0.3457 data: 0.0040 max mem: 3953 +train: [0] [380/400] eta: 0:00:07 lr: 0.000057 loss: 3.1376 (3.1703) grad: 0.1640 (0.1881) time: 0.3395 data: 0.0040 max mem: 3953 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.1678 (3.1697) grad: 0.1652 (0.1873) time: 0.3617 data: 0.0041 max mem: 3953 +train: [0] Total time: 0:02:22 (0.3573 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.1678 (3.1697) grad: 0.1652 (0.1873) +eval (validation): [0] [ 0/85] eta: 0:04:52 time: 3.4469 data: 3.2110 max mem: 3953 +eval (validation): [0] [20/85] eta: 0:00:32 time: 0.3450 data: 0.0209 max mem: 3953 +eval (validation): [0] [40/85] eta: 0:00:18 time: 0.3279 data: 0.0048 max mem: 3953 +eval (validation): [0] [60/85] eta: 0:00:09 time: 0.3342 data: 0.0032 max mem: 3953 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3348 data: 0.0042 max mem: 3953 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3295 data: 0.0039 max mem: 3953 +eval (validation): [0] Total time: 0:00:31 (0.3742 s / it) +cv: [0] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 3.174 acc: 0.073 f1: 0.019 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:29:04 lr: nan time: 4.3610 data: 4.0518 max mem: 3953 +train: [1] [ 20/400] eta: 0:03:29 lr: 0.000063 loss: 3.1765 (3.1738) grad: 0.1663 (0.1707) time: 0.3608 data: 0.0030 max mem: 3953 +train: [1] [ 40/400] eta: 0:02:39 lr: 0.000066 loss: 3.1567 (3.1610) grad: 0.1768 (0.1800) time: 0.3286 data: 0.0037 max mem: 3953 +train: [1] [ 60/400] eta: 0:02:23 lr: 0.000069 loss: 3.1465 (3.1547) grad: 0.1837 (0.1786) time: 0.3839 data: 0.0046 max mem: 3953 +train: [1] [ 80/400] eta: 0:02:10 lr: 0.000072 loss: 3.1456 (3.1520) grad: 0.1733 (0.1788) time: 0.3606 data: 0.0046 max mem: 3953 +train: [1] [100/400] eta: 0:01:59 lr: 0.000075 loss: 3.1530 (3.1532) grad: 0.1732 (0.1766) time: 0.3587 data: 0.0039 max mem: 3953 +train: [1] [120/400] eta: 0:01:49 lr: 0.000078 loss: 3.1526 (3.1500) grad: 0.1650 (0.1757) time: 0.3571 data: 0.0042 max mem: 3953 +train: [1] [140/400] eta: 0:01:42 lr: 0.000081 loss: 3.1381 (3.1491) grad: 0.1712 (0.1750) time: 0.3983 data: 0.0043 max mem: 3953 +train: [1] [160/400] eta: 0:01:33 lr: 0.000084 loss: 3.1423 (3.1492) grad: 0.1689 (0.1750) time: 0.3781 data: 0.0043 max mem: 3953 +train: [1] [180/400] eta: 0:01:25 lr: 0.000087 loss: 3.1423 (3.1494) grad: 0.1724 (0.1758) time: 0.3733 data: 0.0042 max mem: 3953 +train: [1] [200/400] eta: 0:01:17 lr: 0.000090 loss: 3.1419 (3.1482) grad: 0.1724 (0.1757) time: 0.3621 data: 0.0041 max mem: 3953 +train: [1] [220/400] eta: 0:01:08 lr: 0.000093 loss: 3.1429 (3.1485) grad: 0.1664 (0.1753) time: 0.3486 data: 0.0040 max mem: 3953 +train: [1] [240/400] eta: 0:01:00 lr: 0.000096 loss: 3.1374 (3.1482) grad: 0.1744 (0.1756) time: 0.3495 data: 0.0042 max mem: 3953 +train: [1] [260/400] eta: 0:00:52 lr: 0.000099 loss: 3.1361 (3.1470) grad: 0.1721 (0.1751) time: 0.3595 data: 0.0039 max mem: 3953 +train: [1] [280/400] eta: 0:00:45 lr: 0.000102 loss: 3.1391 (3.1468) grad: 0.1721 (0.1756) time: 0.3655 data: 0.0040 max mem: 3953 +train: [1] [300/400] eta: 0:00:37 lr: 0.000105 loss: 3.1326 (3.1465) grad: 0.1725 (0.1753) time: 0.3654 data: 0.0041 max mem: 3953 +train: [1] [320/400] eta: 0:00:30 lr: 0.000108 loss: 3.1420 (3.1459) grad: 0.1696 (0.1750) time: 0.3514 data: 0.0042 max mem: 3953 +train: [1] [340/400] eta: 0:00:22 lr: 0.000111 loss: 3.1396 (3.1452) grad: 0.1692 (0.1746) time: 0.3505 data: 0.0043 max mem: 3953 +train: [1] [360/400] eta: 0:00:14 lr: 0.000114 loss: 3.1390 (3.1449) grad: 0.1692 (0.1746) time: 0.3423 data: 0.0041 max mem: 3953 +train: [1] [380/400] eta: 0:00:07 lr: 0.000117 loss: 3.1416 (3.1447) grad: 0.1735 (0.1746) time: 0.3509 data: 0.0040 max mem: 3953 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 3.1630 (3.1453) grad: 0.1755 (0.1745) time: 0.3522 data: 0.0041 max mem: 3953 +train: [1] Total time: 0:02:28 (0.3702 s / it) +train: [1] Summary: lr: 0.000120 loss: 3.1630 (3.1453) grad: 0.1755 (0.1745) +eval (validation): [1] [ 0/85] eta: 0:04:57 time: 3.4962 data: 3.2536 max mem: 3953 +eval (validation): [1] [20/85] eta: 0:00:32 time: 0.3494 data: 0.0040 max mem: 3953 +eval (validation): [1] [40/85] eta: 0:00:19 time: 0.3420 data: 0.0036 max mem: 3953 +eval (validation): [1] [60/85] eta: 0:00:10 time: 0.3716 data: 0.0048 max mem: 3953 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3287 data: 0.0044 max mem: 3953 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3190 data: 0.0044 max mem: 3953 +eval (validation): [1] Total time: 0:00:32 (0.3860 s / it) +cv: [1] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 3.173 acc: 0.087 f1: 0.031 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:23:28 lr: nan time: 3.5221 data: 3.2747 max mem: 3953 +train: [2] [ 20/400] eta: 0:03:06 lr: 0.000123 loss: 3.1155 (3.1199) grad: 0.1628 (0.1641) time: 0.3391 data: 0.0042 max mem: 3953 +train: [2] [ 40/400] eta: 0:02:28 lr: 0.000126 loss: 3.1346 (3.1324) grad: 0.1628 (0.1606) time: 0.3322 data: 0.0036 max mem: 3953 +train: [2] [ 60/400] eta: 0:02:13 lr: 0.000129 loss: 3.1231 (3.1265) grad: 0.1635 (0.1653) time: 0.3500 data: 0.0043 max mem: 3953 +train: [2] [ 80/400] eta: 0:02:02 lr: 0.000132 loss: 3.1231 (3.1285) grad: 0.1802 (0.1693) time: 0.3593 data: 0.0039 max mem: 3953 +train: [2] [100/400] eta: 0:01:54 lr: 0.000135 loss: 3.1449 (3.1328) grad: 0.1802 (0.1715) time: 0.3645 data: 0.0041 max mem: 3953 +train: [2] [120/400] eta: 0:01:45 lr: 0.000138 loss: 3.1167 (3.1299) grad: 0.1799 (0.1708) time: 0.3508 data: 0.0041 max mem: 3953 +train: [2] [140/400] eta: 0:01:37 lr: 0.000141 loss: 3.1132 (3.1291) grad: 0.1678 (0.1697) time: 0.3650 data: 0.0039 max mem: 3953 +train: [2] [160/400] eta: 0:01:29 lr: 0.000144 loss: 3.1333 (3.1318) grad: 0.1717 (0.1706) time: 0.3513 data: 0.0039 max mem: 3953 +train: [2] [180/400] eta: 0:01:21 lr: 0.000147 loss: 3.1304 (3.1302) grad: 0.1770 (0.1704) time: 0.3505 data: 0.0042 max mem: 3953 +train: [2] [200/400] eta: 0:01:13 lr: 0.000150 loss: 3.1166 (3.1307) grad: 0.1701 (0.1701) time: 0.3528 data: 0.0039 max mem: 3953 +train: [2] [220/400] eta: 0:01:05 lr: 0.000153 loss: 3.1166 (3.1297) grad: 0.1755 (0.1703) time: 0.3482 data: 0.0042 max mem: 3953 +train: [2] [240/400] eta: 0:00:58 lr: 0.000156 loss: 3.1204 (3.1311) grad: 0.1755 (0.1708) time: 0.3494 data: 0.0040 max mem: 3953 +train: [2] [260/400] eta: 0:00:50 lr: 0.000159 loss: 3.1396 (3.1322) grad: 0.1762 (0.1708) time: 0.3547 data: 0.0040 max mem: 3953 +train: [2] [280/400] eta: 0:00:43 lr: 0.000162 loss: 3.1505 (3.1336) grad: 0.1758 (0.1709) time: 0.3576 data: 0.0041 max mem: 3953 +train: [2] [300/400] eta: 0:00:36 lr: 0.000165 loss: 3.1427 (3.1321) grad: 0.1729 (0.1710) time: 0.3512 data: 0.0040 max mem: 3953 +train: [2] [320/400] eta: 0:00:29 lr: 0.000168 loss: 3.1097 (3.1315) grad: 0.1746 (0.1714) time: 0.3687 data: 0.0042 max mem: 3953 +train: [2] [340/400] eta: 0:00:21 lr: 0.000171 loss: 3.1190 (3.1306) grad: 0.1780 (0.1718) time: 0.3411 data: 0.0036 max mem: 3953 +train: [2] [360/400] eta: 0:00:14 lr: 0.000174 loss: 3.1227 (3.1306) grad: 0.1739 (0.1718) time: 0.3465 data: 0.0039 max mem: 3953 +train: [2] [380/400] eta: 0:00:07 lr: 0.000177 loss: 3.1233 (3.1306) grad: 0.1670 (0.1715) time: 0.3547 data: 0.0040 max mem: 3953 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 3.1233 (3.1299) grad: 0.1658 (0.1714) time: 0.3600 data: 0.0044 max mem: 3953 +train: [2] Total time: 0:02:24 (0.3606 s / it) +train: [2] Summary: lr: 0.000180 loss: 3.1233 (3.1299) grad: 0.1658 (0.1714) +eval (validation): [2] [ 0/85] eta: 0:05:01 time: 3.5412 data: 3.2370 max mem: 3953 +eval (validation): [2] [20/85] eta: 0:00:34 time: 0.3865 data: 0.0050 max mem: 3953 +eval (validation): [2] [40/85] eta: 0:00:19 time: 0.3384 data: 0.0035 max mem: 3953 +eval (validation): [2] [60/85] eta: 0:00:10 time: 0.3358 data: 0.0044 max mem: 3953 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3284 data: 0.0040 max mem: 3953 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3176 data: 0.0039 max mem: 3953 +eval (validation): [2] Total time: 0:00:32 (0.3856 s / it) +cv: [2] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 3.158 acc: 0.091 f1: 0.042 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:24:58 lr: nan time: 3.7465 data: 3.4844 max mem: 3953 +train: [3] [ 20/400] eta: 0:03:10 lr: 0.000183 loss: 3.1334 (3.1263) grad: 0.1965 (0.1936) time: 0.3377 data: 0.0045 max mem: 3953 +train: [3] [ 40/400] eta: 0:02:35 lr: 0.000186 loss: 3.1334 (3.1191) grad: 0.1913 (0.1907) time: 0.3596 data: 0.0031 max mem: 3953 +train: [3] [ 60/400] eta: 0:02:20 lr: 0.000189 loss: 3.1114 (3.1220) grad: 0.1831 (0.1878) time: 0.3751 data: 0.0046 max mem: 3953 +train: [3] [ 80/400] eta: 0:02:10 lr: 0.000192 loss: 3.1187 (3.1260) grad: 0.1792 (0.1864) time: 0.3906 data: 0.0044 max mem: 3953 +train: [3] [100/400] eta: 0:01:59 lr: 0.000195 loss: 3.1325 (3.1302) grad: 0.1827 (0.1856) time: 0.3554 data: 0.0039 max mem: 3953 +train: [3] [120/400] eta: 0:01:49 lr: 0.000198 loss: 3.1318 (3.1284) grad: 0.1818 (0.1855) time: 0.3592 data: 0.0040 max mem: 3953 +train: [3] [140/400] eta: 0:01:40 lr: 0.000201 loss: 3.1216 (3.1287) grad: 0.1712 (0.1828) time: 0.3633 data: 0.0043 max mem: 3953 +train: [3] [160/400] eta: 0:01:32 lr: 0.000204 loss: 3.1353 (3.1310) grad: 0.1756 (0.1829) time: 0.3591 data: 0.0039 max mem: 3953 +train: [3] [180/400] eta: 0:01:24 lr: 0.000207 loss: 3.1353 (3.1327) grad: 0.1805 (0.1828) time: 0.3684 data: 0.0044 max mem: 3953 +train: [3] [200/400] eta: 0:01:15 lr: 0.000210 loss: 3.1394 (3.1349) grad: 0.1793 (0.1824) time: 0.3571 data: 0.0039 max mem: 3953 +train: [3] [220/400] eta: 0:01:07 lr: 0.000213 loss: 3.1374 (3.1326) grad: 0.1793 (0.1824) time: 0.3611 data: 0.0042 max mem: 3953 +train: [3] [240/400] eta: 0:01:00 lr: 0.000216 loss: 3.0953 (3.1314) grad: 0.1803 (0.1825) time: 0.3618 data: 0.0039 max mem: 3953 +train: [3] [260/400] eta: 0:00:52 lr: 0.000219 loss: 3.1188 (3.1315) grad: 0.1728 (0.1816) time: 0.3640 data: 0.0042 max mem: 3953 +train: [3] [280/400] eta: 0:00:45 lr: 0.000222 loss: 3.1385 (3.1324) grad: 0.1724 (0.1816) time: 0.4110 data: 0.0040 max mem: 3953 +train: [3] [300/400] eta: 0:00:37 lr: 0.000225 loss: 3.1436 (3.1327) grad: 0.1847 (0.1818) time: 0.3655 data: 0.0046 max mem: 3953 +train: [3] [320/400] eta: 0:00:30 lr: 0.000228 loss: 3.1319 (3.1314) grad: 0.1807 (0.1816) time: 0.3624 data: 0.0042 max mem: 3953 +train: [3] [340/400] eta: 0:00:22 lr: 0.000231 loss: 3.1010 (3.1308) grad: 0.1873 (0.1822) time: 0.3631 data: 0.0041 max mem: 3953 +train: [3] [360/400] eta: 0:00:14 lr: 0.000234 loss: 3.1239 (3.1304) grad: 0.1950 (0.1829) time: 0.3525 data: 0.0041 max mem: 3953 +train: [3] [380/400] eta: 0:00:07 lr: 0.000237 loss: 3.1338 (3.1308) grad: 0.1758 (0.1822) time: 0.3700 data: 0.0042 max mem: 3953 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 3.1338 (3.1304) grad: 0.1728 (0.1819) time: 0.3623 data: 0.0041 max mem: 3953 +train: [3] Total time: 0:02:29 (0.3737 s / it) +train: [3] Summary: lr: 0.000240 loss: 3.1338 (3.1304) grad: 0.1728 (0.1819) +eval (validation): [3] [ 0/85] eta: 0:04:59 time: 3.5237 data: 3.2372 max mem: 3953 +eval (validation): [3] [20/85] eta: 0:00:32 time: 0.3530 data: 0.0050 max mem: 3953 +eval (validation): [3] [40/85] eta: 0:00:19 time: 0.3543 data: 0.0044 max mem: 3953 +eval (validation): [3] [60/85] eta: 0:00:10 time: 0.3643 data: 0.0047 max mem: 3953 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3180 data: 0.0043 max mem: 3953 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.2991 data: 0.0037 max mem: 3953 +eval (validation): [3] Total time: 0:00:32 (0.3849 s / it) +cv: [3] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 3.228 acc: 0.091 f1: 0.041 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:23:09 lr: nan time: 3.4731 data: 3.1915 max mem: 3953 +train: [4] [ 20/400] eta: 0:03:07 lr: 0.000243 loss: 3.1040 (3.1031) grad: 0.1705 (0.1783) time: 0.3451 data: 0.0073 max mem: 3953 +train: [4] [ 40/400] eta: 0:02:27 lr: 0.000246 loss: 3.1068 (3.1148) grad: 0.1836 (0.1830) time: 0.3229 data: 0.0036 max mem: 3953 +train: [4] [ 60/400] eta: 0:02:11 lr: 0.000249 loss: 3.1055 (3.1090) grad: 0.1871 (0.1845) time: 0.3351 data: 0.0044 max mem: 3953 +train: [4] [ 80/400] eta: 0:01:58 lr: 0.000252 loss: 3.1329 (3.1193) grad: 0.1779 (0.1837) time: 0.3282 data: 0.0044 max mem: 3953 +train: [4] [100/400] eta: 0:01:48 lr: 0.000255 loss: 3.1551 (3.1246) grad: 0.1787 (0.1854) time: 0.3246 data: 0.0044 max mem: 3953 +train: [4] [120/400] eta: 0:01:40 lr: 0.000258 loss: 3.1260 (3.1227) grad: 0.1887 (0.1856) time: 0.3413 data: 0.0043 max mem: 3953 +train: [4] [140/400] eta: 0:01:33 lr: 0.000261 loss: 3.1036 (3.1217) grad: 0.1858 (0.1848) time: 0.3648 data: 0.0040 max mem: 3953 +train: [4] [160/400] eta: 0:01:26 lr: 0.000264 loss: 3.1092 (3.1232) grad: 0.1801 (0.1851) time: 0.3537 data: 0.0044 max mem: 3953 +train: [4] [180/400] eta: 0:01:18 lr: 0.000267 loss: 3.1115 (3.1223) grad: 0.1803 (0.1845) time: 0.3399 data: 0.0045 max mem: 3953 +train: [4] [200/400] eta: 0:01:11 lr: 0.000270 loss: 3.1115 (3.1225) grad: 0.1817 (0.1846) time: 0.3550 data: 0.0039 max mem: 3953 +train: [4] [220/400] eta: 0:01:03 lr: 0.000273 loss: 3.0973 (3.1212) grad: 0.1715 (0.1833) time: 0.3444 data: 0.0039 max mem: 3953 +train: [4] [240/400] eta: 0:00:56 lr: 0.000276 loss: 3.0973 (3.1203) grad: 0.1682 (0.1827) time: 0.3621 data: 0.0040 max mem: 3953 +train: [4] [260/400] eta: 0:00:49 lr: 0.000279 loss: 3.1010 (3.1179) grad: 0.1832 (0.1833) time: 0.3520 data: 0.0044 max mem: 3953 +train: [4] [280/400] eta: 0:00:42 lr: 0.000282 loss: 3.1010 (3.1174) grad: 0.1835 (0.1832) time: 0.3583 data: 0.0040 max mem: 3953 +train: [4] [300/400] eta: 0:00:35 lr: 0.000285 loss: 3.1124 (3.1180) grad: 0.1847 (0.1835) time: 0.3629 data: 0.0041 max mem: 3953 +train: [4] [320/400] eta: 0:00:28 lr: 0.000288 loss: 3.1216 (3.1189) grad: 0.1812 (0.1830) time: 0.3410 data: 0.0041 max mem: 3953 +train: [4] [340/400] eta: 0:00:21 lr: 0.000291 loss: 3.1137 (3.1186) grad: 0.1769 (0.1833) time: 0.3380 data: 0.0041 max mem: 3953 +train: [4] [360/400] eta: 0:00:14 lr: 0.000294 loss: 3.1287 (3.1204) grad: 0.1808 (0.1833) time: 0.3400 data: 0.0041 max mem: 3953 +train: [4] [380/400] eta: 0:00:07 lr: 0.000297 loss: 3.1430 (3.1200) grad: 0.1784 (0.1824) time: 0.3595 data: 0.0041 max mem: 3953 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 3.1059 (3.1204) grad: 0.1695 (0.1818) time: 0.3542 data: 0.0042 max mem: 3953 +train: [4] Total time: 0:02:21 (0.3543 s / it) +train: [4] Summary: lr: 0.000300 loss: 3.1059 (3.1204) grad: 0.1695 (0.1818) +eval (validation): [4] [ 0/85] eta: 0:04:57 time: 3.4997 data: 3.2098 max mem: 3953 +eval (validation): [4] [20/85] eta: 0:00:31 time: 0.3312 data: 0.0050 max mem: 3953 +eval (validation): [4] [40/85] eta: 0:00:18 time: 0.3199 data: 0.0043 max mem: 3953 +eval (validation): [4] [60/85] eta: 0:00:09 time: 0.3281 data: 0.0044 max mem: 3953 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3083 data: 0.0038 max mem: 3953 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3025 data: 0.0040 max mem: 3953 +eval (validation): [4] Total time: 0:00:30 (0.3616 s / it) +cv: [4] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 3.091 acc: 0.095 f1: 0.042 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:22:33 lr: nan time: 3.3835 data: 3.1455 max mem: 3953 +train: [5] [ 20/400] eta: 0:03:00 lr: 0.000300 loss: 3.0665 (3.0830) grad: 0.1774 (0.1837) time: 0.3284 data: 0.0040 max mem: 3953 +train: [5] [ 40/400] eta: 0:02:25 lr: 0.000300 loss: 3.0968 (3.0953) grad: 0.1758 (0.1813) time: 0.3322 data: 0.0046 max mem: 3953 +train: [5] [ 60/400] eta: 0:02:09 lr: 0.000300 loss: 3.0974 (3.0895) grad: 0.1694 (0.1779) time: 0.3280 data: 0.0040 max mem: 3953 +train: [5] [ 80/400] eta: 0:02:04 lr: 0.000300 loss: 3.0920 (3.0944) grad: 0.1733 (0.1765) time: 0.4171 data: 0.0044 max mem: 3953 +train: [5] [100/400] eta: 0:01:56 lr: 0.000300 loss: 3.1142 (3.0971) grad: 0.1769 (0.1778) time: 0.3848 data: 0.0047 max mem: 3953 +train: [5] [120/400] eta: 0:01:47 lr: 0.000300 loss: 3.1164 (3.1035) grad: 0.1822 (0.1791) time: 0.3594 data: 0.0043 max mem: 3953 +train: [5] [140/400] eta: 0:01:39 lr: 0.000300 loss: 3.1189 (3.1047) grad: 0.1819 (0.1794) time: 0.3885 data: 0.0042 max mem: 3953 +train: [5] [160/400] eta: 0:01:31 lr: 0.000299 loss: 3.1121 (3.1039) grad: 0.1752 (0.1786) time: 0.3664 data: 0.0044 max mem: 3953 +train: [5] [180/400] eta: 0:01:23 lr: 0.000299 loss: 3.0936 (3.1036) grad: 0.1771 (0.1789) time: 0.3675 data: 0.0044 max mem: 3953 +train: [5] [200/400] eta: 0:01:15 lr: 0.000299 loss: 3.1124 (3.1063) grad: 0.1791 (0.1786) time: 0.3647 data: 0.0041 max mem: 3953 +train: [5] [220/400] eta: 0:01:07 lr: 0.000299 loss: 3.1297 (3.1084) grad: 0.1718 (0.1782) time: 0.3618 data: 0.0041 max mem: 3953 +train: [5] [240/400] eta: 0:01:00 lr: 0.000299 loss: 3.1145 (3.1093) grad: 0.1718 (0.1778) time: 0.3584 data: 0.0042 max mem: 3953 +train: [5] [260/400] eta: 0:00:52 lr: 0.000299 loss: 3.1145 (3.1099) grad: 0.1737 (0.1776) time: 0.3546 data: 0.0041 max mem: 3953 +train: [5] [280/400] eta: 0:00:44 lr: 0.000298 loss: 3.1016 (3.1110) grad: 0.1702 (0.1771) time: 0.3581 data: 0.0040 max mem: 3953 +train: [5] [300/400] eta: 0:00:37 lr: 0.000298 loss: 3.0967 (3.1106) grad: 0.1663 (0.1766) time: 0.3441 data: 0.0043 max mem: 3953 +train: [5] [320/400] eta: 0:00:29 lr: 0.000298 loss: 3.0967 (3.1103) grad: 0.1784 (0.1769) time: 0.3470 data: 0.0041 max mem: 3953 +train: [5] [340/400] eta: 0:00:22 lr: 0.000298 loss: 3.1072 (3.1107) grad: 0.1808 (0.1768) time: 0.3574 data: 0.0045 max mem: 3953 +train: [5] [360/400] eta: 0:00:14 lr: 0.000297 loss: 3.1072 (3.1103) grad: 0.1800 (0.1771) time: 0.4017 data: 0.0049 max mem: 3953 +train: [5] [380/400] eta: 0:00:07 lr: 0.000297 loss: 3.1063 (3.1103) grad: 0.1800 (0.1778) time: 0.3562 data: 0.0041 max mem: 3953 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 3.1095 (3.1112) grad: 0.1931 (0.1786) time: 0.3616 data: 0.0040 max mem: 3953 +train: [5] Total time: 0:02:27 (0.3697 s / it) +train: [5] Summary: lr: 0.000297 loss: 3.1095 (3.1112) grad: 0.1931 (0.1786) +eval (validation): [5] [ 0/85] eta: 0:05:04 time: 3.5812 data: 3.2755 max mem: 3953 +eval (validation): [5] [20/85] eta: 0:00:31 time: 0.3291 data: 0.0038 max mem: 3953 +eval (validation): [5] [40/85] eta: 0:00:18 time: 0.3355 data: 0.0040 max mem: 3953 +eval (validation): [5] [60/85] eta: 0:00:09 time: 0.3503 data: 0.0043 max mem: 3953 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3425 data: 0.0045 max mem: 3953 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3389 data: 0.0043 max mem: 3953 +eval (validation): [5] Total time: 0:00:32 (0.3798 s / it) +cv: [5] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 3.150 acc: 0.098 f1: 0.050 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:22:30 lr: nan time: 3.3752 data: 3.1434 max mem: 3953 +train: [6] [ 20/400] eta: 0:03:17 lr: 0.000296 loss: 3.1253 (3.1209) grad: 0.1830 (0.1889) time: 0.3762 data: 0.0122 max mem: 3953 +train: [6] [ 40/400] eta: 0:02:35 lr: 0.000296 loss: 3.1070 (3.1104) grad: 0.1830 (0.1855) time: 0.3426 data: 0.0034 max mem: 3953 +train: [6] [ 60/400] eta: 0:02:17 lr: 0.000296 loss: 3.0811 (3.1000) grad: 0.1768 (0.1858) time: 0.3500 data: 0.0043 max mem: 3953 +train: [6] [ 80/400] eta: 0:02:06 lr: 0.000295 loss: 3.0879 (3.1033) grad: 0.1872 (0.1851) time: 0.3647 data: 0.0040 max mem: 3953 +train: [6] [100/400] eta: 0:01:57 lr: 0.000295 loss: 3.1005 (3.0972) grad: 0.1817 (0.1838) time: 0.3713 data: 0.0043 max mem: 3953 +train: [6] [120/400] eta: 0:01:47 lr: 0.000295 loss: 3.0848 (3.1014) grad: 0.1653 (0.1811) time: 0.3567 data: 0.0039 max mem: 3953 +train: [6] [140/400] eta: 0:01:39 lr: 0.000294 loss: 3.1202 (3.1017) grad: 0.1650 (0.1798) time: 0.3572 data: 0.0039 max mem: 3953 +train: [6] [160/400] eta: 0:01:30 lr: 0.000294 loss: 3.1202 (3.1021) grad: 0.1758 (0.1794) time: 0.3529 data: 0.0040 max mem: 3953 +train: [6] [180/400] eta: 0:01:22 lr: 0.000293 loss: 3.1049 (3.1015) grad: 0.1788 (0.1797) time: 0.3706 data: 0.0041 max mem: 3953 +train: [6] [200/400] eta: 0:01:15 lr: 0.000293 loss: 3.0860 (3.1007) grad: 0.1815 (0.1807) time: 0.3595 data: 0.0042 max mem: 3953 +train: [6] [220/400] eta: 0:01:07 lr: 0.000292 loss: 3.0948 (3.1014) grad: 0.1846 (0.1805) time: 0.3521 data: 0.0040 max mem: 3953 +train: [6] [240/400] eta: 0:00:59 lr: 0.000292 loss: 3.0974 (3.1004) grad: 0.1846 (0.1816) time: 0.3626 data: 0.0041 max mem: 3953 +train: [6] [260/400] eta: 0:00:52 lr: 0.000291 loss: 3.0910 (3.1021) grad: 0.1856 (0.1815) time: 0.3682 data: 0.0043 max mem: 3953 +train: [6] [280/400] eta: 0:00:44 lr: 0.000291 loss: 3.0977 (3.1020) grad: 0.1821 (0.1818) time: 0.3508 data: 0.0042 max mem: 3953 +train: [6] [300/400] eta: 0:00:36 lr: 0.000290 loss: 3.1083 (3.1039) grad: 0.1821 (0.1815) time: 0.3610 data: 0.0041 max mem: 3953 +train: [6] [320/400] eta: 0:00:29 lr: 0.000290 loss: 3.1265 (3.1045) grad: 0.1863 (0.1817) time: 0.3776 data: 0.0043 max mem: 3953 +train: [6] [340/400] eta: 0:00:22 lr: 0.000289 loss: 3.1087 (3.1060) grad: 0.1721 (0.1810) time: 0.3513 data: 0.0045 max mem: 3953 +train: [6] [360/400] eta: 0:00:14 lr: 0.000288 loss: 3.1087 (3.1049) grad: 0.1712 (0.1806) time: 0.3470 data: 0.0036 max mem: 3953 +train: [6] [380/400] eta: 0:00:07 lr: 0.000288 loss: 3.0934 (3.1048) grad: 0.1801 (0.1811) time: 0.3723 data: 0.0040 max mem: 3953 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 3.1123 (3.1048) grad: 0.1834 (0.1811) time: 0.3892 data: 0.0042 max mem: 3953 +train: [6] Total time: 0:02:27 (0.3693 s / it) +train: [6] Summary: lr: 0.000287 loss: 3.1123 (3.1048) grad: 0.1834 (0.1811) +eval (validation): [6] [ 0/85] eta: 0:04:41 time: 3.3090 data: 3.0881 max mem: 3953 +eval (validation): [6] [20/85] eta: 0:00:34 time: 0.3894 data: 0.0320 max mem: 3953 +eval (validation): [6] [40/85] eta: 0:00:19 time: 0.3356 data: 0.0041 max mem: 3953 +eval (validation): [6] [60/85] eta: 0:00:10 time: 0.3332 data: 0.0046 max mem: 3953 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3150 data: 0.0044 max mem: 3953 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3138 data: 0.0042 max mem: 3953 +eval (validation): [6] Total time: 0:00:32 (0.3807 s / it) +cv: [6] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 3.196 acc: 0.099 f1: 0.042 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:24:22 lr: nan time: 3.6570 data: 3.3460 max mem: 3953 +train: [7] [ 20/400] eta: 0:03:21 lr: 0.000286 loss: 3.0635 (3.0792) grad: 0.1840 (0.1881) time: 0.3731 data: 0.0047 max mem: 3953 +train: [7] [ 40/400] eta: 0:02:38 lr: 0.000286 loss: 3.0760 (3.0822) grad: 0.1762 (0.1820) time: 0.3479 data: 0.0035 max mem: 3953 +train: [7] [ 60/400] eta: 0:02:20 lr: 0.000285 loss: 3.0905 (3.0834) grad: 0.1752 (0.1800) time: 0.3540 data: 0.0042 max mem: 3953 +train: [7] [ 80/400] eta: 0:02:08 lr: 0.000284 loss: 3.0865 (3.0834) grad: 0.1763 (0.1791) time: 0.3664 data: 0.0042 max mem: 3953 +train: [7] [100/400] eta: 0:01:58 lr: 0.000284 loss: 3.0686 (3.0845) grad: 0.1732 (0.1762) time: 0.3697 data: 0.0045 max mem: 3953 +train: [7] [120/400] eta: 0:01:49 lr: 0.000283 loss: 3.1011 (3.0854) grad: 0.1708 (0.1755) time: 0.3755 data: 0.0043 max mem: 3953 +train: [7] [140/400] eta: 0:01:41 lr: 0.000282 loss: 3.1105 (3.0871) grad: 0.1733 (0.1763) time: 0.3759 data: 0.0041 max mem: 3953 +train: [7] [160/400] eta: 0:01:32 lr: 0.000282 loss: 3.0936 (3.0887) grad: 0.1789 (0.1768) time: 0.3584 data: 0.0043 max mem: 3953 +train: [7] [180/400] eta: 0:01:24 lr: 0.000281 loss: 3.1129 (3.0921) grad: 0.1803 (0.1776) time: 0.3638 data: 0.0038 max mem: 3953 +train: [7] [200/400] eta: 0:01:16 lr: 0.000280 loss: 3.1007 (3.0889) grad: 0.1808 (0.1778) time: 0.3647 data: 0.0040 max mem: 3953 +train: [7] [220/400] eta: 0:01:08 lr: 0.000279 loss: 3.1010 (3.0917) grad: 0.1753 (0.1777) time: 0.3627 data: 0.0039 max mem: 3953 +train: [7] [240/400] eta: 0:01:00 lr: 0.000278 loss: 3.1010 (3.0915) grad: 0.1705 (0.1771) time: 0.3612 data: 0.0039 max mem: 3953 +train: [7] [260/400] eta: 0:00:52 lr: 0.000278 loss: 3.1001 (3.0935) grad: 0.1622 (0.1760) time: 0.3768 data: 0.0044 max mem: 3953 +train: [7] [280/400] eta: 0:00:45 lr: 0.000277 loss: 3.1045 (3.0951) grad: 0.1668 (0.1768) time: 0.3922 data: 0.0043 max mem: 3953 +train: [7] [300/400] eta: 0:00:37 lr: 0.000276 loss: 3.0915 (3.0949) grad: 0.1785 (0.1772) time: 0.3686 data: 0.0046 max mem: 3953 +train: [7] [320/400] eta: 0:00:30 lr: 0.000275 loss: 3.0946 (3.0951) grad: 0.1726 (0.1768) time: 0.3638 data: 0.0043 max mem: 3953 +train: [7] [340/400] eta: 0:00:22 lr: 0.000274 loss: 3.0876 (3.0948) grad: 0.1738 (0.1772) time: 0.3742 data: 0.0042 max mem: 3953 +train: [7] [360/400] eta: 0:00:15 lr: 0.000273 loss: 3.0760 (3.0940) grad: 0.1800 (0.1768) time: 0.3631 data: 0.0041 max mem: 3953 +train: [7] [380/400] eta: 0:00:07 lr: 0.000272 loss: 3.1030 (3.0944) grad: 0.1805 (0.1772) time: 0.3660 data: 0.0043 max mem: 3953 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 3.1055 (3.0947) grad: 0.1807 (0.1772) time: 0.3549 data: 0.0043 max mem: 3953 +train: [7] Total time: 0:02:30 (0.3751 s / it) +train: [7] Summary: lr: 0.000271 loss: 3.1055 (3.0947) grad: 0.1807 (0.1772) +eval (validation): [7] [ 0/85] eta: 0:05:17 time: 3.7374 data: 3.4183 max mem: 3953 +eval (validation): [7] [20/85] eta: 0:00:37 time: 0.4171 data: 0.0054 max mem: 3953 +eval (validation): [7] [40/85] eta: 0:00:21 time: 0.3538 data: 0.0040 max mem: 3953 +eval (validation): [7] [60/85] eta: 0:00:10 time: 0.3331 data: 0.0048 max mem: 3953 +eval (validation): [7] [80/85] eta: 0:00:02 time: 0.3471 data: 0.0044 max mem: 3953 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3303 data: 0.0043 max mem: 3953 +eval (validation): [7] Total time: 0:00:34 (0.4041 s / it) +cv: [7] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 3.151 acc: 0.100 f1: 0.056 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:24:16 lr: nan time: 3.6419 data: 3.3329 max mem: 3953 +train: [8] [ 20/400] eta: 0:03:32 lr: 0.000270 loss: 3.0653 (3.0777) grad: 0.1779 (0.1810) time: 0.4039 data: 0.0040 max mem: 3953 +train: [8] [ 40/400] eta: 0:02:44 lr: 0.000270 loss: 3.0684 (3.0848) grad: 0.1736 (0.1784) time: 0.3507 data: 0.0036 max mem: 3953 +train: [8] [ 60/400] eta: 0:02:26 lr: 0.000269 loss: 3.0785 (3.0875) grad: 0.1736 (0.1790) time: 0.3744 data: 0.0045 max mem: 3953 +train: [8] [ 80/400] eta: 0:02:12 lr: 0.000268 loss: 3.0786 (3.0843) grad: 0.1793 (0.1796) time: 0.3698 data: 0.0038 max mem: 3953 +train: [8] [100/400] eta: 0:02:03 lr: 0.000267 loss: 3.0681 (3.0797) grad: 0.1815 (0.1805) time: 0.3938 data: 0.0043 max mem: 3953 +train: [8] [120/400] eta: 0:01:53 lr: 0.000266 loss: 3.0613 (3.0778) grad: 0.1779 (0.1808) time: 0.3770 data: 0.0043 max mem: 3953 +train: [8] [140/400] eta: 0:01:43 lr: 0.000265 loss: 3.0647 (3.0776) grad: 0.1790 (0.1817) time: 0.3453 data: 0.0038 max mem: 3953 +train: [8] [160/400] eta: 0:01:33 lr: 0.000264 loss: 3.0841 (3.0796) grad: 0.1788 (0.1816) time: 0.3472 data: 0.0040 max mem: 3953 +train: [8] [180/400] eta: 0:01:25 lr: 0.000263 loss: 3.0993 (3.0809) grad: 0.1807 (0.1819) time: 0.3634 data: 0.0038 max mem: 3953 +train: [8] [200/400] eta: 0:01:16 lr: 0.000262 loss: 3.0985 (3.0823) grad: 0.1864 (0.1811) time: 0.3567 data: 0.0042 max mem: 3953 +train: [8] [220/400] eta: 0:01:08 lr: 0.000260 loss: 3.0910 (3.0811) grad: 0.1673 (0.1800) time: 0.3695 data: 0.0044 max mem: 3953 +train: [8] [240/400] eta: 0:01:01 lr: 0.000259 loss: 3.0785 (3.0830) grad: 0.1725 (0.1800) time: 0.3622 data: 0.0042 max mem: 3953 +train: [8] [260/400] eta: 0:00:53 lr: 0.000258 loss: 3.0783 (3.0827) grad: 0.1808 (0.1805) time: 0.3465 data: 0.0040 max mem: 3953 +train: [8] [280/400] eta: 0:00:45 lr: 0.000257 loss: 3.0688 (3.0844) grad: 0.1798 (0.1801) time: 0.3481 data: 0.0043 max mem: 3953 +train: [8] [300/400] eta: 0:00:37 lr: 0.000256 loss: 3.0966 (3.0854) grad: 0.1832 (0.1813) time: 0.3498 data: 0.0044 max mem: 3953 +train: [8] [320/400] eta: 0:00:29 lr: 0.000255 loss: 3.1006 (3.0873) grad: 0.1933 (0.1818) time: 0.3624 data: 0.0043 max mem: 3953 +train: [8] [340/400] eta: 0:00:22 lr: 0.000254 loss: 3.0991 (3.0865) grad: 0.1799 (0.1819) time: 0.3612 data: 0.0038 max mem: 3953 +train: [8] [360/400] eta: 0:00:14 lr: 0.000253 loss: 3.0778 (3.0880) grad: 0.1794 (0.1820) time: 0.3564 data: 0.0041 max mem: 3953 +train: [8] [380/400] eta: 0:00:07 lr: 0.000252 loss: 3.1029 (3.0886) grad: 0.1907 (0.1831) time: 0.3599 data: 0.0039 max mem: 3953 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 3.1029 (3.0905) grad: 0.2042 (0.1835) time: 0.3639 data: 0.0044 max mem: 3953 +train: [8] Total time: 0:02:28 (0.3716 s / it) +train: [8] Summary: lr: 0.000250 loss: 3.1029 (3.0905) grad: 0.2042 (0.1835) +eval (validation): [8] [ 0/85] eta: 0:04:58 time: 3.5115 data: 3.2597 max mem: 3953 +eval (validation): [8] [20/85] eta: 0:00:33 time: 0.3729 data: 0.0051 max mem: 3953 +eval (validation): [8] [40/85] eta: 0:00:20 time: 0.3921 data: 0.0036 max mem: 3953 +eval (validation): [8] [60/85] eta: 0:00:10 time: 0.3690 data: 0.0043 max mem: 3953 +eval (validation): [8] [80/85] eta: 0:00:02 time: 0.3320 data: 0.0041 max mem: 3953 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3268 data: 0.0036 max mem: 3953 +eval (validation): [8] Total time: 0:00:34 (0.4048 s / it) +cv: [8] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 3.059 acc: 0.100 f1: 0.057 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:23:26 lr: nan time: 3.5153 data: 3.2116 max mem: 3953 +train: [9] [ 20/400] eta: 0:03:09 lr: 0.000249 loss: 3.0933 (3.0844) grad: 0.1806 (0.1800) time: 0.3488 data: 0.0062 max mem: 3953 +train: [9] [ 40/400] eta: 0:02:35 lr: 0.000248 loss: 3.0911 (3.0867) grad: 0.1782 (0.1785) time: 0.3631 data: 0.0034 max mem: 3953 +train: [9] [ 60/400] eta: 0:02:20 lr: 0.000247 loss: 3.0860 (3.0868) grad: 0.1742 (0.1773) time: 0.3717 data: 0.0044 max mem: 3953 +train: [9] [ 80/400] eta: 0:02:07 lr: 0.000246 loss: 3.0771 (3.0807) grad: 0.1684 (0.1747) time: 0.3596 data: 0.0044 max mem: 3953 +train: [9] [100/400] eta: 0:01:57 lr: 0.000244 loss: 3.0748 (3.0801) grad: 0.1676 (0.1733) time: 0.3518 data: 0.0044 max mem: 3953 +train: [9] [120/400] eta: 0:01:47 lr: 0.000243 loss: 3.0748 (3.0789) grad: 0.1687 (0.1759) time: 0.3535 data: 0.0042 max mem: 3953 +train: [9] [140/400] eta: 0:01:38 lr: 0.000242 loss: 3.0745 (3.0807) grad: 0.1697 (0.1763) time: 0.3561 data: 0.0046 max mem: 3953 +train: [9] [160/400] eta: 0:01:30 lr: 0.000241 loss: 3.0715 (3.0799) grad: 0.1720 (0.1756) time: 0.3448 data: 0.0042 max mem: 3953 +train: [9] [180/400] eta: 0:01:22 lr: 0.000240 loss: 3.0910 (3.0812) grad: 0.1720 (0.1759) time: 0.3542 data: 0.0044 max mem: 3953 +train: [9] [200/400] eta: 0:01:14 lr: 0.000238 loss: 3.0756 (3.0797) grad: 0.1803 (0.1761) time: 0.3716 data: 0.0044 max mem: 3953 +train: [9] [220/400] eta: 0:01:06 lr: 0.000237 loss: 3.0805 (3.0811) grad: 0.1718 (0.1761) time: 0.3566 data: 0.0046 max mem: 3953 +train: [9] [240/400] eta: 0:00:58 lr: 0.000236 loss: 3.0954 (3.0834) grad: 0.1814 (0.1767) time: 0.3169 data: 0.0042 max mem: 3953 +train: [9] [260/400] eta: 0:00:50 lr: 0.000234 loss: 3.1093 (3.0850) grad: 0.1814 (0.1772) time: 0.3185 data: 0.0040 max mem: 3953 +train: [9] [280/400] eta: 0:00:43 lr: 0.000233 loss: 3.1073 (3.0855) grad: 0.1792 (0.1772) time: 0.3334 data: 0.0043 max mem: 3953 +train: [9] [300/400] eta: 0:00:36 lr: 0.000232 loss: 3.1020 (3.0853) grad: 0.1732 (0.1771) time: 0.3582 data: 0.0045 max mem: 3953 +train: [9] [320/400] eta: 0:00:28 lr: 0.000230 loss: 3.0739 (3.0858) grad: 0.1771 (0.1776) time: 0.3264 data: 0.0043 max mem: 3953 +train: [9] [340/400] eta: 0:00:21 lr: 0.000229 loss: 3.0684 (3.0843) grad: 0.1860 (0.1778) time: 0.3430 data: 0.0046 max mem: 3953 +train: [9] [360/400] eta: 0:00:14 lr: 0.000228 loss: 3.0552 (3.0830) grad: 0.1804 (0.1777) time: 0.3497 data: 0.0036 max mem: 3953 +train: [9] [380/400] eta: 0:00:07 lr: 0.000226 loss: 3.0641 (3.0837) grad: 0.1777 (0.1777) time: 0.3510 data: 0.0040 max mem: 3953 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 3.0664 (3.0837) grad: 0.1784 (0.1779) time: 0.3502 data: 0.0037 max mem: 3953 +train: [9] Total time: 0:02:22 (0.3572 s / it) +train: [9] Summary: lr: 0.000225 loss: 3.0664 (3.0837) grad: 0.1784 (0.1779) +eval (validation): [9] [ 0/85] eta: 0:05:08 time: 3.6319 data: 3.3005 max mem: 3953 +eval (validation): [9] [20/85] eta: 0:00:37 time: 0.4317 data: 0.0050 max mem: 3953 +eval (validation): [9] [40/85] eta: 0:00:20 time: 0.3361 data: 0.0043 max mem: 3953 +eval (validation): [9] [60/85] eta: 0:00:10 time: 0.3630 data: 0.0047 max mem: 3953 +eval (validation): [9] [80/85] eta: 0:00:02 time: 0.3597 data: 0.0043 max mem: 3953 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3411 data: 0.0040 max mem: 3953 +eval (validation): [9] Total time: 0:00:35 (0.4122 s / it) +cv: [9] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 3.074 acc: 0.113 f1: 0.064 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +train: [10] [ 0/400] eta: 0:24:08 lr: nan time: 3.6214 data: 3.3653 max mem: 3953 +train: [10] [ 20/400] eta: 0:03:24 lr: 0.000224 loss: 3.0646 (3.0815) grad: 0.1780 (0.1850) time: 0.3834 data: 0.0191 max mem: 3953 +train: [10] [ 40/400] eta: 0:02:42 lr: 0.000222 loss: 3.0656 (3.0785) grad: 0.1661 (0.1746) time: 0.3595 data: 0.0050 max mem: 3953 +train: [10] [ 60/400] eta: 0:02:22 lr: 0.000221 loss: 3.0850 (3.0839) grad: 0.1661 (0.1760) time: 0.3585 data: 0.0033 max mem: 3953 +train: [10] [ 80/400] eta: 0:02:11 lr: 0.000220 loss: 3.0860 (3.0877) grad: 0.1852 (0.1777) time: 0.3775 data: 0.0056 max mem: 3953 +train: [10] [100/400] eta: 0:02:00 lr: 0.000218 loss: 3.0783 (3.0836) grad: 0.1820 (0.1768) time: 0.3630 data: 0.0031 max mem: 3953 +train: [10] [120/400] eta: 0:01:50 lr: 0.000217 loss: 3.0636 (3.0818) grad: 0.1686 (0.1772) time: 0.3674 data: 0.0042 max mem: 3953 +train: [10] [140/400] eta: 0:01:41 lr: 0.000215 loss: 3.0666 (3.0803) grad: 0.1665 (0.1770) time: 0.3669 data: 0.0038 max mem: 3953 +train: [10] [160/400] eta: 0:01:33 lr: 0.000214 loss: 3.0687 (3.0805) grad: 0.1661 (0.1764) time: 0.3723 data: 0.0045 max mem: 3953 +train: [10] [180/400] eta: 0:01:24 lr: 0.000213 loss: 3.0648 (3.0784) grad: 0.1724 (0.1762) time: 0.3663 data: 0.0046 max mem: 3953 +train: [10] [200/400] eta: 0:01:17 lr: 0.000211 loss: 3.0641 (3.0775) grad: 0.1724 (0.1762) time: 0.3784 data: 0.0040 max mem: 3953 +train: [10] [220/400] eta: 0:01:09 lr: 0.000210 loss: 3.0674 (3.0761) grad: 0.1668 (0.1751) time: 0.3859 data: 0.0042 max mem: 3953 +train: [10] [240/400] eta: 0:01:01 lr: 0.000208 loss: 3.0441 (3.0725) grad: 0.1664 (0.1754) time: 0.3692 data: 0.0039 max mem: 3953 +train: [10] [260/400] eta: 0:00:53 lr: 0.000207 loss: 3.0441 (3.0725) grad: 0.1732 (0.1754) time: 0.3897 data: 0.0044 max mem: 3953 +train: [10] [280/400] eta: 0:00:46 lr: 0.000205 loss: 3.0574 (3.0699) grad: 0.1753 (0.1757) time: 0.3778 data: 0.0048 max mem: 3953 +train: [10] [300/400] eta: 0:00:38 lr: 0.000204 loss: 3.0429 (3.0686) grad: 0.1685 (0.1750) time: 0.3730 data: 0.0051 max mem: 3953 +train: [10] [320/400] eta: 0:00:30 lr: 0.000202 loss: 3.0552 (3.0689) grad: 0.1671 (0.1747) time: 0.3683 data: 0.0046 max mem: 3953 +train: [10] [340/400] eta: 0:00:22 lr: 0.000201 loss: 3.0760 (3.0700) grad: 0.1622 (0.1740) time: 0.3751 data: 0.0045 max mem: 3953 +train: [10] [360/400] eta: 0:00:15 lr: 0.000199 loss: 3.0694 (3.0685) grad: 0.1679 (0.1742) time: 0.3661 data: 0.0042 max mem: 3953 +train: [10] [380/400] eta: 0:00:07 lr: 0.000198 loss: 3.0600 (3.0680) grad: 0.1731 (0.1741) time: 0.3549 data: 0.0043 max mem: 3953 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 3.0629 (3.0686) grad: 0.1765 (0.1742) time: 0.3829 data: 0.0046 max mem: 3953 +train: [10] Total time: 0:02:32 (0.3802 s / it) +train: [10] Summary: lr: 0.000196 loss: 3.0629 (3.0686) grad: 0.1765 (0.1742) +eval (validation): [10] [ 0/85] eta: 0:04:58 time: 3.5132 data: 3.2220 max mem: 3953 +eval (validation): [10] [20/85] eta: 0:00:32 time: 0.3462 data: 0.0138 max mem: 3953 +eval (validation): [10] [40/85] eta: 0:00:18 time: 0.3356 data: 0.0076 max mem: 3953 +eval (validation): [10] [60/85] eta: 0:00:10 time: 0.3810 data: 0.0046 max mem: 3953 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3418 data: 0.0035 max mem: 3953 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3362 data: 0.0040 max mem: 3953 +eval (validation): [10] Total time: 0:00:33 (0.3903 s / it) +cv: [10] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 3.090 acc: 0.104 f1: 0.061 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:24:21 lr: nan time: 3.6535 data: 3.3968 max mem: 3953 +train: [11] [ 20/400] eta: 0:03:18 lr: 0.000195 loss: 3.0688 (3.0624) grad: 0.1745 (0.1776) time: 0.3666 data: 0.0044 max mem: 3953 +train: [11] [ 40/400] eta: 0:02:39 lr: 0.000193 loss: 3.0690 (3.0754) grad: 0.1745 (0.1748) time: 0.3581 data: 0.0042 max mem: 3953 +train: [11] [ 60/400] eta: 0:02:23 lr: 0.000192 loss: 3.0687 (3.0683) grad: 0.1740 (0.1767) time: 0.3779 data: 0.0044 max mem: 3953 +train: [11] [ 80/400] eta: 0:02:13 lr: 0.000190 loss: 3.0692 (3.0683) grad: 0.1755 (0.1782) time: 0.4033 data: 0.0038 max mem: 3953 +train: [11] [100/400] eta: 0:02:02 lr: 0.000189 loss: 3.0759 (3.0708) grad: 0.1755 (0.1777) time: 0.3661 data: 0.0043 max mem: 3953 +train: [11] [120/400] eta: 0:01:51 lr: 0.000187 loss: 3.0719 (3.0687) grad: 0.1711 (0.1758) time: 0.3639 data: 0.0038 max mem: 3953 +train: [11] [140/400] eta: 0:01:43 lr: 0.000186 loss: 3.0629 (3.0651) grad: 0.1683 (0.1754) time: 0.3886 data: 0.0041 max mem: 3953 +train: [11] [160/400] eta: 0:01:35 lr: 0.000184 loss: 3.0379 (3.0631) grad: 0.1646 (0.1744) time: 0.3823 data: 0.0037 max mem: 3953 +train: [11] [180/400] eta: 0:01:27 lr: 0.000183 loss: 3.0433 (3.0625) grad: 0.1638 (0.1738) time: 0.4059 data: 0.0047 max mem: 3953 +train: [11] [200/400] eta: 0:01:19 lr: 0.000181 loss: 3.0467 (3.0611) grad: 0.1671 (0.1732) time: 0.3770 data: 0.0041 max mem: 3953 +train: [11] [220/400] eta: 0:01:10 lr: 0.000180 loss: 3.0381 (3.0608) grad: 0.1673 (0.1733) time: 0.3657 data: 0.0043 max mem: 3953 +train: [11] [240/400] eta: 0:01:02 lr: 0.000178 loss: 3.0740 (3.0634) grad: 0.1799 (0.1738) time: 0.3564 data: 0.0038 max mem: 3953 +train: [11] [260/400] eta: 0:00:54 lr: 0.000177 loss: 3.0646 (3.0627) grad: 0.1719 (0.1738) time: 0.3598 data: 0.0043 max mem: 3953 +train: [11] [280/400] eta: 0:00:46 lr: 0.000175 loss: 3.0342 (3.0604) grad: 0.1695 (0.1732) time: 0.3680 data: 0.0034 max mem: 3953 +train: [11] [300/400] eta: 0:00:38 lr: 0.000174 loss: 3.0314 (3.0596) grad: 0.1649 (0.1730) time: 0.3741 data: 0.0041 max mem: 3953 +train: [11] [320/400] eta: 0:00:30 lr: 0.000172 loss: 3.0688 (3.0604) grad: 0.1737 (0.1731) time: 0.3977 data: 0.0045 max mem: 3953 +train: [11] [340/400] eta: 0:00:23 lr: 0.000170 loss: 3.0594 (3.0603) grad: 0.1737 (0.1732) time: 0.3668 data: 0.0044 max mem: 3953 +train: [11] [360/400] eta: 0:00:15 lr: 0.000169 loss: 3.0568 (3.0600) grad: 0.1738 (0.1732) time: 0.3556 data: 0.0040 max mem: 3953 +train: [11] [380/400] eta: 0:00:07 lr: 0.000167 loss: 3.0568 (3.0594) grad: 0.1676 (0.1730) time: 0.3666 data: 0.0042 max mem: 3953 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 3.0557 (3.0591) grad: 0.1694 (0.1731) time: 0.3663 data: 0.0043 max mem: 3953 +train: [11] Total time: 0:02:32 (0.3818 s / it) +train: [11] Summary: lr: 0.000166 loss: 3.0557 (3.0591) grad: 0.1694 (0.1731) +eval (validation): [11] [ 0/85] eta: 0:04:50 time: 3.4176 data: 3.1835 max mem: 3953 +eval (validation): [11] [20/85] eta: 0:00:31 time: 0.3422 data: 0.0139 max mem: 3953 +eval (validation): [11] [40/85] eta: 0:00:19 time: 0.3836 data: 0.0040 max mem: 3953 +eval (validation): [11] [60/85] eta: 0:00:10 time: 0.3836 data: 0.0044 max mem: 3953 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3360 data: 0.0040 max mem: 3953 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3249 data: 0.0039 max mem: 3953 +eval (validation): [11] Total time: 0:00:33 (0.3975 s / it) +cv: [11] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 3.061 acc: 0.102 f1: 0.060 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:23:42 lr: nan time: 3.5561 data: 3.3069 max mem: 3953 +train: [12] [ 20/400] eta: 0:03:23 lr: 0.000164 loss: 3.0063 (3.0329) grad: 0.1775 (0.1747) time: 0.3858 data: 0.0045 max mem: 3953 +train: [12] [ 40/400] eta: 0:02:39 lr: 0.000163 loss: 3.0425 (3.0496) grad: 0.1685 (0.1741) time: 0.3432 data: 0.0036 max mem: 3953 +train: [12] [ 60/400] eta: 0:02:21 lr: 0.000161 loss: 3.0618 (3.0576) grad: 0.1672 (0.1714) time: 0.3644 data: 0.0040 max mem: 3953 +train: [12] [ 80/400] eta: 0:02:10 lr: 0.000160 loss: 3.0618 (3.0617) grad: 0.1672 (0.1724) time: 0.3742 data: 0.0044 max mem: 3953 +train: [12] [100/400] eta: 0:01:59 lr: 0.000158 loss: 3.0669 (3.0614) grad: 0.1647 (0.1699) time: 0.3681 data: 0.0038 max mem: 3953 +train: [12] [120/400] eta: 0:01:50 lr: 0.000156 loss: 3.0418 (3.0532) grad: 0.1624 (0.1686) time: 0.3749 data: 0.0040 max mem: 3953 +train: [12] [140/400] eta: 0:01:42 lr: 0.000155 loss: 3.0418 (3.0542) grad: 0.1558 (0.1676) time: 0.3798 data: 0.0039 max mem: 3953 +train: [12] [160/400] eta: 0:01:33 lr: 0.000153 loss: 3.0689 (3.0563) grad: 0.1696 (0.1685) time: 0.3691 data: 0.0041 max mem: 3953 +train: [12] [180/400] eta: 0:01:25 lr: 0.000152 loss: 3.0686 (3.0572) grad: 0.1622 (0.1679) time: 0.3697 data: 0.0041 max mem: 3953 +train: [12] [200/400] eta: 0:01:17 lr: 0.000150 loss: 3.0454 (3.0552) grad: 0.1622 (0.1678) time: 0.3730 data: 0.0040 max mem: 3953 +train: [12] [220/400] eta: 0:01:09 lr: 0.000149 loss: 3.0410 (3.0554) grad: 0.1702 (0.1685) time: 0.3736 data: 0.0040 max mem: 3953 +train: [12] [240/400] eta: 0:01:01 lr: 0.000147 loss: 3.0339 (3.0541) grad: 0.1702 (0.1690) time: 0.3745 data: 0.0043 max mem: 3953 +train: [12] [260/400] eta: 0:00:53 lr: 0.000145 loss: 3.0522 (3.0551) grad: 0.1629 (0.1686) time: 0.3739 data: 0.0041 max mem: 3953 +train: [12] [280/400] eta: 0:00:45 lr: 0.000144 loss: 3.0569 (3.0545) grad: 0.1635 (0.1686) time: 0.3689 data: 0.0041 max mem: 3953 +train: [12] [300/400] eta: 0:00:38 lr: 0.000142 loss: 3.0612 (3.0560) grad: 0.1727 (0.1691) time: 0.3748 data: 0.0039 max mem: 3953 +train: [12] [320/400] eta: 0:00:30 lr: 0.000141 loss: 3.0693 (3.0562) grad: 0.1724 (0.1689) time: 0.3735 data: 0.0039 max mem: 3953 +train: [12] [340/400] eta: 0:00:22 lr: 0.000139 loss: 3.0326 (3.0554) grad: 0.1640 (0.1689) time: 0.3779 data: 0.0038 max mem: 3953 +train: [12] [360/400] eta: 0:00:15 lr: 0.000138 loss: 3.0420 (3.0565) grad: 0.1698 (0.1691) time: 0.3795 data: 0.0041 max mem: 3953 +train: [12] [380/400] eta: 0:00:07 lr: 0.000136 loss: 3.0510 (3.0557) grad: 0.1645 (0.1689) time: 0.4021 data: 0.0043 max mem: 3953 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 3.0367 (3.0541) grad: 0.1623 (0.1686) time: 0.3823 data: 0.0039 max mem: 3953 +train: [12] Total time: 0:02:32 (0.3823 s / it) +train: [12] Summary: lr: 0.000134 loss: 3.0367 (3.0541) grad: 0.1623 (0.1686) +eval (validation): [12] [ 0/85] eta: 0:04:42 time: 3.3278 data: 3.1019 max mem: 3953 +eval (validation): [12] [20/85] eta: 0:00:33 time: 0.3815 data: 0.0221 max mem: 3953 +eval (validation): [12] [40/85] eta: 0:00:20 time: 0.3691 data: 0.0041 max mem: 3953 +eval (validation): [12] [60/85] eta: 0:00:10 time: 0.3881 data: 0.0042 max mem: 3953 +eval (validation): [12] [80/85] eta: 0:00:02 time: 0.3640 data: 0.0045 max mem: 3953 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3442 data: 0.0042 max mem: 3953 +eval (validation): [12] Total time: 0:00:34 (0.4104 s / it) +cv: [12] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 3.074 acc: 0.104 f1: 0.051 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:23:18 lr: nan time: 3.4968 data: 3.1871 max mem: 3953 +train: [13] [ 20/400] eta: 0:03:23 lr: 0.000133 loss: 3.0535 (3.0362) grad: 0.1739 (0.1710) time: 0.3875 data: 0.0036 max mem: 3953 +train: [13] [ 40/400] eta: 0:02:40 lr: 0.000131 loss: 3.0541 (3.0466) grad: 0.1693 (0.1660) time: 0.3508 data: 0.0042 max mem: 3953 +train: [13] [ 60/400] eta: 0:02:22 lr: 0.000130 loss: 3.0679 (3.0506) grad: 0.1693 (0.1696) time: 0.3650 data: 0.0041 max mem: 3953 +train: [13] [ 80/400] eta: 0:02:11 lr: 0.000128 loss: 3.0607 (3.0515) grad: 0.1721 (0.1708) time: 0.3817 data: 0.0040 max mem: 3953 +train: [13] [100/400] eta: 0:02:02 lr: 0.000127 loss: 3.0478 (3.0497) grad: 0.1721 (0.1717) time: 0.3979 data: 0.0043 max mem: 3953 +train: [13] [120/400] eta: 0:01:53 lr: 0.000125 loss: 3.0341 (3.0498) grad: 0.1739 (0.1722) time: 0.3964 data: 0.0044 max mem: 3953 +train: [13] [140/400] eta: 0:01:44 lr: 0.000124 loss: 3.0389 (3.0490) grad: 0.1719 (0.1720) time: 0.3886 data: 0.0044 max mem: 3953 +train: [13] [160/400] eta: 0:01:36 lr: 0.000122 loss: 3.0458 (3.0504) grad: 0.1697 (0.1709) time: 0.3888 data: 0.0044 max mem: 3953 +train: [13] [180/400] eta: 0:01:27 lr: 0.000120 loss: 3.0745 (3.0532) grad: 0.1612 (0.1704) time: 0.3865 data: 0.0042 max mem: 3953 +train: [13] [200/400] eta: 0:01:19 lr: 0.000119 loss: 3.0539 (3.0528) grad: 0.1682 (0.1704) time: 0.3670 data: 0.0041 max mem: 3953 +train: [13] [220/400] eta: 0:01:10 lr: 0.000117 loss: 3.0539 (3.0539) grad: 0.1705 (0.1709) time: 0.3697 data: 0.0039 max mem: 3953 +train: [13] [240/400] eta: 0:01:02 lr: 0.000116 loss: 3.0527 (3.0527) grad: 0.1657 (0.1706) time: 0.3698 data: 0.0042 max mem: 3953 +train: [13] [260/400] eta: 0:00:54 lr: 0.000114 loss: 3.0692 (3.0552) grad: 0.1673 (0.1708) time: 0.3751 data: 0.0041 max mem: 3953 +train: [13] [280/400] eta: 0:00:46 lr: 0.000113 loss: 3.0692 (3.0563) grad: 0.1664 (0.1701) time: 0.3862 data: 0.0041 max mem: 3953 +train: [13] [300/400] eta: 0:00:38 lr: 0.000111 loss: 3.0504 (3.0554) grad: 0.1573 (0.1701) time: 0.3806 data: 0.0041 max mem: 3953 +train: [13] [320/400] eta: 0:00:31 lr: 0.000110 loss: 3.0389 (3.0558) grad: 0.1562 (0.1693) time: 0.3790 data: 0.0042 max mem: 3953 +train: [13] [340/400] eta: 0:00:23 lr: 0.000108 loss: 3.0475 (3.0553) grad: 0.1558 (0.1692) time: 0.3616 data: 0.0043 max mem: 3953 +train: [13] [360/400] eta: 0:00:15 lr: 0.000107 loss: 3.0386 (3.0545) grad: 0.1716 (0.1694) time: 0.3584 data: 0.0042 max mem: 3953 +train: [13] [380/400] eta: 0:00:07 lr: 0.000105 loss: 3.0156 (3.0529) grad: 0.1653 (0.1688) time: 0.3750 data: 0.0041 max mem: 3953 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 3.0173 (3.0521) grad: 0.1621 (0.1691) time: 0.3774 data: 0.0042 max mem: 3953 +train: [13] Total time: 0:02:34 (0.3851 s / it) +train: [13] Summary: lr: 0.000104 loss: 3.0173 (3.0521) grad: 0.1621 (0.1691) +eval (validation): [13] [ 0/85] eta: 0:04:39 time: 3.2902 data: 3.0302 max mem: 3953 +eval (validation): [13] [20/85] eta: 0:00:32 time: 0.3543 data: 0.0103 max mem: 3953 +eval (validation): [13] [40/85] eta: 0:00:18 time: 0.3369 data: 0.0042 max mem: 3953 +eval (validation): [13] [60/85] eta: 0:00:10 time: 0.3651 data: 0.0051 max mem: 3953 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3547 data: 0.0031 max mem: 3953 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3435 data: 0.0035 max mem: 3953 +eval (validation): [13] Total time: 0:00:33 (0.3893 s / it) +cv: [13] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 3.029 acc: 0.113 f1: 0.071 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:30:28 lr: nan time: 4.5717 data: 4.2535 max mem: 3953 +train: [14] [ 20/400] eta: 0:03:42 lr: 0.000102 loss: 3.0365 (3.0437) grad: 0.1623 (0.1686) time: 0.3863 data: 0.0032 max mem: 3953 +train: [14] [ 40/400] eta: 0:02:50 lr: 0.000101 loss: 3.0597 (3.0519) grad: 0.1650 (0.1657) time: 0.3565 data: 0.0043 max mem: 3953 +train: [14] [ 60/400] eta: 0:02:31 lr: 0.000099 loss: 3.0490 (3.0459) grad: 0.1650 (0.1654) time: 0.3840 data: 0.0045 max mem: 3953 +train: [14] [ 80/400] eta: 0:02:21 lr: 0.000098 loss: 3.0484 (3.0545) grad: 0.1699 (0.1683) time: 0.4330 data: 0.0042 max mem: 3953 +train: [14] [100/400] eta: 0:02:07 lr: 0.000096 loss: 3.0510 (3.0535) grad: 0.1670 (0.1675) time: 0.3607 data: 0.0042 max mem: 3953 +train: [14] [120/400] eta: 0:01:56 lr: 0.000095 loss: 3.0481 (3.0527) grad: 0.1637 (0.1678) time: 0.3604 data: 0.0039 max mem: 3953 +train: [14] [140/400] eta: 0:01:45 lr: 0.000093 loss: 3.0481 (3.0528) grad: 0.1635 (0.1666) time: 0.3646 data: 0.0043 max mem: 3953 +train: [14] [160/400] eta: 0:01:36 lr: 0.000092 loss: 3.0287 (3.0492) grad: 0.1666 (0.1675) time: 0.3542 data: 0.0040 max mem: 3953 +train: [14] [180/400] eta: 0:01:27 lr: 0.000090 loss: 3.0101 (3.0460) grad: 0.1583 (0.1662) time: 0.3731 data: 0.0040 max mem: 3953 +train: [14] [200/400] eta: 0:01:18 lr: 0.000089 loss: 3.0291 (3.0452) grad: 0.1552 (0.1660) time: 0.3640 data: 0.0042 max mem: 3953 +train: [14] [220/400] eta: 0:01:10 lr: 0.000088 loss: 3.0370 (3.0433) grad: 0.1668 (0.1660) time: 0.3403 data: 0.0040 max mem: 3953 +train: [14] [240/400] eta: 0:01:01 lr: 0.000086 loss: 3.0438 (3.0436) grad: 0.1659 (0.1655) time: 0.3563 data: 0.0041 max mem: 3953 +train: [14] [260/400] eta: 0:00:54 lr: 0.000085 loss: 3.0526 (3.0454) grad: 0.1628 (0.1664) time: 0.3770 data: 0.0043 max mem: 3953 +train: [14] [280/400] eta: 0:00:46 lr: 0.000083 loss: 3.0437 (3.0448) grad: 0.1668 (0.1664) time: 0.3504 data: 0.0041 max mem: 3953 +train: [14] [300/400] eta: 0:00:38 lr: 0.000082 loss: 3.0369 (3.0446) grad: 0.1668 (0.1665) time: 0.3451 data: 0.0042 max mem: 3953 +train: [14] [320/400] eta: 0:00:30 lr: 0.000081 loss: 3.0222 (3.0440) grad: 0.1601 (0.1658) time: 0.3400 data: 0.0040 max mem: 3953 +train: [14] [340/400] eta: 0:00:22 lr: 0.000079 loss: 3.0392 (3.0446) grad: 0.1519 (0.1651) time: 0.3435 data: 0.0042 max mem: 3953 +train: [14] [360/400] eta: 0:00:15 lr: 0.000078 loss: 3.0392 (3.0440) grad: 0.1528 (0.1646) time: 0.3539 data: 0.0044 max mem: 3953 +train: [14] [380/400] eta: 0:00:07 lr: 0.000076 loss: 3.0362 (3.0445) grad: 0.1578 (0.1650) time: 0.3310 data: 0.0040 max mem: 3953 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 3.0582 (3.0443) grad: 0.1625 (0.1649) time: 0.3454 data: 0.0043 max mem: 3953 +train: [14] Total time: 0:02:28 (0.3719 s / it) +train: [14] Summary: lr: 0.000075 loss: 3.0582 (3.0443) grad: 0.1625 (0.1649) +eval (validation): [14] [ 0/85] eta: 0:04:53 time: 3.4479 data: 3.1634 max mem: 3953 +eval (validation): [14] [20/85] eta: 0:00:31 time: 0.3418 data: 0.0047 max mem: 3953 +eval (validation): [14] [40/85] eta: 0:00:18 time: 0.3512 data: 0.0342 max mem: 3953 +eval (validation): [14] [60/85] eta: 0:00:10 time: 0.3621 data: 0.0063 max mem: 3953 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3153 data: 0.0034 max mem: 3953 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3022 data: 0.0038 max mem: 3953 +eval (validation): [14] Total time: 0:00:32 (0.3789 s / it) +cv: [14] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 3.043 acc: 0.113 f1: 0.067 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:23:15 lr: nan time: 3.4875 data: 3.2516 max mem: 3953 +train: [15] [ 20/400] eta: 0:03:10 lr: 0.000074 loss: 3.0649 (3.0667) grad: 0.1718 (0.1720) time: 0.3534 data: 0.0163 max mem: 3953 +train: [15] [ 40/400] eta: 0:02:32 lr: 0.000072 loss: 3.0569 (3.0581) grad: 0.1653 (0.1668) time: 0.3389 data: 0.0273 max mem: 3953 +train: [15] [ 60/400] eta: 0:02:19 lr: 0.000071 loss: 3.0540 (3.0530) grad: 0.1619 (0.1630) time: 0.3868 data: 0.0221 max mem: 3953 +train: [15] [ 80/400] eta: 0:02:08 lr: 0.000070 loss: 3.0376 (3.0433) grad: 0.1496 (0.1625) time: 0.3786 data: 0.0031 max mem: 3953 +train: [15] [100/400] eta: 0:01:58 lr: 0.000068 loss: 3.0376 (3.0461) grad: 0.1506 (0.1606) time: 0.3695 data: 0.0037 max mem: 3953 +train: [15] [120/400] eta: 0:01:49 lr: 0.000067 loss: 3.0317 (3.0442) grad: 0.1554 (0.1614) time: 0.3627 data: 0.0041 max mem: 3953 +train: [15] [140/400] eta: 0:01:40 lr: 0.000066 loss: 3.0317 (3.0433) grad: 0.1558 (0.1603) time: 0.3592 data: 0.0044 max mem: 3953 +train: [15] [160/400] eta: 0:01:32 lr: 0.000064 loss: 3.0473 (3.0447) grad: 0.1515 (0.1601) time: 0.3720 data: 0.0042 max mem: 3953 +train: [15] [180/400] eta: 0:01:24 lr: 0.000063 loss: 3.0531 (3.0457) grad: 0.1601 (0.1608) time: 0.3794 data: 0.0040 max mem: 3953 +train: [15] [200/400] eta: 0:01:16 lr: 0.000062 loss: 3.0604 (3.0474) grad: 0.1689 (0.1623) time: 0.3760 data: 0.0041 max mem: 3953 +train: [15] [220/400] eta: 0:01:08 lr: 0.000061 loss: 3.0696 (3.0505) grad: 0.1700 (0.1624) time: 0.3445 data: 0.0047 max mem: 3953 +train: [15] [240/400] eta: 0:01:00 lr: 0.000059 loss: 3.0723 (3.0521) grad: 0.1667 (0.1635) time: 0.3590 data: 0.0039 max mem: 3953 +train: [15] [260/400] eta: 0:00:52 lr: 0.000058 loss: 3.0383 (3.0487) grad: 0.1667 (0.1633) time: 0.3653 data: 0.0038 max mem: 3953 +train: [15] [280/400] eta: 0:00:45 lr: 0.000057 loss: 3.0289 (3.0485) grad: 0.1618 (0.1633) time: 0.3787 data: 0.0043 max mem: 3953 +train: [15] [300/400] eta: 0:00:37 lr: 0.000056 loss: 3.0304 (3.0486) grad: 0.1618 (0.1639) time: 0.3626 data: 0.0042 max mem: 3953 +train: [15] [320/400] eta: 0:00:30 lr: 0.000054 loss: 3.0278 (3.0476) grad: 0.1642 (0.1640) time: 0.3588 data: 0.0041 max mem: 3953 +train: [15] [340/400] eta: 0:00:22 lr: 0.000053 loss: 3.0088 (3.0461) grad: 0.1612 (0.1638) time: 0.3470 data: 0.0040 max mem: 3953 +train: [15] [360/400] eta: 0:00:14 lr: 0.000052 loss: 3.0215 (3.0473) grad: 0.1645 (0.1638) time: 0.3528 data: 0.0042 max mem: 3953 +train: [15] [380/400] eta: 0:00:07 lr: 0.000051 loss: 3.0323 (3.0465) grad: 0.1660 (0.1640) time: 0.3462 data: 0.0037 max mem: 3953 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 3.0157 (3.0454) grad: 0.1604 (0.1639) time: 0.3425 data: 0.0038 max mem: 3953 +train: [15] Total time: 0:02:27 (0.3699 s / it) +train: [15] Summary: lr: 0.000050 loss: 3.0157 (3.0454) grad: 0.1604 (0.1639) +eval (validation): [15] [ 0/85] eta: 0:05:09 time: 3.6400 data: 3.3170 max mem: 3953 +eval (validation): [15] [20/85] eta: 0:00:34 time: 0.3738 data: 0.0041 max mem: 3953 +eval (validation): [15] [40/85] eta: 0:00:19 time: 0.3218 data: 0.0042 max mem: 3953 +eval (validation): [15] [60/85] eta: 0:00:10 time: 0.4013 data: 0.0052 max mem: 3953 +eval (validation): [15] [80/85] eta: 0:00:02 time: 0.3593 data: 0.0045 max mem: 3953 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3474 data: 0.0042 max mem: 3953 +eval (validation): [15] Total time: 0:00:34 (0.4035 s / it) +cv: [15] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 3.030 acc: 0.111 f1: 0.058 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:23:35 lr: nan time: 3.5386 data: 3.2817 max mem: 3953 +train: [16] [ 20/400] eta: 0:03:23 lr: 0.000048 loss: 3.0325 (3.0360) grad: 0.1615 (0.1670) time: 0.3845 data: 0.0038 max mem: 3953 +train: [16] [ 40/400] eta: 0:02:38 lr: 0.000047 loss: 3.0264 (3.0264) grad: 0.1615 (0.1636) time: 0.3408 data: 0.0039 max mem: 3953 +train: [16] [ 60/400] eta: 0:02:21 lr: 0.000046 loss: 3.0264 (3.0316) grad: 0.1560 (0.1619) time: 0.3696 data: 0.0043 max mem: 3953 +train: [16] [ 80/400] eta: 0:02:09 lr: 0.000045 loss: 3.0255 (3.0271) grad: 0.1597 (0.1616) time: 0.3634 data: 0.0040 max mem: 3953 +train: [16] [100/400] eta: 0:01:59 lr: 0.000044 loss: 3.0056 (3.0241) grad: 0.1559 (0.1606) time: 0.3819 data: 0.0041 max mem: 3953 +train: [16] [120/400] eta: 0:01:50 lr: 0.000043 loss: 3.0197 (3.0281) grad: 0.1559 (0.1610) time: 0.3732 data: 0.0042 max mem: 3953 +train: [16] [140/400] eta: 0:01:41 lr: 0.000042 loss: 3.0281 (3.0300) grad: 0.1560 (0.1607) time: 0.3731 data: 0.0043 max mem: 3953 +train: [16] [160/400] eta: 0:01:32 lr: 0.000041 loss: 3.0301 (3.0322) grad: 0.1560 (0.1611) time: 0.3437 data: 0.0042 max mem: 3953 +train: [16] [180/400] eta: 0:01:23 lr: 0.000040 loss: 3.0395 (3.0334) grad: 0.1559 (0.1599) time: 0.3439 data: 0.0042 max mem: 3953 +train: [16] [200/400] eta: 0:01:15 lr: 0.000039 loss: 3.0315 (3.0320) grad: 0.1559 (0.1601) time: 0.3434 data: 0.0041 max mem: 3953 +train: [16] [220/400] eta: 0:01:07 lr: 0.000038 loss: 3.0309 (3.0322) grad: 0.1499 (0.1588) time: 0.3625 data: 0.0045 max mem: 3953 +train: [16] [240/400] eta: 0:01:00 lr: 0.000036 loss: 3.0350 (3.0313) grad: 0.1532 (0.1588) time: 0.3652 data: 0.0040 max mem: 3953 +train: [16] [260/400] eta: 0:00:52 lr: 0.000035 loss: 3.0470 (3.0330) grad: 0.1534 (0.1583) time: 0.3719 data: 0.0042 max mem: 3953 +train: [16] [280/400] eta: 0:00:45 lr: 0.000034 loss: 3.0470 (3.0330) grad: 0.1522 (0.1585) time: 0.3875 data: 0.0045 max mem: 3953 +train: [16] [300/400] eta: 0:00:37 lr: 0.000033 loss: 3.0214 (3.0327) grad: 0.1581 (0.1589) time: 0.3755 data: 0.0044 max mem: 3953 +train: [16] [320/400] eta: 0:00:29 lr: 0.000032 loss: 3.0356 (3.0338) grad: 0.1581 (0.1586) time: 0.3561 data: 0.0044 max mem: 3953 +train: [16] [340/400] eta: 0:00:22 lr: 0.000031 loss: 3.0475 (3.0336) grad: 0.1599 (0.1593) time: 0.3508 data: 0.0042 max mem: 3953 +train: [16] [360/400] eta: 0:00:14 lr: 0.000031 loss: 3.0444 (3.0356) grad: 0.1661 (0.1594) time: 0.3635 data: 0.0038 max mem: 3953 +train: [16] [380/400] eta: 0:00:07 lr: 0.000030 loss: 3.0444 (3.0352) grad: 0.1694 (0.1597) time: 0.3497 data: 0.0039 max mem: 3953 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 3.0347 (3.0354) grad: 0.1590 (0.1590) time: 0.3439 data: 0.0040 max mem: 3953 +train: [16] Total time: 0:02:28 (0.3705 s / it) +train: [16] Summary: lr: 0.000029 loss: 3.0347 (3.0354) grad: 0.1590 (0.1590) +eval (validation): [16] [ 0/85] eta: 0:05:04 time: 3.5846 data: 3.3347 max mem: 3953 +eval (validation): [16] [20/85] eta: 0:00:33 time: 0.3611 data: 0.0043 max mem: 3953 +eval (validation): [16] [40/85] eta: 0:00:19 time: 0.3459 data: 0.0059 max mem: 3953 +eval (validation): [16] [60/85] eta: 0:00:10 time: 0.3861 data: 0.0034 max mem: 3953 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3391 data: 0.0039 max mem: 3953 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3295 data: 0.0038 max mem: 3953 +eval (validation): [16] Total time: 0:00:33 (0.3973 s / it) +cv: [16] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 3.020 acc: 0.107 f1: 0.062 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:23:07 lr: nan time: 3.4693 data: 3.2248 max mem: 3953 +train: [17] [ 20/400] eta: 0:03:13 lr: 0.000028 loss: 2.9822 (2.9922) grad: 0.1466 (0.1508) time: 0.3622 data: 0.0047 max mem: 3953 +train: [17] [ 40/400] eta: 0:02:35 lr: 0.000027 loss: 2.9940 (3.0075) grad: 0.1457 (0.1507) time: 0.3523 data: 0.0044 max mem: 3953 +train: [17] [ 60/400] eta: 0:02:19 lr: 0.000026 loss: 3.0208 (3.0135) grad: 0.1555 (0.1549) time: 0.3646 data: 0.0043 max mem: 3953 +train: [17] [ 80/400] eta: 0:02:07 lr: 0.000025 loss: 3.0136 (3.0171) grad: 0.1595 (0.1567) time: 0.3648 data: 0.0031 max mem: 3953 +train: [17] [100/400] eta: 0:01:57 lr: 0.000024 loss: 3.0353 (3.0214) grad: 0.1643 (0.1588) time: 0.3557 data: 0.0040 max mem: 3953 +train: [17] [120/400] eta: 0:01:48 lr: 0.000023 loss: 3.0443 (3.0197) grad: 0.1657 (0.1598) time: 0.3722 data: 0.0038 max mem: 3953 +train: [17] [140/400] eta: 0:01:41 lr: 0.000023 loss: 3.0373 (3.0215) grad: 0.1554 (0.1585) time: 0.3968 data: 0.0043 max mem: 3953 +train: [17] [160/400] eta: 0:01:32 lr: 0.000022 loss: 2.9885 (3.0183) grad: 0.1538 (0.1582) time: 0.3665 data: 0.0044 max mem: 3953 +train: [17] [180/400] eta: 0:01:24 lr: 0.000021 loss: 3.0156 (3.0226) grad: 0.1462 (0.1569) time: 0.3591 data: 0.0040 max mem: 3953 +train: [17] [200/400] eta: 0:01:15 lr: 0.000020 loss: 3.0451 (3.0240) grad: 0.1438 (0.1557) time: 0.3494 data: 0.0041 max mem: 3953 +train: [17] [220/400] eta: 0:01:08 lr: 0.000019 loss: 3.0274 (3.0231) grad: 0.1477 (0.1561) time: 0.3801 data: 0.0040 max mem: 3953 +train: [17] [240/400] eta: 0:01:01 lr: 0.000019 loss: 3.0217 (3.0228) grad: 0.1559 (0.1567) time: 0.4115 data: 0.0040 max mem: 3953 +train: [17] [260/400] eta: 0:00:53 lr: 0.000018 loss: 3.0299 (3.0233) grad: 0.1596 (0.1573) time: 0.3923 data: 0.0044 max mem: 3953 +train: [17] [280/400] eta: 0:00:45 lr: 0.000017 loss: 3.0406 (3.0247) grad: 0.1599 (0.1582) time: 0.3645 data: 0.0042 max mem: 3953 +train: [17] [300/400] eta: 0:00:37 lr: 0.000016 loss: 3.0333 (3.0241) grad: 0.1667 (0.1590) time: 0.3376 data: 0.0039 max mem: 3953 +train: [17] [320/400] eta: 0:00:30 lr: 0.000016 loss: 3.0047 (3.0242) grad: 0.1595 (0.1585) time: 0.3570 data: 0.0040 max mem: 3953 +train: [17] [340/400] eta: 0:00:22 lr: 0.000015 loss: 3.0230 (3.0252) grad: 0.1475 (0.1582) time: 0.3636 data: 0.0041 max mem: 3953 +train: [17] [360/400] eta: 0:00:15 lr: 0.000014 loss: 3.0316 (3.0255) grad: 0.1517 (0.1581) time: 0.3530 data: 0.0038 max mem: 3953 +train: [17] [380/400] eta: 0:00:07 lr: 0.000014 loss: 3.0132 (3.0249) grad: 0.1564 (0.1581) time: 0.3471 data: 0.0039 max mem: 3953 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 3.0107 (3.0249) grad: 0.1538 (0.1581) time: 0.3561 data: 0.0041 max mem: 3953 +train: [17] Total time: 0:02:29 (0.3735 s / it) +train: [17] Summary: lr: 0.000013 loss: 3.0107 (3.0249) grad: 0.1538 (0.1581) +eval (validation): [17] [ 0/85] eta: 0:05:07 time: 3.6224 data: 3.3695 max mem: 3953 +eval (validation): [17] [20/85] eta: 0:00:35 time: 0.3993 data: 0.0045 max mem: 3953 +eval (validation): [17] [40/85] eta: 0:00:19 time: 0.3288 data: 0.0037 max mem: 3953 +eval (validation): [17] [60/85] eta: 0:00:10 time: 0.3429 data: 0.0047 max mem: 3953 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3440 data: 0.0039 max mem: 3953 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3385 data: 0.0042 max mem: 3953 +eval (validation): [17] Total time: 0:00:33 (0.3933 s / it) +cv: [17] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 3.008 acc: 0.111 f1: 0.070 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:28:37 lr: nan time: 4.2927 data: 3.9800 max mem: 3953 +train: [18] [ 20/400] eta: 0:03:41 lr: 0.000012 loss: 3.0356 (3.0202) grad: 0.1556 (0.1541) time: 0.3975 data: 0.0036 max mem: 3953 +train: [18] [ 40/400] eta: 0:02:48 lr: 0.000012 loss: 3.0356 (3.0275) grad: 0.1543 (0.1543) time: 0.3496 data: 0.0036 max mem: 3953 +train: [18] [ 60/400] eta: 0:02:28 lr: 0.000011 loss: 3.0184 (3.0348) grad: 0.1449 (0.1532) time: 0.3665 data: 0.0041 max mem: 3953 +train: [18] [ 80/400] eta: 0:02:16 lr: 0.000011 loss: 3.0301 (3.0324) grad: 0.1488 (0.1537) time: 0.3943 data: 0.0039 max mem: 3953 +train: [18] [100/400] eta: 0:02:03 lr: 0.000010 loss: 3.0301 (3.0309) grad: 0.1510 (0.1532) time: 0.3642 data: 0.0041 max mem: 3953 +train: [18] [120/400] eta: 0:01:53 lr: 0.000009 loss: 3.0174 (3.0293) grad: 0.1514 (0.1549) time: 0.3611 data: 0.0043 max mem: 3953 +train: [18] [140/400] eta: 0:01:43 lr: 0.000009 loss: 3.0174 (3.0279) grad: 0.1524 (0.1546) time: 0.3712 data: 0.0040 max mem: 3953 +train: [18] [160/400] eta: 0:01:34 lr: 0.000008 loss: 3.0137 (3.0261) grad: 0.1566 (0.1556) time: 0.3630 data: 0.0041 max mem: 3953 +train: [18] [180/400] eta: 0:01:25 lr: 0.000008 loss: 3.0231 (3.0267) grad: 0.1598 (0.1557) time: 0.3322 data: 0.0040 max mem: 3953 +train: [18] [200/400] eta: 0:01:16 lr: 0.000007 loss: 3.0212 (3.0251) grad: 0.1587 (0.1557) time: 0.3323 data: 0.0040 max mem: 3953 +train: [18] [220/400] eta: 0:01:08 lr: 0.000007 loss: 3.0212 (3.0292) grad: 0.1525 (0.1558) time: 0.3584 data: 0.0040 max mem: 3953 +train: [18] [240/400] eta: 0:01:00 lr: 0.000006 loss: 3.0464 (3.0308) grad: 0.1580 (0.1565) time: 0.3729 data: 0.0040 max mem: 3953 +train: [18] [260/400] eta: 0:00:53 lr: 0.000006 loss: 3.0343 (3.0315) grad: 0.1634 (0.1564) time: 0.3663 data: 0.0040 max mem: 3953 +train: [18] [280/400] eta: 0:00:45 lr: 0.000006 loss: 3.0306 (3.0329) grad: 0.1571 (0.1561) time: 0.3525 data: 0.0041 max mem: 3953 +train: [18] [300/400] eta: 0:00:37 lr: 0.000005 loss: 3.0206 (3.0321) grad: 0.1628 (0.1568) time: 0.3622 data: 0.0041 max mem: 3953 +train: [18] [320/400] eta: 0:00:30 lr: 0.000005 loss: 3.0061 (3.0310) grad: 0.1614 (0.1566) time: 0.3603 data: 0.0038 max mem: 3953 +train: [18] [340/400] eta: 0:00:22 lr: 0.000004 loss: 3.0106 (3.0305) grad: 0.1573 (0.1570) time: 0.3727 data: 0.0042 max mem: 3953 +train: [18] [360/400] eta: 0:00:14 lr: 0.000004 loss: 3.0263 (3.0308) grad: 0.1573 (0.1569) time: 0.3533 data: 0.0036 max mem: 3953 +train: [18] [380/400] eta: 0:00:07 lr: 0.000004 loss: 3.0297 (3.0306) grad: 0.1619 (0.1575) time: 0.3611 data: 0.0042 max mem: 3953 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 3.0241 (3.0304) grad: 0.1596 (0.1572) time: 0.3826 data: 0.0039 max mem: 3953 +train: [18] Total time: 0:02:29 (0.3738 s / it) +train: [18] Summary: lr: 0.000003 loss: 3.0241 (3.0304) grad: 0.1596 (0.1572) +eval (validation): [18] [ 0/85] eta: 0:05:17 time: 3.7318 data: 3.4358 max mem: 3953 +eval (validation): [18] [20/85] eta: 0:00:36 time: 0.3982 data: 0.0494 max mem: 3953 +eval (validation): [18] [40/85] eta: 0:00:21 time: 0.3728 data: 0.0036 max mem: 3953 +eval (validation): [18] [60/85] eta: 0:00:10 time: 0.3585 data: 0.0042 max mem: 3953 +eval (validation): [18] [80/85] eta: 0:00:02 time: 0.3326 data: 0.0037 max mem: 3953 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3235 data: 0.0038 max mem: 3953 +eval (validation): [18] Total time: 0:00:34 (0.4049 s / it) +cv: [18] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 3.011 acc: 0.112 f1: 0.064 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:23:44 lr: nan time: 3.5618 data: 3.2996 max mem: 3953 +train: [19] [ 20/400] eta: 0:03:31 lr: 0.000003 loss: 3.0345 (3.0301) grad: 0.1607 (0.1626) time: 0.4057 data: 0.0040 max mem: 3953 +train: [19] [ 40/400] eta: 0:02:45 lr: 0.000003 loss: 3.0280 (3.0288) grad: 0.1607 (0.1637) time: 0.3560 data: 0.0036 max mem: 3953 +train: [19] [ 60/400] eta: 0:02:24 lr: 0.000002 loss: 3.0295 (3.0281) grad: 0.1551 (0.1597) time: 0.3607 data: 0.0043 max mem: 3953 +train: [19] [ 80/400] eta: 0:02:12 lr: 0.000002 loss: 3.0291 (3.0312) grad: 0.1498 (0.1579) time: 0.3803 data: 0.0042 max mem: 3953 +train: [19] [100/400] eta: 0:02:02 lr: 0.000002 loss: 3.0252 (3.0298) grad: 0.1508 (0.1589) time: 0.3829 data: 0.0045 max mem: 3953 +train: [19] [120/400] eta: 0:01:53 lr: 0.000002 loss: 3.0258 (3.0292) grad: 0.1540 (0.1583) time: 0.3871 data: 0.0042 max mem: 3953 +train: [19] [140/400] eta: 0:01:44 lr: 0.000001 loss: 3.0341 (3.0302) grad: 0.1508 (0.1583) time: 0.3782 data: 0.0043 max mem: 3953 +train: [19] [160/400] eta: 0:01:34 lr: 0.000001 loss: 3.0358 (3.0307) grad: 0.1490 (0.1569) time: 0.3433 data: 0.0041 max mem: 3953 +train: [19] [180/400] eta: 0:01:26 lr: 0.000001 loss: 3.0297 (3.0333) grad: 0.1543 (0.1576) time: 0.3735 data: 0.0041 max mem: 3953 +train: [19] [200/400] eta: 0:01:18 lr: 0.000001 loss: 3.0246 (3.0308) grad: 0.1627 (0.1575) time: 0.3797 data: 0.0041 max mem: 3953 +train: [19] [220/400] eta: 0:01:10 lr: 0.000001 loss: 3.0246 (3.0321) grad: 0.1540 (0.1579) time: 0.4216 data: 0.0040 max mem: 3953 +train: [19] [240/400] eta: 0:01:02 lr: 0.000001 loss: 3.0201 (3.0314) grad: 0.1540 (0.1585) time: 0.3689 data: 0.0041 max mem: 3953 +train: [19] [260/400] eta: 0:00:54 lr: 0.000000 loss: 3.0259 (3.0322) grad: 0.1528 (0.1579) time: 0.3648 data: 0.0041 max mem: 3953 +train: [19] [280/400] eta: 0:00:46 lr: 0.000000 loss: 3.0221 (3.0306) grad: 0.1494 (0.1574) time: 0.3792 data: 0.0043 max mem: 3953 +train: [19] [300/400] eta: 0:00:38 lr: 0.000000 loss: 3.0015 (3.0292) grad: 0.1480 (0.1571) time: 0.3612 data: 0.0040 max mem: 3953 +train: [19] [320/400] eta: 0:00:30 lr: 0.000000 loss: 3.0120 (3.0291) grad: 0.1480 (0.1565) time: 0.3658 data: 0.0039 max mem: 3953 +train: [19] [340/400] eta: 0:00:23 lr: 0.000000 loss: 3.0170 (3.0292) grad: 0.1533 (0.1571) time: 0.3686 data: 0.0042 max mem: 3953 +train: [19] [360/400] eta: 0:00:15 lr: 0.000000 loss: 3.0125 (3.0287) grad: 0.1600 (0.1574) time: 0.3518 data: 0.0043 max mem: 3953 +train: [19] [380/400] eta: 0:00:07 lr: 0.000000 loss: 3.0125 (3.0285) grad: 0.1583 (0.1577) time: 0.3409 data: 0.0041 max mem: 3953 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 3.0093 (3.0282) grad: 0.1604 (0.1579) time: 0.3430 data: 0.0040 max mem: 3953 +train: [19] Total time: 0:02:31 (0.3789 s / it) +train: [19] Summary: lr: 0.000000 loss: 3.0093 (3.0282) grad: 0.1604 (0.1579) +eval (validation): [19] [ 0/85] eta: 0:04:37 time: 3.2630 data: 3.0104 max mem: 3953 +eval (validation): [19] [20/85] eta: 0:00:30 time: 0.3352 data: 0.0050 max mem: 3953 +eval (validation): [19] [40/85] eta: 0:00:18 time: 0.3391 data: 0.0040 max mem: 3953 +eval (validation): [19] [60/85] eta: 0:00:09 time: 0.3068 data: 0.0039 max mem: 3953 +eval (validation): [19] [80/85] eta: 0:00:01 time: 0.2920 data: 0.0041 max mem: 3953 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.2873 data: 0.0038 max mem: 3953 +eval (validation): [19] Total time: 0:00:30 (0.3534 s / it) +cv: [19] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 3.010 acc: 0.110 f1: 0.064 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +evaluating last checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-last.pth +eval model info: +{"score": 0.11037283130306386, "hparam": [31, 1.0], "hparam_id": 45, "epoch": 19, "is_best": false, "best_score": 0.11332595053525286} +eval (train): [20] [ 0/509] eta: 0:27:23 time: 3.2279 data: 3.0184 max mem: 3953 +eval (train): [20] [ 20/509] eta: 0:03:57 time: 0.3485 data: 0.0187 max mem: 3953 +eval (train): [20] [ 40/509] eta: 0:03:06 time: 0.3053 data: 0.0042 max mem: 3953 +eval (train): [20] [ 60/509] eta: 0:02:52 time: 0.3583 data: 0.0058 max mem: 3953 +eval (train): [20] [ 80/509] eta: 0:02:41 time: 0.3496 data: 0.0053 max mem: 3953 +eval (train): [20] [100/509] eta: 0:02:27 time: 0.3010 data: 0.0036 max mem: 3953 +eval (train): [20] [120/509] eta: 0:02:18 time: 0.3240 data: 0.0062 max mem: 3953 +eval (train): [20] [140/509] eta: 0:02:11 time: 0.3614 data: 0.0041 max mem: 3953 +eval (train): [20] [160/509] eta: 0:02:04 time: 0.3537 data: 0.0051 max mem: 3953 +eval (train): [20] [180/509] eta: 0:01:57 time: 0.3643 data: 0.0044 max mem: 3953 +eval (train): [20] [200/509] eta: 0:01:49 time: 0.3364 data: 0.0044 max mem: 3953 +eval (train): [20] [220/509] eta: 0:01:41 time: 0.3295 data: 0.0046 max mem: 3953 +eval (train): [20] [240/509] eta: 0:01:34 time: 0.3560 data: 0.0045 max mem: 3953 +eval (train): [20] [260/509] eta: 0:01:27 time: 0.3476 data: 0.0045 max mem: 3953 +eval (train): [20] [280/509] eta: 0:01:20 time: 0.3405 data: 0.0046 max mem: 3953 +eval (train): [20] [300/509] eta: 0:01:13 time: 0.3220 data: 0.0045 max mem: 3953 +eval (train): [20] [320/509] eta: 0:01:05 time: 0.3423 data: 0.0043 max mem: 3953 +eval (train): [20] [340/509] eta: 0:00:58 time: 0.3373 data: 0.0046 max mem: 3953 +eval (train): [20] [360/509] eta: 0:00:51 time: 0.3260 data: 0.0036 max mem: 3953 +eval (train): [20] [380/509] eta: 0:00:44 time: 0.3293 data: 0.0044 max mem: 3953 +eval (train): [20] [400/509] eta: 0:00:37 time: 0.3283 data: 0.0045 max mem: 3953 +eval (train): [20] [420/509] eta: 0:00:30 time: 0.3376 data: 0.0040 max mem: 3953 +eval (train): [20] [440/509] eta: 0:00:23 time: 0.3230 data: 0.0043 max mem: 3953 +eval (train): [20] [460/509] eta: 0:00:16 time: 0.3229 data: 0.0042 max mem: 3953 +eval (train): [20] [480/509] eta: 0:00:09 time: 0.3386 data: 0.0040 max mem: 3953 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3267 data: 0.0044 max mem: 3953 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3078 data: 0.0038 max mem: 3953 +eval (train): [20] Total time: 0:02:54 (0.3430 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:35 time: 3.2394 data: 2.9908 max mem: 3953 +eval (validation): [20] [20/85] eta: 0:00:32 time: 0.3667 data: 0.0148 max mem: 3953 +eval (validation): [20] [40/85] eta: 0:00:18 time: 0.3248 data: 0.0043 max mem: 3953 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3140 data: 0.0045 max mem: 3953 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3372 data: 0.0040 max mem: 3953 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3106 data: 0.0038 max mem: 3953 +eval (validation): [20] Total time: 0:00:31 (0.3711 s / it) +eval (test): [20] [ 0/85] eta: 0:04:55 time: 3.4745 data: 3.2374 max mem: 3953 +eval (test): [20] [20/85] eta: 0:00:31 time: 0.3291 data: 0.0119 max mem: 3953 +eval (test): [20] [40/85] eta: 0:00:18 time: 0.3209 data: 0.0043 max mem: 3953 +eval (test): [20] [60/85] eta: 0:00:09 time: 0.3346 data: 0.0037 max mem: 3953 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3190 data: 0.0044 max mem: 3953 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3080 data: 0.0041 max mem: 3953 +eval (test): [20] Total time: 0:00:31 (0.3653 s / it) +eval (testid): [20] [ 0/82] eta: 0:05:49 time: 4.2592 data: 4.0317 max mem: 3953 +eval (testid): [20] [20/82] eta: 0:00:32 time: 0.3375 data: 0.0274 max mem: 3953 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3351 data: 0.0310 max mem: 3953 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3385 data: 0.0201 max mem: 3953 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3280 data: 0.0185 max mem: 3953 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3182 data: 0.0185 max mem: 3953 +eval (testid): [20] Total time: 0:00:31 (0.3838 s / it) +evaluating best checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/checkpoint-best.pth +eval model info: +{"score": 0.11332595053525286, "hparam": [19, 1.0], "hparam_id": 42, "epoch": 9, "is_best": true, "best_score": 0.11332595053525286} +eval (train): [20] [ 0/509] eta: 0:29:56 time: 3.5298 data: 3.2364 max mem: 3953 +eval (train): [20] [ 20/509] eta: 0:04:12 time: 0.3660 data: 0.0045 max mem: 3953 +eval (train): [20] [ 40/509] eta: 0:03:26 time: 0.3596 data: 0.0045 max mem: 3953 +eval (train): [20] [ 60/509] eta: 0:03:11 time: 0.4010 data: 0.0047 max mem: 3953 +eval (train): [20] [ 80/509] eta: 0:02:52 time: 0.3258 data: 0.0042 max mem: 3953 +eval (train): [20] [100/509] eta: 0:02:37 time: 0.3143 data: 0.0039 max mem: 3953 +eval (train): [20] [120/509] eta: 0:02:27 time: 0.3462 data: 0.0048 max mem: 3953 +eval (train): [20] [140/509] eta: 0:02:17 time: 0.3296 data: 0.0040 max mem: 3953 +eval (train): [20] [160/509] eta: 0:02:08 time: 0.3537 data: 0.0045 max mem: 3953 +eval (train): [20] [180/509] eta: 0:02:00 time: 0.3310 data: 0.0047 max mem: 3953 +eval (train): [20] [200/509] eta: 0:01:51 time: 0.3372 data: 0.0044 max mem: 3953 +eval (train): [20] [220/509] eta: 0:01:43 time: 0.3271 data: 0.0040 max mem: 3953 +eval (train): [20] [240/509] eta: 0:01:35 time: 0.3301 data: 0.0042 max mem: 3953 +eval (train): [20] [260/509] eta: 0:01:28 time: 0.3313 data: 0.0045 max mem: 3953 +eval (train): [20] [280/509] eta: 0:01:20 time: 0.3366 data: 0.0038 max mem: 3953 +eval (train): [20] [300/509] eta: 0:01:13 time: 0.3422 data: 0.0044 max mem: 3953 +eval (train): [20] [320/509] eta: 0:01:06 time: 0.3258 data: 0.0041 max mem: 3953 +eval (train): [20] [340/509] eta: 0:00:59 time: 0.3285 data: 0.0041 max mem: 3953 +eval (train): [20] [360/509] eta: 0:00:51 time: 0.3297 data: 0.0038 max mem: 3953 +eval (train): [20] [380/509] eta: 0:00:44 time: 0.3434 data: 0.0049 max mem: 3953 +eval (train): [20] [400/509] eta: 0:00:38 time: 0.4045 data: 0.0044 max mem: 3953 +eval (train): [20] [420/509] eta: 0:00:31 time: 0.3655 data: 0.0042 max mem: 3953 +eval (train): [20] [440/509] eta: 0:00:24 time: 0.3322 data: 0.0042 max mem: 3953 +eval (train): [20] [460/509] eta: 0:00:17 time: 0.3490 data: 0.0043 max mem: 3953 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3843 data: 0.0047 max mem: 3953 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3498 data: 0.0043 max mem: 3953 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3270 data: 0.0042 max mem: 3953 +eval (train): [20] Total time: 0:02:59 (0.3527 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:17 time: 3.0243 data: 2.8161 max mem: 3953 +eval (validation): [20] [20/85] eta: 0:00:29 time: 0.3292 data: 0.0339 max mem: 3953 +eval (validation): [20] [40/85] eta: 0:00:18 time: 0.3536 data: 0.0278 max mem: 3953 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3671 data: 0.0077 max mem: 3953 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3338 data: 0.0035 max mem: 3953 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3224 data: 0.0039 max mem: 3953 +eval (validation): [20] Total time: 0:00:32 (0.3778 s / it) +eval (test): [20] [ 0/85] eta: 0:04:41 time: 3.3077 data: 3.0710 max mem: 3953 +eval (test): [20] [20/85] eta: 0:00:33 time: 0.3745 data: 0.0047 max mem: 3953 +eval (test): [20] [40/85] eta: 0:00:20 time: 0.3872 data: 0.0042 max mem: 3953 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3169 data: 0.0044 max mem: 3953 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3094 data: 0.0041 max mem: 3953 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.2983 data: 0.0038 max mem: 3953 +eval (test): [20] Total time: 0:00:32 (0.3817 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:31 time: 3.3060 data: 3.0202 max mem: 3953 +eval (testid): [20] [20/82] eta: 0:00:32 time: 0.3890 data: 0.0047 max mem: 3953 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3710 data: 0.0045 max mem: 3953 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3476 data: 0.0041 max mem: 3953 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3252 data: 0.0042 max mem: 3953 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3147 data: 0.0039 max mem: 3953 +eval (testid): [20] Total time: 0:00:32 (0.3949 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:-------|:-------------|:-------|--------:|-------:|-----:|------------:|:----------|:-----------|-------:|--------:|----------:|---------:|----------:| +| flat_mae | patch | linear | nsd_cococlip | best | 9 | 0.0057 | 0.05 | 42 | [19, 1.0] | train | 3.0125 | 0.11555 | 0.0015183 | 0.069439 | 0.0012924 | +| flat_mae | patch | linear | nsd_cococlip | best | 9 | 0.0057 | 0.05 | 42 | [19, 1.0] | validation | 3.0745 | 0.11333 | 0.0036514 | 0.063982 | 0.002658 | +| flat_mae | patch | linear | nsd_cococlip | best | 9 | 0.0057 | 0.05 | 42 | [19, 1.0] | test | 3.0849 | 0.10742 | 0.0036072 | 0.055105 | 0.0023943 | +| flat_mae | patch | linear | nsd_cococlip | best | 9 | 0.0057 | 0.05 | 42 | [19, 1.0] | testid | 3.1207 | 0.09119 | 0.0034888 | 0.052798 | 0.0026162 | + + +done! total time: 1:11:11 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/train_log.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..5616a335ffb9d123a13b59966e8f1978b804c9bc --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.1696742117404937, "train/grad": 0.18730259492993354, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.2021630859375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.20189697265625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.201549072265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.2011376953125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.20075927734375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.200235595703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.19959716796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1989013671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.198040771484375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.19706787109375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1961767578125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.19479736328125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.19350341796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.191673583984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.189930419921875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.188175048828125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.186114501953125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.183753662109375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.181226806640625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.178795166015625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.176119384765625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.17340576171875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.170714111328125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.1679541015625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.165389404296875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.1626904296875, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.160540771484375, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.158790283203125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.156575927734375, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.154495849609375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.15283935546875, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.151541748046875, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.1500537109375, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.148875732421875, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.147728271484375, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.146639404296875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.14568603515625, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.145020751953125, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.14429443359375, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.1436395263671875, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.1433636474609377, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.1431488037109374, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.143216552734375, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1432550048828123, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.143690185546875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1446246337890624, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.145943603515625, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.1482000732421875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.150106201171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0293802272900939, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02936016581952572, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02932578436098993, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.029292109562084077, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.029257227359339597, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.029210455380380154, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.029156912053003907, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.029099054848775268, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.029021667828783394, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02894006349146366, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.028861186038702726, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02874204964376986, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02862652501091361, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.028460987070575358, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02830299945548177, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.028153776340186596, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.027966281715780496, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02774976449087262, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.027513299249112607, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02729947932995856, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.027046308927237986, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.026797601114958526, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.026541609270498156, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.026288936315104365, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.026051061060279608, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.025798325641080738, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.025599595448002218, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.025437840279191732, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02524335012771189, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.025048333862796427, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.024901971742510794, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024789789775386453, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.024666978791356087, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024577387832105158, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.024497898845002055, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.024438505060970783, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02439608455635607, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.024373540263622998, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02436754639260471, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.024395282240584494, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02443975878879428, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02450271730311215, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.024618936832994223, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.024750874927267432, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.024942320007830858, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.025197535725310444, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.02546975144185126, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02585533680394292, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.026174103189259768, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1923420429229736, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.19161319732666, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.190481185913086, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.189340591430664, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.188279390335083, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.186837673187256, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1851394176483154, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1833178997039795, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1810414791107178, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1785390377044678, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1762192249298096, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1728873252868652, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1697301864624023, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1654388904571533, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1614911556243896, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1580045223236084, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.153839111328125, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.149461269378662, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.145192861557007, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1416780948638916, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.138118267059326, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.135180950164795, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.132725715637207, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1308376789093018, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.12953782081604, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.128617763519287, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1280717849731445, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1276936531066895, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.127307891845703, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1269826889038086, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1268277168273926, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1267497539520264, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.126617193222046, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1265532970428467, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.126314401626587, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.125833511352539, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1251916885375977, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1246423721313477, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.124558687210083, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.126310110092163, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1294267177581787, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1332383155822754, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.139169454574585, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.144766330718994, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1520943641662598, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1597211360931396, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.164797067642212, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1706666946411133, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.17440128326416, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05629383536360281, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05629383536360281, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05647840531561462, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05647840531561462, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.05666297526762643, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.05721668512366187, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.05647840531561462, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.05647840531561462, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.05703211517165006, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.05740125507567368, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.05758582502768549, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.05721668512366187, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.05795496493170912, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.056109265411590996, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.05537098560354374, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.05537098560354374, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.056109265411590996, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.055924695459579184, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.05758582502768549, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.05703211517165006, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.05832410483573274, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.057770394979697305, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.0592469545957918, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.060538944259874494, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06035437430786268, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.062384643779992616, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.06201550387596899, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.06275378368401624, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.06293835363602805, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.06459948320413436, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.06478405315614617, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.06441491325212255, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.06496862310815799, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.06478405315614617, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.06626061277224068, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.06589147286821706, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.06441491325212255, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.06589147286821706, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.06496862310815799, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.0651531930601698, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.0651531930601698, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.06496862310815799, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.06589147286821706, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.06423034330011074, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.06423034330011074, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.06607604282022887, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.06533776301218161, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.06921373200442968, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.07308970099667775, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007862846466443725, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.007898381336420478, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008115629925126457, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.00814685749682662, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.00819510387054124, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.00875991961468064, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.008090751499929758, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.008218670644804056, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.008338766658697096, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.008973121589673358, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.009315676565792748, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.009483559583276594, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.010592879800631352, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.009402041506191573, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.009255845306105154, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.009591608684008336, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.010758564131833417, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.011762043397264596, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.012756032092468545, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01298118450483267, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.013311613614708668, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.013237605834219094, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.013406474138960178, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.013510202204373725, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.013250166122720662, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.01394502401408889, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.014321517414502452, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.014492621211159594, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.01426432558044789, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.014046965943270963, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.01414430863675511, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.013937352172856866, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.014290652659755343, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.014622418034131293, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.014366877881347473, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.014438475312134517, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.014594576284582278, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.014617818554744368, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.014961517041209418, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.016970482834666293, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.0179557886072691, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.017339018487193856, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.016746983859204617, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.015489960146850126, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.01554690755929367, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.017250358951160664, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.017084830604139598, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.017458194946790767, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.018633112232026735, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 3.150106201171875, "validation/loss_best": 3.17440128326416, "validation/acc_best": 0.07308970099667775, "validation/f1_best": 0.018633112232026735} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 3.145331565141678, "train/grad": 0.17452117569744588, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1919287109375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.190535888671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.188221435546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.185986328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.183892822265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.180916748046875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1778955078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1747509765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1708642578125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.167064208984375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1636865234375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.159091796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.15534912109375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.150701904296875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.146954345703125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.144063720703125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.14114501953125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.138426513671875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.13625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.13476318359375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.133369140625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.132442626953125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.13164794921875, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.13107421875, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.13055908203125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.1299560546875, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.129656982421875, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.129310302734375, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.12874755859375, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.128289794921875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.1278125, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.127486572265625, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.126976318359375, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.12663818359375, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.1263519287109376, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.12637451171875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.1264263916015627, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.12670166015625, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.127237548828125, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.128095703125, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.12885498046875, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.129552001953125, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1305963134765626, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1323565673828124, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.1357220458984374, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1410626220703124, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1468927001953126, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.154654541015625, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.1639141845703125, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02802662242203951, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.027898110542446376, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.027689308570697905, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.027488601114600896, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.027296624993905425, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.027038753498345613, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.026761436183005572, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02646888983435929, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026109962817281486, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.025758200278505682, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0254421344678849, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02501543704420328, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.024653595043346285, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024206878952682018, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023856006916612386, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02357957396656275, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023298405152745547, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.023046463234350084, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022844813712872565, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022717151497490703, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022612685402855277, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022550474600866437, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022521105725318194, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022517157145775856, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022535791057161986, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02257485269103199, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022620451324619352, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.022666795388795437, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02273486393969506, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.022824679194018246, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02291367986239493, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02300110607407987, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.023130093542858957, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.023259746376425028, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.023429786842316388, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.023617462273687124, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02384348343592137, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02407744897529483, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.024341920586302877, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.024678844846785068, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.024958195509389044, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.025208743186667562, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.025567858144640923, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.025956184389069676, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.026539338640868663, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02733588091097772, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.028130109487101436, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02900001499801874, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0298032879922539, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1798079013824463, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.177544116973877, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.174107074737549, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.170830011367798, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.167837619781494, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1639456748962402, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.15995717048645, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.155902147293091, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1513705253601074, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.147193193435669, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1437666416168213, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1395835876464844, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.136477470397949, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1332006454467773, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1311240196228027, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1297414302825928, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1285746097564697, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1277246475219727, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.127122640609741, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.126677989959717, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.126220703125, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1258115768432617, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1253716945648193, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1249005794525146, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1244914531707764, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.124022960662842, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.12361216545105, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1231637001037598, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.122473955154419, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1213901042938232, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1202728748321533, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1192820072174072, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1183223724365234, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1179542541503906, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.118173122406006, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1185779571533203, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.117844343185425, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.116760015487671, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1182782649993896, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.125431537628174, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.131443738937378, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1339621543884277, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.131213665008545, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1297738552093506, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.133068561553955, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1315724849700928, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1380414962768555, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.17254900932312, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1940886974334717, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05703211517165006, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05721668512366187, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05703211517165006, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05740125507567368, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.05758582502768549, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.05666297526762643, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.056109265411590996, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.05629383536360281, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.056109265411590996, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.05758582502768549, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.05740125507567368, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.057770394979697305, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.059431524547803614, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.05832410483573274, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.059800664451827246, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06127722406792174, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06090808416389812, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06090808416389812, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06035437430786268, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.060723514211886306, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06275378368401624, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06570690291620525, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06755260243632337, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.06718346253229975, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.06921373200442968, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.07013658176448874, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.0710594315245478, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.073827980804725, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.0754891103728313, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08028792912513842, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08157991878922112, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08176448874123293, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08324104835732743, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08361018826135105, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.07881136950904392, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.0754891103728313, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.07216685123661867, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07511997046880768, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.07881136950904392, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.07179771133259505, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.07272056109265411, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.07567368032484312, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07585825027685493, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.08139534883720931, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.08711701734957548, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.08102620893318568, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.00867822969033097, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009066435730759936, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009556056876471672, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01015755884921122, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009635664688386917, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009632145456747883, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009502368940254266, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.010423932060837119, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011068666040259611, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012591286732075218, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013093298968194257, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013278633874842322, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013827749076413319, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013399787531052315, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.013284728279757288, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.013430327910642373, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.013443237754812687, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01355735417697935, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.013821456307630492, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.014117404784264953, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.01507591927796048, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.015549811746104087, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.016042276138384996, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.016338531428658824, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.01663030969293792, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.017331108268963346, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.016887159699255153, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.017594852361741863, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.017883988222373384, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.018595483764062563, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.02014481351772857, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.020155015762587353, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.01961815476228078, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.0209840390009773, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.021246123050088467, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.022120251450689857, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.02268349709359781, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.019776068953475896, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.015448591753444358, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.014594173529522364, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.02019148232877969, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.025425724504146735, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.023941248531868492, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.023422434925938106, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.026545714675010056, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.03001989699231065, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.032535467382481205, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.030572300662590646, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.026080248326688347, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 3.154654541015625, "validation/loss_best": 3.17254900932312, "validation/acc_best": 0.08711701734957548, "validation/f1_best": 0.030572300662590646} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 3.1299230992794036, "train/grad": 0.1714025068283081, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1787841796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1756640625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1708984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.166744384765625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.16295654296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.15839599609375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.153779296875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.149632568359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.145330810546875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.141478271484375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.138717041015625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.135643310546875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.133546142578125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.131563720703125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.13037353515625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.129498291015625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.128621826171875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.12789794921875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1271484375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1265966796875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.125728759765625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.1249072265625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1239892578125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.122935791015625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.121834716796875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.12037841796875, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.11918212890625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.118143310546875, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.1164874267578124, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.1146600341796873, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.113192138671875, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.111849365234375, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.1102850341796877, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.1091314697265626, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.1079168701171875, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.1066571044921876, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.10537353515625, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1043463134765625, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.103837890625, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.104066162109375, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.105108642578125, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.1067596435546876, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.110265808105469, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1154351806640626, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.1234808349609375, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1327218627929687, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1429571533203124, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.157986755371094, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.173345031738281, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.026691580917686225, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.026407688609324395, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.025969739141874015, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.025575350164435805, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.025217808461748065, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02477759181521833, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02434495904482901, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023941329480148852, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02351165873464197, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023156950548291207, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022891169572249057, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022606792910955846, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022426655599847436, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022273062113672495, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02219479862600565, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022158619952388107, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022141801193356514, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022141713085584342, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022153015467338263, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022167946975678206, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022190551455132664, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022218200308270753, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02225129117257893, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02229039098136127, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022333671720698476, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02239068496041, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022446333756670357, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.022500237855128943, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.022579550980590283, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.022685384624637664, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.022797570074908437, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.022914375672116877, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02309862389229238, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.023287212122231723, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.023522204896435143, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0237481973413378, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02399246827699244, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.024245566679164766, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.024545855009928345, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.024975548218935727, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.025394367752596736, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.025819195453077554, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02644407771527767, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02713026450946927, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02810556766577065, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02916466669179499, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03015861659310758, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.031454319050535556, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.032544241799041626, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.16336989402771, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.159921646118164, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1548776626586914, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.150620698928833, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.146944999694824, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1427865028381348, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.139054298400879, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1358842849731445, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.132920265197754, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1308135986328125, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1294376850128174, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1281516551971436, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.127405881881714, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1267282962799072, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.126217842102051, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1257858276367188, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1252670288085938, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.124614953994751, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1238465309143066, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1231157779693604, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1222033500671387, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1212902069091797, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1202874183654785, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1193625926971436, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1185500621795654, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1177966594696045, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.117333173751831, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1169912815093994, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1164023876190186, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.115072250366211, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1130869388580322, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1107726097106934, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.107600212097168, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1059482097625732, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1062750816345215, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1092121601104736, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.113586187362671, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.115748882293701, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.117159366607666, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.126936435699463, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1400279998779297, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.143298625946045, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1391000747680664, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1418094635009766, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.144801616668701, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.14294695854187, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1581995487213135, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1861867904663086, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.2214407920837402, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05703211517165006, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.055740125507567365, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.055924695459579184, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05703211517165006, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.05721668512366187, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.058693244739756366, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.057770394979697305, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06016980435585087, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06201550387596899, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06718346253229975, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06884459210040605, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.06939830195644149, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.07050572166851236, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.07179771133259505, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.07290513104466592, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.07456626061277224, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.0769656699889258, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08028792912513842, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.07881136950904392, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07918050941306755, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08453303802141011, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.07918050941306755, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.07715023994093761, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.07401255075673681, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.0754891103728313, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07364341085271318, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.07715023994093761, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.0753045404208195, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.07493540051679587, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.0799187892211148, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.0873015873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.09080841638981174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.08527131782945736, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.08342561830933924, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010287955469849931, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009779492481909869, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010742890487020573, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012163772877095387, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013272392473488591, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01377516559435941, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013529319753673101, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013531109124633353, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013518512340542528, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.013735294495417423, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013180069213008953, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.012544252264213325, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.012381715914780976, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.012545062725948651, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.012698675206926587, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.012417305595676024, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.012394031468603905, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.013189904979316477, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.013794171513369452, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.013890933148945295, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.014494099380638061, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.015064032389207838, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.016209649139513737, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.015925101837418507, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.01617147770440193, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.015980734404794042, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.015194890249816859, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.014902544931829104, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.014585602946206873, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.01561157095778721, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.017894577843955047, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.02313774552806858, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.02788793853209581, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.02973056086053143, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.032220765949419056, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.0365712158010816, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.03207531262868604, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.031073543131723485, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.03031471083056678, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.03214915890391705, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.030216304170470182, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.029782162166143034, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.02776277351584354, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.027240447891931297, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.033181123290066984, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.04014815098743368, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.04188831595196882, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.03855314121750856, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.03867478360721465, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 3.1429571533203124, "validation/loss_best": 3.1581995487213135, "validation/acc_best": 0.09080841638981174, "validation/f1_best": 0.04188831595196882} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 3.1304063057899474, "train/grad": 0.18189712487161158, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.161337890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1573876953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.151993408203125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.14754150390625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1440283203125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.140166015625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13688720703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13430419921875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.132158203125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.130596923828125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.12954345703125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.128411865234375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.127730712890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.126839599609375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.126033935546875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.12542236328125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1244873046875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.123538818359375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.122467041015625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.121353759765625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.120096435546875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.118792724609375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.117298583984375, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.115986328125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.1144580078125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.112835693359375, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.1115948486328127, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.110462646484375, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.109039306640625, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.10760986328125, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.106463623046875, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.105858154296875, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.105159912109375, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.1048699951171876, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.1045379638671875, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.10424560546875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.1042095947265627, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1048175048828126, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1066546630859375, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.11135009765625, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.116566162109375, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.1224542236328126, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1327978515625, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.144234619140625, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.1608242797851562, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1809565734863283, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.193781433105469, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.2099427795410156, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.239782257080078, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.025775028206408025, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.025406838785856964, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.024886055905371903, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.024464206686243415, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.024124755365774034, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023759848764166237, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023462331453338266, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023233378985896707, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.023046426558867097, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02293455651961267, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022876934837549923, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022838747911155224, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02282953919842839, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02283399971202016, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022845566179603337, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022858197698369623, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022877316093072295, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022901141261681916, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02292926512658596, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022956530712544918, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02299286368303001, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02303185660392046, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.023081436939537524, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.023138784328475593, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023206154927611352, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.023302101101726293, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.023401609575375914, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.023500879602506755, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.023646538015455007, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.023840185906738042, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.024032948631793262, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024237411739304663, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02455629074946046, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024875149419531225, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025260212700814008, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0256079838052392, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.025950623210519552, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.026300725834444164, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02677618673071265, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.027579533280804754, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.028331137914210557, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02911020233295858, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.030310337198898196, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03143097621388733, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03282088439911604, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03431924198754132, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.035271699521690604, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.036478879675269124, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03807646124623716, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.148552179336548, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1450142860412598, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.140394926071167, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1369783878326416, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1344799995422363, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.132007598876953, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.130258083343506, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.129063367843628, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.128204107284546, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1276891231536865, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1273608207702637, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1270155906677246, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1267213821411133, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1263108253479004, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1258647441864014, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.125459909439087, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1249020099639893, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1242268085479736, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.123414993286133, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.122568130493164, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.12143611907959, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.120183229446411, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.118626832962036, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1168792247772217, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1151010990142822, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1132426261901855, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1118392944335938, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.110712766647339, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.109370708465576, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1078174114227295, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1070191860198975, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1075234413146973, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1098833084106445, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1123721599578857, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.114469528198242, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1188957691192627, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.126483678817749, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1307778358459473, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1370842456817627, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1487009525299072, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.155916929244995, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.164496421813965, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1793763637542725, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.193310260772705, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.2255022525787354, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.2598817348480225, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.239384412765503, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.228055715560913, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.237863063812256, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05721668512366187, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.058508674787744554, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05740125507567368, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.057770394979697305, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06109265411590993, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06183093392395718, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06755260243632337, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06847545219638243, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.06866002214839424, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.0695828719084533, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.07142857142857142, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.07438169066076043, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08139534883720931, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08324104835732743, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08084163898117387, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.07973421926910298, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07807308970099668, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08065706902916205, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.07770394979697305, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.07936507936507936, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.07364341085271318, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.07198228128460686, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07235142118863049, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.0754891103728313, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.08176448874123293, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.08361018826135105, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.07862679955703211, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07179771133259505, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.08231819859726837, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.09136212624584718, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.09136212624584718, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012917310410349762, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01369453065175956, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013481258751290873, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013212642038646158, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013534498118876947, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.012973049759897813, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013437792056363635, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013384573345194712, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012846932476672302, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012255807132507839, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012604779708475636, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.012717815770522273, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.011625039298719888, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.012681850036573723, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.011511423238262361, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.010110395147055274, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.009683043818680712, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.009240930338732163, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.008966410571219838, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.008778247932428847, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.009434185155152677, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.010272558413474386, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.011273417990017437, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.013687490188212479, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.01565111333365812, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.016754850492916622, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.01744194283563388, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.01819516331419124, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.018496944261192156, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.02189130338784115, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.027033583182889214, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.030835210986745576, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.0328928758893483, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.03162573150103187, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.029976988313355244, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.027037809247060392, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.02434861307867504, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.024458249541912256, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.023518951775426716, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.023689481364501636, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.027025193229577405, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.030476138061597685, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.03665966514940064, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.03877964392086081, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.03194949991662151, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.02745266550841663, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.037008032872106855, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.04140370417805813, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.04264223669078315, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 3.2099427795410156, "validation/loss_best": 3.228055715560913, "validation/acc_best": 0.09136212624584718, "validation/f1_best": 0.04140370417805813} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 3.120431296825409, "train/grad": 0.18178171269595622, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.14636474609375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.142952880859375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.138663330078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.135616455078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.133594970703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1315576171875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13013427734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.129102783203125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.128231201171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.127459716796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.126878662109375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12613037109375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.125330810546875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.124307861328125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1233837890625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1224853515625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.121322021484375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.119822998046875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.11828857421875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.116820068359375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.114912109375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.113011474609375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.110997314453125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.10876708984375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.1064276123046874, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.1035980224609374, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.101170654296875, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0989801025390626, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.096270751953125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.093365478515625, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0909735107421876, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0890386962890624, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0872271728515623, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.0860760498046873, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.0859393310546874, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.08634521484375, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.08626708984375, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.0859130859375, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.08742919921875, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.0925714111328126, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0978594970703126, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.104714050292969, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1149359130859375, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1241696166992186, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.1416670227050782, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.16707275390625, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1886502075195313, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.21757080078125, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.2507669067382814, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02421086818911135, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023886722307652236, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023484042109921575, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023209473434835672, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02302255669608712, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022860814882442355, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022761527532711625, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02270987309515476, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02268289594911039, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022676246417686344, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022677389197051524, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02268611785955727, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022696399558335544, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022712723473086952, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02272772983647883, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02274308556690812, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02276301573961973, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02278835179284215, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022819695174694062, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02284978985786438, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02289154216647148, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022940341485664248, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022999889450147748, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.023072056611999868, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023157288189977407, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.023266156297177078, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.023375426698476077, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.023481347812339664, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02363897267729044, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02385560919996351, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.024081323826685548, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024307543034665285, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.024642569152638315, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024981133304536343, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025442207278683782, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.025939838187769057, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.026435593869537116, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.026867700861766933, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02748139837756753, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02846478044986725, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.029237252539023758, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.029912661230191588, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03059754298068583, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0312593405880034, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03252373008988798, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.034217876344919206, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.035668506594374774, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03752965621650219, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.039273646529763936, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.138228416442871, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.135542154312134, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1325461864471436, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1306748390197754, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1294732093811035, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1284592151641846, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.127856492996216, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1273276805877686, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1268551349639893, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1263885498046875, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.125944137573242, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1252520084381104, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.124586582183838, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1236579418182373, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.122765064239502, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.121950387954712, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1209704875946045, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.119858741760254, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.118847608566284, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.118007183074951, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1171951293945312, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1164002418518066, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1156246662139893, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1146669387817383, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.113388776779175, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.111428737640381, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.10937762260437, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.107297897338867, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1046338081359863, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1019468307495117, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1000094413757324, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0987277030944824, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0985052585601807, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.100891351699829, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.105226516723633, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1051342487335205, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0983967781066895, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0935590267181396, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.091301202774048, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.090070962905884, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0882582664489746, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0910773277282715, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.100055694580078, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.122199058532715, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1594274044036865, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.214259147644043, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.2564845085144043, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.2687995433807373, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.23604416847229, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05832410483573274, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.058508674787744554, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07216685123661867, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07715023994093761, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07899593946105574, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.0812107788851975, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08010335917312661, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08084163898117387, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08250276854928018, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08545588778146918, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08471760797342193, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.0812107788851975, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07862679955703211, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.07511997046880768, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.07788851974898486, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.08545588778146918, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.08674787744555186, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.08471760797342193, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.0946843853820598, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09505352528608342, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09431524547803617, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.0932078257659653, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.09486895533407161, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.08988556662975268, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.08453303802141011, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.07788851974898486, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.0812107788851975, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01359572438524735, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01326750452944084, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014074599884271968, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013391071433632171, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01371432602786758, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01372629541557989, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013676398606267572, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013651324492265185, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013253809193616474, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.013180963522018285, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013385525642778381, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013478242526228293, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013256637700193355, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.012947920520050706, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.013515921850837572, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.014124110469950176, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.014903893444788834, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.014952448688484743, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.014981386353950165, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.016603064960300777, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.01743740326946319, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.0173031942295387, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.018042170795108014, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.019783944623562713, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.02343794938136971, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.027140569824587398, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.02904905639054769, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.031240339191235336, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.03051503343284047, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.03106435737641182, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.03222411224115332, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.03306325956932229, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.029382040289971018, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.026854715555336833, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.027131895129934958, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.02796737267215156, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.03130778379385156, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.036872526581355924, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.03771335576863583, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.03511039338760664, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.04123079327572509, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.04185116948743669, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.04485573626410146, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.04548634376887129, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.04946852602881715, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0477888247919471, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.04206058853116357, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.04509536345645359, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.052346766405757784, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 3.104714050292969, "validation/loss_best": 3.0910773277282715, "validation/acc_best": 0.09505352528608342, "validation/f1_best": 0.04185116948743669} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 3.1111643218994143, "train/grad": 0.17860005378723146, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13763671875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.134991455078125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.13217041015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.130411376953125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12923095703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.128134765625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12736083984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12671142578125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.12585693359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12504638671875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.12431884765625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12331298828125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12227294921875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1208056640625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.119462890625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.118223876953125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.116719970703125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.114912109375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.112783203125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.110908203125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.10861083984375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.10618408203125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.103472900390625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.10056884765625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.09755859375, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0940155029296874, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0908056640625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0881280517578125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.08478515625, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.081268310546875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0786676025390625, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0765399169921874, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0740802001953127, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.0719403076171874, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.069456787109375, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.067809753417969, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.0677609252929687, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.0686776733398435, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.07009765625, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.073372802734375, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0782986450195313, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.083939514160156, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.094100341796875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1053009033203125, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.1233062744140625, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1511662292480467, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1821287536621092, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.2193736267089843, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.2543673706054688, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02310134466737509, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022853641677647828, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022588209877721966, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022441411674953997, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02235952309332788, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022305524684488775, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02228211231529713, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022275602649897336, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022278587063774467, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022286734762601554, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022295120768249035, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022309934818185866, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022326965844258667, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022349829394370316, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022372476938180626, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022396603571251034, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022428360139019787, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02246476837899536, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022507343157194555, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022548675704747438, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022602886972017587, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022662057378329336, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022733232560567557, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02281693429686129, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022906844541430472, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.023022525925189257, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.023128189193084837, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.023221695106476547, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02336164372973144, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.023549412610009313, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.023740909104235472, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.023940171394497157, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02424307863228023, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024523603431880474, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.024855883158743382, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.025216829543933273, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.025679933559149503, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.026180078545585274, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02676784887909889, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02761168065480888, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02839782813563943, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.029097291929647325, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.030115867303684353, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.031098561985418202, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03223682453855872, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03374771738424897, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03539908401668072, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.037216169154271485, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03894627190195024, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1321239471435547, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1304564476013184, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1287922859191895, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.127786636352539, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1271939277648926, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1265766620635986, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.12605881690979, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.125473737716675, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.124762773513794, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1239898204803467, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1232473850250244, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.122178077697754, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.121201992034912, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.119846820831299, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.11859130859375, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1173861026763916, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.115955352783203, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1142876148223877, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.112449884414673, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.110764741897583, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1086392402648926, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1064791679382324, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1039741039276123, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.101292848587036, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.098470687866211, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0953285694122314, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.092986583709717, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0912914276123047, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0893380641937256, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.086926221847534, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0844342708587646, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0821785926818848, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.080167531967163, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.080827236175537, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0831573009490967, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0854156017303467, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0855190753936768, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.088477611541748, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1049253940582275, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.132431745529175, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.15049147605896, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.163383960723877, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1851320266723633, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.2151458263397217, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.273346185684204, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.342379331588745, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.4111130237579346, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.5009005069732666, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.557676315307617, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.067921742340347, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07456626061277224, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07567368032484312, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07715023994093761, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07715023994093761, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.07862679955703211, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08084163898117387, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.0843484680693983, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08933185677371724, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.0873015873015873, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08785529715762273, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08361018826135105, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08674787744555186, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08637873754152824, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08527131782945736, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09080841638981174, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.08840900701365817, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09357696566998892, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09800664451827243, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09117755629383537, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.08877814691768181, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.08951642672572906, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.09154669619785899, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.09191583610188261, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.08619416758951642, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.0828719084533038, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.08028792912513842, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013743646421622053, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01334410016946136, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013876884102309585, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01409467073513293, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013504889873935935, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013212121823342624, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013807788119865244, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013940705475772077, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.014164826722520367, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014357397029356444, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.014220719923425497, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01412719598157929, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014400036417796887, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.014448586541240654, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.015176414615798063, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.014759201664433586, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.014753550466069021, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01486689299206812, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.01612527507901931, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.017542499354847717, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.018498347307760784, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.019281883006417692, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.019887021581743304, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.020949778439099013, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.021826023151384637, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.023008034179272014, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.02273580431625108, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.02315992362855158, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.026307918035066588, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.030631895863389266, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.03708785923899802, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.0365234396557646, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.037939900822604435, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.03554227621718826, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.0376696575835856, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.03830240995314719, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.03886100538198845, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.04805704868367797, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04802291448225507, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.04908641396397604, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.05008278996331019, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.04422396105697043, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.03658181801963497, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.03690054410785696, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.03885221673779075, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.038677286996481965, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.038672607988375, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.03569034554211795, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.033982596347812165, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 3.0782986450195313, "validation/loss_best": 3.15049147605896, "validation/acc_best": 0.09800664451827243, "validation/f1_best": 0.05008278996331019} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 3.1048061096668245, "train/grad": 0.18106955640017985, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1326904296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.13119140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12956298828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12866943359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.128033447265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1272998046875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12662841796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.125830078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.12493408203125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12392822265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.122930908203125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.121634521484375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.120341796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.118533935546875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.11684814453125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.115194091796875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.113193359375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1108203125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.108280029296875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1059124755859373, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.102984619140625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0999957275390626, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.096868896484375, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.09340087890625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0899713134765623, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0860760498046873, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.08292236328125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.080208740234375, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.076585693359375, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0724114990234375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.068714599609375, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0652825927734373, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0613836669921874, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.0585479736328125, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.0559515380859374, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.053995361328125, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.0529339599609373, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.053375244140625, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.0567282104492186, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.0656246948242187, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.07454833984375, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.0827520751953124, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0940213012695312, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1042247009277344, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.1214244079589846, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.144569091796875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1676226806640626, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.201513366699219, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.238405303955078, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022739344090223313, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022612149235792457, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022501926771365106, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022457282082177697, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022440660842694343, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022435240326449276, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022437923145480453, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02244380731135607, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022452623471617698, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022461335356347263, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02246965568512678, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02248385822400451, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02249703161418438, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022517271689139307, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022537449700757863, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022557391272857785, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02258386234752834, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022618417642079294, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022660174961201845, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02269996153190732, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02275633134879172, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02282302633859217, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022903596428222955, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.023002090663649142, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023117745025083423, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.023275963249616324, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02343391314614564, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.023591087409295143, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.023807590939104558, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.024053682666271926, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.024268557112663985, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024454995542764663, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.024716233462095262, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024968771282583475, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025297400280833243, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02565867843106389, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.026121199829503894, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02661933531984687, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02724952326156199, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02832275638356805, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.029221779154613613, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03000097249634564, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.031034051179885863, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03193054870702326, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0333466568775475, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03490982323884964, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03603673368692398, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03759026098996401, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.039189798068255184, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1293258666992188, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.128281831741333, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1272356510162354, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1266121864318848, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1261491775512695, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1255862712860107, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1250100135803223, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1244029998779297, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.12363600730896, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1229019165039062, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.122147560119629, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.121119737625122, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1201367378234863, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1187102794647217, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.117323875427246, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.116034984588623, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1143295764923096, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.11232590675354, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1102142333984375, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.108210563659668, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1059136390686035, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.103713274002075, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1013195514678955, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0988643169403076, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.096226453781128, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.092708110809326, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.089446544647217, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0866217613220215, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0838027000427246, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0828161239624023, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0833423137664795, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0836780071258545, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0833725929260254, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.084103584289551, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0877246856689453, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.095156669616699, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1085681915283203, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.125110387802124, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.140651226043701, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.148221492767334, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1573991775512695, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.172044515609741, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.199760675430298, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.2062532901763916, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.2220723628997803, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.239149332046509, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.222841501235962, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.195619583129883, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.156883716583252, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07364341085271318, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07567368032484312, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07733480989294943, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07881136950904392, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08693244739756367, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08914728682170543, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09099298634182355, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09357696566998892, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09542266519010704, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09431524547803617, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.0932078257659653, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09431524547803617, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09339239571797711, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.09080841638981174, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09283868586194167, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09413067552602436, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09523809523809523, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.09431524547803617, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09117755629383537, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.08970099667774087, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.08785529715762273, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.08785529715762273, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.08508674787744555, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.0932078257659653, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.09597637504614248, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.09579180509413067, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.09874492432631968, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.09431524547803617, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013335428455197957, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013945780466875885, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012751447089378308, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013222475808018287, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013719415458638619, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01420087993846574, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01421876189606091, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014243077590736035, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01372211014059869, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012909390513757016, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013634519147653715, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01409678443692561, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014840168183034385, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.015977117811580163, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01698842485462472, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01825570003104707, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.019570938694470078, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02081228347898178, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.021242302789129783, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.02163959533052472, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.02180360363998358, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.023107712054423506, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.02522841711007513, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.02654026527651965, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.02902314117247128, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03394525632750168, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.03552473809448344, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.036900553659250476, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.0379672131970891, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.03877327741713436, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.03827813440157586, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.03713761515828103, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.036486425681351214, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.03569387134681072, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.0362834256947693, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.03876028388312348, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.04170919149968627, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.04206297355736649, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04327841892014931, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.04407655000268604, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.04523273607782887, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.04279561587019995, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.04429961392976908, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.04471404755253008, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.044517177241847235, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.04107121381197213, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0400788172924908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.04237585638510516, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.04648724551204483, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 3.201513366699219, "validation/loss_best": 3.195619583129883, "validation/acc_best": 0.09874492432631968, "validation/f1_best": 0.04237585638510516} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 3.0946867763996124, "train/grad": 0.1771689835935831, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13138671875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.130264892578125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12922119140625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.128372802734375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12771240234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1268310546875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.125860595703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12480712890625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1236083984375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12227294921875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.121134033203125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.119256591796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.117615966796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.115262451171875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.113231201171875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.111207275390625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.108753662109375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.105794677734375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1026904296875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.09990234375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0964984130859374, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.09300537109375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0892962646484374, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0853155517578124, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0813189697265626, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0766973876953125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0727191162109375, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0692340087890626, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.064786376953125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.060025634765625, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0563592529296875, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.053380126953125, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0498614501953125, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.047521667480469, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.0457504272460936, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.0450201416015625, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.045309753417969, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.0471035766601564, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.0500064086914063, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.0527005004882812, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.054859924316406, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.058236083984375, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0644671630859377, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.0737686157226562, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.0912716674804686, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1168292236328123, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1401708984375, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.170936737060547, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.202017822265625, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022484684814698994, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022422123653814196, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022379835443571208, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02236800694372505, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022364452835172414, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022366156908683477, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022369305146858098, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022373357345350085, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022379059521481396, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022385684461332857, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022391869463026525, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022401730646379292, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022411868730559944, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02242633531335741, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022441020677797498, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022455400205217302, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02247579117771238, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022500265068374575, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022530878055840732, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022561550261452793, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022603646912612022, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022651960598304866, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022713015642948448, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02278397262096405, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02286962823010981, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02299105264712125, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02311423869803548, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02324279533699155, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.023436917853541673, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.023694270788691938, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.023933586650528013, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024142057169228792, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.024413463873788715, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024695013924501838, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025097467368468643, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.025563809536397458, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.026132304146885874, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02671877196058631, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02730195549316704, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02778771111741662, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02813975497148931, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02856697713956237, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.029273484423756598, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.030087030939757824, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03128825662657619, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03268378495238721, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03398602145723999, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0358874704875052, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0374676478933543, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.128176689147949, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.127495765686035, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.126779317855835, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1262664794921875, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.125842571258545, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.125281572341919, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.12467622756958, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.12404727935791, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1232495307922363, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1224188804626465, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.121716022491455, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1206612586975098, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.119715929031372, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1183950901031494, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.117217779159546, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1161587238311768, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.114802122116089, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1132454872131348, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1114485263824463, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1096878051757812, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1072607040405273, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.104419469833374, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.100919008255005, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.097028970718384, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.092968463897705, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0884804725646973, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.085224151611328, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0830090045928955, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0816335678100586, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.08225154876709, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0848448276519775, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0884132385253906, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0948355197906494, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.100614309310913, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.105508327484131, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.107664108276367, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.107830762863159, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1110353469848633, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1157498359680176, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1142935752868652, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.123234748840332, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.132648468017578, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1313979625701904, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1353392601013184, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.150615930557251, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1536080837249756, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1660711765289307, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.20407772064209, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.2268948554992676, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.067921742340347, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.067921742340347, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07493540051679587, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07807308970099668, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08065706902916205, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08194905869324474, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08416389811738649, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08379475821336287, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08416389811738649, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.0858250276854928, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08564045773348099, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08674787744555186, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.0902547065337763, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08711701734957548, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08859357696567, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08988556662975268, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.08490217792543374, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.0873015873015873, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09080841638981174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.08416389811738649, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.08250276854928018, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09283868586194167, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.09856035437430787, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.10003691399040236, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.09616094499815431, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.08711701734957548, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.08564045773348099, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.09560723514211886, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014120337794410909, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014216826229820331, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01306895160340321, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013455690413391704, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01384334645135313, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013447907183106034, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013132860628201922, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012782471938640792, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012883406859005679, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012781457597666983, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012793739138429098, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013754421074639126, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014058220530242941, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.014391619047060366, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01473891932761702, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.014956399865840122, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.014794044420537363, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.015454813657816117, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.015595419240784883, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.0179116669197908, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.019481065748346894, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.01906738694792612, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.02081219256929746, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.023372409770025998, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.026224927508206652, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.030009468021850297, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.033647269861827825, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03702850835257741, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.038630494965670996, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04016425829627506, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04110673436828246, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.03970203835006732, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04045727241789112, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.04025089682274815, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.03759341049816608, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.03847075098413316, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.036489745206828635, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.03427374579926204, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.03908443440618539, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.04583961713885814, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.04691256866995452, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.050925927586996766, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.05498021395496464, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.057712400229259435, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05586979348992501, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.056893712901760075, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.05236488161811161, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.05292051204295287, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.061716059223326736, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 3.0912716674804686, "validation/loss_best": 3.150615930557251, "validation/acc_best": 0.10003691399040236, "validation/f1_best": 0.05586979348992501} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 3.09050088763237, "train/grad": 0.18345186404883862, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12806640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.127210693359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.126124267578125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12532958984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.124517822265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.123519287109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1226171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12158447265625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.120201416015625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.118895263671875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.117615966796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.115880126953125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1140625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.111617431640625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.109388427734375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.107318115234375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1045947265625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.10175048828125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0984539794921875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0954888916015624, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.091837158203125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0882623291015623, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0843524169921874, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.080245361328125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.076219482421875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0715509033203126, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0676202392578125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0643511962890626, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.06009033203125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0554925537109376, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0519415283203126, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.049091796875, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0457159423828126, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.0429315185546875, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.0398153686523437, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.0378662109375, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.037825927734375, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.039376220703125, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.042642822265625, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.0496151733398436, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.056947021484375, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.0627285766601564, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0684466552734375, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.0776127624511718, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.0931326293945314, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1107078552246095, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1272294616699217, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.155257263183594, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.1914028930664062, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023039481332525612, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023005284620448947, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022983453683555127, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022978790448978544, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02298013536259532, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022984459670260547, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022988885156810284, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022994275176897644, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.023003331841900943, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02301184552721679, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02302074382081628, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02303401967510581, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02304760259576142, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023065389757975938, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02308393788523972, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.023102913284674287, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023124877354130147, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.023153412444517015, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.023186843041330576, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.023217538986355066, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.023259821198880672, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.023308370159938932, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.023369980733841656, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02344689684920013, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023541400525718927, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02366674181073904, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0237956565618515, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.023923846511170268, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02411514302715659, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02436562182381749, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.024613687843084334, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024862148491665723, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.025217360192909835, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.025527192438021304, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025875662695616485, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.026254517193883657, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02673905024304986, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.027211683141067623, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.027736708130687474, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.028644317714497446, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.029518814152106644, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.030265406994149088, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03111860640347004, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0320545338653028, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0333425211161375, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03470077351666987, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03559959466569126, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03693182935938239, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.038657882222905754, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.127133369445801, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.126601219177246, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1258883476257324, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.125318765640259, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.124793529510498, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1240768432617188, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1233325004577637, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.122506856918335, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1214711666107178, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.12044620513916, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.119474172592163, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.118046283721924, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1167092323303223, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1148176193237305, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1130001544952393, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1113078594207764, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1091766357421875, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1067123413085938, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1039814949035645, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1014935970306396, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.098567485809326, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0957283973693848, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.092761993408203, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0899438858032227, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0873842239379883, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0848495960235596, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0826995372772217, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.080620765686035, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.077564001083374, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0740463733673096, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0715503692626953, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.070303440093994, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.069509983062744, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.067392587661743, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.062286853790283, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.058983325958252, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.062274217605591, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.072038412094116, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.080794334411621, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0814199447631836, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0764389038085938, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0765786170959473, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0892200469970703, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.094606637954712, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.106102466583252, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.171907424926758, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.249647378921509, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.314138412475586, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.3743414878845215, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07290513104466592, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07475083056478406, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07567368032484312, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07659653008490218, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0769656699889258, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.08176448874123293, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08342561830933924, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08231819859726837, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08065706902916205, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08157991878922112, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08250276854928018, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08342561830933924, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08397932816537468, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08840900701365817, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08933185677371724, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08767072720561092, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09062384643779993, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09597637504614248, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.09892949427833149, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.10003691399040236, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09985234403839055, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.0991140642303433, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.09449981543004798, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09154669619785899, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09394610557401255, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.08914728682170543, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09449981543004798, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.09837578442229605, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.09800664451827243, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.08656330749354005, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.07807308970099668, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.08324104835732743, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.09007013658176449, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014233804063726896, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013678777450807441, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01386363828891743, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014037411484940492, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014040174347303957, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013699380637116015, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013352176899930667, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013573683703650946, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013129784109105404, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.013308274542543888, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01387230391644041, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01398666674561075, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01488810099430253, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.016224974892285107, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.017230146053444476, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01943783552985962, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.021370954281218366, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.022860233238648917, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.024269773148682585, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.02537310402097807, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.02912030469268387, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.030702326268157044, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03137170956781513, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03228038075239709, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.033714495678916066, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03577728826727301, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.03732368473823946, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.0374541625247631, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.040754268848178794, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.0411677704970489, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04093506668818857, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04350169279297591, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04400731673111407, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.04853383875933376, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.053556223900219024, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.056620293741328966, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.054535316806817245, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.053203939258575, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04885395989621483, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.04490698528795995, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.04807425895341503, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.046829815787410585, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.05311084749948536, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.06089664527482926, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0637976665513065, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.05532812177068545, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.04623440838657383, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.04910299859875203, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.04989851533732494, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 3.0378662109375, "validation/loss_best": 3.058983325958252, "validation/acc_best": 0.10003691399040236, "validation/f1_best": 0.056620293741328966} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 3.0836911749839784, "train/grad": 0.1778644010424614, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.130054931640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.129395751953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1284521484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1277099609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.126998291015625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.125999755859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.124952392578125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1238623046875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.122537841796875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.121025390625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.119693603515625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.117762451171875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.115911865234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.113394775390625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.110875244140625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.108697509765625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.105997314453125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1028216552734373, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.09932373046875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0961279296875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.092218017578125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0881341552734374, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0837353515625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0790643310546875, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0740185546875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.068544921875, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0636322021484377, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0594000244140624, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0541033935546875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0484808349609374, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0440155029296876, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0404257202148437, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0363027954101565, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.033119812011719, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.03000732421875, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.0275994873046876, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.0259225463867185, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.0254339599609374, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.0261605834960936, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.0292703247070314, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0336367797851564, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.0390774536132814, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0498500061035156, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.062287139892578, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.0754150390625, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.0880447387695313, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1018327331542968, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.1227375793457033, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.1468049621582033, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02275855140760541, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022740371362306177, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02273210938088596, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022732324032112956, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02273309466429055, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022737060287036003, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0227413152391091, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022746569053269924, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02275417190976441, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022761801676824688, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022770696277730165, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02278043225873262, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02279048792552203, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022806202732026576, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022818733165040614, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022831983398646116, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022846845081076025, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02286621677689254, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022887072302401067, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02291018197312951, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022940115230157973, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022974859904497862, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.023019138565286992, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.023071504505351184, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023131231274455787, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.023216701885685323, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.023301949854940177, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.023389872163534164, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.023531228732317687, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.023727788403630257, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.023920684717595578, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024102610535919666, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02437479036860168, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024649995379149914, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025023170253261923, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02540363567881286, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.025793153736740352, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02615787148475647, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0265828560013324, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.027220737393945454, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.027876954609528184, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02857507662847638, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.029694981342181563, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.030780977662652732, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03184076745994389, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03274635319598019, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03364023031666875, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.035026641683653, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03636887650005519, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1266207695007324, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.126119613647461, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1254138946533203, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1247751712799072, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1241719722747803, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.123352289199829, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1224911212921143, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1215689182281494, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.120398759841919, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.119208335876465, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.118069887161255, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1164751052856445, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1149823665618896, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1128716468811035, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1110141277313232, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.109232187271118, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.107050657272339, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.104570150375366, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1019070148468018, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0994529724121094, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0964925289154053, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0933401584625244, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.089771032333374, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0858397483825684, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.081601619720459, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.07649564743042, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.072039842605591, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0682923793792725, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0640463829040527, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0605249404907227, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.058737277984619, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0578248500823975, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0564799308776855, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0539824962615967, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.048776626586914, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0423498153686523, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0375239849090576, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0389745235443115, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0473663806915283, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.062206506729126, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0689380168914795, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.069352149963379, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0744924545288086, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.082233190536499, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1019086837768555, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1409082412719727, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1546988487243652, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1838128566741943, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.2213826179504395, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.067921742340347, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07364341085271318, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07825765965300849, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.08102620893318568, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.08194905869324474, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08324104835732743, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08508674787744555, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08637873754152824, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.09062384643779993, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.0917312661498708, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09154669619785899, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.0946843853820598, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09523809523809523, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09782207456626062, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09800664451827243, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09782207456626062, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09948320413436693, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.10132890365448505, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.10409745293466224, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.10686600221483943, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1079734219269103, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.10834256183093392, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.1050203026947213, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.10299003322259136, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.10077519379844961, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.10631229235880399, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.11332595053525286, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.11203396087117018, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.10095976375046142, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.09505352528608342, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.09782207456626062, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.09099298634182355, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.08656330749354005, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013341676869186773, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013472025745314614, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013153348523946241, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013651312535200992, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012924359120770541, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0129449372943686, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013058263953408413, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01394120116651991, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.014444057690461783, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014495076165672865, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.014828970916060677, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01570194664264753, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.016782596665190472, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01801367378789302, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.018403475996188266, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.019123768927504066, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01904627591876537, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.019512496290118645, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.021106413742635218, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.02166683404964938, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.021634537119835803, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.02239445631803068, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.025347768809652924, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.026610639244038493, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.030836828034263995, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.0333849699453591, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.0342419607436798, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03715798219266811, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.03919277415130023, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04430637665214729, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.0447132069256748, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04483366641599751, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04674630991345612, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.047669576726538605, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.04950638480539219, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.053956506276005366, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.056397433342681, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.05639299896841726, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04965491382392662, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.0455587267334348, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.0457086639358949, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.056175756484847496, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.06398156835832226, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.06454785638335463, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05474207326093017, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.05086737490815296, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.05200382385010343, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.04400815188053472, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.03859420186803355, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 3.0498500061035156, "validation/loss_best": 3.0744924545288086, "validation/acc_best": 0.11332595053525286, "validation/f1_best": 0.06398156835832226} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 3.06859082698822, "train/grad": 0.17420050241053103, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.129151611328125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12849365234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1275048828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.126578369140625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.125738525390625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1245703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.123436279296875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.122156982421875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.12053466796875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11887451171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.117308349609375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1149853515625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.112890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.109931640625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.10717041015625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.10465576171875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.10141845703125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0978204345703126, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.093798828125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0900787353515624, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.085565185546875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0809765625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0759906005859374, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.070616455078125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.06506103515625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0586712646484373, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.053154296875, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0482000732421874, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.04178466796875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0346923828125, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0286767578125, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0237261962890627, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.017642822265625, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.0127774047851563, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.0077835083007813, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.0030953979492185, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9988479614257812, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.996610412597656, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.99605712890625, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.9979214477539062, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0012747192382814, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.0055502319335936, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0129637145996093, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.020813751220703, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.0308887481689455, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.04384162902832, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.0595433044433595, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.0817304992675782, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.109397125244141, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022465804666280745, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022456428790464996, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02245240641757846, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022451961291953922, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022454015668481587, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022457049572840334, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022459826525300743, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022464437168091534, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02247069938108325, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022479896703734993, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0224866586830467, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022496327990666032, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022508639693260193, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02252607747912407, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022541467919945716, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022559421211481093, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02258133705239743, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022608621716499328, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022640703041106464, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022670493703335525, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02271072261966765, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022750738244503736, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022796906996518373, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022850272543728352, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022906468212604524, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.022980732517316936, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.023051287019625305, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02312175065279007, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02322506974451244, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02336492878384888, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02350759206339717, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02365275694988668, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.023883250700309874, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024122337801381944, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.024433156233280896, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02474318684078753, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.025097061470150946, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02551663157530129, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02607638029381633, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.026821931768208743, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.027471584277227522, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02808182262815535, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02889626022428274, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02966000306420028, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03051001677289605, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.031388221913948654, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0324229955393821, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03369995190761983, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03526619427837432, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1263439655303955, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.125847578048706, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.125155448913574, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.124559164047241, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1239311695098877, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1230900287628174, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1222217082977295, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.121302366256714, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.120112419128418, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.118882417678833, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1177737712860107, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.116079330444336, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1145219802856445, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.112306833267212, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1102826595306396, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1083359718322754, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1059300899505615, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1030960083007812, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.100006341934204, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.09716534614563, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.093801498413086, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0904393196105957, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0869674682617188, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.083542823791504, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0802857875823975, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0765655040740967, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.073214054107666, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0697638988494873, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.064605712890625, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.058222532272339, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.052931308746338, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0493249893188477, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.047553539276123, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0494885444641113, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.054713487625122, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.059018135070801, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0608396530151367, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0617423057556152, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.065221071243286, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0741333961486816, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.081348419189453, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0862958431243896, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0909736156463623, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0979092121124268, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.099724769592285, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.089874029159546, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.100515365600586, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1613101959228516, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.2258574962615967, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.073827980804725, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07622739018087855, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07807308970099668, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07770394979697305, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07807308970099668, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0799187892211148, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.08342561830933924, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08637873754152824, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08490217792543374, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.0858250276854928, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.0873015873015873, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08803986710963455, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09062384643779993, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09117755629383537, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.0932078257659653, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09800664451827243, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09856035437430787, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.10225175341454411, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.10335917312661498, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.10077519379844961, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09892949427833149, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09966777408637874, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09579180509413067, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.09542266519010704, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.0932078257659653, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09357696566998892, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09210040605389443, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09689922480620156, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.09856035437430787, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.09837578442229605, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.1037283130306386, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.10280546327057954, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.09449981543004798, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.08896271686969362, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013756322906578794, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013972803810050234, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014002378572213786, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01416141984961429, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014381574240856995, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014238239890710805, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014677974678297531, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014686412272556149, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.014676827982110492, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.015063306834054447, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01583556642841116, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.016720370044766283, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01708517035298064, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.017868438604159292, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.019809572796252613, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02135505354577694, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.022694825676856125, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02287771611839397, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.023402784545648595, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.024272979497580182, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.026193092896109684, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.02832083101448268, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.028844456695958687, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.030223778881341545, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.034031309643689785, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03620676388987845, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.038246750230341416, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03916362523885592, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.03925709793007456, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04162041545413153, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04630868783145938, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04627050134057031, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04860941990830502, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.04872536160269486, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.04655023325900739, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.045139353305577924, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.045315904254491855, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.042859389476847386, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04185566683936635, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.03948697883442308, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.039967283008564954, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.040255485425169976, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.04590250958542529, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.05041502600042739, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.053450605275430906, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.06138964525068077, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.060569567814600954, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.04999458476617304, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.04236431219699823, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 3.04384162902832, "validation/loss_best": 3.089874029159546, "validation/acc_best": 0.1037283130306386, "validation/f1_best": 0.06138964525068077} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 3.0591141355037688, "train/grad": 0.17313546437770128, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.127110595703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.126387939453125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.125345458984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.124366455078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.123375244140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.122198486328125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1208251953125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.119444580078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.117802734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.115838623046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1141748046875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.111632080078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1094189453125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.106175537109375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.10322021484375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1005517578125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.09705078125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.093095703125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.088785400390625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0848297119140624, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0799456787109376, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.075015869140625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.06970703125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.064031982421875, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0582745361328123, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.05162841796875, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.04593017578125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0408258056640625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0342950439453125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.02707275390625, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0209619140625, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.01571533203125, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0093484497070313, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.004393310546875, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9994711303710937, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.995538330078125, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.992708435058594, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.9910302734375, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9900692749023436, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.989812316894531, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.9898236083984373, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.990379638671875, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.9920062255859374, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9938003540039064, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.9974789428710937, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.0072335815429687, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.0212306213378906, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.0452435302734373, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.0719932556152343, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02275837435387075, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02275171515531838, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022749351020902395, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022748108496889474, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022750381389632823, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02275185096077621, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02275481547228992, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022757166009396316, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022762087723240256, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022766470480710267, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022769931815564633, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022776022842153906, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022782175689935683, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022791066979989408, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022802208177745344, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022812451627105476, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02282575718127191, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02284181296825409, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022862904211506246, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022881894605234265, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02290760837495327, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022936658803373575, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022972714230418204, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02301926418207586, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023073902893811463, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.023149374276399612, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.023224499970674516, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.023299730084836483, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02340795874595642, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.023544267117977143, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.023667460763826965, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.023782105399295688, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.023957451656460762, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.024141825344413517, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02440112908370793, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.024695667969062925, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.025059839161112905, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02544357837177813, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.025888717584311962, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.026453992975875737, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.026904070125892757, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.027298620902001858, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.027791951512917877, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.028203326612710952, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02872879520058632, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02973814123310149, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.030998355802148582, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03265605502761901, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03411588378250599, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.126112461090088, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.125664710998535, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124992847442627, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1243669986724854, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.123746633529663, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1229889392852783, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.12210750579834, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1212072372436523, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.120072841644287, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1189632415771484, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.117931842803955, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.116407871246338, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.114964008331299, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1130032539367676, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1111364364624023, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.109391689300537, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.107193946838379, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.104612112045288, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1017813682556152, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0991439819335938, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.095900297164917, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0925943851470947, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.088958501815796, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0851547718048096, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.081386089324951, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.077200412750244, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.073655605316162, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0706567764282227, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.066735029220581, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.062645196914673, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.059965133666992, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.058532476425171, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.05865216255188, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.060511589050293, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.062805652618408, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0621767044067383, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0582051277160645, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0526366233825684, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.047567129135132, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0443010330200195, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0438573360443115, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.04618501663208, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.050717830657959, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0540242195129395, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.0607047080993652, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.070410966873169, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.07816743850708, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.0903513431549072, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0911638736724854, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07493540051679587, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07659653008490218, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07899593946105574, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08102620893318568, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08490217792543374, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08564045773348099, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08877814691768181, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09099298634182355, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09449981543004798, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09597637504614248, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09671465485418974, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.0976375046142488, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09819121447028424, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.09856035437430787, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09819121447028424, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09726836471022518, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09837578442229605, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.0976375046142488, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09616094499815431, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09689922480620156, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09653008490217793, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09726836471022518, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.10114433370247324, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.10169804355850867, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.09726836471022518, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.09357696566998892, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.09616094499815431, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.09892949427833149, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013575721810381773, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013483300462942459, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01411605361657866, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013782228860575245, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013873290512887769, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014376960208886722, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01495384368198085, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015125731192710422, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015467976066742684, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0157738633274899, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.017268938353843088, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.017313678489367365, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.018056010377457483, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.020230573350758726, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.020758503723223753, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02063941898919864, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.020881268186799418, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.022374975656651048, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.023128791446284363, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.024917870985551784, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.026532423417507903, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.028289752751928552, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.030275574859420586, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03212198557569281, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.034112259472091976, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03657383000559054, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.03698215349001371, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03996552628295876, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.042347811299989734, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.045838176033867674, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.047222702627211365, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.048019856654178045, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.0479912307254221, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.04780050768348724, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.048957841269198714, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.04939438949285382, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.050841177289453365, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.05253530578636715, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.05089894753597988, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.048421157052934476, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.048956744381466216, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.050642758006969886, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.05259133792883668, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0571186295454027, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05950666544001134, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.05373148826257235, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.05088976985938365, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.05447935021557244, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.056839942734099626, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 2.9974789428710937, "validation/loss_best": 3.0607047080993652, "validation/acc_best": 0.10169804355850867, "validation/f1_best": 0.05950666544001134} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 3.0540862596035003, "train/grad": 0.16858908046036958, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.125267333984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124603271484375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.123497314453125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.122410888671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12145263671875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1202197265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.118836669921875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11736572265625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.115447998046875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.113656005859375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.111805419921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.10916015625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.10674072265625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.10339599609375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.100345458984375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0974334716796874, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0939105224609373, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0898321533203124, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0853936767578123, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.08126953125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.076466064453125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.071456298828125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.066077880859375, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0602880859375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0544281005859375, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.047587890625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.041663818359375, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.036524658203125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0298468017578126, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0226226806640626, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.016695556640625, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.01178466796875, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.00571044921875, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.0008966064453126, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9956698608398438, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.990948486328125, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.986611022949219, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.983206787109375, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.980482482910156, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.9787774658203126, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.9786395263671874, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9794927978515626, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.9824658203125, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9873919677734375, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.9950547790527344, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.0044122314453126, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.0147853088378906, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.027402648925781, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.0407940673828127, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022413795115426183, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02241021389141679, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022408427344635128, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02240846953354776, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022408700897358358, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02241021417081356, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022412368659861386, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022414321601390837, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022416112259961665, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02242089464329183, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022423812663182618, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022428293279372156, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022435048460029066, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022442378862760962, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022450980320572852, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022460001921281218, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02247139952145517, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0224847972439602, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02250161272007972, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02251725278329104, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022538130227476357, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02256057254038751, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022587774782441557, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022619020333513616, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022657794505357744, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02271060680039227, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022768783746287227, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02283255078829825, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.022936321971938015, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.023082855567336084, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02323398046195507, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.023384696636348963, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.023605679492466152, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.023803699342533947, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02402171247638762, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02421224497258663, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.024415680300444366, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.024637147169560194, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.024957755897194147, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.025476586520671845, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.025929952459409832, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.026352810235694052, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02694145644083619, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02749359905719757, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02809919774532318, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.028757754247635603, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.029410325083881618, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03014508781954646, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.030951964780688287, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1258561611175537, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1253836154937744, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124671697616577, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1239805221557617, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1233441829681396, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1224758625030518, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1215476989746094, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1205430030822754, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1192569732666016, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1179654598236084, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.116777181625366, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.114964008331299, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.113290548324585, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1109516620635986, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1088242530822754, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.106788158416748, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.104326009750366, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1015007495880127, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.098437786102295, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0956814289093018, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0923683643341064, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0890564918518066, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.085515260696411, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0818536281585693, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0781919956207275, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.074101686477661, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.070591449737549, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.067622423171997, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.063826560974121, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0598394870758057, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0568320751190186, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0544207096099854, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0517029762268066, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0498385429382324, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0488831996917725, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.050395965576172, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.055626153945923, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0610923767089844, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.065859317779541, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.07350754737854, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0792651176452637, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0818686485290527, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0789356231689453, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.074113130569458, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.0781726837158203, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.091918706893921, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.10514497756958, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1105220317840576, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1181254386901855, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07345884090070137, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.073827980804725, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0754891103728313, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07585825027685493, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07770394979697305, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07825765965300849, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08139534883720931, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.082687338501292, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08508674787744555, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08859357696567, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08896271686969362, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09246954595791805, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09431524547803617, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09302325581395349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09210040605389443, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09431524547803617, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09597637504614248, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09671465485418974, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09653008490217793, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.09874492432631968, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09505352528608342, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09800664451827243, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09819121447028424, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.09653008490217793, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09708379475821337, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09929863418235511, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.10114433370247324, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.10151347360649686, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.10428202288667406, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.10132890365448505, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.10022148394241417, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.09782207456626062, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.09819121447028424, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.10243632336655592, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013858000151874295, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014023698969357188, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014031641497501553, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014542561413394878, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0141947058810551, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014336670156720735, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014661853292514182, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014858764484932266, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015193918317030089, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.015875649293877307, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.016469029039691354, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.017140277970899866, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.018204861505518807, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.019497507657745234, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.021191824029004047, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.021838719081494984, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02237314369816784, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.022798559195007922, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.02505026230793626, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.025872043815834175, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.02690821096799134, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.0299092105153831, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03185349272814539, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03499396005538397, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.038256899202197825, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03914775665805323, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.042053426035106194, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.04363971617917472, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.042634138577833776, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04228366272998696, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04429021897498949, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04527136202046506, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.045051608563879235, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.04598514972334086, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.04671757508374589, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.04572393005477756, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.046968364268582814, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.04827429707241126, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.047612686627275934, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.046709786531583086, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.04611928491060851, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.04593382322282244, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.04743167772593491, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.050635451969990815, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05139491836238966, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0549855166177732, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.05484735343361319, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.053267768834889005, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.052466490221678076, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 2.9873919677734375, "validation/loss_best": 3.074113130569458, "validation/acc_best": 0.10428202288667406, "validation/f1_best": 0.050635451969990815} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 3.052132121324539, "train/grad": 0.16907687298953533, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.129056396484375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.128392333984375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12721923828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12610595703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.125159912109375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.123946533203125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.122406005859375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.120860595703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.118978271484375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11700439453125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1151806640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.11249267578125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.11004150390625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.106595458984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1035693359375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.100714111328125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0971063232421874, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0931304931640624, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0886517333984376, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0845074462890625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0796923828125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0746771240234376, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0691522216796874, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.063416748046875, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0574481201171877, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.05075439453125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0448150634765625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0395989990234376, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0328179931640626, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.025069580078125, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.018707275390625, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.01319091796875, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0063742065429686, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.000710754394531, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9947515869140626, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.989465637207031, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9844320678710936, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.980252685546875, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9767277526855467, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.973665771484375, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.972043151855469, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9709933471679686, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.9701841735839842, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9705413818359374, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.9723301696777344, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9776707458496094, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.9857057189941405, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.9985626220703123, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.009604644775391, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02282728659454733, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02282522790133953, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022824139948934317, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022823392292484642, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02282379283569753, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02282337062060833, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022824839390814304, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022826891806907952, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022827633065171538, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02283079787157476, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02283274720888585, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022836004868149757, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022840829002670943, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02284552709199488, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022851506415754556, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022857307926751672, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022865410326048732, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02287423293571919, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022886229287832974, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02289880036842078, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022916438058018684, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02293734572827816, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02296519847586751, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022998454426415266, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023040009164251387, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02309935353230685, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.023153977114707232, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.023210235578007995, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.023292390704154967, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02339705053716898, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02349645330570638, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.023591868588700892, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02373077383264899, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.023870005616918207, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.024054685765877368, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.024260918702930213, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.024496642174199224, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.024734242148697377, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.025023425798863172, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02544350334443152, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.025803401507437228, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.026109386086463928, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.026448757722973823, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.026711622085422278, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02707027077674866, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02762654786929488, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.028288185019046067, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02919036633335054, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.029899394046515225, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1257259845733643, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.125258207321167, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124528408050537, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1238059997558594, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1231489181518555, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122260570526123, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1213204860687256, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120281219482422, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1189777851104736, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.117649555206299, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1163790225982666, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1144983768463135, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1128153800964355, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1103813648223877, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1081254482269287, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.105992078781128, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.10335373878479, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.100292444229126, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.0969808101654053, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0939950942993164, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0904064178466797, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0868568420410156, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.083144187927246, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0793678760528564, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.075759172439575, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0716729164123535, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0683255195617676, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0653865337371826, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.061636209487915, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.057722806930542, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.054647922515869, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0521481037139893, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0496432781219482, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.047696113586426, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0455591678619385, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0429847240448, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.039637804031372, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.036287546157837, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0323281288146973, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.026881694793701, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0232021808624268, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0211000442504883, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0200226306915283, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0220284461975098, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.025092840194702, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.028735876083374, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.035554885864258, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.0453944206237793, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.062870979309082, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07438169066076043, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07475083056478406, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07622739018087855, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07825765965300849, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0784422296050203, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.08010335917312661, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.08305647840531562, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0858250276854928, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08748615725359911, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.09043927648578812, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.09210040605389443, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.0932078257659653, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.09394610557401255, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09191583610188261, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09283868586194167, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09579180509413067, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09708379475821337, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09745293466223699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09671465485418974, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09708379475821337, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09726836471022518, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.09745293466223699, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09726836471022518, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.10151347360649686, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.10114433370247324, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.10003691399040236, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.10446659283868587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.10446659283868587, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.10575858250276855, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.10778885197489849, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.10889627168696936, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.11092654115909929, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.11277224067921743, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.1124031007751938, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.10778885197489849, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.10741971207087486, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013829445264938798, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013775664381480089, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013668682723125561, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014177399568999693, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0140642629130032, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014245416425491922, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01482638287972394, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015153188547545528, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015763090682295363, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.016483765882294135, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.016712121045701744, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.017473092320522667, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01908312797850735, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02168196484564026, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02227176014978532, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.022955252781965283, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.023958259507801238, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.024409915756108517, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.024968675789858067, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.027128203795443267, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.03039240560119505, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.03224415991439688, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03516971700758718, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03759304247471232, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.0396169323074131, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.04132796169869724, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04108046245601276, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.04301727988548587, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04568494995727976, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04648744821018247, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04793664394346255, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.047666226354224676, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.049372233917839535, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.05054997164315267, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.05049038468644792, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.05082463801275084, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.05313776643173543, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.05303761891532674, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.05251932683390992, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.0575649409814066, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.06018075025137463, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.06406436403445326, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.06553279662684892, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.06661064250658102, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.06892438843005882, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.07068307357283168, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.06695337523528634, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.05850017772465551, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.05282020825612022, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 2.9776707458496094, "validation/loss_best": 3.028735876083374, "validation/acc_best": 0.11277224067921743, "validation/f1_best": 0.07068307357283168} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 3.0442697966098784, "train/grad": 0.1648866469040513, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.125467529296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124783935546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12369384765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1227392578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12167236328125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1202880859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.118861083984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11541015625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.113311767578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11142333984375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.10875244140625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.106138916015625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.10247802734375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0992529296875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0961834716796877, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.092452392578125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.088245849609375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0834893798828125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0792669677734374, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0742181396484374, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0690411376953124, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0633837890625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.05751708984375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.051435546875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0444342041015626, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0382818603515624, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.032921142578125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.025955810546875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.01812255859375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.01154296875, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.006190185546875, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.9991156005859376, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.9933065795898437, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.986944274902344, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.981217041015625, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9755218505859373, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.970654296875, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9658856201171875, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.960917663574219, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.957966613769531, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.955995788574219, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.954661102294922, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9547883605957033, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.956664276123047, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9608522033691407, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.9662403869628906, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.976340026855469, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.987851409912109, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022603497905656696, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022599823577329516, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02259809708222747, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022597562298178674, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02259660428389907, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022597912503406405, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022596443463116886, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022596890972927212, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022597359037026764, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022597555285319686, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022598801674321295, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022599385781213642, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022602089773863555, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022604889338836073, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022607891373336315, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022612406611442565, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022620496228337287, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0226276084408164, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022638670885935425, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022649108730256558, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022664637956768274, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02268172471784055, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02270200598053634, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02272669777274132, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022755656968802215, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.022795760603621602, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022837674953043462, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.022880002083256842, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.022944943932816386, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02303404494188726, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.023120396342128514, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.023204960869625212, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.023328102631494402, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.023444272605702282, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.023586445720866323, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02373266448266804, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.023874129708856343, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.024007533453404905, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0241518063377589, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.024361953036859632, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.024546369472518564, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.024731352468952538, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.025011892383918165, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02529563521966338, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.025670796278864146, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.026128721199929716, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.026601796969771385, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.027325722612440585, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02806609958410263, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1257119178771973, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1252381801605225, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1245031356811523, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1237754821777344, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1231236457824707, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122251272201538, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.121281147003174, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1202874183654785, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1190264225006104, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.11776065826416, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1164956092834473, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1147656440734863, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.113116979598999, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1108527183532715, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.108729124069214, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1067848205566406, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.104365587234497, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1015963554382324, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.098593235015869, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0959079265594482, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0926504135131836, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0894036293029785, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0860002040863037, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0825564861297607, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0791256427764893, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.07535719871521, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0720973014831543, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0692734718322754, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.065749168395996, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.061905860900879, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0589027404785156, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0564067363739014, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0536561012268066, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0514564514160156, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0493714809417725, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0475289821624756, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0460362434387207, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.044722080230713, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0430259704589844, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0413477420806885, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.040806293487549, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.041292428970337, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.043686866760254, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0467610359191895, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.048360586166382, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.0447733402252197, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.0393545627593994, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.0356760025024414, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.04278302192688, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.067921742340347, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07161314138058324, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07567368032484312, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07751937984496124, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.0784422296050203, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.0812107788851975, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08361018826135105, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08564045773348099, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.0873015873015873, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08988556662975268, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09043927648578812, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09228497600590624, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09191583610188261, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09376153562200074, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09394610557401255, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09560723514211886, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.0976375046142488, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09856035437430787, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.10040605389442599, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.10188261351052048, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.10317460317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.10446659283868587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.10188261351052048, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1020671834625323, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.10188261351052048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.10095976375046142, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.10132890365448505, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.10299003322259136, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.10409745293466224, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.11037283130306386, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.11277224067921743, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01406944516124742, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01362818336155349, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014087603858588786, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014111687384181629, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014688722740420069, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014836547742426225, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01515468449682249, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015346890539585685, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015562249473860921, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01636067658938996, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.016581745822107186, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.017057099007679717, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0186720304100033, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.019600670908218997, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02018363170388041, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.020045786643178253, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02026372402668752, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.021630087661746997, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.022434288035567466, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.025586737847904995, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.02713794495380491, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.028351749226981905, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.030678643800103034, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03241753934382842, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.03439534940584023, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.035699224852055456, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.03709718618423001, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.0379791953704822, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.03794759307025921, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.038492062091741226, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.03815384339250943, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.039643498540700074, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04114383085151708, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.04290984863960403, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.0447650992542211, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.0457275203675934, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.0471819777634662, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.047800530959176434, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.05088521438632773, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.054482031089106826, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.05326961670745414, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.0536241197012829, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.051810908991164854, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.05043269917839736, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05053855500384591, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.05244577792704975, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.052795289061473134, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.061645145865772556, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.06722885206719868, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 2.987851409912109, "validation/loss_best": 3.04278302192688, "validation/acc_best": 0.11277224067921743, "validation/f1_best": 0.06722885206719868} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 3.045362477302551, "train/grad": 0.16385822754353285, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.129976806640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.129300537109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1282373046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.127139892578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.126063232421875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1248046875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1232666015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.121693115234375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11969482421875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.117596435546875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11575927734375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.112989501953125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.110450439453125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.10682861328125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1035650634765624, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.10052978515625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0967742919921877, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0924224853515625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0878033447265625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.08345703125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0783062744140626, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0730078125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0671295166015624, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0610101318359373, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0547900390625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0475469970703126, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0413055419921875, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0356610107421873, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.028365478515625, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0201535034179687, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0132989501953125, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.007540283203125, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0000994873046873, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.994140319824219, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.987448425292969, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9813079833984375, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9752902221679687, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.9698223876953125, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9642459106445314, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.9583624267578124, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.9546923828125, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9520599365234377, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.950064392089844, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9496121215820312, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.9509677124023437, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9540560913085936, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.9581239318847654, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.9643153381347656, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.9716868591308594, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022695570029318334, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02269254336133599, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022689982326701285, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02268849899992347, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022686393968760966, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022685868358239533, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022683786265552043, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022683411864563824, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022681206483393907, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0226807077601552, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02267925416585058, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02267666768282652, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022675449224188925, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022675799620337785, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022676026821136473, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022677301852963863, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022677526720799507, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02268022666219622, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022681060521863403, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022684592423029244, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02269035267177969, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022695944774895907, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02270365203730762, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02271364082582295, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02272802344057709, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02274473531637341, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022757093678228556, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0227753121079877, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02280164576601237, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.022835349035449325, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.022873441032133998, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02291494617238641, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.022980912532657386, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02304860283154994, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02314193258062005, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02324627713300288, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02337010818067938, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02349804141558707, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02365756737999618, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02390490211546421, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.024142045695334673, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02437439206056297, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02470663453452289, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02502003824338317, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.025433049974963068, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.025949546052142978, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.02643765605054796, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02700806025415659, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02751658009365201, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.125638008117676, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1251654624938965, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124396562576294, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1236798763275146, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1230125427246094, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1221253871917725, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.121157646179199, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1201064586639404, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.118814706802368, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1175105571746826, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.116220712661743, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1144375801086426, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1127235889434814, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.110360622406006, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1082286834716797, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.106196880340576, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.103693962097168, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1008336544036865, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.097749710083008, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.094949722290039, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.091522216796875, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.088085651397705, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.084317922592163, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0803112983703613, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0762364864349365, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0714023113250732, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.067230224609375, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.063579559326172, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0588653087615967, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0537784099578857, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0495893955230713, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.046170234680176, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.04201602935791, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0387542247772217, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0352423191070557, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0326123237609863, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0304107666015625, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0294671058654785, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.029273748397827, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0291080474853516, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.028468132019043, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0273935794830322, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0262722969055176, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0262959003448486, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.027317762374878, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.029179334640503, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.029588460922241, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.0330705642700195, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0438976287841797, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07290513104466592, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07456626061277224, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07585825027685493, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07678110003691399, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07862679955703211, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08084163898117387, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.0812107788851975, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.0843484680693983, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08711701734957548, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08933185677371724, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09062384643779993, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09228497600590624, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09449981543004798, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09357696566998892, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09597637504614248, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.10003691399040236, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.10225175341454411, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.10225175341454411, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.10262089331856773, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.10483573274270949, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.10317460317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.10409745293466224, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.10335917312661498, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.10465116279069768, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.10557401255075674, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.1064968623108158, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.11018826135105204, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.11000369139904023, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.11055740125507567, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.10908084163898117, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.10852713178294573, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01407189483336455, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013568571058571876, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013818311198546143, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013948206381940262, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014075224904450704, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013957434253472602, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014303213227820846, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014704304994380121, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015273490307863756, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.015379871774280273, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015718619814285295, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01615243398552964, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.017001774462175788, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.019195771783865917, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01907114165751318, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01989658637991671, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.019815353790031905, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.020823042106490185, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.02204633409903298, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.024105122147526265, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.024998071350147755, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.027372412805852772, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.030373046213644204, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.031342208547998456, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.034732435134385835, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03725919390278928, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.039917032195026486, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.0415832757731121, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04359096253217328, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.043392303861188364, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04633106381459182, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04606943833898495, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04782550823332554, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.05095573643938819, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.052259768031177024, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.053126611224840824, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.05364170848102783, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.054924559624689596, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.054230515771402955, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.05485741104853364, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.054242480435370666, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.05625555051777059, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.05768575872221205, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.05887956107590483, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05839522676869569, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.05705935655471842, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.057724618849565944, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.055908093403686764, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.055025163699489586, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 2.9581239318847654, "validation/loss_best": 3.029588460922241, "validation/acc_best": 0.11055740125507567, "validation/f1_best": 0.057724618849565944} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 3.0353502345085146, "train/grad": 0.15903729490935803, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12926513671875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1285107421875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.127391357421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.126297607421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.125347900390625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.123880615234375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.122310791015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.120709228515625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11876953125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1165771484375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11451171875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1116455078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.108857421875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1051318359375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.10154052734375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0984222412109377, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0943115234375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.08982177734375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0847686767578124, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.08027587890625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.074752197265625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.069232177734375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.06314697265625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.056695556640625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0501483154296873, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0424298095703124, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.035595703125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0295819091796874, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0217095947265626, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.012694091796875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0051168823242187, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.9984722900390626, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.990081787109375, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.982940673828125, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9751296997070313, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9676580810546875, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9601605224609373, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.9535946655273437, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.946827697753906, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.939369812011719, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.934241943359375, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9302462768554687, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.926107940673828, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9233712768554687, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.9212942504882813, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.920657196044922, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.921133575439453, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.9237879943847656, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.9276380920410157, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022313583279028534, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02231198748573661, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022309361305087805, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022308407295495273, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022308073230087756, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02230710151605308, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02230661377310753, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02230586964637041, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022305661588907243, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022305319923907517, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022304797023534776, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02230440984480083, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022303425027057528, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02230382355861366, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02230504556559026, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022305206693708897, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02230727853253484, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022310536988079548, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022313262177631257, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022316914815455675, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022320092441514134, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022326195603236555, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022333806958049537, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022340856920927764, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022350796079263092, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.022362080086022616, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02237429954111576, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02238602369092405, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.022402063319459556, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.022424188265576957, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.022449812646955253, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.022473464366048575, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.022514224187470972, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0225548873282969, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.022612050850875677, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0226784526091069, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02276066204532981, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02284782432951033, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.022948259459808468, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.023086311258375646, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.023199309478513896, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02331472986843437, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02347968370653689, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02363305744715035, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.023815008508972824, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02403266199864447, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.024247803343459964, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.024588109543547034, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.024936982672661544, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.125635862350464, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1251540184020996, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1244289875030518, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1237354278564453, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1230602264404297, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1221730709075928, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1212191581726074, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120187520980835, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1189229488372803, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.11759877204895, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1163547039031982, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.114581823348999, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.112910747528076, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1106178760528564, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1084742546081543, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.106480598449707, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.104001998901367, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.101177453994751, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.098111629486084, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.095280647277832, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.091891288757324, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.088423728942871, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.084683656692505, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0807900428771973, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0768160820007324, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0722169876098633, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.068272590637207, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0648114681243896, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0603525638580322, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.055408477783203, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0513882637023926, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0479965209960938, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.043862819671631, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0405397415161133, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.03702712059021, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0338823795318604, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.030700206756592, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.027867555618286, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0250489711761475, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.022082567214966, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.020366907119751, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0195560455322266, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0196120738983154, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0204503536224365, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.022815704345703, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.027911424636841, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.0322890281677246, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.0369722843170166, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.039382219314575, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0754891103728313, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07641196013289037, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07733480989294943, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07918050941306755, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.082687338501292, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08397932816537468, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08564045773348099, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08637873754152824, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08785529715762273, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08914728682170543, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09080841638981174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09099298634182355, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09357696566998892, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09800664451827243, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.10040605389442599, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.10225175341454411, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.10391288298265043, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.1035437430786268, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.10391288298265043, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.10575858250276855, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.10575858250276855, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.10520487264673312, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.10612772240679218, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1050203026947213, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.1064968623108158, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.10741971207087486, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.10594315245478036, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.1035437430786268, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.10428202288667406, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.10280546327057954, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.10465116279069768, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01413571947842837, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013750136811633845, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014086668978473146, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01375598960628012, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013986048897203862, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013798386298475669, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014423667651961096, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014656538890044325, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01459893660266679, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01572160414182724, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015476466774259479, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.016153161942604913, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01721368495281911, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01917313640068479, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.020498388579934697, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.020020915735334415, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.021047180750741218, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.022651463252476117, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.024908907470156786, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.026591784211820225, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.02814566332714377, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.0296785873734802, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.0330900874068862, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.0338470788988579, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.03655040074725621, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03750616593619832, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.03888813632855533, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.040319206705585635, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04205774019188944, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.041955460634342544, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04442282546207448, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.0476213436079648, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04998524428918022, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.05185377941984859, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.053824745228519856, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.054173032413422396, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.054990393595736466, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.057609818021815085, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.05853446887749816, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.05893839059275874, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.060803071096307844, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.06063393654473176, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.06159648773666584, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.06163960216414102, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05982178357010732, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.05583502933245641, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.05635132224499314, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.05552776298537624, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.05805398064535022, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 2.9233712768554687, "validation/loss_best": 3.0204503536224365, "validation/acc_best": 0.10741971207087486, "validation/f1_best": 0.06163960216414102} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 3.02494158744812, "train/grad": 0.1581089948117733, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12243896484375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.121680908203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.120308837890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11920166015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1181640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1166357421875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11503173828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.113363037109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.111263427734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1089599609375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.106988525390625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.104085693359375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.10138671875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.09760009765625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0941668701171876, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0908673095703123, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0869049072265624, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.08234130859375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.077413330078125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0728826904296875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0672064208984375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0616546630859376, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0552728271484373, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0488311767578127, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.04208251953125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0340777587890626, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0271417236328126, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0208489990234373, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0128094482421877, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0035125732421877, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.9957589721679687, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.988956298828125, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.9803240966796873, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.972882080078125, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9646755981445314, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9569265747070315, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9491461181640624, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.941916198730469, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9345938110351564, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.9263067626953125, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.920331573486328, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9157151794433593, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.9101528930664062, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9062799072265624, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.902944793701172, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.900476379394531, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.89938232421875, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.8994509887695314, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.900796661376953, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022409383151680232, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02240685772150755, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02240465484559536, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022403763411566614, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02240420219488442, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02240315459668636, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02240238631144166, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02240206565707922, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02240115771070123, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0223992270603776, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022399057615548373, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02239891010336578, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02239715007133782, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02239834701642394, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022396922986954452, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022395271230489015, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02239541942253709, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022394714169204234, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022394453277811407, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02239398492500186, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022395259393379094, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02239564321935177, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02239601836539805, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022396526178345085, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02239811611827463, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02240043261554092, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02240335472393781, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02240451393648982, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.022409153459593652, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02241583798080683, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.022421311074867844, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.022427474353462457, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.022442666189745067, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.022457295265048742, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02247829597443342, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02250330581329763, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.022536503439769148, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.022575180679559707, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02262239404954016, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.022691574105992915, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.022759826201945544, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.022825662093237043, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.022925961324945093, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.023027571002021432, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02316318893805146, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.023327861484140157, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.023482784703373907, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.023681594152003528, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.023858175138011573, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1256320476531982, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.125156879425049, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124448776245117, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1237354278564453, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1230580806732178, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122190237045288, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1211907863616943, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120196580886841, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.118913173675537, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.117609977722168, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.116349697113037, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.114579677581787, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.112910270690918, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1105809211730957, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1084697246551514, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.106419801712036, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.10390305519104, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1010823249816895, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.0979208946228027, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0950589179992676, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0915470123291016, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.088001012802124, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0841472148895264, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.080064535140991, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0758817195892334, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.071027994155884, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.066732406616211, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0629634857177734, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0581114292144775, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0526092052459717, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.048168182373047, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0442817211151123, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0395989418029785, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0357666015625, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.031475305557251, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0278420448303223, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0243372917175293, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0211660861968994, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.018259048461914, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0152013301849365, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0130505561828613, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.011672019958496, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0100820064544678, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0092222690582275, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.0084352493286133, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.007855176925659, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.00754976272583, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.007416248321533, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0078225135803223, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07290513104466592, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07493540051679587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0754891103728313, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0769656699889258, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07659653008490218, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.0784422296050203, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.082687338501292, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08453303802141011, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08656330749354005, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08803986710963455, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08822443706164636, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09080841638981174, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09246954595791805, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09339239571797711, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09653008490217793, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09874492432631968, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.1005906238464378, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.10225175341454411, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.1037283130306386, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.10557401255075674, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.10686600221483943, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.10760428202288667, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.10815799187892211, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.10926541159099298, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.11037283130306386, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1111111111111111, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.10889627168696936, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.10871170173495755, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.11055740125507567, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.1111111111111111, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.11129568106312292, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.11018826135105204, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.10981912144702842, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014136934413191216, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013838581124353176, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013716737609130935, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013884203165082388, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01398813683034334, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01384168830728686, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014279423752554367, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014236337828837117, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.014443818678531674, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.015356779963240882, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015604621085408743, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.016202684103321294, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.017418001227912202, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.019358019868812644, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.019932648833911482, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.020238539692048797, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02101592355473354, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02337721273664339, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.02486944279267063, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.026832383786954992, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.027590889836784774, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.02959466353406585, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.033415339411830426, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03511635141448099, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.03757848883435264, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03874675978875785, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.03923353035317633, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.04177784509669893, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04370810539067301, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04554425026840119, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.048241062652579374, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.050999413523319416, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.05326414279272181, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.05557196981746124, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.05740304992362416, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.058842391815503464, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.06033297557383004, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.061803391191429836, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.06366031886666583, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.06484736307952675, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.06652682658806981, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.06737157843388994, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.06631923241721151, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.06736184283809861, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.07032033168634794, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.07083451314053092, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.07039999980407266, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0698406726497268, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.06861363200277042, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 2.89938232421875, "validation/loss_best": 3.00754976272583, "validation/acc_best": 0.11129568106312292, "validation/f1_best": 0.07039999980407266} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 3.030448147058487, "train/grad": 0.1572164500877261, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12917236328125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.128519287109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.127291259765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12626953125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12521240234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.123907470703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.122393798828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1207421875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1187060546875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11663330078125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11468505859375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.111876220703125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.109105224609375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.105430908203125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1020782470703123, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0987738037109374, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.09483154296875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0903070068359373, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0852691650390627, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0808038330078125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0752178955078127, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0695745849609377, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0632281494140625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0566448974609375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.04989990234375, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.04198486328125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.034908447265625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0287152099609376, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0205169677734376, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0111138916015623, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.003176574707031, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.996204528808594, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.9873220825195315, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.9796487426757814, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.970994567871094, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.96265380859375, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.954292907714844, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.9465957641601563, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9386520385742188, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.9294332885742187, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.9228338623046874, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9175164794921873, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.9112454223632813, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9064480590820314, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.901826934814453, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.897845916748047, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.895253143310547, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.893348388671875, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.8928561401367188, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022473707837052645, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022471556956879794, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022470335909165442, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02246808575000614, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022467910018749534, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022467106375843288, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0224658738495782, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022464306182228028, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022461209055036305, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022459389651194216, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022458108267746867, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022456053378991783, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022453073905780913, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022449522498063743, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02244703474920243, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022444120021536947, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02244223685003817, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022438338748179375, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022434242689050733, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022431422513909637, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022426528353244067, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022422788464464247, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022417283519171178, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02241182366386056, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02240762235596776, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0224013193231076, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022396332705393433, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.022391385678201915, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02238642008509487, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.022382854176685216, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.022379875574260952, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02237532618455589, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.022372325216419995, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.022369295819662512, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.022367895832285285, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.022367499507963656, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.022369593335315584, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02237384019419551, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.022386858351528646, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.022406277498230338, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.022424355428665877, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02244483756367117, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.022480671238154174, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02251755802426487, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02257125009316951, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.022632808606140316, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.022698344383388757, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.022780829896219074, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.022860812391154468, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1256351470947266, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1251466274261475, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124436855316162, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1237053871154785, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1230480670928955, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122170925140381, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1211812496185303, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120168685913086, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1188652515411377, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.117560863494873, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1162970066070557, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.114537239074707, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.11285400390625, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1104917526245117, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1083343029022217, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.106288433074951, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1037464141845703, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1008689403533936, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.0976743698120117, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.094783067703247, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0912835597991943, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0876619815826416, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0837535858154297, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0796566009521484, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0754566192626953, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.070551872253418, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.066267490386963, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0625057220458984, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0575954914093018, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0522103309631348, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0476598739624023, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.04392147064209, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0392398834228516, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0355448722839355, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.031486749649048, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0279572010040283, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.024548053741455, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.021815776824951, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.019155263900757, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.016425371170044, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0146946907043457, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.013479232788086, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0121824741363525, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0111732482910156, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.0106351375579834, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.010268211364746, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.010138988494873, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.010284423828125, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.010530471801758, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07161314138058324, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07290513104466592, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07456626061277224, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07659653008490218, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07825765965300849, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07788851974898486, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08324104835732743, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08527131782945736, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08785529715762273, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08877814691768181, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09154669619785899, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09117755629383537, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.0932078257659653, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.0946843853820598, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09726836471022518, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09745293466223699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09985234403839055, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.10151347360649686, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.10114433370247324, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.10409745293466224, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.10538944259874493, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1079734219269103, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.10889627168696936, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.10871170173495755, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1079734219269103, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.10834256183093392, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.10834256183093392, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.10889627168696936, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.1094499815430048, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.10981912144702842, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.11074197120708748, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.11166482096714656, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.11203396087117018, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014030946191560511, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013886381716139859, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013436909711567517, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013933083224374003, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01399748087539289, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013905452046406122, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01430241550770337, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014540990933111654, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01468870277362986, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.015378324441347556, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015512200629509353, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.016065669571379174, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01722560192541305, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01910070349857194, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.020259404950433647, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02030469199626644, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.021006091491395717, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.022803908607616993, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.025414099075030557, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.027827793721336064, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.028411583764295385, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.0298936218160594, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03326159038700736, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03552958621648289, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.03812191197034722, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03918109489655739, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04144974083962968, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.041644408695544595, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.043260831570006646, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04568520864111466, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04798001131116636, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04833720882517917, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.051252063370852224, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.052999038545132215, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.05338497190511702, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.05637370376315467, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.05842024269492138, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.059976130996946576, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.06114267994693698, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.06117978110274148, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.06108094938243006, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.061342297861677136, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.06155061576608981, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.06160567134404107, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.061420739125543154, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.06186326723985806, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.062497415345813156, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.06336398512878813, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.06379486481512878, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 2.8928561401367188, "validation/loss_best": 3.010530471801758, "validation/acc_best": 0.11203396087117018, "validation/f1_best": 0.06379486481512878} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 3.0281921422481535, "train/grad": 0.15794416274875403, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.128052978515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.127366943359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.126141357421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1249462890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12393310546875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1225244140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.120902099609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.119307861328125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.117235107421875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11507568359375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1131640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.110299072265625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.107640380859375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.10397705078125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.10058837890625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.09739501953125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0935076904296874, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.089044189453125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0841845703125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0796337890625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.074185791015625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0687005615234373, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.062464599609375, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0559527587890627, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0492279052734377, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.041314697265625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0344512939453123, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.028223876953125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.01988525390625, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.01053466796875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0024038696289064, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.995358581542969, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.986183776855469, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.978343505859375, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9694921875, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.961007080078125, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9522857666015625, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.9442584228515627, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9358587646484375, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.9261404418945314, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.9191378784179687, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9133651733398436, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.906412353515625, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.90086181640625, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.8955587768554687, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.8906924438476564, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.8873255920410155, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.8841905212402343, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.8826795959472657, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022644905680790542, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022642752248793842, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02264010174199939, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022638384620659053, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02263725044671446, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02263578406069428, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02263180470559746, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02262999797705561, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02262695941142738, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02262408812996, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02262159962207079, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022618105220608413, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022616339656524362, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022613095371052622, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022609456232748924, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02260733544360846, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022603275226429104, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022598792063072325, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02259447705000639, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02258941392414272, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022584015624597668, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02257759521715343, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0225715868268162, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022568563031964004, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022562382761389017, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.022556765442714094, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022551111541688443, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02254606558009982, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.022538442462682724, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02252973944414407, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.022524050306528806, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02251841296441853, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.022510397955775262, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02250481436494738, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.022498548864386975, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02249424058943987, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02248920041602105, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.022485457411967218, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02248310672584921, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02248277057427913, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.022482240423560142, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.022483245697803796, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02248572597280145, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.022488965722732245, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02249394042417407, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.022503110943362117, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.02251286292914301, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.022525615934282543, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.022538236202672125, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.125633478164673, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.125143527984619, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124431848526001, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1237072944641113, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.123044013977051, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122170925140381, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1211740970611572, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1201658248901367, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.118860960006714, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1175544261932373, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.116288423538208, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1145317554473877, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1128220558166504, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1104938983917236, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.108320951461792, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.106262445449829, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1037275791168213, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1008477210998535, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.0976479053497314, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0947463512420654, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.091204881668091, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0876033306121826, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0837175846099854, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0795745849609375, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0753607749938965, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0704503059387207, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0661838054656982, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.062413454055786, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.057501792907715, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0521275997161865, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0475785732269287, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0438714027404785, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0392439365386963, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.03555965423584, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0315752029418945, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.028120994567871, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0248000621795654, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0220651626586914, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0194694995880127, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.016862630844116, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0150837898254395, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0136947631835938, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0122876167297363, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.011340856552124, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.010441541671753, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.009662628173828, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.0093131065368652, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.00923228263855, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.009190082550049, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.067921742340347, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0769656699889258, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07807308970099668, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07751937984496124, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07881136950904392, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08324104835732743, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08527131782945736, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08767072720561092, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08840900701365817, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09080841638981174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.0932078257659653, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09486895533407161, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09653008490217793, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09708379475821337, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09966777408637874, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.10114433370247324, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.10188261351052048, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.10446659283868587, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.10446659283868587, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.10668143226282761, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.10778885197489849, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.10760428202288667, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.10815799187892211, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.10871170173495755, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.10871170173495755, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.10778885197489849, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.10908084163898117, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.11037283130306386, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.11018826135105204, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.10981912144702842, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.11018826135105204, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014033641071753547, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013880480432642067, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013626962799624874, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013909956897153469, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014056794854096237, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01389599231823755, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014292649813014225, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014535973254282436, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.014682266545112054, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.015357756227770839, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015635894622037964, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.016125344071582996, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01716071206205737, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01920012659635455, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02042698174898962, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02033074969437659, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02105835202233294, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02265016177958723, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.02562911000528395, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.027933212772196833, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.028002193963226642, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.0297190852919154, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.033356513433831024, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03565512780801122, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.03838250835613929, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03910527397569527, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.040897604700800706, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.042037243624191305, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.044300417212095024, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04641023837711547, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04769843736205093, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04868752111133556, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.05130326086643955, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.05286365289830436, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.05416493233694225, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.056702534094118275, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.05789767760759201, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.0593859644060582, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.060568489079297926, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.060444145422250774, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.06062562036579, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.061136188206134694, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.06123290862059558, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.06054741388494143, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.06218185979685295, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0635803202356986, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.06344371093995713, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.063295549195685, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.06385605712941875, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 2.8906924438476564, "validation/loss_best": 3.009662628173828, "validation/acc_best": 0.11037283130306386, "validation/f1_best": 0.0635803202356986} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/config.yaml b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b283e40680dbf2cdcfc07d0166ec8281a726c02 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (nsd_cococlip reg linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear +model: flat_mae +representation: reg +classifier: linear +dataset: nsd_cococlip +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_log.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..2300c6328405ae7f2beadda09d8ee71285d6de66 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 16, "eval/id_best": 41, "eval/lr_best": 0.0048, "eval/wd_best": 0.05, "eval/train/loss": 2.8874471187591553, "eval/train/acc": 0.15040413042810166, "eval/train/acc_std": 0.0017435014016992524, "eval/train/f1": 0.10184379852202219, "eval/train/f1_std": 0.0016095791164648693, "eval/validation/loss": 3.096717596054077, "eval/validation/acc": 0.09653008490217793, "eval/validation/acc_std": 0.0034188513533546074, "eval/validation/f1": 0.053008536855324334, "eval/validation/f1_std": 0.0027002339805007568, "eval/test/loss": 3.1075639724731445, "eval/test/acc": 0.10612244897959183, "eval/test/acc_std": 0.0036193725947027736, "eval/test/f1": 0.05562487056695591, "eval/test/f1_std": 0.0024498446764065224, "eval/testid/loss": 3.0921642780303955, "eval/testid/acc": 0.09369577790630422, "eval/testid/acc_std": 0.003605034797665975, "eval/testid/f1": 0.05419867419765029, "eval/testid/f1_std": 0.0027640601769595525} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_log_best.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..f0d402bc188cee51ad1de54821b58024a068ff04 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 16, "eval/best/id_best": 41, "eval/best/lr_best": 0.0048, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.8874471187591553, "eval/best/train/acc": 0.15040413042810166, "eval/best/train/acc_std": 0.0017435014016992524, "eval/best/train/f1": 0.10184379852202219, "eval/best/train/f1_std": 0.0016095791164648693, "eval/best/validation/loss": 3.096717596054077, "eval/best/validation/acc": 0.09653008490217793, "eval/best/validation/acc_std": 0.0034188513533546074, "eval/best/validation/f1": 0.053008536855324334, "eval/best/validation/f1_std": 0.0027002339805007568, "eval/best/test/loss": 3.1075639724731445, "eval/best/test/acc": 0.10612244897959183, "eval/best/test/acc_std": 0.0036193725947027736, "eval/best/test/f1": 0.05562487056695591, "eval/best/test/f1_std": 0.0024498446764065224, "eval/best/testid/loss": 3.0921642780303955, "eval/best/testid/acc": 0.09369577790630422, "eval/best/testid/acc_std": 0.003605034797665975, "eval/best/testid/f1": 0.05419867419765029, "eval/best/testid/f1_std": 0.0027640601769595525} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_log_last.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..961b07d38ea5a838f11f21dcdbc708cddbf121fd --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 40, "eval/last/lr_best": 0.0042, "eval/last/wd_best": 0.05, "eval/last/train/loss": 2.8733935356140137, "eval/last/train/acc": 0.15667353022526814, "eval/last/train/acc_std": 0.0019192478294595783, "eval/last/train/f1": 0.11614106693103732, "eval/last/train/f1_std": 0.001857135546715004, "eval/last/validation/loss": 3.077610492706299, "eval/last/validation/acc": 0.09486895533407161, "eval/last/validation/acc_std": 0.003588911037860246, "eval/last/validation/f1": 0.059873501520058846, "eval/last/validation/f1_std": 0.0029231502783163992, "eval/last/test/loss": 3.07747745513916, "eval/last/test/acc": 0.10575139146567718, "eval/last/test/acc_std": 0.0035716372442230475, "eval/last/test/f1": 0.05840693988518062, "eval/last/test/f1_std": 0.0025867256868786466, "eval/last/testid/loss": 3.072106122970581, "eval/last/testid/acc": 0.0989010989010989, "eval/last/testid/acc_std": 0.0037477596326529667, "eval/last/testid/f1": 0.06449148633317596, "eval/last/testid/f1_std": 0.002952852977701098} diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..511359cd09441797aa57c4d6c1d6c00aa9c39edf --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,reg,linear,nsd_cococlip,best,16,0.0048,0.05,41,"[16, 1.0]",train,2.8874471187591553,0.15040413042810166,0.0017435014016992524,0.10184379852202219,0.0016095791164648693 +flat_mae,reg,linear,nsd_cococlip,best,16,0.0048,0.05,41,"[16, 1.0]",validation,3.096717596054077,0.09653008490217793,0.0034188513533546074,0.053008536855324334,0.0027002339805007568 +flat_mae,reg,linear,nsd_cococlip,best,16,0.0048,0.05,41,"[16, 1.0]",test,3.1075639724731445,0.10612244897959183,0.0036193725947027736,0.05562487056695591,0.0024498446764065224 +flat_mae,reg,linear,nsd_cococlip,best,16,0.0048,0.05,41,"[16, 1.0]",testid,3.0921642780303955,0.09369577790630422,0.003605034797665975,0.05419867419765029,0.0027640601769595525 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_table_best.csv b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..511359cd09441797aa57c4d6c1d6c00aa9c39edf --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,reg,linear,nsd_cococlip,best,16,0.0048,0.05,41,"[16, 1.0]",train,2.8874471187591553,0.15040413042810166,0.0017435014016992524,0.10184379852202219,0.0016095791164648693 +flat_mae,reg,linear,nsd_cococlip,best,16,0.0048,0.05,41,"[16, 1.0]",validation,3.096717596054077,0.09653008490217793,0.0034188513533546074,0.053008536855324334,0.0027002339805007568 +flat_mae,reg,linear,nsd_cococlip,best,16,0.0048,0.05,41,"[16, 1.0]",test,3.1075639724731445,0.10612244897959183,0.0036193725947027736,0.05562487056695591,0.0024498446764065224 +flat_mae,reg,linear,nsd_cococlip,best,16,0.0048,0.05,41,"[16, 1.0]",testid,3.0921642780303955,0.09369577790630422,0.003605034797665975,0.05419867419765029,0.0027640601769595525 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_table_last.csv b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..dadb62065d49c732f712fbe19c72b910aab7407b --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,reg,linear,nsd_cococlip,last,19,0.0042,0.05,40,"[14, 1.0]",train,2.8733935356140137,0.15667353022526814,0.0019192478294595783,0.11614106693103732,0.001857135546715004 +flat_mae,reg,linear,nsd_cococlip,last,19,0.0042,0.05,40,"[14, 1.0]",validation,3.077610492706299,0.09486895533407161,0.003588911037860246,0.059873501520058846,0.0029231502783163992 +flat_mae,reg,linear,nsd_cococlip,last,19,0.0042,0.05,40,"[14, 1.0]",test,3.07747745513916,0.10575139146567718,0.0035716372442230475,0.05840693988518062,0.0025867256868786466 +flat_mae,reg,linear,nsd_cococlip,last,19,0.0042,0.05,40,"[14, 1.0]",testid,3.072106122970581,0.0989010989010989,0.0037477596326529667,0.06449148633317596,0.002952852977701098 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/log.txt b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f29cb0c590a334d847b918da6a4dcf1a6e40a55 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/log.txt @@ -0,0 +1,957 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 21:43:16 +config: +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (nsd_cococlip reg linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear +model: flat_mae +representation: reg +classifier: linear +dataset: nsd_cococlip +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (reg): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x LinearClassifier( + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 0.9M (0.9M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:22:10 lr: nan time: 3.3273 data: 2.9951 max mem: 3910 +train: [0] [ 20/400] eta: 0:02:55 lr: 0.000003 loss: 3.3511 (3.3554) grad: 0.5526 (0.5462) time: 0.3192 data: 0.0039 max mem: 3953 +train: [0] [ 40/400] eta: 0:02:19 lr: 0.000006 loss: 3.3339 (3.3348) grad: 0.5526 (0.5443) time: 0.3065 data: 0.0043 max mem: 3953 +train: [0] [ 60/400] eta: 0:02:04 lr: 0.000009 loss: 3.2770 (3.3210) grad: 0.5026 (0.5235) time: 0.3275 data: 0.0048 max mem: 3953 +train: [0] [ 80/400] eta: 0:01:54 lr: 0.000012 loss: 3.3049 (3.3166) grad: 0.4800 (0.5126) time: 0.3235 data: 0.0047 max mem: 3953 +train: [0] [100/400] eta: 0:01:45 lr: 0.000015 loss: 3.2828 (3.3005) grad: 0.4505 (0.4975) time: 0.3278 data: 0.0049 max mem: 3953 +train: [0] [120/400] eta: 0:01:37 lr: 0.000018 loss: 3.2472 (3.2950) grad: 0.4438 (0.4923) time: 0.3302 data: 0.0049 max mem: 3953 +train: [0] [140/400] eta: 0:01:29 lr: 0.000021 loss: 3.2455 (3.2897) grad: 0.4438 (0.4853) time: 0.3248 data: 0.0048 max mem: 3953 +train: [0] [160/400] eta: 0:01:21 lr: 0.000024 loss: 3.2392 (3.2823) grad: 0.4328 (0.4816) time: 0.3239 data: 0.0046 max mem: 3953 +train: [0] [180/400] eta: 0:01:14 lr: 0.000027 loss: 3.2312 (3.2780) grad: 0.4299 (0.4773) time: 0.3062 data: 0.0042 max mem: 3953 +train: [0] [200/400] eta: 0:01:07 lr: 0.000030 loss: 3.2436 (3.2759) grad: 0.4264 (0.4744) time: 0.3257 data: 0.0047 max mem: 3953 +train: [0] [220/400] eta: 0:01:00 lr: 0.000033 loss: 3.2436 (3.2717) grad: 0.4378 (0.4723) time: 0.3201 data: 0.0046 max mem: 3953 +train: [0] [240/400] eta: 0:00:53 lr: 0.000036 loss: 3.2435 (3.2697) grad: 0.4378 (0.4703) time: 0.3234 data: 0.0049 max mem: 3953 +train: [0] [260/400] eta: 0:00:46 lr: 0.000039 loss: 3.2224 (3.2650) grad: 0.4299 (0.4674) time: 0.3146 data: 0.0049 max mem: 3953 +train: [0] [280/400] eta: 0:00:39 lr: 0.000042 loss: 3.2104 (3.2625) grad: 0.4297 (0.4652) time: 0.3286 data: 0.0049 max mem: 3953 +train: [0] [300/400] eta: 0:00:33 lr: 0.000045 loss: 3.2161 (3.2586) grad: 0.4271 (0.4629) time: 0.3180 data: 0.0048 max mem: 3953 +train: [0] [320/400] eta: 0:00:26 lr: 0.000048 loss: 3.2161 (3.2563) grad: 0.4218 (0.4602) time: 0.3176 data: 0.0046 max mem: 3953 +train: [0] [340/400] eta: 0:00:19 lr: 0.000051 loss: 3.2303 (3.2548) grad: 0.4111 (0.4578) time: 0.3246 data: 0.0046 max mem: 3953 +train: [0] [360/400] eta: 0:00:13 lr: 0.000054 loss: 3.2136 (3.2510) grad: 0.4052 (0.4542) time: 0.3528 data: 0.0048 max mem: 3953 +train: [0] [380/400] eta: 0:00:06 lr: 0.000057 loss: 3.2079 (3.2491) grad: 0.3754 (0.4515) time: 0.3478 data: 0.0053 max mem: 3953 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.2079 (3.2471) grad: 0.3929 (0.4488) time: 0.3381 data: 0.0045 max mem: 3953 +train: [0] Total time: 0:02:13 (0.3329 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.2079 (3.2471) grad: 0.3929 (0.4488) +eval (validation): [0] [ 0/85] eta: 0:04:40 time: 3.3047 data: 3.0944 max mem: 3953 +eval (validation): [0] [20/85] eta: 0:00:30 time: 0.3284 data: 0.0136 max mem: 3953 +eval (validation): [0] [40/85] eta: 0:00:18 time: 0.3512 data: 0.0121 max mem: 3953 +eval (validation): [0] [60/85] eta: 0:00:09 time: 0.3185 data: 0.0043 max mem: 3953 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3250 data: 0.0039 max mem: 3953 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3277 data: 0.0043 max mem: 3953 +eval (validation): [0] Total time: 0:00:31 (0.3691 s / it) +cv: [0] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 3.205 acc: 0.068 f1: 0.024 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:23:05 lr: nan time: 3.4625 data: 3.1829 max mem: 3953 +train: [1] [ 20/400] eta: 0:03:14 lr: 0.000063 loss: 3.2034 (3.2154) grad: 0.3878 (0.3968) time: 0.3638 data: 0.0110 max mem: 3953 +train: [1] [ 40/400] eta: 0:02:33 lr: 0.000066 loss: 3.1988 (3.2032) grad: 0.4176 (0.4136) time: 0.3398 data: 0.0038 max mem: 3953 +train: [1] [ 60/400] eta: 0:02:16 lr: 0.000069 loss: 3.1955 (3.1966) grad: 0.4176 (0.4103) time: 0.3493 data: 0.0051 max mem: 3953 +train: [1] [ 80/400] eta: 0:02:04 lr: 0.000072 loss: 3.1854 (3.1955) grad: 0.3982 (0.4085) time: 0.3510 data: 0.0056 max mem: 3953 +train: [1] [100/400] eta: 0:01:55 lr: 0.000075 loss: 3.1804 (3.1917) grad: 0.3849 (0.4039) time: 0.3641 data: 0.0044 max mem: 3953 +train: [1] [120/400] eta: 0:01:45 lr: 0.000078 loss: 3.1874 (3.1925) grad: 0.3836 (0.4032) time: 0.3327 data: 0.0047 max mem: 3953 +train: [1] [140/400] eta: 0:01:36 lr: 0.000081 loss: 3.2088 (3.1944) grad: 0.3927 (0.4011) time: 0.3518 data: 0.0050 max mem: 3953 +train: [1] [160/400] eta: 0:01:28 lr: 0.000084 loss: 3.1784 (3.1940) grad: 0.3903 (0.4003) time: 0.3315 data: 0.0047 max mem: 3953 +train: [1] [180/400] eta: 0:01:20 lr: 0.000087 loss: 3.1909 (3.1939) grad: 0.3909 (0.4015) time: 0.3433 data: 0.0049 max mem: 3953 +train: [1] [200/400] eta: 0:01:13 lr: 0.000090 loss: 3.1891 (3.1940) grad: 0.3900 (0.4014) time: 0.3689 data: 0.0052 max mem: 3953 +train: [1] [220/400] eta: 0:01:05 lr: 0.000093 loss: 3.1891 (3.1937) grad: 0.3859 (0.4010) time: 0.3680 data: 0.0048 max mem: 3953 +train: [1] [240/400] eta: 0:00:57 lr: 0.000096 loss: 3.1927 (3.1941) grad: 0.4040 (0.4019) time: 0.3284 data: 0.0048 max mem: 3953 +train: [1] [260/400] eta: 0:00:50 lr: 0.000099 loss: 3.1945 (3.1943) grad: 0.3942 (0.4009) time: 0.3872 data: 0.0052 max mem: 3953 +train: [1] [280/400] eta: 0:00:43 lr: 0.000102 loss: 3.1696 (3.1934) grad: 0.3933 (0.4012) time: 0.3633 data: 0.0051 max mem: 3953 +train: [1] [300/400] eta: 0:00:36 lr: 0.000105 loss: 3.1725 (3.1932) grad: 0.3960 (0.4006) time: 0.3211 data: 0.0050 max mem: 3953 +train: [1] [320/400] eta: 0:00:28 lr: 0.000108 loss: 3.1831 (3.1922) grad: 0.3944 (0.3997) time: 0.3296 data: 0.0048 max mem: 3953 +train: [1] [340/400] eta: 0:00:21 lr: 0.000111 loss: 3.1740 (3.1909) grad: 0.3769 (0.3986) time: 0.3684 data: 0.0051 max mem: 3953 +train: [1] [360/400] eta: 0:00:14 lr: 0.000114 loss: 3.1689 (3.1906) grad: 0.3830 (0.3982) time: 0.3425 data: 0.0050 max mem: 3953 +train: [1] [380/400] eta: 0:00:07 lr: 0.000117 loss: 3.1680 (3.1902) grad: 0.3926 (0.3981) time: 0.3378 data: 0.0046 max mem: 3953 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 3.2154 (3.1915) grad: 0.3942 (0.3981) time: 0.3413 data: 0.0046 max mem: 3953 +train: [1] Total time: 0:02:23 (0.3576 s / it) +train: [1] Summary: lr: 0.000120 loss: 3.2154 (3.1915) grad: 0.3942 (0.3981) +eval (validation): [1] [ 0/85] eta: 0:04:35 time: 3.2418 data: 3.0305 max mem: 3953 +eval (validation): [1] [20/85] eta: 0:00:30 time: 0.3301 data: 0.0057 max mem: 3953 +eval (validation): [1] [40/85] eta: 0:00:17 time: 0.3277 data: 0.0037 max mem: 3953 +eval (validation): [1] [60/85] eta: 0:00:09 time: 0.3412 data: 0.0048 max mem: 3953 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3387 data: 0.0048 max mem: 3953 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3169 data: 0.0045 max mem: 3953 +eval (validation): [1] Total time: 0:00:31 (0.3703 s / it) +cv: [1] best hparam: (1.9, 1.0) (028) ('028_lr1.9e+00_wd1.0e+00') loss: 3.132 acc: 0.076 f1: 0.025 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:20:42 lr: nan time: 3.1062 data: 2.8420 max mem: 3953 +train: [2] [ 20/400] eta: 0:03:09 lr: 0.000123 loss: 3.1430 (3.1537) grad: 0.3657 (0.3740) time: 0.3687 data: 0.0048 max mem: 3953 +train: [2] [ 40/400] eta: 0:02:36 lr: 0.000126 loss: 3.1587 (3.1685) grad: 0.3650 (0.3667) time: 0.3654 data: 0.0050 max mem: 3953 +train: [2] [ 60/400] eta: 0:02:16 lr: 0.000129 loss: 3.1684 (3.1623) grad: 0.3789 (0.3750) time: 0.3384 data: 0.0049 max mem: 3953 +train: [2] [ 80/400] eta: 0:02:04 lr: 0.000132 loss: 3.1684 (3.1629) grad: 0.3981 (0.3832) time: 0.3483 data: 0.0051 max mem: 3953 +train: [2] [100/400] eta: 0:01:54 lr: 0.000135 loss: 3.1726 (3.1681) grad: 0.4006 (0.3880) time: 0.3465 data: 0.0047 max mem: 3953 +train: [2] [120/400] eta: 0:01:45 lr: 0.000138 loss: 3.1515 (3.1651) grad: 0.4065 (0.3883) time: 0.3566 data: 0.0052 max mem: 3953 +train: [2] [140/400] eta: 0:01:36 lr: 0.000141 loss: 3.1515 (3.1658) grad: 0.3715 (0.3859) time: 0.3295 data: 0.0049 max mem: 3953 +train: [2] [160/400] eta: 0:01:27 lr: 0.000144 loss: 3.1621 (3.1690) grad: 0.3805 (0.3869) time: 0.3418 data: 0.0048 max mem: 3953 +train: [2] [180/400] eta: 0:01:20 lr: 0.000147 loss: 3.1655 (3.1665) grad: 0.3888 (0.3865) time: 0.3441 data: 0.0049 max mem: 3953 +train: [2] [200/400] eta: 0:01:12 lr: 0.000150 loss: 3.1676 (3.1688) grad: 0.3849 (0.3866) time: 0.3403 data: 0.0048 max mem: 3953 +train: [2] [220/400] eta: 0:01:04 lr: 0.000153 loss: 3.1639 (3.1692) grad: 0.3989 (0.3873) time: 0.3392 data: 0.0047 max mem: 3953 +train: [2] [240/400] eta: 0:00:57 lr: 0.000156 loss: 3.1713 (3.1715) grad: 0.3989 (0.3887) time: 0.3446 data: 0.0049 max mem: 3953 +train: [2] [260/400] eta: 0:00:50 lr: 0.000159 loss: 3.1989 (3.1725) grad: 0.3998 (0.3890) time: 0.3509 data: 0.0046 max mem: 3953 +train: [2] [280/400] eta: 0:00:42 lr: 0.000162 loss: 3.2098 (3.1746) grad: 0.4005 (0.3896) time: 0.3238 data: 0.0046 max mem: 3953 +train: [2] [300/400] eta: 0:00:35 lr: 0.000165 loss: 3.1969 (3.1731) grad: 0.3870 (0.3895) time: 0.3368 data: 0.0049 max mem: 3953 +train: [2] [320/400] eta: 0:00:28 lr: 0.000168 loss: 3.1226 (3.1718) grad: 0.3915 (0.3901) time: 0.3430 data: 0.0048 max mem: 3953 +train: [2] [340/400] eta: 0:00:21 lr: 0.000171 loss: 3.1613 (3.1714) grad: 0.3992 (0.3905) time: 0.3419 data: 0.0050 max mem: 3953 +train: [2] [360/400] eta: 0:00:14 lr: 0.000174 loss: 3.1711 (3.1717) grad: 0.3992 (0.3904) time: 0.3381 data: 0.0048 max mem: 3953 +train: [2] [380/400] eta: 0:00:07 lr: 0.000177 loss: 3.1711 (3.1718) grad: 0.3735 (0.3895) time: 0.3412 data: 0.0047 max mem: 3953 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 3.1484 (3.1714) grad: 0.3767 (0.3893) time: 0.3460 data: 0.0047 max mem: 3953 +train: [2] Total time: 0:02:20 (0.3517 s / it) +train: [2] Summary: lr: 0.000180 loss: 3.1484 (3.1714) grad: 0.3767 (0.3893) +eval (validation): [2] [ 0/85] eta: 0:04:39 time: 3.2863 data: 3.0306 max mem: 3953 +eval (validation): [2] [20/85] eta: 0:00:30 time: 0.3264 data: 0.0044 max mem: 3953 +eval (validation): [2] [40/85] eta: 0:00:19 time: 0.3885 data: 0.0043 max mem: 3953 +eval (validation): [2] [60/85] eta: 0:00:10 time: 0.3566 data: 0.0041 max mem: 3953 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3374 data: 0.0043 max mem: 3953 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3284 data: 0.0039 max mem: 3953 +eval (validation): [2] Total time: 0:00:32 (0.3868 s / it) +cv: [2] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 3.201 acc: 0.075 f1: 0.037 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [3] [ 0/400] eta: 0:21:49 lr: nan time: 3.2734 data: 3.0473 max mem: 3953 +train: [3] [ 20/400] eta: 0:03:03 lr: 0.000183 loss: 3.1724 (3.1727) grad: 0.4387 (0.4410) time: 0.3435 data: 0.0048 max mem: 3953 +train: [3] [ 40/400] eta: 0:02:28 lr: 0.000186 loss: 3.1715 (3.1710) grad: 0.4320 (0.4341) time: 0.3387 data: 0.0042 max mem: 3953 +train: [3] [ 60/400] eta: 0:02:11 lr: 0.000189 loss: 3.1658 (3.1734) grad: 0.4112 (0.4298) time: 0.3352 data: 0.0046 max mem: 3953 +train: [3] [ 80/400] eta: 0:02:00 lr: 0.000192 loss: 3.1696 (3.1753) grad: 0.3975 (0.4230) time: 0.3412 data: 0.0035 max mem: 3953 +train: [3] [100/400] eta: 0:01:51 lr: 0.000195 loss: 3.1696 (3.1759) grad: 0.4006 (0.4221) time: 0.3550 data: 0.0051 max mem: 3953 +train: [3] [120/400] eta: 0:01:42 lr: 0.000198 loss: 3.1697 (3.1739) grad: 0.4178 (0.4217) time: 0.3386 data: 0.0048 max mem: 3953 +train: [3] [140/400] eta: 0:01:33 lr: 0.000201 loss: 3.1697 (3.1748) grad: 0.3966 (0.4177) time: 0.3262 data: 0.0047 max mem: 3953 +train: [3] [160/400] eta: 0:01:25 lr: 0.000204 loss: 3.1777 (3.1783) grad: 0.4113 (0.4190) time: 0.3393 data: 0.0040 max mem: 3953 +train: [3] [180/400] eta: 0:01:18 lr: 0.000207 loss: 3.1932 (3.1815) grad: 0.4209 (0.4188) time: 0.3391 data: 0.0049 max mem: 3953 +train: [3] [200/400] eta: 0:01:10 lr: 0.000210 loss: 3.1885 (3.1823) grad: 0.4107 (0.4175) time: 0.3412 data: 0.0042 max mem: 3953 +train: [3] [220/400] eta: 0:01:03 lr: 0.000213 loss: 3.1726 (3.1808) grad: 0.4119 (0.4175) time: 0.3476 data: 0.0048 max mem: 3953 +train: [3] [240/400] eta: 0:00:56 lr: 0.000216 loss: 3.1603 (3.1798) grad: 0.4096 (0.4174) time: 0.3387 data: 0.0045 max mem: 3953 +train: [3] [260/400] eta: 0:00:49 lr: 0.000219 loss: 3.1614 (3.1793) grad: 0.4063 (0.4163) time: 0.3410 data: 0.0044 max mem: 3953 +train: [3] [280/400] eta: 0:00:41 lr: 0.000222 loss: 3.1882 (3.1819) grad: 0.4063 (0.4163) time: 0.3238 data: 0.0044 max mem: 3953 +train: [3] [300/400] eta: 0:00:34 lr: 0.000225 loss: 3.2096 (3.1824) grad: 0.4184 (0.4164) time: 0.3314 data: 0.0041 max mem: 3953 +train: [3] [320/400] eta: 0:00:27 lr: 0.000228 loss: 3.1791 (3.1813) grad: 0.4047 (0.4159) time: 0.3711 data: 0.0052 max mem: 3953 +train: [3] [340/400] eta: 0:00:20 lr: 0.000231 loss: 3.1604 (3.1801) grad: 0.4080 (0.4166) time: 0.3469 data: 0.0048 max mem: 3953 +train: [3] [360/400] eta: 0:00:13 lr: 0.000234 loss: 3.1686 (3.1801) grad: 0.4357 (0.4180) time: 0.3432 data: 0.0046 max mem: 3953 +train: [3] [380/400] eta: 0:00:06 lr: 0.000237 loss: 3.1916 (3.1805) grad: 0.4063 (0.4165) time: 0.3423 data: 0.0050 max mem: 3953 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 3.1901 (3.1805) grad: 0.4063 (0.4159) time: 0.3608 data: 0.0051 max mem: 3953 +train: [3] Total time: 0:02:19 (0.3500 s / it) +train: [3] Summary: lr: 0.000240 loss: 3.1901 (3.1805) grad: 0.4063 (0.4159) +eval (validation): [3] [ 0/85] eta: 0:04:42 time: 3.3216 data: 3.0385 max mem: 3953 +eval (validation): [3] [20/85] eta: 0:00:32 time: 0.3620 data: 0.0052 max mem: 3953 +eval (validation): [3] [40/85] eta: 0:00:18 time: 0.3166 data: 0.0044 max mem: 3953 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3358 data: 0.0044 max mem: 3953 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3436 data: 0.0048 max mem: 3953 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3308 data: 0.0046 max mem: 3953 +eval (validation): [3] Total time: 0:00:32 (0.3771 s / it) +cv: [3] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 3.280 acc: 0.082 f1: 0.030 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:22:35 lr: nan time: 3.3880 data: 3.1129 max mem: 3953 +train: [4] [ 20/400] eta: 0:03:06 lr: 0.000243 loss: 3.1457 (3.1583) grad: 0.3957 (0.4108) time: 0.3451 data: 0.0075 max mem: 3953 +train: [4] [ 40/400] eta: 0:02:30 lr: 0.000246 loss: 3.1480 (3.1701) grad: 0.4126 (0.4211) time: 0.3407 data: 0.0049 max mem: 3953 +train: [4] [ 60/400] eta: 0:02:13 lr: 0.000249 loss: 3.1509 (3.1629) grad: 0.4272 (0.4237) time: 0.3391 data: 0.0034 max mem: 3953 +train: [4] [ 80/400] eta: 0:02:02 lr: 0.000252 loss: 3.1776 (3.1709) grad: 0.4160 (0.4224) time: 0.3519 data: 0.0051 max mem: 3953 +train: [4] [100/400] eta: 0:01:53 lr: 0.000255 loss: 3.1987 (3.1787) grad: 0.4188 (0.4264) time: 0.3590 data: 0.0044 max mem: 3953 +train: [4] [120/400] eta: 0:01:42 lr: 0.000258 loss: 3.1670 (3.1755) grad: 0.4233 (0.4263) time: 0.3186 data: 0.0045 max mem: 3953 +train: [4] [140/400] eta: 0:01:35 lr: 0.000261 loss: 3.1548 (3.1741) grad: 0.4193 (0.4251) time: 0.3557 data: 0.0042 max mem: 3953 +train: [4] [160/400] eta: 0:01:27 lr: 0.000264 loss: 3.1548 (3.1766) grad: 0.4135 (0.4244) time: 0.3441 data: 0.0047 max mem: 3953 +train: [4] [180/400] eta: 0:01:19 lr: 0.000267 loss: 3.1848 (3.1772) grad: 0.4146 (0.4239) time: 0.3353 data: 0.0047 max mem: 3953 +train: [4] [200/400] eta: 0:01:11 lr: 0.000270 loss: 3.1848 (3.1796) grad: 0.4211 (0.4252) time: 0.3375 data: 0.0048 max mem: 3953 +train: [4] [220/400] eta: 0:01:04 lr: 0.000273 loss: 3.1687 (3.1790) grad: 0.4148 (0.4240) time: 0.3431 data: 0.0047 max mem: 3953 +train: [4] [240/400] eta: 0:00:57 lr: 0.000276 loss: 3.1752 (3.1792) grad: 0.3980 (0.4228) time: 0.3674 data: 0.0050 max mem: 3953 +train: [4] [260/400] eta: 0:00:49 lr: 0.000279 loss: 3.1714 (3.1760) grad: 0.4125 (0.4239) time: 0.3228 data: 0.0046 max mem: 3953 +train: [4] [280/400] eta: 0:00:42 lr: 0.000282 loss: 3.1682 (3.1755) grad: 0.4263 (0.4240) time: 0.3503 data: 0.0049 max mem: 3953 +train: [4] [300/400] eta: 0:00:35 lr: 0.000285 loss: 3.1702 (3.1755) grad: 0.4216 (0.4241) time: 0.3636 data: 0.0049 max mem: 3953 +train: [4] [320/400] eta: 0:00:28 lr: 0.000288 loss: 3.1898 (3.1767) grad: 0.4114 (0.4232) time: 0.3521 data: 0.0053 max mem: 3953 +train: [4] [340/400] eta: 0:00:21 lr: 0.000291 loss: 3.1780 (3.1774) grad: 0.4069 (0.4239) time: 0.3247 data: 0.0048 max mem: 3953 +train: [4] [360/400] eta: 0:00:14 lr: 0.000294 loss: 3.2047 (3.1797) grad: 0.4247 (0.4246) time: 0.3723 data: 0.0052 max mem: 3953 +train: [4] [380/400] eta: 0:00:07 lr: 0.000297 loss: 3.2016 (3.1796) grad: 0.4180 (0.4230) time: 0.3606 data: 0.0053 max mem: 3953 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 3.1776 (3.1799) grad: 0.3984 (0.4224) time: 0.3625 data: 0.0051 max mem: 3953 +train: [4] Total time: 0:02:22 (0.3552 s / it) +train: [4] Summary: lr: 0.000300 loss: 3.1776 (3.1799) grad: 0.3984 (0.4224) +eval (validation): [4] [ 0/85] eta: 0:04:09 time: 2.9356 data: 2.6965 max mem: 3953 +eval (validation): [4] [20/85] eta: 0:00:30 time: 0.3461 data: 0.0036 max mem: 3953 +eval (validation): [4] [40/85] eta: 0:00:18 time: 0.3314 data: 0.0042 max mem: 3953 +eval (validation): [4] [60/85] eta: 0:00:09 time: 0.3027 data: 0.0041 max mem: 3953 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.2888 data: 0.0038 max mem: 3953 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.2839 data: 0.0035 max mem: 3953 +eval (validation): [4] Total time: 0:00:29 (0.3499 s / it) +cv: [4] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 3.128 acc: 0.088 f1: 0.039 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:20:04 lr: nan time: 3.0110 data: 2.8080 max mem: 3953 +train: [5] [ 20/400] eta: 0:02:53 lr: 0.000300 loss: 3.1275 (3.1385) grad: 0.4328 (0.4312) time: 0.3284 data: 0.0044 max mem: 3953 +train: [5] [ 40/400] eta: 0:02:20 lr: 0.000300 loss: 3.1509 (3.1544) grad: 0.4280 (0.4259) time: 0.3187 data: 0.0037 max mem: 3953 +train: [5] [ 60/400] eta: 0:02:03 lr: 0.000300 loss: 3.1509 (3.1446) grad: 0.4060 (0.4199) time: 0.3127 data: 0.0044 max mem: 3953 +train: [5] [ 80/400] eta: 0:01:54 lr: 0.000300 loss: 3.1525 (3.1484) grad: 0.3937 (0.4137) time: 0.3331 data: 0.0050 max mem: 3953 +train: [5] [100/400] eta: 0:01:44 lr: 0.000300 loss: 3.1553 (3.1486) grad: 0.4065 (0.4153) time: 0.3100 data: 0.0044 max mem: 3953 +train: [5] [120/400] eta: 0:01:36 lr: 0.000300 loss: 3.1710 (3.1582) grad: 0.4232 (0.4180) time: 0.3360 data: 0.0049 max mem: 3953 +train: [5] [140/400] eta: 0:01:28 lr: 0.000300 loss: 3.1823 (3.1588) grad: 0.4206 (0.4183) time: 0.3227 data: 0.0050 max mem: 3953 +train: [5] [160/400] eta: 0:01:21 lr: 0.000299 loss: 3.1282 (3.1562) grad: 0.4089 (0.4157) time: 0.3272 data: 0.0050 max mem: 3953 +train: [5] [180/400] eta: 0:01:14 lr: 0.000299 loss: 3.1494 (3.1563) grad: 0.4089 (0.4161) time: 0.3245 data: 0.0044 max mem: 3953 +train: [5] [200/400] eta: 0:01:07 lr: 0.000299 loss: 3.1843 (3.1599) grad: 0.4171 (0.4165) time: 0.3343 data: 0.0048 max mem: 3953 +train: [5] [220/400] eta: 0:01:00 lr: 0.000299 loss: 3.1893 (3.1621) grad: 0.4217 (0.4159) time: 0.3269 data: 0.0050 max mem: 3953 +train: [5] [240/400] eta: 0:00:53 lr: 0.000299 loss: 3.1720 (3.1626) grad: 0.3957 (0.4151) time: 0.3141 data: 0.0048 max mem: 3953 +train: [5] [260/400] eta: 0:00:46 lr: 0.000299 loss: 3.1653 (3.1644) grad: 0.3962 (0.4145) time: 0.3225 data: 0.0050 max mem: 3953 +train: [5] [280/400] eta: 0:00:40 lr: 0.000298 loss: 3.1602 (3.1654) grad: 0.3973 (0.4132) time: 0.3240 data: 0.0048 max mem: 3953 +train: [5] [300/400] eta: 0:00:33 lr: 0.000298 loss: 3.1567 (3.1650) grad: 0.3898 (0.4114) time: 0.3309 data: 0.0050 max mem: 3953 +train: [5] [320/400] eta: 0:00:26 lr: 0.000298 loss: 3.1567 (3.1641) grad: 0.4004 (0.4117) time: 0.3446 data: 0.0051 max mem: 3953 +train: [5] [340/400] eta: 0:00:20 lr: 0.000298 loss: 3.1695 (3.1653) grad: 0.4116 (0.4118) time: 0.3467 data: 0.0049 max mem: 3953 +train: [5] [360/400] eta: 0:00:13 lr: 0.000297 loss: 3.1722 (3.1658) grad: 0.4190 (0.4129) time: 0.3620 data: 0.0053 max mem: 3953 +train: [5] [380/400] eta: 0:00:06 lr: 0.000297 loss: 3.1669 (3.1663) grad: 0.4346 (0.4143) time: 0.3501 data: 0.0052 max mem: 3953 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 3.1785 (3.1680) grad: 0.4371 (0.4159) time: 0.3572 data: 0.0048 max mem: 3953 +train: [5] Total time: 0:02:15 (0.3384 s / it) +train: [5] Summary: lr: 0.000297 loss: 3.1785 (3.1680) grad: 0.4371 (0.4159) +eval (validation): [5] [ 0/85] eta: 0:04:45 time: 3.3579 data: 3.1382 max mem: 3953 +eval (validation): [5] [20/85] eta: 0:00:31 time: 0.3423 data: 0.0211 max mem: 3953 +eval (validation): [5] [40/85] eta: 0:00:18 time: 0.3540 data: 0.0043 max mem: 3953 +eval (validation): [5] [60/85] eta: 0:00:09 time: 0.3227 data: 0.0043 max mem: 3953 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3356 data: 0.0043 max mem: 3953 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3201 data: 0.0042 max mem: 3953 +eval (validation): [5] Total time: 0:00:31 (0.3755 s / it) +cv: [5] best hparam: (2.3, 1.0) (029) ('029_lr2.3e+00_wd1.0e+00') loss: 3.112 acc: 0.086 f1: 0.033 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [6] [ 0/400] eta: 0:22:21 lr: nan time: 3.3550 data: 3.0836 max mem: 3953 +train: [6] [ 20/400] eta: 0:03:13 lr: 0.000296 loss: 3.1827 (3.1856) grad: 0.4332 (0.4371) time: 0.3665 data: 0.0045 max mem: 3953 +train: [6] [ 40/400] eta: 0:02:31 lr: 0.000296 loss: 3.1792 (3.1722) grad: 0.4188 (0.4297) time: 0.3272 data: 0.0045 max mem: 3953 +train: [6] [ 60/400] eta: 0:02:16 lr: 0.000296 loss: 3.1250 (3.1591) grad: 0.4251 (0.4332) time: 0.3593 data: 0.0052 max mem: 3953 +train: [6] [ 80/400] eta: 0:02:02 lr: 0.000295 loss: 3.1420 (3.1581) grad: 0.4362 (0.4325) time: 0.3355 data: 0.0050 max mem: 3953 +train: [6] [100/400] eta: 0:01:53 lr: 0.000295 loss: 3.1311 (3.1533) grad: 0.4217 (0.4282) time: 0.3583 data: 0.0053 max mem: 3953 +train: [6] [120/400] eta: 0:01:45 lr: 0.000295 loss: 3.1334 (3.1561) grad: 0.3845 (0.4232) time: 0.3558 data: 0.0047 max mem: 3953 +train: [6] [140/400] eta: 0:01:36 lr: 0.000294 loss: 3.1653 (3.1547) grad: 0.3824 (0.4194) time: 0.3378 data: 0.0050 max mem: 3953 +train: [6] [160/400] eta: 0:01:27 lr: 0.000294 loss: 3.1491 (3.1552) grad: 0.3929 (0.4182) time: 0.3274 data: 0.0047 max mem: 3953 +train: [6] [180/400] eta: 0:01:19 lr: 0.000293 loss: 3.1591 (3.1542) grad: 0.4172 (0.4189) time: 0.3499 data: 0.0050 max mem: 3953 +train: [6] [200/400] eta: 0:01:12 lr: 0.000293 loss: 3.1591 (3.1554) grad: 0.4239 (0.4216) time: 0.3341 data: 0.0043 max mem: 3953 +train: [6] [220/400] eta: 0:01:04 lr: 0.000292 loss: 3.1595 (3.1567) grad: 0.4211 (0.4209) time: 0.3476 data: 0.0049 max mem: 3953 +train: [6] [240/400] eta: 0:00:57 lr: 0.000292 loss: 3.1258 (3.1541) grad: 0.4178 (0.4223) time: 0.3444 data: 0.0045 max mem: 3953 +train: [6] [260/400] eta: 0:00:49 lr: 0.000291 loss: 3.1446 (3.1563) grad: 0.4212 (0.4223) time: 0.3465 data: 0.0048 max mem: 3953 +train: [6] [280/400] eta: 0:00:42 lr: 0.000291 loss: 3.1562 (3.1562) grad: 0.4234 (0.4226) time: 0.3356 data: 0.0045 max mem: 3953 +train: [6] [300/400] eta: 0:00:35 lr: 0.000290 loss: 3.1666 (3.1580) grad: 0.4234 (0.4216) time: 0.3344 data: 0.0045 max mem: 3953 +train: [6] [320/400] eta: 0:00:28 lr: 0.000290 loss: 3.1747 (3.1597) grad: 0.4273 (0.4220) time: 0.3503 data: 0.0049 max mem: 3953 +train: [6] [340/400] eta: 0:00:21 lr: 0.000289 loss: 3.1926 (3.1613) grad: 0.4057 (0.4207) time: 0.3484 data: 0.0047 max mem: 3953 +train: [6] [360/400] eta: 0:00:14 lr: 0.000288 loss: 3.1507 (3.1593) grad: 0.3880 (0.4193) time: 0.3414 data: 0.0048 max mem: 3953 +train: [6] [380/400] eta: 0:00:07 lr: 0.000288 loss: 3.1317 (3.1588) grad: 0.4105 (0.4203) time: 0.3279 data: 0.0048 max mem: 3953 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 3.1317 (3.1576) grad: 0.4157 (0.4201) time: 0.3315 data: 0.0050 max mem: 3953 +train: [6] Total time: 0:02:20 (0.3511 s / it) +train: [6] Summary: lr: 0.000287 loss: 3.1317 (3.1576) grad: 0.4157 (0.4201) +eval (validation): [6] [ 0/85] eta: 0:04:42 time: 3.3224 data: 3.0637 max mem: 3953 +eval (validation): [6] [20/85] eta: 0:00:29 time: 0.3122 data: 0.0057 max mem: 3953 +eval (validation): [6] [40/85] eta: 0:00:17 time: 0.3099 data: 0.0040 max mem: 3953 +eval (validation): [6] [60/85] eta: 0:00:09 time: 0.3192 data: 0.0049 max mem: 3953 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3137 data: 0.0034 max mem: 3953 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3003 data: 0.0038 max mem: 3953 +eval (validation): [6] Total time: 0:00:29 (0.3521 s / it) +cv: [6] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 3.131 acc: 0.089 f1: 0.039 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:21:42 lr: nan time: 3.2565 data: 2.9801 max mem: 3953 +train: [7] [ 20/400] eta: 0:03:11 lr: 0.000286 loss: 3.1073 (3.1162) grad: 0.4146 (0.4252) time: 0.3657 data: 0.0040 max mem: 3953 +train: [7] [ 40/400] eta: 0:02:32 lr: 0.000286 loss: 3.1201 (3.1297) grad: 0.4136 (0.4171) time: 0.3384 data: 0.0042 max mem: 3953 +train: [7] [ 60/400] eta: 0:02:15 lr: 0.000285 loss: 3.1347 (3.1282) grad: 0.3966 (0.4127) time: 0.3461 data: 0.0052 max mem: 3953 +train: [7] [ 80/400] eta: 0:02:01 lr: 0.000284 loss: 3.1310 (3.1275) grad: 0.4016 (0.4100) time: 0.3282 data: 0.0049 max mem: 3953 +train: [7] [100/400] eta: 0:01:52 lr: 0.000284 loss: 3.1178 (3.1286) grad: 0.3997 (0.4047) time: 0.3454 data: 0.0051 max mem: 3953 +train: [7] [120/400] eta: 0:01:43 lr: 0.000283 loss: 3.1217 (3.1283) grad: 0.3919 (0.4028) time: 0.3424 data: 0.0049 max mem: 3953 +train: [7] [140/400] eta: 0:01:34 lr: 0.000282 loss: 3.1431 (3.1308) grad: 0.3987 (0.4044) time: 0.3340 data: 0.0051 max mem: 3953 +train: [7] [160/400] eta: 0:01:26 lr: 0.000282 loss: 3.1568 (3.1336) grad: 0.4055 (0.4054) time: 0.3304 data: 0.0049 max mem: 3953 +train: [7] [180/400] eta: 0:01:18 lr: 0.000281 loss: 3.1703 (3.1381) grad: 0.4076 (0.4077) time: 0.3434 data: 0.0048 max mem: 3953 +train: [7] [200/400] eta: 0:01:11 lr: 0.000280 loss: 3.1418 (3.1359) grad: 0.4052 (0.4078) time: 0.3344 data: 0.0049 max mem: 3953 +train: [7] [220/400] eta: 0:01:03 lr: 0.000279 loss: 3.1303 (3.1383) grad: 0.4046 (0.4082) time: 0.3307 data: 0.0050 max mem: 3953 +train: [7] [240/400] eta: 0:00:56 lr: 0.000278 loss: 3.1226 (3.1368) grad: 0.3929 (0.4067) time: 0.3426 data: 0.0045 max mem: 3953 +train: [7] [260/400] eta: 0:00:49 lr: 0.000278 loss: 3.1236 (3.1376) grad: 0.3821 (0.4042) time: 0.3499 data: 0.0050 max mem: 3953 +train: [7] [280/400] eta: 0:00:42 lr: 0.000277 loss: 3.1545 (3.1409) grad: 0.3920 (0.4062) time: 0.3431 data: 0.0049 max mem: 3953 +train: [7] [300/400] eta: 0:00:35 lr: 0.000276 loss: 3.1320 (3.1399) grad: 0.4185 (0.4069) time: 0.3323 data: 0.0048 max mem: 3953 +train: [7] [320/400] eta: 0:00:28 lr: 0.000275 loss: 3.1248 (3.1407) grad: 0.4026 (0.4060) time: 0.3564 data: 0.0043 max mem: 3953 +train: [7] [340/400] eta: 0:00:20 lr: 0.000274 loss: 3.1260 (3.1413) grad: 0.4025 (0.4074) time: 0.3410 data: 0.0052 max mem: 3953 +train: [7] [360/400] eta: 0:00:13 lr: 0.000273 loss: 3.1371 (3.1405) grad: 0.4002 (0.4068) time: 0.3357 data: 0.0047 max mem: 3953 +train: [7] [380/400] eta: 0:00:06 lr: 0.000272 loss: 3.1597 (3.1417) grad: 0.4131 (0.4073) time: 0.3256 data: 0.0046 max mem: 3953 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 3.1717 (3.1429) grad: 0.4165 (0.4076) time: 0.3416 data: 0.0044 max mem: 3953 +train: [7] Total time: 0:02:19 (0.3482 s / it) +train: [7] Summary: lr: 0.000271 loss: 3.1717 (3.1429) grad: 0.4165 (0.4076) +eval (validation): [7] [ 0/85] eta: 0:04:46 time: 3.3704 data: 3.1042 max mem: 3953 +eval (validation): [7] [20/85] eta: 0:00:33 time: 0.3800 data: 0.0051 max mem: 3953 +eval (validation): [7] [40/85] eta: 0:00:19 time: 0.3606 data: 0.0039 max mem: 3953 +eval (validation): [7] [60/85] eta: 0:00:10 time: 0.3388 data: 0.0050 max mem: 3953 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3238 data: 0.0046 max mem: 3953 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.2960 data: 0.0042 max mem: 3953 +eval (validation): [7] Total time: 0:00:32 (0.3864 s / it) +cv: [7] best hparam: (1.9, 1.0) (028) ('028_lr1.9e+00_wd1.0e+00') loss: 3.134 acc: 0.081 f1: 0.037 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:27:52 lr: nan time: 4.1821 data: 3.9524 max mem: 3953 +train: [8] [ 20/400] eta: 0:03:18 lr: 0.000270 loss: 3.1145 (3.1286) grad: 0.4222 (0.4179) time: 0.3390 data: 0.0153 max mem: 3953 +train: [8] [ 40/400] eta: 0:02:36 lr: 0.000270 loss: 3.1282 (3.1342) grad: 0.4116 (0.4138) time: 0.3433 data: 0.0043 max mem: 3953 +train: [8] [ 60/400] eta: 0:02:15 lr: 0.000269 loss: 3.1313 (3.1360) grad: 0.4072 (0.4172) time: 0.3270 data: 0.0048 max mem: 3953 +train: [8] [ 80/400] eta: 0:02:03 lr: 0.000268 loss: 3.1170 (3.1329) grad: 0.4189 (0.4167) time: 0.3489 data: 0.0051 max mem: 3953 +train: [8] [100/400] eta: 0:01:53 lr: 0.000267 loss: 3.0910 (3.1258) grad: 0.4143 (0.4160) time: 0.3454 data: 0.0052 max mem: 3953 +train: [8] [120/400] eta: 0:01:44 lr: 0.000266 loss: 3.0896 (3.1223) grad: 0.3985 (0.4157) time: 0.3528 data: 0.0051 max mem: 3953 +train: [8] [140/400] eta: 0:01:36 lr: 0.000265 loss: 3.1228 (3.1239) grad: 0.4294 (0.4190) time: 0.3494 data: 0.0050 max mem: 3953 +train: [8] [160/400] eta: 0:01:28 lr: 0.000264 loss: 3.1269 (3.1238) grad: 0.4193 (0.4181) time: 0.3527 data: 0.0050 max mem: 3953 +train: [8] [180/400] eta: 0:01:20 lr: 0.000263 loss: 3.1199 (3.1242) grad: 0.4119 (0.4169) time: 0.3467 data: 0.0048 max mem: 3953 +train: [8] [200/400] eta: 0:01:12 lr: 0.000262 loss: 3.1246 (3.1230) grad: 0.4119 (0.4150) time: 0.3476 data: 0.0048 max mem: 3953 +train: [8] [220/400] eta: 0:01:04 lr: 0.000260 loss: 3.1246 (3.1215) grad: 0.3911 (0.4127) time: 0.3247 data: 0.0046 max mem: 3953 +train: [8] [240/400] eta: 0:00:57 lr: 0.000259 loss: 3.1255 (3.1227) grad: 0.3918 (0.4126) time: 0.3634 data: 0.0052 max mem: 3953 +train: [8] [260/400] eta: 0:00:50 lr: 0.000258 loss: 3.1130 (3.1224) grad: 0.4170 (0.4132) time: 0.3435 data: 0.0049 max mem: 3953 +train: [8] [280/400] eta: 0:00:43 lr: 0.000257 loss: 3.1242 (3.1245) grad: 0.4169 (0.4129) time: 0.3620 data: 0.0055 max mem: 3953 +train: [8] [300/400] eta: 0:00:35 lr: 0.000256 loss: 3.1394 (3.1260) grad: 0.4253 (0.4150) time: 0.3167 data: 0.0046 max mem: 3953 +train: [8] [320/400] eta: 0:00:28 lr: 0.000255 loss: 3.1532 (3.1279) grad: 0.4353 (0.4158) time: 0.3405 data: 0.0048 max mem: 3953 +train: [8] [340/400] eta: 0:00:21 lr: 0.000254 loss: 3.1497 (3.1269) grad: 0.4058 (0.4156) time: 0.3463 data: 0.0053 max mem: 3953 +train: [8] [360/400] eta: 0:00:14 lr: 0.000253 loss: 3.1383 (3.1290) grad: 0.4026 (0.4156) time: 0.3329 data: 0.0050 max mem: 3953 +train: [8] [380/400] eta: 0:00:07 lr: 0.000252 loss: 3.1524 (3.1304) grad: 0.4438 (0.4180) time: 0.3346 data: 0.0047 max mem: 3953 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 3.1558 (3.1325) grad: 0.4530 (0.4193) time: 0.3442 data: 0.0053 max mem: 3953 +train: [8] Total time: 0:02:21 (0.3531 s / it) +train: [8] Summary: lr: 0.000250 loss: 3.1558 (3.1325) grad: 0.4530 (0.4193) +eval (validation): [8] [ 0/85] eta: 0:04:40 time: 3.3048 data: 3.0978 max mem: 3953 +eval (validation): [8] [20/85] eta: 0:00:29 time: 0.3169 data: 0.0047 max mem: 3953 +eval (validation): [8] [40/85] eta: 0:00:18 time: 0.3409 data: 0.0040 max mem: 3953 +eval (validation): [8] [60/85] eta: 0:00:09 time: 0.3356 data: 0.0046 max mem: 3953 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3021 data: 0.0040 max mem: 3953 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.2942 data: 0.0040 max mem: 3953 +eval (validation): [8] Total time: 0:00:30 (0.3613 s / it) +cv: [8] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 3.109 acc: 0.081 f1: 0.044 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:21:06 lr: nan time: 3.1656 data: 2.9472 max mem: 3953 +train: [9] [ 20/400] eta: 0:03:03 lr: 0.000249 loss: 3.0936 (3.1142) grad: 0.4005 (0.4060) time: 0.3496 data: 0.0223 max mem: 3953 +train: [9] [ 40/400] eta: 0:02:32 lr: 0.000248 loss: 3.0936 (3.1100) grad: 0.3995 (0.4043) time: 0.3620 data: 0.0042 max mem: 3953 +train: [9] [ 60/400] eta: 0:02:14 lr: 0.000247 loss: 3.0932 (3.1057) grad: 0.3912 (0.4002) time: 0.3334 data: 0.0047 max mem: 3953 +train: [9] [ 80/400] eta: 0:02:03 lr: 0.000246 loss: 3.0870 (3.1045) grad: 0.3896 (0.3970) time: 0.3542 data: 0.0052 max mem: 3953 +train: [9] [100/400] eta: 0:01:53 lr: 0.000244 loss: 3.1065 (3.1033) grad: 0.3828 (0.3948) time: 0.3450 data: 0.0051 max mem: 3953 +train: [9] [120/400] eta: 0:01:43 lr: 0.000243 loss: 3.1076 (3.1034) grad: 0.3897 (0.3998) time: 0.3371 data: 0.0048 max mem: 3953 +train: [9] [140/400] eta: 0:01:34 lr: 0.000242 loss: 3.1157 (3.1072) grad: 0.4025 (0.4018) time: 0.3281 data: 0.0047 max mem: 3953 +train: [9] [160/400] eta: 0:01:26 lr: 0.000241 loss: 3.1075 (3.1073) grad: 0.3978 (0.4002) time: 0.3496 data: 0.0050 max mem: 3953 +train: [9] [180/400] eta: 0:01:19 lr: 0.000240 loss: 3.1108 (3.1072) grad: 0.3916 (0.4000) time: 0.3371 data: 0.0048 max mem: 3953 +train: [9] [200/400] eta: 0:01:11 lr: 0.000238 loss: 3.1196 (3.1071) grad: 0.3941 (0.4002) time: 0.3372 data: 0.0048 max mem: 3953 +train: [9] [220/400] eta: 0:01:04 lr: 0.000237 loss: 3.1111 (3.1085) grad: 0.3946 (0.3996) time: 0.3506 data: 0.0046 max mem: 3953 +train: [9] [240/400] eta: 0:00:57 lr: 0.000236 loss: 3.1492 (3.1127) grad: 0.4011 (0.4009) time: 0.3632 data: 0.0053 max mem: 3953 +train: [9] [260/400] eta: 0:00:49 lr: 0.000234 loss: 3.1507 (3.1138) grad: 0.4050 (0.4017) time: 0.3402 data: 0.0048 max mem: 3953 +train: [9] [280/400] eta: 0:00:42 lr: 0.000233 loss: 3.1285 (3.1137) grad: 0.4029 (0.4017) time: 0.3374 data: 0.0049 max mem: 3953 +train: [9] [300/400] eta: 0:00:35 lr: 0.000232 loss: 3.1006 (3.1142) grad: 0.4069 (0.4017) time: 0.3260 data: 0.0046 max mem: 3953 +train: [9] [320/400] eta: 0:00:28 lr: 0.000230 loss: 3.1164 (3.1150) grad: 0.4083 (0.4026) time: 0.3260 data: 0.0044 max mem: 3953 +train: [9] [340/400] eta: 0:00:21 lr: 0.000229 loss: 3.1223 (3.1145) grad: 0.4225 (0.4035) time: 0.3625 data: 0.0051 max mem: 3953 +train: [9] [360/400] eta: 0:00:14 lr: 0.000228 loss: 3.1016 (3.1132) grad: 0.4108 (0.4032) time: 0.3285 data: 0.0047 max mem: 3953 +train: [9] [380/400] eta: 0:00:07 lr: 0.000226 loss: 3.1012 (3.1140) grad: 0.3956 (0.4030) time: 0.3497 data: 0.0054 max mem: 3953 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 3.0927 (3.1143) grad: 0.3986 (0.4035) time: 0.3498 data: 0.0045 max mem: 3953 +train: [9] Total time: 0:02:20 (0.3509 s / it) +train: [9] Summary: lr: 0.000225 loss: 3.0927 (3.1143) grad: 0.3986 (0.4035) +eval (validation): [9] [ 0/85] eta: 0:04:48 time: 3.3954 data: 3.1258 max mem: 3953 +eval (validation): [9] [20/85] eta: 0:00:32 time: 0.3478 data: 0.0035 max mem: 3953 +eval (validation): [9] [40/85] eta: 0:00:18 time: 0.3408 data: 0.0042 max mem: 3953 +eval (validation): [9] [60/85] eta: 0:00:09 time: 0.3199 data: 0.0039 max mem: 3953 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3187 data: 0.0043 max mem: 3953 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3070 data: 0.0041 max mem: 3953 +eval (validation): [9] Total time: 0:00:31 (0.3693 s / it) +cv: [9] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 3.087 acc: 0.096 f1: 0.050 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-best.pth +train: [10] [ 0/400] eta: 0:20:59 lr: nan time: 3.1477 data: 2.8649 max mem: 3953 +train: [10] [ 20/400] eta: 0:03:13 lr: 0.000224 loss: 3.1099 (3.1167) grad: 0.4185 (0.4203) time: 0.3781 data: 0.0059 max mem: 3953 +train: [10] [ 40/400] eta: 0:02:30 lr: 0.000222 loss: 3.0983 (3.1019) grad: 0.3835 (0.4015) time: 0.3210 data: 0.0041 max mem: 3953 +train: [10] [ 60/400] eta: 0:02:14 lr: 0.000221 loss: 3.0954 (3.1071) grad: 0.3805 (0.4033) time: 0.3469 data: 0.0052 max mem: 3953 +train: [10] [ 80/400] eta: 0:02:03 lr: 0.000220 loss: 3.1033 (3.1137) grad: 0.4173 (0.4068) time: 0.3573 data: 0.0050 max mem: 3953 +train: [10] [100/400] eta: 0:01:53 lr: 0.000218 loss: 3.0798 (3.1071) grad: 0.4026 (0.4036) time: 0.3430 data: 0.0050 max mem: 3953 +train: [10] [120/400] eta: 0:01:43 lr: 0.000217 loss: 3.0786 (3.1052) grad: 0.3937 (0.4040) time: 0.3345 data: 0.0047 max mem: 3953 +train: [10] [140/400] eta: 0:01:35 lr: 0.000215 loss: 3.0975 (3.1037) grad: 0.3961 (0.4037) time: 0.3501 data: 0.0052 max mem: 3953 +train: [10] [160/400] eta: 0:01:27 lr: 0.000214 loss: 3.1060 (3.1041) grad: 0.3892 (0.4014) time: 0.3529 data: 0.0050 max mem: 3953 +train: [10] [180/400] eta: 0:01:19 lr: 0.000213 loss: 3.0989 (3.1025) grad: 0.3795 (0.4009) time: 0.3325 data: 0.0049 max mem: 3953 +train: [10] [200/400] eta: 0:01:11 lr: 0.000211 loss: 3.0775 (3.1004) grad: 0.3920 (0.4002) time: 0.3370 data: 0.0045 max mem: 3953 +train: [10] [220/400] eta: 0:01:04 lr: 0.000210 loss: 3.0760 (3.0983) grad: 0.3800 (0.3972) time: 0.3550 data: 0.0050 max mem: 3953 +train: [10] [240/400] eta: 0:00:57 lr: 0.000208 loss: 3.0685 (3.0953) grad: 0.3718 (0.3968) time: 0.3493 data: 0.0046 max mem: 3953 +train: [10] [260/400] eta: 0:00:49 lr: 0.000207 loss: 3.0885 (3.0967) grad: 0.3900 (0.3967) time: 0.3410 data: 0.0046 max mem: 3953 +train: [10] [280/400] eta: 0:00:42 lr: 0.000205 loss: 3.0986 (3.0944) grad: 0.3900 (0.3969) time: 0.3417 data: 0.0046 max mem: 3953 +train: [10] [300/400] eta: 0:00:35 lr: 0.000204 loss: 3.0934 (3.0948) grad: 0.3818 (0.3958) time: 0.3449 data: 0.0051 max mem: 3953 +train: [10] [320/400] eta: 0:00:28 lr: 0.000202 loss: 3.0984 (3.0961) grad: 0.3818 (0.3957) time: 0.3177 data: 0.0044 max mem: 3953 +train: [10] [340/400] eta: 0:00:21 lr: 0.000201 loss: 3.1068 (3.0968) grad: 0.3663 (0.3941) time: 0.3413 data: 0.0046 max mem: 3953 +train: [10] [360/400] eta: 0:00:14 lr: 0.000199 loss: 3.1003 (3.0955) grad: 0.3688 (0.3940) time: 0.3370 data: 0.0047 max mem: 3953 +train: [10] [380/400] eta: 0:00:07 lr: 0.000198 loss: 3.0841 (3.0948) grad: 0.3892 (0.3938) time: 0.3612 data: 0.0051 max mem: 3953 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 3.0844 (3.0950) grad: 0.3934 (0.3940) time: 0.3477 data: 0.0045 max mem: 3953 +train: [10] Total time: 0:02:20 (0.3519 s / it) +train: [10] Summary: lr: 0.000196 loss: 3.0844 (3.0950) grad: 0.3934 (0.3940) +eval (validation): [10] [ 0/85] eta: 0:04:36 time: 3.2566 data: 3.0354 max mem: 3953 +eval (validation): [10] [20/85] eta: 0:00:31 time: 0.3495 data: 0.0227 max mem: 3953 +eval (validation): [10] [40/85] eta: 0:00:19 time: 0.3603 data: 0.0046 max mem: 3953 +eval (validation): [10] [60/85] eta: 0:00:09 time: 0.3261 data: 0.0045 max mem: 3953 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3116 data: 0.0042 max mem: 3953 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3021 data: 0.0038 max mem: 3953 +eval (validation): [10] Total time: 0:00:31 (0.3732 s / it) +cv: [10] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 3.101 acc: 0.096 f1: 0.045 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:22:21 lr: nan time: 3.3526 data: 3.1274 max mem: 3953 +train: [11] [ 20/400] eta: 0:03:13 lr: 0.000195 loss: 3.0555 (3.0674) grad: 0.3895 (0.4010) time: 0.3665 data: 0.0055 max mem: 3953 +train: [11] [ 40/400] eta: 0:02:32 lr: 0.000193 loss: 3.0723 (3.0773) grad: 0.3821 (0.3941) time: 0.3348 data: 0.0041 max mem: 3953 +train: [11] [ 60/400] eta: 0:02:15 lr: 0.000192 loss: 3.0855 (3.0796) grad: 0.3800 (0.3949) time: 0.3502 data: 0.0049 max mem: 3953 +train: [11] [ 80/400] eta: 0:02:03 lr: 0.000190 loss: 3.0821 (3.0770) grad: 0.3919 (0.3964) time: 0.3426 data: 0.0046 max mem: 3953 +train: [11] [100/400] eta: 0:01:53 lr: 0.000189 loss: 3.0912 (3.0818) grad: 0.3880 (0.3953) time: 0.3418 data: 0.0050 max mem: 3953 +train: [11] [120/400] eta: 0:01:44 lr: 0.000187 loss: 3.1044 (3.0832) grad: 0.3827 (0.3929) time: 0.3505 data: 0.0042 max mem: 3953 +train: [11] [140/400] eta: 0:01:35 lr: 0.000186 loss: 3.0728 (3.0825) grad: 0.3840 (0.3928) time: 0.3404 data: 0.0044 max mem: 3953 +train: [11] [160/400] eta: 0:01:27 lr: 0.000184 loss: 3.0663 (3.0817) grad: 0.3745 (0.3913) time: 0.3317 data: 0.0047 max mem: 3953 +train: [11] [180/400] eta: 0:01:19 lr: 0.000183 loss: 3.0796 (3.0809) grad: 0.3745 (0.3905) time: 0.3241 data: 0.0042 max mem: 3953 +train: [11] [200/400] eta: 0:01:11 lr: 0.000181 loss: 3.0609 (3.0783) grad: 0.3759 (0.3890) time: 0.3599 data: 0.0048 max mem: 3953 +train: [11] [220/400] eta: 0:01:04 lr: 0.000180 loss: 3.0504 (3.0770) grad: 0.3759 (0.3892) time: 0.3642 data: 0.0050 max mem: 3953 +train: [11] [240/400] eta: 0:00:57 lr: 0.000178 loss: 3.1145 (3.0809) grad: 0.3916 (0.3896) time: 0.3434 data: 0.0047 max mem: 3953 +train: [11] [260/400] eta: 0:00:49 lr: 0.000177 loss: 3.1078 (3.0801) grad: 0.3899 (0.3897) time: 0.3380 data: 0.0048 max mem: 3953 +train: [11] [280/400] eta: 0:00:42 lr: 0.000175 loss: 3.0573 (3.0774) grad: 0.3819 (0.3892) time: 0.3372 data: 0.0047 max mem: 3953 +train: [11] [300/400] eta: 0:00:35 lr: 0.000174 loss: 3.0683 (3.0779) grad: 0.3821 (0.3888) time: 0.3346 data: 0.0048 max mem: 3953 +train: [11] [320/400] eta: 0:00:28 lr: 0.000172 loss: 3.0765 (3.0784) grad: 0.3912 (0.3891) time: 0.3310 data: 0.0047 max mem: 3953 +train: [11] [340/400] eta: 0:00:21 lr: 0.000170 loss: 3.0763 (3.0787) grad: 0.3845 (0.3895) time: 0.3329 data: 0.0043 max mem: 3953 +train: [11] [360/400] eta: 0:00:14 lr: 0.000169 loss: 3.0729 (3.0777) grad: 0.3845 (0.3892) time: 0.3508 data: 0.0052 max mem: 3953 +train: [11] [380/400] eta: 0:00:07 lr: 0.000167 loss: 3.0583 (3.0765) grad: 0.3794 (0.3888) time: 0.3400 data: 0.0048 max mem: 3953 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 3.0583 (3.0763) grad: 0.3794 (0.3887) time: 0.3334 data: 0.0046 max mem: 3953 +train: [11] Total time: 0:02:20 (0.3505 s / it) +train: [11] Summary: lr: 0.000166 loss: 3.0583 (3.0763) grad: 0.3794 (0.3887) +eval (validation): [11] [ 0/85] eta: 0:04:48 time: 3.3941 data: 3.1718 max mem: 3953 +eval (validation): [11] [20/85] eta: 0:00:30 time: 0.3239 data: 0.0044 max mem: 3953 +eval (validation): [11] [40/85] eta: 0:00:17 time: 0.3261 data: 0.0045 max mem: 3953 +eval (validation): [11] [60/85] eta: 0:00:09 time: 0.3319 data: 0.0062 max mem: 3953 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3158 data: 0.0029 max mem: 3953 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3122 data: 0.0031 max mem: 3953 +eval (validation): [11] Total time: 0:00:30 (0.3638 s / it) +cv: [11] best hparam: (1.4, 1.0) (026) ('026_lr1.4e+00_wd1.0e+00') loss: 3.091 acc: 0.086 f1: 0.041 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:22:05 lr: nan time: 3.3147 data: 3.0836 max mem: 3953 +train: [12] [ 20/400] eta: 0:02:57 lr: 0.000164 loss: 3.0301 (3.0459) grad: 0.3817 (0.3896) time: 0.3249 data: 0.0047 max mem: 3953 +train: [12] [ 40/400] eta: 0:02:29 lr: 0.000163 loss: 3.0441 (3.0575) grad: 0.3817 (0.3888) time: 0.3626 data: 0.0038 max mem: 3953 +train: [12] [ 60/400] eta: 0:02:21 lr: 0.000161 loss: 3.0596 (3.0632) grad: 0.3697 (0.3806) time: 0.4168 data: 0.0058 max mem: 3953 +train: [12] [ 80/400] eta: 0:02:07 lr: 0.000160 loss: 3.0596 (3.0665) grad: 0.3714 (0.3817) time: 0.3463 data: 0.0054 max mem: 3953 +train: [12] [100/400] eta: 0:01:56 lr: 0.000158 loss: 3.0606 (3.0697) grad: 0.3719 (0.3783) time: 0.3369 data: 0.0049 max mem: 3953 +train: [12] [120/400] eta: 0:01:46 lr: 0.000156 loss: 3.0577 (3.0628) grad: 0.3678 (0.3766) time: 0.3448 data: 0.0049 max mem: 3953 +train: [12] [140/400] eta: 0:01:37 lr: 0.000155 loss: 3.0716 (3.0672) grad: 0.3616 (0.3747) time: 0.3411 data: 0.0050 max mem: 3953 +train: [12] [160/400] eta: 0:01:28 lr: 0.000153 loss: 3.0781 (3.0700) grad: 0.3843 (0.3764) time: 0.3332 data: 0.0050 max mem: 3953 +train: [12] [180/400] eta: 0:01:20 lr: 0.000152 loss: 3.0698 (3.0708) grad: 0.3644 (0.3749) time: 0.3359 data: 0.0048 max mem: 3953 +train: [12] [200/400] eta: 0:01:12 lr: 0.000150 loss: 3.0668 (3.0695) grad: 0.3714 (0.3751) time: 0.3504 data: 0.0049 max mem: 3953 +train: [12] [220/400] eta: 0:01:05 lr: 0.000149 loss: 3.0595 (3.0698) grad: 0.3800 (0.3763) time: 0.3478 data: 0.0045 max mem: 3953 +train: [12] [240/400] eta: 0:00:57 lr: 0.000147 loss: 3.0687 (3.0682) grad: 0.3842 (0.3781) time: 0.3353 data: 0.0049 max mem: 3953 +train: [12] [260/400] eta: 0:00:50 lr: 0.000145 loss: 3.0745 (3.0695) grad: 0.3789 (0.3781) time: 0.3503 data: 0.0046 max mem: 3953 +train: [12] [280/400] eta: 0:00:42 lr: 0.000144 loss: 3.0675 (3.0687) grad: 0.3753 (0.3777) time: 0.3373 data: 0.0050 max mem: 3953 +train: [12] [300/400] eta: 0:00:35 lr: 0.000142 loss: 3.0630 (3.0696) grad: 0.3753 (0.3783) time: 0.3469 data: 0.0044 max mem: 3953 +train: [12] [320/400] eta: 0:00:28 lr: 0.000141 loss: 3.0719 (3.0695) grad: 0.3704 (0.3775) time: 0.3284 data: 0.0051 max mem: 3953 +train: [12] [340/400] eta: 0:00:21 lr: 0.000139 loss: 3.0498 (3.0682) grad: 0.3618 (0.3774) time: 0.3359 data: 0.0046 max mem: 3953 +train: [12] [360/400] eta: 0:00:14 lr: 0.000138 loss: 3.0650 (3.0701) grad: 0.3730 (0.3775) time: 0.3656 data: 0.0053 max mem: 3953 +train: [12] [380/400] eta: 0:00:07 lr: 0.000136 loss: 3.0596 (3.0686) grad: 0.3748 (0.3768) time: 0.3497 data: 0.0045 max mem: 3953 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 3.0422 (3.0670) grad: 0.3542 (0.3761) time: 0.3405 data: 0.0050 max mem: 3953 +train: [12] Total time: 0:02:21 (0.3545 s / it) +train: [12] Summary: lr: 0.000134 loss: 3.0422 (3.0670) grad: 0.3542 (0.3761) +eval (validation): [12] [ 0/85] eta: 0:04:40 time: 3.2956 data: 3.0388 max mem: 3953 +eval (validation): [12] [20/85] eta: 0:00:31 time: 0.3383 data: 0.0049 max mem: 3953 +eval (validation): [12] [40/85] eta: 0:00:18 time: 0.3290 data: 0.0040 max mem: 3953 +eval (validation): [12] [60/85] eta: 0:00:09 time: 0.3284 data: 0.0035 max mem: 3953 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3235 data: 0.0043 max mem: 3953 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3131 data: 0.0042 max mem: 3953 +eval (validation): [12] Total time: 0:00:31 (0.3658 s / it) +cv: [12] best hparam: (1.9, 1.0) (028) ('028_lr1.9e+00_wd1.0e+00') loss: 3.085 acc: 0.092 f1: 0.044 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:20:39 lr: nan time: 3.0981 data: 2.8908 max mem: 3953 +train: [13] [ 20/400] eta: 0:02:53 lr: 0.000133 loss: 3.0476 (3.0316) grad: 0.3777 (0.3718) time: 0.3244 data: 0.0040 max mem: 3953 +train: [13] [ 40/400] eta: 0:02:24 lr: 0.000131 loss: 3.0487 (3.0478) grad: 0.3679 (0.3628) time: 0.3439 data: 0.0042 max mem: 3953 +train: [13] [ 60/400] eta: 0:02:10 lr: 0.000130 loss: 3.0641 (3.0485) grad: 0.3675 (0.3710) time: 0.3468 data: 0.0046 max mem: 3953 +train: [13] [ 80/400] eta: 0:01:59 lr: 0.000128 loss: 3.0464 (3.0498) grad: 0.3743 (0.3740) time: 0.3484 data: 0.0049 max mem: 3953 +train: [13] [100/400] eta: 0:01:51 lr: 0.000127 loss: 3.0464 (3.0501) grad: 0.3736 (0.3760) time: 0.3513 data: 0.0046 max mem: 3953 +train: [13] [120/400] eta: 0:01:42 lr: 0.000125 loss: 3.0426 (3.0515) grad: 0.3755 (0.3765) time: 0.3517 data: 0.0051 max mem: 3953 +train: [13] [140/400] eta: 0:01:35 lr: 0.000124 loss: 3.0474 (3.0512) grad: 0.3755 (0.3761) time: 0.3681 data: 0.0050 max mem: 3953 +train: [13] [160/400] eta: 0:01:27 lr: 0.000122 loss: 3.0573 (3.0518) grad: 0.3671 (0.3742) time: 0.3421 data: 0.0050 max mem: 3953 +train: [13] [180/400] eta: 0:01:19 lr: 0.000120 loss: 3.0597 (3.0550) grad: 0.3594 (0.3732) time: 0.3415 data: 0.0050 max mem: 3953 +train: [13] [200/400] eta: 0:01:12 lr: 0.000119 loss: 3.0642 (3.0559) grad: 0.3699 (0.3728) time: 0.3494 data: 0.0054 max mem: 3953 +train: [13] [220/400] eta: 0:01:05 lr: 0.000117 loss: 3.0825 (3.0588) grad: 0.3744 (0.3738) time: 0.3860 data: 0.0052 max mem: 3953 +train: [13] [240/400] eta: 0:00:57 lr: 0.000116 loss: 3.0776 (3.0581) grad: 0.3678 (0.3733) time: 0.3438 data: 0.0048 max mem: 3953 +train: [13] [260/400] eta: 0:00:50 lr: 0.000114 loss: 3.0776 (3.0607) grad: 0.3729 (0.3738) time: 0.3519 data: 0.0049 max mem: 3953 +train: [13] [280/400] eta: 0:00:43 lr: 0.000113 loss: 3.0852 (3.0614) grad: 0.3659 (0.3729) time: 0.3436 data: 0.0050 max mem: 3953 +train: [13] [300/400] eta: 0:00:35 lr: 0.000111 loss: 3.0568 (3.0604) grad: 0.3498 (0.3727) time: 0.3422 data: 0.0047 max mem: 3953 +train: [13] [320/400] eta: 0:00:28 lr: 0.000110 loss: 3.0609 (3.0605) grad: 0.3473 (0.3716) time: 0.3289 data: 0.0045 max mem: 3953 +train: [13] [340/400] eta: 0:00:21 lr: 0.000108 loss: 3.0609 (3.0601) grad: 0.3520 (0.3715) time: 0.3373 data: 0.0040 max mem: 3953 +train: [13] [360/400] eta: 0:00:14 lr: 0.000107 loss: 3.0310 (3.0589) grad: 0.3716 (0.3717) time: 0.3615 data: 0.0050 max mem: 3953 +train: [13] [380/400] eta: 0:00:07 lr: 0.000105 loss: 3.0287 (3.0570) grad: 0.3716 (0.3706) time: 0.3513 data: 0.0045 max mem: 3953 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 3.0297 (3.0569) grad: 0.3680 (0.3710) time: 0.3421 data: 0.0048 max mem: 3953 +train: [13] Total time: 0:02:22 (0.3552 s / it) +train: [13] Summary: lr: 0.000104 loss: 3.0297 (3.0569) grad: 0.3680 (0.3710) +eval (validation): [13] [ 0/85] eta: 0:04:49 time: 3.4059 data: 3.1374 max mem: 3953 +eval (validation): [13] [20/85] eta: 0:00:33 time: 0.3684 data: 0.0047 max mem: 3953 +eval (validation): [13] [40/85] eta: 0:00:19 time: 0.3540 data: 0.0045 max mem: 3953 +eval (validation): [13] [60/85] eta: 0:00:09 time: 0.3054 data: 0.0042 max mem: 3953 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3236 data: 0.0040 max mem: 3953 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3087 data: 0.0035 max mem: 3953 +eval (validation): [13] Total time: 0:00:31 (0.3746 s / it) +cv: [13] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 3.080 acc: 0.094 f1: 0.049 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:20:46 lr: nan time: 3.1168 data: 2.9101 max mem: 3953 +train: [14] [ 20/400] eta: 0:03:02 lr: 0.000102 loss: 3.0136 (3.0227) grad: 0.3655 (0.3734) time: 0.3492 data: 0.0041 max mem: 3953 +train: [14] [ 40/400] eta: 0:02:30 lr: 0.000101 loss: 3.0317 (3.0380) grad: 0.3655 (0.3670) time: 0.3494 data: 0.0039 max mem: 3953 +train: [14] [ 60/400] eta: 0:02:14 lr: 0.000099 loss: 3.0351 (3.0366) grad: 0.3647 (0.3669) time: 0.3487 data: 0.0046 max mem: 3953 +train: [14] [ 80/400] eta: 0:02:01 lr: 0.000098 loss: 3.0351 (3.0458) grad: 0.3747 (0.3724) time: 0.3383 data: 0.0049 max mem: 3953 +train: [14] [100/400] eta: 0:01:52 lr: 0.000096 loss: 3.0498 (3.0482) grad: 0.3641 (0.3692) time: 0.3506 data: 0.0046 max mem: 3953 +train: [14] [120/400] eta: 0:01:43 lr: 0.000095 loss: 3.0460 (3.0490) grad: 0.3623 (0.3698) time: 0.3496 data: 0.0053 max mem: 3953 +train: [14] [140/400] eta: 0:01:35 lr: 0.000093 loss: 3.0453 (3.0488) grad: 0.3666 (0.3676) time: 0.3365 data: 0.0048 max mem: 3953 +train: [14] [160/400] eta: 0:01:26 lr: 0.000092 loss: 3.0371 (3.0449) grad: 0.3670 (0.3692) time: 0.3274 data: 0.0037 max mem: 3953 +train: [14] [180/400] eta: 0:01:19 lr: 0.000090 loss: 3.0068 (3.0414) grad: 0.3573 (0.3668) time: 0.3527 data: 0.0047 max mem: 3953 +train: [14] [200/400] eta: 0:01:11 lr: 0.000089 loss: 3.0234 (3.0405) grad: 0.3457 (0.3658) time: 0.3445 data: 0.0049 max mem: 3953 +train: [14] [220/400] eta: 0:01:04 lr: 0.000088 loss: 3.0337 (3.0380) grad: 0.3619 (0.3650) time: 0.3403 data: 0.0045 max mem: 3953 +train: [14] [240/400] eta: 0:00:56 lr: 0.000086 loss: 3.0339 (3.0392) grad: 0.3590 (0.3635) time: 0.3493 data: 0.0047 max mem: 3953 +train: [14] [260/400] eta: 0:00:49 lr: 0.000085 loss: 3.0552 (3.0423) grad: 0.3590 (0.3654) time: 0.3468 data: 0.0046 max mem: 3953 +train: [14] [280/400] eta: 0:00:42 lr: 0.000083 loss: 3.0473 (3.0427) grad: 0.3619 (0.3655) time: 0.3568 data: 0.0050 max mem: 3953 +train: [14] [300/400] eta: 0:00:35 lr: 0.000082 loss: 3.0260 (3.0419) grad: 0.3619 (0.3655) time: 0.3499 data: 0.0047 max mem: 3953 +train: [14] [320/400] eta: 0:00:28 lr: 0.000081 loss: 3.0309 (3.0405) grad: 0.3531 (0.3640) time: 0.3450 data: 0.0043 max mem: 3953 +train: [14] [340/400] eta: 0:00:21 lr: 0.000079 loss: 3.0375 (3.0414) grad: 0.3432 (0.3626) time: 0.3403 data: 0.0046 max mem: 3953 +train: [14] [360/400] eta: 0:00:14 lr: 0.000078 loss: 3.0509 (3.0410) grad: 0.3401 (0.3615) time: 0.3489 data: 0.0048 max mem: 3953 +train: [14] [380/400] eta: 0:00:07 lr: 0.000076 loss: 3.0486 (3.0415) grad: 0.3427 (0.3618) time: 0.3595 data: 0.0047 max mem: 3953 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 3.0477 (3.0413) grad: 0.3514 (0.3613) time: 0.3730 data: 0.0051 max mem: 3953 +train: [14] Total time: 0:02:22 (0.3552 s / it) +train: [14] Summary: lr: 0.000075 loss: 3.0477 (3.0413) grad: 0.3514 (0.3613) +eval (validation): [14] [ 0/85] eta: 0:04:45 time: 3.3591 data: 3.0929 max mem: 3953 +eval (validation): [14] [20/85] eta: 0:00:31 time: 0.3427 data: 0.0041 max mem: 3953 +eval (validation): [14] [40/85] eta: 0:00:18 time: 0.3186 data: 0.0038 max mem: 3953 +eval (validation): [14] [60/85] eta: 0:00:09 time: 0.3304 data: 0.0039 max mem: 3953 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3187 data: 0.0040 max mem: 3953 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3046 data: 0.0039 max mem: 3953 +eval (validation): [14] Total time: 0:00:31 (0.3649 s / it) +cv: [14] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 3.125 acc: 0.093 f1: 0.052 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:20:44 lr: nan time: 3.1116 data: 2.8455 max mem: 3953 +train: [15] [ 20/400] eta: 0:03:22 lr: 0.000074 loss: 3.0555 (3.0612) grad: 0.3730 (0.3727) time: 0.4053 data: 0.0057 max mem: 3953 +train: [15] [ 40/400] eta: 0:02:38 lr: 0.000072 loss: 3.0519 (3.0462) grad: 0.3663 (0.3633) time: 0.3400 data: 0.0048 max mem: 3953 +train: [15] [ 60/400] eta: 0:02:21 lr: 0.000071 loss: 3.0483 (3.0468) grad: 0.3537 (0.3563) time: 0.3665 data: 0.0052 max mem: 3953 +train: [15] [ 80/400] eta: 0:02:07 lr: 0.000070 loss: 3.0361 (3.0383) grad: 0.3395 (0.3548) time: 0.3439 data: 0.0050 max mem: 3953 +train: [15] [100/400] eta: 0:01:58 lr: 0.000068 loss: 3.0259 (3.0370) grad: 0.3374 (0.3505) time: 0.3846 data: 0.0056 max mem: 3953 +train: [15] [120/400] eta: 0:01:48 lr: 0.000067 loss: 3.0244 (3.0351) grad: 0.3388 (0.3515) time: 0.3539 data: 0.0048 max mem: 3953 +train: [15] [140/400] eta: 0:01:39 lr: 0.000066 loss: 3.0179 (3.0325) grad: 0.3419 (0.3493) time: 0.3382 data: 0.0048 max mem: 3953 +train: [15] [160/400] eta: 0:01:30 lr: 0.000064 loss: 3.0236 (3.0326) grad: 0.3360 (0.3491) time: 0.3411 data: 0.0042 max mem: 3953 +train: [15] [180/400] eta: 0:01:22 lr: 0.000063 loss: 3.0333 (3.0326) grad: 0.3521 (0.3502) time: 0.3480 data: 0.0050 max mem: 3953 +train: [15] [200/400] eta: 0:01:14 lr: 0.000062 loss: 3.0496 (3.0353) grad: 0.3677 (0.3534) time: 0.3475 data: 0.0043 max mem: 3953 +train: [15] [220/400] eta: 0:01:06 lr: 0.000061 loss: 3.0598 (3.0385) grad: 0.3707 (0.3539) time: 0.3444 data: 0.0045 max mem: 3953 +train: [15] [240/400] eta: 0:00:59 lr: 0.000059 loss: 3.0588 (3.0406) grad: 0.3630 (0.3560) time: 0.3765 data: 0.0047 max mem: 3953 +train: [15] [260/400] eta: 0:00:51 lr: 0.000058 loss: 3.0279 (3.0384) grad: 0.3582 (0.3558) time: 0.3294 data: 0.0044 max mem: 3953 +train: [15] [280/400] eta: 0:00:43 lr: 0.000057 loss: 3.0058 (3.0388) grad: 0.3536 (0.3560) time: 0.3552 data: 0.0050 max mem: 3953 +train: [15] [300/400] eta: 0:00:36 lr: 0.000056 loss: 3.0340 (3.0385) grad: 0.3567 (0.3573) time: 0.3256 data: 0.0041 max mem: 3953 +train: [15] [320/400] eta: 0:00:28 lr: 0.000054 loss: 3.0238 (3.0363) grad: 0.3506 (0.3573) time: 0.3573 data: 0.0045 max mem: 3953 +train: [15] [340/400] eta: 0:00:21 lr: 0.000053 loss: 3.0106 (3.0360) grad: 0.3506 (0.3568) time: 0.3386 data: 0.0048 max mem: 3953 +train: [15] [360/400] eta: 0:00:14 lr: 0.000052 loss: 3.0486 (3.0386) grad: 0.3587 (0.3568) time: 0.3306 data: 0.0046 max mem: 3953 +train: [15] [380/400] eta: 0:00:07 lr: 0.000051 loss: 3.0556 (3.0380) grad: 0.3601 (0.3570) time: 0.3548 data: 0.0049 max mem: 3953 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 3.0167 (3.0373) grad: 0.3464 (0.3569) time: 0.3486 data: 0.0044 max mem: 3953 +train: [15] Total time: 0:02:23 (0.3589 s / it) +train: [15] Summary: lr: 0.000050 loss: 3.0167 (3.0373) grad: 0.3464 (0.3569) +eval (validation): [15] [ 0/85] eta: 0:04:37 time: 3.2702 data: 3.0557 max mem: 3953 +eval (validation): [15] [20/85] eta: 0:00:32 time: 0.3568 data: 0.0053 max mem: 3953 +eval (validation): [15] [40/85] eta: 0:00:18 time: 0.3238 data: 0.0039 max mem: 3953 +eval (validation): [15] [60/85] eta: 0:00:09 time: 0.3401 data: 0.0042 max mem: 3953 +eval (validation): [15] [80/85] eta: 0:00:01 time: 0.3182 data: 0.0047 max mem: 3953 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.2977 data: 0.0041 max mem: 3953 +eval (validation): [15] Total time: 0:00:31 (0.3700 s / it) +cv: [15] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 3.156 acc: 0.093 f1: 0.046 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:22:49 lr: nan time: 3.4246 data: 3.1304 max mem: 3953 +train: [16] [ 20/400] eta: 0:03:08 lr: 0.000048 loss: 3.0175 (3.0078) grad: 0.3527 (0.3611) time: 0.3499 data: 0.0042 max mem: 3953 +train: [16] [ 40/400] eta: 0:02:31 lr: 0.000047 loss: 3.0118 (3.0011) grad: 0.3452 (0.3520) time: 0.3419 data: 0.0045 max mem: 3953 +train: [16] [ 60/400] eta: 0:02:14 lr: 0.000046 loss: 3.0132 (3.0121) grad: 0.3428 (0.3497) time: 0.3468 data: 0.0042 max mem: 3953 +train: [16] [ 80/400] eta: 0:02:02 lr: 0.000045 loss: 3.0271 (3.0112) grad: 0.3447 (0.3503) time: 0.3399 data: 0.0046 max mem: 3953 +train: [16] [100/400] eta: 0:01:53 lr: 0.000044 loss: 2.9974 (3.0099) grad: 0.3417 (0.3487) time: 0.3548 data: 0.0041 max mem: 3953 +train: [16] [120/400] eta: 0:01:44 lr: 0.000043 loss: 2.9999 (3.0150) grad: 0.3395 (0.3494) time: 0.3454 data: 0.0046 max mem: 3953 +train: [16] [140/400] eta: 0:01:35 lr: 0.000042 loss: 3.0138 (3.0162) grad: 0.3395 (0.3485) time: 0.3363 data: 0.0045 max mem: 3953 +train: [16] [160/400] eta: 0:01:27 lr: 0.000041 loss: 3.0157 (3.0172) grad: 0.3402 (0.3486) time: 0.3524 data: 0.0050 max mem: 3953 +train: [16] [180/400] eta: 0:01:20 lr: 0.000040 loss: 3.0200 (3.0192) grad: 0.3384 (0.3467) time: 0.3523 data: 0.0049 max mem: 3953 +train: [16] [200/400] eta: 0:01:12 lr: 0.000039 loss: 3.0166 (3.0179) grad: 0.3384 (0.3465) time: 0.3501 data: 0.0048 max mem: 3953 +train: [16] [220/400] eta: 0:01:05 lr: 0.000038 loss: 3.0109 (3.0180) grad: 0.3301 (0.3436) time: 0.3508 data: 0.0048 max mem: 3953 +train: [16] [240/400] eta: 0:00:57 lr: 0.000036 loss: 3.0277 (3.0180) grad: 0.3361 (0.3438) time: 0.3454 data: 0.0051 max mem: 3953 +train: [16] [260/400] eta: 0:00:50 lr: 0.000035 loss: 3.0354 (3.0199) grad: 0.3361 (0.3426) time: 0.3508 data: 0.0053 max mem: 3953 +train: [16] [280/400] eta: 0:00:43 lr: 0.000034 loss: 3.0361 (3.0191) grad: 0.3238 (0.3423) time: 0.3528 data: 0.0046 max mem: 3953 +train: [16] [300/400] eta: 0:00:35 lr: 0.000033 loss: 3.0289 (3.0196) grad: 0.3406 (0.3428) time: 0.3463 data: 0.0051 max mem: 3953 +train: [16] [320/400] eta: 0:00:28 lr: 0.000032 loss: 3.0190 (3.0198) grad: 0.3401 (0.3423) time: 0.3603 data: 0.0050 max mem: 3953 +train: [16] [340/400] eta: 0:00:21 lr: 0.000031 loss: 3.0190 (3.0195) grad: 0.3480 (0.3431) time: 0.3546 data: 0.0046 max mem: 3953 +train: [16] [360/400] eta: 0:00:14 lr: 0.000031 loss: 3.0229 (3.0211) grad: 0.3528 (0.3432) time: 0.3253 data: 0.0046 max mem: 3953 +train: [16] [380/400] eta: 0:00:07 lr: 0.000030 loss: 3.0364 (3.0216) grad: 0.3608 (0.3438) time: 0.3607 data: 0.0048 max mem: 3953 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 3.0203 (3.0212) grad: 0.3421 (0.3424) time: 0.3510 data: 0.0050 max mem: 3953 +train: [16] Total time: 0:02:22 (0.3566 s / it) +train: [16] Summary: lr: 0.000029 loss: 3.0203 (3.0212) grad: 0.3421 (0.3424) +eval (validation): [16] [ 0/85] eta: 0:04:39 time: 3.2893 data: 3.0380 max mem: 3953 +eval (validation): [16] [20/85] eta: 0:00:32 time: 0.3583 data: 0.0051 max mem: 3953 +eval (validation): [16] [40/85] eta: 0:00:18 time: 0.3229 data: 0.0039 max mem: 3953 +eval (validation): [16] [60/85] eta: 0:00:09 time: 0.3436 data: 0.0049 max mem: 3953 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3067 data: 0.0039 max mem: 3953 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3001 data: 0.0039 max mem: 3953 +eval (validation): [16] Total time: 0:00:31 (0.3702 s / it) +cv: [16] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 3.097 acc: 0.097 f1: 0.053 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-best.pth +train: [17] [ 0/400] eta: 0:22:19 lr: nan time: 3.3492 data: 3.0784 max mem: 3953 +train: [17] [ 20/400] eta: 0:03:11 lr: 0.000028 loss: 2.9622 (2.9702) grad: 0.3121 (0.3244) time: 0.3612 data: 0.0188 max mem: 3953 +train: [17] [ 40/400] eta: 0:02:34 lr: 0.000027 loss: 2.9846 (2.9886) grad: 0.3121 (0.3249) time: 0.3492 data: 0.0040 max mem: 3953 +train: [17] [ 60/400] eta: 0:02:15 lr: 0.000026 loss: 3.0032 (2.9923) grad: 0.3342 (0.3321) time: 0.3397 data: 0.0041 max mem: 3953 +train: [17] [ 80/400] eta: 0:02:04 lr: 0.000025 loss: 3.0041 (2.9990) grad: 0.3412 (0.3359) time: 0.3590 data: 0.0048 max mem: 3953 +train: [17] [100/400] eta: 0:01:54 lr: 0.000024 loss: 3.0471 (3.0031) grad: 0.3470 (0.3396) time: 0.3535 data: 0.0045 max mem: 3953 +train: [17] [120/400] eta: 0:01:44 lr: 0.000023 loss: 3.0209 (3.0025) grad: 0.3556 (0.3420) time: 0.3367 data: 0.0048 max mem: 3953 +train: [17] [140/400] eta: 0:01:35 lr: 0.000023 loss: 3.0092 (3.0037) grad: 0.3346 (0.3396) time: 0.3303 data: 0.0043 max mem: 3953 +train: [17] [160/400] eta: 0:01:28 lr: 0.000022 loss: 2.9733 (3.0012) grad: 0.3345 (0.3392) time: 0.3608 data: 0.0050 max mem: 3953 +train: [17] [180/400] eta: 0:01:20 lr: 0.000021 loss: 3.0144 (3.0057) grad: 0.3196 (0.3369) time: 0.3687 data: 0.0050 max mem: 3953 +train: [17] [200/400] eta: 0:01:13 lr: 0.000020 loss: 3.0197 (3.0070) grad: 0.3117 (0.3345) time: 0.3467 data: 0.0051 max mem: 3953 +train: [17] [220/400] eta: 0:01:05 lr: 0.000019 loss: 3.0042 (3.0053) grad: 0.3172 (0.3351) time: 0.3409 data: 0.0050 max mem: 3953 +train: [17] [240/400] eta: 0:00:57 lr: 0.000019 loss: 2.9817 (3.0061) grad: 0.3372 (0.3363) time: 0.3426 data: 0.0052 max mem: 3953 +train: [17] [260/400] eta: 0:00:50 lr: 0.000018 loss: 2.9938 (3.0058) grad: 0.3373 (0.3371) time: 0.3598 data: 0.0049 max mem: 3953 +train: [17] [280/400] eta: 0:00:43 lr: 0.000017 loss: 2.9965 (3.0077) grad: 0.3488 (0.3388) time: 0.3338 data: 0.0049 max mem: 3953 +train: [17] [300/400] eta: 0:00:36 lr: 0.000016 loss: 3.0109 (3.0075) grad: 0.3507 (0.3401) time: 0.3707 data: 0.0050 max mem: 3953 +train: [17] [320/400] eta: 0:00:28 lr: 0.000016 loss: 3.0109 (3.0082) grad: 0.3459 (0.3394) time: 0.3639 data: 0.0051 max mem: 3953 +train: [17] [340/400] eta: 0:00:21 lr: 0.000015 loss: 3.0276 (3.0105) grad: 0.3256 (0.3389) time: 0.3531 data: 0.0051 max mem: 3953 +train: [17] [360/400] eta: 0:00:14 lr: 0.000014 loss: 3.0276 (3.0107) grad: 0.3287 (0.3384) time: 0.3344 data: 0.0048 max mem: 3953 +train: [17] [380/400] eta: 0:00:07 lr: 0.000014 loss: 2.9932 (3.0099) grad: 0.3287 (0.3384) time: 0.3397 data: 0.0048 max mem: 3953 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 2.9711 (3.0093) grad: 0.3264 (0.3385) time: 0.3798 data: 0.0052 max mem: 3953 +train: [17] Total time: 0:02:23 (0.3593 s / it) +train: [17] Summary: lr: 0.000013 loss: 2.9711 (3.0093) grad: 0.3264 (0.3385) +eval (validation): [17] [ 0/85] eta: 0:04:41 time: 3.3162 data: 3.0890 max mem: 3953 +eval (validation): [17] [20/85] eta: 0:00:30 time: 0.3325 data: 0.0051 max mem: 3953 +eval (validation): [17] [40/85] eta: 0:00:18 time: 0.3405 data: 0.0035 max mem: 3953 +eval (validation): [17] [60/85] eta: 0:00:09 time: 0.3152 data: 0.0042 max mem: 3953 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3266 data: 0.0032 max mem: 3953 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3278 data: 0.0035 max mem: 3953 +eval (validation): [17] Total time: 0:00:31 (0.3672 s / it) +cv: [17] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 3.071 acc: 0.093 f1: 0.054 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:25:17 lr: nan time: 3.7950 data: 3.5213 max mem: 3953 +train: [18] [ 20/400] eta: 0:03:20 lr: 0.000012 loss: 3.0176 (3.0083) grad: 0.3169 (0.3316) time: 0.3650 data: 0.0035 max mem: 3953 +train: [18] [ 40/400] eta: 0:02:39 lr: 0.000012 loss: 3.0176 (3.0158) grad: 0.3169 (0.3318) time: 0.3507 data: 0.0043 max mem: 3953 +train: [18] [ 60/400] eta: 0:02:19 lr: 0.000011 loss: 3.0104 (3.0189) grad: 0.3147 (0.3291) time: 0.3465 data: 0.0049 max mem: 3953 +train: [18] [ 80/400] eta: 0:02:06 lr: 0.000011 loss: 3.0033 (3.0132) grad: 0.3281 (0.3296) time: 0.3503 data: 0.0049 max mem: 3953 +train: [18] [100/400] eta: 0:01:56 lr: 0.000010 loss: 3.0127 (3.0143) grad: 0.3301 (0.3284) time: 0.3517 data: 0.0049 max mem: 3953 +train: [18] [120/400] eta: 0:01:45 lr: 0.000009 loss: 2.9993 (3.0103) grad: 0.3251 (0.3310) time: 0.3276 data: 0.0046 max mem: 3953 +train: [18] [140/400] eta: 0:01:36 lr: 0.000009 loss: 2.9949 (3.0106) grad: 0.3251 (0.3307) time: 0.3407 data: 0.0049 max mem: 3953 +train: [18] [160/400] eta: 0:01:28 lr: 0.000008 loss: 2.9997 (3.0084) grad: 0.3326 (0.3322) time: 0.3615 data: 0.0051 max mem: 3953 +train: [18] [180/400] eta: 0:01:20 lr: 0.000008 loss: 2.9930 (3.0077) grad: 0.3392 (0.3330) time: 0.3448 data: 0.0042 max mem: 3953 +train: [18] [200/400] eta: 0:01:13 lr: 0.000007 loss: 2.9950 (3.0064) grad: 0.3376 (0.3331) time: 0.3522 data: 0.0050 max mem: 3953 +train: [18] [220/400] eta: 0:01:05 lr: 0.000007 loss: 3.0012 (3.0105) grad: 0.3284 (0.3333) time: 0.3464 data: 0.0048 max mem: 3953 +train: [18] [240/400] eta: 0:00:58 lr: 0.000006 loss: 3.0280 (3.0127) grad: 0.3354 (0.3344) time: 0.3441 data: 0.0049 max mem: 3953 +train: [18] [260/400] eta: 0:00:50 lr: 0.000006 loss: 3.0263 (3.0133) grad: 0.3433 (0.3340) time: 0.3609 data: 0.0051 max mem: 3953 +train: [18] [280/400] eta: 0:00:43 lr: 0.000006 loss: 3.0240 (3.0141) grad: 0.3305 (0.3336) time: 0.3427 data: 0.0048 max mem: 3953 +train: [18] [300/400] eta: 0:00:36 lr: 0.000005 loss: 3.0008 (3.0134) grad: 0.3435 (0.3345) time: 0.3514 data: 0.0051 max mem: 3953 +train: [18] [320/400] eta: 0:00:28 lr: 0.000005 loss: 2.9961 (3.0122) grad: 0.3294 (0.3341) time: 0.3524 data: 0.0051 max mem: 3953 +train: [18] [340/400] eta: 0:00:21 lr: 0.000004 loss: 2.9908 (3.0121) grad: 0.3294 (0.3347) time: 0.3671 data: 0.0050 max mem: 3953 +train: [18] [360/400] eta: 0:00:14 lr: 0.000004 loss: 2.9914 (3.0121) grad: 0.3374 (0.3346) time: 0.3462 data: 0.0047 max mem: 3953 +train: [18] [380/400] eta: 0:00:07 lr: 0.000004 loss: 3.0016 (3.0113) grad: 0.3459 (0.3358) time: 0.3318 data: 0.0047 max mem: 3953 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 3.0016 (3.0109) grad: 0.3386 (0.3350) time: 0.3550 data: 0.0052 max mem: 3953 +train: [18] Total time: 0:02:23 (0.3586 s / it) +train: [18] Summary: lr: 0.000003 loss: 3.0016 (3.0109) grad: 0.3386 (0.3350) +eval (validation): [18] [ 0/85] eta: 0:04:46 time: 3.3666 data: 3.0977 max mem: 3953 +eval (validation): [18] [20/85] eta: 0:00:31 time: 0.3417 data: 0.0049 max mem: 3953 +eval (validation): [18] [40/85] eta: 0:00:18 time: 0.3335 data: 0.0040 max mem: 3953 +eval (validation): [18] [60/85] eta: 0:00:09 time: 0.3430 data: 0.0047 max mem: 3953 +eval (validation): [18] [80/85] eta: 0:00:01 time: 0.3403 data: 0.0047 max mem: 3953 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3299 data: 0.0045 max mem: 3953 +eval (validation): [18] Total time: 0:00:32 (0.3784 s / it) +cv: [18] best hparam: (7.1, 1.0) (036) ('036_lr7.1e+00_wd1.0e+00') loss: 3.071 acc: 0.095 f1: 0.059 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:17 lr: nan time: 3.3450 data: 3.1107 max mem: 3953 +train: [19] [ 20/400] eta: 0:03:20 lr: 0.000003 loss: 3.0026 (3.0103) grad: 0.3332 (0.3440) time: 0.3856 data: 0.0048 max mem: 3953 +train: [19] [ 40/400] eta: 0:02:37 lr: 0.000003 loss: 3.0021 (3.0061) grad: 0.3365 (0.3457) time: 0.3437 data: 0.0040 max mem: 3953 +train: [19] [ 60/400] eta: 0:02:19 lr: 0.000002 loss: 2.9955 (3.0019) grad: 0.3272 (0.3381) time: 0.3585 data: 0.0049 max mem: 3953 +train: [19] [ 80/400] eta: 0:02:06 lr: 0.000002 loss: 2.9971 (3.0062) grad: 0.3157 (0.3350) time: 0.3494 data: 0.0050 max mem: 3953 +train: [19] [100/400] eta: 0:01:55 lr: 0.000002 loss: 2.9971 (3.0074) grad: 0.3157 (0.3365) time: 0.3480 data: 0.0053 max mem: 3953 +train: [19] [120/400] eta: 0:01:45 lr: 0.000002 loss: 2.9991 (3.0076) grad: 0.3253 (0.3361) time: 0.3374 data: 0.0050 max mem: 3953 +train: [19] [140/400] eta: 0:01:37 lr: 0.000001 loss: 3.0137 (3.0090) grad: 0.3266 (0.3363) time: 0.3606 data: 0.0050 max mem: 3953 +train: [19] [160/400] eta: 0:01:29 lr: 0.000001 loss: 2.9985 (3.0075) grad: 0.3182 (0.3335) time: 0.3581 data: 0.0050 max mem: 3953 +train: [19] [180/400] eta: 0:01:21 lr: 0.000001 loss: 2.9965 (3.0092) grad: 0.3179 (0.3345) time: 0.3432 data: 0.0048 max mem: 3953 +train: [19] [200/400] eta: 0:01:13 lr: 0.000001 loss: 3.0078 (3.0076) grad: 0.3397 (0.3344) time: 0.3619 data: 0.0050 max mem: 3953 +train: [19] [220/400] eta: 0:01:06 lr: 0.000001 loss: 3.0016 (3.0081) grad: 0.3287 (0.3350) time: 0.3525 data: 0.0053 max mem: 3953 +train: [19] [240/400] eta: 0:00:58 lr: 0.000001 loss: 2.9922 (3.0065) grad: 0.3277 (0.3357) time: 0.3541 data: 0.0050 max mem: 3953 +train: [19] [260/400] eta: 0:00:50 lr: 0.000000 loss: 3.0038 (3.0084) grad: 0.3277 (0.3347) time: 0.3168 data: 0.0048 max mem: 3953 +train: [19] [280/400] eta: 0:00:43 lr: 0.000000 loss: 3.0050 (3.0067) grad: 0.3153 (0.3337) time: 0.3361 data: 0.0048 max mem: 3953 +train: [19] [300/400] eta: 0:00:36 lr: 0.000000 loss: 2.9845 (3.0057) grad: 0.3124 (0.3333) time: 0.3638 data: 0.0053 max mem: 3953 +train: [19] [320/400] eta: 0:00:28 lr: 0.000000 loss: 2.9924 (3.0053) grad: 0.3124 (0.3324) time: 0.3699 data: 0.0052 max mem: 3953 +train: [19] [340/400] eta: 0:00:21 lr: 0.000000 loss: 2.9845 (3.0044) grad: 0.3276 (0.3335) time: 0.3320 data: 0.0046 max mem: 3953 +train: [19] [360/400] eta: 0:00:14 lr: 0.000000 loss: 2.9867 (3.0044) grad: 0.3405 (0.3341) time: 0.3551 data: 0.0046 max mem: 3953 +train: [19] [380/400] eta: 0:00:07 lr: 0.000000 loss: 2.9969 (3.0047) grad: 0.3383 (0.3345) time: 0.3355 data: 0.0044 max mem: 3953 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 2.9904 (3.0043) grad: 0.3452 (0.3351) time: 0.3627 data: 0.0053 max mem: 3953 +train: [19] Total time: 0:02:23 (0.3593 s / it) +train: [19] Summary: lr: 0.000000 loss: 2.9904 (3.0043) grad: 0.3452 (0.3351) +eval (validation): [19] [ 0/85] eta: 0:04:17 time: 3.0237 data: 2.7810 max mem: 3953 +eval (validation): [19] [20/85] eta: 0:00:30 time: 0.3371 data: 0.0029 max mem: 3953 +eval (validation): [19] [40/85] eta: 0:00:18 time: 0.3461 data: 0.0039 max mem: 3953 +eval (validation): [19] [60/85] eta: 0:00:09 time: 0.3640 data: 0.0049 max mem: 3953 +eval (validation): [19] [80/85] eta: 0:00:01 time: 0.3243 data: 0.0041 max mem: 3953 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3232 data: 0.0043 max mem: 3953 +eval (validation): [19] Total time: 0:00:32 (0.3780 s / it) +cv: [19] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 3.078 acc: 0.095 f1: 0.060 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +evaluating last checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-last.pth +eval model info: +{"score": 0.09486895533407161, "hparam": [14, 1.0], "hparam_id": 40, "epoch": 19, "is_best": false, "best_score": 0.09653008490217793} +eval (train): [20] [ 0/509] eta: 0:28:41 time: 3.3814 data: 3.1055 max mem: 3953 +eval (train): [20] [ 20/509] eta: 0:04:09 time: 0.3666 data: 0.0478 max mem: 3953 +eval (train): [20] [ 40/509] eta: 0:03:19 time: 0.3376 data: 0.0109 max mem: 3953 +eval (train): [20] [ 60/509] eta: 0:02:58 time: 0.3417 data: 0.0041 max mem: 3953 +eval (train): [20] [ 80/509] eta: 0:02:45 time: 0.3433 data: 0.0046 max mem: 3953 +eval (train): [20] [100/509] eta: 0:02:32 time: 0.3299 data: 0.0045 max mem: 3953 +eval (train): [20] [120/509] eta: 0:02:22 time: 0.3341 data: 0.0044 max mem: 3953 +eval (train): [20] [140/509] eta: 0:02:14 time: 0.3471 data: 0.0049 max mem: 3953 +eval (train): [20] [160/509] eta: 0:02:07 time: 0.3607 data: 0.0050 max mem: 3953 +eval (train): [20] [180/509] eta: 0:01:59 time: 0.3449 data: 0.0043 max mem: 3953 +eval (train): [20] [200/509] eta: 0:01:51 time: 0.3390 data: 0.0044 max mem: 3953 +eval (train): [20] [220/509] eta: 0:01:42 time: 0.3178 data: 0.0043 max mem: 3953 +eval (train): [20] [240/509] eta: 0:01:35 time: 0.3330 data: 0.0046 max mem: 3953 +eval (train): [20] [260/509] eta: 0:01:27 time: 0.3450 data: 0.0046 max mem: 3953 +eval (train): [20] [280/509] eta: 0:01:20 time: 0.3346 data: 0.0044 max mem: 3953 +eval (train): [20] [300/509] eta: 0:01:13 time: 0.3700 data: 0.0044 max mem: 3953 +eval (train): [20] [320/509] eta: 0:01:06 time: 0.3257 data: 0.0040 max mem: 3953 +eval (train): [20] [340/509] eta: 0:00:59 time: 0.4021 data: 0.0048 max mem: 3953 +eval (train): [20] [360/509] eta: 0:00:52 time: 0.3355 data: 0.0043 max mem: 3953 +eval (train): [20] [380/509] eta: 0:00:45 time: 0.3184 data: 0.0041 max mem: 3953 +eval (train): [20] [400/509] eta: 0:00:38 time: 0.3157 data: 0.0042 max mem: 3953 +eval (train): [20] [420/509] eta: 0:00:30 time: 0.3065 data: 0.0041 max mem: 3953 +eval (train): [20] [440/509] eta: 0:00:23 time: 0.3138 data: 0.0039 max mem: 3953 +eval (train): [20] [460/509] eta: 0:00:16 time: 0.3326 data: 0.0043 max mem: 3953 +eval (train): [20] [480/509] eta: 0:00:09 time: 0.3247 data: 0.0040 max mem: 3953 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.2933 data: 0.0040 max mem: 3953 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.2866 data: 0.0040 max mem: 3953 +eval (train): [20] Total time: 0:02:55 (0.3439 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:11 time: 2.9540 data: 2.7558 max mem: 3953 +eval (validation): [20] [20/85] eta: 0:00:28 time: 0.3063 data: 0.0052 max mem: 3953 +eval (validation): [20] [40/85] eta: 0:00:17 time: 0.3386 data: 0.0044 max mem: 3953 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3080 data: 0.0033 max mem: 3953 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3192 data: 0.0040 max mem: 3953 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3075 data: 0.0042 max mem: 3953 +eval (validation): [20] Total time: 0:00:29 (0.3512 s / it) +eval (test): [20] [ 0/85] eta: 0:04:19 time: 3.0491 data: 2.8003 max mem: 3953 +eval (test): [20] [20/85] eta: 0:00:30 time: 0.3438 data: 0.0039 max mem: 3953 +eval (test): [20] [40/85] eta: 0:00:18 time: 0.3327 data: 0.0043 max mem: 3953 +eval (test): [20] [60/85] eta: 0:00:09 time: 0.3178 data: 0.0041 max mem: 3953 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3064 data: 0.0043 max mem: 3953 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.2883 data: 0.0040 max mem: 3953 +eval (test): [20] Total time: 0:00:30 (0.3575 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:10 time: 3.0547 data: 2.8139 max mem: 3953 +eval (testid): [20] [20/82] eta: 0:00:28 time: 0.3362 data: 0.0331 max mem: 3953 +eval (testid): [20] [40/82] eta: 0:00:16 time: 0.3269 data: 0.0128 max mem: 3953 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3752 data: 0.0028 max mem: 3953 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3163 data: 0.0047 max mem: 3953 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3055 data: 0.0045 max mem: 3953 +eval (testid): [20] Total time: 0:00:30 (0.3737 s / it) +evaluating best checkpoint: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/checkpoint-best.pth +eval model info: +{"score": 0.09653008490217793, "hparam": [16, 1.0], "hparam_id": 41, "epoch": 16, "is_best": true, "best_score": 0.09653008490217793} +eval (train): [20] [ 0/509] eta: 0:28:30 time: 3.3612 data: 3.1470 max mem: 3953 +eval (train): [20] [ 20/509] eta: 0:03:57 time: 0.3413 data: 0.0046 max mem: 3953 +eval (train): [20] [ 40/509] eta: 0:03:10 time: 0.3246 data: 0.0037 max mem: 3953 +eval (train): [20] [ 60/509] eta: 0:02:48 time: 0.3096 data: 0.0043 max mem: 3953 +eval (train): [20] [ 80/509] eta: 0:02:33 time: 0.3084 data: 0.0036 max mem: 3953 +eval (train): [20] [100/509] eta: 0:02:23 time: 0.3229 data: 0.0041 max mem: 3953 +eval (train): [20] [120/509] eta: 0:02:17 time: 0.3613 data: 0.0044 max mem: 3953 +eval (train): [20] [140/509] eta: 0:02:08 time: 0.3261 data: 0.0045 max mem: 3953 +eval (train): [20] [160/509] eta: 0:01:59 time: 0.3008 data: 0.0042 max mem: 3953 +eval (train): [20] [180/509] eta: 0:01:52 time: 0.3382 data: 0.0043 max mem: 3953 +eval (train): [20] [200/509] eta: 0:01:45 time: 0.3309 data: 0.0045 max mem: 3953 +eval (train): [20] [220/509] eta: 0:01:38 time: 0.3292 data: 0.0043 max mem: 3953 +eval (train): [20] [240/509] eta: 0:01:31 time: 0.3507 data: 0.0046 max mem: 3953 +eval (train): [20] [260/509] eta: 0:01:24 time: 0.3010 data: 0.0040 max mem: 3953 +eval (train): [20] [280/509] eta: 0:01:16 time: 0.3077 data: 0.0041 max mem: 3953 +eval (train): [20] [300/509] eta: 0:01:09 time: 0.3028 data: 0.0041 max mem: 3953 +eval (train): [20] [320/509] eta: 0:01:02 time: 0.2959 data: 0.0038 max mem: 3953 +eval (train): [20] [340/509] eta: 0:00:55 time: 0.3020 data: 0.0041 max mem: 3953 +eval (train): [20] [360/509] eta: 0:00:48 time: 0.3045 data: 0.0042 max mem: 3953 +eval (train): [20] [380/509] eta: 0:00:42 time: 0.2973 data: 0.0039 max mem: 3953 +eval (train): [20] [400/509] eta: 0:00:35 time: 0.3022 data: 0.0039 max mem: 3953 +eval (train): [20] [420/509] eta: 0:00:28 time: 0.3033 data: 0.0041 max mem: 3953 +eval (train): [20] [440/509] eta: 0:00:22 time: 0.2993 data: 0.0041 max mem: 3953 +eval (train): [20] [460/509] eta: 0:00:15 time: 0.3003 data: 0.0042 max mem: 3953 +eval (train): [20] [480/509] eta: 0:00:09 time: 0.2899 data: 0.0040 max mem: 3953 +eval (train): [20] [500/509] eta: 0:00:02 time: 0.2898 data: 0.0040 max mem: 3953 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.2845 data: 0.0037 max mem: 3953 +eval (train): [20] Total time: 0:02:43 (0.3208 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:05 time: 2.8869 data: 2.6785 max mem: 3953 +eval (validation): [20] [20/85] eta: 0:00:29 time: 0.3379 data: 0.0154 max mem: 3953 +eval (validation): [20] [40/85] eta: 0:00:17 time: 0.3089 data: 0.0050 max mem: 3953 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3183 data: 0.0034 max mem: 3953 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.2814 data: 0.0037 max mem: 3953 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.2746 data: 0.0037 max mem: 3953 +eval (validation): [20] Total time: 0:00:29 (0.3435 s / it) +eval (test): [20] [ 0/85] eta: 0:04:23 time: 3.1028 data: 2.8988 max mem: 3953 +eval (test): [20] [20/85] eta: 0:00:30 time: 0.3398 data: 0.0049 max mem: 3953 +eval (test): [20] [40/85] eta: 0:00:17 time: 0.3047 data: 0.0033 max mem: 3953 +eval (test): [20] [60/85] eta: 0:00:09 time: 0.3333 data: 0.0046 max mem: 3953 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.2978 data: 0.0042 max mem: 3953 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.2877 data: 0.0041 max mem: 3953 +eval (test): [20] Total time: 0:00:30 (0.3534 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:20 time: 3.1770 data: 2.9279 max mem: 3953 +eval (testid): [20] [20/82] eta: 0:00:28 time: 0.3212 data: 0.0044 max mem: 3953 +eval (testid): [20] [40/82] eta: 0:00:16 time: 0.3205 data: 0.0041 max mem: 3953 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3218 data: 0.0046 max mem: 3953 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3046 data: 0.0049 max mem: 3953 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.2936 data: 0.0046 max mem: 3953 +eval (testid): [20] Total time: 0:00:29 (0.3542 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:-------|:-------------|:-------|--------:|-------:|-----:|------------:|:----------|:-----------|-------:|---------:|----------:|---------:|----------:| +| flat_mae | reg | linear | nsd_cococlip | best | 16 | 0.0048 | 0.05 | 41 | [16, 1.0] | train | 2.8874 | 0.1504 | 0.0017435 | 0.10184 | 0.0016096 | +| flat_mae | reg | linear | nsd_cococlip | best | 16 | 0.0048 | 0.05 | 41 | [16, 1.0] | validation | 3.0967 | 0.09653 | 0.0034189 | 0.053009 | 0.0027002 | +| flat_mae | reg | linear | nsd_cococlip | best | 16 | 0.0048 | 0.05 | 41 | [16, 1.0] | test | 3.1076 | 0.10612 | 0.0036194 | 0.055625 | 0.0024498 | +| flat_mae | reg | linear | nsd_cococlip | best | 16 | 0.0048 | 0.05 | 41 | [16, 1.0] | testid | 3.0922 | 0.093696 | 0.003605 | 0.054199 | 0.0027641 | + + +done! total time: 1:07:53 diff --git a/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/train_log.json b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..8a9fd23a09e2dd510673ae0d98d1916f5cc6107f --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/nsd_cococlip__reg__linear/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.2471210193634032, "train/grad": 0.4487511743605137, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.3461566162109375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.3451495361328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.34333740234375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.3416986083984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.34007080078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.3377398681640624, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.3350567626953125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.3323095703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.328778076171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.324959716796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.3212640380859373, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.315997314453125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.31089111328125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.30396240234375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.2974993896484377, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.29167236328125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.2846893310546874, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.2768975830078126, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.26883056640625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.2619207763671874, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.25421142578125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.2468212890625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.23955810546875, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.2326226806640626, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.2264093017578124, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.2198382568359376, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.2146820068359374, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.2106036376953124, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.2056427001953125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.2005499267578124, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.1966986083984374, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.19360595703125, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.190089111328125, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.187388916015625, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.1848150634765626, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.1827398681640626, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.180972900390625, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1795281982421875, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1784271240234374, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.1774957275390623, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.1774237060546877, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.1778594970703127, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1794390869140625, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.181629638671875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.184510498046875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1884393310546875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1932757568359373, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.20213623046875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.21263671875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0764706059731543, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0763359396159649, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.07611555863171816, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0759010423719883, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0756856307759881, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.07538736106827855, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.07505376409739256, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.07468370445072652, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0742059450224042, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.07370266485959291, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.073217245452106, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.07249296497553587, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.07180619049817323, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.07084410084411502, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0699528068304062, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.06912969885393977, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0681313100643456, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0670242116972804, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.06587407277897, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.06488732166588307, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.06376402856782079, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.06271455738693475, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.061665648091584445, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.06066863896325231, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.059755768962204456, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.05880922194570303, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.05807006141170859, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.057476613372564316, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0567816143669188, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.05609983880072832, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05559996176511049, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05522669591009617, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0548161680996418, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05453555729240179, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05429720152169466, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.054141202252358196, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05405962575227022, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0540524941124022, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05411531355232, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05430481946095824, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05453784776851535, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.054832061659544705, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05538099905475974, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05596364330500364, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05665400022640824, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05738151216879487, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.058192946817725896, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.059527448508888484, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.06088147282600403, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.3472535610198975, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.3439691066741943, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.3385913372039795, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.3334343433380127, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.328387498855591, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.3214783668518066, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.313833475112915, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.305849075317383, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.295630693435669, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.2853639125823975, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.2755770683288574, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.262073040008545, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.249767780303955, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.233767509460449, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.220222234725952, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.208482503890991, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.195892572402954, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1831133365631104, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.171536922454834, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1628684997558594, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1547446250915527, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1486668586730957, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1441256999969482, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1412429809570312, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.139660358428955, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1386516094207764, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1382064819335938, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.137819766998291, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.137415647506714, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1369082927703857, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1364214420318604, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.135929584503174, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1355838775634766, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1356682777404785, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.136662244796753, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1388399600982666, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.142411947250366, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1467223167419434, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.153543710708618, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1643877029418945, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1714279651641846, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1755807399749756, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1805288791656494, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.187805652618408, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.199369192123413, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.205451250076294, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.2060904502868652, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.211625576019287, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.217236042022705, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.056109265411590996, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.055740125507567365, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05555555555555555, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05555555555555555, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.05518641565153193, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.055924695459579184, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.05647840531561462, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.05647840531561462, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.056109265411590996, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.056109265411590996, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.05555555555555555, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.05629383536360281, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.05703211517165006, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.057770394979697305, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.05795496493170912, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.05832410483573274, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.058508674787744554, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0592469545957918, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.05832410483573274, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.058508674787744554, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.056847545219638244, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.05758582502768549, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.05703211517165006, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.05813953488372093, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.056847545219638244, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.059616094499815434, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.060538944259874494, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.05906238464377999, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.0592469545957918, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.06164636397194537, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.06201550387596899, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.061461794019933555, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.06312292358803986, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.06164636397194537, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.06312292358803986, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.06404577334809892, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.06349206349206349, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.06349206349206349, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.062384643779992616, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.058693244739756366, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.057770394979697305, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.05832410483573274, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.06090808416389812, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.06349206349206349, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.067921742340347, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.0681063122923588, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.05887781469176818, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.04688076781100037, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04632705795496493, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013704758346436188, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013637461629110973, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01323133127931165, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013555649419396994, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013217056401372879, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013603188044423625, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013743945542014848, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013688778114964236, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013674721653499228, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01376572629331639, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01361604071941748, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013474063607169393, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01314769556490229, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013998299661541985, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.014480807515867798, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.015146209165972641, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.014826912210474885, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01607878108855954, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.01704287038047578, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01771904845332912, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.017975298145957077, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.01912418575165964, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.019567606929839467, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.02085252555164307, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.02053381349143581, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.021516093178948714, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.021375880458798263, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.02097929285184945, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.021187379086988162, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.021725524292607695, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.020965035942729787, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.01932901901029181, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.01794891278489678, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.016266379855297816, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.016149950470287545, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.01616291948060511, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.016742472527061448, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.017293058795711337, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.015603041100882432, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.013196693272571119, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.01372175688605827, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.014593203224487046, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.01627644898372897, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.01716192773902016, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.021483508866720114, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.024057217906599964, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.021772285944823555, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.01907642102098485, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.02752911438897154, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 3.1884393310546875, "validation/loss_best": 3.205451250076294, "validation/acc_best": 0.0681063122923588, "validation/f1_best": 0.024057217906599964} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 3.191520459651947, "train/grad": 0.3981149086356163, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.3123236083984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.30634033203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.2969317626953125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.2879864501953127, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.27986083984375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.26937744140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.258404541015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.2475531005859377, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.2349298095703123, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.2233984375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.21367919921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.201458740234375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1919146728515626, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.181082763671875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.17334716796875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1676153564453124, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1623223876953124, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.158133544921875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.15479736328125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.152738037109375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.151021728515625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.1497607421875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.148795166015625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.147900390625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.147071533203125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.146253662109375, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.145577392578125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.14513427734375, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.144656982421875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.144349365234375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.144415283203125, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.144647216796875, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.1451348876953125, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.1457550048828127, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.1468218994140624, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.1481768798828127, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.14978759765625, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1513671875, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1540618896484376, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.159432373046875, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.165159912109375, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.17114990234375, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.179765625, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.18910888671875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.205103759765625, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.226731872558594, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.244334716796875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.270060729980469, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.298773193359375, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.07064711872488261, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.06986491799354554, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.06862049467861653, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.06744373131543398, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0663351116888225, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.06488774677738547, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.06338986050337553, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.061866338476538656, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.060090701058506966, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.058450262621045114, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.05705508491024375, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0552893784083426, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0538968245126307, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.05231364438310265, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.05116608211770654, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.05033253163099289, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04955734893679619, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04893966149538755, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04850939793512225, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.048269317205995324, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0481196678429842, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0480805448628962, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04811441024765372, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.048209325093775984, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04833830088376999, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.048519570603966715, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04869265468791127, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04886478552594781, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04911978419870138, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04946017784997821, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04980194229632616, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05014199400320649, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.050631948299705985, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05107972219586372, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0515984052978456, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05212037755176425, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05272221922874451, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05333589555695653, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05413620658218861, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0554898938536644, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.056822116654366255, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05817284721881151, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05978310463950038, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.061186428591609, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06321454478427768, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0656871784850955, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.06748524447903037, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07033599955961108, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.07328986695036292, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.291011333465576, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.281952142715454, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.2677273750305176, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.255167245864868, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.243708372116089, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.2299158573150635, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.2163798809051514, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.2038156986236572, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1900761127471924, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1787798404693604, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1699838638305664, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.160187005996704, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1535208225250244, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.147277355194092, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.143822431564331, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.141890287399292, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.140425682067871, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1393067836761475, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.138442277908325, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.137807846069336, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.136979341506958, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1362414360046387, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.135444164276123, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.134671449661255, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1339032649993896, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.133011817932129, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.132253646850586, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.131776809692383, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1318156719207764, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1326849460601807, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1338369846343994, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.134103775024414, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1336545944213867, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.135573148727417, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1427507400512695, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1514627933502197, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1580538749694824, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.158012866973877, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.154372215270996, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1575632095336914, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.159986734390259, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.173680067062378, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2183566093444824, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.25543475151062, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.280224084854126, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.352367877960205, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.3983898162841797, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.42568039894104, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.4338929653167725, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.055740125507567365, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05537098560354374, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.056109265411590996, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05758582502768549, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.05832410483573274, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.05740125507567368, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.05795496493170912, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.05832410483573274, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.05887781469176818, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.059616094499815434, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.05906238464377999, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.059800664451827246, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.05758582502768549, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.05721668512366187, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.05813953488372093, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.05906238464377999, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.05813953488372093, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06090808416389812, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06404577334809892, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06866002214839424, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07087486157253599, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07290513104466592, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.07456626061277224, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.07585825027685493, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.07456626061277224, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.07493540051679587, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.07585825027685493, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07567368032484312, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.0725359911406423, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07179771133259505, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.07272056109265411, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.06921373200442968, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.06681432262827612, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.06921373200442968, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.07124400147655961, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07327427094868956, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.06736803248431156, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.07013658176448874, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.0681063122923588, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.06183093392395718, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.06349206349206349, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.050387596899224806, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.04226651901070506, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.041343669250646, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01367620685843259, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01355885774398029, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013229289940157618, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013530698755195275, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014399416484493022, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013519302659252788, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013318129305185, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014716971326352241, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01577814510640592, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.016464346524394644, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01809768540350857, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01926355082939505, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.019298368394484216, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.020696976463170865, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.022202611480070866, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.023019458565162413, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02321952730117753, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.024399874198148413, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.024606362877396898, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.02404280167326228, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.023649900152719765, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.022758538366411406, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.022900271785080698, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.02204656062291858, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.02328910945854495, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.024642010854243418, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.02520425956240532, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.02510431216051553, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.02479877409992171, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.022654317938513264, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.02153994348836472, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.021676469035048575, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.01820281963314863, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.015622579716146064, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.016156120401226017, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.019460633118730572, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.024892458350664667, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.02548964414961905, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.027274810582238056, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.026398045591931363, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.028221131290872673, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.02380738041632253, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.022091265955993405, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.01939483547009135, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.018388887294324373, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.02579010512289014, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.021279383079177202, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.016816978889378536, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.019571851899617496, "id_best": 28, "lr_best": 0.00057, "wd_best": 0.05, "train/loss_best": 3.144656982421875, "validation/loss_best": 3.1318156719207764, "validation/acc_best": 0.07585825027685493, "validation/f1_best": 0.02479877409992171} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 3.1714108061790465, "train/grad": 0.3893373617529869, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.2538690185546875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.2436444091796877, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.2292041015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.2170758056640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.206959228515625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.19539306640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1851055908203123, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1764508056640626, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.167960205078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1618536376953124, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.15751220703125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.153177490234375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.150546875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.147940673828125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.146302490234375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.145096435546875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1437939453125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1423828125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.140887451171875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.139576416015625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.138114013671875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.136605224609375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.134986572265625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.13328369140625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.131541748046875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.129683837890625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.128251953125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.126929931640625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.1256048583984377, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.1243743896484375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.1233978271484375, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.122552490234375, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.121876220703125, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.1218634033203125, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.122523193359375, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.1238427734375, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.126480712890625, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.130438232421875, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1369305419921876, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.148590087890625, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.159102783203125, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.170364990234375, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1878128051757812, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.2055459594726563, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.231539306640625, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.269381408691406, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.306269226074219, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.359505310058594, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.417006378173828, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0627413764409721, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.061266912594437596, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.059110992643982174, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.05728557363152504, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0557457023859024, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.053982117027044293, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.052411656863987445, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.05107475988566876, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04979297708719969, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04886453555896878, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04824465164914727, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04767908915877342, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.047386168222874406, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04720014531165361, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04714851200580597, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04715303147211671, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.047184206768870356, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04723374828696251, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04730312492698431, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04736848911270499, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04746056014671922, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0475580582767725, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04767683180049062, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04781549464911222, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.047975022587925194, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04819236066192389, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.048433827832341195, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04867879973724484, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04907119011506438, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04957430612295866, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.050021537486463784, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05042929554358125, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05101507678627968, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05157201811671257, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05224482035264373, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05299114491790533, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.053945178855210545, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.054990810714662074, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.056371252406388524, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05858271175995469, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.06042824136093259, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.062107231691479685, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06445731479674578, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06639777217060328, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06924082152545452, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.07285144180059433, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07571210846304893, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07982103068381548, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.08461728259921074, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.228985071182251, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.2173283100128174, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.201533079147339, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1890017986297607, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1792423725128174, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1687285900115967, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.160020589828491, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1533758640289307, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1476452350616455, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.144010305404663, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.141941785812378, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.140052556991577, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1388230323791504, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.137550115585327, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1365365982055664, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1354894638061523, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.134373664855957, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1331934928894043, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.132021188735962, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.131263017654419, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1306509971618652, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.130326271057129, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.130377769470215, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1305198669433594, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1304574012756348, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1295182704925537, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1280386447906494, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.126509428024292, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.125227928161621, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1263792514801025, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.12990665435791, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1346323490142822, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1397316455841064, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.143578052520752, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1543679237365723, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.176219940185547, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1918914318084717, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.193024158477783, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1972763538360596, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2006354331970215, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2012417316436768, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.211886405944824, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2398083209991455, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.277339458465576, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.337165594100952, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.4348151683807373, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.5028576850891113, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.56596040725708, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.5590155124664307, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.057770394979697305, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05832410483573274, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.058508674787744554, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05906238464377999, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.059431524547803614, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.059616094499815434, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.055740125507567365, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.05721668512366187, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.05887781469176818, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.05647840531561462, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.058508674787744554, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.059431524547803614, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06275378368401624, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06016980435585087, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06035437430786268, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06035437430786268, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06035437430786268, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.060538944259874494, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06607604282022887, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06755260243632337, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.06866002214839424, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.06921373200442968, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.06939830195644149, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.0725359911406423, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.07198228128460686, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.07161314138058324, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07161314138058324, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.06681432262827612, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.06312292358803986, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.05647840531561462, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.06293835363602805, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.06829088224437062, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.0710594315245478, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.07179771133259505, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07475083056478406, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.07069029162052418, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.07069029162052418, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.06902916205241787, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.06773717238833518, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.05813953488372093, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.052233296419342934, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.04983388704318937, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.06330749354005168, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013343508450750973, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0140441184038805, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.015157886217401996, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015518315041692744, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.017485348951570626, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01750823226781424, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017819653057408968, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.018556709871931903, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.019674256123835512, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01856792554493648, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01976718800801032, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.020714790617166554, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.022044454118979078, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.020315087970656596, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02008633229197505, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.019811967964289506, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01968727265998346, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01985525599893131, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.020194458856032514, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01961908057324604, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.01990859302759278, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.019330072924153962, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.018924987880331874, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.018014479532859725, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.016914120437388495, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.01978461523719397, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.02285509081351428, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.025289174108387408, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.02995531757228402, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.03169803448873388, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.029329862324610868, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.02852413634033991, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.029441324312771258, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.026139704594894633, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.02488647292775385, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.02350021658265532, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.024108845293902795, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.026576287201171805, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.029618424549066064, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.03346702875720721, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.03728873426959631, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.03418028712881078, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.030114223522473487, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.03518634455544236, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.038185481667808295, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.030018240872697152, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.021371461674101828, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.02082610839066816, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.02893392510561996, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 3.159102783203125, "validation/loss_best": 3.2012417316436768, "validation/acc_best": 0.07475083056478406, "validation/f1_best": 0.03728873426959631} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 3.1804602336883545, "train/grad": 0.4159290254116058, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.202349853515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1926641845703125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1803814697265627, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.17146728515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1650128173828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.158746337890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.154134521484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.150830078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1483056640625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.146541748046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1452734375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.143699951171875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.142362060546875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.140599365234375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.139173583984375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.13777099609375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.136265869140625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.13449462890625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.13269775390625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.131201171875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.129447021484375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.1279962158203123, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1265716552734375, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.1250933837890624, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.123760986328125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.122509765625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.1215814208984374, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1210247802734377, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.1208251953125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.1208306884765626, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.1207135009765623, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.12080322265625, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.1216796875, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.123519287109375, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.12747314453125, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.1330194091796875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.140887451171875, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1515878295898436, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1662237548828127, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.1884619140625, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.2063031005859375, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.2184002685546873, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.2363925170898438, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.269337921142578, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.314860534667969, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.377904357910156, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.4331722259521484, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.511271743774414, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.5869288635253906, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0565553948096931, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.055118428561836484, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.053269288297742606, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.051934788934886456, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.05096430018544197, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.050017700400203466, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04932928640395403, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04888320200145244, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.048582575879991055, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.048451081458479164, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.048411003947257995, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04841172030195594, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04843457700684667, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.048478699009865525, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.048529044780880215, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04858275532722473, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04865375993773341, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04874353643506765, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.048847851995378735, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04895314365625381, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04910258499905467, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.049277789704501626, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04949856555089355, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04976142004132271, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.050062713697552684, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.050460921563208104, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.050873067658394576, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.05130451399832964, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.05197168201208115, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.05282086761668325, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.053517596535384654, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.054047807827591894, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05475124077871442, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05550435729324818, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0566548933647573, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0580335384234786, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0597615553252399, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.061858687959611414, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.06449028119444847, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.06775862259790301, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.07002776693552733, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.07167583348229528, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07378760581836104, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0766424529068172, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.08073303079232573, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08496361097320915, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0875291502662003, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.09250689901411534, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0977502578496933, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1839444637298584, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1747066974639893, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.163619041442871, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1561663150787354, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1510539054870605, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1466422080993652, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.143673896789551, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1419591903686523, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.140662431716919, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.139775037765503, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1390395164489746, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.138122797012329, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.13728404045105, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1362242698669434, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.135220527648926, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1342833042144775, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1331098079681396, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1317992210388184, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.130232095718384, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1290245056152344, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1276254653930664, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1266863346099854, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.126004219055176, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.125420331954956, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.124906301498413, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1249120235443115, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.126493453979492, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.129378080368042, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1335434913635254, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1366207599639893, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.140434741973877, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.147047758102417, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1546144485473633, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1610119342803955, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1767826080322266, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1942780017852783, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.2149860858917236, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2382805347442627, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.2605175971984863, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.311555862426758, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.347595691680908, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.3121931552886963, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2796666622161865, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.2871975898742676, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.3079543113708496, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.412139654159546, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.4482176303863525, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.4678337574005127, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.6511688232421875, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05906238464377999, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05758582502768549, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05721668512366187, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05555555555555555, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.05647840531561462, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.05813953488372093, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.05703211517165006, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.05758582502768549, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0592469545957918, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.060538944259874494, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.060538944259874494, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06183093392395718, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.061461794019933555, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06256921373200443, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06164636397194537, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06275378368401624, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06256921373200443, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06201550387596899, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07290513104466592, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07438169066076043, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07641196013289037, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.0753045404208195, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.07419712070874862, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.07327427094868956, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.07511997046880768, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.0754891103728313, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07161314138058324, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.07142857142857142, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.06736803248431156, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.06589147286821706, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.06404577334809892, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.06459948320413436, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.0681063122923588, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.06736803248431156, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.06496862310815799, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.06976744186046512, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.08231819859726837, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.073827980804725, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.07438169066076043, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07438169066076043, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.07327427094868956, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.0710594315245478, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.06755260243632337, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01614792269880091, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.016940345996933667, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.016799893510675137, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.016837935412823385, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.018205314135591424, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.019271764191932904, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.018852347372463885, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01949502062007546, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.019451829180153923, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.019551779337417256, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.018645083936997053, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.018375602632360884, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.016976604058647697, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.016964034766726855, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01648320201199956, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01694201525461247, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01711042924536223, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01710039133050974, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.019082644419972147, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01934460718601477, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.022101356205190492, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.024845993187956546, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.02498484113780017, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.025851302909386226, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.02854501194161239, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03128870449142061, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.032845786417721345, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.033328292673915766, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.03362591774997288, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.03208925925267281, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.030078944750438607, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.025987046353872233, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.020783094470868783, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.01989015639938993, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.016994054939702928, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.01811315828951558, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.01771504690790522, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.02271777001119941, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.029486377532722114, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.024494652500161213, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.019227011181703255, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.024968852919412515, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.029895683243921046, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.030185916425219877, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.03282940568894854, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.029076961495039935, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0324242764702849, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.03561345092684818, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.03173106058824674, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 3.2363925170898438, "validation/loss_best": 3.2796666622161865, "validation/acc_best": 0.08231819859726837, "validation/f1_best": 0.029895683243921046} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 3.179901976585388, "train/grad": 0.42244323894381525, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.16998046875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.16363525390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.156307373046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.152086181640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.149364013671875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.14687744140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.145205078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1439599609375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1424951171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.141182861328125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.139891357421875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.138223876953125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.136781005859375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.134677734375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.132886962890625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.13105224609375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.129072265625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.126849365234375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.124525146484375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1223583984375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.119886474609375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.117471923828125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.114927978515625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.112342529296875, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.1099725341796876, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.1074310302734376, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.105574951171875, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.104332275390625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.1034637451171876, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.103763427734375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.1049127197265625, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.10556884765625, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.106234130859375, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.107987060546875, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.1133673095703127, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.1205487060546875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.130768127441406, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1422348022460938, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.154530334472656, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.180556335449219, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.20720458984375, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.230689697265625, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.262215270996094, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.295587921142578, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.345723876953125, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.405705871582031, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.493296241760254, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.625796661376953, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.755692443847656, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.051462111193686724, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.05052779246121645, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04948956647887826, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04887228649109602, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.048516246937215325, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.048271154146641494, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04816741885617375, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04814156420528889, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04814601680263877, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.048167520835995674, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04818750998005271, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.048220368940383194, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04825903967022896, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04831780510023236, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04837778186425567, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04843872644007206, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04852338211610913, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04863069105893374, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.048769076000899074, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.048903386052697895, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0490861789137125, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.049290672168135645, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04953801311552525, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04983412543311715, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.05017914891242981, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.05065371064469218, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.051136193741112945, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.05162049565464258, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.05234849490225315, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0533316252939403, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.054271886944770815, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.055055436212569475, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05598118364810944, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05695452498272061, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05850845729932189, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0601560683734715, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.061874328535050156, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.06313898026943207, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.06439341362565756, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.06747043056413532, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.07055805759504437, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.07323530357331037, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07659394346177578, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07945511743426323, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.08283002074807883, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08728610116988421, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.09326717233285307, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.09990189295262099, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.10699447721242905, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1589016914367676, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1533279418945312, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.147524833679199, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.144308567047119, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.142467498779297, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.140979051589966, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.139714002609253, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1386477947235107, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1374030113220215, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1362392902374268, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1351938247680664, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.133800983428955, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1326725482940674, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1313323974609375, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.130399703979492, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.129730701446533, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.129091501235962, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1287100315093994, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1282472610473633, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.127835512161255, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1269969940185547, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1258625984191895, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.124497175216675, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1230547428131104, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.121755599975586, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.120851516723633, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.120527744293213, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1209352016448975, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1236438751220703, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1299431324005127, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1326398849487305, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.130044937133789, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.125541925430298, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1250815391540527, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1280486583709717, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.128464937210083, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.127176284790039, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1354548931121826, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1634111404418945, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.196643829345703, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2495551109313965, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.3108434677124023, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.376413106918335, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.3932342529296875, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.388071298599243, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.341550350189209, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.403353452682495, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.6655490398406982, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.035415172576904, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.056109265411590996, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05740125507567368, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05832410483573274, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05906238464377999, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.05813953488372093, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.05813953488372093, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.058693244739756366, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06035437430786268, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.061461794019933555, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06293835363602805, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.0753045404208195, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.0754891103728313, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07641196013289037, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07825765965300849, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08157991878922112, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.07862679955703211, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.07862679955703211, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.07438169066076043, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.07161314138058324, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.07438169066076043, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07604282022886674, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08047249907715023, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08508674787744555, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08767072720561092, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08065706902916205, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.08139534883720931, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.08527131782945736, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.08084163898117387, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07511997046880768, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.0710594315245478, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.06662975267626431, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.0710594315245478, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.05906238464377999, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07308970099667775, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.07881136950904392, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.07308970099667775, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.06441491325212255, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.017326452196123856, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.018724895316622102, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.020134315305446387, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.020620367570552998, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.020598703136472555, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0207397252882003, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.020809406231560534, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02140425695630374, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021608717290974355, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.021493765522589196, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.021411104732845788, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.021185721931861395, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0198659375007921, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.019582171941732177, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02006892017150068, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.020119538062274716, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.021526983854434047, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02215689596305888, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.02424526044065042, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.024648249788877672, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.026657180123346075, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.030525836616023636, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03288600474559349, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.030636803066609486, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.030371021148380088, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.030264594024416894, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.03107613204175223, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.027970635512397174, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.02600153842011632, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.022665747170060317, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.02373186647255264, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.027713456066334654, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.03340791904543381, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.03459956892753837, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.035570466575769046, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.038707779026127896, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.03870439918547274, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.0403862305591343, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04243018696734659, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.04372516503120629, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.04208532396914041, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.038245817357417396, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.03779368968345935, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.04148555607753545, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.03601594071209376, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.043513655767424386, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.04220032091059293, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.03488336914702913, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.016669847870459536, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 3.1205487060546875, "validation/loss_best": 3.128464937210083, "validation/acc_best": 0.08767072720561092, "validation/f1_best": 0.038707779026127896} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 3.1680091726779938, "train/grad": 0.41592412158846853, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.152684326171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.149154052734375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1456201171875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.143765869140625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.142330322265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.140997314453125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.139674072265625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1383447265625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.13671142578125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.13515869140625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1337841796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.131798095703125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.129913330078125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.127652587890625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.12556640625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.123507080078125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1211865234375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1184912109375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.115616455078125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.11308837890625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.1099163818359377, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.1069580078125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1038226318359374, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.1007562255859376, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0979638671875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.095162353515625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.093089599609375, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0915753173828127, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0896697998046876, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0875537109375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.086568603515625, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0870462036132813, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0888482666015626, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.091007080078125, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.0943307495117187, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.09956298828125, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.10758056640625, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.117648010253906, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.13259033203125, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.158112030029297, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.1862754821777344, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.2162123107910157, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.2580606079101564, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.296103668212891, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.3494478607177736, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.4091686248779296, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.4747400665283203, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.6003006744384765, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.737335662841797, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04864652708172798, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04813630968332291, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04769625307992101, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04752522150054574, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04746740996837616, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04745215149596334, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04746106557548046, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04748019406571984, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04751218307763338, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.047549734730273484, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04758265385404229, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.047643537390977146, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.047699146680533884, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04778417346999049, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04786674434319139, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.047944663614034655, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.048049362637102605, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04817701200023294, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04832838486880064, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04848104020580649, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.048664954118430616, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04886779084801674, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.049089070856571194, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.049348900001496075, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04964446976780892, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.05005128314718604, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.05046754777431488, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0508837629109621, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.05147755868732929, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.052193928752094505, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05288318734616041, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05360804624855518, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.054698736723512414, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.055702854227274654, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05700634902343154, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.058527714051306245, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.06021253205835819, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0619089088588953, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.06414038280025125, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.06686066467314959, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.06957273190841079, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.07231603363528848, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07587400438264012, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07913062257692217, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.08275621989741921, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08651379304006696, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.09119285458698868, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.09935974495485425, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1052432569861412, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.146552801132202, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1435837745666504, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1408603191375732, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1392204761505127, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.138015031814575, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1365954875946045, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1352431774139404, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1339328289031982, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1323204040527344, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.13081693649292, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1295104026794434, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.127798557281494, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1262900829315186, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.124504804611206, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1230738162994385, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.121742010116577, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.120227575302124, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.118548631668091, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1167314052581787, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.115147829055786, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.113222599029541, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.111643075942993, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.110316276550293, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1092920303344727, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1083426475524902, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1069235801696777, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1054742336273193, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1048407554626465, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1061220169067383, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.111539363861084, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.11734938621521, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.120614767074585, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1264488697052, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.143075942993164, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1697936058044434, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1896157264709473, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.2006137371063232, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2114367485046387, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.2341482639312744, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.3129758834838867, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3848929405212402, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.4584646224975586, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.576831579208374, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.620701551437378, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.6086525917053223, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.697777509689331, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.5758206844329834, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.7650392055511475, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.953446626663208, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05887781469176818, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05813953488372093, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05813953488372093, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05740125507567368, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06016980435585087, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.061461794019933555, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06164636397194537, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07290513104466592, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07419712070874862, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07567368032484312, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.0753045404208195, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07604282022886674, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07881136950904392, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07733480989294943, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07733480989294943, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.07954964931709117, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08361018826135105, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08600959763750461, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08416389811738649, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08194905869324474, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07788851974898486, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.07216685123661867, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07142857142857142, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.0753045404208195, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.0799187892211148, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.07881136950904392, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.0812107788851975, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.0828719084533038, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07973421926910298, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.0769656699889258, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.06995201181247693, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.0695828719084533, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.06773717238833518, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.06607604282022887, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.06976744186046512, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.059431524547803614, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.06035437430786268, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.02066429352735461, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02049953856198174, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.021393944148018488, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02091431382766669, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.021877876702555824, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.022395237883585425, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02334147909718688, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.022524406992943936, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.022918799104399912, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02214215509999561, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.022026901098586415, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.020491222086980062, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.020708667939829815, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02118480712083376, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.021747326084473075, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.021220316224038124, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.022212703812081153, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.024412226906642017, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.0255079093966622, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.02623378711419683, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.02728837575568729, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.027794325402271475, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.026642527153816542, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.026815904962218584, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.02885993412854586, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.029788759138730497, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.03171399756068984, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03374748999652362, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.03432775387756812, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.033249891671635695, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.03290656030949823, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.034543527394932126, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.03849104926212877, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.042273269742495685, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.04019643380960895, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.04178000833745595, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.043650899645213924, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.03709435467025091, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.036030360510578216, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.03789716531522225, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.03937744724652223, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.037254506719228246, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.03230037097521211, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.03191295295549921, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.03285974048835552, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.025388919499930496, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.042299134008658534, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.030868071717375423, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.02385908183227735, "id_best": 29, "lr_best": 0.0006899999999999999, "wd_best": 0.05, "train/loss_best": 3.0875537109375, "validation/loss_best": 3.111539363861084, "validation/acc_best": 0.08600959763750461, "validation/f1_best": 0.033249891671635695} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 3.157608106136322, "train/grad": 0.420059265345335, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.148519287109375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.146634521484375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.144737548828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.14326416015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.141982421875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.140443115234375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.138795166015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13706787109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1351318359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.13318115234375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.13139892578125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12880615234375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1265478515625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.123411865234375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.12072265625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.11821044921875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1150927734375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1117633056640623, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1081964111328126, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.104991455078125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.1013568115234373, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0979571533203125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.094488525390625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.090980224609375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.087484130859375, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0832586669921875, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0796942138671874, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0767767333984377, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.07354736328125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0709765625, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0695819091796874, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.069039611816406, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0700497436523437, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.0733432006835937, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.0810250854492187, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.0907049560546875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.101641845703125, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1137631225585936, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.128554382324219, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.153226013183594, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.173583984375, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.1938296508789064, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.2290496826171875, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.263508148193359, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.3206058502197267, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.395768051147461, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.4636103820800783, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.566110496520996, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.710384922027588, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04806806646287441, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.047899708170443776, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04780754690989852, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04779610902070999, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04779616558924318, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04780669666826725, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.047824272084981204, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04784369109198451, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.047868958581238984, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04790170023217797, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04793566135689616, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04798588089644909, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04804205490276217, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.048122628517448904, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04821256624534726, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04829544579610229, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04841515947133303, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04856709472835064, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04875082230195403, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04894309528172016, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.049200719129294156, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04950665755197406, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.049864866193383935, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.05026516124606133, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.05067112388089299, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.05112506231293082, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.05152942541986704, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.051902178302407266, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0524508174136281, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.05315576503053308, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05382458949461579, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.054523494224995375, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0556116184219718, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.056685564629733565, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05830123106017709, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.060112076494842764, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.061974353920668364, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.06392243910580873, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.06608927441760898, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0695291380211711, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0716639507189393, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0732016453705728, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07598680419847369, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07857191525399684, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.08315233532339335, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08820891657844186, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.09212673924863339, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.09739264611154795, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.10430600509047508, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1419615745544434, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1402759552001953, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.138566493988037, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.137296199798584, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1362764835357666, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1349921226501465, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1337497234344482, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.132585287094116, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1311492919921875, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1298110485076904, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1286888122558594, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.127084970474243, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1256673336029053, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1239616870880127, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1225426197052, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1212899684906006, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1198785305023193, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.118385076522827, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.116701126098633, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1151158809661865, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1131932735443115, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.110997200012207, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.109032154083252, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.108116626739502, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1092960834503174, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1122798919677734, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1145782470703125, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1158738136291504, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.117938756942749, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1228737831115723, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1309964656829834, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1418402194976807, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1588897705078125, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1704745292663574, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1755075454711914, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.181926965713501, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.194939136505127, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2161190509796143, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.2450530529022217, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.296577215194702, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3224270343780518, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.315964698791504, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.296848773956299, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.2473018169403076, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.2495031356811523, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.2980494499206543, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.3538620471954346, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.5085806846618652, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.5418267250061035, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.059616094499815434, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0592469545957918, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.059431524547803614, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.059431524547803614, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06090808416389812, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06164636397194537, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.061461794019933555, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06349206349206349, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07364341085271318, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08084163898117387, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08194905869324474, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08324104835732743, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08471760797342193, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.082687338501292, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.082687338501292, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08397932816537468, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08471760797342193, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08914728682170543, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08397932816537468, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08527131782945736, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08324104835732743, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08305647840531562, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08102620893318568, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.07567368032484312, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.07216685123661867, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.07069029162052418, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.0828719084533038, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.08139534883720931, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.08361018826135105, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.08674787744555186, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.08453303802141011, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.07622739018087855, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07290513104466592, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.0754891103728313, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.07290513104466592, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.07235142118863049, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.021601709699556427, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.021842727663289208, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022103867018683065, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.021997580676097486, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02243639137501302, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02228305733779473, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02180084882670046, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021743545335316583, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021986657213008156, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02231783449113765, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.022199260515979563, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.021737945647308573, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.021958571301871982, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.022492300022247835, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02345021969374687, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02360216036030111, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02414999508174742, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.025448641820143263, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.026655964187394354, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.02816240605365718, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.03004642495361327, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.033420752401767694, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03459809472278278, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.034163208694635586, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.034589946392101216, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.033339281967232307, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.0321454773875944, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03206035291669003, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.03348581252714813, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.03434664299492184, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.03949967276144538, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.03892158261713659, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.039330490162590305, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.03829054205994706, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.03897705651768075, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.03982258403708929, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.03959942900131686, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.03743704711755488, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.03730890186186715, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.031840345247124806, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.027344912047248687, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.02906854734722318, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.032396318667942924, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.039736397175981604, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.04636595578132174, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.04127945240291759, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.04296967726511549, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.03994808737396271, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.039085445133891644, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 3.0695819091796874, "validation/loss_best": 3.1309964656829834, "validation/acc_best": 0.08914728682170543, "validation/f1_best": 0.03949967276144538} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 3.142901648283005, "train/grad": 0.4076113329827786, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.145902099609375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.144556884765625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.14276611328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.141197509765625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.13973388671875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.137908935546875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.135980224609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.134046630859375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.131744384765625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.129483642578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.127420654296875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12433837890625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.121776123046875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.11829833984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.115115966796875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.112235107421875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.108751220703125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1047601318359375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1005364990234376, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0968450927734374, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0926416015625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0882867431640624, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.083936767578125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.079649658203125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0755706787109376, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0712945556640623, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.06811767578125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0656097412109373, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.062884521484375, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0610931396484373, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0603738403320313, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.060823974609375, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0632244873046877, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.066470031738281, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.069943542480469, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.073092041015625, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.07915771484375, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.0869491577148436, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.098477783203125, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.12350341796875, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.1490914916992185, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.172137908935547, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.2076402282714844, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.2453309631347658, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.293270721435547, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.367079620361328, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.433301467895508, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.5178227996826172, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.572009506225586, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04753926694393158, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.047492149118334055, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04747242961078882, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0474713708460331, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04747121267020702, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.047480471581220624, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04749168071895838, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.047502302527427674, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04752313943579793, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.047548764236271385, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04757259264588356, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04761177819222212, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.047653468064963816, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04771659534424543, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.047775573115795854, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.047840189505368474, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.047929634004831315, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04803786233067513, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04816911637783051, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04830147163942456, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.048491066358983516, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.048708844631910324, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.048995903003960846, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0493453629501164, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04974662022665143, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.050267591811716555, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.050721840485930446, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.051127881947904824, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.051720595713704826, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.052571182548999784, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.053430742751806974, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05430070545524359, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05554513933137059, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.056521955393254754, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05737464755773544, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05809609197080135, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05912588184699416, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.060327576864510776, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.061985512580722574, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.06491094237193465, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.06750746257603168, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06995808582752944, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0739922777377069, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07752349669113755, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.08082895981147885, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08542790323495865, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.08829274013638497, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.09193771613761782, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.09386502033099532, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.139918804168701, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1388142108917236, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1373751163482666, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.136277914047241, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.135249376296997, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.134019136428833, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1329076290130615, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.131824493408203, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1306159496307373, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.129572868347168, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1286189556121826, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1274757385253906, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1263694763183594, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.125070571899414, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.123716354370117, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.122382164001465, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.12052321434021, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.118030309677124, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.115166664123535, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.112687826156616, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.10996413230896, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1079294681549072, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1067588329315186, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.107003688812256, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1085948944091797, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.11249041557312, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1180202960968018, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1242823600769043, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1335840225219727, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1432154178619385, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.148684501647949, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1514976024627686, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.15830397605896, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.166637897491455, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.173110008239746, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1886990070343018, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.214311361312866, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.221266508102417, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.228541851043701, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.251772880554199, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2532432079315186, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2682883739471436, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.3083059787750244, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.3236429691314697, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.370927095413208, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.4973936080932617, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.485931158065796, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.5087313652038574, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.627777099609375, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.05795496493170912, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06016980435585087, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06201550387596899, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.061461794019933555, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.062200073827980804, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06404577334809892, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07511997046880768, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07659653008490218, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.0754891103728313, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07622739018087855, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07678110003691399, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07641196013289037, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.0769656699889258, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07899593946105574, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.07770394979697305, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08139534883720931, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08010335917312661, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.07899593946105574, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.07585825027685493, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.0754891103728313, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.07327427094868956, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07327427094868956, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.06423034330011074, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.06090808416389812, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.06496862310815799, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.07087486157253599, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.07069029162052418, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07235142118863049, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.07013658176448874, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.06570690291620525, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.06109265411590993, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.06552233296419344, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07050572166851236, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.058508674787744554, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.05149501661129568, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.05758582502768549, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.021338014459812107, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02238757664340912, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.02294522116786589, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02235785773747621, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02215089526903148, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.022272656333024873, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.022099694204690587, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020918715618660353, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.020858388617957913, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01923062983287036, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.018311914033768008, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.018865441108939025, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.019280443506995048, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.020310178915738837, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.022190203387919893, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02274219335863535, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.022517740242411505, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.0245520062657656, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.026452691241492524, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.02881183807717422, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.029824746168705837, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.031166754275594125, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03492902437362524, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03668895532319332, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.03603725707543636, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.036374037470274685, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.038707521150770675, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03627374458040644, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.03704381808898144, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.0345897699593876, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.03153721640168881, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.028133878137223668, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.02909487918288141, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.029276647808176876, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.03485799646403971, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.03541316438694612, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.03645710577396579, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.03743948541017639, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04143062736300837, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.040526118013129546, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.0450171977278973, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.04502519724265341, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.04059749304015022, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.038500358489739296, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.03714511675029156, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.03114122340271343, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.028666312193688648, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.027254473660680945, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.02985912757618441, "id_best": 28, "lr_best": 0.00057, "wd_best": 0.05, "train/loss_best": 3.062884521484375, "validation/loss_best": 3.1335840225219727, "validation/acc_best": 0.08139534883720931, "validation/f1_best": 0.03704381808898144} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 3.132473328113556, "train/grad": 0.4193361333012581, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.143001708984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.14160400390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.139639892578125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.137886962890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.136337890625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1344482421875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.132364501953125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.130286865234375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.127691650390625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.125340576171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.12303955078125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1198583984375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.117044677734375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.11321044921875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.109638671875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.106505126953125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.102578125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0983544921875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.093875732421875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.089969482421875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.08549072265625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0811370849609374, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.076632080078125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.072021484375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0677825927734377, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.06315673828125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0596197509765624, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0569244384765626, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.053458251953125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.049871826171875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0478497314453126, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0477261352539062, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0495986938476562, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.05279052734375, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.0582070922851563, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.0658612060546875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.0747366333007813, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.0820046997070314, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.0939653015136717, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.1146270751953127, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.132371063232422, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.1505677795410154, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1817453002929685, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.2185354614257813, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.2735868072509766, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.342210388183594, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.403193588256836, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.474021453857422, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.5388254165649413, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04874946724623442, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04873646846041083, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04873481428250671, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.048733985926955936, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.048743763193488124, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0487601606734097, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04877654718235135, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04879753796383739, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04882604904472828, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04885641725733876, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0488857314735651, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04893101338297129, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04897696699947119, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0490414903499186, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.049103957172483205, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04916671998798847, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04924929771572351, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04935651697218418, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.049488192833960054, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0496387067809701, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.049837710689753296, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.050069821029901505, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.05035212107002735, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0506827905587852, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.05106154402717948, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.051569202449172735, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.052076080534607175, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.05257131315767765, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.05323114842176437, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.05398256544023752, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.054694144148379566, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.055460271779447796, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05652413215488195, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05738184154033661, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0586198965087533, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.060259660705924034, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.06220456207171082, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.06386782329529524, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0659236204624176, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.06898455971851945, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.07124488772824407, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.07276885461062194, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0751899447478354, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07820017313584686, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.08240340048447252, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08709670193493366, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.09089907098561525, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.09307622760534287, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.09581005439162255, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1380488872528076, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.136949062347412, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.135467052459717, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.134211301803589, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1330835819244385, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1317453384399414, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.130382537841797, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.129028558731079, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.127380609512329, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1258535385131836, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1243951320648193, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.122418165206909, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.120692253112793, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.11836838722229, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1163620948791504, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.114619493484497, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.112647771835327, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.110644578933716, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1087758541107178, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1075706481933594, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1065948009490967, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.105886936187744, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.105163812637329, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1044704914093018, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1036622524261475, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1034345626831055, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.104768753051758, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.106903553009033, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1090967655181885, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1077747344970703, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1068155765533447, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.109200954437256, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1202423572540283, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1297807693481445, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1310718059539795, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.129359722137451, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1387298107147217, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1677517890930176, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.195373773574829, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2122020721435547, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.275247097015381, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.340745687484741, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.4072258472442627, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.484758138656616, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.5599584579467773, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.5061404705047607, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.55001163482666, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.6597931385040283, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.707160711288452, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.059431524547803614, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.060538944259874494, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.060538944259874494, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06164636397194537, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06164636397194537, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07475083056478406, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.0753045404208195, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.073827980804725, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07475083056478406, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07438169066076043, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07419712070874862, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.07216685123661867, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.07475083056478406, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.07770394979697305, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08010335917312661, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08102620893318568, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.0784422296050203, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.0784422296050203, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.0799187892211148, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08010335917312661, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.07511997046880768, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.07733480989294943, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.07401255075673681, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.07475083056478406, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.06736803248431156, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.0651531930601698, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.06718346253229975, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.07069029162052418, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.07032115171650055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07290513104466592, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.07290513104466592, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.06884459210040605, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.07198228128460686, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.022129436914478332, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.022543753383908512, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022333222569639186, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.022126587341859835, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.021737696464598364, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.022685304304540136, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.020277710633105284, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02046316093818001, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.020550716433428592, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02228378970732467, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.022505773837578746, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.02288594994961854, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0225192235714373, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02508756643387003, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02765422883252089, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.029075126598207598, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.03097480325009321, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.03146626101191228, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.031212161056634217, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.030402643136955, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.028748427298528223, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.029658331435300556, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03204095677014695, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.031204765303895793, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.03249524342204376, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.0332085965731103, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.03275847954296474, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03162737814683895, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.03332229745649603, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.03766811379075108, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04303296245247539, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04415457439051997, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.038464355899637016, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.040875089329241464, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.042238483255008424, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.043700831207731376, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.04081014072649281, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.043990989237061684, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.0443174124479562, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.041420355767895094, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.035510556767456555, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.03386368923768146, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.03754256985796999, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.03706818056918779, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.03940070464188596, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.041416338985811464, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.034309087282667045, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.032163548756704165, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.03749805936008987, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 3.0477261352539062, "validation/loss_best": 3.109200954437256, "validation/acc_best": 0.08102620893318568, "validation/f1_best": 0.04415457439051997} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 3.1143157577514646, "train/grad": 0.40348389580845834, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.144112548828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.14284912109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.14083984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.139112548828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.137470703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.135439453125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.133326416015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13115966796875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.128489990234375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.125819091796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.12341796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.120076904296875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.11708251953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.112969970703125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1092041015625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1057421875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.101494140625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0967523193359376, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0916143798828126, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.087037353515625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0816656494140626, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.07647705078125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0708734130859376, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0655487060546873, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0606488037109374, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.055377197265625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.051385498046875, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.04823486328125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0445458984375, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.041181640625, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0387808227539064, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0374716186523436, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.036504211425781, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.036956481933594, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.038970947265625, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.0430892944335937, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.049357604980469, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.0580239868164063, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.0709942626953124, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.0871588134765626, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.1005859375, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.114959259033203, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.140456085205078, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1637396240234374, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.1991265869140624, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.247105712890625, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.300489807128906, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.3783311462402343, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.439423065185547, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04823551302775741, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04822987765073776, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04823235888034105, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04823525926098227, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.048243440371006724, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.048252065125852825, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04826640848070383, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04828218210488558, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04830346019938588, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04832510739564896, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04834517814218998, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.048376741111278536, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.048399851862341164, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0484407945163548, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.048480285685509446, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0485160706192255, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0485721386410296, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.048644159939140084, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.048732202984392646, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04882925637066364, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.048953870981931685, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04910427682101726, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.049293986912816765, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04953791294246912, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04983319062739611, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.05022418318316341, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.05060453109443188, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.050974216554313896, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.051539879348129036, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.05230484144762158, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05300295593217015, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05359935818240046, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0543704385869205, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.055091419536620376, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05606175415217876, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.057277607657015325, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05885155625641346, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.060589677914977075, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0628054011799395, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.06544598992913961, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.06732881661504507, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0690589059703052, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0719498752988875, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07434038504958153, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07734438590705395, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08093300633132458, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.08428697479888797, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.08854572799056769, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.09040484705939889, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.137079954147339, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1359548568725586, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1343960762023926, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.132993221282959, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1318273544311523, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1303205490112305, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1287894248962402, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1273088455200195, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.125582456588745, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1239609718322754, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.122532844543457, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.120572090148926, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.118852138519287, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1166763305664062, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1147773265838623, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.113149404525757, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.111145257949829, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1088013648986816, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1061933040618896, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1036696434020996, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1004958152770996, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0973098278045654, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.094240665435791, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.09163761138916, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0899057388305664, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0891804695129395, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0895113945007324, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.090102195739746, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0901894569396973, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0886948108673096, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.086566209793091, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0850989818573, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.088813543319702, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.100428342819214, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1233346462249756, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1432721614837646, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1529576778411865, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1630656719207764, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.178621292114258, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2121317386627197, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2540857791900635, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2682979106903076, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.308356523513794, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.3764290809631348, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.3910248279571533, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.4338605403900146, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.4756534099578857, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.6800696849823, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.808117628097534, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06090808416389812, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06183093392395718, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07456626061277224, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07567368032484312, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07585825027685493, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07954964931709117, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.08342561830933924, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.08453303802141011, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.08416389811738649, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08767072720561092, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08840900701365817, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.0873015873015873, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.09007013658176449, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.09080841638981174, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09265411590992986, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09542266519010704, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09505352528608342, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09486895533407161, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09560723514211886, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.0946843853820598, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09283868586194167, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09228497600590624, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08674787744555186, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08397932816537468, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08416389811738649, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.0843484680693983, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.08508674787744555, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.07881136950904392, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07456626061277224, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.07715023994093761, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.07179771133259505, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.07142857142857142, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.07641196013289037, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.08342561830933924, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.07678110003691399, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.07511997046880768, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.06607604282022887, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.02288613557669689, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02289659682141063, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022642700373933438, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02323470924158512, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0235124001063063, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02269015158525592, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.023239844806445698, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021466147306540645, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021911118817134822, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.021298522508522627, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.020679259954270137, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.022660333075990743, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.023263978695137424, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02316458291553296, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02459095412183678, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.025253372086245493, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.024625715016628687, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02751615183387879, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.030769826413390836, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.031714730131304554, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.03218881348257089, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.03497959646973026, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03653561712634273, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03593960281485093, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.038074888142047736, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.03901618934642546, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.040315909419388986, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.042014847794333, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04335109749986029, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04570119047546209, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04994287868616543, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.05150859862539228, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04640618166716289, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.040414514826587436, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.03219744807493353, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.03012590189150602, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.035764032482256986, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.042248184891722294, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.047205449223233616, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.039844447058101036, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.03488940821801547, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.03713914420831723, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.02873342110681676, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.02426787702295098, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.027338904323429437, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.03351094771224512, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.03066062145875369, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.022625261488672544, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.02051364453251564, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 3.0387808227539064, "validation/loss_best": 3.086566209793091, "validation/acc_best": 0.09560723514211886, "validation/f1_best": 0.04994287868616543} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 3.094993282556534, "train/grad": 0.39398836210370064, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.14297119140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.14150634765625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.13935302734375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13761474609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1358544921875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.133634033203125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.131385498046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12899658203125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.126195068359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.123446044921875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.12083251953125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.117220458984375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.11399169921875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.109434814453125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1054248046875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.101724853515625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.097130126953125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0918634033203123, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0863671875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0813848876953127, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.075430908203125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0696356201171877, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0634588623046874, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0572503662109374, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0511090087890627, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0443408203125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.038919677734375, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.03426513671875, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0288461303710936, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0231063842773436, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.018199462890625, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.014493713378906, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.011124267578125, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.010025939941406, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.010611267089844, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.0134896850585937, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.0190805053710936, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.026089324951172, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.035727844238281, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.0486932373046876, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0598991394042967, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.073591766357422, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0943894958496094, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1190407562255857, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.1545623779296874, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.2010198974609376, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.238349494934082, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.297068214416504, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.356522789001465, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.047767104748636484, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04776487961411476, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.047762574609369036, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.047768290340900424, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04777344785630703, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04778633408248425, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04779938828200102, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0478149289637804, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04784148612990975, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04786969730630517, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.047896474692970516, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04793987970799208, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04798527901992202, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.048046974483877423, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04811205131933093, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04817328162491322, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.048249985724687576, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.048337380569428205, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04843895709142089, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.048534697256982326, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.048654450289905074, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04878794120624661, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04894541000947356, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04913580982014537, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04935454282909632, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04965443771332503, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.049970438163727524, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.050296256672590973, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.050811234209686515, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.05145770478993654, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05199976416304707, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05248774776235223, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.053301951717585325, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05422615958377719, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05538994751870632, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05666194012388587, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.058199388347566126, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05966829175129533, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.061370389964431525, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.06327788235619664, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.06469043718650937, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06638426333665848, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06872918458655476, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07147589148953556, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07471498714759946, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.07776349050924182, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07983964590355754, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.08370299192145467, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0860387121886015, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.136455535888672, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.135406970977783, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1338305473327637, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1324844360351562, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.131314277648926, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1298792362213135, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.128416061401367, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.126885414123535, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1252145767211914, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1234853267669678, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1220154762268066, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.119828462600708, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1179311275482178, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1153271198272705, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.112976312637329, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1109724044799805, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.10854434967041, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.106137752532959, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1038177013397217, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.102017402648926, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.100104570388794, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0983335971832275, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0961151123046875, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.093482494354248, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0906360149383545, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0875041484832764, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0857503414154053, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0855226516723633, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.087681531906128, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0933189392089844, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.098064661026001, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.10058856010437, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.102924346923828, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.10686993598938, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1152243614196777, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.125380039215088, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.133784294128418, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.139826536178589, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.148876190185547, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1530494689941406, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1463265419006348, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1642379760742188, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2356374263763428, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.3124330043792725, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.366992950439453, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.386475086212158, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.3872897624969482, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.4676480293273926, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.51230788230896, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06127722406792174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06201550387596899, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07475083056478406, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07678110003691399, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0769656699889258, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07862679955703211, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07954964931709117, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.08176448874123293, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08342561830933924, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08600959763750461, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08693244739756367, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08656330749354005, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.09117755629383537, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08970099667774087, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09302325581395349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09283868586194167, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09246954595791805, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09560723514211886, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08933185677371724, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08803986710963455, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.0873015873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.0828719084533038, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08194905869324474, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.08047249907715023, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.08305647840531562, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.0799187892211148, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.07936507936507936, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.07733480989294943, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.07585825027685493, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.073827980804725, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.0710594315245478, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07087486157253599, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.06976744186046512, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.07216685123661867, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.06921373200442968, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.022550715645683936, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02306442203485555, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022862794821996147, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.023498237690789806, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02426298304764873, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.022888849686427504, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.022302126995053528, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.022669129255158075, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021944551078849305, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.021871751052032835, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.022080800534660494, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.023140790297844122, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.022838391656284335, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02507574509084871, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.026159189878948014, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.028210588689769413, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.0287132318384648, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.028645314674454036, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.030303026085110412, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.030804807240753813, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.032772231523994576, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.032821261778996434, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.033109979424078693, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03451253267571779, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.035626842534484886, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.04065677110289175, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04165601065294174, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.04173028572514683, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04456046935642507, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04395759338003619, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04347161231474311, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04524928663752777, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04041548859878418, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.03623876152064031, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.033411335110268996, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.03189330810851207, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.03345985820314911, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.03894133630652342, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04235840710198733, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.04408509297666525, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.04923804435381881, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.047977531038637035, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.039252302369774214, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.03237769864717721, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.03301223847380754, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.037115594948373826, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.04306809298160042, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.04665801220780582, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.038111769022867746, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 3.014493713378906, "validation/loss_best": 3.10058856010437, "validation/acc_best": 0.09560723514211886, "validation/f1_best": 0.04524928663752777} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 3.0763126969337464, "train/grad": 0.38869635462760926, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.14009765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1385498046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.13632080078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.134346923828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1325634765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.130145263671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.127515869140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.124898681640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.12169189453125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.118494873046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11557861328125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.111478271484375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1077490234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.10267333984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.097989501953125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0937091064453126, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.088563232421875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0828533935546876, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0768072509765627, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.071446533203125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.06515869140625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0589947509765625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0525396728515624, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.045980224609375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.039764404296875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0327728271484373, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.02702880859375, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0220343017578126, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0162017822265623, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.010736083984375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0068670654296876, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0043798828125, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.0021041870117187, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.0013186645507814, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.000904235839844, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.0007415771484376, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.0008229064941405, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.001453857421875, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.0023045349121094, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.0058543395996096, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0139369201660156, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.0253587341308594, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0470694732666015, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.073843231201172, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.1065687561035156, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.1384327697753904, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.174439926147461, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.2274089813232423, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.280830154418945, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.048221034705638884, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.048219260424375535, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04821897527202964, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.048219614885747435, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.048224101588129994, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04822882926091552, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04823543796315789, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.048242949862033126, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04825690675526857, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.048267946504056455, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0482825961150229, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04830297874286771, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04833378832787275, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.048366989959031347, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.048407516721636054, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04843915017321706, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04848997922614217, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04855568617582321, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04863442040979862, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04872019382193685, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04884095957502723, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.048978137485682964, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04914263086393476, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04933814529329538, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04954932613298297, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04980496728792787, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.05004041979089379, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.05028068229556084, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.05065756019204855, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.051200403552502396, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05174631401896477, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05229412032291293, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05310960032045841, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05389903802424669, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05482433933764696, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05570249354466796, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05660914411768317, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05742072742432356, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0582987529784441, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05967713713645935, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.06149966631084681, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06355512646958232, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06656585952267051, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06956483600661159, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07261952882632614, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.07500128533691168, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07754755409434438, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.08103383101522922, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.08447231901809574, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1362335681915283, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.135233163833618, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.13379168510437, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1325905323028564, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1315085887908936, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1302473545074463, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.129023790359497, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.127750873565674, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1263208389282227, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.124932289123535, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1236705780029297, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1219747066497803, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1203479766845703, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1181838512420654, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1161060333251953, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.114226818084717, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1118884086608887, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.109239339828491, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1063880920410156, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.104044198989868, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1013541221618652, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0987396240234375, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.096243143081665, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.093855619430542, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.091829776763916, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.090433120727539, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0906832218170166, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0921785831451416, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.096442222595215, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.102191209793091, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1054680347442627, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1063473224639893, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1061007976531982, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1063594818115234, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.108156204223633, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1116647720336914, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.117119789123535, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.123497486114502, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.129410982131958, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.136413812637329, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.139716625213623, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1435837745666504, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1555323600769043, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.159344434738159, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.18394136428833, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.2213714122772217, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.2546896934509277, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.3235061168670654, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.365518808364868, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.061461794019933555, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06183093392395718, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.073827980804725, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07438169066076043, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07678110003691399, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07918050941306755, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08065706902916205, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08250276854928018, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08361018826135105, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08416389811738649, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08564045773348099, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.0843484680693983, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08250276854928018, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08157991878922112, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08379475821336287, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08490217792543374, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.082687338501292, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08084163898117387, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08176448874123293, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08305647840531562, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08194905869324474, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.08231819859726837, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.08305647840531562, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.08490217792543374, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.0812107788851975, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.08231819859726837, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.08028792912513842, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.08010335917312661, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.07272056109265411, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07419712070874862, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.0769656699889258, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.07807308970099668, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.07659653008490218, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.02314188179328658, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.023290469686415682, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022776093763423235, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02270053836029415, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.022861398427113298, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.022914487680527548, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02236970826022092, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.022575495781997256, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.022453658179194664, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.022955966547384733, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.022811701666727958, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.02365030242510421, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.02451187390066827, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.025880497065805363, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02777056146186578, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02976260518525771, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02994003505127481, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.03211081604538753, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.03368289089983476, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.03409411895171616, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.036611204311263114, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.03830883404912839, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03952216868567171, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.0396749054373974, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.04115643842107591, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.04069331868241341, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.0414140099808464, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.04056342740217284, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04020728262214846, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.03952459511887671, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.040456844020458786, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04145001396184403, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.0399555280061655, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.03642210334674958, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.03465445780659052, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.03615976552341497, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.03744345897024855, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.03753253065294924, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04078793114046068, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.04437021016477811, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.04137067310545305, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.04148107752973917, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.04454186268133617, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.04778811382349737, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.042281320862464646, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.04318920884191311, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.04582756714428323, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.04176565487784136, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.04059402566513022, "id_best": 26, "lr_best": 0.00041999999999999996, "wd_best": 0.05, "train/loss_best": 3.02702880859375, "validation/loss_best": 3.0906832218170166, "validation/acc_best": 0.08564045773348099, "validation/f1_best": 0.0414140099808464} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 3.067026951313019, "train/grad": 0.37609999522566795, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13696533203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.13559326171875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.13336669921875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.131292724609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1294140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.127032470703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.124468994140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.121773681640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.118538818359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1153955078125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11240966796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.108245849609375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.104468994140625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0993841552734374, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0948577880859376, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.090640869140625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0855523681640626, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0798895263671877, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0738134765625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.068519287109375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.062142333984375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0559661865234373, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0494757080078125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0428985595703124, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.03666015625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0297467041015627, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0242498779296874, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0196051025390624, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.013768310546875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.007625732421875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0028414916992188, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.99906005859375, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.994736328125, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.99174072265625, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9894235229492185, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9883123779296876, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.98867919921875, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.990733642578125, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9957516479492186, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.004539794921875, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0126490783691406, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.0216110229492186, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.034371185302734, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.047960052490234, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.0719407653808593, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.100115203857422, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.1275995635986327, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.17427375793457, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.2142223358154296, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04767460187897086, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04767089523375034, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04767022080719471, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.047670272998511794, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.047675223797559736, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.047677318714559075, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04768808752298355, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04769696194678545, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.047705126497894525, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.047722406145185234, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04773858295753598, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.047760947793722155, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04778570322319865, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04782157432287931, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04785622296854854, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.047885747402906416, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04792877454310655, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04798321083188057, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04804423647001386, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04810931010171771, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04819785512983799, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04830689907073975, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.048442641012370585, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04862540973350406, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04884503394365311, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04915193822234869, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.049463160987943414, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.049762326385825874, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.05018062632530928, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.05062788100913167, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05096791116520762, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05127046572044492, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.051727863159030676, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.052270966954529284, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05307399917393923, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05397028716281056, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05500844040885568, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.056057433392852546, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05730383360758424, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05874223256483674, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.059872904252260925, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06098895888775587, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06248783683404326, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06410457946360111, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06681591333821416, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0693327135592699, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07149193255230785, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07505917435511947, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.07769838206470013, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1356353759765625, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1345503330230713, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.133030891418457, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1316142082214355, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1304779052734375, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.129009246826172, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.127516508102417, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1259799003601074, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1242480278015137, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.122511148452759, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.121018886566162, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1189463138580322, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1171176433563232, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.114649772644043, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1125009059906006, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1105263233184814, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1081905364990234, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.105611801147461, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1030101776123047, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1007001399993896, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0982131958007812, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.095867395401001, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0934815406799316, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0912258625030518, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0892584323883057, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.087392807006836, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.086038112640381, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0853524208068848, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0848495960235596, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0861804485321045, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.08998441696167, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.096147298812866, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.104749917984009, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.109686851501465, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1142797470092773, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1187922954559326, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1242239475250244, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.127338171005249, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1282577514648438, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1458985805511475, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1694953441619873, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.190446615219116, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.200817823410034, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.2106800079345703, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.2320432662963867, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.2559804916381836, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.295726776123047, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.3494961261749268, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.351609468460083, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07438169066076043, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07641196013289037, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07604282022886674, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0784422296050203, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07954964931709117, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08139534883720931, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.082687338501292, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08656330749354005, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08877814691768181, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08970099667774087, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09228497600590624, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08767072720561092, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.0873015873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08545588778146918, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08490217792543374, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08600959763750461, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08803986710963455, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08748615725359911, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08656330749354005, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.08693244739756367, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.08951642672572906, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09210040605389443, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.08693244739756367, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.08490217792543374, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.08490217792543374, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.08564045773348099, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.08711701734957548, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.0858250276854928, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.08028792912513842, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.07918050941306755, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.08324104835732743, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.023384914991977312, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02326359855568789, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.02289177033155908, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02334581605883294, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02343262844180111, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.023512712138148983, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02252712192225792, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021929758842800105, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.022348486642009266, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.023279345394410584, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.024438546898050497, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.02606096199863565, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.02706238227422351, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.028510002745509442, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.029766148956083063, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.030723722672246406, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.030616140799755203, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.03292280030325832, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.032390086973514684, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.03367606813734897, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.033475699105342135, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.03496656854893387, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.036649954286807465, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03943023696033667, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.04164826208753823, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.04173087057101398, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04340746487761265, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.04324697308249001, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.044207586508054314, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04165095678291739, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.041652637693143015, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.03881432908060752, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.040218294282427604, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.042920396320362576, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.0441519387946182, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.041940097660292386, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.040580231654525074, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.04109207571964065, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04513151554413313, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.05013864148324457, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.048963930350203876, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.04768479344690573, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.046227869806700045, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.043443204801834466, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.04157850826648183, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.04154019377432828, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.037935373505433065, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.03699049710074473, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.040901624327671936, "id_best": 28, "lr_best": 0.00057, "wd_best": 0.05, "train/loss_best": 3.013768310546875, "validation/loss_best": 3.0848495960235596, "validation/acc_best": 0.09228497600590624, "validation/f1_best": 0.044207586508054314} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 3.056879367828369, "train/grad": 0.3710200859606266, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.14390380859375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.142281494140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.139764404296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13750244140625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1355615234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.132882080078125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13023193359375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12749267578125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.124085693359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12070556640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.117763671875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.113507080078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.10966064453125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1044561767578127, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0996551513671875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0953778076171874, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0899755859375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.08408935546875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0779095458984376, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.07228515625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0657635498046876, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0593060302734374, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0526123046875, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.04557373046875, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0387872314453124, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.031068115234375, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0246612548828127, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0191116333007812, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0122116088867186, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0048992919921873, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.9990496826171875, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.9942532348632813, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.9886074829101563, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.9846649169921875, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9808944702148437, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9781008911132814, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9757534790039064, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.974012603759766, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9729319763183595, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.973479461669922, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.976980743408203, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9831524658203126, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.994120788574219, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.0040748596191404, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.0190141296386717, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.039510498046875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.0632887268066407, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.099661560058594, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.13245361328125, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.048279368728399275, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.048275145888328555, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04826726045459509, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04826663866639137, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04827022005803883, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.048270489955320954, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0482733117043972, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04828212040476501, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04828902504406869, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04830048973672092, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04831342928111553, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04832973525859416, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04834750109352171, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.048374607460573316, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04839931584894657, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04842542697675526, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04846153813414276, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.048511678958311676, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04857232344336808, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04863869754597545, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04872699562460184, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04883251823484898, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.048956129401922226, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04909816447645426, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04925716059282422, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0494645693525672, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04966927679255605, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04986248631030321, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.050155936665833, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.050551075935363766, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.050949186254292726, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05131175028160215, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05181567022576928, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05229778116568923, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05292852697893977, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0536130203306675, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05434358993545175, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05492964845150709, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05545993452891707, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.056309604458510874, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05730150012299418, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05845715653151274, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0601403390429914, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0615169201605022, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06318560188636184, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.06534408833831548, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0676825886592269, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07043262643739581, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.07255257047712803, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.135263204574585, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1341826915740967, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.132603406906128, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1312551498413086, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.130018472671509, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1285133361816406, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.126986026763916, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.125448703765869, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1236634254455566, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.121873378753662, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.12030291557312, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1180641651153564, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1160292625427246, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.113295555114746, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1109414100646973, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1087756156921387, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1062264442443848, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1035382747650146, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1008706092834473, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0986154079437256, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.096224308013916, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0939881801605225, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.091808319091797, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.089799165725708, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0879604816436768, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.08626127243042, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0850398540496826, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.084369421005249, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.083923101425171, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0838429927825928, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0834500789642334, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.083021879196167, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0822641849517822, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0810225009918213, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0797317028045654, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0797550678253174, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.082043409347534, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.085435390472412, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.091416597366333, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.100955009460449, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.107457160949707, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.113403797149658, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1199686527252197, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1352760791778564, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.179312229156494, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.231111526489258, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.247189521789551, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.2319531440734863, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.2383368015289307, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07438169066076043, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07511997046880768, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07862679955703211, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0799187892211148, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.08176448874123293, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.08194905869324474, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.08231819859726837, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08490217792543374, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08471760797342193, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08674787744555186, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08748615725359911, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08859357696567, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09007013658176449, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08951642672572906, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08914728682170543, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08693244739756367, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08822443706164636, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08896271686969362, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08896271686969362, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.09394610557401255, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09007013658176449, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09043927648578812, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09246954595791805, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.09228497600590624, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.0932078257659653, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09302325581395349, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09228497600590624, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09007013658176449, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.0932078257659653, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.09228497600590624, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.09191583610188261, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.08914728682170543, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.09117755629383537, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.09080841638981174, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.023371693811235602, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02410549366524607, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.023446327843022332, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.024055157340574857, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.023456514427226835, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.023649660050196017, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.022860919846094185, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02197734547516757, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02205315749596802, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.022294118261386536, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.024043524568675343, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.024996190683508956, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.025851788560392313, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.027362062346986543, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02929030107969964, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02974922154077418, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.031362683169380505, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.03232282140337658, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.033473290956568744, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.034009739423942344, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.03551515735678776, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.037323704110983257, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.0375325380323784, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.039191599120657235, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.04024339145108761, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.04143499014957212, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04250472063523677, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.0434687691816959, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04421347781673596, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04408718347079744, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.045288536661251065, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04665034152232898, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04600398010057271, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.04730870002360801, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.049346351017879375, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.050815377064020416, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.053892461138622415, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.05706582808550873, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.05757856744852552, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.05918491361546188, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.05923860844465629, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.0566244336351992, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.05036625287578835, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.04695440181441491, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.035923469355577275, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0285474624529166, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.028180106610053143, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.037515982373874025, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.04049035233622388, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 2.9808944702148437, "validation/loss_best": 3.0797317028045654, "validation/acc_best": 0.09394610557401255, "validation/f1_best": 0.049346351017879375} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 3.041276297569275, "train/grad": 0.36132585793733596, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13669189453125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.135274658203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.132928466796875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13083251953125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1288720703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.126334228515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.123673095703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12080810546875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11751220703125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.114117431640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11105224609375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.106683349609375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1027679443359375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0973565673828123, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0924359130859376, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0879083251953126, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0826031494140627, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0764111328125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0698114013671876, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0640521240234375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.05720458984375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0504150390625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0433038330078124, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.035921630859375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.028623046875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0205230712890625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0137420654296876, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0078445434570313, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.000568542480469, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.992652587890625, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.986302490234375, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.9809979248046874, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.974537658691406, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.969378662109375, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9638616943359377, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.959415283203125, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.955494689941406, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.953116607666016, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9517604064941407, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.9520228576660155, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.9540771484375, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.957491760253906, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.9650538635253905, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9743231201171874, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.9869044494628905, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.0006509399414063, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.016498260498047, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.0424848937988282, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.0692435455322267, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0480493601039052, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04804114880040288, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04803698955103755, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04803342629224062, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04803157364949584, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04803227750584483, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.048031536750495436, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04803275708109141, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04803952703252435, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04804222563281655, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04804828904569149, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04806053277105093, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.048070135917514564, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.048090495113283394, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04811424270272255, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04813433684408665, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04816530231386423, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04820335187017918, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04824280943721533, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04829220959916711, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04835574291646481, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04843186043202877, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04852437760680914, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04863513564690947, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04876899966970086, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04894931364804506, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04912778615951538, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04930023530498147, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0495441560447216, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.049845312125980853, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.050117622390389444, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05035178165882826, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05062126461416483, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05086683945730328, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05118518147617578, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05155784728005528, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05201327882707119, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.052548598349094394, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05320384668186307, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05409976646304131, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.054881284460425374, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05571551447734237, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05707836730405688, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05844882829114795, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05996264856308699, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.061242356710135934, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.06278319021686912, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06545666463673115, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.06812299968674779, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.13508939743042, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1340324878692627, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1324687004089355, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.13114333152771, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1299755573272705, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.128519058227539, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1270785331726074, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1256556510925293, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1240570545196533, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1223526000976562, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1208975315093994, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.118868112564087, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.116992473602295, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1144795417785645, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.112191915512085, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.110114812850952, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1076483726501465, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1050169467926025, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1023364067077637, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.100102424621582, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.097618341445923, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.095409631729126, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0933008193969727, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0912935733795166, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0895330905914307, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.088003158569336, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0867807865142822, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.086103677749634, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0855636596679688, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0856404304504395, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0865442752838135, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.087798595428467, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0900561809539795, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0923123359680176, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0955142974853516, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.099055767059326, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1041579246520996, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.11092209815979, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1189913749694824, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1253268718719482, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.125380039215088, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1253387928009033, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1302266120910645, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.144874095916748, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1699161529541016, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.184011459350586, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.186365842819214, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.189950942993164, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1981611251831055, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07345884090070137, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07585825027685493, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07825765965300849, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07881136950904392, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08047249907715023, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.082687338501292, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08305647840531562, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08508674787744555, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08656330749354005, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08859357696567, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08914728682170543, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.0902547065337763, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09099298634182355, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08988556662975268, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09007013658176449, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08767072720561092, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08877814691768181, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08748615725359911, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.0873015873015873, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08822443706164636, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.08785529715762273, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09099298634182355, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09283868586194167, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09265411590992986, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09228497600590624, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.08397932816537468, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.07881136950904392, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.07770394979697305, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.07881136950904392, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.07881136950904392, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.07733480989294943, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.023343511529729116, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.022891939122995763, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022843018727067194, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.022749450562417336, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02246854561863269, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.022693389149374344, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021615563464474283, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02168466583807915, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.022042579126833325, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.022317756630422143, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02224691023589691, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.0228477514077458, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.025138669335110456, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02457794048459837, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.025610925796602176, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.026430771286917853, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.028078092654389934, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02851530456019152, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.03034028131210791, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.03143288452079853, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.031632909381733, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.03405112727093832, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03568549515477858, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03622709353521758, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.03739858170423337, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.038696187380821014, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04011154587356322, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.04055033573243961, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04178710800474685, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04263171129962742, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.04181404780441216, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04247599954204256, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.042392024133575644, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.04454288629449199, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.04430274938610238, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.04683978584475864, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.04825423571265547, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.04795877866701017, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.04881335895423431, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.04864116434428326, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.052122788713467184, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.053918375592935636, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.0555309344009512, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.05218218872842526, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.048975529679437975, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0492263405509725, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.05163091174898857, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.052663828180776806, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.04834451310617832, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 2.9540771484375, "validation/loss_best": 3.125380039215088, "validation/acc_best": 0.09283868586194167, "validation/f1_best": 0.052122788713467184} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 3.0373394691944124, "train/grad": 0.35693194419145585, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.141839599609375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.140225830078125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.137880859375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13565673828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.133658447265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.130980224609375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.128251953125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1254541015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.122052001953125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.118701171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.115594482421875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.111143798828125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.107135009765625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.101697998046875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.096739501953125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.092119140625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0866204833984376, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0804364013671877, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0737200927734376, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.067735595703125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.06053955078125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.053475341796875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.046041259765625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0383807373046876, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0307318115234376, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.022081298828125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.014884033203125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.008641357421875, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.000797119140625, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.9923135375976564, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.9852786254882813, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.9794430541992187, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.97197265625, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.966025085449219, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.959505615234375, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9539111328125, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.949085388183594, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.9454312133789062, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9430581665039064, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.9422261047363283, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.9430584716796875, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9447625732421874, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.9484556579589842, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.952975006103516, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.961536712646484, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9738650512695313, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.9848883056640627, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.998201599121094, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.0104273986816406, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.048186198193579916, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04817816751077771, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.048169636372476815, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.048161039035767315, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04815678531304002, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04815395606681704, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.048148452937603, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04814803339540959, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04814571404829621, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04814461724832654, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04814344864338636, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04814150692895055, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04814570400863886, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04814938608556986, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04815572677180171, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.048162398096174, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04816732132807374, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04818187436088919, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.048200193494558334, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04821801085025072, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04823938770219684, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04827117152512073, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.048305243626236914, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04834982914850116, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04840173300355673, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04847825914621353, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.048562168683856724, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0486496365070343, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04879238002002239, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04898705082014203, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049179823510348795, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04936698691919446, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0496439853310585, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.049920364636927846, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.050294494684785605, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.050735010486096146, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05128516985103488, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05185232564806938, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05254625950008631, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.053510835953056814, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05439180184155703, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05523135831579566, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05630511406809092, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05723780252039432, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05855981450527906, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.06012559223920107, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.06142495213076472, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06262955762445926, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.06353286422789096, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.134953737258911, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1338791847229004, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1323366165161133, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1309447288513184, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1298108100891113, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1283211708068848, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.126922130584717, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1254513263702393, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1236953735351562, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.122044086456299, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.120591878890991, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.118457078933716, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1165342330932617, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.113974094390869, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1116843223571777, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.109621524810791, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1071345806121826, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.104292869567871, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1013729572296143, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0987706184387207, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0958328247070312, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0929813385009766, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0901379585266113, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.087442636489868, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.085024356842041, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.082530975341797, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.080579996109009, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0791070461273193, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.077423572540283, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.075867176055908, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.074979066848755, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0747532844543457, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.075448989868164, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.076920509338379, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0792813301086426, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0810773372650146, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.08256196975708, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.084353446960449, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.087512731552124, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0947327613830566, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.102440595626831, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.108682870864868, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1182758808135986, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1327064037323, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1559600830078125, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.183004379272461, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.203486204147339, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.228524684906006, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.2557637691497803, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06183093392395718, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.067921742340347, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07493540051679587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0754891103728313, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0784422296050203, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07918050941306755, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08139534883720931, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08194905869324474, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08397932816537468, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08508674787744555, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08803986710963455, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08914728682170543, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09007013658176449, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09007013658176449, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.0902547065337763, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09191583610188261, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08896271686969362, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08988556662975268, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08970099667774087, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08877814691768181, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.0902547065337763, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09099298634182355, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.0902547065337763, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.09062384643779993, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09136212624584718, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09099298634182355, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09154669619785899, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09136212624584718, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.09080841638981174, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.09265411590992986, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.08859357696567, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.08490217792543374, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.0828719084533038, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.08656330749354005, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.023122524686314688, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02266395210249281, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022897021750452037, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.023633448302959593, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.021808523140304042, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.021876812295710046, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021660956773347423, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02089942926193356, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.020765013777166854, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02111510876522178, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02159980962003638, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.021873974081571993, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.023319112124209108, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02389584306261726, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02395073190907449, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02438848942101468, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02598454997484066, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.02917939242572146, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.02981100292648149, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.03141791257732413, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.033431542033717974, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.03534227248747095, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.036421101245287996, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.03853723971549516, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.03951313927870508, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.042896255571977356, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.044731432173403, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.046035306394029585, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04716657635393864, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04763841027948954, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.049836202320897416, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04871881575559414, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.04970878008546725, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.04972216526401501, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.0504975419989556, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.05296315262244657, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.05456218519301276, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.053931675197577945, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.053984304085599566, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.05115729548525155, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.050041654179466176, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.049780695203490065, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.04793899347813089, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.04516484335083696, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.045660059482572374, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.04632743214108287, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.04805725158660031, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.04505495593435735, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.04146183806684288, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 2.961536712646484, "validation/loss_best": 3.1559600830078125, "validation/acc_best": 0.09265411590992986, "validation/f1_best": 0.045660059482572374} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 3.0211986446380616, "train/grad": 0.3423781245946884, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.14153076171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.139918212890625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.13732177734375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13496826171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.132918701171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.130054931640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.127120361328125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12408203125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1204150390625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11676025390625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11339111328125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.108570556640625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1042724609375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.098228759765625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0930035400390623, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.088321533203125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0823406982421875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.07568115234375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0687298583984375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0623577880859374, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0551116943359373, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0476629638671877, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0398773193359374, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0319183349609373, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.023824462890625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.01474853515625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0068896484375, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.000168762207031, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.991441650390625, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.9817349243164064, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.9735296630859374, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.9666943359375, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.9579571533203124, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.950601806640625, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.942571716308594, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9352352905273436, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9280941772460936, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.922199401855469, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9166934204101564, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.9113543701171873, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.908553466796875, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.9067257690429686, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.9060711669921875, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.9070790100097654, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.909789276123047, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9144297790527345, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.9199476623535157, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.928730926513672, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.9391123962402346, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04742312513291836, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04741675613448024, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04740758551284671, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.047400159258395434, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04739293400198221, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.047389388028532264, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04738413110375404, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.047381447330117225, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04737726129591465, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04737427037209272, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04737453505396843, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04736954171210527, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04737093346193433, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.047371575701981784, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04737352384254336, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04737517597153783, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04738063622266054, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.047385772466659544, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.047396318372339014, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.047402259688824416, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04741745308041573, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0474338736012578, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04745305795222521, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.047479234356433156, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.047506576236337424, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04755102381110191, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04759739052504301, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.047648603338748215, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.047729056309908625, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.047834468241780996, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04794956982135773, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04806657176464796, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.048241370208561424, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.048407604806125167, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04861721215769649, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04883583987131715, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04909400030970573, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04937072379514575, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04970307108014822, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05013946052640676, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.050526916086673736, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05090849429368973, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05153777997940779, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.052177920304238797, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05297853495925665, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05387160230427981, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05467851988971233, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.05569868905469775, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.056895155664533376, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1349740028381348, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1338791847229004, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.132397174835205, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1311051845550537, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1299445629119873, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1285338401794434, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.127117872238159, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1256914138793945, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.124037504196167, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1224164962768555, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.120981216430664, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.118926763534546, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.117055654525757, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1145646572113037, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1123411655426025, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1103100776672363, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.107867479324341, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.105215311050415, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.102430820465088, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1000022888183594, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.097256898880005, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.094639778137207, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.09209942817688, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.089571952819824, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.087458848953247, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.085146188735962, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0834012031555176, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0821993350982666, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0808990001678467, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0798070430755615, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0793473720550537, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0791311264038086, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.079008102416992, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.079055070877075, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0793488025665283, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0797674655914307, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0805866718292236, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0819687843322754, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0834808349609375, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0871353149414062, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0915260314941406, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.096717596054077, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.103696584701538, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1091742515563965, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.114553451538086, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.118265390396118, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1200122833251953, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1190450191497803, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.120157241821289, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.067921742340347, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07475083056478406, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07567368032484312, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07733480989294943, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0769656699889258, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07733480989294943, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08010335917312661, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08231819859726837, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08416389811738649, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08674787744555186, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08767072720561092, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08933185677371724, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08896271686969362, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.08877814691768181, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.0873015873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08840900701365817, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.08896271686969362, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08693244739756367, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.08656330749354005, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08822443706164636, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.08748615725359911, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.08988556662975268, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.0946843853820598, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09486895533407161, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09542266519010704, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09653008490217793, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09579180509413067, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.09154669619785899, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.09136212624584718, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.08767072720561092, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.08619416758951642, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.0843484680693983, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.08324104835732743, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.022872876361843042, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.022752856797041015, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022805721992464056, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.022653223215347865, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02180503084454934, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02220581783242444, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.022035415103656752, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021836905702046328, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021490097139370095, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02208354159827979, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.022019542894810114, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.02394810300841706, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.02431131815374706, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.025215026502579455, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02857754959359249, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.029273202406127224, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.03032615187704245, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.032148782485291315, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.0335683765402918, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.03478390163215008, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.03538254279247911, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.036099138551513775, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03806296896181027, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.039729175647471464, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.04100614711430078, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.043771286969744444, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.045188360291747394, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.046391770283561305, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.047017180918688284, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.048225013993556665, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.047897751934115516, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.04992277838313442, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.05146033318828361, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.05167915756181277, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.052461988196439914, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.05399835182904244, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.053175941434935946, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.05303668144591034, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.05472746141965968, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.054298206372414365, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.05285394284203973, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.053008536855324334, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.052662531796231205, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.050062241522753605, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05146475921653717, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.051283746594045165, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.05124792647184647, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.05272460991018279, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.053756005099263184, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 2.9067257690429686, "validation/loss_best": 3.096717596054077, "validation/acc_best": 0.09653008490217793, "validation/f1_best": 0.053008536855324334} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 3.0092829847335816, "train/grad": 0.33849770545959473, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.134012451171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.132320556640625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1296826171875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.127305908203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.125128173828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12243408203125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11952880859375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.116468505859375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.112880859375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.10935302734375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1060595703125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1014990234375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.097305908203125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0916070556640625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0863201904296873, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0817059326171874, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.075916748046875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.069384765625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0624334716796877, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0562042236328124, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0488507080078127, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.04150146484375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0335272216796874, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0254248046875, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0172259521484377, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.0079388427734375, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.999949951171875, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.992952880859375, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.9839031982421873, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.9737319946289062, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.96513427734375, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.9578240966796874, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.948522033691406, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.9405722045898437, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.93183837890625, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9239178466796876, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9159982299804685, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.909249114990234, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9024897766113282, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.8955657958984373, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.8910696411132815, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.887859649658203, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.884873046875, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.883316650390625, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.8828074645996096, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.883742218017578, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.8852989196777346, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.888860168457031, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.893371124267578, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04764673240482807, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.047640590500086545, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0476341993547976, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0476271085254848, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04762209888547659, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.047614795733243225, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04761100145056844, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04760555876418948, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04760254198685288, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04759693985804916, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04759143086150289, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.047585967853665354, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04758115964010358, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.047578125204890964, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.047574950847774744, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.047572192661464216, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04757021263241768, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04756478123366833, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.047564935609698294, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04756694225594402, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04756878137588501, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04757088612765074, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0475736822001636, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04758476357907057, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.047594853360205885, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.047609667256474496, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.047630832139402626, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.047651285324245694, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.047685592416673896, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04773090546950698, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.047778378315269945, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04782653611153364, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04790123952552676, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04797827426344156, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.048086203802376985, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.048208021577447656, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04835608012974262, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.048525387179106476, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.048727961480617525, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04901887223124504, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04927893832325935, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.049535394720733164, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04989137627184391, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05022704927250743, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.050623617973178624, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05105799686163664, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05145666364580393, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.05201367216184735, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.052593804448843005, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1349854469299316, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.133896589279175, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1324048042297363, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1310958862304688, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1299469470977783, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.128504753112793, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1270785331726074, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1256682872772217, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.123969078063965, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1223771572113037, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.120896816253662, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.118807554244995, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1169257164001465, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1143548488616943, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1121151447296143, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.110011339187622, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1075072288513184, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1046926975250244, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1017496585845947, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0991933345794678, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.096191883087158, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.09334397315979, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0903568267822266, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0875301361083984, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0847363471984863, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0818891525268555, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0795960426330566, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0778799057006836, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0758895874023438, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.074117422103882, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.072856903076172, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0721890926361084, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0715973377227783, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0714938640594482, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0716490745544434, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0722882747650146, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0732455253601074, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0746469497680664, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0765013694763184, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0793817043304443, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0816054344177246, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0834388732910156, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.085639715194702, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.087418794631958, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.089859962463379, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.093196392059326, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.096693754196167, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.101039171218872, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.10471248626709, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.061461794019933555, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07438169066076043, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07604282022886674, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07788851974898486, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07715023994093761, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08102620893318568, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08305647840531562, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.0843484680693983, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08619416758951642, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08748615725359911, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08822443706164636, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09043927648578812, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09062384643779993, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09117755629383537, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09210040605389443, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09246954595791805, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09283868586194167, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.09210040605389443, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09191583610188261, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09136212624584718, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09099298634182355, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.09154669619785899, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09191583610188261, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09117755629383537, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09080841638981174, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.08877814691768181, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.09007013658176449, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.08970099667774087, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.09117755629383537, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.09007013658176449, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.08619416758951642, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.08859357696567, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.02288246108141843, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.022414504569588708, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.02311278704552885, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.022152281585476316, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.021638299953666895, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02209679708473129, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021618323425350514, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020725950801300307, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021605945190557353, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0222369150852117, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.023143790791479175, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.023497324973254463, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.023940857125048554, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.025411279116556867, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.027099268661296746, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.028054114468922992, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02907833028106636, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.032207686083750596, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.03392396197627509, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.03510528257691116, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.036068038007052265, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.0379427896111538, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.039370629365603975, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.0413104316492524, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.04230644104065493, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.04403900502437582, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04500620052209739, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.045857714136065965, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04842717961538731, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.04950294167393967, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.05053304678575763, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.05169302456950295, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.052802441973878324, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.05434751960263651, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.05396755685793653, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.05482669569431694, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.05536495371398457, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.055780691332403674, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.05715411209365437, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.059565904196875914, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.06041911856833321, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.060516314905981534, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.05888883331392885, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.05988016139532143, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05850549976979652, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.05855655261195206, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.05604949073324953, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.05289975848529599, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.05508019189472927, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 2.9405722045898437, "validation/loss_best": 3.0714938640594482, "validation/acc_best": 0.09283868586194167, "validation/f1_best": 0.05434751960263651} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 3.0108576011657715, "train/grad": 0.3350328390300274, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.144783935546875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1431494140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.140411376953125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13811279296875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.135853271484375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.13298828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.130047607421875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.126953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.12313720703125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.119412841796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.115888671875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1110443115234374, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.10664794921875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1004986572265625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0950640869140624, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.089962158203125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0838067626953123, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0770233154296873, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0696258544921875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0630230712890625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0552947998046873, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.04751953125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.039320068359375, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.03079345703125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0223284912109376, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.012640380859375, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0043099975585936, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.9970355224609375, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.9874652099609373, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.9767767333984376, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.9677249145507814, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.959791259765625, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.949791259765625, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.941270446777344, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.9316952514648436, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9229071044921877, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9141357421875, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.9061932373046875, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.8982669067382814, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.8895123291015623, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.883291015625, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.8785003662109374, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.8731227111816406, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.8693450927734374, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.8664442443847657, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.864442901611328, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.8637852478027344, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.8646371459960935, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.8662493896484373, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04781487205997109, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04780702790245414, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04779449297115207, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04778495367616415, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.047777344714850185, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.047767941411584615, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04775855725631118, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04775033537298441, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.047739362027496096, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04773007500916719, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.047724299617111686, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04771205583587289, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0477024419978261, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.047690019197762015, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.047684718240052465, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04767142247408628, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04766127159819007, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04764780219644308, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04763831468299031, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.047628067173063754, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04761783011257648, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04761060729622841, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.047604641560465094, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04759699825197458, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0475905049033463, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04758169211447239, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04757577393203974, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04756870100274682, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04756432350724935, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04755992937833071, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.047556329648941756, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04755770921707153, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04756834691390395, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04757466001436114, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04759323528036475, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04762694947421551, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0476683977432549, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04771814826875925, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04778044054284692, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.047885280884802345, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.047979337461292744, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04808015080168843, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04821309937164187, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.048337688464671374, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.048525550346821544, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.04874757070094347, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04897446848452091, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04929258307442069, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.049593891799449924, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1349847316741943, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1338772773742676, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1323704719543457, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1310641765594482, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1298987865448, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1284618377685547, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1270370483398438, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1256020069122314, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.123898983001709, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1222736835479736, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1207873821258545, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.118654251098633, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1167304515838623, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1141488552093506, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1118626594543457, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1097536087036133, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1071889400482178, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1043386459350586, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.101332426071167, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.098721504211426, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0956966876983643, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0927860736846924, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.089757204055786, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0868020057678223, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.0840203762054443, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.0810208320617676, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.078686237335205, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0767526626586914, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.074580669403076, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.072486162185669, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0711121559143066, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.070176124572754, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.069427967071533, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0690512657165527, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0691676139831543, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0697011947631836, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0705580711364746, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.071906328201294, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0736281871795654, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0759551525115967, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.078148126602173, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.079793691635132, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0821356773376465, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0840394496917725, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.0863895416259766, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.0882961750030518, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.0895354747772217, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.090299367904663, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.090754985809326, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06164636397194537, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07438169066076043, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07585825027685493, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07751937984496124, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07973421926910298, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08102620893318568, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08342561830933924, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.0843484680693983, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08693244739756367, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08785529715762273, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08951642672572906, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.09043927648578812, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.0917312661498708, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09302325581395349, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09302325581395349, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09431524547803617, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09394610557401255, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.09376153562200074, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09376153562200074, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09542266519010704, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09357696566998892, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.0917312661498708, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09246954595791805, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09339239571797711, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09191583610188261, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09228497600590624, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.09302325581395349, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.09117755629383537, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.0917312661498708, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.09246954595791805, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.09043927648578812, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.0932078257659653, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.022571972022144393, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.022451957383661524, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.02310119722619552, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.022092818268820102, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02161755662165238, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.021491431010967316, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021299025098712353, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021008614253302447, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021630466675708986, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.022098724132690917, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02263729982730027, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.023643831276292975, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.024142780385508007, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.025938994627870957, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02731924262325362, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.028513740310896627, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02923325200879716, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.031999107041616466, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.0339213617646902, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.03611252926058505, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.036152930421527306, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.03770562755904767, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03944927792075969, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.04068708322480153, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.04187228019191545, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.04402141293950523, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04498312666476085, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.04633667199253913, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.048255129118754986, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.050113539860142144, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.05218614622082083, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.05282340350678758, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.054662903798945284, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.05463014917496989, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.055966047451039114, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.0569428362374188, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.05915702897092354, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.05736756441349087, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.056706003255956906, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.0583350425290016, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.059069805286639354, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.05840323397901794, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.058740452692720664, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.060664476028101766, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.05939181457071663, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.05968317245731485, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0605640111767633, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.058633773859625905, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.06063705605443832, "id_best": 36, "lr_best": 0.0021299999999999995, "wd_best": 0.05, "train/loss_best": 2.9141357421875, "validation/loss_best": 3.0705580711364746, "validation/acc_best": 0.09542266519010704, "validation/f1_best": 0.05915702897092354} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 3.0042679381370543, "train/grad": 0.3350503946840763, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.14028564453125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1386669921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.135948486328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13357421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.131453857421875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12865478515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12572998046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.122725830078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.118984375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11531982421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1118994140625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.10719970703125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1028118896484376, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.09672119140625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0914508056640626, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.086385498046875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.080281982421875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0735821533203125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.066204833984375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.05954833984375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.051761474609375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.044005126953125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0356011962890626, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.02701904296875, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.018316650390625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.008482666015625, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.999830322265625, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.992376708984375, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.9826226806640626, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.971588439941406, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.9622933959960935, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.9541851806640627, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.9438937377929686, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.9352777099609373, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.925406188964844, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.916353454589844, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9071305847167968, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.8987747192382813, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.890331573486328, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.8806446838378905, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.8737567138671873, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.8682164001464843, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.861774444580078, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.856968994140625, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.852443161010742, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.848597640991211, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.846053695678711, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.844401168823242, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.8435943603515623, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.048077657837420705, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0480665173009038, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04805465692654252, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04804480014368892, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.048038358874619005, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.048031073603779076, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.048024621065706014, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04801822828128934, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.048012584596872326, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04800401920452714, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.047997569534927606, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.047987681943923235, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.047982082571834325, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04796981858089566, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.047959455493837594, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04795143200084567, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04794196505099535, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04793070813640952, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04791797284036875, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.047909285835921765, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04789680939167738, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04788723792880774, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.047879042141139506, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.047864724323153494, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04784828828647733, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04783345239236951, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04782037043944001, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.047809704951941964, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04779167905449867, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.047775146905332806, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04776230098679662, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.047751653324812654, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0477344455756247, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04772103002294898, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.047708037700504066, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04770078198984265, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.047689894642680884, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04768329231068492, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.047683470528572795, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.047672466989606616, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04767502792179584, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04768047545105219, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04768921624869108, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04769853653386235, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.047722512111067775, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.04774320365861058, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04778083227574825, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04782801825553179, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.04787720175459981, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.134986639022827, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.133863925933838, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.13236141204834, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.131073474884033, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1298928260803223, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1284685134887695, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1270411014556885, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1255972385406494, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.123891830444336, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1222598552703857, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1207833290100098, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1186180114746094, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1166954040527344, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1141345500946045, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.111802101135254, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1097145080566406, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1071414947509766, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.104304313659668, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.101285934448242, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.098706007003784, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.095649003982544, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0927445888519287, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0897178649902344, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.0867600440979004, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.083958864212036, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.080984354019165, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.078709602355957, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0768532752990723, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0747263431549072, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.072709083557129, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.071408271789551, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.070524215698242, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.069826602935791, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.069582939147949, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0697739124298096, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.070228099822998, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.071098804473877, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.072260618209839, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0738866329193115, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0759050846099854, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.077610492706299, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0792388916015625, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0809898376464844, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.082369089126587, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.084044933319092, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.085380792617798, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.0863051414489746, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.086571216583252, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.086620569229126, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06201550387596899, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07345884090070137, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07585825027685493, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07641196013289037, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.08010335917312661, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0769656699889258, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08047249907715023, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.08176448874123293, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.08508674787744555, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08674787744555186, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08803986710963455, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.08896271686969362, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08933185677371724, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.09191583610188261, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.09210040605389443, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.09246954595791805, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.09283868586194167, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.09228497600590624, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.09210040605389443, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.09302325581395349, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09228497600590624, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09191583610188261, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.09210040605389443, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.09339239571797711, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.09486895533407161, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.09413067552602436, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.09191583610188261, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.09136212624584718, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.09099298634182355, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.09265411590992986, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.09357696566998892, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.09302325581395349, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.09339239571797711, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.022442081328194006, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.022619481516644637, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.02307530785698217, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.021985367092312202, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.021606578386292635, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.021624375946651064, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021527527929082118, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021344876175791404, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021621878687995127, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.022103533349833226, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.022600615515184577, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.023483354790549905, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.023924214211833456, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02590207204184952, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.027343394589460307, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.028919521285554865, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.029774251945109038, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.03279655188062061, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.03416723171432925, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.0365454632269304, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.03570487834235298, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.03765475640316364, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03920586044831912, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.039987341974820755, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.04232309686555608, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.04425390148705525, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04529622046167195, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.046142274397460596, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.04817369939971839, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.05003100952846153, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.05110220999681566, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.05247699267760597, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.05315675111317645, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.05373128225827808, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.05395989108887848, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.05596379697415408, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.055715741755090975, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.05568772771876074, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.05634781874583147, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.058309723749491636, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.059873501520058846, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.05928519416078288, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.05760530376539092, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.05789520554323271, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0575123682787338, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.058971891817300094, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.059979415241941725, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.06087136391349348, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.06113189662999501, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 2.8737567138671873, "validation/loss_best": 3.077610492706299, "validation/acc_best": 0.09486895533407161, "validation/f1_best": 0.059873501520058846} diff --git a/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f3c0e2fa31f9934ea0329c54c86f6dd98ca1472 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..a9a475193006d9e062f408ee58daa180fee6b7e9 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,0.3593813663804626,train,0.9252669039145908,0.010904871593248695,0.9203224369101146,0.01173719867432208,0.9160488879983971,0.012318429369024114 +flat_mae,patch,logistic,ppmi_dx,,0.3593813663804626,test,0.64,0.045881146454725824,0.6043956043956044,0.049682007235790186,0.6027456027456027,0.048692052715307486 +flat_mae,patch,logistic,ppmi_dx,1,0.005994842503189409,train,0.7455516014234875,0.016622797895076633,0.7047137193520059,0.020704635534619445,0.6968127809890816,0.018880162976549215 +flat_mae,patch,logistic,ppmi_dx,1,0.005994842503189409,test,0.7,0.03753730944007574,0.6357455075279261,0.05223531085843538,0.6358234295415959,0.04368095787486435 +flat_mae,patch,logistic,ppmi_dx,2,0.3593813663804626,train,0.9092526690391459,0.011979709123387305,0.9025880673611228,0.013043591835859264,0.896729822307857,0.01361131291235458 +flat_mae,patch,logistic,ppmi_dx,2,0.3593813663804626,test,0.72,0.042007123205475526,0.7057587221521648,0.04405794564614997,0.7079796264855688,0.044810475576369094 +flat_mae,patch,logistic,ppmi_dx,3,0.046415888336127774,train,0.8167259786476868,0.015392034630178368,0.7985053066140357,0.017643345948946836,0.7894053735816742,0.017728633294575937 +flat_mae,patch,logistic,ppmi_dx,3,0.046415888336127774,test,0.68,0.04266837236173884,0.6527777777777778,0.04739976315806683,0.6502546689303905,0.046163240465538304 +flat_mae,patch,logistic,ppmi_dx,4,0.046415888336127774,train,0.8131672597864769,0.015793045413442616,0.7936099889827396,0.018226110659478406,0.783906015842432,0.018117148660130613 +flat_mae,patch,logistic,ppmi_dx,4,0.046415888336127774,test,0.69,0.045380784479777336,0.6656239887822242,0.0488311890541267,0.6634125636672326,0.04803909987123628 +flat_mae,patch,logistic,ppmi_dx,5,0.046415888336127774,train,0.8167259786476868,0.01584508333276818,0.7965449976275416,0.018430339651149485,0.7859264611432242,0.018296044653756713 +flat_mae,patch,logistic,ppmi_dx,5,0.046415888336127774,test,0.64,0.04540371790943997,0.6043956043956044,0.05040360846540398,0.6027164685908319,0.04833037499866901 +flat_mae,patch,logistic,ppmi_dx,6,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,6,166.81005372000556,test,0.62,0.048204049622412426,0.6006725514922235,0.050010297965332365,0.6018675721561969,0.05048184636553182 +flat_mae,patch,logistic,ppmi_dx,7,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,7,2.782559402207126,test,0.58,0.04867332328904612,0.5625,0.050442389601862904,0.564516129032258,0.05152750907386934 +flat_mae,patch,logistic,ppmi_dx,8,0.3593813663804626,train,0.9306049822064056,0.01034204296117467,0.9256560805104148,0.011195520994473717,0.9201589595375723,0.011823258509858051 +flat_mae,patch,logistic,ppmi_dx,8,0.3593813663804626,test,0.61,0.04389279667553664,0.5400400990682863,0.051792123034808825,0.547962648556876,0.04576022588867022 +flat_mae,patch,logistic,ppmi_dx,9,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,9,1291.5496650148827,test,0.67,0.045856031228181965,0.648,0.04940321643955519,0.6472835314091681,0.04908078260371035 +flat_mae,patch,logistic,ppmi_dx,10,0.046415888336127774,train,0.8256227758007118,0.014924954643008382,0.8071352532496638,0.01722503260980756,0.7966308071076857,0.01724403588572248 +flat_mae,patch,logistic,ppmi_dx,10,0.046415888336127774,test,0.65,0.043420732375214495,0.6224786970121885,0.04735263819392983,0.6209677419354839,0.046405224668126126 +flat_mae,patch,logistic,ppmi_dx,11,0.3593813663804626,train,0.9323843416370107,0.010878865027400311,0.9280458221024259,0.011685164953687394,0.9250829586812246,0.012291076491252129 +flat_mae,patch,logistic,ppmi_dx,11,0.3593813663804626,test,0.73,0.04258696514193047,0.7052079921388797,0.04678460616993768,0.7007640067911716,0.04563627854118045 +flat_mae,patch,logistic,ppmi_dx,12,0.3593813663804626,train,0.9252669039145908,0.011117257164955157,0.9203224369101146,0.011925190364792221,0.9166934275315779,0.012397632391748467 +flat_mae,patch,logistic,ppmi_dx,12,0.3593813663804626,test,0.68,0.03707022524884356,0.6114618746964546,0.0489530168987849,0.6146010186757216,0.04148849992927919 +flat_mae,patch,logistic,ppmi_dx,13,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,13,2.782559402207126,test,0.6,0.046793349954881415,0.570999570999571,0.05030151943042741,0.5704584040747029,0.04950818943127722 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,train,0.8096085409252669,0.01636918262902895,0.7870269695950134,0.01917970983380262,0.7757974737743524,0.018785733197136275 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,test,0.66,0.0457470261328537,0.6155585707824514,0.053982095571807304,0.6137521222410866,0.05014193654852695 +flat_mae,patch,logistic,ppmi_dx,15,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,15,2.782559402207126,test,0.61,0.04850148038977779,0.5882166613873931,0.05028483318145729,0.5887096774193548,0.050459794337685424 +flat_mae,patch,logistic,ppmi_dx,16,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,16,2.782559402207126,test,0.66,0.045545981161898355,0.6458333333333333,0.04674980921630824,0.6494057724957556,0.04762828151886699 +flat_mae,patch,logistic,ppmi_dx,17,0.3593813663804626,train,0.9323843416370107,0.010412482997864795,0.9279107762520085,0.01122882348076594,0.924213230571612,0.011967511021478678 +flat_mae,patch,logistic,ppmi_dx,17,0.3593813663804626,test,0.59,0.047298621544395986,0.5577607593571352,0.050581542267941156,0.5573005093378608,0.04947443681383299 +flat_mae,patch,logistic,ppmi_dx,18,0.046415888336127774,train,0.8149466192170819,0.015767373296236085,0.794313063063063,0.0183247931638623,0.7836116463284093,0.018089967892855826 +flat_mae,patch,logistic,ppmi_dx,18,0.046415888336127774,test,0.61,0.0463800129366088,0.568536342515765,0.05079232561200265,0.5683361629881154,0.04835984175660792 +flat_mae,patch,logistic,ppmi_dx,19,0.3593813663804626,train,0.9288256227758007,0.01114315287541471,0.9242587601078167,0.011956790507608726,0.9213230571612074,0.012540763354375985 +flat_mae,patch,logistic,ppmi_dx,19,0.3593813663804626,test,0.66,0.04718675661666099,0.6458333333333333,0.04869286214685603,0.6494057724957556,0.04968508420984136 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,train,0.7330960854092526,0.0170728089950266,0.6907329958177416,0.021264883615439854,0.6840879897238279,0.019363028621217953 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,test,0.68,0.0371785906134162,0.6114618746964546,0.0502118867854834,0.6146010186757216,0.0420608066262241 +flat_mae,patch,logistic,ppmi_dx,21,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,21,166.81005372000556,test,0.72,0.04303194627250782,0.6961805555555556,0.0466807150561355,0.6926994906621392,0.0455611637069626 +flat_mae,patch,logistic,ppmi_dx,22,0.046415888336127774,train,0.8185053380782918,0.015571728955720754,0.7997484804024313,0.017891961957398214,0.789980732177264,0.017887982995089598 +flat_mae,patch,logistic,ppmi_dx,22,0.046415888336127774,test,0.64,0.04716990142029131,0.5989304812834224,0.052840356336346934,0.597623089983022,0.050081553477274554 +flat_mae,patch,logistic,ppmi_dx,23,0.046415888336127774,train,0.8078291814946619,0.015205112657221848,0.7879689792496332,0.017464559345508463,0.7787010276172126,0.017327725296106426 +flat_mae,patch,logistic,ppmi_dx,23,0.046415888336127774,test,0.6,0.04467654418148297,0.5604395604395604,0.05035303970457466,0.5602716468590832,0.04816941042906309 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,train,0.7348754448398577,0.016464439252728575,0.6932617813513068,0.020629439325529856,0.6864028045386428,0.018880606209153886 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,test,0.69,0.042843319199147016,0.6408295678368672,0.05295534921934128,0.6379456706281834,0.04789047841103359 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,train,0.8238434163701067,0.01474487597903719,0.8044461627682196,0.017215142130554043,0.7934462641832585,0.017191172579012596 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,test,0.64,0.03764505279581901,0.5714285714285714,0.04941972554337127,0.5772495755517827,0.04213139745476362 +flat_mae,patch,logistic,ppmi_dx,26,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,26,10000.0,test,0.55,0.04531883052330454,0.5021573182874212,0.049581078778630445,0.5046689303904923,0.04728774884185553 +flat_mae,patch,logistic,ppmi_dx,27,0.3593813663804626,train,0.9288256227758007,0.010597802030841357,0.9236744893524554,0.01153307684654649,0.9178441447227574,0.012345604689423853 +flat_mae,patch,logistic,ppmi_dx,27,0.3593813663804626,test,0.72,0.04094691197147839,0.6996996996996997,0.04489494166581603,0.6977928692699491,0.044815311509624055 +flat_mae,patch,logistic,ppmi_dx,28,0.046415888336127774,train,0.8131672597864769,0.014508838750727301,0.7920740795551844,0.01709277730003896,0.7812968315135945,0.017009881018698932 +flat_mae,patch,logistic,ppmi_dx,28,0.046415888336127774,test,0.7,0.040608821701694324,0.66078697421981,0.0492798591851148,0.6561969439728353,0.04556005741349238 +flat_mae,patch,logistic,ppmi_dx,29,0.005994842503189409,train,0.7259786476868327,0.017218288153651083,0.6843940079058302,0.021451176175089785,0.6783076429030186,0.019608733841196817 +flat_mae,patch,logistic,ppmi_dx,29,0.005994842503189409,test,0.68,0.04173713454467136,0.6259934548854604,0.05149650627634641,0.6247877758913413,0.04589965217505582 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,train,0.8096085409252669,0.014855253711560002,0.7886438324868636,0.017165490471453555,0.7784066581031899,0.01698497609902487 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,test,0.67,0.042191273979343163,0.6108031607500884,0.05345141772946449,0.6116298811544991,0.04709743808922692 +flat_mae,patch,logistic,ppmi_dx,31,0.046415888336127774,train,0.8220640569395018,0.015469134835098144,0.8046032960155761,0.017700328129338642,0.7954800899165061,0.017876224381167902 +flat_mae,patch,logistic,ppmi_dx,31,0.046415888336127774,test,0.69,0.0481094211147879,0.6726850385386971,0.050448145255090775,0.6735993208828523,0.05064966945399532 +flat_mae,patch,logistic,ppmi_dx,32,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,32,166.81005372000556,test,0.62,0.04997542996313289,0.6006725514922235,0.051810266681872115,0.6018675721561969,0.052200039071397863 +flat_mae,patch,logistic,ppmi_dx,33,0.046415888336127774,train,0.806049822064057,0.01620030260006846,0.7852246869949057,0.018757902355288538,0.7755164846927853,0.018598834108329517 +flat_mae,patch,logistic,ppmi_dx,33,0.046415888336127774,test,0.7,0.04388942469433838,0.6782496782496783,0.04736060293638377,0.6765704584040747,0.04708365621289806 +flat_mae,patch,logistic,ppmi_dx,34,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,34,2.782559402207126,test,0.61,0.048451753322248305,0.6010230179028133,0.04878601704489718,0.6090831918505942,0.05013582500343524 +flat_mae,patch,logistic,ppmi_dx,35,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,35,21.54434690031882,test,0.61,0.047698989507116395,0.6010230179028133,0.04815472534490107,0.6090831918505942,0.0494261416945548 +flat_mae,patch,logistic,ppmi_dx,36,0.3593813663804626,train,0.905693950177936,0.012127965076025647,0.8983562139344738,0.013321635520492784,0.8912304645686149,0.014038436814406407 +flat_mae,patch,logistic,ppmi_dx,36,0.3593813663804626,test,0.68,0.04152190265390062,0.64349376114082,0.048708242887887165,0.6400679117147707,0.046196014849308055 +flat_mae,patch,logistic,ppmi_dx,37,0.046415888336127774,train,0.8149466192170819,0.015701332178204238,0.794313063063063,0.018286471866121295,0.7836116463284093,0.018079750591492447 +flat_mae,patch,logistic,ppmi_dx,37,0.046415888336127774,test,0.68,0.04347815543465476,0.6483516483516483,0.04927717899521001,0.6451612903225806,0.04788757177522289 +flat_mae,patch,logistic,ppmi_dx,38,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,38,10000.0,test,0.66,0.04791099665003849,0.6392190152801358,0.050765460154544105,0.6392190152801358,0.05085579747309805 +flat_mae,patch,logistic,ppmi_dx,39,0.005994842503189409,train,0.7259786476868327,0.016102406539364296,0.6824858757062147,0.020483269363487263,0.6765681866837936,0.018612241132785336 +flat_mae,patch,logistic,ppmi_dx,39,0.005994842503189409,test,0.71,0.03991753499403489,0.6640018537828757,0.048865050691920965,0.6591680814940577,0.04453294996499716 +flat_mae,patch,logistic,ppmi_dx,40,2.782559402207126,train,0.998220640569395,0.001642336223700383,0.9981184064710746,0.0017392343628209858,0.9976851851851851,0.0021365577724991093 +flat_mae,patch,logistic,ppmi_dx,40,2.782559402207126,test,0.67,0.04433405463072377,0.6576408341114224,0.04537592184800704,0.6625636672325976,0.04636933324754521 +flat_mae,patch,logistic,ppmi_dx,41,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,41,2.782559402207126,test,0.68,0.04481169043899147,0.6483516483516483,0.05028248528870723,0.6451612903225806,0.048558914638662985 +flat_mae,patch,logistic,ppmi_dx,42,0.3593813663804626,train,0.9163701067615658,0.010957560049461754,0.9102282189406427,0.011930565071262843,0.9042496253478913,0.012634792498471676 +flat_mae,patch,logistic,ppmi_dx,42,0.3593813663804626,test,0.66,0.04046655903335493,0.6026180458158018,0.049950687477143624,0.6035653650254669,0.044457057023643706 +flat_mae,patch,logistic,ppmi_dx,43,0.3593813663804626,train,0.9234875444839857,0.01075659848829362,0.9183479015958076,0.01158130392782445,0.914378612716763,0.012181143460895194 +flat_mae,patch,logistic,ppmi_dx,43,0.3593813663804626,test,0.67,0.04465057222477669,0.6296711929076422,0.05095170630844664,0.6269100169779287,0.047939166690458924 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,train,0.7259786476868327,0.016590072519409395,0.6824858757062147,0.02058941334847783,0.6765681866837936,0.01872400131442885 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,test,0.65,0.039448173595237584,0.5872154735228211,0.05051085449348592,0.5904074702886248,0.04427074361963526 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,train,0.8256227758007118,0.015413930071784055,0.8071352532496638,0.017831239837141154,0.7966308071076857,0.0178640701649665 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,test,0.7,0.03658840800034895,0.6428571428571428,0.04888158555880266,0.6409168081494058,0.04277373526012462 +flat_mae,patch,logistic,ppmi_dx,46,0.046415888336127774,train,0.8220640569395018,0.014310385136564425,0.8022240990990992,0.01660007172513949,0.7911314493684436,0.016459156080491644 +flat_mae,patch,logistic,ppmi_dx,46,0.046415888336127774,test,0.57,0.04447551686040309,0.5174503422735944,0.049201560416930926,0.5207979626485568,0.046093888329012754 +flat_mae,patch,logistic,ppmi_dx,47,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,47,2.782559402207126,test,0.64,0.047331173659650574,0.625,0.04852006273665714,0.6281833616298811,0.049238172365568995 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,train,0.7348754448398577,0.016645296431397432,0.6923240851989433,0.020918490968019645,0.6855330764290302,0.019063914761253596 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,test,0.7,0.04099761456475242,0.66078697421981,0.0488361459372054,0.6561969439728353,0.045789870046624714 +flat_mae,patch,logistic,ppmi_dx,49,2.782559402207126,train,0.998220640569395,0.0018515737565159118,0.9981184064710746,0.0019616110682506094,0.9976851851851851,0.0024087603036156077 +flat_mae,patch,logistic,ppmi_dx,49,2.782559402207126,test,0.59,0.0459595082654286,0.5523528769516323,0.05016721098921262,0.5522071307300509,0.048292666464913495 +flat_mae,patch,logistic,ppmi_dx,50,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,50,166.81005372000556,test,0.59,0.048430915746039735,0.5670995670995671,0.05023278132470401,0.5674872665534805,0.05045748647558014 +flat_mae,patch,logistic,ppmi_dx,51,0.3593813663804626,train,0.9217081850533808,0.011767473118255265,0.9158739878886848,0.012837602056504306,0.9094546135731107,0.013564995480070502 +flat_mae,patch,logistic,ppmi_dx,51,0.3593813663804626,test,0.67,0.047143097055666584,0.6396986570586308,0.05205165676583107,0.6370967741935484,0.05049754138116273 +flat_mae,patch,logistic,ppmi_dx,52,0.046415888336127774,train,0.8185053380782918,0.015602295284367651,0.7987699566114832,0.01803031473021635,0.7882412759580391,0.01786871617027637 +flat_mae,patch,logistic,ppmi_dx,52,0.046415888336127774,test,0.62,0.04486012037433694,0.5766488413547237,0.050033507414195044,0.5764006791171477,0.04741804001880338 +flat_mae,patch,logistic,ppmi_dx,53,2.782559402207126,train,0.998220640569395,0.0018490412814937181,0.9981184064710746,0.0019590073326530314,0.9976851851851851,0.002405465741202476 +flat_mae,patch,logistic,ppmi_dx,53,2.782559402207126,test,0.66,0.04386772845726115,0.6310763888888888,0.048454052997592,0.6290322580645161,0.047605272353931305 +flat_mae,patch,logistic,ppmi_dx,54,0.3593813663804626,train,0.9359430604982206,0.010703568012914647,0.9318328840970351,0.01148918857831659,0.9288428602012417,0.012180831705198451 +flat_mae,patch,logistic,ppmi_dx,54,0.3593813663804626,test,0.59,0.047968739820845825,0.5626666666666666,0.05010204682104342,0.5623938879456706,0.049609711845432676 +flat_mae,patch,logistic,ppmi_dx,55,0.005994842503189409,train,0.7402135231316725,0.016107832448285498,0.694200954084675,0.021205334581966523,0.6872591522157996,0.018922652858925852 +flat_mae,patch,logistic,ppmi_dx,55,0.005994842503189409,test,0.72,0.039567961787284424,0.6727442730247779,0.05047170190371301,0.66723259762309,0.04527720016832182 +flat_mae,patch,logistic,ppmi_dx,56,0.046415888336127774,train,0.798932384341637,0.015667772569062596,0.7773430241323334,0.01799263455886617,0.767996681652751,0.017809299551262193 +flat_mae,patch,logistic,ppmi_dx,56,0.046415888336127774,test,0.65,0.046896801596697395,0.612789025334661,0.05299494124150444,0.6107809847198642,0.05045445348823193 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,train,0.8113879003558719,0.015287770789591914,0.7903575450450451,0.017763201099972932,0.7798517448083921,0.01754837378142448 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,test,0.72,0.04202083292844158,0.6834011759384894,0.04947822463029884,0.6774193548387097,0.046601465451410515 +flat_mae,patch,logistic,ppmi_dx,58,0.005994842503189409,train,0.7473309608540926,0.01619592111745097,0.7044821661334282,0.021077592316561835,0.6965184114750589,0.019042613461253768 +flat_mae,patch,logistic,ppmi_dx,58,0.005994842503189409,test,0.66,0.04100099510987507,0.6026180458158018,0.05151343984720501,0.6035653650254669,0.045811545840942304 +flat_mae,patch,logistic,ppmi_dx,59,0.046415888336127774,train,0.8220640569395018,0.015246881909455848,0.8041429686628749,0.01748810664870347,0.7946103618068936,0.01756344740214459 +flat_mae,patch,logistic,ppmi_dx,59,0.046415888336127774,test,0.64,0.0445430847607123,0.592944369063772,0.05199685266896293,0.5925297113752122,0.04812809740882392 +flat_mae,patch,logistic,ppmi_dx,60,0.3593813663804626,train,0.9252669039145908,0.011383790962286732,0.9201704545454545,0.012270819216035875,0.9158236994219653,0.01286136996681112 +flat_mae,patch,logistic,ppmi_dx,60,0.3593813663804626,test,0.65,0.043811500773198814,0.6072270227808326,0.05143470719284786,0.6056876061120543,0.04813510659146682 +flat_mae,patch,logistic,ppmi_dx,61,0.3593813663804626,train,0.9217081850533808,0.010433879679393871,0.9163690476190476,0.011243518007286766,0.9120637979019481,0.0117482856010109 +flat_mae,patch,logistic,ppmi_dx,61,0.3593813663804626,test,0.66,0.045871123814443436,0.6310763888888888,0.050731762239093606,0.6290322580645161,0.04960391878611363 +flat_mae,patch,logistic,ppmi_dx,62,0.005994842503189409,train,0.7419928825622776,0.015869001780592338,0.702394647559154,0.019874234801519022,0.6947923356882895,0.018304251358908834 +flat_mae,patch,logistic,ppmi_dx,62,0.005994842503189409,test,0.63,0.04076415582346825,0.5552350042072365,0.051821334829008275,0.5640916808149405,0.04401073688104307 +flat_mae,patch,logistic,ppmi_dx,63,0.046415888336127774,train,0.8042704626334519,0.01583739889818191,0.7824465090090089,0.01840890244060131,0.772331941768358,0.018097168618855054 +flat_mae,patch,logistic,ppmi_dx,63,0.046415888336127774,test,0.64,0.04239816505463415,0.5989304812834224,0.048055231074232076,0.597623089983022,0.04537159640092476 +flat_mae,patch,logistic,ppmi_dx,64,0.3593813663804626,train,0.9234875444839857,0.010653318395788041,0.9178683705201625,0.011572368762437663,0.9117694283879255,0.012223974153403153 +flat_mae,patch,logistic,ppmi_dx,64,0.3593813663804626,test,0.62,0.04699191419808305,0.5967741935483871,0.048264003553954764,0.5967741935483871,0.04795706486722177 +flat_mae,patch,logistic,ppmi_dx,65,0.3593813663804626,train,0.9288256227758007,0.01091159379480688,0.9242587601078167,0.011722942424458233,0.9213230571612074,0.012371635437446061 +flat_mae,patch,logistic,ppmi_dx,65,0.3593813663804626,test,0.59,0.046193315533743624,0.5577607593571352,0.04942733157144697,0.5573005093378608,0.04822536695406484 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,train,0.7526690391459074,0.016227094635876065,0.7147093517980856,0.02020502828763148,0.7060720402483409,0.018645131420412967 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,test,0.65,0.04298261974333347,0.5872154735228211,0.05353218144802641,0.5904074702886248,0.046860107115852256 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,train,0.8078291814946619,0.016092169397976835,0.7864020270270271,0.018701224751430063,0.776091843288375,0.0184959400595744 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,test,0.7,0.04300844568221455,0.6744791666666667,0.0476230707137528,0.6714770797962648,0.04714113751581404 +flat_mae,patch,logistic,ppmi_dx,68,2.782559402207126,train,0.998220640569395,0.0016300486335967834,0.9981184064710746,0.0017260498385248439,0.9976851851851851,0.0021205725279661845 +flat_mae,patch,logistic,ppmi_dx,68,2.782559402207126,test,0.65,0.045087554823920094,0.6224786970121885,0.04900501078569784,0.6209677419354839,0.04809922303515717 +flat_mae,patch,logistic,ppmi_dx,69,0.046415888336127774,train,0.8131672597864769,0.015449963890820477,0.7925944150572026,0.01778192668440933,0.782166559623207,0.01751974609114687 +flat_mae,patch,logistic,ppmi_dx,69,0.046415888336127774,test,0.68,0.041635921990511984,0.6323529411764706,0.05038457100061926,0.6298811544991512,0.046258978856040814 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,train,0.7384341637010676,0.01568877465681731,0.6935898434457513,0.020272206939475203,0.6866837936202098,0.01824417419743342 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,test,0.61,0.04535077066599861,0.5555555555555556,0.052278742376909076,0.5581494057724957,0.048018382292677984 +flat_mae,patch,logistic,ppmi_dx,71,0.3593813663804626,train,0.9252669039145908,0.01101875219973931,0.9198582138200783,0.011927279451830565,0.9140842432027403,0.012488738242540267 +flat_mae,patch,logistic,ppmi_dx,71,0.3593813663804626,test,0.62,0.04634627924655872,0.5703301673450927,0.0533848291194738,0.5713073005093379,0.04950913201876023 +flat_mae,patch,logistic,ppmi_dx,72,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,72,166.81005372000556,test,0.62,0.04979652999958932,0.6124031007751938,0.049906958701880284,0.6222410865874364,0.05124733599222944 +flat_mae,patch,logistic,ppmi_dx,73,0.046415888336127774,train,0.8274021352313167,0.01440014557042522,0.807916054446218,0.016969749867547693,0.796336437593663,0.01699145141303498 +flat_mae,patch,logistic,ppmi_dx,73,0.046415888336127774,test,0.64,0.04498913646648488,0.6179966044142615,0.04795835630530793,0.6179966044142615,0.04822496430193871 +flat_mae,patch,logistic,ppmi_dx,74,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,74,21.54434690031882,test,0.59,0.04785385668888141,0.5710848415106182,0.04900115957644546,0.5725806451612903,0.0494493611284456 +flat_mae,patch,logistic,ppmi_dx,75,0.3593813663804626,train,0.9270462633451957,0.01090233663075646,0.9219959714957937,0.011780970554230843,0.9172687861271676,0.012464550703876984 +flat_mae,patch,logistic,ppmi_dx,75,0.3593813663804626,test,0.62,0.04879337659969845,0.6006725514922235,0.050163043054400995,0.6018675721561969,0.05057758765120271 +flat_mae,patch,logistic,ppmi_dx,76,0.005994842503189409,train,0.7295373665480427,0.016351238933267512,0.683671051072402,0.020833592993124796,0.6777189038749732,0.01872897309157593 +flat_mae,patch,logistic,ppmi_dx,76,0.005994842503189409,test,0.71,0.04048293961658417,0.6640018537828757,0.049934500979280275,0.6591680814940577,0.04533422290368596 +flat_mae,patch,logistic,ppmi_dx,77,0.046415888336127774,train,0.8167259786476868,0.015273860081466785,0.7980272362815463,0.01752556459756183,0.7885356454720616,0.017503459019346846 +flat_mae,patch,logistic,ppmi_dx,77,0.046415888336127774,test,0.65,0.04363841885311612,0.6072270227808326,0.05107103288059304,0.6056876061120543,0.047761799354458866 +flat_mae,patch,logistic,ppmi_dx,78,0.046415888336127774,train,0.8042704626334519,0.015461628460024286,0.7845572655291625,0.017568407524469518,0.775810854206808,0.01745895139244613 +flat_mae,patch,logistic,ppmi_dx,78,0.046415888336127774,test,0.67,0.04268291930034776,0.6108031607500884,0.05354353252809076,0.6116298811544991,0.047301046179540014 +flat_mae,patch,logistic,ppmi_dx,79,0.046415888336127774,train,0.8096085409252669,0.015291562761646291,0.7870269695950134,0.017982933270669486,0.7757974737743524,0.01766096538291344 +flat_mae,patch,logistic,ppmi_dx,79,0.046415888336127774,test,0.7,0.04436784421177121,0.6657754010695187,0.05050456032082533,0.6612903225806452,0.04812933159284557 +flat_mae,patch,logistic,ppmi_dx,80,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,80,2.782559402207126,test,0.59,0.04780466922801579,0.5626666666666666,0.04979074618350361,0.5623938879456706,0.04951916531931988 +flat_mae,patch,logistic,ppmi_dx,81,0.005994842503189409,train,0.7206405693950177,0.01596294316579038,0.6737813106571772,0.020478970092737175,0.6687540141297367,0.018396879470565355 +flat_mae,patch,logistic,ppmi_dx,81,0.005994842503189409,test,0.68,0.039936244189958565,0.6323529411764706,0.04765848923102589,0.6298811544991512,0.04356461810764437 +flat_mae,patch,logistic,ppmi_dx,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,82,2.782559402207126,test,0.61,0.04408834766692896,0.5793334052421529,0.04849628328729169,0.5785229202037352,0.0478556625189827 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,train,0.7455516014234875,0.01691499917520841,0.7099066904903713,0.020347032421486423,0.7020311496467566,0.0190968043347211 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,test,0.59,0.04730363199586264,0.5523528769516323,0.051330018654450596,0.5522071307300509,0.04965819799613873 +flat_mae,patch,logistic,ppmi_dx,84,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,84,2.782559402207126,test,0.61,0.048246268249471896,0.5920075321686369,0.04940541246850299,0.5938030560271647,0.05011242505150788 +flat_mae,patch,logistic,ppmi_dx,85,0.005994842503189409,train,0.7366548042704626,0.015770523564800704,0.6966903452601485,0.01957897713248988,0.68958734746307,0.018004785302609686 +flat_mae,patch,logistic,ppmi_dx,85,0.005994842503189409,test,0.63,0.0368519416042086,0.5460679671205987,0.04915379311913211,0.5589983022071308,0.04063005901407071 +flat_mae,patch,logistic,ppmi_dx,86,0.046415888336127774,train,0.8291814946619217,0.015061182283033723,0.8128538128538129,0.017062661003230523,0.8038696210661529,0.017156748432381384 +flat_mae,patch,logistic,ppmi_dx,86,0.046415888336127774,test,0.61,0.046822644094497705,0.5623386825272135,0.05308330134902225,0.5632427843803056,0.049634237452652205 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,train,0.7348754448398577,0.017217505537355397,0.6941848447331997,0.021602103133521712,0.6872725326482552,0.019829096731725702 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,test,0.63,0.04621357376355999,0.5847828526540231,0.05314556920586391,0.5844651952461799,0.04975534880617717 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,train,0.8185053380782918,0.014891140470523432,0.7987699566114832,0.01729343066375224,0.7882412759580391,0.017242139595917885 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,test,0.68,0.04031746023747031,0.6323529411764706,0.04896371967452105,0.6298811544991512,0.04456028096188414 +flat_mae,patch,logistic,ppmi_dx,89,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,89,1291.5496650148827,test,0.64,0.04374187467404661,0.6043956043956044,0.048375244019457,0.6027164685908319,0.04641180484164536 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,train,0.7366548042704626,0.015588204195127853,0.6975858057009889,0.018823008354813025,0.6904570755726825,0.017366909926442933 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,test,0.62,0.04322006941225338,0.5703301673450927,0.04952817749048187,0.5713073005093379,0.04579765987711686 +flat_mae,patch,logistic,ppmi_dx,91,0.3593813663804626,train,0.9252669039145908,0.010816981068869568,0.9200157230573214,0.011736665006112321,0.9149539713123528,0.012476436940166449 +flat_mae,patch,logistic,ppmi_dx,91,0.3593813663804626,test,0.7,0.043749742856387164,0.6782496782496783,0.04615983995455694,0.6765704584040747,0.045658643443627404 +flat_mae,patch,logistic,ppmi_dx,92,0.3593813663804626,train,0.9199288256227758,0.011342308089045136,0.9140482947304025,0.012366757943605594,0.9080095268679084,0.013190505011025132 +flat_mae,patch,logistic,ppmi_dx,92,0.3593813663804626,test,0.64,0.04545373031996383,0.6279454319966928,0.04594504314092345,0.633276740237691,0.04699125499812413 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,train,0.7473309608540926,0.0166990541981978,0.7131498734759605,0.01986974758014441,0.7052156925711839,0.01872503395802783 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,test,0.55,0.04581222107691352,0.5021573182874212,0.04930466796091339,0.5046689303904923,0.04723567279678342 +flat_mae,patch,logistic,ppmi_dx,94,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,94,2.782559402207126,test,0.63,0.04313474701444301,0.5906626839252129,0.048636761040670703,0.5895585738539898,0.04647554434594961 +flat_mae,patch,logistic,ppmi_dx,95,0.046415888336127774,train,0.8131672597864769,0.015974040106394582,0.793106349857478,0.018477581945906266,0.7830362877328195,0.018380806054926375 +flat_mae,patch,logistic,ppmi_dx,95,0.046415888336127774,test,0.59,0.044776350900894095,0.539894512400404,0.04975157679868264,0.5420203735144312,0.046540764982263776 +flat_mae,patch,logistic,ppmi_dx,96,0.3593813663804626,train,0.9288256227758007,0.010695626374872433,0.9241166065810615,0.011536494878034848,0.920453329051595,0.012236263513603437 +flat_mae,patch,logistic,ppmi_dx,96,0.3593813663804626,test,0.64,0.04412300533735208,0.6043956043956044,0.04903661807519851,0.6027164685908319,0.04717355096268645 +flat_mae,patch,logistic,ppmi_dx,97,0.046415888336127774,train,0.8167259786476868,0.015528096115575696,0.7975412272878304,0.01782647540233806,0.7876659173624492,0.017759298338975552 +flat_mae,patch,logistic,ppmi_dx,97,0.046415888336127774,test,0.69,0.045634771830261184,0.6656239887822242,0.049871804299422465,0.6634125636672326,0.04932784543749887 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,train,0.9234875444839857,0.011077191449697073,0.9180310631268677,0.012032725080718254,0.912639156497538,0.012752677672122046 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,test,0.62,0.046658229713524284,0.5824175824175825,0.05262212093912312,0.5814940577249575,0.0503162602372028 +flat_mae,patch,logistic,ppmi_dx,99,0.3593813663804626,train,0.9270462633451957,0.010760329510598493,0.9218435718186413,0.011677482534189276,0.9163990580175552,0.012454824685332003 +flat_mae,patch,logistic,ppmi_dx,99,0.3593813663804626,test,0.62,0.04718702787843286,0.6006725514922235,0.04889385848816648,0.6018675721561969,0.04953727866389368 +flat_mae,patch,logistic,ppmi_dx,100,0.3593813663804626,train,0.9199288256227758,0.010782081289536945,0.9143858223734322,0.011665619575049713,0.9097489830871334,0.0123790090000188 +flat_mae,patch,logistic,ppmi_dx,100,0.3593813663804626,test,0.7,0.04011122536148702,0.66078697421981,0.04826994590298666,0.6561969439728353,0.04504928862130183 diff --git a/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..57be011a149b8b932cec2ab43d4eba8a4b1947c9 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:53:59 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:15:08 time: 3.9148 data: 3.1353 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:14 time: 0.1744 data: 0.0512 max mem: 3005 +extract (train) [ 40/232] eta: 0:00:49 time: 0.1538 data: 0.0437 max mem: 3005 +extract (train) [ 60/232] eta: 0:00:39 time: 0.1808 data: 0.0594 max mem: 3005 +extract (train) [ 80/232] eta: 0:00:32 time: 0.1642 data: 0.0509 max mem: 3005 +extract (train) [100/232] eta: 0:00:27 time: 0.1669 data: 0.0525 max mem: 3005 +extract (train) [120/232] eta: 0:00:22 time: 0.1738 data: 0.0573 max mem: 3005 +extract (train) [140/232] eta: 0:00:17 time: 0.1630 data: 0.0506 max mem: 3005 +extract (train) [160/232] eta: 0:00:13 time: 0.1648 data: 0.0516 max mem: 3005 +extract (train) [180/232] eta: 0:00:09 time: 0.1565 data: 0.0471 max mem: 3005 +extract (train) [200/232] eta: 0:00:05 time: 0.1718 data: 0.0552 max mem: 3005 +extract (train) [220/232] eta: 0:00:02 time: 0.1568 data: 0.0479 max mem: 3005 +extract (train) [231/232] eta: 0:00:00 time: 0.1514 data: 0.0468 max mem: 3005 +extract (train) Total time: 0:00:42 (0.1825 s / it) +extract (validation) [ 0/50] eta: 0:02:50 time: 3.4129 data: 3.2765 max mem: 3005 +extract (validation) [20/50] eta: 0:00:10 time: 0.1925 data: 0.0618 max mem: 3005 +extract (validation) [40/50] eta: 0:00:02 time: 0.1401 data: 0.0361 max mem: 3005 +extract (validation) [49/50] eta: 0:00:00 time: 0.1481 data: 0.0421 max mem: 3005 +extract (validation) Total time: 0:00:11 (0.2355 s / it) +extract (test) [ 0/50] eta: 0:02:45 time: 3.3183 data: 3.1923 max mem: 3005 +extract (test) [20/50] eta: 0:00:10 time: 0.2079 data: 0.0740 max mem: 3005 +extract (test) [40/50] eta: 0:00:02 time: 0.1488 data: 0.0414 max mem: 3005 +extract (test) [49/50] eta: 0:00:00 time: 0.1531 data: 0.0439 max mem: 3005 +extract (test) Total time: 0:00:12 (0.2438 s / it) +feature extraction time: 0:01:06 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|--------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 0.35938 | train | 0.92527 | 0.010905 | 0.92032 | 0.011737 | 0.91605 | 0.012318 | +| flat_mae | patch | logistic | ppmi_dx | | 0.35938 | test | 0.64 | 0.045881 | 0.6044 | 0.049682 | 0.60275 | 0.048692 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.03753730944007574, "f1": 0.6357455075279261, "f1_std": 0.05223531085843538, "bacc": 0.6358234295415959, "bacc_std": 0.04368095787486435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.72, "acc_std": 0.042007123205475526, "f1": 0.7057587221521648, "f1_std": 0.04405794564614997, "bacc": 0.7079796264855688, "bacc_std": 0.044810475576369094} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.04266837236173884, "f1": 0.6527777777777778, "f1_std": 0.04739976315806683, "bacc": 0.6502546689303905, "bacc_std": 0.046163240465538304} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.045380784479777336, "f1": 0.6656239887822242, "f1_std": 0.0488311890541267, "bacc": 0.6634125636672326, "bacc_std": 0.04803909987123628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04540371790943997, "f1": 0.6043956043956044, "f1_std": 0.05040360846540398, "bacc": 0.6027164685908319, "bacc_std": 0.04833037499866901} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 166.81005372000556, "split": "test", "acc": 0.62, "acc_std": 0.048204049622412426, "f1": 0.6006725514922235, "f1_std": 0.050010297965332365, "bacc": 0.6018675721561969, "bacc_std": 0.05048184636553182} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 2.782559402207126, "split": "test", "acc": 0.58, "acc_std": 0.04867332328904612, "f1": 0.5625, "f1_std": 0.050442389601862904, "bacc": 0.564516129032258, "bacc_std": 0.05152750907386934} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.04389279667553664, "f1": 0.5400400990682863, "f1_std": 0.051792123034808825, "bacc": 0.547962648556876, "bacc_std": 0.04576022588867022} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 1291.5496650148827, "split": "test", "acc": 0.67, "acc_std": 0.045856031228181965, "f1": 0.648, "f1_std": 0.04940321643955519, "bacc": 0.6472835314091681, "bacc_std": 0.04908078260371035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.043420732375214495, "f1": 0.6224786970121885, "f1_std": 0.04735263819392983, "bacc": 0.6209677419354839, "bacc_std": 0.046405224668126126} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.73, "acc_std": 0.04258696514193047, "f1": 0.7052079921388797, "f1_std": 0.04678460616993768, "bacc": 0.7007640067911716, "bacc_std": 0.04563627854118045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.68, "acc_std": 0.03707022524884356, "f1": 0.6114618746964546, "f1_std": 0.0489530168987849, "bacc": 0.6146010186757216, "bacc_std": 0.04148849992927919} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 2.782559402207126, "split": "test", "acc": 0.6, "acc_std": 0.046793349954881415, "f1": 0.570999570999571, "f1_std": 0.05030151943042741, "bacc": 0.5704584040747029, "bacc_std": 0.04950818943127722} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.0457470261328537, "f1": 0.6155585707824514, "f1_std": 0.053982095571807304, "bacc": 0.6137521222410866, "bacc_std": 0.05014193654852695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 2.782559402207126, "split": "test", "acc": 0.61, "acc_std": 0.04850148038977779, "f1": 0.5882166613873931, "f1_std": 0.05028483318145729, "bacc": 0.5887096774193548, "bacc_std": 0.050459794337685424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 2.782559402207126, "split": "test", "acc": 0.66, "acc_std": 0.045545981161898355, "f1": 0.6458333333333333, "f1_std": 0.04674980921630824, "bacc": 0.6494057724957556, "bacc_std": 0.04762828151886699} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.047298621544395986, "f1": 0.5577607593571352, "f1_std": 0.050581542267941156, "bacc": 0.5573005093378608, "bacc_std": 0.04947443681383299} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.0463800129366088, "f1": 0.568536342515765, "f1_std": 0.05079232561200265, "bacc": 0.5683361629881154, "bacc_std": 0.04835984175660792} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04718675661666099, "f1": 0.6458333333333333, "f1_std": 0.04869286214685603, "bacc": 0.6494057724957556, "bacc_std": 0.04968508420984136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.0371785906134162, "f1": 0.6114618746964546, "f1_std": 0.0502118867854834, "bacc": 0.6146010186757216, "bacc_std": 0.0420608066262241} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 166.81005372000556, "split": "test", "acc": 0.72, "acc_std": 0.04303194627250782, "f1": 0.6961805555555556, "f1_std": 0.0466807150561355, "bacc": 0.6926994906621392, "bacc_std": 0.0455611637069626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04716990142029131, "f1": 0.5989304812834224, "f1_std": 0.052840356336346934, "bacc": 0.597623089983022, "bacc_std": 0.050081553477274554} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.04467654418148297, "f1": 0.5604395604395604, "f1_std": 0.05035303970457466, "bacc": 0.5602716468590832, "bacc_std": 0.04816941042906309} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.042843319199147016, "f1": 0.6408295678368672, "f1_std": 0.05295534921934128, "bacc": 0.6379456706281834, "bacc_std": 0.04789047841103359} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.03764505279581901, "f1": 0.5714285714285714, "f1_std": 0.04941972554337127, "bacc": 0.5772495755517827, "bacc_std": 0.04213139745476362} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 10000.0, "split": "test", "acc": 0.55, "acc_std": 0.04531883052330454, "f1": 0.5021573182874212, "f1_std": 0.049581078778630445, "bacc": 0.5046689303904923, "bacc_std": 0.04728774884185553} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.72, "acc_std": 0.04094691197147839, "f1": 0.6996996996996997, "f1_std": 0.04489494166581603, "bacc": 0.6977928692699491, "bacc_std": 0.044815311509624055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.040608821701694324, "f1": 0.66078697421981, "f1_std": 0.0492798591851148, "bacc": 0.6561969439728353, "bacc_std": 0.04556005741349238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04173713454467136, "f1": 0.6259934548854604, "f1_std": 0.05149650627634641, "bacc": 0.6247877758913413, "bacc_std": 0.04589965217505582} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.042191273979343163, "f1": 0.6108031607500884, "f1_std": 0.05345141772946449, "bacc": 0.6116298811544991, "bacc_std": 0.04709743808922692} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.0481094211147879, "f1": 0.6726850385386971, "f1_std": 0.050448145255090775, "bacc": 0.6735993208828523, "bacc_std": 0.05064966945399532} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 166.81005372000556, "split": "test", "acc": 0.62, "acc_std": 0.04997542996313289, "f1": 0.6006725514922235, "f1_std": 0.051810266681872115, "bacc": 0.6018675721561969, "bacc_std": 0.052200039071397863} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.04388942469433838, "f1": 0.6782496782496783, "f1_std": 0.04736060293638377, "bacc": 0.6765704584040747, "bacc_std": 0.04708365621289806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 2.782559402207126, "split": "test", "acc": 0.61, "acc_std": 0.048451753322248305, "f1": 0.6010230179028133, "f1_std": 0.04878601704489718, "bacc": 0.6090831918505942, "bacc_std": 0.05013582500343524} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 21.54434690031882, "split": "test", "acc": 0.61, "acc_std": 0.047698989507116395, "f1": 0.6010230179028133, "f1_std": 0.04815472534490107, "bacc": 0.6090831918505942, "bacc_std": 0.0494261416945548} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.68, "acc_std": 0.04152190265390062, "f1": 0.64349376114082, "f1_std": 0.048708242887887165, "bacc": 0.6400679117147707, "bacc_std": 0.046196014849308055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.04347815543465476, "f1": 0.6483516483516483, "f1_std": 0.04927717899521001, "bacc": 0.6451612903225806, "bacc_std": 0.04788757177522289} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 10000.0, "split": "test", "acc": 0.66, "acc_std": 0.04791099665003849, "f1": 0.6392190152801358, "f1_std": 0.050765460154544105, "bacc": 0.6392190152801358, "bacc_std": 0.05085579747309805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.03991753499403489, "f1": 0.6640018537828757, "f1_std": 0.048865050691920965, "bacc": 0.6591680814940577, "bacc_std": 0.04453294996499716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 2.782559402207126, "split": "test", "acc": 0.67, "acc_std": 0.04433405463072377, "f1": 0.6576408341114224, "f1_std": 0.04537592184800704, "bacc": 0.6625636672325976, "bacc_std": 0.04636933324754521} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 2.782559402207126, "split": "test", "acc": 0.68, "acc_std": 0.04481169043899147, "f1": 0.6483516483516483, "f1_std": 0.05028248528870723, "bacc": 0.6451612903225806, "bacc_std": 0.048558914638662985} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04046655903335493, "f1": 0.6026180458158018, "f1_std": 0.049950687477143624, "bacc": 0.6035653650254669, "bacc_std": 0.044457057023643706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.67, "acc_std": 0.04465057222477669, "f1": 0.6296711929076422, "f1_std": 0.05095170630844664, "bacc": 0.6269100169779287, "bacc_std": 0.047939166690458924} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.039448173595237584, "f1": 0.5872154735228211, "f1_std": 0.05051085449348592, "bacc": 0.5904074702886248, "bacc_std": 0.04427074361963526} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.03658840800034895, "f1": 0.6428571428571428, "f1_std": 0.04888158555880266, "bacc": 0.6409168081494058, "bacc_std": 0.04277373526012462} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.57, "acc_std": 0.04447551686040309, "f1": 0.5174503422735944, "f1_std": 0.049201560416930926, "bacc": 0.5207979626485568, "bacc_std": 0.046093888329012754} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 2.782559402207126, "split": "test", "acc": 0.64, "acc_std": 0.047331173659650574, "f1": 0.625, "f1_std": 0.04852006273665714, "bacc": 0.6281833616298811, "bacc_std": 0.049238172365568995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.04099761456475242, "f1": 0.66078697421981, "f1_std": 0.0488361459372054, "bacc": 0.6561969439728353, "bacc_std": 0.045789870046624714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 2.782559402207126, "split": "test", "acc": 0.59, "acc_std": 0.0459595082654286, "f1": 0.5523528769516323, "f1_std": 0.05016721098921262, "bacc": 0.5522071307300509, "bacc_std": 0.048292666464913495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 166.81005372000556, "split": "test", "acc": 0.59, "acc_std": 0.048430915746039735, "f1": 0.5670995670995671, "f1_std": 0.05023278132470401, "bacc": 0.5674872665534805, "bacc_std": 0.05045748647558014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.67, "acc_std": 0.047143097055666584, "f1": 0.6396986570586308, "f1_std": 0.05205165676583107, "bacc": 0.6370967741935484, "bacc_std": 0.05049754138116273} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04486012037433694, "f1": 0.5766488413547237, "f1_std": 0.050033507414195044, "bacc": 0.5764006791171477, "bacc_std": 0.04741804001880338} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 2.782559402207126, "split": "test", "acc": 0.66, "acc_std": 0.04386772845726115, "f1": 0.6310763888888888, "f1_std": 0.048454052997592, "bacc": 0.6290322580645161, "bacc_std": 0.047605272353931305} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.047968739820845825, "f1": 0.5626666666666666, "f1_std": 0.05010204682104342, "bacc": 0.5623938879456706, "bacc_std": 0.049609711845432676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.72, "acc_std": 0.039567961787284424, "f1": 0.6727442730247779, "f1_std": 0.05047170190371301, "bacc": 0.66723259762309, "bacc_std": 0.04527720016832182} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.046896801596697395, "f1": 0.612789025334661, "f1_std": 0.05299494124150444, "bacc": 0.6107809847198642, "bacc_std": 0.05045445348823193} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.72, "acc_std": 0.04202083292844158, "f1": 0.6834011759384894, "f1_std": 0.04947822463029884, "bacc": 0.6774193548387097, "bacc_std": 0.046601465451410515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04100099510987507, "f1": 0.6026180458158018, "f1_std": 0.05151343984720501, "bacc": 0.6035653650254669, "bacc_std": 0.045811545840942304} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.0445430847607123, "f1": 0.592944369063772, "f1_std": 0.05199685266896293, "bacc": 0.5925297113752122, "bacc_std": 0.04812809740882392} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.65, "acc_std": 0.043811500773198814, "f1": 0.6072270227808326, "f1_std": 0.05143470719284786, "bacc": 0.6056876061120543, "bacc_std": 0.04813510659146682} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.045871123814443436, "f1": 0.6310763888888888, "f1_std": 0.050731762239093606, "bacc": 0.6290322580645161, "bacc_std": 0.04960391878611363} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04076415582346825, "f1": 0.5552350042072365, "f1_std": 0.051821334829008275, "bacc": 0.5640916808149405, "bacc_std": 0.04401073688104307} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04239816505463415, "f1": 0.5989304812834224, "f1_std": 0.048055231074232076, "bacc": 0.597623089983022, "bacc_std": 0.04537159640092476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.04699191419808305, "f1": 0.5967741935483871, "f1_std": 0.048264003553954764, "bacc": 0.5967741935483871, "bacc_std": 0.04795706486722177} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.046193315533743624, "f1": 0.5577607593571352, "f1_std": 0.04942733157144697, "bacc": 0.5573005093378608, "bacc_std": 0.04822536695406484} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04298261974333347, "f1": 0.5872154735228211, "f1_std": 0.05353218144802641, "bacc": 0.5904074702886248, "bacc_std": 0.046860107115852256} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.04300844568221455, "f1": 0.6744791666666667, "f1_std": 0.0476230707137528, "bacc": 0.6714770797962648, "bacc_std": 0.04714113751581404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.045087554823920094, "f1": 0.6224786970121885, "f1_std": 0.04900501078569784, "bacc": 0.6209677419354839, "bacc_std": 0.04809922303515717} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.041635921990511984, "f1": 0.6323529411764706, "f1_std": 0.05038457100061926, "bacc": 0.6298811544991512, "bacc_std": 0.046258978856040814} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04535077066599861, "f1": 0.5555555555555556, "f1_std": 0.052278742376909076, "bacc": 0.5581494057724957, "bacc_std": 0.048018382292677984} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.04634627924655872, "f1": 0.5703301673450927, "f1_std": 0.0533848291194738, "bacc": 0.5713073005093379, "bacc_std": 0.04950913201876023} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 166.81005372000556, "split": "test", "acc": 0.62, "acc_std": 0.04979652999958932, "f1": 0.6124031007751938, "f1_std": 0.049906958701880284, "bacc": 0.6222410865874364, "bacc_std": 0.05124733599222944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04498913646648488, "f1": 0.6179966044142615, "f1_std": 0.04795835630530793, "bacc": 0.6179966044142615, "bacc_std": 0.04822496430193871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 21.54434690031882, "split": "test", "acc": 0.59, "acc_std": 0.04785385668888141, "f1": 0.5710848415106182, "f1_std": 0.04900115957644546, "bacc": 0.5725806451612903, "bacc_std": 0.0494493611284456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.04879337659969845, "f1": 0.6006725514922235, "f1_std": 0.050163043054400995, "bacc": 0.6018675721561969, "bacc_std": 0.05057758765120271} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.04048293961658417, "f1": 0.6640018537828757, "f1_std": 0.049934500979280275, "bacc": 0.6591680814940577, "bacc_std": 0.04533422290368596} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04363841885311612, "f1": 0.6072270227808326, "f1_std": 0.05107103288059304, "bacc": 0.6056876061120543, "bacc_std": 0.047761799354458866} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04268291930034776, "f1": 0.6108031607500884, "f1_std": 0.05354353252809076, "bacc": 0.6116298811544991, "bacc_std": 0.047301046179540014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.04436784421177121, "f1": 0.6657754010695187, "f1_std": 0.05050456032082533, "bacc": 0.6612903225806452, "bacc_std": 0.04812933159284557} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.59, "acc_std": 0.04780466922801579, "f1": 0.5626666666666666, "f1_std": 0.04979074618350361, "bacc": 0.5623938879456706, "bacc_std": 0.04951916531931988} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.039936244189958565, "f1": 0.6323529411764706, "f1_std": 0.04765848923102589, "bacc": 0.6298811544991512, "bacc_std": 0.04356461810764437} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.61, "acc_std": 0.04408834766692896, "f1": 0.5793334052421529, "f1_std": 0.04849628328729169, "bacc": 0.5785229202037352, "bacc_std": 0.0478556625189827} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04730363199586264, "f1": 0.5523528769516323, "f1_std": 0.051330018654450596, "bacc": 0.5522071307300509, "bacc_std": 0.04965819799613873} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 2.782559402207126, "split": "test", "acc": 0.61, "acc_std": 0.048246268249471896, "f1": 0.5920075321686369, "f1_std": 0.04940541246850299, "bacc": 0.5938030560271647, "bacc_std": 0.05011242505150788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.0368519416042086, "f1": 0.5460679671205987, "f1_std": 0.04915379311913211, "bacc": 0.5589983022071308, "bacc_std": 0.04063005901407071} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.046822644094497705, "f1": 0.5623386825272135, "f1_std": 0.05308330134902225, "bacc": 0.5632427843803056, "bacc_std": 0.049634237452652205} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04621357376355999, "f1": 0.5847828526540231, "f1_std": 0.05314556920586391, "bacc": 0.5844651952461799, "bacc_std": 0.04975534880617717} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.04031746023747031, "f1": 0.6323529411764706, "f1_std": 0.04896371967452105, "bacc": 0.6298811544991512, "bacc_std": 0.04456028096188414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 1291.5496650148827, "split": "test", "acc": 0.64, "acc_std": 0.04374187467404661, "f1": 0.6043956043956044, "f1_std": 0.048375244019457, "bacc": 0.6027164685908319, "bacc_std": 0.04641180484164536} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04322006941225338, "f1": 0.5703301673450927, "f1_std": 0.04952817749048187, "bacc": 0.5713073005093379, "bacc_std": 0.04579765987711686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.7, "acc_std": 0.043749742856387164, "f1": 0.6782496782496783, "f1_std": 0.04615983995455694, "bacc": 0.6765704584040747, "bacc_std": 0.045658643443627404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04545373031996383, "f1": 0.6279454319966928, "f1_std": 0.04594504314092345, "bacc": 0.633276740237691, "bacc_std": 0.04699125499812413} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.55, "acc_std": 0.04581222107691352, "f1": 0.5021573182874212, "f1_std": 0.04930466796091339, "bacc": 0.5046689303904923, "bacc_std": 0.04723567279678342} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 2.782559402207126, "split": "test", "acc": 0.63, "acc_std": 0.04313474701444301, "f1": 0.5906626839252129, "f1_std": 0.048636761040670703, "bacc": 0.5895585738539898, "bacc_std": 0.04647554434594961} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.044776350900894095, "f1": 0.539894512400404, "f1_std": 0.04975157679868264, "bacc": 0.5420203735144312, "bacc_std": 0.046540764982263776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04412300533735208, "f1": 0.6043956043956044, "f1_std": 0.04903661807519851, "bacc": 0.6027164685908319, "bacc_std": 0.04717355096268645} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.045634771830261184, "f1": 0.6656239887822242, "f1_std": 0.049871804299422465, "bacc": 0.6634125636672326, "bacc_std": 0.04932784543749887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.046658229713524284, "f1": 0.5824175824175825, "f1_std": 0.05262212093912312, "bacc": 0.5814940577249575, "bacc_std": 0.0503162602372028} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.04718702787843286, "f1": 0.6006725514922235, "f1_std": 0.04889385848816648, "bacc": 0.6018675721561969, "bacc_std": 0.04953727866389368} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.3593813663804626, "split": "test", "acc": 0.7, "acc_std": 0.04011122536148702, "f1": 0.66078697421981, "f1_std": 0.04826994590298666, "bacc": 0.6561969439728353, "bacc_std": 0.04504928862130183} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|-------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 235.12 | 1414 | 0.8745 | 0.097501 | 0.85917 | 0.11205 | 0.85352 | 0.11519 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 235.12 | 1414 | 0.6483 | 0.041877 | 0.61152 | 0.044638 | 0.6116 | 0.043013 | + + +done! total time: 0:05:21 diff --git a/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic/config.yaml b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0b4c5d2d74cdc89e1dc80724d905be37b8b82af --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (ppmi_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic +model: flat_mae +representation: reg +dataset: ppmi_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic +remote_dir: null diff --git a/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic/eval_table.csv b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..feb6b12acf93a57d7a30c7800d2d60458b9db5cb --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,ppmi_dx,,0.046415888336127774,train,0.8718861209964412,0.013041750097370342,0.8599609618339632,0.01472590091357834,0.8494890803446202,0.01517638884294909 +flat_mae,reg,logistic,ppmi_dx,,0.046415888336127774,test,0.65,0.04204190290650507,0.6011396011396011,0.048777506719868295,0.5995280995280995,0.045803139123730206 +flat_mae,reg,logistic,ppmi_dx,1,0.046415888336127774,train,0.8665480427046264,0.014086225180872736,0.8549367951184426,0.01572785690316406,0.8463926354099764,0.01620428609652585 +flat_mae,reg,logistic,ppmi_dx,1,0.046415888336127774,test,0.62,0.04571186279293374,0.5703301673450927,0.05231788984034999,0.5713073005093379,0.048655443192094076 +flat_mae,reg,logistic,ppmi_dx,2,0.005994842503189409,train,0.7473309608540926,0.016160522432073228,0.7089866826144668,0.020020172576186716,0.7008670520231214,0.018482933075693875 +flat_mae,reg,logistic,ppmi_dx,2,0.005994842503189409,test,0.68,0.04210850745395756,0.6259934548854604,0.051952166578008795,0.6247877758913413,0.04639610373425943 +flat_mae,reg,logistic,ppmi_dx,3,0.005994842503189409,train,0.7491103202846975,0.016218600697157966,0.7088435974030269,0.020628132726979654,0.7005726825090987,0.018901951844570677 +flat_mae,reg,logistic,ppmi_dx,3,0.005994842503189409,test,0.64,0.044195311968578746,0.5989304812834224,0.049254482167341865,0.597623089983022,0.046711008197985064 +flat_mae,reg,logistic,ppmi_dx,4,0.005994842503189409,train,0.7508896797153025,0.016796587540997072,0.7113507960965588,0.021022445454399914,0.7028874973239134,0.01924457944951387 +flat_mae,reg,logistic,ppmi_dx,4,0.005994842503189409,test,0.64,0.04661801797588568,0.592944369063772,0.05346581043028313,0.5925297113752122,0.04980534414464328 +flat_mae,reg,logistic,ppmi_dx,5,0.3593813663804626,train,0.9875444839857651,0.004773641656530285,0.9867575528065303,0.005114883628096219,0.9837962962962963,0.0062101541920602295 +flat_mae,reg,logistic,ppmi_dx,5,0.3593813663804626,test,0.58,0.047338001647724844,0.5442708333333334,0.04956300620932027,0.5441426146010186,0.047989714075625134 +flat_mae,reg,logistic,ppmi_dx,6,0.005994842503189409,train,0.7384341637010676,0.016329379943317356,0.6964539632499642,0.0208935098201931,0.6892929779490473,0.019022548801411247 +flat_mae,reg,logistic,ppmi_dx,6,0.005994842503189409,test,0.61,0.045618224428401424,0.5555555555555556,0.052673059223196934,0.5581494057724957,0.04838596606071715 +flat_mae,reg,logistic,ppmi_dx,7,0.046415888336127774,train,0.8558718861209964,0.014127665670526035,0.8422764960033817,0.01600561266767569,0.8325037465210876,0.016375172907907617 +flat_mae,reg,logistic,ppmi_dx,7,0.046415888336127774,test,0.61,0.04778844630242753,0.584,0.05092954074073358,0.583616298811545,0.0506670525307329 +flat_mae,reg,logistic,ppmi_dx,8,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,ppmi_dx,8,21.54434690031882,test,0.68,0.04721116817025395,0.6637242538881883,0.04903213408634025,0.6655348047538201,0.04951188934582191 +flat_mae,reg,logistic,ppmi_dx,9,0.005994842503189409,train,0.7384341637010676,0.016095907196941307,0.6973790728767926,0.020016432036946794,0.6901627060586598,0.018310040784597786 +flat_mae,reg,logistic,ppmi_dx,9,0.005994842503189409,test,0.71,0.03950815611997097,0.6640018537828757,0.04940689484440615,0.6591680814940577,0.04509909036662303 +flat_mae,reg,logistic,ppmi_dx,10,0.005994842503189409,train,0.7508896797153025,0.01673856534058316,0.7147663171937761,0.02048436407380308,0.7063664097623634,0.01914130607636022 +flat_mae,reg,logistic,ppmi_dx,10,0.005994842503189409,test,0.6,0.04327835024582152,0.5477159656264134,0.04800917683381933,0.5500848896434635,0.044780619706748966 +flat_mae,reg,logistic,ppmi_dx,11,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,ppmi_dx,11,2.782559402207126,test,0.53,0.051227086585126036,0.5037482842360892,0.052350690291502605,0.5038200339558574,0.05255858112076436 +flat_mae,reg,logistic,ppmi_dx,12,0.005994842503189409,train,0.7615658362989324,0.015663326211140655,0.7269906178854715,0.019514677275118922,0.7176461143224149,0.018370324502241173 +flat_mae,reg,logistic,ppmi_dx,12,0.005994842503189409,test,0.61,0.037629451231714774,0.5215311004784688,0.047685830491403396,0.5377758913412564,0.03986298364161761 +flat_mae,reg,logistic,ppmi_dx,13,0.005994842503189409,train,0.7562277580071174,0.016884720700633005,0.7196497168969723,0.02085432534050235,0.7107016698779705,0.019337393512402736 +flat_mae,reg,logistic,ppmi_dx,13,0.005994842503189409,test,0.6,0.04061869520307121,0.5238095238095238,0.04894970234769562,0.5348047538200339,0.042423345806271676 +flat_mae,reg,logistic,ppmi_dx,14,0.005994842503189409,train,0.7580071174377224,0.0165495732656123,0.7204470973782771,0.020886026064887694,0.7112770284735603,0.019355162760296467 +flat_mae,reg,logistic,ppmi_dx,14,0.005994842503189409,test,0.62,0.04228427130742589,0.5476190476190476,0.05244185327396815,0.5560271646859083,0.04534031701936225 +flat_mae,reg,logistic,ppmi_dx,15,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,ppmi_dx,15,166.81005372000556,test,0.63,0.04791633959308662,0.6053333333333333,0.051402814146095176,0.6048387096774194,0.050969410652772454 +flat_mae,reg,logistic,ppmi_dx,16,0.005994842503189409,train,0.7526690391459074,0.01608881762459851,0.7147093517980856,0.020443207481453413,0.7060720402483409,0.01891896246773176 +flat_mae,reg,logistic,ppmi_dx,16,0.005994842503189409,test,0.61,0.04480357128622673,0.5623386825272135,0.050557997393108754,0.5632427843803056,0.04730628014090519 +flat_mae,reg,logistic,ppmi_dx,17,0.046415888336127774,train,0.8594306049822064,0.014225530740591333,0.8439519884716096,0.016626055430125485,0.8310452793834298,0.01702794095891203 +flat_mae,reg,logistic,ppmi_dx,17,0.046415888336127774,test,0.62,0.04113653364103495,0.5476190476190476,0.05099806441551977,0.5560271646859083,0.04407461570750624 +flat_mae,reg,logistic,ppmi_dx,18,0.046415888336127774,train,0.8825622775800712,0.012695304454125486,0.8713369963369964,0.014344413784271868,0.8602681438664097,0.014808685956657679 +flat_mae,reg,logistic,ppmi_dx,18,0.046415888336127774,test,0.63,0.04379214084741691,0.5847828526540231,0.050376610672929055,0.5844651952461799,0.047053803881620726 +flat_mae,reg,logistic,ppmi_dx,19,0.005994842503189409,train,0.7508896797153025,0.017428712902398356,0.7163948844384849,0.021175218555771837,0.7081058659815885,0.019963528919578324 +flat_mae,reg,logistic,ppmi_dx,19,0.005994842503189409,test,0.66,0.045580890732849884,0.6263736263736264,0.050686535691664425,0.6239388794567062,0.04899939261217484 +flat_mae,reg,logistic,ppmi_dx,20,0.005994842503189409,train,0.7508896797153025,0.016554307542541363,0.7139325189063409,0.02042117215971942,0.705496681652751,0.018955079889489246 +flat_mae,reg,logistic,ppmi_dx,20,0.005994842503189409,test,0.61,0.04302825118454153,0.5555555555555556,0.04894458069080654,0.5581494057724957,0.04511066337781345 +flat_mae,reg,logistic,ppmi_dx,21,0.046415888336127774,train,0.8683274021352313,0.013754570858473416,0.8560709885515733,0.015538556205976425,0.8460982658959537,0.01601819528819788 +flat_mae,reg,logistic,ppmi_dx,21,0.046415888336127774,test,0.63,0.0424920510213381,0.5713127099988413,0.050998198189696906,0.5742784380305602,0.04571166275906462 +flat_mae,reg,logistic,ppmi_dx,22,0.005994842503189409,train,0.7491103202846975,0.0168037866146027,0.7114643071713365,0.020763370766106688,0.7031818668379362,0.019240178099778436 +flat_mae,reg,logistic,ppmi_dx,22,0.005994842503189409,test,0.61,0.040688332479962855,0.5215311004784688,0.051798379178762136,0.5377758913412564,0.04330185364987699 +flat_mae,reg,logistic,ppmi_dx,23,0.046415888336127774,train,0.8790035587188612,0.013375865587714886,0.8680386740331492,0.015143504211216342,0.8582476985656176,0.01593496896982922 +flat_mae,reg,logistic,ppmi_dx,23,0.046415888336127774,test,0.61,0.04415365896502803,0.5555555555555556,0.052216474748260626,0.5581494057724957,0.04758618467183287 +flat_mae,reg,logistic,ppmi_dx,24,0.005994842503189409,train,0.7455516014234875,0.016208196285473413,0.7047137193520059,0.02051015624217879,0.6968127809890816,0.01876218617889708 +flat_mae,reg,logistic,ppmi_dx,24,0.005994842503189409,test,0.61,0.046850720378666534,0.5623386825272135,0.05309385510104067,0.5632427843803056,0.04977510662415948 +flat_mae,reg,logistic,ppmi_dx,25,0.3593813663804626,train,0.9786476868327402,0.00569172109392499,0.9773606187560425,0.006057212442170915,0.9757011346606722,0.006647452711211105 +flat_mae,reg,logistic,ppmi_dx,25,0.3593813663804626,test,0.55,0.04431945848044625,0.47862356621480706,0.049787439345267866,0.4893887945670628,0.04534000554161029 +flat_mae,reg,logistic,ppmi_dx,26,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,ppmi_dx,26,2.782559402207126,test,0.63,0.046454278597347735,0.6093337556752191,0.04867740541182819,0.6099320882852293,0.04877760172131098 +flat_mae,reg,logistic,ppmi_dx,27,0.046415888336127774,train,0.8505338078291815,0.015005012185906613,0.8350869838608258,0.017354731906909334,0.8238198458574182,0.017677246179854042 +flat_mae,reg,logistic,ppmi_dx,27,0.046415888336127774,test,0.63,0.042892889853680886,0.5713127099988413,0.05189671084691959,0.5742784380305602,0.04646399654602365 +flat_mae,reg,logistic,ppmi_dx,28,0.046415888336127774,train,0.8772241992882562,0.012812441138991519,0.8656429410399177,0.01457020291357479,0.8550631556411903,0.015228618902475186 +flat_mae,reg,logistic,ppmi_dx,28,0.046415888336127774,test,0.6,0.03970076069800174,0.5238095238095238,0.04809196719501338,0.5348047538200339,0.041642248347882396 +flat_mae,reg,logistic,ppmi_dx,29,0.046415888336127774,train,0.8754448398576512,0.012781626166192097,0.8638509351163531,0.0144328992434532,0.853618068935988,0.014999127929552086 +flat_mae,reg,logistic,ppmi_dx,29,0.046415888336127774,test,0.65,0.0415975960843893,0.5872154735228211,0.05111653031541098,0.5904074702886248,0.045208300175210235 +flat_mae,reg,logistic,ppmi_dx,30,0.046415888336127774,train,0.8736654804270463,0.012593003766915887,0.8607760560775707,0.014462685223989014,0.8486940697923357,0.015100337332675908 +flat_mae,reg,logistic,ppmi_dx,30,0.046415888336127774,test,0.63,0.04237888153314101,0.5713127099988413,0.05178616682731668,0.5742784380305602,0.04621181742001417 +flat_mae,reg,logistic,ppmi_dx,31,0.3593813663804626,train,0.9804270462633452,0.006013472545338957,0.9793024711818215,0.006361906042198198,0.9788856775850996,0.0065385387417896635 +flat_mae,reg,logistic,ppmi_dx,31,0.3593813663804626,test,0.59,0.04877102008365213,0.5626666666666666,0.05153427248160208,0.5623938879456706,0.0512611598185999 +flat_mae,reg,logistic,ppmi_dx,32,0.005994842503189409,train,0.7455516014234875,0.01570458077188478,0.7056136559277643,0.019873675195139683,0.6976825090986941,0.018278252921160244 +flat_mae,reg,logistic,ppmi_dx,32,0.005994842503189409,test,0.66,0.046429624163889155,0.6212121212121212,0.05330301661212197,0.6188455008488964,0.050491742601636073 +flat_mae,reg,logistic,ppmi_dx,33,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,ppmi_dx,33,166.81005372000556,test,0.61,0.04488211670587741,0.5793334052421529,0.04811050783902624,0.5785229202037352,0.04712415317516103 +flat_mae,reg,logistic,ppmi_dx,34,0.005994842503189409,train,0.7419928825622776,0.01626583649747381,0.6967870365202509,0.020921925431031704,0.6895739670306145,0.018843482307178437 +flat_mae,reg,logistic,ppmi_dx,34,0.005994842503189409,test,0.67,0.043566129045394884,0.6108031607500884,0.05417816495668028,0.6116298811544991,0.04788550891380557 +flat_mae,reg,logistic,ppmi_dx,35,0.046415888336127774,train,0.8683274021352313,0.013647268781266484,0.8557414807414807,0.015465642753366145,0.8452285377863413,0.015900058016322296 +flat_mae,reg,logistic,ppmi_dx,35,0.046415888336127774,test,0.63,0.04688711550095612,0.6053333333333333,0.049813590940168066,0.6048387096774194,0.049350114478584055 +flat_mae,reg,logistic,ppmi_dx,36,0.3593813663804626,train,0.9750889679715302,0.006341017899171948,0.9734905660377358,0.006801374784771119,0.9702017769214302,0.0077128530418281846 +flat_mae,reg,logistic,ppmi_dx,36,0.3593813663804626,test,0.58,0.04845812625349849,0.5543293718166383,0.051041701856394214,0.5543293718166383,0.05092635160088208 +flat_mae,reg,logistic,ppmi_dx,37,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,ppmi_dx,37,2.782559402207126,test,0.64,0.044729493625571035,0.6179966044142615,0.04798798216077442,0.6179966044142615,0.04821795369956176 +flat_mae,reg,logistic,ppmi_dx,38,0.005994842503189409,train,0.7597864768683275,0.01565645208333429,0.72291915462404,0.01955440737050009,0.713591843288375,0.018163110947530495 +flat_mae,reg,logistic,ppmi_dx,38,0.005994842503189409,test,0.59,0.04177989947331132,0.539894512400404,0.04704222889493259,0.5420203735144312,0.044034117381761606 +flat_mae,reg,logistic,ppmi_dx,39,0.005994842503189409,train,0.7366548042704626,0.01614972340911461,0.6900119260584376,0.021102765496571103,0.6834992506957824,0.018829030346469405 +flat_mae,reg,logistic,ppmi_dx,39,0.005994842503189409,test,0.71,0.04024112821480034,0.6640018537828757,0.049006799843238656,0.6591680814940577,0.044666235934206204 +flat_mae,reg,logistic,ppmi_dx,40,0.005994842503189409,train,0.7508896797153025,0.01607382310347287,0.7130854617325728,0.019929239295909174,0.7046269535431385,0.018475357168604517 +flat_mae,reg,logistic,ppmi_dx,40,0.005994842503189409,test,0.62,0.043406446525833,0.5703301673450927,0.050075256554158946,0.5713073005093379,0.046469905711091296 +flat_mae,reg,logistic,ppmi_dx,41,0.046415888336127774,train,0.8683274021352313,0.013338888663821697,0.8560709885515733,0.015117309014597952,0.8460982658959537,0.015686182989397834 +flat_mae,reg,logistic,ppmi_dx,41,0.046415888336127774,test,0.65,0.0461851751106348,0.6224786970121885,0.050244573846552974,0.6209677419354839,0.04956440512988713 +flat_mae,reg,logistic,ppmi_dx,42,0.3593813663804626,train,0.9839857651245552,0.0053959132675931885,0.9830051172808187,0.005752280832975625,0.9809061228858916,0.006435500800908677 +flat_mae,reg,logistic,ppmi_dx,42,0.3593813663804626,test,0.61,0.04791969949822306,0.5623386825272135,0.05286192220299092,0.5632427843803056,0.04980044097318068 +flat_mae,reg,logistic,ppmi_dx,43,0.005994842503189409,train,0.7384341637010676,0.01673777277822566,0.6973790728767926,0.020774782191522505,0.6901627060586598,0.019062907678273247 +flat_mae,reg,logistic,ppmi_dx,43,0.005994842503189409,test,0.65,0.0430308308076895,0.5944849959448499,0.05230797591467211,0.5955008488964346,0.04710849701492457 +flat_mae,reg,logistic,ppmi_dx,44,0.005994842503189409,train,0.7419928825622776,0.01674493663247029,0.6987152232184121,0.02142761891991603,0.6913134232498395,0.019439255573409926 +flat_mae,reg,logistic,ppmi_dx,44,0.005994842503189409,test,0.65,0.042871990856502104,0.6011396011396011,0.05078273850157791,0.6005942275042444,0.0469247488015876 +flat_mae,reg,logistic,ppmi_dx,45,0.046415888336127774,train,0.8754448398576512,0.013351552459948856,0.8647552255225522,0.014954815011440189,0.8562272532648255,0.015599352945304443 +flat_mae,reg,logistic,ppmi_dx,45,0.046415888336127774,test,0.67,0.040126175995227845,0.6296711929076422,0.04732509758050134,0.6269100169779287,0.044742133131379584 +flat_mae,reg,logistic,ppmi_dx,46,0.046415888336127774,train,0.8879003558718861,0.012667771538807666,0.8770432880137522,0.014459933266813078,0.8654731320916291,0.015219176845840713 +flat_mae,reg,logistic,ppmi_dx,46,0.046415888336127774,test,0.55,0.04442357932449837,0.5021573182874212,0.04967461444052123,0.5046689303904923,0.04696397063285494 +flat_mae,reg,logistic,ppmi_dx,47,0.3593813663804626,train,0.9804270462633452,0.006053501079391584,0.9792658181147389,0.006429528905555427,0.978015949475487,0.006925813206104861 +flat_mae,reg,logistic,ppmi_dx,47,0.3593813663804626,test,0.63,0.049193084879889375,0.6161427533976553,0.050186173960888446,0.6201188455008488,0.05077576542018513 +flat_mae,reg,logistic,ppmi_dx,48,0.005994842503189409,train,0.7437722419928826,0.015814509380837546,0.7048879034963607,0.019928719285579575,0.6971071505031042,0.0184073889377763 +flat_mae,reg,logistic,ppmi_dx,48,0.005994842503189409,test,0.69,0.041442470968802034,0.6408295678368672,0.051325738156429636,0.6379456706281834,0.046465255420764046 +flat_mae,reg,logistic,ppmi_dx,49,0.046415888336127774,train,0.8790035587188612,0.013452379062848952,0.8664989869349542,0.015492349003211031,0.8538990580175552,0.016083970350523485 +flat_mae,reg,logistic,ppmi_dx,49,0.046415888336127774,test,0.63,0.04117881008479969,0.5713127099988413,0.04958840322334698,0.5742784380305602,0.04422772521770073 +flat_mae,reg,logistic,ppmi_dx,50,0.046415888336127774,train,0.8718861209964412,0.01348303664610227,0.8596403596403597,0.015322337921015592,0.8489884393063584,0.01583631601803845 +flat_mae,reg,logistic,ppmi_dx,50,0.046415888336127774,test,0.56,0.04757348421127046,0.5280995280995281,0.05008016484197532,0.5280135823429541,0.04953309444209088 +flat_mae,reg,logistic,ppmi_dx,51,0.046415888336127774,train,0.8701067615658363,0.013524759220284371,0.8575263496032366,0.015460917558135055,0.8466736244915436,0.016007446341742976 +flat_mae,reg,logistic,ppmi_dx,51,0.046415888336127774,test,0.58,0.04326130372515372,0.5174632352941176,0.04917410137321495,0.5237691001697793,0.04474332239641174 +flat_mae,reg,logistic,ppmi_dx,52,0.046415888336127774,train,0.8701067615658363,0.013413741400252864,0.8584934620571669,0.015023187311378635,0.8492828088203811,0.015470563624524026 +flat_mae,reg,logistic,ppmi_dx,52,0.046415888336127774,test,0.65,0.046047644890917054,0.6266666666666667,0.04840533372767237,0.6260611205432938,0.0481972638479596 +flat_mae,reg,logistic,ppmi_dx,53,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,ppmi_dx,53,21.54434690031882,test,0.52,0.05098945381154813,0.5,0.05152855531038995,0.5008488964346349,0.05219297210937506 +flat_mae,reg,logistic,ppmi_dx,54,0.005994842503189409,train,0.7580071174377224,0.016410315773883938,0.7204470973782771,0.020399469767446127,0.7112770284735603,0.018806842928095865 +flat_mae,reg,logistic,ppmi_dx,54,0.005994842503189409,test,0.59,0.045373168282587445,0.5464100011063171,0.04964965079898625,0.5471137521222411,0.04752489762788377 +flat_mae,reg,logistic,ppmi_dx,55,0.005994842503189409,train,0.7437722419928826,0.01626863919380074,0.7003199431212228,0.021013526629860477,0.6927585099550417,0.018918974185574873 +flat_mae,reg,logistic,ppmi_dx,55,0.005994842503189409,test,0.7,0.04053333936403463,0.6493688639551192,0.0504711391163477,0.6460101867572157,0.045184925097943036 +flat_mae,reg,logistic,ppmi_dx,56,0.005994842503189409,train,0.7597864768683275,0.015978902898040347,0.7237424217597903,0.019710857638957347,0.7144615713979876,0.01835452842083074 +flat_mae,reg,logistic,ppmi_dx,56,0.005994842503189409,test,0.54,0.046484216676200965,0.4875222816399287,0.05044932006008548,0.4915110356536503,0.04756447760892626 +flat_mae,reg,logistic,ppmi_dx,57,0.005994842503189409,train,0.7526690391459074,0.015454897343611558,0.7120848582685582,0.019590869830594094,0.7034628559195033,0.01782427597595209 +flat_mae,reg,logistic,ppmi_dx,57,0.005994842503189409,test,0.7,0.042484232369197863,0.6553308823529411,0.05022142810085758,0.6511035653650254,0.0465092474597614 +flat_mae,reg,logistic,ppmi_dx,58,0.046415888336127774,train,0.8558718861209964,0.013362264766099174,0.8415430081139503,0.015325415167687598,0.8307642903018626,0.01570468168751532 +flat_mae,reg,logistic,ppmi_dx,58,0.046415888336127774,test,0.61,0.04559756133829966,0.5623386825272135,0.05144264571281226,0.5632427843803056,0.048309025540635 +flat_mae,reg,logistic,ppmi_dx,59,0.046415888336127774,train,0.8647686832740213,0.013794532156343449,0.8521810152691833,0.015520427252548218,0.8423383643759366,0.01588494382449048 +flat_mae,reg,logistic,ppmi_dx,59,0.046415888336127774,test,0.61,0.04521750103665616,0.568536342515765,0.05056103459123672,0.5683361629881154,0.04800614104498229 +flat_mae,reg,logistic,ppmi_dx,60,0.046415888336127774,train,0.8665480427046264,0.014083490398861353,0.8542908996629505,0.015826429217645883,0.8446531791907514,0.01625081625130179 +flat_mae,reg,logistic,ppmi_dx,60,0.046415888336127774,test,0.62,0.04622183034021911,0.5924495924495925,0.049675168115504154,0.5916808149405772,0.04925506667616436 +flat_mae,reg,logistic,ppmi_dx,61,0.046415888336127774,train,0.8718861209964412,0.012736047252137044,0.8608910891089109,0.01419161697149533,0.8524673517448084,0.014709011618593549 +flat_mae,reg,logistic,ppmi_dx,61,0.046415888336127774,test,0.65,0.04580330119107137,0.6266666666666667,0.04896125598001331,0.6260611205432938,0.04857936702341018 +flat_mae,reg,logistic,ppmi_dx,62,0.005994842503189409,train,0.7526690391459074,0.016216166178892894,0.7203709868522317,0.019439143688842066,0.7121601370156283,0.018449883490742253 +flat_mae,reg,logistic,ppmi_dx,62,0.005994842503189409,test,0.65,0.03592478253239677,0.561128526645768,0.050664560234192337,0.5751273344651953,0.040308971798766434 +flat_mae,reg,logistic,ppmi_dx,63,0.046415888336127774,train,0.8647686832740213,0.014055576761730098,0.8525138121546962,0.01563736471273584,0.8432080924855492,0.01585936555232183 +flat_mae,reg,logistic,ppmi_dx,63,0.046415888336127774,test,0.59,0.044339467746016076,0.5523528769516323,0.04823864436309188,0.5522071307300509,0.04652777158969864 +flat_mae,reg,logistic,ppmi_dx,64,0.046415888336127774,train,0.8647686832740213,0.013626112852988457,0.8521810152691833,0.015349041985266466,0.8423383643759366,0.015776235354180556 +flat_mae,reg,logistic,ppmi_dx,64,0.046415888336127774,test,0.55,0.04497766112193918,0.4950061721467849,0.04920324877547284,0.49957555178268254,0.04633296084553126 +flat_mae,reg,logistic,ppmi_dx,65,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,ppmi_dx,65,2.782559402207126,test,0.53,0.04938056297775472,0.4986666666666667,0.05060395535274254,0.4987266553480475,0.05011228858351531 +flat_mae,reg,logistic,ppmi_dx,66,0.005994842503189409,train,0.7580071174377224,0.016574709313969677,0.7237131289762868,0.0203248119104985,0.7147559409120103,0.01909583126417066 +flat_mae,reg,logistic,ppmi_dx,66,0.005994842503189409,test,0.61,0.041952144164512026,0.5481404240528328,0.04970145374702517,0.5530560271646858,0.044608945345618364 +flat_mae,reg,logistic,ppmi_dx,67,0.005994842503189409,train,0.7615658362989324,0.01658491434245008,0.7253817990868912,0.02059366302526034,0.7159066581031899,0.019148944050996944 +flat_mae,reg,logistic,ppmi_dx,67,0.005994842503189409,test,0.64,0.042970031417256374,0.5863970588235294,0.05187778748279023,0.5874363327674024,0.04711450442966869 +flat_mae,reg,logistic,ppmi_dx,68,0.046415888336127774,train,0.8558718861209964,0.014677590270001435,0.8422764960033817,0.016768837469080748,0.8325037465210876,0.01740632375963918 +flat_mae,reg,logistic,ppmi_dx,68,0.046415888336127774,test,0.59,0.049398769215436934,0.5670995670995671,0.051106633490824845,0.5674872665534805,0.051184693287689445 +flat_mae,reg,logistic,ppmi_dx,69,0.005994842503189409,train,0.7348754448398577,0.016618985039568244,0.6913715387195336,0.020657220930433037,0.6846633483194177,0.018709012161021152 +flat_mae,reg,logistic,ppmi_dx,69,0.005994842503189409,test,0.7,0.04051434807571263,0.6553308823529411,0.0494273224605534,0.6511035653650254,0.04560115558333581 +flat_mae,reg,logistic,ppmi_dx,70,0.046415888336127774,train,0.8843416370106761,0.012500080925764812,0.8737187797078905,0.01405353381957418,0.8634526867908371,0.014574702779051213 +flat_mae,reg,logistic,ppmi_dx,70,0.046415888336127774,test,0.55,0.04776616375636628,0.52,0.04906446309607456,0.5199490662139219,0.04876989734277054 +flat_mae,reg,logistic,ppmi_dx,71,0.005994842503189409,train,0.7508896797153025,0.016160833153341107,0.71046278850683,0.020673960925599585,0.702017769214301,0.01891706154858337 +flat_mae,reg,logistic,ppmi_dx,71,0.005994842503189409,test,0.67,0.040989808489428195,0.6033177064551027,0.05302157705857945,0.6065365025466893,0.04542963990353352 +flat_mae,reg,logistic,ppmi_dx,72,0.005994842503189409,train,0.7491103202846975,0.016271936414946533,0.7060963804479654,0.020922566154508118,0.6979634981802612,0.01892516729750508 +flat_mae,reg,logistic,ppmi_dx,72,0.005994842503189409,test,0.62,0.04761291841506883,0.5766488413547237,0.05330278657926278,0.5764006791171477,0.050403523087319185 +flat_mae,reg,logistic,ppmi_dx,73,0.3593813663804626,train,0.9804270462633452,0.005728625510427911,0.9792658181147389,0.006094147752892682,0.978015949475487,0.0066743508681687096 +flat_mae,reg,logistic,ppmi_dx,73,0.3593813663804626,test,0.63,0.04358683746270198,0.6093337556752191,0.04632335023113918,0.6099320882852293,0.046898167269677625 +flat_mae,reg,logistic,ppmi_dx,74,0.000774263682681127,train,0.6903914590747331,0.014521262810741419,0.6027268147618506,0.021960904666374077,0.6146167844144723,0.016892220808677486 +flat_mae,reg,logistic,ppmi_dx,74,0.000774263682681127,test,0.64,0.03565368985112201,0.54337899543379,0.050841961188031554,0.5619694397283531,0.03957990269311171 +flat_mae,reg,logistic,ppmi_dx,75,0.005994842503189409,train,0.7455516014234875,0.015856005971499245,0.7028708753119512,0.020422463609873088,0.6950733247698566,0.018515022930456954 +flat_mae,reg,logistic,ppmi_dx,75,0.005994842503189409,test,0.67,0.04224973372697158,0.6176572818908586,0.0526452676787844,0.616723259762309,0.04745713285401925 +flat_mae,reg,logistic,ppmi_dx,76,0.005994842503189409,train,0.7508896797153025,0.015630750078608147,0.7113507960965588,0.020011171945062238,0.7028874973239134,0.018396051151594244 +flat_mae,reg,logistic,ppmi_dx,76,0.005994842503189409,test,0.68,0.044352276153541426,0.64349376114082,0.051167562891031707,0.6400679117147707,0.04859617073738655 +flat_mae,reg,logistic,ppmi_dx,77,0.046415888336127774,train,0.8718861209964412,0.013609171410669113,0.861190773495067,0.015161367237474705,0.8533370798544209,0.01577180710030378 +flat_mae,reg,logistic,ppmi_dx,77,0.046415888336127774,test,0.55,0.04479687489100104,0.508679986898133,0.04904657916205083,0.5097623089983022,0.047280232912353996 +flat_mae,reg,logistic,ppmi_dx,78,0.005994842503189409,train,0.7544483985765125,0.01609430359078452,0.717184240850679,0.020088256312286838,0.7083868550631557,0.018604621972705338 +flat_mae,reg,logistic,ppmi_dx,78,0.005994842503189409,test,0.63,0.038591833332973446,0.5460679671205987,0.05100876444992103,0.5589983022071308,0.042103915821315126 +flat_mae,reg,logistic,ppmi_dx,79,0.005994842503189409,train,0.7455516014234875,0.01499948454681032,0.7009692842923853,0.01940668568881608,0.6933338685506316,0.017442662254376816 +flat_mae,reg,logistic,ppmi_dx,79,0.005994842503189409,test,0.65,0.043106653778738145,0.5944849959448499,0.05088503253114191,0.5955008488964346,0.04617345731964741 +flat_mae,reg,logistic,ppmi_dx,80,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,ppmi_dx,80,21.54434690031882,test,0.58,0.050514013897135515,0.5625,0.052187323206870605,0.564516129032258,0.05332663823033335 +flat_mae,reg,logistic,ppmi_dx,81,0.005994842503189409,train,0.7508896797153025,0.014953541864069114,0.7086443891456333,0.019295527007465965,0.700278312995076,0.017532890168947567 +flat_mae,reg,logistic,ppmi_dx,81,0.005994842503189409,test,0.66,0.04283075530503752,0.6263736263736264,0.04776809425140189,0.6239388794567062,0.04629979603025772 +flat_mae,reg,logistic,ppmi_dx,82,0.005994842503189409,train,0.7473309608540926,0.0160738723468158,0.7044821661334282,0.020307778563806167,0.6965184114750589,0.01838798553971582 +flat_mae,reg,logistic,ppmi_dx,82,0.005994842503189409,test,0.67,0.041194616153084854,0.6176572818908586,0.05082201761729274,0.616723259762309,0.04597639148611591 +flat_mae,reg,logistic,ppmi_dx,83,0.046415888336127774,train,0.8701067615658363,0.014030931839677003,0.857854126027739,0.015906944929197142,0.8475433526011561,0.016403736650072498 +flat_mae,reg,logistic,ppmi_dx,83,0.046415888336127774,test,0.52,0.04858004116918799,0.49066213921901525,0.050153724163580334,0.49066213921901525,0.05008324312421191 +flat_mae,reg,logistic,ppmi_dx,84,0.000774263682681127,train,0.7046263345195729,0.01468762388065834,0.6257541720154044,0.02260785132658797,0.6322655748233783,0.017625065922699527 +flat_mae,reg,logistic,ppmi_dx,84,0.000774263682681127,test,0.62,0.035047002724912166,0.5062370062370062,0.049774486778210655,0.5356536502546689,0.03813136775736135 +flat_mae,reg,logistic,ppmi_dx,85,0.005994842503189409,train,0.7526690391459074,0.015111114396218905,0.7129734754540478,0.019204553028637886,0.7043325840291158,0.017598725416304643 +flat_mae,reg,logistic,ppmi_dx,85,0.005994842503189409,test,0.64,0.03966156829980377,0.5714285714285714,0.04956489499657217,0.5772495755517827,0.04310330810062263 +flat_mae,reg,logistic,ppmi_dx,86,0.000774263682681127,train,0.6957295373665481,0.014230328010299277,0.6104104591061112,0.021947489390513907,0.6206915007493042,0.01680654092323497 +flat_mae,reg,logistic,ppmi_dx,86,0.000774263682681127,test,0.57,0.030209104587855612,0.4188403838356535,0.03812935706238327,0.47495755517826826,0.029785095190924135 +flat_mae,reg,logistic,ppmi_dx,87,0.005994842503189409,train,0.7722419928825622,0.016985275635220114,0.7399652978600347,0.020999699550107876,0.7297955469920787,0.019876455495164706 +flat_mae,reg,logistic,ppmi_dx,87,0.005994842503189409,test,0.61,0.045091710989936945,0.5481404240528328,0.054017228443095866,0.5530560271646858,0.048372968032262896 +flat_mae,reg,logistic,ppmi_dx,88,0.005994842503189409,train,0.7491103202846975,0.016341505263148908,0.7106044503851083,0.0203423450662508,0.7023121387283238,0.018764139193567057 +flat_mae,reg,logistic,ppmi_dx,88,0.005994842503189409,test,0.63,0.040261520090528136,0.5552350042072365,0.051203364308913944,0.5640916808149405,0.04376841586927747 +flat_mae,reg,logistic,ppmi_dx,89,0.005994842503189409,train,0.7419928825622776,0.01638131443662093,0.6987152232184121,0.02089227942626916,0.6913134232498395,0.01891326868676931 +flat_mae,reg,logistic,ppmi_dx,89,0.005994842503189409,test,0.71,0.03786071314700768,0.6514004087029691,0.050817164464169375,0.648981324278438,0.04389853060889528 +flat_mae,reg,logistic,ppmi_dx,90,0.005994842503189409,train,0.7597864768683275,0.015213123134738687,0.72291915462404,0.018879512156461543,0.713591843288375,0.017459713749725596 +flat_mae,reg,logistic,ppmi_dx,90,0.005994842503189409,test,0.62,0.043943322587169036,0.5824175824175825,0.04803208812513042,0.5814940577249575,0.046145471885520595 +flat_mae,reg,logistic,ppmi_dx,91,0.3593813663804626,train,0.9786476868327402,0.00572709547663543,0.9773606187560425,0.006096109470871776,0.9757011346606722,0.006730094047259122 +flat_mae,reg,logistic,ppmi_dx,91,0.3593813663804626,test,0.55,0.04893855739598379,0.529239460194581,0.05058227212132874,0.5301358234295416,0.05132280695900836 +flat_mae,reg,logistic,ppmi_dx,92,0.005994842503189409,train,0.7437722419928826,0.015695741136179546,0.7021902967498823,0.020144032695116422,0.6944979661742667,0.018360058823302047 +flat_mae,reg,logistic,ppmi_dx,92,0.005994842503189409,test,0.69,0.04127897285543815,0.6408295678368672,0.05074286311874772,0.6379456706281834,0.04607875315603386 +flat_mae,reg,logistic,ppmi_dx,93,0.046415888336127774,train,0.8790035587188612,0.01319188761555613,0.8691806331471137,0.014638323431161794,0.8617266110040677,0.015298487056525771 +flat_mae,reg,logistic,ppmi_dx,93,0.046415888336127774,test,0.57,0.04682332324814205,0.5501621508525996,0.04863927648052188,0.551358234295416,0.04929896502114679 +flat_mae,reg,logistic,ppmi_dx,94,0.005994842503189409,train,0.7615658362989324,0.016054703710347985,0.7261925538103549,0.020028669788425213,0.7167763862128024,0.01867922745075365 +flat_mae,reg,logistic,ppmi_dx,94,0.005994842503189409,test,0.67,0.04345540702835493,0.6239316239316239,0.050983038238929586,0.6218166383701189,0.04691919585946412 +flat_mae,reg,logistic,ppmi_dx,95,0.005994842503189409,train,0.7651245551601423,0.016091474858635553,0.7302792321116929,0.01997651383720001,0.7205362877328195,0.018672313973447287 +flat_mae,reg,logistic,ppmi_dx,95,0.005994842503189409,test,0.6,0.03474411604862038,0.49264332825976664,0.046299001572449386,0.5195246179966044,0.03681995214668509 +flat_mae,reg,logistic,ppmi_dx,96,0.005994842503189409,train,0.7597864768683275,0.01589989828159854,0.72291915462404,0.019896673830553147,0.713591843288375,0.0184665213562254 +flat_mae,reg,logistic,ppmi_dx,96,0.005994842503189409,test,0.64,0.03841497103994744,0.5628946090335114,0.04931395248581373,0.5721561969439728,0.0417131268276978 +flat_mae,reg,logistic,ppmi_dx,97,0.005994842503189409,train,0.7455516014234875,0.015995831259967213,0.7028708753119512,0.020376820049494728,0.6950733247698566,0.018518406102744737 +flat_mae,reg,logistic,ppmi_dx,97,0.005994842503189409,test,0.66,0.04102433911716311,0.6155585707824514,0.04767721151604067,0.6137521222410866,0.0444097017419626 +flat_mae,reg,logistic,ppmi_dx,98,0.046415888336127774,train,0.8683274021352313,0.013277968748931158,0.8557414807414807,0.015182713932198694,0.8452285377863413,0.01581164362908229 +flat_mae,reg,logistic,ppmi_dx,98,0.046415888336127774,test,0.64,0.044693820601957944,0.5989304812834224,0.051787343771926925,0.597623089983022,0.048995357777559465 +flat_mae,reg,logistic,ppmi_dx,99,0.005994842503189409,train,0.7473309608540926,0.015600619426899595,0.7054115909929863,0.02005146283326195,0.6973881395846714,0.018267147725048977 +flat_mae,reg,logistic,ppmi_dx,99,0.005994842503189409,test,0.63,0.043833188339430666,0.5783475783475784,0.051714244753356876,0.5793718166383701,0.04745555187402493 +flat_mae,reg,logistic,ppmi_dx,100,0.005994842503189409,train,0.7473309608540926,0.0162319717323818,0.7089866826144668,0.020128290165271515,0.7008670520231214,0.01865310164237348 +flat_mae,reg,logistic,ppmi_dx,100,0.005994842503189409,test,0.68,0.04143765920029749,0.6323529411764706,0.050894282645890666,0.6298811544991512,0.04631517441643852 diff --git a/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic/log.txt b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5781d3176f247f363ea271b8ea797c51e596e303 --- /dev/null +++ b/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:25:56 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations attn_reg1_pep4; eval v2 (ppmi_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic +model: flat_mae +representation: reg +dataset: ppmi_dx +distributed: false +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/eval_v2/ppmi_dx__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:17:21 time: 4.4872 data: 3.6895 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:34 time: 0.2417 data: 0.0878 max mem: 3005 +extract (train) [ 40/232] eta: 0:01:01 time: 0.1912 data: 0.0646 max mem: 3005 +extract (train) [ 60/232] eta: 0:00:47 time: 0.1890 data: 0.0649 max mem: 3005 +extract (train) [ 80/232] eta: 0:00:38 time: 0.1735 data: 0.0581 max mem: 3005 +extract (train) [100/232] eta: 0:00:31 time: 0.1774 data: 0.0604 max mem: 3005 +extract (train) [120/232] eta: 0:00:25 time: 0.1583 data: 0.0495 max mem: 3005 +extract (train) [140/232] eta: 0:00:19 time: 0.1765 data: 0.0591 max mem: 3005 +extract (train) [160/232] eta: 0:00:15 time: 0.1596 data: 0.0500 max mem: 3005 +extract (train) [180/232] eta: 0:00:10 time: 0.1954 data: 0.0699 max mem: 3005 +extract (train) [200/232] eta: 0:00:06 time: 0.1791 data: 0.0617 max mem: 3005 +extract (train) [220/232] eta: 0:00:02 time: 0.1678 data: 0.0557 max mem: 3005 +extract (train) [231/232] eta: 0:00:00 time: 0.1667 data: 0.0569 max mem: 3005 +extract (train) Total time: 0:00:46 (0.2018 s / it) +extract (validation) [ 0/50] eta: 0:03:11 time: 3.8220 data: 3.6767 max mem: 3005 +extract (validation) [20/50] eta: 0:00:11 time: 0.2268 data: 0.0887 max mem: 3005 +extract (validation) [40/50] eta: 0:00:02 time: 0.1529 data: 0.0437 max mem: 3005 +extract (validation) [49/50] eta: 0:00:00 time: 0.1571 data: 0.0478 max mem: 3005 +extract (validation) Total time: 0:00:13 (0.2642 s / it) +extract (test) [ 0/50] eta: 0:03:17 time: 3.9502 data: 3.6983 max mem: 3005 +extract (test) [20/50] eta: 0:00:11 time: 0.2219 data: 0.0864 max mem: 3005 +extract (test) [40/50] eta: 0:00:02 time: 0.1575 data: 0.0470 max mem: 3005 +extract (test) [49/50] eta: 0:00:00 time: 0.1623 data: 0.0512 max mem: 3005 +extract (test) Total time: 0:00:13 (0.2678 s / it) +feature extraction time: 0:01:13 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | ppmi_dx | | 0.046416 | train | 0.87189 | 0.013042 | 0.85996 | 0.014726 | 0.84949 | 0.015176 | +| flat_mae | reg | logistic | ppmi_dx | | 0.046416 | test | 0.65 | 0.042042 | 0.60114 | 0.048778 | 0.59953 | 0.045803 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04571186279293374, "f1": 0.5703301673450927, "f1_std": 0.05231788984034999, "bacc": 0.5713073005093379, "bacc_std": 0.048655443192094076} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04210850745395756, "f1": 0.6259934548854604, "f1_std": 0.051952166578008795, "bacc": 0.6247877758913413, "bacc_std": 0.04639610373425943} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.044195311968578746, "f1": 0.5989304812834224, "f1_std": 0.049254482167341865, "bacc": 0.597623089983022, "bacc_std": 0.046711008197985064} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04661801797588568, "f1": 0.592944369063772, "f1_std": 0.05346581043028313, "bacc": 0.5925297113752122, "bacc_std": 0.04980534414464328} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.047338001647724844, "f1": 0.5442708333333334, "f1_std": 0.04956300620932027, "bacc": 0.5441426146010186, "bacc_std": 0.047989714075625134} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.045618224428401424, "f1": 0.5555555555555556, "f1_std": 0.052673059223196934, "bacc": 0.5581494057724957, "bacc_std": 0.04838596606071715} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04778844630242753, "f1": 0.584, "f1_std": 0.05092954074073358, "bacc": 0.583616298811545, "bacc_std": 0.0506670525307329} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 21.54434690031882, "split": "test", "acc": 0.68, "acc_std": 0.04721116817025395, "f1": 0.6637242538881883, "f1_std": 0.04903213408634025, "bacc": 0.6655348047538201, "bacc_std": 0.04951188934582191} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.03950815611997097, "f1": 0.6640018537828757, "f1_std": 0.04940689484440615, "bacc": 0.6591680814940577, "bacc_std": 0.04509909036662303} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04327835024582152, "f1": 0.5477159656264134, "f1_std": 0.04800917683381933, "bacc": 0.5500848896434635, "bacc_std": 0.044780619706748966} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 2.782559402207126, "split": "test", "acc": 0.53, "acc_std": 0.051227086585126036, "f1": 0.5037482842360892, "f1_std": 0.052350690291502605, "bacc": 0.5038200339558574, "bacc_std": 0.05255858112076436} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.037629451231714774, "f1": 0.5215311004784688, "f1_std": 0.047685830491403396, "bacc": 0.5377758913412564, "bacc_std": 0.03986298364161761} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04061869520307121, "f1": 0.5238095238095238, "f1_std": 0.04894970234769562, "bacc": 0.5348047538200339, "bacc_std": 0.042423345806271676} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04228427130742589, "f1": 0.5476190476190476, "f1_std": 0.05244185327396815, "bacc": 0.5560271646859083, "bacc_std": 0.04534031701936225} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 166.81005372000556, "split": "test", "acc": 0.63, "acc_std": 0.04791633959308662, "f1": 0.6053333333333333, "f1_std": 0.051402814146095176, "bacc": 0.6048387096774194, "bacc_std": 0.050969410652772454} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04480357128622673, "f1": 0.5623386825272135, "f1_std": 0.050557997393108754, "bacc": 0.5632427843803056, "bacc_std": 0.04730628014090519} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04113653364103495, "f1": 0.5476190476190476, "f1_std": 0.05099806441551977, "bacc": 0.5560271646859083, "bacc_std": 0.04407461570750624} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04379214084741691, "f1": 0.5847828526540231, "f1_std": 0.050376610672929055, "bacc": 0.5844651952461799, "bacc_std": 0.047053803881620726} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.045580890732849884, "f1": 0.6263736263736264, "f1_std": 0.050686535691664425, "bacc": 0.6239388794567062, "bacc_std": 0.04899939261217484} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04302825118454153, "f1": 0.5555555555555556, "f1_std": 0.04894458069080654, "bacc": 0.5581494057724957, "bacc_std": 0.04511066337781345} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.0424920510213381, "f1": 0.5713127099988413, "f1_std": 0.050998198189696906, "bacc": 0.5742784380305602, "bacc_std": 0.04571166275906462} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.040688332479962855, "f1": 0.5215311004784688, "f1_std": 0.051798379178762136, "bacc": 0.5377758913412564, "bacc_std": 0.04330185364987699} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04415365896502803, "f1": 0.5555555555555556, "f1_std": 0.052216474748260626, "bacc": 0.5581494057724957, "bacc_std": 0.04758618467183287} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.046850720378666534, "f1": 0.5623386825272135, "f1_std": 0.05309385510104067, "bacc": 0.5632427843803056, "bacc_std": 0.04977510662415948} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.55, "acc_std": 0.04431945848044625, "f1": 0.47862356621480706, "f1_std": 0.049787439345267866, "bacc": 0.4893887945670628, "bacc_std": 0.04534000554161029} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 2.782559402207126, "split": "test", "acc": 0.63, "acc_std": 0.046454278597347735, "f1": 0.6093337556752191, "f1_std": 0.04867740541182819, "bacc": 0.6099320882852293, "bacc_std": 0.04877760172131098} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.042892889853680886, "f1": 0.5713127099988413, "f1_std": 0.05189671084691959, "bacc": 0.5742784380305602, "bacc_std": 0.04646399654602365} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.03970076069800174, "f1": 0.5238095238095238, "f1_std": 0.04809196719501338, "bacc": 0.5348047538200339, "bacc_std": 0.041642248347882396} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.0415975960843893, "f1": 0.5872154735228211, "f1_std": 0.05111653031541098, "bacc": 0.5904074702886248, "bacc_std": 0.045208300175210235} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04237888153314101, "f1": 0.5713127099988413, "f1_std": 0.05178616682731668, "bacc": 0.5742784380305602, "bacc_std": 0.04621181742001417} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04877102008365213, "f1": 0.5626666666666666, "f1_std": 0.05153427248160208, "bacc": 0.5623938879456706, "bacc_std": 0.0512611598185999} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.046429624163889155, "f1": 0.6212121212121212, "f1_std": 0.05330301661212197, "bacc": 0.6188455008488964, "bacc_std": 0.050491742601636073} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 166.81005372000556, "split": "test", "acc": 0.61, "acc_std": 0.04488211670587741, "f1": 0.5793334052421529, "f1_std": 0.04811050783902624, "bacc": 0.5785229202037352, "bacc_std": 0.04712415317516103} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.043566129045394884, "f1": 0.6108031607500884, "f1_std": 0.05417816495668028, "bacc": 0.6116298811544991, "bacc_std": 0.04788550891380557} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04688711550095612, "f1": 0.6053333333333333, "f1_std": 0.049813590940168066, "bacc": 0.6048387096774194, "bacc_std": 0.049350114478584055} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.04845812625349849, "f1": 0.5543293718166383, "f1_std": 0.051041701856394214, "bacc": 0.5543293718166383, "bacc_std": 0.05092635160088208} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 2.782559402207126, "split": "test", "acc": 0.64, "acc_std": 0.044729493625571035, "f1": 0.6179966044142615, "f1_std": 0.04798798216077442, "bacc": 0.6179966044142615, "bacc_std": 0.04821795369956176} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04177989947331132, "f1": 0.539894512400404, "f1_std": 0.04704222889493259, "bacc": 0.5420203735144312, "bacc_std": 0.044034117381761606} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.04024112821480034, "f1": 0.6640018537828757, "f1_std": 0.049006799843238656, "bacc": 0.6591680814940577, "bacc_std": 0.044666235934206204} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.043406446525833, "f1": 0.5703301673450927, "f1_std": 0.050075256554158946, "bacc": 0.5713073005093379, "bacc_std": 0.046469905711091296} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.0461851751106348, "f1": 0.6224786970121885, "f1_std": 0.050244573846552974, "bacc": 0.6209677419354839, "bacc_std": 0.04956440512988713} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.04791969949822306, "f1": 0.5623386825272135, "f1_std": 0.05286192220299092, "bacc": 0.5632427843803056, "bacc_std": 0.04980044097318068} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.0430308308076895, "f1": 0.5944849959448499, "f1_std": 0.05230797591467211, "bacc": 0.5955008488964346, "bacc_std": 0.04710849701492457} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.042871990856502104, "f1": 0.6011396011396011, "f1_std": 0.05078273850157791, "bacc": 0.6005942275042444, "bacc_std": 0.0469247488015876} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.040126175995227845, "f1": 0.6296711929076422, "f1_std": 0.04732509758050134, "bacc": 0.6269100169779287, "bacc_std": 0.044742133131379584} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.55, "acc_std": 0.04442357932449837, "f1": 0.5021573182874212, "f1_std": 0.04967461444052123, "bacc": 0.5046689303904923, "bacc_std": 0.04696397063285494} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.049193084879889375, "f1": 0.6161427533976553, "f1_std": 0.050186173960888446, "bacc": 0.6201188455008488, "bacc_std": 0.05077576542018513} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.041442470968802034, "f1": 0.6408295678368672, "f1_std": 0.051325738156429636, "bacc": 0.6379456706281834, "bacc_std": 0.046465255420764046} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04117881008479969, "f1": 0.5713127099988413, "f1_std": 0.04958840322334698, "bacc": 0.5742784380305602, "bacc_std": 0.04422772521770073} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.56, "acc_std": 0.04757348421127046, "f1": 0.5280995280995281, "f1_std": 0.05008016484197532, "bacc": 0.5280135823429541, "bacc_std": 0.04953309444209088} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.04326130372515372, "f1": 0.5174632352941176, "f1_std": 0.04917410137321495, "bacc": 0.5237691001697793, "bacc_std": 0.04474332239641174} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.046047644890917054, "f1": 0.6266666666666667, "f1_std": 0.04840533372767237, "bacc": 0.6260611205432938, "bacc_std": 0.0481972638479596} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 21.54434690031882, "split": "test", "acc": 0.52, "acc_std": 0.05098945381154813, "f1": 0.5, "f1_std": 0.05152855531038995, "bacc": 0.5008488964346349, "bacc_std": 0.05219297210937506} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.045373168282587445, "f1": 0.5464100011063171, "f1_std": 0.04964965079898625, "bacc": 0.5471137521222411, "bacc_std": 0.04752489762788377} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.04053333936403463, "f1": 0.6493688639551192, "f1_std": 0.0504711391163477, "bacc": 0.6460101867572157, "bacc_std": 0.045184925097943036} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.54, "acc_std": 0.046484216676200965, "f1": 0.4875222816399287, "f1_std": 0.05044932006008548, "bacc": 0.4915110356536503, "bacc_std": 0.04756447760892626} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.042484232369197863, "f1": 0.6553308823529411, "f1_std": 0.05022142810085758, "bacc": 0.6511035653650254, "bacc_std": 0.0465092474597614} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04559756133829966, "f1": 0.5623386825272135, "f1_std": 0.05144264571281226, "bacc": 0.5632427843803056, "bacc_std": 0.048309025540635} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04521750103665616, "f1": 0.568536342515765, "f1_std": 0.05056103459123672, "bacc": 0.5683361629881154, "bacc_std": 0.04800614104498229} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04622183034021911, "f1": 0.5924495924495925, "f1_std": 0.049675168115504154, "bacc": 0.5916808149405772, "bacc_std": 0.04925506667616436} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04580330119107137, "f1": 0.6266666666666667, "f1_std": 0.04896125598001331, "bacc": 0.6260611205432938, "bacc_std": 0.04857936702341018} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.03592478253239677, "f1": 0.561128526645768, "f1_std": 0.050664560234192337, "bacc": 0.5751273344651953, "bacc_std": 0.040308971798766434} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.044339467746016076, "f1": 0.5523528769516323, "f1_std": 0.04823864436309188, "bacc": 0.5522071307300509, "bacc_std": 0.04652777158969864} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.55, "acc_std": 0.04497766112193918, "f1": 0.4950061721467849, "f1_std": 0.04920324877547284, "bacc": 0.49957555178268254, "bacc_std": 0.04633296084553126} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 2.782559402207126, "split": "test", "acc": 0.53, "acc_std": 0.04938056297775472, "f1": 0.4986666666666667, "f1_std": 0.05060395535274254, "bacc": 0.4987266553480475, "bacc_std": 0.05011228858351531} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.041952144164512026, "f1": 0.5481404240528328, "f1_std": 0.04970145374702517, "bacc": 0.5530560271646858, "bacc_std": 0.044608945345618364} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.042970031417256374, "f1": 0.5863970588235294, "f1_std": 0.05187778748279023, "bacc": 0.5874363327674024, "bacc_std": 0.04711450442966869} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.049398769215436934, "f1": 0.5670995670995671, "f1_std": 0.051106633490824845, "bacc": 0.5674872665534805, "bacc_std": 0.051184693287689445} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.04051434807571263, "f1": 0.6553308823529411, "f1_std": 0.0494273224605534, "bacc": 0.6511035653650254, "bacc_std": 0.04560115558333581} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.55, "acc_std": 0.04776616375636628, "f1": 0.52, "f1_std": 0.04906446309607456, "bacc": 0.5199490662139219, "bacc_std": 0.04876989734277054} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.040989808489428195, "f1": 0.6033177064551027, "f1_std": 0.05302157705857945, "bacc": 0.6065365025466893, "bacc_std": 0.04542963990353352} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04761291841506883, "f1": 0.5766488413547237, "f1_std": 0.05330278657926278, "bacc": 0.5764006791171477, "bacc_std": 0.050403523087319185} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.04358683746270198, "f1": 0.6093337556752191, "f1_std": 0.04632335023113918, "bacc": 0.6099320882852293, "bacc_std": 0.046898167269677625} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.000774263682681127, "split": "test", "acc": 0.64, "acc_std": 0.03565368985112201, "f1": 0.54337899543379, "f1_std": 0.050841961188031554, "bacc": 0.5619694397283531, "bacc_std": 0.03957990269311171} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04224973372697158, "f1": 0.6176572818908586, "f1_std": 0.0526452676787844, "bacc": 0.616723259762309, "bacc_std": 0.04745713285401925} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.044352276153541426, "f1": 0.64349376114082, "f1_std": 0.051167562891031707, "bacc": 0.6400679117147707, "bacc_std": 0.04859617073738655} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.55, "acc_std": 0.04479687489100104, "f1": 0.508679986898133, "f1_std": 0.04904657916205083, "bacc": 0.5097623089983022, "bacc_std": 0.047280232912353996} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.038591833332973446, "f1": 0.5460679671205987, "f1_std": 0.05100876444992103, "bacc": 0.5589983022071308, "bacc_std": 0.042103915821315126} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.043106653778738145, "f1": 0.5944849959448499, "f1_std": 0.05088503253114191, "bacc": 0.5955008488964346, "bacc_std": 0.04617345731964741} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 21.54434690031882, "split": "test", "acc": 0.58, "acc_std": 0.050514013897135515, "f1": 0.5625, "f1_std": 0.052187323206870605, "bacc": 0.564516129032258, "bacc_std": 0.05332663823033335} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04283075530503752, "f1": 0.6263736263736264, "f1_std": 0.04776809425140189, "bacc": 0.6239388794567062, "bacc_std": 0.04629979603025772} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.041194616153084854, "f1": 0.6176572818908586, "f1_std": 0.05082201761729274, "bacc": 0.616723259762309, "bacc_std": 0.04597639148611591} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.52, "acc_std": 0.04858004116918799, "f1": 0.49066213921901525, "f1_std": 0.050153724163580334, "bacc": 0.49066213921901525, "bacc_std": 0.05008324312421191} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.000774263682681127, "split": "test", "acc": 0.62, "acc_std": 0.035047002724912166, "f1": 0.5062370062370062, "f1_std": 0.049774486778210655, "bacc": 0.5356536502546689, "bacc_std": 0.03813136775736135} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.03966156829980377, "f1": 0.5714285714285714, "f1_std": 0.04956489499657217, "bacc": 0.5772495755517827, "bacc_std": 0.04310330810062263} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 0.000774263682681127, "split": "test", "acc": 0.57, "acc_std": 0.030209104587855612, "f1": 0.4188403838356535, "f1_std": 0.03812935706238327, "bacc": 0.47495755517826826, "bacc_std": 0.029785095190924135} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.045091710989936945, "f1": 0.5481404240528328, "f1_std": 0.054017228443095866, "bacc": 0.5530560271646858, "bacc_std": 0.048372968032262896} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.040261520090528136, "f1": 0.5552350042072365, "f1_std": 0.051203364308913944, "bacc": 0.5640916808149405, "bacc_std": 0.04376841586927747} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.03786071314700768, "f1": 0.6514004087029691, "f1_std": 0.050817164464169375, "bacc": 0.648981324278438, "bacc_std": 0.04389853060889528} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.043943322587169036, "f1": 0.5824175824175825, "f1_std": 0.04803208812513042, "bacc": 0.5814940577249575, "bacc_std": 0.046145471885520595} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.55, "acc_std": 0.04893855739598379, "f1": 0.529239460194581, "f1_std": 0.05058227212132874, "bacc": 0.5301358234295416, "bacc_std": 0.05132280695900836} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.04127897285543815, "f1": 0.6408295678368672, "f1_std": 0.05074286311874772, "bacc": 0.6379456706281834, "bacc_std": 0.04607875315603386} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.57, "acc_std": 0.04682332324814205, "f1": 0.5501621508525996, "f1_std": 0.04863927648052188, "bacc": 0.551358234295416, "bacc_std": 0.04929896502114679} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04345540702835493, "f1": 0.6239316239316239, "f1_std": 0.050983038238929586, "bacc": 0.6218166383701189, "bacc_std": 0.04691919585946412} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.03474411604862038, "f1": 0.49264332825976664, "f1_std": 0.046299001572449386, "bacc": 0.5195246179966044, "bacc_std": 0.03681995214668509} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.03841497103994744, "f1": 0.5628946090335114, "f1_std": 0.04931395248581373, "bacc": 0.5721561969439728, "bacc_std": 0.0417131268276978} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04102433911716311, "f1": 0.6155585707824514, "f1_std": 0.04767721151604067, "bacc": 0.6137521222410866, "bacc_std": 0.0444097017419626} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.044693820601957944, "f1": 0.5989304812834224, "f1_std": 0.051787343771926925, "bacc": 0.597623089983022, "bacc_std": 0.048995357777559465} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.043833188339430666, "f1": 0.5783475783475784, "f1_std": 0.051714244753356876, "bacc": 0.5793718166383701, "bacc_std": 0.04745555187402493} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04143765920029749, "f1": 0.6323529411764706, "f1_std": 0.050894282645890666, "bacc": 0.6298811544991512, "bacc_std": 0.04631517441643852} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | ppmi_dx | train | 100 | 4.1395 | 23.648 | 0.82555 | 0.093352 | 0.79965 | 0.11128 | 0.79259 | 0.11234 | +| flat_mae | reg | logistic | ppmi_dx | test | 100 | 4.1395 | 23.648 | 0.6231 | 0.044306 | 0.57389 | 0.04858 | 0.57671 | 0.04405 | + + +done! total time: 0:05:27 diff --git a/decoders/attn_reg1_pep4/pretrain/config.yaml b/decoders/attn_reg1_pep4/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5c99565bb56cf7e6018d326181617d6dcbbc20d --- /dev/null +++ b/decoders/attn_reg1_pep4/pretrain/config.yaml @@ -0,0 +1,100 @@ +name: decoders/attn_reg1_pep4/pretrain +notes: decoder ablations attn_reg1_pep4 (model_kwargs.decoding=attn model_kwargs.reg_tokens=1 + model_kwargs.pred_edge_pad=4) +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 4 + class_token: false + reg_tokens: 1 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.${input_space}.wds/hcpya-all-${input_space}-{00000..01799}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/validation + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +gauss_sigma: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 5 +checkpoint_period: 20 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 7338 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 diff --git a/decoders/attn_reg1_pep4/pretrain/log.json b/decoders/attn_reg1_pep4/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..17cde1eae1fc4e632092c7ea748936bc439c852c --- /dev/null +++ b/decoders/attn_reg1_pep4/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.05109799750238657, "train/loss": 0.9934105457782746, "eval/hcp-train-subset/loss": 0.9918035614875055, "eval/hcp-val/loss": 0.9902934912712343} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.08057390013456345, "train/loss": 0.9884730435085297, "eval/hcp-train-subset/loss": 0.9892455041408539, "eval/hcp-val/loss": 0.9875589426486723} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.0886806230631413, "train/loss": 0.985187329864502, "eval/hcp-train-subset/loss": 0.9873298704624176, "eval/hcp-val/loss": 0.9854823937339168} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.18317620996913567, "train/loss": 0.9796646947479248, "eval/hcp-train-subset/loss": 0.9733660961351087, "eval/hcp-val/loss": 0.9716233120810601} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.21921992167310547, "train/loss": 0.9555775726890564, "eval/hcp-train-subset/loss": 0.9405968544944641, "eval/hcp-val/loss": 0.935890709200213} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.16567060801053896, "train/loss": 0.925505720911026, "eval/hcp-train-subset/loss": 0.9214805162722065, "eval/hcp-val/loss": 0.9159717357927754} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.11419273185619301, "train/loss": 0.9113553131103516, "eval/hcp-train-subset/loss": 0.9117312633222149, "eval/hcp-val/loss": 0.9059032442108277} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.09089006631922412, "train/loss": 0.9033922518348694, "eval/hcp-train-subset/loss": 0.9064092040061951, "eval/hcp-val/loss": 0.9013055005381184} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.07729242738603992, "train/loss": 0.898832440366745, "eval/hcp-train-subset/loss": 0.9028759166117637, "eval/hcp-val/loss": 0.8973084620891079} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.07006284935161058, "train/loss": 0.8976163297271729, "eval/hcp-train-subset/loss": 0.9007868113056305, "eval/hcp-val/loss": 0.8951478023682872} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.06797512443695942, "train/loss": 0.8939989508724213, "eval/hcp-train-subset/loss": 0.8986469391853579, "eval/hcp-val/loss": 0.8928347902913247} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.06717725700034585, "train/loss": 0.8896805268573761, "eval/hcp-train-subset/loss": 0.8965294716819641, "eval/hcp-val/loss": 0.8919804903768724} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.06461316388090248, "train/loss": 0.8888568512821198, "eval/hcp-train-subset/loss": 0.8960500945968013, "eval/hcp-val/loss": 0.8898697168596329} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.0649539169692766, "train/loss": 0.8894465819835663, "eval/hcp-train-subset/loss": 0.8948770598057778, "eval/hcp-val/loss": 0.8896013056078265} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.06607527206872474, "train/loss": 0.8867114732170105, "eval/hcp-train-subset/loss": 0.8940951276210046, "eval/hcp-val/loss": 0.8882749292158312} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.06658380174967925, "train/loss": 0.8841128248786926, "eval/hcp-train-subset/loss": 0.8925630065702623, "eval/hcp-val/loss": 0.8877272817396349} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.06729253001567152, "train/loss": 0.8822191092205047, "eval/hcp-train-subset/loss": 0.8906738902291944, "eval/hcp-val/loss": 0.8862310234577425} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.06567143731666938, "train/loss": 0.8825845180130005, "eval/hcp-train-subset/loss": 0.8910742700099945, "eval/hcp-val/loss": 0.886643385694873} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.0661940955440197, "train/loss": 0.8819878600025177, "eval/hcp-train-subset/loss": 0.890208036668839, "eval/hcp-val/loss": 0.884625627148536} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.06851306746781788, "train/loss": 0.8802327109718323, "eval/hcp-train-subset/loss": 0.8902555579139341, "eval/hcp-val/loss": 0.885414534038113} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.06936646679248432, "train/loss": 0.8783858189678192, "eval/hcp-train-subset/loss": 0.8901653376317793, "eval/hcp-val/loss": 0.8857350945472717} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.0697212306771871, "train/loss": 0.8786696637916565, "eval/hcp-train-subset/loss": 0.8879020742831691, "eval/hcp-val/loss": 0.8840822913954335} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.06879894448105585, "train/loss": 0.878272129125595, "eval/hcp-train-subset/loss": 0.8874058444653788, "eval/hcp-val/loss": 0.883584326313388} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.07037042887717108, "train/loss": 0.8782063803386688, "eval/hcp-train-subset/loss": 0.8876514953951682, "eval/hcp-val/loss": 0.8837268304440283} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.07378781966031657, "train/loss": 0.8738483872413635, "eval/hcp-train-subset/loss": 0.8874825642954919, "eval/hcp-val/loss": 0.8846960894523128} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.07305673111907461, "train/loss": 0.8748099811267853, "eval/hcp-train-subset/loss": 0.8871977656118332, "eval/hcp-val/loss": 0.8836951947981312} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.07718141879957938, "train/loss": 0.873583664226532, "eval/hcp-train-subset/loss": 0.886846657722227, "eval/hcp-val/loss": 0.884448685953694} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.07779259664673124, "train/loss": 0.8731949339199067, "eval/hcp-train-subset/loss": 0.8870484309811746, "eval/hcp-val/loss": 0.8841891048416015} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.07965921018042417, "train/loss": 0.8705078758144379, "eval/hcp-train-subset/loss": 0.8865769870819584, "eval/hcp-val/loss": 0.8841315277161137} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.07892127677733887, "train/loss": 0.871897406873703, "eval/hcp-train-subset/loss": 0.8881038571557691, "eval/hcp-val/loss": 0.8844352095357834} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.08083248258867658, "train/loss": 0.8697450028991699, "eval/hcp-train-subset/loss": 0.8879484495809001, "eval/hcp-val/loss": 0.8831758124213065} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.08214160767778199, "train/loss": 0.8680706291294098, "eval/hcp-train-subset/loss": 0.886811327549719, "eval/hcp-val/loss": 0.8829803832115666} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.08308702932370209, "train/loss": 0.8701875491237641, "eval/hcp-train-subset/loss": 0.8861398918013419, "eval/hcp-val/loss": 0.8834478941655928} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.08488458915817505, "train/loss": 0.8664039387321473, "eval/hcp-train-subset/loss": 0.8857135128590369, "eval/hcp-val/loss": 0.8843832967742797} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.0876326218344035, "train/loss": 0.8666460393714904, "eval/hcp-train-subset/loss": 0.8871074443863284, "eval/hcp-val/loss": 0.8824942832992922} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.08706736009181615, "train/loss": 0.8659665279006958, "eval/hcp-train-subset/loss": 0.8867379398115219, "eval/hcp-val/loss": 0.883299361313543} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.0877933274249805, "train/loss": 0.8656994695186615, "eval/hcp-train-subset/loss": 0.8859919280775131, "eval/hcp-val/loss": 0.882713520719159} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.08972895221305115, "train/loss": 0.866185564622879, "eval/hcp-train-subset/loss": 0.8856887086745231, "eval/hcp-val/loss": 0.8817619373721461} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.08801301552171172, "train/loss": 0.8687455080795288, "eval/hcp-train-subset/loss": 0.8830079061369742, "eval/hcp-val/loss": 0.8815582106190343} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.09115709200720892, "train/loss": 0.8640272739982605, "eval/hcp-train-subset/loss": 0.8864937222773029, "eval/hcp-val/loss": 0.8829429716833176} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.08986716044818342, "train/loss": 0.8665840630531311, "eval/hcp-train-subset/loss": 0.8839546470872818, "eval/hcp-val/loss": 0.8815324258419776} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.09718848048338473, "train/loss": 0.8615398256206512, "eval/hcp-train-subset/loss": 0.884782882467393, "eval/hcp-val/loss": 0.8815969296040074} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.093982575960319, "train/loss": 0.8643691049575806, "eval/hcp-train-subset/loss": 0.884660207456158, "eval/hcp-val/loss": 0.8828635513782501} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.09481158659217472, "train/loss": 0.8620548281669617, "eval/hcp-train-subset/loss": 0.8851108877889572, "eval/hcp-val/loss": 0.8819809450257209} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.09874588461406768, "train/loss": 0.8600963641262055, "eval/hcp-train-subset/loss": 0.8830694585077224, "eval/hcp-val/loss": 0.8816026660703844} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.09951853260221397, "train/loss": 0.8623799084663392, "eval/hcp-train-subset/loss": 0.8819999964006485, "eval/hcp-val/loss": 0.8806086970913795} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.09530645176689599, "train/loss": 0.8623296834182739, "eval/hcp-train-subset/loss": 0.8832192469027734, "eval/hcp-val/loss": 0.8808593519272343} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.09969516693544067, "train/loss": 0.8597631815624237, "eval/hcp-train-subset/loss": 0.880244295443258, "eval/hcp-val/loss": 0.8799667377625743} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.1010439094941397, "train/loss": 0.8596788616085053, "eval/hcp-train-subset/loss": 0.8817584610754444, "eval/hcp-val/loss": 0.8803486314512068} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.10014873181133961, "train/loss": 0.8604548396778107, "eval/hcp-train-subset/loss": 0.8821499664937297, "eval/hcp-val/loss": 0.8808918556859416} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.10026031521636332, "train/loss": 0.862445617761612, "eval/hcp-train-subset/loss": 0.8825754821300507, "eval/hcp-val/loss": 0.8800905298802161} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.102848258554778, "train/loss": 0.862302002544403, "eval/hcp-train-subset/loss": 0.8793133958693473, "eval/hcp-val/loss": 0.8804952963705985} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.10335466947008903, "train/loss": 0.859212355260849, "eval/hcp-train-subset/loss": 0.8818975985050201, "eval/hcp-val/loss": 0.8804871949457354} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.10404090107928014, "train/loss": 0.8598042852115632, "eval/hcp-train-subset/loss": 0.880914342980231, "eval/hcp-val/loss": 0.8800214644401304} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.10566503292613647, "train/loss": 0.8605573072624206, "eval/hcp-train-subset/loss": 0.8798415430130497, "eval/hcp-val/loss": 0.878737832269361} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.10798412578048983, "train/loss": 0.8583336571979523, "eval/hcp-train-subset/loss": 0.8799442581592067, "eval/hcp-val/loss": 0.8790431512940314} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.10715982553893344, "train/loss": 0.8602155887317657, "eval/hcp-train-subset/loss": 0.8790119011555949, "eval/hcp-val/loss": 0.879852989027577} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.10968989638668913, "train/loss": 0.8594300882911682, "eval/hcp-train-subset/loss": 0.8773198925679729, "eval/hcp-val/loss": 0.8799148211556096} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.11364429473290255, "train/loss": 0.8556031602287293, "eval/hcp-train-subset/loss": 0.876633019216599, "eval/hcp-val/loss": 0.8784405964036142} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.11393260422305557, "train/loss": 0.8570806022834778, "eval/hcp-train-subset/loss": 0.8756507577434662, "eval/hcp-val/loss": 0.8778372041640743} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.11626497391914226, "train/loss": 0.8564456133842469, "eval/hcp-train-subset/loss": 0.8769741087190567, "eval/hcp-val/loss": 0.877808355516003} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.11754116499252182, "train/loss": 0.8562709504032135, "eval/hcp-train-subset/loss": 0.8755316061358298, "eval/hcp-val/loss": 0.8786171155591165} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.11498269057509497, "train/loss": 0.8581577094078064, "eval/hcp-train-subset/loss": 0.8749712628702964, "eval/hcp-val/loss": 0.8776996847121946} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.11944734436294559, "train/loss": 0.8565247329902649, "eval/hcp-train-subset/loss": 0.8763075768947601, "eval/hcp-val/loss": 0.8779652560910871} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.12265994475090759, "train/loss": 0.8548637902832031, "eval/hcp-train-subset/loss": 0.8738687653695384, "eval/hcp-val/loss": 0.8776392888638281} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.12426604036117753, "train/loss": 0.85528221367836, "eval/hcp-train-subset/loss": 0.8752347125161078, "eval/hcp-val/loss": 0.8773552333154986} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.12525747624591616, "train/loss": 0.8549707554912567, "eval/hcp-train-subset/loss": 0.8746742654231286, "eval/hcp-val/loss": 0.8777826524549915} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.12706129425612903, "train/loss": 0.854354995136261, "eval/hcp-train-subset/loss": 0.8734449686542634, "eval/hcp-val/loss": 0.8773574444555468} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.12798203216949755, "train/loss": 0.8554717939662934, "eval/hcp-train-subset/loss": 0.8761714994907379, "eval/hcp-val/loss": 0.8767416419521454} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.13035050996278624, "train/loss": 0.8551088427734375, "eval/hcp-train-subset/loss": 0.8723429537588551, "eval/hcp-val/loss": 0.8769748057088544} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.13122772406572034, "train/loss": 0.8543202759647369, "eval/hcp-train-subset/loss": 0.8718917446751748, "eval/hcp-val/loss": 0.8766561525483285} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.1359824051302249, "train/loss": 0.8504601029109955, "eval/hcp-train-subset/loss": 0.8720460418731936, "eval/hcp-val/loss": 0.8767627939101188} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.13547589911551008, "train/loss": 0.8539579989337921, "eval/hcp-train-subset/loss": 0.8717525312977452, "eval/hcp-val/loss": 0.8767676007363104} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.13813853802257212, "train/loss": 0.8534620151138306, "eval/hcp-train-subset/loss": 0.8698088074884107, "eval/hcp-val/loss": 0.8762644577410913} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.1403488693782695, "train/loss": 0.8518487320041657, "eval/hcp-train-subset/loss": 0.8697156752309492, "eval/hcp-val/loss": 0.8764403193227707} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.1421419027992987, "train/loss": 0.8528032725524902, "eval/hcp-train-subset/loss": 0.8688998789556565, "eval/hcp-val/loss": 0.8755388500229004} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.1424944888080128, "train/loss": 0.8538179479694367, "eval/hcp-train-subset/loss": 0.8676948681954415, "eval/hcp-val/loss": 0.8771614182379938} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.14659184653063934, "train/loss": 0.8521203755950928, "eval/hcp-train-subset/loss": 0.8671179231136076, "eval/hcp-val/loss": 0.8760389435675836} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.14642424329700338, "train/loss": 0.8540443034648896, "eval/hcp-train-subset/loss": 0.8660197854042053, "eval/hcp-val/loss": 0.8747305466282752} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.14743282225593524, "train/loss": 0.8521954595088959, "eval/hcp-train-subset/loss": 0.8669722657049855, "eval/hcp-val/loss": 0.8756365603016268} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.1497279820283623, "train/loss": 0.8530404705142974, "eval/hcp-train-subset/loss": 0.8649101939893538, "eval/hcp-val/loss": 0.875342619995917} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.15411910802311987, "train/loss": 0.8494300235462189, "eval/hcp-train-subset/loss": 0.8647187563680834, "eval/hcp-val/loss": 0.8747896330971872} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.15443641475573414, "train/loss": 0.8509540951442719, "eval/hcp-train-subset/loss": 0.864066077816871, "eval/hcp-val/loss": 0.8747684465300652} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.15577235329829434, "train/loss": 0.8516101210212708, "eval/hcp-train-subset/loss": 0.8622947469834359, "eval/hcp-val/loss": 0.8749730500482744} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.15483636150754293, "train/loss": 0.8537235752105713, "eval/hcp-train-subset/loss": 0.8617447335873881, "eval/hcp-val/loss": 0.8744749744092265} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.15726549300490827, "train/loss": 0.8531223028850555, "eval/hcp-train-subset/loss": 0.8611829482739971, "eval/hcp-val/loss": 0.8745538832679871} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.1589399654270363, "train/loss": 0.8518211099910736, "eval/hcp-train-subset/loss": 0.8607534698901638, "eval/hcp-val/loss": 0.8742849048106901} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.1597100201035874, "train/loss": 0.853425854101181, "eval/hcp-train-subset/loss": 0.8596996395818649, "eval/hcp-val/loss": 0.8745285599462448} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.1679136949955549, "train/loss": 0.8499118625640869, "eval/hcp-train-subset/loss": 0.8590097110117635, "eval/hcp-val/loss": 0.8740956715999111} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.16399206632799215, "train/loss": 0.851656636800766, "eval/hcp-train-subset/loss": 0.8588101065927937, "eval/hcp-val/loss": 0.8736566814684099} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.162707654400353, "train/loss": 0.8532473838710785, "eval/hcp-train-subset/loss": 0.8582299418987767, "eval/hcp-val/loss": 0.8729709713689743} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.168980682234157, "train/loss": 0.8547069179725647, "eval/hcp-train-subset/loss": 0.8575628784395033, "eval/hcp-val/loss": 0.8731795203301215} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.16863260412570227, "train/loss": 0.8525560132408142, "eval/hcp-train-subset/loss": 0.8574643683048987, "eval/hcp-val/loss": 0.8735443930472097} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.16320975541314717, "train/loss": 0.8556927118396759, "eval/hcp-train-subset/loss": 0.857173788932062, "eval/hcp-val/loss": 0.8725198997605231} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.16689813668829792, "train/loss": 0.8545163739299774, "eval/hcp-train-subset/loss": 0.8560948333432598, "eval/hcp-val/loss": 0.8724849675932238} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.16915528175186625, "train/loss": 0.856800008020401, "eval/hcp-train-subset/loss": 0.8563019835179851, "eval/hcp-val/loss": 0.8723221123218536} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.17029840781421385, "train/loss": 0.8569264556121826, "eval/hcp-train-subset/loss": 0.855609082406567, "eval/hcp-val/loss": 0.8728626793430697} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.17278640413615387, "train/loss": 0.8556145253562927, "eval/hcp-train-subset/loss": 0.8559063653792104, "eval/hcp-val/loss": 0.8725748571657366} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.17126620129566453, "train/loss": 0.8558068560886383, "eval/hcp-train-subset/loss": 0.8556642224711757, "eval/hcp-val/loss": 0.8719721724910121} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.17251811680558854, "train/loss": 0.8553678136062622, "eval/hcp-train-subset/loss": 0.8553241500931401, "eval/hcp-val/loss": 0.8723324739163921} diff --git a/decoders/attn_reg1_pep4/pretrain/log.txt b/decoders/attn_reg1_pep4/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f9368a9daa57614461f65e83459dc89d96b63476 --- /dev/null +++ b/decoders/attn_reg1_pep4/pretrain/log.txt @@ -0,0 +1,7775 @@ +pretraining fmri mae +start: 2026-01-16 00:34:41 +cwd: /admin/home/connor/fmri-fm +sha: f9ef1eebbc1a5292e462bf6c7741545659511885, status: has uncommitted changes, branch: dev/clane9 +config: +name: decoders/attn_reg1_pep4/pretrain +notes: decoder ablations attn_reg1_pep4 (model_kwargs.decoding=attn model_kwargs.reg_tokens=1 + model_kwargs.pred_edge_pad=4) +output_dir: experiments/decoders/output/decoders/attn_reg1_pep4/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 4 + class_token: false + reg_tokens: 1 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.${input_space}.wds/hcpya-all-${input_space}-{00000..01799}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/validation + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +gauss_sigma: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 5 +checkpoint_period: 20 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 7338 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Transform( +transform=Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +), +noise_transform=None +) +val transform: +Transform( +transform=Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +), +noise_transform=None +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.${input_space}.wds/hcpya-all-${input_space}-{00000..01799}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/validation +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [899, 472, 767, 116, 1265, 1852, 300, 1335, 361, 1560] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=4, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=False, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 23:16:07 lr: 0.000000 grad: 0.0152 (0.0152) loss: 0.9983 (0.9983) time: 13.4029 data: 10.7894 max mem: 8412 +Train: [0] [ 100/6250] eta: 0:30:04 lr: 0.000000 grad: 0.0140 (0.0159) loss: 0.9958 (0.9963) time: 0.1597 data: 0.0597 max mem: 9303 +Train: [0] [ 200/6250] eta: 0:25:13 lr: 0.000001 grad: 0.0150 (0.0154) loss: 0.9953 (0.9960) time: 0.2086 data: 0.1096 max mem: 9303 +Train: [0] [ 300/6250] eta: 0:22:37 lr: 0.000001 grad: 0.0135 (0.0152) loss: 0.9954 (0.9959) time: 0.2063 data: 0.1169 max mem: 9303 +Train: [0] [ 400/6250] eta: 0:20:57 lr: 0.000002 grad: 0.0139 (0.0150) loss: 0.9952 (0.9958) time: 0.1642 data: 0.0576 max mem: 9303 +Train: [0] [ 500/6250] eta: 0:20:11 lr: 0.000002 grad: 0.0145 (0.0148) loss: 0.9957 (0.9959) time: 0.2069 data: 0.1192 max mem: 9303 +Train: [0] [ 600/6250] eta: 0:19:08 lr: 0.000002 grad: 0.0144 (0.0147) loss: 0.9955 (0.9959) time: 0.1641 data: 0.0782 max mem: 9303 +Train: [0] [ 700/6250] eta: 0:18:52 lr: 0.000003 grad: 0.0142 (0.0147) loss: 0.9963 (0.9959) time: 0.2055 data: 0.1065 max mem: 9303 +Train: [0] [ 800/6250] eta: 0:18:16 lr: 0.000003 grad: 0.0136 (0.0146) loss: 0.9956 (0.9959) time: 0.2458 data: 0.1566 max mem: 9303 +Train: [0] [ 900/6250] eta: 0:17:35 lr: 0.000004 grad: 0.0145 (0.0146) loss: 0.9963 (0.9959) time: 0.1485 data: 0.0549 max mem: 9303 +Train: [0] [1000/6250] eta: 0:17:01 lr: 0.000004 grad: 0.0152 (0.0146) loss: 0.9958 (0.9959) time: 0.1406 data: 0.0547 max mem: 9303 +Train: [0] [1100/6250] eta: 0:16:33 lr: 0.000004 grad: 0.0152 (0.0146) loss: 0.9957 (0.9959) time: 0.1768 data: 0.0865 max mem: 9303 +Train: [0] [1200/6250] eta: 0:16:03 lr: 0.000005 grad: 0.0153 (0.0147) loss: 0.9960 (0.9959) time: 0.1672 data: 0.0730 max mem: 9303 +Train: [0] [1300/6250] eta: 0:15:32 lr: 0.000005 grad: 0.0174 (0.0148) loss: 0.9951 (0.9959) time: 0.1638 data: 0.0760 max mem: 9303 +Train: [0] [1400/6250] eta: 0:15:02 lr: 0.000006 grad: 0.0167 (0.0151) loss: 0.9953 (0.9959) time: 0.1867 data: 0.1067 max mem: 9303 +Train: [0] [1500/6250] eta: 0:14:31 lr: 0.000006 grad: 0.0223 (0.0155) loss: 0.9949 (0.9959) time: 0.1539 data: 0.0594 max mem: 9303 +Train: [0] [1600/6250] eta: 0:14:02 lr: 0.000006 grad: 0.0251 (0.0161) loss: 0.9961 (0.9959) time: 0.1518 data: 0.0646 max mem: 9303 +Train: [0] [1700/6250] eta: 0:13:36 lr: 0.000007 grad: 0.0209 (0.0164) loss: 0.9955 (0.9958) time: 0.1473 data: 0.0624 max mem: 9303 +Train: [0] [1800/6250] eta: 0:13:13 lr: 0.000007 grad: 0.0305 (0.0172) loss: 0.9952 (0.9958) time: 0.1635 data: 0.0754 max mem: 9303 +Train: [0] [1900/6250] eta: 0:12:46 lr: 0.000008 grad: 0.0384 (0.0180) loss: 0.9957 (0.9958) time: 0.1311 data: 0.0484 max mem: 9303 +Train: [0] [2000/6250] eta: 0:12:27 lr: 0.000008 grad: 0.0396 (0.0192) loss: 0.9946 (0.9957) time: 0.1742 data: 0.0820 max mem: 9303 +Train: [0] [2100/6250] eta: 0:12:09 lr: 0.000008 grad: 0.0406 (0.0203) loss: 0.9953 (0.9957) time: 0.1821 data: 0.0963 max mem: 9303 +Train: [0] [2200/6250] eta: 0:11:47 lr: 0.000009 grad: 0.0378 (0.0213) loss: 0.9951 (0.9957) time: 0.1562 data: 0.0696 max mem: 9303 +Train: [0] [2300/6250] eta: 0:11:27 lr: 0.000009 grad: 0.0412 (0.0222) loss: 0.9947 (0.9956) time: 0.1529 data: 0.0596 max mem: 9303 +Train: [0] [2400/6250] eta: 0:11:08 lr: 0.000010 grad: 0.0564 (0.0233) loss: 0.9930 (0.9956) time: 0.1674 data: 0.0760 max mem: 9303 +Train: [0] [2500/6250] eta: 0:10:48 lr: 0.000010 grad: 0.0422 (0.0242) loss: 0.9942 (0.9955) time: 0.1273 data: 0.0359 max mem: 9303 +Train: [0] [2600/6250] eta: 0:10:28 lr: 0.000010 grad: 0.0395 (0.0252) loss: 0.9942 (0.9955) time: 0.1583 data: 0.0767 max mem: 9303 +Train: [0] [2700/6250] eta: 0:10:08 lr: 0.000011 grad: 0.0406 (0.0260) loss: 0.9943 (0.9954) time: 0.1546 data: 0.0666 max mem: 9303 +Train: [0] [2800/6250] eta: 0:09:49 lr: 0.000011 grad: 0.0516 (0.0270) loss: 0.9934 (0.9954) time: 0.1112 data: 0.0079 max mem: 9303 +Train: [0] [2900/6250] eta: 0:09:30 lr: 0.000012 grad: 0.0695 (0.0282) loss: 0.9931 (0.9953) time: 0.1445 data: 0.0610 max mem: 9303 +Train: [0] [3000/6250] eta: 0:09:12 lr: 0.000012 grad: 0.0500 (0.0291) loss: 0.9941 (0.9953) time: 0.1469 data: 0.0523 max mem: 9303 +Train: [0] [3100/6250] eta: 0:08:53 lr: 0.000012 grad: 0.0545 (0.0299) loss: 0.9933 (0.9952) time: 0.1587 data: 0.0714 max mem: 9303 +Train: [0] [3200/6250] eta: 0:08:36 lr: 0.000013 grad: 0.0503 (0.0306) loss: 0.9929 (0.9952) time: 0.1548 data: 0.0697 max mem: 9303 +Train: [0] [3300/6250] eta: 0:08:17 lr: 0.000013 grad: 0.0565 (0.0314) loss: 0.9921 (0.9951) time: 0.1651 data: 0.0799 max mem: 9303 +Train: [0] [3400/6250] eta: 0:08:00 lr: 0.000014 grad: 0.0662 (0.0325) loss: 0.9928 (0.9950) time: 0.1671 data: 0.0773 max mem: 9303 +Train: [0] [3500/6250] eta: 0:07:42 lr: 0.000014 grad: 0.0554 (0.0335) loss: 0.9922 (0.9950) time: 0.1627 data: 0.0776 max mem: 9303 +Train: [0] [3600/6250] eta: 0:07:25 lr: 0.000014 grad: 0.0594 (0.0343) loss: 0.9925 (0.9949) time: 0.1353 data: 0.0439 max mem: 9303 +Train: [0] [3700/6250] eta: 0:07:08 lr: 0.000015 grad: 0.0696 (0.0354) loss: 0.9917 (0.9948) time: 0.1734 data: 0.0885 max mem: 9303 +Train: [0] [3800/6250] eta: 0:06:50 lr: 0.000015 grad: 0.0767 (0.0365) loss: 0.9924 (0.9948) time: 0.1632 data: 0.0770 max mem: 9303 +Train: [0] [3900/6250] eta: 0:06:32 lr: 0.000016 grad: 0.0630 (0.0373) loss: 0.9935 (0.9947) time: 0.1548 data: 0.0597 max mem: 9303 +Train: [0] [4000/6250] eta: 0:06:15 lr: 0.000016 grad: 0.0759 (0.0381) loss: 0.9926 (0.9947) time: 0.1456 data: 0.0629 max mem: 9303 +Train: [0] [4100/6250] eta: 0:05:59 lr: 0.000016 grad: 0.0685 (0.0389) loss: 0.9922 (0.9946) time: 0.1685 data: 0.0752 max mem: 9303 +Train: [0] [4200/6250] eta: 0:05:41 lr: 0.000017 grad: 0.0676 (0.0397) loss: 0.9923 (0.9946) time: 0.1320 data: 0.0489 max mem: 9303 +Train: [0] [4300/6250] eta: 0:05:24 lr: 0.000017 grad: 0.0645 (0.0404) loss: 0.9916 (0.9945) time: 0.1447 data: 0.0544 max mem: 9303 +Train: [0] [4400/6250] eta: 0:05:07 lr: 0.000018 grad: 0.0772 (0.0412) loss: 0.9923 (0.9944) time: 0.1476 data: 0.0613 max mem: 9303 +Train: [0] [4500/6250] eta: 0:04:49 lr: 0.000018 grad: 0.0566 (0.0420) loss: 0.9918 (0.9944) time: 0.1165 data: 0.0284 max mem: 9303 +Train: [0] [4600/6250] eta: 0:04:33 lr: 0.000018 grad: 0.0720 (0.0427) loss: 0.9919 (0.9943) time: 0.1596 data: 0.0662 max mem: 9303 +Train: [0] [4700/6250] eta: 0:04:16 lr: 0.000019 grad: 0.0682 (0.0434) loss: 0.9904 (0.9943) time: 0.1910 data: 0.0904 max mem: 9303 +Train: [0] [4800/6250] eta: 0:03:59 lr: 0.000019 grad: 0.0639 (0.0440) loss: 0.9917 (0.9942) time: 0.1714 data: 0.0836 max mem: 9303 +Train: [0] [4900/6250] eta: 0:03:43 lr: 0.000020 grad: 0.0656 (0.0445) loss: 0.9915 (0.9941) time: 0.1499 data: 0.0641 max mem: 9303 +Train: [0] [5000/6250] eta: 0:03:26 lr: 0.000020 grad: 0.0545 (0.0451) loss: 0.9927 (0.9941) time: 0.1594 data: 0.0718 max mem: 9303 +Train: [0] [5100/6250] eta: 0:03:09 lr: 0.000020 grad: 0.0691 (0.0455) loss: 0.9918 (0.9940) time: 0.1471 data: 0.0521 max mem: 9303 +Train: [0] [5200/6250] eta: 0:02:52 lr: 0.000021 grad: 0.0625 (0.0460) loss: 0.9906 (0.9940) time: 0.1355 data: 0.0431 max mem: 9303 +Train: [0] [5300/6250] eta: 0:02:36 lr: 0.000021 grad: 0.0800 (0.0465) loss: 0.9914 (0.9940) time: 0.1746 data: 0.0875 max mem: 9303 +Train: [0] [5400/6250] eta: 0:02:19 lr: 0.000022 grad: 0.0657 (0.0469) loss: 0.9908 (0.9939) time: 0.1263 data: 0.0396 max mem: 9303 +Train: [0] [5500/6250] eta: 0:02:03 lr: 0.000022 grad: 0.0713 (0.0474) loss: 0.9913 (0.9938) time: 0.1678 data: 0.0780 max mem: 9303 +Train: [0] [5600/6250] eta: 0:01:46 lr: 0.000022 grad: 0.0624 (0.0478) loss: 0.9907 (0.9938) time: 0.1735 data: 0.0901 max mem: 9303 +Train: [0] [5700/6250] eta: 0:01:29 lr: 0.000023 grad: 0.0771 (0.0482) loss: 0.9898 (0.9938) time: 0.1397 data: 0.0381 max mem: 9303 +Train: [0] [5800/6250] eta: 0:01:13 lr: 0.000023 grad: 0.0747 (0.0488) loss: 0.9891 (0.9937) time: 0.2108 data: 0.1230 max mem: 9303 +Train: [0] [5900/6250] eta: 0:00:57 lr: 0.000024 grad: 0.0713 (0.0493) loss: 0.9906 (0.9936) time: 0.1589 data: 0.0655 max mem: 9303 +Train: [0] [6000/6250] eta: 0:00:40 lr: 0.000024 grad: 0.0791 (0.0498) loss: 0.9896 (0.9936) time: 0.1558 data: 0.0611 max mem: 9303 +Train: [0] [6100/6250] eta: 0:00:24 lr: 0.000024 grad: 0.0796 (0.0503) loss: 0.9897 (0.9935) time: 0.1517 data: 0.0596 max mem: 9303 +Train: [0] [6200/6250] eta: 0:00:08 lr: 0.000025 grad: 0.0771 (0.0509) loss: 0.9910 (0.9934) time: 0.1571 data: 0.0667 max mem: 9303 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.0695 (0.0511) loss: 0.9907 (0.9934) time: 0.0985 data: 0.0002 max mem: 9303 +Train: [0] Total time: 0:17:09 (0.1647 s / it) +Averaged stats: lr: 0.000025 grad: 0.0695 (0.0511) loss: 0.9907 (0.9934) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:04:01 loss: 0.9947 (0.9947) time: 3.8964 data: 3.7112 max mem: 9303 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9921 (0.9918) time: 0.1285 data: 0.0998 max mem: 9303 +Eval (hcp-train-subset): [0] Total time: 0:00:14 (0.2304 s / it) +Averaged stats (hcp-train-subset): loss: 0.9921 (0.9918) +Eval (hcp-val): [0] [ 0/62] eta: 0:03:14 loss: 0.9860 (0.9860) time: 3.1292 data: 3.0540 max mem: 9303 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9901 (0.9903) time: 0.1484 data: 0.1155 max mem: 9303 +Eval (hcp-val): [0] Total time: 0:00:13 (0.2150 s / it) +Averaged stats (hcp-val): loss: 0.9901 (0.9903) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [1] [ 0/6250] eta: 10:43:02 lr: 0.000025 grad: 0.0584 (0.0584) loss: 0.9939 (0.9939) time: 6.1732 data: 6.0836 max mem: 9303 +Train: [1] [ 100/6250] eta: 0:21:41 lr: 0.000025 grad: 0.0688 (0.0716) loss: 0.9911 (0.9911) time: 0.1790 data: 0.0724 max mem: 9303 +Train: [1] [ 200/6250] eta: 0:18:22 lr: 0.000026 grad: 0.0703 (0.0721) loss: 0.9902 (0.9909) time: 0.1953 data: 0.1090 max mem: 9303 +Train: [1] [ 300/6250] eta: 0:17:42 lr: 0.000026 grad: 0.0802 (0.0754) loss: 0.9904 (0.9903) time: 0.1290 data: 0.0458 max mem: 9303 +Train: [1] [ 400/6250] eta: 0:17:03 lr: 0.000027 grad: 0.0805 (0.0789) loss: 0.9884 (0.9899) time: 0.1490 data: 0.0524 max mem: 9303 +Train: [1] [ 500/6250] eta: 0:17:21 lr: 0.000027 grad: 0.0927 (0.0813) loss: 0.9885 (0.9895) time: 0.1959 data: 0.0825 max mem: 9303 +Train: [1] [ 600/6250] eta: 0:16:53 lr: 0.000027 grad: 0.0955 (0.0822) loss: 0.9889 (0.9895) time: 0.1562 data: 0.0626 max mem: 9303 +Train: [1] [ 700/6250] eta: 0:16:28 lr: 0.000028 grad: 0.0799 (0.0822) loss: 0.9897 (0.9895) time: 0.1736 data: 0.0857 max mem: 9303 +Train: [1] [ 800/6250] eta: 0:16:08 lr: 0.000028 grad: 0.0705 (0.0815) loss: 0.9912 (0.9895) time: 0.1202 data: 0.0316 max mem: 9303 +Train: [1] [ 900/6250] eta: 0:15:51 lr: 0.000029 grad: 0.0753 (0.0811) loss: 0.9904 (0.9895) time: 0.1757 data: 0.0840 max mem: 9303 +Train: [1] [1000/6250] eta: 0:15:29 lr: 0.000029 grad: 0.0725 (0.0804) loss: 0.9895 (0.9896) time: 0.1798 data: 0.0925 max mem: 9303 +Train: [1] [1100/6250] eta: 0:15:10 lr: 0.000029 grad: 0.0962 (0.0803) loss: 0.9887 (0.9895) time: 0.1996 data: 0.1108 max mem: 9303 +Train: [1] [1200/6250] eta: 0:14:47 lr: 0.000030 grad: 0.0693 (0.0799) loss: 0.9904 (0.9896) time: 0.1791 data: 0.0812 max mem: 9303 +Train: [1] [1300/6250] eta: 0:14:31 lr: 0.000030 grad: 0.0738 (0.0799) loss: 0.9899 (0.9896) time: 0.1473 data: 0.0485 max mem: 9303 +Train: [1] [1400/6250] eta: 0:14:10 lr: 0.000031 grad: 0.0735 (0.0801) loss: 0.9889 (0.9895) time: 0.1090 data: 0.0149 max mem: 9303 +Train: [1] [1500/6250] eta: 0:13:49 lr: 0.000031 grad: 0.0702 (0.0797) loss: 0.9901 (0.9896) time: 0.1738 data: 0.0732 max mem: 9303 +Train: [1] [1600/6250] eta: 0:13:26 lr: 0.000031 grad: 0.0704 (0.0796) loss: 0.9900 (0.9896) time: 0.1864 data: 0.0965 max mem: 9303 +Train: [1] [1700/6250] eta: 0:13:04 lr: 0.000032 grad: 0.0629 (0.0794) loss: 0.9906 (0.9896) time: 0.1562 data: 0.0630 max mem: 9303 +Train: [1] [1800/6250] eta: 0:12:46 lr: 0.000032 grad: 0.0725 (0.0794) loss: 0.9886 (0.9896) time: 0.1973 data: 0.1090 max mem: 9303 +Train: [1] [1900/6250] eta: 0:12:27 lr: 0.000033 grad: 0.0670 (0.0792) loss: 0.9896 (0.9896) time: 0.1540 data: 0.0657 max mem: 9303 +Train: [1] [2000/6250] eta: 0:12:08 lr: 0.000033 grad: 0.0754 (0.0791) loss: 0.9883 (0.9895) time: 0.1794 data: 0.0826 max mem: 9303 +Train: [1] [2100/6250] eta: 0:11:47 lr: 0.000033 grad: 0.0861 (0.0793) loss: 0.9891 (0.9895) time: 0.1566 data: 0.0710 max mem: 9303 +Train: [1] [2200/6250] eta: 0:11:28 lr: 0.000034 grad: 0.0697 (0.0792) loss: 0.9880 (0.9895) time: 0.1640 data: 0.0761 max mem: 9303 +Train: [1] [2300/6250] eta: 0:11:14 lr: 0.000034 grad: 0.0776 (0.0792) loss: 0.9887 (0.9895) time: 0.2297 data: 0.1375 max mem: 9303 +Train: [1] [2400/6250] eta: 0:10:57 lr: 0.000035 grad: 0.0770 (0.0791) loss: 0.9872 (0.9894) time: 0.1921 data: 0.1009 max mem: 9303 +Train: [1] [2500/6250] eta: 0:10:39 lr: 0.000035 grad: 0.0762 (0.0791) loss: 0.9895 (0.9894) time: 0.1597 data: 0.0647 max mem: 9303 +Train: [1] [2600/6250] eta: 0:10:21 lr: 0.000035 grad: 0.0655 (0.0792) loss: 0.9891 (0.9894) time: 0.1776 data: 0.0782 max mem: 9303 +Train: [1] [2700/6250] eta: 0:10:04 lr: 0.000036 grad: 0.0673 (0.0792) loss: 0.9914 (0.9894) time: 0.1682 data: 0.0864 max mem: 9303 +Train: [1] [2800/6250] eta: 0:09:48 lr: 0.000036 grad: 0.0696 (0.0793) loss: 0.9891 (0.9893) time: 0.2012 data: 0.1108 max mem: 9303 +Train: [1] [2900/6250] eta: 0:09:28 lr: 0.000037 grad: 0.0862 (0.0794) loss: 0.9884 (0.9893) time: 0.1473 data: 0.0526 max mem: 9303 +Train: [1] [3000/6250] eta: 0:09:12 lr: 0.000037 grad: 0.0771 (0.0795) loss: 0.9879 (0.9892) time: 0.1934 data: 0.1035 max mem: 9303 +Train: [1] [3100/6250] eta: 0:08:55 lr: 0.000037 grad: 0.0764 (0.0796) loss: 0.9880 (0.9892) time: 0.1705 data: 0.0678 max mem: 9303 +Train: [1] [3200/6250] eta: 0:08:37 lr: 0.000038 grad: 0.0817 (0.0797) loss: 0.9889 (0.9892) time: 0.1713 data: 0.0766 max mem: 9303 +Train: [1] [3300/6250] eta: 0:08:18 lr: 0.000038 grad: 0.0762 (0.0797) loss: 0.9882 (0.9891) time: 0.1370 data: 0.0461 max mem: 9303 +Train: [1] [3400/6250] eta: 0:08:00 lr: 0.000039 grad: 0.0701 (0.0798) loss: 0.9888 (0.9891) time: 0.1265 data: 0.0370 max mem: 9303 +Train: [1] [3500/6250] eta: 0:07:42 lr: 0.000039 grad: 0.0783 (0.0800) loss: 0.9882 (0.9891) time: 0.1547 data: 0.0701 max mem: 9303 +Train: [1] [3600/6250] eta: 0:07:24 lr: 0.000039 grad: 0.0769 (0.0801) loss: 0.9868 (0.9890) time: 0.1539 data: 0.0655 max mem: 9303 +Train: [1] [3700/6250] eta: 0:07:07 lr: 0.000040 grad: 0.0756 (0.0801) loss: 0.9874 (0.9890) time: 0.2221 data: 0.1426 max mem: 9303 +Train: [1] [3800/6250] eta: 0:06:52 lr: 0.000040 grad: 0.0762 (0.0802) loss: 0.9873 (0.9890) time: 0.3023 data: 0.2098 max mem: 9303 +Train: [1] [3900/6250] eta: 0:06:36 lr: 0.000041 grad: 0.0726 (0.0803) loss: 0.9895 (0.9890) time: 0.1893 data: 0.1001 max mem: 9303 +Train: [1] [4000/6250] eta: 0:06:20 lr: 0.000041 grad: 0.0766 (0.0802) loss: 0.9881 (0.9889) time: 0.2530 data: 0.1547 max mem: 9303 +Train: [1] [4100/6250] eta: 0:06:03 lr: 0.000041 grad: 0.0732 (0.0802) loss: 0.9891 (0.9889) time: 0.1869 data: 0.1002 max mem: 9303 +Train: [1] [4200/6250] eta: 0:05:46 lr: 0.000042 grad: 0.0728 (0.0803) loss: 0.9881 (0.9889) time: 0.1517 data: 0.0675 max mem: 9303 +Train: [1] [4300/6250] eta: 0:05:29 lr: 0.000042 grad: 0.0677 (0.0803) loss: 0.9882 (0.9889) time: 0.2186 data: 0.1243 max mem: 9303 +Train: [1] [4400/6250] eta: 0:05:11 lr: 0.000043 grad: 0.0687 (0.0803) loss: 0.9903 (0.9889) time: 0.1578 data: 0.0676 max mem: 9303 +Train: [1] [4500/6250] eta: 0:04:55 lr: 0.000043 grad: 0.0782 (0.0803) loss: 0.9875 (0.9889) time: 0.1893 data: 0.1004 max mem: 9303 +Train: [1] [4600/6250] eta: 0:04:38 lr: 0.000043 grad: 0.0823 (0.0803) loss: 0.9895 (0.9888) time: 0.1007 data: 0.0051 max mem: 9303 +Train: [1] [4700/6250] eta: 0:04:21 lr: 0.000044 grad: 0.0653 (0.0802) loss: 0.9884 (0.9888) time: 0.1620 data: 0.0775 max mem: 9303 +Train: [1] [4800/6250] eta: 0:04:04 lr: 0.000044 grad: 0.0781 (0.0802) loss: 0.9885 (0.9888) time: 0.1700 data: 0.0749 max mem: 9303 +Train: [1] [4900/6250] eta: 0:03:47 lr: 0.000045 grad: 0.0687 (0.0801) loss: 0.9895 (0.9888) time: 0.1430 data: 0.0461 max mem: 9303 +Train: [1] [5000/6250] eta: 0:03:31 lr: 0.000045 grad: 0.0907 (0.0802) loss: 0.9877 (0.9888) time: 0.1566 data: 0.0660 max mem: 9303 +Train: [1] [5100/6250] eta: 0:03:14 lr: 0.000045 grad: 0.0700 (0.0801) loss: 0.9880 (0.9888) time: 0.1344 data: 0.0495 max mem: 9305 +Train: [1] [5200/6250] eta: 0:02:57 lr: 0.000046 grad: 0.0711 (0.0802) loss: 0.9886 (0.9888) time: 0.1524 data: 0.0631 max mem: 9305 +Train: [1] [5300/6250] eta: 0:02:40 lr: 0.000046 grad: 0.0735 (0.0801) loss: 0.9882 (0.9887) time: 0.1744 data: 0.0926 max mem: 9305 +Train: [1] [5400/6250] eta: 0:02:24 lr: 0.000047 grad: 0.0759 (0.0800) loss: 0.9857 (0.9887) time: 0.1549 data: 0.0614 max mem: 9305 +Train: [1] [5500/6250] eta: 0:02:07 lr: 0.000047 grad: 0.0799 (0.0800) loss: 0.9866 (0.9887) time: 0.1710 data: 0.0826 max mem: 9305 +Train: [1] [5600/6250] eta: 0:01:50 lr: 0.000047 grad: 0.0811 (0.0801) loss: 0.9880 (0.9887) time: 0.1617 data: 0.0641 max mem: 9305 +Train: [1] [5700/6250] eta: 0:01:33 lr: 0.000048 grad: 0.0775 (0.0802) loss: 0.9879 (0.9886) time: 0.1417 data: 0.0439 max mem: 9305 +Train: [1] [5800/6250] eta: 0:01:16 lr: 0.000048 grad: 0.0687 (0.0803) loss: 0.9859 (0.9886) time: 0.1739 data: 0.0821 max mem: 9305 +Train: [1] [5900/6250] eta: 0:00:59 lr: 0.000049 grad: 0.0841 (0.0805) loss: 0.9849 (0.9886) time: 0.1591 data: 0.0744 max mem: 9305 +Train: [1] [6000/6250] eta: 0:00:42 lr: 0.000049 grad: 0.0750 (0.0805) loss: 0.9867 (0.9885) time: 0.1551 data: 0.0658 max mem: 9305 +Train: [1] [6100/6250] eta: 0:00:25 lr: 0.000049 grad: 0.0809 (0.0804) loss: 0.9870 (0.9885) time: 0.1592 data: 0.0699 max mem: 9305 +Train: [1] [6200/6250] eta: 0:00:08 lr: 0.000050 grad: 0.0855 (0.0805) loss: 0.9855 (0.9885) time: 0.2116 data: 0.1202 max mem: 9305 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0850 (0.0806) loss: 0.9857 (0.9885) time: 0.1775 data: 0.0919 max mem: 9305 +Train: [1] Total time: 0:17:47 (0.1708 s / it) +Averaged stats: lr: 0.000050 grad: 0.0850 (0.0806) loss: 0.9857 (0.9885) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:05:42 loss: 0.9878 (0.9878) time: 5.5208 data: 5.4864 max mem: 9305 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9899 (0.9892) time: 0.1632 data: 0.1328 max mem: 9305 +Eval (hcp-train-subset): [1] Total time: 0:00:14 (0.2376 s / it) +Averaged stats (hcp-train-subset): loss: 0.9899 (0.9892) +Eval (hcp-val): [1] [ 0/62] eta: 0:05:37 loss: 0.9882 (0.9882) time: 5.4378 data: 5.4043 max mem: 9305 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9879 (0.9876) time: 0.1642 data: 0.1335 max mem: 9305 +Eval (hcp-val): [1] Total time: 0:00:14 (0.2333 s / it) +Averaged stats (hcp-val): loss: 0.9879 (0.9876) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [2] [ 0/6250] eta: 8:56:18 lr: 0.000050 grad: 0.0881 (0.0881) loss: 0.9855 (0.9855) time: 5.1485 data: 4.8878 max mem: 9305 +Train: [2] [ 100/6250] eta: 0:22:55 lr: 0.000050 grad: 0.0893 (0.1123) loss: 0.9851 (0.9836) time: 0.1851 data: 0.0996 max mem: 9305 +Train: [2] [ 200/6250] eta: 0:19:44 lr: 0.000051 grad: 0.0845 (0.1050) loss: 0.9889 (0.9842) time: 0.1591 data: 0.0548 max mem: 9305 +Train: [2] [ 300/6250] eta: 0:19:11 lr: 0.000051 grad: 0.0931 (0.1000) loss: 0.9867 (0.9848) time: 0.2286 data: 0.1008 max mem: 9305 +Train: [2] [ 400/6250] eta: 0:19:00 lr: 0.000052 grad: 0.0811 (0.0961) loss: 0.9860 (0.9852) time: 0.1880 data: 0.0754 max mem: 9305 +Train: [2] [ 500/6250] eta: 0:18:22 lr: 0.000052 grad: 0.0791 (0.0935) loss: 0.9856 (0.9856) time: 0.1811 data: 0.0893 max mem: 9305 +Train: [2] [ 600/6250] eta: 0:17:50 lr: 0.000052 grad: 0.0845 (0.0916) loss: 0.9858 (0.9857) time: 0.1649 data: 0.0679 max mem: 9305 +Train: [2] [ 700/6250] eta: 0:17:38 lr: 0.000053 grad: 0.0724 (0.0904) loss: 0.9869 (0.9859) time: 0.2093 data: 0.1134 max mem: 9305 +Train: [2] [ 800/6250] eta: 0:17:15 lr: 0.000053 grad: 0.0887 (0.0902) loss: 0.9863 (0.9859) time: 0.1335 data: 0.0004 max mem: 9305 +Train: [2] [ 900/6250] eta: 0:16:50 lr: 0.000054 grad: 0.0712 (0.0896) loss: 0.9862 (0.9858) time: 0.1834 data: 0.0871 max mem: 9305 +Train: [2] [1000/6250] eta: 0:16:21 lr: 0.000054 grad: 0.0875 (0.0890) loss: 0.9866 (0.9858) time: 0.1236 data: 0.0288 max mem: 9305 +Train: [2] [1100/6250] eta: 0:15:56 lr: 0.000054 grad: 0.0757 (0.0883) loss: 0.9865 (0.9859) time: 0.1924 data: 0.1008 max mem: 9305 +Train: [2] [1200/6250] eta: 0:15:29 lr: 0.000055 grad: 0.0788 (0.0879) loss: 0.9865 (0.9859) time: 0.1792 data: 0.0879 max mem: 9305 +Train: [2] [1300/6250] eta: 0:15:09 lr: 0.000055 grad: 0.0815 (0.0876) loss: 0.9850 (0.9859) time: 0.1437 data: 0.0485 max mem: 9305 +Train: [2] [1400/6250] eta: 0:14:44 lr: 0.000056 grad: 0.0763 (0.0871) loss: 0.9859 (0.9859) time: 0.1706 data: 0.0828 max mem: 9305 +Train: [2] [1500/6250] eta: 0:14:22 lr: 0.000056 grad: 0.0767 (0.0870) loss: 0.9842 (0.9859) time: 0.1771 data: 0.0728 max mem: 9305 +Train: [2] [1600/6250] eta: 0:14:01 lr: 0.000056 grad: 0.0792 (0.0868) loss: 0.9850 (0.9859) time: 0.1603 data: 0.0640 max mem: 9305 +Train: [2] [1700/6250] eta: 0:13:43 lr: 0.000057 grad: 0.0673 (0.0868) loss: 0.9860 (0.9858) time: 0.1843 data: 0.0816 max mem: 9305 +Train: [2] [1800/6250] eta: 0:13:24 lr: 0.000057 grad: 0.0733 (0.0866) loss: 0.9867 (0.9858) time: 0.1914 data: 0.0984 max mem: 9305 +Train: [2] [1900/6250] eta: 0:13:05 lr: 0.000058 grad: 0.0772 (0.0864) loss: 0.9869 (0.9858) time: 0.1805 data: 0.0869 max mem: 9305 +Train: [2] [2000/6250] eta: 0:12:51 lr: 0.000058 grad: 0.0763 (0.0862) loss: 0.9845 (0.9858) time: 0.2080 data: 0.1087 max mem: 9305 +Train: [2] [2100/6250] eta: 0:12:30 lr: 0.000058 grad: 0.0762 (0.0861) loss: 0.9830 (0.9857) time: 0.1748 data: 0.0821 max mem: 9305 +Train: [2] [2200/6250] eta: 0:12:13 lr: 0.000059 grad: 0.0820 (0.0863) loss: 0.9853 (0.9856) time: 0.1191 data: 0.0213 max mem: 9305 +Train: [2] [2300/6250] eta: 0:11:54 lr: 0.000059 grad: 0.0738 (0.0861) loss: 0.9849 (0.9856) time: 0.1970 data: 0.1070 max mem: 9305 +Train: [2] [2400/6250] eta: 0:11:36 lr: 0.000060 grad: 0.0927 (0.0864) loss: 0.9849 (0.9856) time: 0.1505 data: 0.0558 max mem: 9305 +Train: [2] [2500/6250] eta: 0:11:17 lr: 0.000060 grad: 0.0795 (0.0864) loss: 0.9854 (0.9855) time: 0.1995 data: 0.0992 max mem: 9305 +Train: [2] [2600/6250] eta: 0:10:58 lr: 0.000060 grad: 0.0814 (0.0864) loss: 0.9866 (0.9854) time: 0.1987 data: 0.1077 max mem: 9305 +Train: [2] [2700/6250] eta: 0:10:39 lr: 0.000061 grad: 0.0925 (0.0866) loss: 0.9834 (0.9854) time: 0.1313 data: 0.0419 max mem: 9305 +Train: [2] [2800/6250] eta: 0:10:19 lr: 0.000061 grad: 0.0745 (0.0865) loss: 0.9856 (0.9854) time: 0.1635 data: 0.0763 max mem: 9305 +Train: [2] [2900/6250] eta: 0:10:02 lr: 0.000062 grad: 0.0833 (0.0864) loss: 0.9830 (0.9854) time: 0.1421 data: 0.0483 max mem: 9305 +Train: [2] [3000/6250] eta: 0:09:44 lr: 0.000062 grad: 0.0865 (0.0865) loss: 0.9838 (0.9853) time: 0.1768 data: 0.0867 max mem: 9305 +Train: [2] [3100/6250] eta: 0:09:26 lr: 0.000062 grad: 0.0886 (0.0865) loss: 0.9853 (0.9853) time: 0.1717 data: 0.0772 max mem: 9305 +Train: [2] [3200/6250] eta: 0:09:07 lr: 0.000063 grad: 0.0730 (0.0864) loss: 0.9845 (0.9853) time: 0.1780 data: 0.0857 max mem: 9305 +Train: [2] [3300/6250] eta: 0:08:50 lr: 0.000063 grad: 0.0768 (0.0863) loss: 0.9843 (0.9853) time: 0.1710 data: 0.0773 max mem: 9305 +Train: [2] [3400/6250] eta: 0:08:31 lr: 0.000064 grad: 0.0775 (0.0863) loss: 0.9845 (0.9853) time: 0.1881 data: 0.1001 max mem: 9305 +Train: [2] [3500/6250] eta: 0:08:14 lr: 0.000064 grad: 0.0716 (0.0862) loss: 0.9884 (0.9853) time: 0.1927 data: 0.1051 max mem: 9305 +Train: [2] [3600/6250] eta: 0:07:56 lr: 0.000064 grad: 0.0809 (0.0861) loss: 0.9866 (0.9853) time: 0.1951 data: 0.0968 max mem: 9305 +Train: [2] [3700/6250] eta: 0:07:39 lr: 0.000065 grad: 0.0864 (0.0861) loss: 0.9841 (0.9853) time: 0.1749 data: 0.0865 max mem: 9305 +Train: [2] [3800/6250] eta: 0:07:20 lr: 0.000065 grad: 0.0742 (0.0861) loss: 0.9863 (0.9853) time: 0.1707 data: 0.0850 max mem: 9305 +Train: [2] [3900/6250] eta: 0:07:01 lr: 0.000066 grad: 0.0843 (0.0861) loss: 0.9822 (0.9853) time: 0.1499 data: 0.0531 max mem: 9305 +Train: [2] [4000/6250] eta: 0:06:43 lr: 0.000066 grad: 0.0870 (0.0860) loss: 0.9859 (0.9853) time: 0.1468 data: 0.0623 max mem: 9305 +Train: [2] [4100/6250] eta: 0:06:25 lr: 0.000066 grad: 0.0753 (0.0858) loss: 0.9874 (0.9853) time: 0.1942 data: 0.1075 max mem: 9305 +Train: [2] [4200/6250] eta: 0:06:07 lr: 0.000067 grad: 0.0679 (0.0856) loss: 0.9858 (0.9853) time: 0.1681 data: 0.0717 max mem: 9305 +Train: [2] [4300/6250] eta: 0:05:49 lr: 0.000067 grad: 0.0747 (0.0855) loss: 0.9847 (0.9853) time: 0.1523 data: 0.0557 max mem: 9305 +Train: [2] [4400/6250] eta: 0:05:31 lr: 0.000068 grad: 0.0732 (0.0854) loss: 0.9868 (0.9853) time: 0.1586 data: 0.0746 max mem: 9305 +Train: [2] [4500/6250] eta: 0:05:13 lr: 0.000068 grad: 0.0662 (0.0853) loss: 0.9875 (0.9854) time: 0.1586 data: 0.0624 max mem: 9305 +Train: [2] [4600/6250] eta: 0:04:55 lr: 0.000068 grad: 0.0848 (0.0851) loss: 0.9849 (0.9854) time: 0.1370 data: 0.0495 max mem: 9305 +Train: [2] [4700/6250] eta: 0:04:37 lr: 0.000069 grad: 0.0782 (0.0852) loss: 0.9847 (0.9854) time: 0.1869 data: 0.0878 max mem: 9305 +Train: [2] [4800/6250] eta: 0:04:19 lr: 0.000069 grad: 0.0843 (0.0852) loss: 0.9861 (0.9854) time: 0.1695 data: 0.0697 max mem: 9305 +Train: [2] [4900/6250] eta: 0:04:01 lr: 0.000070 grad: 0.0864 (0.0852) loss: 0.9843 (0.9854) time: 0.1614 data: 0.0649 max mem: 9305 +Train: [2] [5000/6250] eta: 0:03:44 lr: 0.000070 grad: 0.0861 (0.0852) loss: 0.9853 (0.9854) time: 0.1930 data: 0.1039 max mem: 9305 +Train: [2] [5100/6250] eta: 0:03:26 lr: 0.000070 grad: 0.0770 (0.0852) loss: 0.9858 (0.9854) time: 0.2245 data: 0.1365 max mem: 9305 +Train: [2] [5200/6250] eta: 0:03:08 lr: 0.000071 grad: 0.0742 (0.0852) loss: 0.9873 (0.9854) time: 0.1526 data: 0.0508 max mem: 9305 +Train: [2] [5300/6250] eta: 0:02:50 lr: 0.000071 grad: 0.0783 (0.0853) loss: 0.9847 (0.9853) time: 0.1557 data: 0.0730 max mem: 9305 +Train: [2] [5400/6250] eta: 0:02:32 lr: 0.000072 grad: 0.0958 (0.0854) loss: 0.9831 (0.9853) time: 0.1952 data: 0.1079 max mem: 9305 +Train: [2] [5500/6250] eta: 0:02:14 lr: 0.000072 grad: 0.0962 (0.0855) loss: 0.9850 (0.9853) time: 0.1070 data: 0.0019 max mem: 9305 +Train: [2] [5600/6250] eta: 0:01:56 lr: 0.000072 grad: 0.0935 (0.0858) loss: 0.9833 (0.9853) time: 0.1698 data: 0.0944 max mem: 9305 +Train: [2] [5700/6250] eta: 0:01:38 lr: 0.000073 grad: 0.0875 (0.0860) loss: 0.9854 (0.9853) time: 0.1738 data: 0.0786 max mem: 9305 +Train: [2] [5800/6250] eta: 0:01:20 lr: 0.000073 grad: 0.0982 (0.0863) loss: 0.9855 (0.9853) time: 0.1480 data: 0.0565 max mem: 9305 +Train: [2] [5900/6250] eta: 0:01:02 lr: 0.000074 grad: 0.0939 (0.0866) loss: 0.9837 (0.9853) time: 0.2102 data: 0.1133 max mem: 9305 +Train: [2] [6000/6250] eta: 0:00:44 lr: 0.000074 grad: 0.1237 (0.0872) loss: 0.9847 (0.9852) time: 0.1550 data: 0.0707 max mem: 9305 +Train: [2] [6100/6250] eta: 0:00:26 lr: 0.000074 grad: 0.1015 (0.0876) loss: 0.9858 (0.9852) time: 0.1782 data: 0.0852 max mem: 9305 +Train: [2] [6200/6250] eta: 0:00:08 lr: 0.000075 grad: 0.0992 (0.0883) loss: 0.9835 (0.9852) time: 0.1500 data: 0.0547 max mem: 9305 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.1051 (0.0887) loss: 0.9855 (0.9852) time: 0.1502 data: 0.0503 max mem: 9305 +Train: [2] Total time: 0:18:40 (0.1793 s / it) +Averaged stats: lr: 0.000075 grad: 0.1051 (0.0887) loss: 0.9855 (0.9852) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:05:17 loss: 0.9847 (0.9847) time: 5.1257 data: 5.0909 max mem: 9305 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9866 (0.9873) time: 0.1314 data: 0.1010 max mem: 9305 +Eval (hcp-train-subset): [2] Total time: 0:00:14 (0.2281 s / it) +Averaged stats (hcp-train-subset): loss: 0.9866 (0.9873) +Eval (hcp-val): [2] [ 0/62] eta: 0:06:24 loss: 0.9801 (0.9801) time: 6.1970 data: 6.1616 max mem: 9305 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9857 (0.9855) time: 0.1415 data: 0.1112 max mem: 9305 +Eval (hcp-val): [2] Total time: 0:00:17 (0.2767 s / it) +Averaged stats (hcp-val): loss: 0.9857 (0.9855) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [3] [ 0/6250] eta: 11:48:39 lr: 0.000075 grad: 0.0482 (0.0482) loss: 0.9946 (0.9946) time: 6.8031 data: 6.5520 max mem: 9305 +Train: [3] [ 100/6250] eta: 0:27:36 lr: 0.000075 grad: 0.1001 (0.1247) loss: 0.9848 (0.9851) time: 0.2025 data: 0.0701 max mem: 9305 +Train: [3] [ 200/6250] eta: 0:23:13 lr: 0.000076 grad: 0.0922 (0.1240) loss: 0.9827 (0.9843) time: 0.2110 data: 0.1270 max mem: 9305 +Train: [3] [ 300/6250] eta: 0:21:14 lr: 0.000076 grad: 0.1417 (0.1364) loss: 0.9856 (0.9840) time: 0.1567 data: 0.0550 max mem: 9305 +Train: [3] [ 400/6250] eta: 0:20:13 lr: 0.000077 grad: 0.1262 (0.1404) loss: 0.9853 (0.9839) time: 0.1931 data: 0.0967 max mem: 9305 +Train: [3] [ 500/6250] eta: 0:19:24 lr: 0.000077 grad: 0.1378 (0.1428) loss: 0.9831 (0.9839) time: 0.1980 data: 0.0749 max mem: 9305 +Train: [3] [ 600/6250] eta: 0:19:22 lr: 0.000077 grad: 0.2607 (0.1504) loss: 0.9840 (0.9840) time: 0.2025 data: 0.1176 max mem: 9305 +Train: [3] [ 700/6250] eta: 0:18:46 lr: 0.000078 grad: 0.0976 (0.1504) loss: 0.9859 (0.9841) time: 0.1762 data: 0.0737 max mem: 9305 +Train: [3] [ 800/6250] eta: 0:18:41 lr: 0.000078 grad: 0.1405 (0.1526) loss: 0.9841 (0.9841) time: 0.1522 data: 0.0003 max mem: 9305 +Train: [3] [ 900/6250] eta: 0:18:03 lr: 0.000079 grad: 0.1659 (0.1565) loss: 0.9830 (0.9841) time: 0.1587 data: 0.0632 max mem: 9305 +Train: [3] [1000/6250] eta: 0:17:33 lr: 0.000079 grad: 0.0975 (0.1583) loss: 0.9794 (0.9839) time: 0.1673 data: 0.0688 max mem: 9305 +Train: [3] [1100/6250] eta: 0:17:07 lr: 0.000079 grad: 0.1409 (0.1601) loss: 0.9848 (0.9839) time: 0.1950 data: 0.1008 max mem: 9305 +Train: [3] [1200/6250] eta: 0:16:45 lr: 0.000080 grad: 0.1633 (0.1618) loss: 0.9815 (0.9838) time: 0.2782 data: 0.1723 max mem: 9305 +Train: [3] [1300/6250] eta: 0:16:12 lr: 0.000080 grad: 0.1222 (0.1627) loss: 0.9831 (0.9837) time: 0.1731 data: 0.0796 max mem: 9305 +Train: [3] [1400/6250] eta: 0:15:47 lr: 0.000081 grad: 0.1973 (0.1657) loss: 0.9821 (0.9837) time: 0.1333 data: 0.0375 max mem: 9305 +Train: [3] [1500/6250] eta: 0:15:25 lr: 0.000081 grad: 0.1383 (0.1667) loss: 0.9831 (0.9836) time: 0.1554 data: 0.0575 max mem: 9305 +Train: [3] [1600/6250] eta: 0:15:01 lr: 0.000081 grad: 0.1097 (0.1688) loss: 0.9809 (0.9835) time: 0.1827 data: 0.0850 max mem: 9305 +Train: [3] [1700/6250] eta: 0:14:38 lr: 0.000082 grad: 0.1690 (0.1714) loss: 0.9833 (0.9834) time: 0.1405 data: 0.0360 max mem: 9305 +Train: [3] [1800/6250] eta: 0:14:16 lr: 0.000082 grad: 0.1716 (0.1729) loss: 0.9818 (0.9832) time: 0.1410 data: 0.0449 max mem: 9305 +Train: [3] [1900/6250] eta: 0:13:55 lr: 0.000083 grad: 0.2519 (0.1761) loss: 0.9835 (0.9832) time: 0.1128 data: 0.0147 max mem: 9305 +Train: [3] [2000/6250] eta: 0:13:40 lr: 0.000083 grad: 0.1559 (0.1763) loss: 0.9810 (0.9832) time: 0.1513 data: 0.0462 max mem: 9305 +Train: [3] [2100/6250] eta: 0:13:14 lr: 0.000083 grad: 0.1552 (0.1770) loss: 0.9802 (0.9831) time: 0.1480 data: 0.0582 max mem: 9305 +Train: [3] [2200/6250] eta: 0:12:52 lr: 0.000084 grad: 0.1574 (0.1774) loss: 0.9811 (0.9830) time: 0.1903 data: 0.0944 max mem: 9305 +Train: [3] [2300/6250] eta: 0:12:31 lr: 0.000084 grad: 0.1744 (0.1785) loss: 0.9797 (0.9830) time: 0.1072 data: 0.0003 max mem: 9305 +Train: [3] [2400/6250] eta: 0:12:11 lr: 0.000085 grad: 0.1705 (0.1795) loss: 0.9813 (0.9829) time: 0.1809 data: 0.0949 max mem: 9305 +Train: [3] [2500/6250] eta: 0:11:50 lr: 0.000085 grad: 0.1507 (0.1800) loss: 0.9808 (0.9828) time: 0.1941 data: 0.0973 max mem: 9305 +Train: [3] [2600/6250] eta: 0:11:30 lr: 0.000085 grad: 0.1601 (0.1806) loss: 0.9815 (0.9828) time: 0.1501 data: 0.0537 max mem: 9305 +Train: [3] [2700/6250] eta: 0:11:09 lr: 0.000086 grad: 0.1696 (0.1811) loss: 0.9811 (0.9827) time: 0.1953 data: 0.1032 max mem: 9305 +Train: [3] [2800/6250] eta: 0:10:51 lr: 0.000086 grad: 0.1705 (0.1815) loss: 0.9789 (0.9826) time: 0.1141 data: 0.0010 max mem: 9305 +Train: [3] [2900/6250] eta: 0:10:30 lr: 0.000087 grad: 0.1307 (0.1808) loss: 0.9806 (0.9825) time: 0.1602 data: 0.0734 max mem: 9305 +Train: [3] [3000/6250] eta: 0:10:12 lr: 0.000087 grad: 0.2250 (0.1821) loss: 0.9830 (0.9825) time: 0.3034 data: 0.2013 max mem: 9305 +Train: [3] [3100/6250] eta: 0:09:51 lr: 0.000087 grad: 0.1311 (0.1827) loss: 0.9805 (0.9825) time: 0.2228 data: 0.1335 max mem: 9305 +Train: [3] [3200/6250] eta: 0:09:29 lr: 0.000088 grad: 0.1416 (0.1826) loss: 0.9781 (0.9824) time: 0.1697 data: 0.0829 max mem: 9305 +Train: [3] [3300/6250] eta: 0:09:10 lr: 0.000088 grad: 0.1535 (0.1830) loss: 0.9798 (0.9823) time: 0.1777 data: 0.0803 max mem: 9305 +Train: [3] [3400/6250] eta: 0:08:51 lr: 0.000089 grad: 0.1958 (0.1837) loss: 0.9798 (0.9823) time: 0.1951 data: 0.0993 max mem: 9305 +Train: [3] [3500/6250] eta: 0:08:31 lr: 0.000089 grad: 0.1722 (0.1842) loss: 0.9806 (0.9822) time: 0.1913 data: 0.0753 max mem: 9305 +Train: [3] [3600/6250] eta: 0:08:13 lr: 0.000089 grad: 0.1845 (0.1839) loss: 0.9800 (0.9821) time: 0.1821 data: 0.0622 max mem: 9305 +Train: [3] [3700/6250] eta: 0:07:54 lr: 0.000090 grad: 0.1140 (0.1837) loss: 0.9779 (0.9821) time: 0.1321 data: 0.0385 max mem: 9305 +Train: [3] [3800/6250] eta: 0:07:35 lr: 0.000090 grad: 0.1599 (0.1838) loss: 0.9800 (0.9820) time: 0.1482 data: 0.0454 max mem: 9305 +Train: [3] [3900/6250] eta: 0:07:15 lr: 0.000091 grad: 0.1786 (0.1836) loss: 0.9815 (0.9819) time: 0.1416 data: 0.0480 max mem: 9305 +Train: [3] [4000/6250] eta: 0:06:57 lr: 0.000091 grad: 0.1744 (0.1836) loss: 0.9778 (0.9819) time: 0.2013 data: 0.1226 max mem: 9305 +Train: [3] [4100/6250] eta: 0:06:37 lr: 0.000091 grad: 0.1711 (0.1834) loss: 0.9795 (0.9818) time: 0.1653 data: 0.0761 max mem: 9305 +Train: [3] [4200/6250] eta: 0:06:20 lr: 0.000092 grad: 0.1481 (0.1832) loss: 0.9790 (0.9818) time: 0.1136 data: 0.0003 max mem: 9305 +Train: [3] [4300/6250] eta: 0:06:00 lr: 0.000092 grad: 0.2127 (0.1832) loss: 0.9769 (0.9817) time: 0.1487 data: 0.0630 max mem: 9305 +Train: [3] [4400/6250] eta: 0:05:41 lr: 0.000093 grad: 0.1559 (0.1830) loss: 0.9798 (0.9816) time: 0.1572 data: 0.0641 max mem: 9305 +Train: [3] [4500/6250] eta: 0:05:22 lr: 0.000093 grad: 0.1597 (0.1830) loss: 0.9781 (0.9815) time: 0.2042 data: 0.1225 max mem: 9305 +Train: [3] [4600/6250] eta: 0:05:03 lr: 0.000093 grad: 0.1474 (0.1834) loss: 0.9763 (0.9814) time: 0.1403 data: 0.0469 max mem: 9305 +Train: [3] [4700/6250] eta: 0:04:44 lr: 0.000094 grad: 0.1448 (0.1831) loss: 0.9783 (0.9813) time: 0.1536 data: 0.0606 max mem: 9305 +Train: [3] [4800/6250] eta: 0:04:26 lr: 0.000094 grad: 0.1711 (0.1836) loss: 0.9775 (0.9812) time: 0.1537 data: 0.0652 max mem: 9305 +Train: [3] [4900/6250] eta: 0:04:07 lr: 0.000095 grad: 0.1310 (0.1837) loss: 0.9777 (0.9812) time: 0.1883 data: 0.1001 max mem: 9305 +Train: [3] [5000/6250] eta: 0:03:49 lr: 0.000095 grad: 0.1771 (0.1835) loss: 0.9786 (0.9811) time: 0.2083 data: 0.1164 max mem: 9305 +Train: [3] [5100/6250] eta: 0:03:30 lr: 0.000095 grad: 0.1227 (0.1833) loss: 0.9799 (0.9810) time: 0.1634 data: 0.0710 max mem: 9305 +Train: [3] [5200/6250] eta: 0:03:11 lr: 0.000096 grad: 0.2264 (0.1834) loss: 0.9771 (0.9809) time: 0.1391 data: 0.0520 max mem: 9305 +Train: [3] [5300/6250] eta: 0:02:53 lr: 0.000096 grad: 0.1273 (0.1836) loss: 0.9750 (0.9808) time: 0.1715 data: 0.0798 max mem: 9305 +Train: [3] [5400/6250] eta: 0:02:35 lr: 0.000097 grad: 0.1114 (0.1831) loss: 0.9746 (0.9807) time: 0.3649 data: 0.2820 max mem: 9305 +Train: [3] [5500/6250] eta: 0:02:16 lr: 0.000097 grad: 0.1316 (0.1832) loss: 0.9754 (0.9806) time: 0.1397 data: 0.0510 max mem: 9305 +Train: [3] [5600/6250] eta: 0:01:58 lr: 0.000097 grad: 0.1226 (0.1832) loss: 0.9716 (0.9805) time: 0.2190 data: 0.1424 max mem: 9305 +Train: [3] [5700/6250] eta: 0:01:40 lr: 0.000098 grad: 0.1223 (0.1827) loss: 0.9756 (0.9804) time: 0.1997 data: 0.1042 max mem: 9305 +Train: [3] [5800/6250] eta: 0:01:21 lr: 0.000098 grad: 0.1535 (0.1827) loss: 0.9736 (0.9803) time: 0.1789 data: 0.0852 max mem: 9305 +Train: [3] [5900/6250] eta: 0:01:03 lr: 0.000099 grad: 0.1875 (0.1832) loss: 0.9745 (0.9802) time: 0.2280 data: 0.1247 max mem: 9305 +Train: [3] [6000/6250] eta: 0:00:45 lr: 0.000099 grad: 0.1569 (0.1831) loss: 0.9717 (0.9800) time: 0.2442 data: 0.1167 max mem: 9305 +Train: [3] [6100/6250] eta: 0:00:27 lr: 0.000099 grad: 0.1642 (0.1834) loss: 0.9726 (0.9799) time: 0.2468 data: 0.1254 max mem: 9305 +Train: [3] [6200/6250] eta: 0:00:09 lr: 0.000100 grad: 0.1672 (0.1833) loss: 0.9706 (0.9797) time: 0.1683 data: 0.0798 max mem: 9305 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1492 (0.1832) loss: 0.9718 (0.9797) time: 0.1796 data: 0.0813 max mem: 9305 +Train: [3] Total time: 0:19:19 (0.1855 s / it) +Averaged stats: lr: 0.000100 grad: 0.1492 (0.1832) loss: 0.9718 (0.9797) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:04:21 loss: 0.9748 (0.9748) time: 4.2234 data: 4.1279 max mem: 9305 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9744 (0.9734) time: 0.1126 data: 0.0826 max mem: 9305 +Eval (hcp-train-subset): [3] Total time: 0:00:16 (0.2738 s / it) +Averaged stats (hcp-train-subset): loss: 0.9744 (0.9734) +Eval (hcp-val): [3] [ 0/62] eta: 0:03:52 loss: 0.9635 (0.9635) time: 3.7573 data: 3.6931 max mem: 9305 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9714 (0.9716) time: 0.1540 data: 0.1235 max mem: 9305 +Eval (hcp-val): [3] Total time: 0:00:15 (0.2462 s / it) +Averaged stats (hcp-val): loss: 0.9714 (0.9716) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [4] [ 0/6250] eta: 7:59:19 lr: 0.000100 grad: 0.1872 (0.1872) loss: 0.9756 (0.9756) time: 4.6015 data: 4.2157 max mem: 9305 +Train: [4] [ 100/6250] eta: 0:30:09 lr: 0.000100 grad: 0.1796 (0.1958) loss: 0.9724 (0.9738) time: 0.3902 data: 0.2780 max mem: 9305 +Train: [4] [ 200/6250] eta: 0:24:15 lr: 0.000101 grad: 0.1442 (0.1955) loss: 0.9660 (0.9709) time: 0.1626 data: 0.0420 max mem: 9305 +Train: [4] [ 300/6250] eta: 0:23:06 lr: 0.000101 grad: 0.1265 (0.1930) loss: 0.9714 (0.9698) time: 0.2966 data: 0.1721 max mem: 9305 +Train: [4] [ 400/6250] eta: 0:21:16 lr: 0.000102 grad: 0.1198 (0.1864) loss: 0.9731 (0.9698) time: 0.1571 data: 0.0709 max mem: 9305 +Train: [4] [ 500/6250] eta: 0:21:03 lr: 0.000102 grad: 0.1745 (0.1914) loss: 0.9688 (0.9698) time: 0.2024 data: 0.0875 max mem: 9305 +Train: [4] [ 600/6250] eta: 0:19:57 lr: 0.000102 grad: 0.1873 (0.1896) loss: 0.9693 (0.9698) time: 0.1854 data: 0.0872 max mem: 9305 +Train: [4] [ 700/6250] eta: 0:19:13 lr: 0.000103 grad: 0.2013 (0.1904) loss: 0.9680 (0.9696) time: 0.2103 data: 0.0872 max mem: 9305 +Train: [4] [ 800/6250] eta: 0:18:55 lr: 0.000103 grad: 0.1711 (0.1914) loss: 0.9674 (0.9692) time: 0.2314 data: 0.1315 max mem: 9305 +Train: [4] [ 900/6250] eta: 0:18:47 lr: 0.000104 grad: 0.2081 (0.1960) loss: 0.9658 (0.9688) time: 0.4333 data: 0.2779 max mem: 9305 +Train: [4] [1000/6250] eta: 0:17:57 lr: 0.000104 grad: 0.1683 (0.1964) loss: 0.9644 (0.9684) time: 0.1701 data: 0.0751 max mem: 9305 +Train: [4] [1100/6250] eta: 0:17:27 lr: 0.000104 grad: 0.1609 (0.2005) loss: 0.9652 (0.9681) time: 0.2075 data: 0.1030 max mem: 9305 +Train: [4] [1200/6250] eta: 0:16:52 lr: 0.000105 grad: 0.1710 (0.2017) loss: 0.9648 (0.9678) time: 0.1531 data: 0.0602 max mem: 9305 +Train: [4] [1300/6250] eta: 0:16:21 lr: 0.000105 grad: 0.1181 (0.2023) loss: 0.9636 (0.9675) time: 0.1970 data: 0.1047 max mem: 9305 +Train: [4] [1400/6250] eta: 0:16:00 lr: 0.000106 grad: 0.2121 (0.2034) loss: 0.9638 (0.9672) time: 0.1387 data: 0.0403 max mem: 9305 +Train: [4] [1500/6250] eta: 0:15:34 lr: 0.000106 grad: 0.1636 (0.2043) loss: 0.9629 (0.9670) time: 0.1636 data: 0.0588 max mem: 9305 +Train: [4] [1600/6250] eta: 0:15:10 lr: 0.000106 grad: 0.1656 (0.2045) loss: 0.9660 (0.9669) time: 0.2196 data: 0.1207 max mem: 9305 +Train: [4] [1700/6250] eta: 0:14:43 lr: 0.000107 grad: 0.1185 (0.2037) loss: 0.9616 (0.9666) time: 0.1705 data: 0.0783 max mem: 9305 +Train: [4] [1800/6250] eta: 0:14:22 lr: 0.000107 grad: 0.2291 (0.2041) loss: 0.9615 (0.9664) time: 0.1471 data: 0.0237 max mem: 9305 +Train: [4] [1900/6250] eta: 0:13:57 lr: 0.000108 grad: 0.1829 (0.2037) loss: 0.9638 (0.9663) time: 0.1653 data: 0.0676 max mem: 9305 +Train: [4] [2000/6250] eta: 0:13:34 lr: 0.000108 grad: 0.1736 (0.2029) loss: 0.9619 (0.9660) time: 0.1323 data: 0.0355 max mem: 9305 +Train: [4] [2100/6250] eta: 0:13:12 lr: 0.000108 grad: 0.1837 (0.2030) loss: 0.9638 (0.9658) time: 0.2074 data: 0.1111 max mem: 9305 +Train: [4] [2200/6250] eta: 0:12:48 lr: 0.000109 grad: 0.1809 (0.2025) loss: 0.9595 (0.9657) time: 0.1506 data: 0.0601 max mem: 9305 +Train: [4] [2300/6250] eta: 0:12:31 lr: 0.000109 grad: 0.2348 (0.2017) loss: 0.9638 (0.9655) time: 0.1455 data: 0.0535 max mem: 9305 +Train: [4] [2400/6250] eta: 0:12:09 lr: 0.000110 grad: 0.1426 (0.2015) loss: 0.9611 (0.9653) time: 0.1936 data: 0.0968 max mem: 9305 +Train: [4] [2500/6250] eta: 0:11:48 lr: 0.000110 grad: 0.1753 (0.2018) loss: 0.9604 (0.9652) time: 0.1887 data: 0.0985 max mem: 9305 +Train: [4] [2600/6250] eta: 0:11:29 lr: 0.000110 grad: 0.1658 (0.2011) loss: 0.9575 (0.9650) time: 0.2277 data: 0.1316 max mem: 9305 +Train: [4] [2700/6250] eta: 0:11:10 lr: 0.000111 grad: 0.1791 (0.2006) loss: 0.9586 (0.9648) time: 0.1735 data: 0.0837 max mem: 9305 +Train: [4] [2800/6250] eta: 0:10:49 lr: 0.000111 grad: 0.1575 (0.2009) loss: 0.9611 (0.9647) time: 0.1904 data: 0.1054 max mem: 9305 +Train: [4] [2900/6250] eta: 0:10:33 lr: 0.000112 grad: 0.1627 (0.2009) loss: 0.9590 (0.9645) time: 0.2282 data: 0.1106 max mem: 9305 +Train: [4] [3000/6250] eta: 0:10:14 lr: 0.000112 grad: 0.1960 (0.2022) loss: 0.9582 (0.9643) time: 0.1152 data: 0.0211 max mem: 9305 +Train: [4] [3100/6250] eta: 0:09:54 lr: 0.000112 grad: 0.2111 (0.2023) loss: 0.9589 (0.9641) time: 0.2685 data: 0.1747 max mem: 9305 +Train: [4] [3200/6250] eta: 0:09:35 lr: 0.000113 grad: 0.1804 (0.2024) loss: 0.9545 (0.9639) time: 0.1476 data: 0.0360 max mem: 9305 +Train: [4] [3300/6250] eta: 0:09:16 lr: 0.000113 grad: 0.2372 (0.2035) loss: 0.9562 (0.9637) time: 0.1995 data: 0.1026 max mem: 9305 +Train: [4] [3400/6250] eta: 0:08:57 lr: 0.000114 grad: 0.1647 (0.2047) loss: 0.9575 (0.9635) time: 0.2166 data: 0.1161 max mem: 9305 +Train: [4] [3500/6250] eta: 0:08:37 lr: 0.000114 grad: 0.2462 (0.2059) loss: 0.9547 (0.9633) time: 0.1579 data: 0.0592 max mem: 9305 +Train: [4] [3600/6250] eta: 0:08:19 lr: 0.000114 grad: 0.1897 (0.2068) loss: 0.9569 (0.9630) time: 0.2392 data: 0.1444 max mem: 9305 +Train: [4] [3700/6250] eta: 0:07:59 lr: 0.000115 grad: 0.2467 (0.2083) loss: 0.9560 (0.9628) time: 0.1697 data: 0.0714 max mem: 9305 +Train: [4] [3800/6250] eta: 0:07:40 lr: 0.000115 grad: 0.2071 (0.2092) loss: 0.9524 (0.9626) time: 0.2231 data: 0.1152 max mem: 9305 +Train: [4] [3900/6250] eta: 0:07:21 lr: 0.000116 grad: 0.2606 (0.2100) loss: 0.9512 (0.9623) time: 0.1922 data: 0.1000 max mem: 9305 +Train: [4] [4000/6250] eta: 0:07:01 lr: 0.000116 grad: 0.1660 (0.2105) loss: 0.9546 (0.9621) time: 0.1829 data: 0.0845 max mem: 9305 +Train: [4] [4100/6250] eta: 0:06:42 lr: 0.000116 grad: 0.2676 (0.2116) loss: 0.9511 (0.9618) time: 0.1927 data: 0.0875 max mem: 9305 +Train: [4] [4200/6250] eta: 0:06:22 lr: 0.000117 grad: 0.1695 (0.2123) loss: 0.9522 (0.9615) time: 0.1611 data: 0.0700 max mem: 9305 +Train: [4] [4300/6250] eta: 0:06:03 lr: 0.000117 grad: 0.2130 (0.2125) loss: 0.9485 (0.9613) time: 0.1446 data: 0.0528 max mem: 9305 +Train: [4] [4400/6250] eta: 0:05:44 lr: 0.000118 grad: 0.2325 (0.2135) loss: 0.9500 (0.9610) time: 0.1592 data: 0.0614 max mem: 9305 +Train: [4] [4500/6250] eta: 0:05:26 lr: 0.000118 grad: 0.2090 (0.2142) loss: 0.9476 (0.9608) time: 0.2079 data: 0.1183 max mem: 9305 +Train: [4] [4600/6250] eta: 0:05:06 lr: 0.000118 grad: 0.2380 (0.2146) loss: 0.9483 (0.9605) time: 0.1478 data: 0.0533 max mem: 9305 +Train: [4] [4700/6250] eta: 0:04:47 lr: 0.000119 grad: 0.2120 (0.2153) loss: 0.9462 (0.9602) time: 0.1418 data: 0.0530 max mem: 9305 +Train: [4] [4800/6250] eta: 0:04:28 lr: 0.000119 grad: 0.2085 (0.2162) loss: 0.9463 (0.9599) time: 0.2086 data: 0.1136 max mem: 9305 +Train: [4] [4900/6250] eta: 0:04:09 lr: 0.000120 grad: 0.1798 (0.2164) loss: 0.9444 (0.9596) time: 0.1707 data: 0.0767 max mem: 9305 +Train: [4] [5000/6250] eta: 0:03:50 lr: 0.000120 grad: 0.1939 (0.2168) loss: 0.9454 (0.9593) time: 0.1867 data: 0.0783 max mem: 9305 +Train: [4] [5100/6250] eta: 0:03:32 lr: 0.000120 grad: 0.2654 (0.2171) loss: 0.9424 (0.9590) time: 0.1670 data: 0.0781 max mem: 9305 +Train: [4] [5200/6250] eta: 0:03:13 lr: 0.000121 grad: 0.1726 (0.2175) loss: 0.9440 (0.9587) time: 0.1399 data: 0.0342 max mem: 9305 +Train: [4] [5300/6250] eta: 0:02:54 lr: 0.000121 grad: 0.2514 (0.2181) loss: 0.9411 (0.9584) time: 0.1419 data: 0.0504 max mem: 9305 +Train: [4] [5400/6250] eta: 0:02:35 lr: 0.000122 grad: 0.1748 (0.2183) loss: 0.9426 (0.9581) time: 0.1584 data: 0.0636 max mem: 9305 +Train: [4] [5500/6250] eta: 0:02:17 lr: 0.000122 grad: 0.2067 (0.2187) loss: 0.9453 (0.9578) time: 0.1865 data: 0.0975 max mem: 9305 +Train: [4] [5600/6250] eta: 0:01:59 lr: 0.000122 grad: 0.1989 (0.2189) loss: 0.9415 (0.9576) time: 0.2093 data: 0.1169 max mem: 9305 +Train: [4] [5700/6250] eta: 0:01:41 lr: 0.000123 grad: 0.2140 (0.2190) loss: 0.9387 (0.9573) time: 0.1915 data: 0.1083 max mem: 9305 +Train: [4] [5800/6250] eta: 0:01:22 lr: 0.000123 grad: 0.2113 (0.2192) loss: 0.9379 (0.9569) time: 0.1926 data: 0.0932 max mem: 9305 +Train: [4] [5900/6250] eta: 0:01:04 lr: 0.000124 grad: 0.2021 (0.2193) loss: 0.9395 (0.9566) time: 0.1200 data: 0.0169 max mem: 9305 +Train: [4] [6000/6250] eta: 0:00:46 lr: 0.000124 grad: 0.1716 (0.2193) loss: 0.9368 (0.9563) time: 0.1977 data: 0.0985 max mem: 9305 +Train: [4] [6100/6250] eta: 0:00:27 lr: 0.000124 grad: 0.1960 (0.2195) loss: 0.9374 (0.9560) time: 0.2758 data: 0.1786 max mem: 9305 +Train: [4] [6200/6250] eta: 0:00:09 lr: 0.000125 grad: 0.1945 (0.2193) loss: 0.9388 (0.9557) time: 0.1397 data: 0.0441 max mem: 9305 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1895 (0.2192) loss: 0.9389 (0.9556) time: 0.1706 data: 0.0717 max mem: 9305 +Train: [4] Total time: 0:19:24 (0.1863 s / it) +Averaged stats: lr: 0.000125 grad: 0.1895 (0.2192) loss: 0.9389 (0.9556) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:05:14 loss: 0.9446 (0.9446) time: 5.0678 data: 5.0308 max mem: 9305 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9413 (0.9406) time: 0.1479 data: 0.1189 max mem: 9305 +Eval (hcp-train-subset): [4] Total time: 0:00:15 (0.2516 s / it) +Averaged stats (hcp-train-subset): loss: 0.9413 (0.9406) +Making plots (hcp-train-subset): example=39 +Eval (hcp-val): [4] [ 0/62] eta: 0:04:13 loss: 0.9328 (0.9328) time: 4.0838 data: 3.9800 max mem: 9305 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9360 (0.9359) time: 0.1654 data: 0.1367 max mem: 9305 +Eval (hcp-val): [4] Total time: 0:00:15 (0.2500 s / it) +Averaged stats (hcp-val): loss: 0.9360 (0.9359) +Making plots (hcp-val): example=10 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [5] [ 0/6250] eta: 9:28:44 lr: 0.000125 grad: 0.2499 (0.2499) loss: 0.9283 (0.9283) time: 5.4599 data: 5.3392 max mem: 9305 +Train: [5] [ 100/6250] eta: 0:23:40 lr: 0.000125 grad: 0.1672 (0.2406) loss: 0.9359 (0.9367) time: 0.2056 data: 0.1035 max mem: 9305 +Train: [5] [ 200/6250] eta: 0:21:32 lr: 0.000125 grad: 0.2148 (0.2170) loss: 0.9326 (0.9354) time: 0.2223 data: 0.1287 max mem: 9305 +Train: [5] [ 300/6250] eta: 0:20:06 lr: 0.000125 grad: 0.1956 (0.2108) loss: 0.9337 (0.9349) time: 0.1564 data: 0.0643 max mem: 9305 +Train: [5] [ 400/6250] eta: 0:19:22 lr: 0.000125 grad: 0.2095 (0.2078) loss: 0.9353 (0.9348) time: 0.2126 data: 0.1205 max mem: 9305 +Train: [5] [ 500/6250] eta: 0:19:05 lr: 0.000125 grad: 0.1645 (0.2054) loss: 0.9273 (0.9343) time: 0.2018 data: 0.0731 max mem: 9305 +Train: [5] [ 600/6250] eta: 0:18:29 lr: 0.000125 grad: 0.1508 (0.2033) loss: 0.9346 (0.9340) time: 0.1979 data: 0.1044 max mem: 9305 +Train: [5] [ 700/6250] eta: 0:17:58 lr: 0.000125 grad: 0.1459 (0.2035) loss: 0.9321 (0.9339) time: 0.1410 data: 0.0351 max mem: 9305 +Train: [5] [ 800/6250] eta: 0:17:32 lr: 0.000125 grad: 0.1612 (0.2017) loss: 0.9297 (0.9335) time: 0.1776 data: 0.0890 max mem: 9305 +Train: [5] [ 900/6250] eta: 0:17:00 lr: 0.000125 grad: 0.1894 (0.2013) loss: 0.9307 (0.9333) time: 0.1523 data: 0.0544 max mem: 9305 +Train: [5] [1000/6250] eta: 0:16:27 lr: 0.000125 grad: 0.2173 (0.2003) loss: 0.9319 (0.9331) time: 0.1634 data: 0.0780 max mem: 9305 +Train: [5] [1100/6250] eta: 0:15:53 lr: 0.000125 grad: 0.1706 (0.1994) loss: 0.9256 (0.9327) time: 0.1748 data: 0.0730 max mem: 9305 +Train: [5] [1200/6250] eta: 0:15:42 lr: 0.000125 grad: 0.1646 (0.1975) loss: 0.9280 (0.9326) time: 0.1834 data: 0.0907 max mem: 9305 +Train: [5] [1300/6250] eta: 0:15:17 lr: 0.000125 grad: 0.1791 (0.1972) loss: 0.9321 (0.9325) time: 0.1394 data: 0.0500 max mem: 9305 +Train: [5] [1400/6250] eta: 0:14:53 lr: 0.000125 grad: 0.1676 (0.1964) loss: 0.9313 (0.9324) time: 0.1488 data: 0.0556 max mem: 9305 +Train: [5] [1500/6250] eta: 0:14:33 lr: 0.000125 grad: 0.1507 (0.1947) loss: 0.9343 (0.9324) time: 0.1084 data: 0.0196 max mem: 9305 +Train: [5] [1600/6250] eta: 0:14:10 lr: 0.000125 grad: 0.1493 (0.1936) loss: 0.9285 (0.9323) time: 0.1939 data: 0.0982 max mem: 9305 +Train: [5] [1700/6250] eta: 0:13:49 lr: 0.000125 grad: 0.1387 (0.1925) loss: 0.9325 (0.9322) time: 0.2055 data: 0.1148 max mem: 9305 +Train: [5] [1800/6250] eta: 0:13:26 lr: 0.000125 grad: 0.1684 (0.1918) loss: 0.9289 (0.9320) time: 0.1920 data: 0.0908 max mem: 9305 +Train: [5] [1900/6250] eta: 0:13:06 lr: 0.000125 grad: 0.1475 (0.1911) loss: 0.9256 (0.9319) time: 0.1842 data: 0.1007 max mem: 9305 +Train: [5] [2000/6250] eta: 0:12:47 lr: 0.000125 grad: 0.1742 (0.1911) loss: 0.9295 (0.9317) time: 0.1779 data: 0.0812 max mem: 9305 +Train: [5] [2100/6250] eta: 0:12:29 lr: 0.000125 grad: 0.1910 (0.1906) loss: 0.9286 (0.9315) time: 0.1658 data: 0.0742 max mem: 9305 +Train: [5] [2200/6250] eta: 0:12:17 lr: 0.000125 grad: 0.1530 (0.1896) loss: 0.9260 (0.9314) time: 0.2970 data: 0.1971 max mem: 9305 +Train: [5] [2300/6250] eta: 0:11:54 lr: 0.000125 grad: 0.1550 (0.1885) loss: 0.9282 (0.9313) time: 0.1859 data: 0.0929 max mem: 9305 +Train: [5] [2400/6250] eta: 0:11:36 lr: 0.000125 grad: 0.1366 (0.1878) loss: 0.9223 (0.9310) time: 0.1946 data: 0.0914 max mem: 9305 +Train: [5] [2500/6250] eta: 0:11:16 lr: 0.000125 grad: 0.1799 (0.1872) loss: 0.9241 (0.9308) time: 0.1460 data: 0.0522 max mem: 9305 +Train: [5] [2600/6250] eta: 0:11:01 lr: 0.000125 grad: 0.1407 (0.1863) loss: 0.9263 (0.9305) time: 0.1322 data: 0.0109 max mem: 9305 +Train: [5] [2700/6250] eta: 0:10:41 lr: 0.000125 grad: 0.1565 (0.1858) loss: 0.9266 (0.9303) time: 0.1843 data: 0.0883 max mem: 9305 +Train: [5] [2800/6250] eta: 0:10:22 lr: 0.000125 grad: 0.1579 (0.1860) loss: 0.9243 (0.9300) time: 0.1252 data: 0.0413 max mem: 9305 +Train: [5] [2900/6250] eta: 0:10:04 lr: 0.000125 grad: 0.1419 (0.1853) loss: 0.9209 (0.9298) time: 0.1746 data: 0.0875 max mem: 9305 +Train: [5] [3000/6250] eta: 0:09:46 lr: 0.000125 grad: 0.1841 (0.1848) loss: 0.9254 (0.9296) time: 0.1622 data: 0.0670 max mem: 9305 +Train: [5] [3100/6250] eta: 0:09:27 lr: 0.000125 grad: 0.1509 (0.1842) loss: 0.9267 (0.9294) time: 0.1988 data: 0.1063 max mem: 9305 +Train: [5] [3200/6250] eta: 0:09:10 lr: 0.000125 grad: 0.1444 (0.1840) loss: 0.9233 (0.9292) time: 0.2099 data: 0.1121 max mem: 9305 +Train: [5] [3300/6250] eta: 0:08:51 lr: 0.000125 grad: 0.1544 (0.1830) loss: 0.9240 (0.9290) time: 0.1391 data: 0.0473 max mem: 9305 +Train: [5] [3400/6250] eta: 0:08:33 lr: 0.000125 grad: 0.1446 (0.1823) loss: 0.9249 (0.9289) time: 0.1897 data: 0.1044 max mem: 9305 +Train: [5] [3500/6250] eta: 0:08:14 lr: 0.000125 grad: 0.1517 (0.1817) loss: 0.9243 (0.9287) time: 0.1866 data: 0.0793 max mem: 9305 +Train: [5] [3600/6250] eta: 0:07:56 lr: 0.000125 grad: 0.1656 (0.1811) loss: 0.9242 (0.9285) time: 0.1843 data: 0.0949 max mem: 9305 +Train: [5] [3700/6250] eta: 0:07:36 lr: 0.000125 grad: 0.1279 (0.1801) loss: 0.9216 (0.9284) time: 0.1359 data: 0.0526 max mem: 9305 +Train: [5] [3800/6250] eta: 0:07:20 lr: 0.000125 grad: 0.1437 (0.1791) loss: 0.9214 (0.9283) time: 0.1939 data: 0.0915 max mem: 9305 +Train: [5] [3900/6250] eta: 0:07:01 lr: 0.000125 grad: 0.1411 (0.1787) loss: 0.9255 (0.9282) time: 0.1815 data: 0.0939 max mem: 9305 +Train: [5] [4000/6250] eta: 0:06:41 lr: 0.000125 grad: 0.1378 (0.1778) loss: 0.9232 (0.9281) time: 0.1311 data: 0.0440 max mem: 9305 +Train: [5] [4100/6250] eta: 0:06:22 lr: 0.000125 grad: 0.1773 (0.1776) loss: 0.9241 (0.9280) time: 0.1490 data: 0.0613 max mem: 9305 +Train: [5] [4200/6250] eta: 0:06:03 lr: 0.000125 grad: 0.1283 (0.1771) loss: 0.9213 (0.9279) time: 0.1659 data: 0.0768 max mem: 9305 +Train: [5] [4300/6250] eta: 0:05:44 lr: 0.000125 grad: 0.1380 (0.1764) loss: 0.9245 (0.9278) time: 0.1481 data: 0.0639 max mem: 9305 +Train: [5] [4400/6250] eta: 0:05:26 lr: 0.000125 grad: 0.1639 (0.1760) loss: 0.9241 (0.9277) time: 0.1394 data: 0.0564 max mem: 9305 +Train: [5] [4500/6250] eta: 0:05:07 lr: 0.000125 grad: 0.1374 (0.1753) loss: 0.9244 (0.9275) time: 0.1672 data: 0.0762 max mem: 9305 +Train: [5] [4600/6250] eta: 0:04:50 lr: 0.000125 grad: 0.1374 (0.1746) loss: 0.9220 (0.9274) time: 0.2193 data: 0.1128 max mem: 9305 +Train: [5] [4700/6250] eta: 0:04:35 lr: 0.000125 grad: 0.1406 (0.1742) loss: 0.9227 (0.9273) time: 0.1802 data: 0.0806 max mem: 9305 +Train: [5] [4800/6250] eta: 0:04:18 lr: 0.000125 grad: 0.1230 (0.1736) loss: 0.9226 (0.9272) time: 0.2134 data: 0.1054 max mem: 9305 +Train: [5] [4900/6250] eta: 0:04:00 lr: 0.000125 grad: 0.1287 (0.1729) loss: 0.9198 (0.9271) time: 0.1676 data: 0.0745 max mem: 9305 +Train: [5] [5000/6250] eta: 0:03:43 lr: 0.000125 grad: 0.1274 (0.1724) loss: 0.9220 (0.9270) time: 0.1264 data: 0.0003 max mem: 9305 +Train: [5] [5100/6250] eta: 0:03:26 lr: 0.000125 grad: 0.1404 (0.1718) loss: 0.9205 (0.9269) time: 0.2406 data: 0.1496 max mem: 9305 +Train: [5] [5200/6250] eta: 0:03:08 lr: 0.000125 grad: 0.1356 (0.1713) loss: 0.9197 (0.9267) time: 0.1805 data: 0.0862 max mem: 9305 +Train: [5] [5300/6250] eta: 0:02:50 lr: 0.000125 grad: 0.1158 (0.1706) loss: 0.9211 (0.9266) time: 0.2655 data: 0.1752 max mem: 9305 +Train: [5] [5400/6250] eta: 0:02:32 lr: 0.000125 grad: 0.1366 (0.1702) loss: 0.9189 (0.9265) time: 0.1928 data: 0.1049 max mem: 9305 +Train: [5] [5500/6250] eta: 0:02:15 lr: 0.000125 grad: 0.1192 (0.1699) loss: 0.9197 (0.9263) time: 0.1313 data: 0.0475 max mem: 9305 +Train: [5] [5600/6250] eta: 0:01:57 lr: 0.000125 grad: 0.1123 (0.1691) loss: 0.9230 (0.9262) time: 0.2219 data: 0.1205 max mem: 9305 +Train: [5] [5700/6250] eta: 0:01:39 lr: 0.000125 grad: 0.1320 (0.1686) loss: 0.9188 (0.9261) time: 0.1871 data: 0.0922 max mem: 9305 +Train: [5] [5800/6250] eta: 0:01:21 lr: 0.000125 grad: 0.1344 (0.1680) loss: 0.9182 (0.9260) time: 0.1891 data: 0.1068 max mem: 9305 +Train: [5] [5900/6250] eta: 0:01:03 lr: 0.000125 grad: 0.1056 (0.1674) loss: 0.9220 (0.9259) time: 0.1677 data: 0.0832 max mem: 9305 +Train: [5] [6000/6250] eta: 0:00:45 lr: 0.000125 grad: 0.1229 (0.1669) loss: 0.9177 (0.9258) time: 0.1762 data: 0.0954 max mem: 9305 +Train: [5] [6100/6250] eta: 0:00:27 lr: 0.000125 grad: 0.1251 (0.1664) loss: 0.9172 (0.9257) time: 0.1779 data: 0.0906 max mem: 9305 +Train: [5] [6200/6250] eta: 0:00:09 lr: 0.000125 grad: 0.1286 (0.1659) loss: 0.9171 (0.9256) time: 0.2138 data: 0.1166 max mem: 9305 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1553 (0.1657) loss: 0.9168 (0.9255) time: 0.1268 data: 0.0224 max mem: 9305 +Train: [5] Total time: 0:19:03 (0.1830 s / it) +Averaged stats: lr: 0.000125 grad: 0.1553 (0.1657) loss: 0.9168 (0.9255) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:06:07 loss: 0.9289 (0.9289) time: 5.9323 data: 5.8957 max mem: 9305 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.9239 (0.9215) time: 0.1516 data: 0.1224 max mem: 9305 +Eval (hcp-train-subset): [5] Total time: 0:00:15 (0.2528 s / it) +Averaged stats (hcp-train-subset): loss: 0.9239 (0.9215) +Eval (hcp-val): [5] [ 0/62] eta: 0:04:57 loss: 0.9177 (0.9177) time: 4.7912 data: 4.7559 max mem: 9305 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.9155 (0.9160) time: 0.1452 data: 0.1157 max mem: 9305 +Eval (hcp-val): [5] Total time: 0:00:15 (0.2448 s / it) +Averaged stats (hcp-val): loss: 0.9155 (0.9160) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [6] [ 0/6250] eta: 7:56:31 lr: 0.000125 grad: 0.1183 (0.1183) loss: 0.9390 (0.9390) time: 4.5746 data: 4.3705 max mem: 9305 +Train: [6] [ 100/6250] eta: 0:26:00 lr: 0.000125 grad: 0.1174 (0.1466) loss: 0.9102 (0.9200) time: 0.1942 data: 0.1012 max mem: 9305 +Train: [6] [ 200/6250] eta: 0:21:55 lr: 0.000125 grad: 0.1085 (0.1418) loss: 0.9177 (0.9175) time: 0.1773 data: 0.0833 max mem: 9305 +Train: [6] [ 300/6250] eta: 0:20:54 lr: 0.000125 grad: 0.1182 (0.1364) loss: 0.9156 (0.9172) time: 0.1862 data: 0.0658 max mem: 9305 +Train: [6] [ 400/6250] eta: 0:19:47 lr: 0.000125 grad: 0.1242 (0.1361) loss: 0.9122 (0.9165) time: 0.1946 data: 0.0922 max mem: 9305 +Train: [6] [ 500/6250] eta: 0:20:07 lr: 0.000125 grad: 0.1166 (0.1348) loss: 0.9149 (0.9161) time: 0.1306 data: 0.0189 max mem: 9305 +Train: [6] [ 600/6250] eta: 0:19:24 lr: 0.000125 grad: 0.1460 (0.1345) loss: 0.9177 (0.9162) time: 0.2781 data: 0.1435 max mem: 9305 +Train: [6] [ 700/6250] eta: 0:18:43 lr: 0.000125 grad: 0.1190 (0.1353) loss: 0.9167 (0.9165) time: 0.1536 data: 0.0656 max mem: 9305 +Train: [6] [ 800/6250] eta: 0:18:08 lr: 0.000125 grad: 0.1054 (0.1338) loss: 0.9195 (0.9166) time: 0.1735 data: 0.0877 max mem: 9305 +Train: [6] [ 900/6250] eta: 0:17:27 lr: 0.000125 grad: 0.1318 (0.1323) loss: 0.9149 (0.9165) time: 0.1591 data: 0.0641 max mem: 9305 +Train: [6] [1000/6250] eta: 0:16:46 lr: 0.000125 grad: 0.1248 (0.1320) loss: 0.9136 (0.9164) time: 0.1452 data: 0.0616 max mem: 9305 +Train: [6] [1100/6250] eta: 0:16:19 lr: 0.000125 grad: 0.1260 (0.1316) loss: 0.9100 (0.9164) time: 0.1852 data: 0.0826 max mem: 9305 +Train: [6] [1200/6250] eta: 0:16:02 lr: 0.000125 grad: 0.1134 (0.1306) loss: 0.9164 (0.9162) time: 0.1142 data: 0.0005 max mem: 9305 +Train: [6] [1300/6250] eta: 0:15:42 lr: 0.000125 grad: 0.1136 (0.1300) loss: 0.9145 (0.9162) time: 0.1679 data: 0.0750 max mem: 9305 +Train: [6] [1400/6250] eta: 0:15:24 lr: 0.000125 grad: 0.1086 (0.1302) loss: 0.9109 (0.9160) time: 0.2131 data: 0.1183 max mem: 9305 +Train: [6] [1500/6250] eta: 0:15:00 lr: 0.000125 grad: 0.1166 (0.1296) loss: 0.9139 (0.9159) time: 0.1803 data: 0.0892 max mem: 9305 +Train: [6] [1600/6250] eta: 0:14:44 lr: 0.000125 grad: 0.1153 (0.1289) loss: 0.9147 (0.9157) time: 0.2096 data: 0.1082 max mem: 9305 +Train: [6] [1700/6250] eta: 0:14:17 lr: 0.000125 grad: 0.1331 (0.1285) loss: 0.9106 (0.9155) time: 0.1692 data: 0.0785 max mem: 9305 +Train: [6] [1800/6250] eta: 0:14:01 lr: 0.000125 grad: 0.1109 (0.1284) loss: 0.9153 (0.9154) time: 0.3171 data: 0.2155 max mem: 9305 +Train: [6] [1900/6250] eta: 0:13:31 lr: 0.000125 grad: 0.1158 (0.1275) loss: 0.9132 (0.9153) time: 0.1525 data: 0.0663 max mem: 9305 +Train: [6] [2000/6250] eta: 0:13:08 lr: 0.000125 grad: 0.1021 (0.1271) loss: 0.9124 (0.9152) time: 0.1601 data: 0.0574 max mem: 9305 +Train: [6] [2100/6250] eta: 0:12:54 lr: 0.000125 grad: 0.1090 (0.1265) loss: 0.9115 (0.9150) time: 0.1372 data: 0.0400 max mem: 9305 +Train: [6] [2200/6250] eta: 0:12:30 lr: 0.000125 grad: 0.1066 (0.1261) loss: 0.9121 (0.9149) time: 0.1449 data: 0.0252 max mem: 9305 +Train: [6] [2300/6250] eta: 0:12:13 lr: 0.000125 grad: 0.1056 (0.1254) loss: 0.9097 (0.9148) time: 0.1539 data: 0.0570 max mem: 9305 +Train: [6] [2400/6250] eta: 0:11:53 lr: 0.000125 grad: 0.1129 (0.1251) loss: 0.9125 (0.9147) time: 0.1689 data: 0.0717 max mem: 9305 +Train: [6] [2500/6250] eta: 0:11:36 lr: 0.000125 grad: 0.0928 (0.1248) loss: 0.9109 (0.9145) time: 0.1152 data: 0.0002 max mem: 9305 +Train: [6] [2600/6250] eta: 0:11:14 lr: 0.000125 grad: 0.1037 (0.1243) loss: 0.9142 (0.9145) time: 0.1908 data: 0.0960 max mem: 9305 +Train: [6] [2700/6250] eta: 0:10:57 lr: 0.000125 grad: 0.1065 (0.1241) loss: 0.9109 (0.9143) time: 0.1459 data: 0.0293 max mem: 9305 +Train: [6] [2800/6250] eta: 0:10:36 lr: 0.000125 grad: 0.1067 (0.1238) loss: 0.9148 (0.9142) time: 0.1840 data: 0.0900 max mem: 9305 +Train: [6] [2900/6250] eta: 0:10:18 lr: 0.000125 grad: 0.0966 (0.1233) loss: 0.9108 (0.9141) time: 0.2433 data: 0.0958 max mem: 9305 +Train: [6] [3000/6250] eta: 0:10:01 lr: 0.000125 grad: 0.1285 (0.1233) loss: 0.9137 (0.9140) time: 0.1399 data: 0.0460 max mem: 9305 +Train: [6] [3100/6250] eta: 0:09:40 lr: 0.000125 grad: 0.1054 (0.1230) loss: 0.9084 (0.9138) time: 0.1478 data: 0.0568 max mem: 9305 +Train: [6] [3200/6250] eta: 0:09:19 lr: 0.000125 grad: 0.1060 (0.1226) loss: 0.9069 (0.9137) time: 0.1487 data: 0.0515 max mem: 9305 +Train: [6] [3300/6250] eta: 0:09:00 lr: 0.000125 grad: 0.1083 (0.1224) loss: 0.9098 (0.9135) time: 0.1691 data: 0.0873 max mem: 9305 +Train: [6] [3400/6250] eta: 0:08:40 lr: 0.000125 grad: 0.1262 (0.1223) loss: 0.9061 (0.9134) time: 0.1582 data: 0.0710 max mem: 9305 +Train: [6] [3500/6250] eta: 0:08:20 lr: 0.000125 grad: 0.1048 (0.1221) loss: 0.9087 (0.9132) time: 0.0956 data: 0.0107 max mem: 9305 +Train: [6] [3600/6250] eta: 0:07:59 lr: 0.000125 grad: 0.0943 (0.1217) loss: 0.9104 (0.9131) time: 0.1422 data: 0.0568 max mem: 9305 +Train: [6] [3700/6250] eta: 0:07:40 lr: 0.000125 grad: 0.1169 (0.1214) loss: 0.9026 (0.9130) time: 0.1697 data: 0.0753 max mem: 9305 +Train: [6] [3800/6250] eta: 0:07:21 lr: 0.000125 grad: 0.0971 (0.1212) loss: 0.9051 (0.9128) time: 0.1771 data: 0.0807 max mem: 9305 +Train: [6] [3900/6250] eta: 0:07:02 lr: 0.000125 grad: 0.0956 (0.1208) loss: 0.9065 (0.9127) time: 0.1697 data: 0.0806 max mem: 9305 +Train: [6] [4000/6250] eta: 0:06:43 lr: 0.000125 grad: 0.1080 (0.1204) loss: 0.9111 (0.9126) time: 0.1925 data: 0.1014 max mem: 9305 +Train: [6] [4100/6250] eta: 0:06:25 lr: 0.000125 grad: 0.1013 (0.1201) loss: 0.9123 (0.9126) time: 0.1678 data: 0.0793 max mem: 9305 +Train: [6] [4200/6250] eta: 0:06:06 lr: 0.000125 grad: 0.0989 (0.1199) loss: 0.9085 (0.9125) time: 0.1766 data: 0.0949 max mem: 9305 +Train: [6] [4300/6250] eta: 0:05:48 lr: 0.000125 grad: 0.0951 (0.1195) loss: 0.9074 (0.9124) time: 0.1716 data: 0.0879 max mem: 9305 +Train: [6] [4400/6250] eta: 0:05:31 lr: 0.000125 grad: 0.0958 (0.1191) loss: 0.9084 (0.9124) time: 0.2177 data: 0.1179 max mem: 9305 +Train: [6] [4500/6250] eta: 0:05:14 lr: 0.000125 grad: 0.0981 (0.1187) loss: 0.9100 (0.9123) time: 0.1643 data: 0.0772 max mem: 9305 +Train: [6] [4600/6250] eta: 0:04:56 lr: 0.000125 grad: 0.1013 (0.1185) loss: 0.9079 (0.9123) time: 0.1837 data: 0.0858 max mem: 9305 +Train: [6] [4700/6250] eta: 0:04:39 lr: 0.000125 grad: 0.0923 (0.1182) loss: 0.9053 (0.9123) time: 0.1536 data: 0.0522 max mem: 9305 +Train: [6] [4800/6250] eta: 0:04:21 lr: 0.000125 grad: 0.0944 (0.1180) loss: 0.9069 (0.9122) time: 0.1486 data: 0.0541 max mem: 9305 +Train: [6] [4900/6250] eta: 0:04:04 lr: 0.000125 grad: 0.0944 (0.1177) loss: 0.9129 (0.9122) time: 0.1916 data: 0.1033 max mem: 9305 +Train: [6] [5000/6250] eta: 0:03:45 lr: 0.000125 grad: 0.0989 (0.1173) loss: 0.9118 (0.9121) time: 0.1927 data: 0.1024 max mem: 9305 +Train: [6] [5100/6250] eta: 0:03:28 lr: 0.000125 grad: 0.1019 (0.1170) loss: 0.9082 (0.9121) time: 0.1635 data: 0.0799 max mem: 9305 +Train: [6] [5200/6250] eta: 0:03:10 lr: 0.000125 grad: 0.0976 (0.1167) loss: 0.9129 (0.9121) time: 0.1891 data: 0.1049 max mem: 9305 +Train: [6] [5300/6250] eta: 0:02:52 lr: 0.000125 grad: 0.0810 (0.1165) loss: 0.9061 (0.9120) time: 0.1552 data: 0.0694 max mem: 9305 +Train: [6] [5400/6250] eta: 0:02:33 lr: 0.000125 grad: 0.0952 (0.1161) loss: 0.9033 (0.9119) time: 0.1876 data: 0.1006 max mem: 9305 +Train: [6] [5500/6250] eta: 0:02:15 lr: 0.000125 grad: 0.1006 (0.1159) loss: 0.9049 (0.9118) time: 0.2015 data: 0.1110 max mem: 9305 +Train: [6] [5600/6250] eta: 0:01:57 lr: 0.000125 grad: 0.1006 (0.1157) loss: 0.9096 (0.9118) time: 0.1704 data: 0.0790 max mem: 9305 +Train: [6] [5700/6250] eta: 0:01:39 lr: 0.000125 grad: 0.0872 (0.1155) loss: 0.9098 (0.9117) time: 0.1925 data: 0.0950 max mem: 9305 +Train: [6] [5800/6250] eta: 0:01:21 lr: 0.000125 grad: 0.0879 (0.1152) loss: 0.9074 (0.9117) time: 0.2016 data: 0.0954 max mem: 9305 +Train: [6] [5900/6250] eta: 0:01:03 lr: 0.000125 grad: 0.0947 (0.1151) loss: 0.9028 (0.9116) time: 0.1442 data: 0.0597 max mem: 9305 +Train: [6] [6000/6250] eta: 0:00:45 lr: 0.000125 grad: 0.0909 (0.1148) loss: 0.9107 (0.9115) time: 0.1528 data: 0.0605 max mem: 9305 +Train: [6] [6100/6250] eta: 0:00:27 lr: 0.000125 grad: 0.0915 (0.1146) loss: 0.9063 (0.9115) time: 0.1337 data: 0.0242 max mem: 9305 +Train: [6] [6200/6250] eta: 0:00:09 lr: 0.000125 grad: 0.0905 (0.1143) loss: 0.9109 (0.9114) time: 0.1989 data: 0.1094 max mem: 9305 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0869 (0.1142) loss: 0.9090 (0.9114) time: 0.1365 data: 0.0379 max mem: 9305 +Train: [6] Total time: 0:18:52 (0.1812 s / it) +Averaged stats: lr: 0.000125 grad: 0.0869 (0.1142) loss: 0.9090 (0.9114) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:06:34 loss: 0.9179 (0.9179) time: 6.3629 data: 6.3268 max mem: 9305 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.9114 (0.9117) time: 0.1327 data: 0.1036 max mem: 9305 +Eval (hcp-train-subset): [6] Total time: 0:00:15 (0.2430 s / it) +Averaged stats (hcp-train-subset): loss: 0.9114 (0.9117) +Eval (hcp-val): [6] [ 0/62] eta: 0:03:45 loss: 0.8994 (0.8994) time: 3.6323 data: 3.5564 max mem: 9305 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.9052 (0.9059) time: 0.1553 data: 0.1249 max mem: 9305 +Eval (hcp-val): [6] Total time: 0:00:15 (0.2434 s / it) +Averaged stats (hcp-val): loss: 0.9052 (0.9059) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [7] [ 0/6250] eta: 10:41:02 lr: 0.000125 grad: 0.0964 (0.0964) loss: 0.9192 (0.9192) time: 6.1539 data: 6.0500 max mem: 9305 +Train: [7] [ 100/6250] eta: 0:23:57 lr: 0.000125 grad: 0.0774 (0.1069) loss: 0.9100 (0.9091) time: 0.2084 data: 0.1120 max mem: 9305 +Train: [7] [ 200/6250] eta: 0:21:00 lr: 0.000125 grad: 0.0782 (0.1026) loss: 0.9064 (0.9071) time: 0.1522 data: 0.0518 max mem: 9305 +Train: [7] [ 300/6250] eta: 0:20:16 lr: 0.000125 grad: 0.0875 (0.1020) loss: 0.9055 (0.9061) time: 0.1053 data: 0.0002 max mem: 9305 +Train: [7] [ 400/6250] eta: 0:19:44 lr: 0.000125 grad: 0.0889 (0.1012) loss: 0.9076 (0.9056) time: 0.2096 data: 0.1102 max mem: 9305 +Train: [7] [ 500/6250] eta: 0:19:05 lr: 0.000125 grad: 0.0981 (0.1010) loss: 0.9014 (0.9050) time: 0.1715 data: 0.0795 max mem: 9305 +Train: [7] [ 600/6250] eta: 0:18:41 lr: 0.000125 grad: 0.0940 (0.1003) loss: 0.9009 (0.9045) time: 0.2160 data: 0.1076 max mem: 9305 +Train: [7] [ 700/6250] eta: 0:17:59 lr: 0.000125 grad: 0.0934 (0.1017) loss: 0.9012 (0.9040) time: 0.1660 data: 0.0754 max mem: 9305 +Train: [7] [ 800/6250] eta: 0:17:27 lr: 0.000125 grad: 0.0899 (0.1013) loss: 0.8994 (0.9037) time: 0.1674 data: 0.0736 max mem: 9305 +Train: [7] [ 900/6250] eta: 0:17:18 lr: 0.000125 grad: 0.1018 (0.1010) loss: 0.9036 (0.9036) time: 0.2389 data: 0.1324 max mem: 9305 +Train: [7] [1000/6250] eta: 0:16:48 lr: 0.000125 grad: 0.1066 (0.1013) loss: 0.9024 (0.9037) time: 0.1730 data: 0.0790 max mem: 9305 +Train: [7] [1100/6250] eta: 0:16:26 lr: 0.000125 grad: 0.0903 (0.1012) loss: 0.9039 (0.9038) time: 0.2493 data: 0.1352 max mem: 9305 +Train: [7] [1200/6250] eta: 0:16:01 lr: 0.000125 grad: 0.0883 (0.1004) loss: 0.9024 (0.9037) time: 0.1413 data: 0.0495 max mem: 9305 +Train: [7] [1300/6250] eta: 0:15:32 lr: 0.000125 grad: 0.0914 (0.0998) loss: 0.9046 (0.9038) time: 0.1783 data: 0.0891 max mem: 9305 +Train: [7] [1400/6250] eta: 0:15:28 lr: 0.000125 grad: 0.0974 (0.0994) loss: 0.9045 (0.9039) time: 0.0974 data: 0.0002 max mem: 9305 +Train: [7] [1500/6250] eta: 0:14:57 lr: 0.000125 grad: 0.0814 (0.0986) loss: 0.9065 (0.9040) time: 0.1973 data: 0.0980 max mem: 9305 +Train: [7] [1600/6250] eta: 0:14:33 lr: 0.000125 grad: 0.0862 (0.0982) loss: 0.9071 (0.9041) time: 0.2146 data: 0.1105 max mem: 9305 +Train: [7] [1700/6250] eta: 0:14:25 lr: 0.000125 grad: 0.0845 (0.0976) loss: 0.9089 (0.9043) time: 0.2597 data: 0.1458 max mem: 9305 +Train: [7] [1800/6250] eta: 0:13:56 lr: 0.000125 grad: 0.0876 (0.0975) loss: 0.9042 (0.9044) time: 0.1433 data: 0.0506 max mem: 9305 +Train: [7] [1900/6250] eta: 0:13:37 lr: 0.000125 grad: 0.0855 (0.0970) loss: 0.9073 (0.9045) time: 0.1903 data: 0.0901 max mem: 9305 +Train: [7] [2000/6250] eta: 0:13:15 lr: 0.000125 grad: 0.0810 (0.0966) loss: 0.9114 (0.9046) time: 0.1641 data: 0.0672 max mem: 9305 +Train: [7] [2100/6250] eta: 0:12:56 lr: 0.000125 grad: 0.0870 (0.0965) loss: 0.9025 (0.9046) time: 0.1869 data: 0.1015 max mem: 9305 +Train: [7] [2200/6250] eta: 0:12:35 lr: 0.000125 grad: 0.0790 (0.0963) loss: 0.9067 (0.9047) time: 0.1608 data: 0.0775 max mem: 9305 +Train: [7] [2300/6250] eta: 0:12:15 lr: 0.000125 grad: 0.0818 (0.0961) loss: 0.9024 (0.9047) time: 0.1662 data: 0.0835 max mem: 9305 +Train: [7] [2400/6250] eta: 0:11:56 lr: 0.000125 grad: 0.0890 (0.0960) loss: 0.9027 (0.9047) time: 0.1620 data: 0.0664 max mem: 9305 +Train: [7] [2500/6250] eta: 0:11:42 lr: 0.000125 grad: 0.0871 (0.0959) loss: 0.9000 (0.9046) time: 0.3009 data: 0.2053 max mem: 9305 +Train: [7] [2600/6250] eta: 0:11:18 lr: 0.000125 grad: 0.0770 (0.0956) loss: 0.9047 (0.9046) time: 0.1639 data: 0.0698 max mem: 9305 +Train: [7] [2700/6250] eta: 0:10:56 lr: 0.000125 grad: 0.0903 (0.0955) loss: 0.9105 (0.9046) time: 0.1290 data: 0.0294 max mem: 9305 +Train: [7] [2800/6250] eta: 0:10:35 lr: 0.000125 grad: 0.0833 (0.0954) loss: 0.9050 (0.9046) time: 0.1506 data: 0.0553 max mem: 9305 +Train: [7] [2900/6250] eta: 0:10:14 lr: 0.000125 grad: 0.0875 (0.0952) loss: 0.9058 (0.9046) time: 0.1410 data: 0.0451 max mem: 9305 +Train: [7] [3000/6250] eta: 0:09:56 lr: 0.000125 grad: 0.0905 (0.0952) loss: 0.9067 (0.9046) time: 0.1509 data: 0.0588 max mem: 9305 +Train: [7] [3100/6250] eta: 0:09:35 lr: 0.000125 grad: 0.0813 (0.0952) loss: 0.9045 (0.9046) time: 0.1929 data: 0.1083 max mem: 9305 +Train: [7] [3200/6250] eta: 0:09:16 lr: 0.000125 grad: 0.0916 (0.0949) loss: 0.9050 (0.9046) time: 0.1665 data: 0.0777 max mem: 9305 +Train: [7] [3300/6250] eta: 0:08:56 lr: 0.000125 grad: 0.0808 (0.0947) loss: 0.9022 (0.9045) time: 0.1508 data: 0.0698 max mem: 9305 +Train: [7] [3400/6250] eta: 0:08:37 lr: 0.000125 grad: 0.0884 (0.0947) loss: 0.9036 (0.9045) time: 0.2085 data: 0.1199 max mem: 9305 +Train: [7] [3500/6250] eta: 0:08:17 lr: 0.000125 grad: 0.0840 (0.0945) loss: 0.9015 (0.9045) time: 0.1609 data: 0.0650 max mem: 9305 +Train: [7] [3600/6250] eta: 0:07:58 lr: 0.000125 grad: 0.0777 (0.0942) loss: 0.9026 (0.9045) time: 0.1577 data: 0.0705 max mem: 9305 +Train: [7] [3700/6250] eta: 0:07:39 lr: 0.000125 grad: 0.0798 (0.0942) loss: 0.9030 (0.9045) time: 0.1575 data: 0.0682 max mem: 9305 +Train: [7] [3800/6250] eta: 0:07:20 lr: 0.000125 grad: 0.0773 (0.0939) loss: 0.9048 (0.9044) time: 0.1875 data: 0.0943 max mem: 9305 +Train: [7] [3900/6250] eta: 0:07:02 lr: 0.000125 grad: 0.0885 (0.0938) loss: 0.9014 (0.9044) time: 0.1725 data: 0.0912 max mem: 9305 +Train: [7] [4000/6250] eta: 0:06:45 lr: 0.000125 grad: 0.0771 (0.0936) loss: 0.9052 (0.9043) time: 0.1826 data: 0.0926 max mem: 9305 +Train: [7] [4100/6250] eta: 0:06:29 lr: 0.000125 grad: 0.0742 (0.0934) loss: 0.9025 (0.9042) time: 0.2340 data: 0.1377 max mem: 9305 +Train: [7] [4200/6250] eta: 0:06:12 lr: 0.000125 grad: 0.0842 (0.0934) loss: 0.9027 (0.9042) time: 0.2379 data: 0.1428 max mem: 9305 +Train: [7] [4300/6250] eta: 0:05:55 lr: 0.000125 grad: 0.0805 (0.0932) loss: 0.9045 (0.9042) time: 0.2231 data: 0.1131 max mem: 9305 +Train: [7] [4400/6250] eta: 0:05:37 lr: 0.000125 grad: 0.0826 (0.0931) loss: 0.9041 (0.9041) time: 0.2794 data: 0.1724 max mem: 9305 +Train: [7] [4500/6250] eta: 0:05:20 lr: 0.000125 grad: 0.0817 (0.0929) loss: 0.9006 (0.9041) time: 0.1431 data: 0.0498 max mem: 9305 +Train: [7] [4600/6250] eta: 0:05:02 lr: 0.000125 grad: 0.0862 (0.0930) loss: 0.9052 (0.9040) time: 0.1328 data: 0.0424 max mem: 9305 +Train: [7] [4700/6250] eta: 0:04:44 lr: 0.000125 grad: 0.0871 (0.0928) loss: 0.9008 (0.9040) time: 0.2435 data: 0.1229 max mem: 9305 +Train: [7] [4800/6250] eta: 0:04:26 lr: 0.000125 grad: 0.0825 (0.0927) loss: 0.9046 (0.9040) time: 0.2028 data: 0.1034 max mem: 9305 +Train: [7] [4900/6250] eta: 0:04:07 lr: 0.000125 grad: 0.0873 (0.0925) loss: 0.8963 (0.9039) time: 0.2151 data: 0.1243 max mem: 9305 +Train: [7] [5000/6250] eta: 0:03:49 lr: 0.000125 grad: 0.0864 (0.0924) loss: 0.8981 (0.9038) time: 0.1528 data: 0.0576 max mem: 9305 +Train: [7] [5100/6250] eta: 0:03:32 lr: 0.000125 grad: 0.0811 (0.0922) loss: 0.9059 (0.9038) time: 0.4730 data: 0.3828 max mem: 9305 +Train: [7] [5200/6250] eta: 0:03:13 lr: 0.000125 grad: 0.0789 (0.0921) loss: 0.8990 (0.9037) time: 0.1773 data: 0.0937 max mem: 9305 +Train: [7] [5300/6250] eta: 0:02:55 lr: 0.000125 grad: 0.0771 (0.0920) loss: 0.9017 (0.9037) time: 0.1648 data: 0.0734 max mem: 9305 +Train: [7] [5400/6250] eta: 0:02:36 lr: 0.000125 grad: 0.0798 (0.0919) loss: 0.9004 (0.9036) time: 0.1590 data: 0.0701 max mem: 9305 +Train: [7] [5500/6250] eta: 0:02:18 lr: 0.000125 grad: 0.0795 (0.0918) loss: 0.8987 (0.9036) time: 0.1980 data: 0.1088 max mem: 9305 +Train: [7] [5600/6250] eta: 0:02:00 lr: 0.000125 grad: 0.0761 (0.0916) loss: 0.8991 (0.9035) time: 0.2114 data: 0.1152 max mem: 9305 +Train: [7] [5700/6250] eta: 0:01:41 lr: 0.000125 grad: 0.0754 (0.0914) loss: 0.9035 (0.9035) time: 0.3072 data: 0.2069 max mem: 9305 +Train: [7] [5800/6250] eta: 0:01:23 lr: 0.000125 grad: 0.0791 (0.0913) loss: 0.9026 (0.9035) time: 0.2087 data: 0.1140 max mem: 9305 +Train: [7] [5900/6250] eta: 0:01:04 lr: 0.000125 grad: 0.0737 (0.0911) loss: 0.9029 (0.9035) time: 0.1936 data: 0.1026 max mem: 9305 +Train: [7] [6000/6250] eta: 0:00:46 lr: 0.000125 grad: 0.1003 (0.0912) loss: 0.9041 (0.9034) time: 0.1738 data: 0.0792 max mem: 9305 +Train: [7] [6100/6250] eta: 0:00:27 lr: 0.000125 grad: 0.0826 (0.0910) loss: 0.9039 (0.9034) time: 0.1763 data: 0.0862 max mem: 9305 +Train: [7] [6200/6250] eta: 0:00:09 lr: 0.000125 grad: 0.0857 (0.0910) loss: 0.9019 (0.9034) time: 0.3410 data: 0.2519 max mem: 9305 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0759 (0.0909) loss: 0.9029 (0.9034) time: 0.1703 data: 0.0675 max mem: 9305 +Train: [7] Total time: 0:19:33 (0.1878 s / it) +Averaged stats: lr: 0.000125 grad: 0.0759 (0.0909) loss: 0.9029 (0.9034) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:05:43 loss: 0.9135 (0.9135) time: 5.5457 data: 5.5111 max mem: 9305 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.9073 (0.9064) time: 0.1614 data: 0.1304 max mem: 9305 +Eval (hcp-train-subset): [7] Total time: 0:00:17 (0.2853 s / it) +Averaged stats (hcp-train-subset): loss: 0.9073 (0.9064) +Eval (hcp-val): [7] [ 0/62] eta: 0:05:17 loss: 0.9008 (0.9008) time: 5.1287 data: 5.0938 max mem: 9305 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.9011 (0.9013) time: 0.1636 data: 0.1341 max mem: 9305 +Eval (hcp-val): [7] Total time: 0:00:16 (0.2581 s / it) +Averaged stats (hcp-val): loss: 0.9011 (0.9013) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [8] [ 0/6250] eta: 8:30:39 lr: 0.000125 grad: 0.0557 (0.0557) loss: 0.9083 (0.9083) time: 4.9024 data: 4.6350 max mem: 9305 +Train: [8] [ 100/6250] eta: 0:25:30 lr: 0.000125 grad: 0.0760 (0.0837) loss: 0.8986 (0.9035) time: 0.2388 data: 0.1334 max mem: 9305 +Train: [8] [ 200/6250] eta: 0:22:06 lr: 0.000125 grad: 0.0817 (0.0845) loss: 0.9006 (0.9030) time: 0.1973 data: 0.0772 max mem: 9305 +Train: [8] [ 300/6250] eta: 0:22:00 lr: 0.000125 grad: 0.0859 (0.0860) loss: 0.8976 (0.9015) time: 0.3030 data: 0.2017 max mem: 9305 +Train: [8] [ 400/6250] eta: 0:20:40 lr: 0.000125 grad: 0.0872 (0.0855) loss: 0.8981 (0.9011) time: 0.1662 data: 0.0519 max mem: 9305 +Train: [8] [ 500/6250] eta: 0:19:55 lr: 0.000125 grad: 0.0799 (0.0847) loss: 0.8968 (0.9008) time: 0.1438 data: 0.0543 max mem: 9305 +Train: [8] [ 600/6250] eta: 0:20:10 lr: 0.000125 grad: 0.0820 (0.0844) loss: 0.9040 (0.9008) time: 0.1181 data: 0.0003 max mem: 9305 +Train: [8] [ 700/6250] eta: 0:19:02 lr: 0.000125 grad: 0.0697 (0.0835) loss: 0.9020 (0.9008) time: 0.1450 data: 0.0558 max mem: 9305 +Train: [8] [ 800/6250] eta: 0:18:39 lr: 0.000125 grad: 0.0720 (0.0826) loss: 0.9046 (0.9008) time: 0.1789 data: 0.0802 max mem: 9305 +Train: [8] [ 900/6250] eta: 0:17:57 lr: 0.000125 grad: 0.0732 (0.0821) loss: 0.8990 (0.9007) time: 0.1810 data: 0.0906 max mem: 9305 +Train: [8] [1000/6250] eta: 0:17:35 lr: 0.000125 grad: 0.0795 (0.0819) loss: 0.8988 (0.9006) time: 0.1881 data: 0.0985 max mem: 9305 +Train: [8] [1100/6250] eta: 0:16:56 lr: 0.000125 grad: 0.0785 (0.0821) loss: 0.9035 (0.9007) time: 0.1539 data: 0.0561 max mem: 9305 +Train: [8] [1200/6250] eta: 0:16:28 lr: 0.000125 grad: 0.0718 (0.0818) loss: 0.9015 (0.9006) time: 0.1371 data: 0.0409 max mem: 9305 +Train: [8] [1300/6250] eta: 0:16:18 lr: 0.000125 grad: 0.0754 (0.0814) loss: 0.8958 (0.9005) time: 0.1586 data: 0.0547 max mem: 9305 +Train: [8] [1400/6250] eta: 0:15:42 lr: 0.000125 grad: 0.0841 (0.0814) loss: 0.9003 (0.9003) time: 0.1935 data: 0.1095 max mem: 9305 +Train: [8] [1500/6250] eta: 0:15:10 lr: 0.000125 grad: 0.0830 (0.0814) loss: 0.8935 (0.9002) time: 0.1617 data: 0.0690 max mem: 9305 +Train: [8] [1600/6250] eta: 0:14:48 lr: 0.000125 grad: 0.0719 (0.0812) loss: 0.8970 (0.9000) time: 0.2500 data: 0.1620 max mem: 9305 +Train: [8] [1700/6250] eta: 0:14:32 lr: 0.000125 grad: 0.0804 (0.0809) loss: 0.9007 (0.8999) time: 0.1254 data: 0.0003 max mem: 9305 +Train: [8] [1800/6250] eta: 0:14:03 lr: 0.000125 grad: 0.0789 (0.0806) loss: 0.8997 (0.8998) time: 0.1449 data: 0.0576 max mem: 9305 +Train: [8] [1900/6250] eta: 0:13:43 lr: 0.000125 grad: 0.0768 (0.0804) loss: 0.9002 (0.8998) time: 0.1501 data: 0.0642 max mem: 9305 +Train: [8] [2000/6250] eta: 0:13:25 lr: 0.000125 grad: 0.0762 (0.0804) loss: 0.8990 (0.8998) time: 0.1158 data: 0.0003 max mem: 9305 +Train: [8] [2100/6250] eta: 0:13:01 lr: 0.000125 grad: 0.0722 (0.0802) loss: 0.8982 (0.8998) time: 0.1473 data: 0.0582 max mem: 9305 +Train: [8] [2200/6250] eta: 0:12:37 lr: 0.000125 grad: 0.0782 (0.0800) loss: 0.8998 (0.8998) time: 0.1654 data: 0.0802 max mem: 9305 +Train: [8] [2300/6250] eta: 0:12:20 lr: 0.000125 grad: 0.0699 (0.0798) loss: 0.9018 (0.8998) time: 0.2536 data: 0.1675 max mem: 9305 +Train: [8] [2400/6250] eta: 0:11:57 lr: 0.000125 grad: 0.0677 (0.0798) loss: 0.9006 (0.8998) time: 0.1743 data: 0.0676 max mem: 9305 +Train: [8] [2500/6250] eta: 0:11:37 lr: 0.000125 grad: 0.0771 (0.0796) loss: 0.8993 (0.8999) time: 0.1815 data: 0.0912 max mem: 9305 +Train: [8] [2600/6250] eta: 0:11:16 lr: 0.000125 grad: 0.0750 (0.0796) loss: 0.9002 (0.8999) time: 0.1534 data: 0.0629 max mem: 9305 +Train: [8] [2700/6250] eta: 0:10:58 lr: 0.000125 grad: 0.0780 (0.0796) loss: 0.9000 (0.8999) time: 0.2027 data: 0.1045 max mem: 9305 +Train: [8] [2800/6250] eta: 0:10:37 lr: 0.000125 grad: 0.0763 (0.0794) loss: 0.9001 (0.8998) time: 0.1697 data: 0.0799 max mem: 9305 +Train: [8] [2900/6250] eta: 0:10:16 lr: 0.000125 grad: 0.0789 (0.0794) loss: 0.9030 (0.8999) time: 0.1634 data: 0.0602 max mem: 9305 +Train: [8] [3000/6250] eta: 0:09:56 lr: 0.000125 grad: 0.0713 (0.0792) loss: 0.9006 (0.8999) time: 0.1809 data: 0.0836 max mem: 9305 +Train: [8] [3100/6250] eta: 0:09:37 lr: 0.000125 grad: 0.0720 (0.0791) loss: 0.9028 (0.9000) time: 0.1991 data: 0.1063 max mem: 9305 +Train: [8] [3200/6250] eta: 0:09:17 lr: 0.000125 grad: 0.0734 (0.0790) loss: 0.9007 (0.9000) time: 0.1753 data: 0.0939 max mem: 9305 +Train: [8] [3300/6250] eta: 0:08:57 lr: 0.000125 grad: 0.0775 (0.0790) loss: 0.8993 (0.9000) time: 0.1427 data: 0.0486 max mem: 9305 +Train: [8] [3400/6250] eta: 0:08:37 lr: 0.000125 grad: 0.0697 (0.0789) loss: 0.9049 (0.9000) time: 0.1585 data: 0.0741 max mem: 9305 +Train: [8] [3500/6250] eta: 0:08:20 lr: 0.000125 grad: 0.0757 (0.0789) loss: 0.8996 (0.9000) time: 0.2346 data: 0.1391 max mem: 9305 +Train: [8] [3600/6250] eta: 0:08:04 lr: 0.000125 grad: 0.0719 (0.0789) loss: 0.8982 (0.9000) time: 0.2275 data: 0.1228 max mem: 9305 +Train: [8] [3700/6250] eta: 0:07:46 lr: 0.000125 grad: 0.0718 (0.0788) loss: 0.8984 (0.9000) time: 0.1581 data: 0.0567 max mem: 9305 +Train: [8] [3800/6250] eta: 0:07:29 lr: 0.000125 grad: 0.0764 (0.0788) loss: 0.9018 (0.9000) time: 0.1561 data: 0.0588 max mem: 9305 +Train: [8] [3900/6250] eta: 0:07:11 lr: 0.000125 grad: 0.0714 (0.0787) loss: 0.9049 (0.9000) time: 0.2464 data: 0.1240 max mem: 9305 +Train: [8] [4000/6250] eta: 0:06:54 lr: 0.000125 grad: 0.0727 (0.0786) loss: 0.8991 (0.9000) time: 0.2610 data: 0.1594 max mem: 9305 +Train: [8] [4100/6250] eta: 0:06:35 lr: 0.000125 grad: 0.0774 (0.0786) loss: 0.8970 (0.9000) time: 0.2254 data: 0.1196 max mem: 9305 +Train: [8] [4200/6250] eta: 0:06:18 lr: 0.000125 grad: 0.0742 (0.0785) loss: 0.9009 (0.8999) time: 0.2389 data: 0.1312 max mem: 9305 +Train: [8] [4300/6250] eta: 0:05:59 lr: 0.000125 grad: 0.0808 (0.0785) loss: 0.9003 (0.8998) time: 0.1662 data: 0.0676 max mem: 9305 +Train: [8] [4400/6250] eta: 0:05:40 lr: 0.000125 grad: 0.0769 (0.0786) loss: 0.8950 (0.8998) time: 0.1792 data: 0.0651 max mem: 9305 +Train: [8] [4500/6250] eta: 0:05:22 lr: 0.000125 grad: 0.0712 (0.0785) loss: 0.8963 (0.8997) time: 0.2117 data: 0.1181 max mem: 9305 +Train: [8] [4600/6250] eta: 0:05:04 lr: 0.000125 grad: 0.0707 (0.0785) loss: 0.8938 (0.8996) time: 0.1093 data: 0.0118 max mem: 9305 +Train: [8] [4700/6250] eta: 0:04:45 lr: 0.000125 grad: 0.0757 (0.0784) loss: 0.8920 (0.8996) time: 0.1725 data: 0.0612 max mem: 9305 +Train: [8] [4800/6250] eta: 0:04:27 lr: 0.000125 grad: 0.0687 (0.0784) loss: 0.8988 (0.8995) time: 0.1776 data: 0.0920 max mem: 9305 +Train: [8] [4900/6250] eta: 0:04:08 lr: 0.000125 grad: 0.0740 (0.0783) loss: 0.8987 (0.8994) time: 0.1548 data: 0.0629 max mem: 9305 +Train: [8] [5000/6250] eta: 0:03:50 lr: 0.000125 grad: 0.0688 (0.0783) loss: 0.8975 (0.8994) time: 0.1310 data: 0.0470 max mem: 9305 +Train: [8] [5100/6250] eta: 0:03:31 lr: 0.000125 grad: 0.0705 (0.0782) loss: 0.8978 (0.8993) time: 0.1749 data: 0.0838 max mem: 9305 +Train: [8] [5200/6250] eta: 0:03:13 lr: 0.000124 grad: 0.0710 (0.0781) loss: 0.8942 (0.8993) time: 0.2224 data: 0.1320 max mem: 9305 +Train: [8] [5300/6250] eta: 0:02:55 lr: 0.000124 grad: 0.0717 (0.0781) loss: 0.8983 (0.8992) time: 0.1981 data: 0.1091 max mem: 9305 +Train: [8] [5400/6250] eta: 0:02:36 lr: 0.000124 grad: 0.0704 (0.0780) loss: 0.8998 (0.8991) time: 0.1875 data: 0.0990 max mem: 9305 +Train: [8] [5500/6250] eta: 0:02:18 lr: 0.000124 grad: 0.0701 (0.0780) loss: 0.8950 (0.8991) time: 0.1246 data: 0.0146 max mem: 9305 +Train: [8] [5600/6250] eta: 0:01:59 lr: 0.000124 grad: 0.0685 (0.0779) loss: 0.8968 (0.8990) time: 0.1846 data: 0.0893 max mem: 9305 +Train: [8] [5700/6250] eta: 0:01:41 lr: 0.000124 grad: 0.0623 (0.0778) loss: 0.8992 (0.8990) time: 0.1447 data: 0.0500 max mem: 9305 +Train: [8] [5800/6250] eta: 0:01:22 lr: 0.000124 grad: 0.0714 (0.0777) loss: 0.8954 (0.8990) time: 0.1830 data: 0.0921 max mem: 9305 +Train: [8] [5900/6250] eta: 0:01:04 lr: 0.000124 grad: 0.0681 (0.0776) loss: 0.8949 (0.8989) time: 0.1248 data: 0.0375 max mem: 9305 +Train: [8] [6000/6250] eta: 0:00:46 lr: 0.000124 grad: 0.0702 (0.0775) loss: 0.8962 (0.8989) time: 0.1474 data: 0.0395 max mem: 9305 +Train: [8] [6100/6250] eta: 0:00:27 lr: 0.000124 grad: 0.0702 (0.0774) loss: 0.8959 (0.8989) time: 0.1044 data: 0.0004 max mem: 9305 +Train: [8] [6200/6250] eta: 0:00:09 lr: 0.000124 grad: 0.0716 (0.0774) loss: 0.9013 (0.8989) time: 0.1429 data: 0.0535 max mem: 9305 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0664 (0.0773) loss: 0.8953 (0.8988) time: 0.1342 data: 0.0418 max mem: 9305 +Train: [8] Total time: 0:19:19 (0.1856 s / it) +Averaged stats: lr: 0.000124 grad: 0.0664 (0.0773) loss: 0.8953 (0.8988) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:05:14 loss: 0.9114 (0.9114) time: 5.0761 data: 5.0385 max mem: 9305 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.9031 (0.9029) time: 0.1193 data: 0.0903 max mem: 9305 +Eval (hcp-train-subset): [8] Total time: 0:00:15 (0.2465 s / it) +Averaged stats (hcp-train-subset): loss: 0.9031 (0.9029) +Eval (hcp-val): [8] [ 0/62] eta: 0:05:25 loss: 0.8961 (0.8961) time: 5.2505 data: 5.2139 max mem: 9305 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8967 (0.8973) time: 0.1533 data: 0.1240 max mem: 9305 +Eval (hcp-val): [8] Total time: 0:00:14 (0.2381 s / it) +Averaged stats (hcp-val): loss: 0.8967 (0.8973) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [9] [ 0/6250] eta: 8:44:24 lr: 0.000124 grad: 0.0879 (0.0879) loss: 0.8907 (0.8907) time: 5.0342 data: 4.7828 max mem: 9305 +Train: [9] [ 100/6250] eta: 0:26:18 lr: 0.000124 grad: 0.0674 (0.0838) loss: 0.8941 (0.8970) time: 0.1611 data: 0.0652 max mem: 9305 +Train: [9] [ 200/6250] eta: 0:21:56 lr: 0.000124 grad: 0.0645 (0.0801) loss: 0.8974 (0.8954) time: 0.1527 data: 0.0665 max mem: 9305 +Train: [9] [ 300/6250] eta: 0:20:14 lr: 0.000124 grad: 0.0676 (0.0782) loss: 0.8982 (0.8955) time: 0.2001 data: 0.1160 max mem: 9305 +Train: [9] [ 400/6250] eta: 0:18:53 lr: 0.000124 grad: 0.0761 (0.0763) loss: 0.8926 (0.8956) time: 0.1773 data: 0.0766 max mem: 9305 +Train: [9] [ 500/6250] eta: 0:18:33 lr: 0.000124 grad: 0.0676 (0.0763) loss: 0.8958 (0.8950) time: 0.2014 data: 0.1148 max mem: 9305 +Train: [9] [ 600/6250] eta: 0:18:03 lr: 0.000124 grad: 0.0672 (0.0753) loss: 0.8911 (0.8949) time: 0.2480 data: 0.1442 max mem: 9305 +Train: [9] [ 700/6250] eta: 0:17:33 lr: 0.000124 grad: 0.0772 (0.0749) loss: 0.8985 (0.8950) time: 0.1645 data: 0.0587 max mem: 9305 +Train: [9] [ 800/6250] eta: 0:17:34 lr: 0.000124 grad: 0.0774 (0.0749) loss: 0.8926 (0.8950) time: 0.1230 data: 0.0005 max mem: 9305 +Train: [9] [ 900/6250] eta: 0:17:19 lr: 0.000124 grad: 0.0711 (0.0745) loss: 0.8950 (0.8951) time: 0.1678 data: 0.0809 max mem: 9305 +Train: [9] [1000/6250] eta: 0:16:55 lr: 0.000124 grad: 0.0724 (0.0744) loss: 0.8949 (0.8952) time: 0.1517 data: 0.0589 max mem: 9305 +Train: [9] [1100/6250] eta: 0:16:21 lr: 0.000124 grad: 0.0710 (0.0741) loss: 0.8955 (0.8955) time: 0.1484 data: 0.0663 max mem: 9305 +Train: [9] [1200/6250] eta: 0:15:56 lr: 0.000124 grad: 0.0748 (0.0738) loss: 0.8957 (0.8956) time: 0.1911 data: 0.1052 max mem: 9305 +Train: [9] [1300/6250] eta: 0:15:54 lr: 0.000124 grad: 0.0652 (0.0736) loss: 0.8985 (0.8957) time: 0.3292 data: 0.2293 max mem: 9305 +Train: [9] [1400/6250] eta: 0:15:17 lr: 0.000124 grad: 0.0652 (0.0733) loss: 0.8995 (0.8958) time: 0.1546 data: 0.0577 max mem: 9305 +Train: [9] [1500/6250] eta: 0:14:58 lr: 0.000124 grad: 0.0655 (0.0732) loss: 0.8999 (0.8960) time: 0.2271 data: 0.1297 max mem: 9305 +Train: [9] [1600/6250] eta: 0:14:35 lr: 0.000124 grad: 0.0677 (0.0733) loss: 0.8934 (0.8960) time: 0.0999 data: 0.0003 max mem: 9305 +Train: [9] [1700/6250] eta: 0:14:14 lr: 0.000124 grad: 0.0697 (0.0732) loss: 0.8946 (0.8960) time: 0.1423 data: 0.0498 max mem: 9305 +Train: [9] [1800/6250] eta: 0:13:47 lr: 0.000124 grad: 0.0629 (0.0732) loss: 0.8937 (0.8960) time: 0.1884 data: 0.0995 max mem: 9305 +Train: [9] [1900/6250] eta: 0:13:23 lr: 0.000124 grad: 0.0643 (0.0729) loss: 0.8971 (0.8961) time: 0.1846 data: 0.0838 max mem: 9305 +Train: [9] [2000/6250] eta: 0:13:02 lr: 0.000124 grad: 0.0641 (0.0728) loss: 0.9004 (0.8962) time: 0.1993 data: 0.1082 max mem: 9305 +Train: [9] [2100/6250] eta: 0:12:41 lr: 0.000124 grad: 0.0645 (0.0727) loss: 0.8978 (0.8963) time: 0.1665 data: 0.0805 max mem: 9305 +Train: [9] [2200/6250] eta: 0:12:23 lr: 0.000124 grad: 0.0712 (0.0726) loss: 0.8970 (0.8963) time: 0.2108 data: 0.1235 max mem: 9305 +Train: [9] [2300/6250] eta: 0:12:06 lr: 0.000124 grad: 0.0655 (0.0724) loss: 0.9016 (0.8965) time: 0.0965 data: 0.0002 max mem: 9305 +Train: [9] [2400/6250] eta: 0:11:42 lr: 0.000124 grad: 0.0708 (0.0724) loss: 0.8996 (0.8966) time: 0.1573 data: 0.0657 max mem: 9305 +Train: [9] [2500/6250] eta: 0:11:21 lr: 0.000124 grad: 0.0677 (0.0722) loss: 0.8993 (0.8968) time: 0.1101 data: 0.0086 max mem: 9305 +Train: [9] [2600/6250] eta: 0:11:01 lr: 0.000124 grad: 0.0748 (0.0721) loss: 0.9025 (0.8969) time: 0.1429 data: 0.0559 max mem: 9305 +Train: [9] [2700/6250] eta: 0:10:41 lr: 0.000124 grad: 0.0692 (0.0721) loss: 0.9015 (0.8970) time: 0.1887 data: 0.0988 max mem: 9305 +Train: [9] [2800/6250] eta: 0:10:21 lr: 0.000124 grad: 0.0714 (0.0720) loss: 0.8969 (0.8971) time: 0.1400 data: 0.0388 max mem: 9305 +Train: [9] [2900/6250] eta: 0:10:01 lr: 0.000124 grad: 0.0701 (0.0719) loss: 0.8972 (0.8971) time: 0.1685 data: 0.0773 max mem: 9305 +Train: [9] [3000/6250] eta: 0:09:40 lr: 0.000124 grad: 0.0701 (0.0718) loss: 0.8922 (0.8971) time: 0.1712 data: 0.0796 max mem: 9305 +Train: [9] [3100/6250] eta: 0:09:21 lr: 0.000124 grad: 0.0704 (0.0717) loss: 0.8963 (0.8971) time: 0.1747 data: 0.0859 max mem: 9305 +Train: [9] [3200/6250] eta: 0:09:02 lr: 0.000124 grad: 0.0672 (0.0718) loss: 0.9008 (0.8971) time: 0.1856 data: 0.0970 max mem: 9305 +Train: [9] [3300/6250] eta: 0:08:42 lr: 0.000124 grad: 0.0661 (0.0717) loss: 0.9008 (0.8971) time: 0.1527 data: 0.0714 max mem: 9305 +Train: [9] [3400/6250] eta: 0:08:25 lr: 0.000124 grad: 0.0693 (0.0716) loss: 0.8962 (0.8972) time: 0.1367 data: 0.0430 max mem: 9305 +Train: [9] [3500/6250] eta: 0:08:07 lr: 0.000124 grad: 0.0693 (0.0715) loss: 0.8986 (0.8973) time: 0.1891 data: 0.0978 max mem: 9305 +Train: [9] [3600/6250] eta: 0:07:50 lr: 0.000124 grad: 0.0621 (0.0715) loss: 0.8971 (0.8973) time: 0.1616 data: 0.0779 max mem: 9305 +Train: [9] [3700/6250] eta: 0:07:33 lr: 0.000124 grad: 0.0659 (0.0714) loss: 0.9004 (0.8973) time: 0.1676 data: 0.0723 max mem: 9305 +Train: [9] [3800/6250] eta: 0:07:15 lr: 0.000124 grad: 0.0634 (0.0714) loss: 0.8975 (0.8973) time: 0.1974 data: 0.0987 max mem: 9305 +Train: [9] [3900/6250] eta: 0:06:57 lr: 0.000124 grad: 0.0662 (0.0714) loss: 0.8984 (0.8973) time: 0.2029 data: 0.1199 max mem: 9305 +Train: [9] [4000/6250] eta: 0:06:39 lr: 0.000124 grad: 0.0678 (0.0713) loss: 0.8969 (0.8974) time: 0.1770 data: 0.0843 max mem: 9305 +Train: [9] [4100/6250] eta: 0:06:22 lr: 0.000124 grad: 0.0691 (0.0713) loss: 0.8977 (0.8974) time: 0.2291 data: 0.1184 max mem: 9305 +Train: [9] [4200/6250] eta: 0:06:06 lr: 0.000124 grad: 0.0676 (0.0712) loss: 0.8977 (0.8974) time: 0.1210 data: 0.0003 max mem: 9305 +Train: [9] [4300/6250] eta: 0:05:48 lr: 0.000124 grad: 0.0651 (0.0712) loss: 0.8955 (0.8974) time: 0.1851 data: 0.0983 max mem: 9305 +Train: [9] [4400/6250] eta: 0:05:30 lr: 0.000124 grad: 0.0646 (0.0711) loss: 0.9007 (0.8974) time: 0.1675 data: 0.0832 max mem: 9305 +Train: [9] [4500/6250] eta: 0:05:11 lr: 0.000124 grad: 0.0706 (0.0711) loss: 0.8997 (0.8974) time: 0.1657 data: 0.0768 max mem: 9305 +Train: [9] [4600/6250] eta: 0:04:54 lr: 0.000124 grad: 0.0646 (0.0710) loss: 0.8975 (0.8974) time: 0.1949 data: 0.1093 max mem: 9305 +Train: [9] [4700/6250] eta: 0:04:36 lr: 0.000124 grad: 0.0631 (0.0709) loss: 0.9009 (0.8974) time: 0.1987 data: 0.1114 max mem: 9305 +Train: [9] [4800/6250] eta: 0:04:19 lr: 0.000124 grad: 0.0686 (0.0709) loss: 0.9009 (0.8975) time: 0.2228 data: 0.1459 max mem: 9305 +Train: [9] [4900/6250] eta: 0:04:01 lr: 0.000124 grad: 0.0668 (0.0709) loss: 0.8963 (0.8975) time: 0.2036 data: 0.1214 max mem: 9305 +Train: [9] [5000/6250] eta: 0:03:43 lr: 0.000124 grad: 0.0604 (0.0708) loss: 0.8970 (0.8975) time: 0.1724 data: 0.0814 max mem: 9305 +Train: [9] [5100/6250] eta: 0:03:26 lr: 0.000124 grad: 0.0689 (0.0708) loss: 0.8960 (0.8975) time: 0.1656 data: 0.0621 max mem: 9305 +Train: [9] [5200/6250] eta: 0:03:08 lr: 0.000124 grad: 0.0636 (0.0707) loss: 0.8989 (0.8974) time: 0.1791 data: 0.0943 max mem: 9305 +Train: [9] [5300/6250] eta: 0:02:50 lr: 0.000124 grad: 0.0594 (0.0707) loss: 0.9021 (0.8975) time: 0.1909 data: 0.0929 max mem: 9305 +Train: [9] [5400/6250] eta: 0:02:33 lr: 0.000124 grad: 0.0702 (0.0706) loss: 0.9007 (0.8975) time: 0.1235 data: 0.0003 max mem: 9305 +Train: [9] [5500/6250] eta: 0:02:15 lr: 0.000124 grad: 0.0680 (0.0706) loss: 0.8965 (0.8975) time: 0.1906 data: 0.0727 max mem: 9305 +Train: [9] [5600/6250] eta: 0:01:56 lr: 0.000124 grad: 0.0663 (0.0705) loss: 0.8960 (0.8975) time: 0.1909 data: 0.0870 max mem: 9305 +Train: [9] [5700/6250] eta: 0:01:39 lr: 0.000124 grad: 0.0636 (0.0704) loss: 0.8971 (0.8976) time: 0.1534 data: 0.0333 max mem: 9305 +Train: [9] [5800/6250] eta: 0:01:21 lr: 0.000124 grad: 0.0613 (0.0703) loss: 0.8960 (0.8976) time: 0.1925 data: 0.1025 max mem: 9305 +Train: [9] [5900/6250] eta: 0:01:03 lr: 0.000124 grad: 0.0610 (0.0702) loss: 0.8992 (0.8976) time: 0.2132 data: 0.1203 max mem: 9305 +Train: [9] [6000/6250] eta: 0:00:45 lr: 0.000124 grad: 0.0668 (0.0702) loss: 0.8965 (0.8976) time: 0.1938 data: 0.0961 max mem: 9305 +Train: [9] [6100/6250] eta: 0:00:27 lr: 0.000124 grad: 0.0651 (0.0701) loss: 0.8983 (0.8976) time: 0.1484 data: 0.0501 max mem: 9305 +Train: [9] [6200/6250] eta: 0:00:09 lr: 0.000124 grad: 0.0611 (0.0701) loss: 0.9020 (0.8976) time: 0.1775 data: 0.0920 max mem: 9305 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0633 (0.0701) loss: 0.9019 (0.8976) time: 0.1775 data: 0.0722 max mem: 9305 +Train: [9] Total time: 0:18:52 (0.1813 s / it) +Averaged stats: lr: 0.000124 grad: 0.0633 (0.0701) loss: 0.9019 (0.8976) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:05:37 loss: 0.9099 (0.9099) time: 5.4426 data: 5.4081 max mem: 9305 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.9018 (0.9008) time: 0.1635 data: 0.1326 max mem: 9305 +Eval (hcp-train-subset): [9] Total time: 0:00:14 (0.2347 s / it) +Averaged stats (hcp-train-subset): loss: 0.9018 (0.9008) +Making plots (hcp-train-subset): example=54 +Eval (hcp-val): [9] [ 0/62] eta: 0:05:16 loss: 0.8908 (0.8908) time: 5.1046 data: 5.0681 max mem: 9305 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8935 (0.8951) time: 0.1402 data: 0.1070 max mem: 9305 +Eval (hcp-val): [9] Total time: 0:00:14 (0.2402 s / it) +Averaged stats (hcp-val): loss: 0.8935 (0.8951) +Making plots (hcp-val): example=9 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [10] [ 0/6250] eta: 11:36:57 lr: 0.000124 grad: 0.1142 (0.1142) loss: 0.9009 (0.9009) time: 6.6908 data: 6.5877 max mem: 9305 +Train: [10] [ 100/6250] eta: 0:24:11 lr: 0.000124 grad: 0.0713 (0.0870) loss: 0.9040 (0.9010) time: 0.1482 data: 0.0506 max mem: 9305 +Train: [10] [ 200/6250] eta: 0:21:36 lr: 0.000124 grad: 0.0714 (0.0799) loss: 0.8995 (0.9004) time: 0.1759 data: 0.0807 max mem: 9305 +Train: [10] [ 300/6250] eta: 0:20:38 lr: 0.000124 grad: 0.0679 (0.0776) loss: 0.8939 (0.8993) time: 0.2499 data: 0.1647 max mem: 9305 +Train: [10] [ 400/6250] eta: 0:19:39 lr: 0.000124 grad: 0.0735 (0.0757) loss: 0.8934 (0.8987) time: 0.2030 data: 0.1109 max mem: 9305 +Train: [10] [ 500/6250] eta: 0:19:12 lr: 0.000124 grad: 0.0736 (0.0755) loss: 0.8956 (0.8976) time: 0.1727 data: 0.0783 max mem: 9305 +Train: [10] [ 600/6250] eta: 0:18:40 lr: 0.000124 grad: 0.0678 (0.0747) loss: 0.8901 (0.8966) time: 0.1810 data: 0.0525 max mem: 9305 +Train: [10] [ 700/6250] eta: 0:18:08 lr: 0.000124 grad: 0.0688 (0.0744) loss: 0.8924 (0.8960) time: 0.2016 data: 0.1121 max mem: 9305 +Train: [10] [ 800/6250] eta: 0:17:47 lr: 0.000124 grad: 0.0718 (0.0741) loss: 0.8960 (0.8956) time: 0.2088 data: 0.1162 max mem: 9305 +Train: [10] [ 900/6250] eta: 0:17:29 lr: 0.000124 grad: 0.0665 (0.0737) loss: 0.8919 (0.8952) time: 0.1631 data: 0.0552 max mem: 9305 +Train: [10] [1000/6250] eta: 0:16:58 lr: 0.000124 grad: 0.0645 (0.0731) loss: 0.8957 (0.8951) time: 0.1868 data: 0.0943 max mem: 9305 +Train: [10] [1100/6250] eta: 0:16:18 lr: 0.000124 grad: 0.0673 (0.0726) loss: 0.8939 (0.8950) time: 0.1460 data: 0.0613 max mem: 9305 +Train: [10] [1200/6250] eta: 0:15:46 lr: 0.000124 grad: 0.0647 (0.0723) loss: 0.8927 (0.8949) time: 0.1656 data: 0.0787 max mem: 9305 +Train: [10] [1300/6250] eta: 0:15:18 lr: 0.000124 grad: 0.0658 (0.0721) loss: 0.8934 (0.8947) time: 0.1742 data: 0.0776 max mem: 9305 +Train: [10] [1400/6250] eta: 0:14:53 lr: 0.000124 grad: 0.0633 (0.0717) loss: 0.8915 (0.8947) time: 0.1618 data: 0.0664 max mem: 9305 +Train: [10] [1500/6250] eta: 0:14:28 lr: 0.000124 grad: 0.0665 (0.0714) loss: 0.8920 (0.8948) time: 0.1891 data: 0.0979 max mem: 9305 +Train: [10] [1600/6250] eta: 0:14:05 lr: 0.000124 grad: 0.0640 (0.0710) loss: 0.8919 (0.8948) time: 0.1861 data: 0.0890 max mem: 9305 +Train: [10] [1700/6250] eta: 0:13:38 lr: 0.000124 grad: 0.0695 (0.0711) loss: 0.8899 (0.8947) time: 0.1672 data: 0.0870 max mem: 9305 +Train: [10] [1800/6250] eta: 0:13:16 lr: 0.000124 grad: 0.0660 (0.0709) loss: 0.8934 (0.8948) time: 0.1660 data: 0.0801 max mem: 9305 +Train: [10] [1900/6250] eta: 0:12:55 lr: 0.000124 grad: 0.0609 (0.0707) loss: 0.8959 (0.8947) time: 0.1638 data: 0.0818 max mem: 9305 +Train: [10] [2000/6250] eta: 0:12:35 lr: 0.000124 grad: 0.0612 (0.0705) loss: 0.8951 (0.8946) time: 0.1693 data: 0.0677 max mem: 9305 +Train: [10] [2100/6250] eta: 0:12:16 lr: 0.000124 grad: 0.0627 (0.0703) loss: 0.8918 (0.8947) time: 0.1384 data: 0.0480 max mem: 9305 +Train: [10] [2200/6250] eta: 0:11:57 lr: 0.000124 grad: 0.0644 (0.0700) loss: 0.8931 (0.8947) time: 0.1582 data: 0.0689 max mem: 9305 +Train: [10] [2300/6250] eta: 0:11:36 lr: 0.000124 grad: 0.0648 (0.0701) loss: 0.8976 (0.8947) time: 0.1292 data: 0.0402 max mem: 9305 +Train: [10] [2400/6250] eta: 0:11:16 lr: 0.000124 grad: 0.0686 (0.0700) loss: 0.8939 (0.8947) time: 0.1561 data: 0.0643 max mem: 9305 +Train: [10] [2500/6250] eta: 0:10:58 lr: 0.000124 grad: 0.0616 (0.0698) loss: 0.8946 (0.8947) time: 0.1901 data: 0.1077 max mem: 9305 +Train: [10] [2600/6250] eta: 0:10:41 lr: 0.000124 grad: 0.0618 (0.0697) loss: 0.8934 (0.8948) time: 0.1628 data: 0.0619 max mem: 9305 +Train: [10] [2700/6250] eta: 0:10:31 lr: 0.000124 grad: 0.0612 (0.0695) loss: 0.8923 (0.8948) time: 0.2005 data: 0.0958 max mem: 9305 +Train: [10] [2800/6250] eta: 0:10:15 lr: 0.000124 grad: 0.0647 (0.0693) loss: 0.8961 (0.8948) time: 0.2189 data: 0.1295 max mem: 9305 +Train: [10] [2900/6250] eta: 0:10:04 lr: 0.000124 grad: 0.0621 (0.0692) loss: 0.8954 (0.8948) time: 0.1661 data: 0.0661 max mem: 9305 +Train: [10] [3000/6250] eta: 0:09:50 lr: 0.000124 grad: 0.0640 (0.0691) loss: 0.8952 (0.8948) time: 0.2026 data: 0.1097 max mem: 9305 +Train: [10] [3100/6250] eta: 0:09:33 lr: 0.000124 grad: 0.0657 (0.0690) loss: 0.8989 (0.8948) time: 0.1242 data: 0.0089 max mem: 9305 +Train: [10] [3200/6250] eta: 0:09:16 lr: 0.000124 grad: 0.0618 (0.0691) loss: 0.8943 (0.8947) time: 0.2109 data: 0.1124 max mem: 9305 +Train: [10] [3300/6250] eta: 0:08:59 lr: 0.000124 grad: 0.0618 (0.0690) loss: 0.8954 (0.8947) time: 0.1804 data: 0.0903 max mem: 9305 +Train: [10] [3400/6250] eta: 0:08:40 lr: 0.000124 grad: 0.0706 (0.0689) loss: 0.8955 (0.8947) time: 0.1565 data: 0.0644 max mem: 9305 +Train: [10] [3500/6250] eta: 0:08:24 lr: 0.000124 grad: 0.0635 (0.0688) loss: 0.8967 (0.8947) time: 0.1158 data: 0.0004 max mem: 9305 +Train: [10] [3600/6250] eta: 0:08:04 lr: 0.000124 grad: 0.0623 (0.0687) loss: 0.8952 (0.8947) time: 0.1766 data: 0.0737 max mem: 9305 +Train: [10] [3700/6250] eta: 0:07:47 lr: 0.000124 grad: 0.0615 (0.0686) loss: 0.8949 (0.8947) time: 0.1912 data: 0.1022 max mem: 9305 +Train: [10] [3800/6250] eta: 0:07:30 lr: 0.000124 grad: 0.0655 (0.0686) loss: 0.8949 (0.8946) time: 0.2301 data: 0.1391 max mem: 9305 +Train: [10] [3900/6250] eta: 0:07:12 lr: 0.000124 grad: 0.0612 (0.0686) loss: 0.8912 (0.8946) time: 0.2218 data: 0.1124 max mem: 9305 +Train: [10] [4000/6250] eta: 0:06:55 lr: 0.000124 grad: 0.0656 (0.0685) loss: 0.8948 (0.8946) time: 0.2010 data: 0.1109 max mem: 9305 +Train: [10] [4100/6250] eta: 0:06:37 lr: 0.000124 grad: 0.0643 (0.0685) loss: 0.8962 (0.8946) time: 0.1605 data: 0.0653 max mem: 9305 +Train: [10] [4200/6250] eta: 0:06:18 lr: 0.000124 grad: 0.0622 (0.0684) loss: 0.8939 (0.8946) time: 0.2092 data: 0.1251 max mem: 9305 +Train: [10] [4300/6250] eta: 0:06:01 lr: 0.000124 grad: 0.0642 (0.0684) loss: 0.8892 (0.8946) time: 0.2220 data: 0.1079 max mem: 9305 +Train: [10] [4400/6250] eta: 0:05:41 lr: 0.000124 grad: 0.0629 (0.0683) loss: 0.8940 (0.8946) time: 0.1820 data: 0.0964 max mem: 9305 +Train: [10] [4500/6250] eta: 0:05:22 lr: 0.000124 grad: 0.0596 (0.0684) loss: 0.8925 (0.8946) time: 0.1801 data: 0.0838 max mem: 9305 +Train: [10] [4600/6250] eta: 0:05:05 lr: 0.000124 grad: 0.0678 (0.0684) loss: 0.8940 (0.8946) time: 0.2672 data: 0.1900 max mem: 9305 +Train: [10] [4700/6250] eta: 0:04:46 lr: 0.000124 grad: 0.0719 (0.0684) loss: 0.8936 (0.8946) time: 0.1738 data: 0.0831 max mem: 9305 +Train: [10] [4800/6250] eta: 0:04:27 lr: 0.000124 grad: 0.0616 (0.0684) loss: 0.8873 (0.8945) time: 0.1839 data: 0.1035 max mem: 9305 +Train: [10] [4900/6250] eta: 0:04:09 lr: 0.000124 grad: 0.0602 (0.0684) loss: 0.8895 (0.8945) time: 0.2305 data: 0.1439 max mem: 9305 +Train: [10] [5000/6250] eta: 0:03:52 lr: 0.000124 grad: 0.0652 (0.0683) loss: 0.8920 (0.8944) time: 0.2350 data: 0.1355 max mem: 9305 +Train: [10] [5100/6250] eta: 0:03:33 lr: 0.000124 grad: 0.0637 (0.0683) loss: 0.8914 (0.8944) time: 0.2755 data: 0.1885 max mem: 9305 +Train: [10] [5200/6250] eta: 0:03:15 lr: 0.000124 grad: 0.0629 (0.0683) loss: 0.8936 (0.8944) time: 0.1992 data: 0.1115 max mem: 9305 +Train: [10] [5300/6250] eta: 0:02:56 lr: 0.000124 grad: 0.0615 (0.0683) loss: 0.8946 (0.8944) time: 0.2089 data: 0.1203 max mem: 9305 +Train: [10] [5400/6250] eta: 0:02:38 lr: 0.000124 grad: 0.0625 (0.0682) loss: 0.8963 (0.8944) time: 0.1544 data: 0.0718 max mem: 9305 +Train: [10] [5500/6250] eta: 0:02:19 lr: 0.000124 grad: 0.0606 (0.0682) loss: 0.8893 (0.8943) time: 0.1849 data: 0.0978 max mem: 9305 +Train: [10] [5600/6250] eta: 0:02:01 lr: 0.000124 grad: 0.0654 (0.0682) loss: 0.8888 (0.8943) time: 0.4882 data: 0.3602 max mem: 9305 +Train: [10] [5700/6250] eta: 0:01:42 lr: 0.000124 grad: 0.0603 (0.0682) loss: 0.8906 (0.8942) time: 0.1730 data: 0.0774 max mem: 9305 +Train: [10] [5800/6250] eta: 0:01:23 lr: 0.000124 grad: 0.0619 (0.0681) loss: 0.8940 (0.8942) time: 0.1683 data: 0.0683 max mem: 9305 +Train: [10] [5900/6250] eta: 0:01:05 lr: 0.000124 grad: 0.0620 (0.0681) loss: 0.8940 (0.8942) time: 0.1550 data: 0.0662 max mem: 9305 +Train: [10] [6000/6250] eta: 0:00:46 lr: 0.000124 grad: 0.0617 (0.0680) loss: 0.8964 (0.8941) time: 0.1771 data: 0.0895 max mem: 9305 +Train: [10] [6100/6250] eta: 0:00:28 lr: 0.000124 grad: 0.0653 (0.0680) loss: 0.8915 (0.8941) time: 0.1130 data: 0.0003 max mem: 9305 +Train: [10] [6200/6250] eta: 0:00:09 lr: 0.000124 grad: 0.0615 (0.0680) loss: 0.8923 (0.8940) time: 0.1366 data: 0.0499 max mem: 9305 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0645 (0.0680) loss: 0.8877 (0.8940) time: 0.1773 data: 0.0818 max mem: 9305 +Train: [10] Total time: 0:19:29 (0.1871 s / it) +Averaged stats: lr: 0.000124 grad: 0.0645 (0.0680) loss: 0.8877 (0.8940) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:05:42 loss: 0.9084 (0.9084) time: 5.5213 data: 5.4849 max mem: 9305 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.9001 (0.8986) time: 0.1565 data: 0.1260 max mem: 9305 +Eval (hcp-train-subset): [10] Total time: 0:00:15 (0.2450 s / it) +Averaged stats (hcp-train-subset): loss: 0.9001 (0.8986) +Eval (hcp-val): [10] [ 0/62] eta: 0:06:16 loss: 0.8881 (0.8881) time: 6.0673 data: 6.0322 max mem: 9305 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8914 (0.8928) time: 0.1672 data: 0.1383 max mem: 9305 +Eval (hcp-val): [10] Total time: 0:00:15 (0.2444 s / it) +Averaged stats (hcp-val): loss: 0.8914 (0.8928) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [11] [ 0/6250] eta: 12:24:31 lr: 0.000124 grad: 0.0750 (0.0750) loss: 0.9032 (0.9032) time: 7.1474 data: 7.0300 max mem: 9305 +Train: [11] [ 100/6250] eta: 0:28:09 lr: 0.000124 grad: 0.0754 (0.0853) loss: 0.8931 (0.8953) time: 0.3156 data: 0.2150 max mem: 9305 +Train: [11] [ 200/6250] eta: 0:22:24 lr: 0.000124 grad: 0.0700 (0.0783) loss: 0.8899 (0.8937) time: 0.1856 data: 0.0954 max mem: 9305 +Train: [11] [ 300/6250] eta: 0:20:42 lr: 0.000124 grad: 0.0656 (0.0757) loss: 0.8926 (0.8924) time: 0.1995 data: 0.1021 max mem: 9305 +Train: [11] [ 400/6250] eta: 0:21:02 lr: 0.000124 grad: 0.0709 (0.0743) loss: 0.8866 (0.8918) time: 0.2733 data: 0.1512 max mem: 9305 +Train: [11] [ 500/6250] eta: 0:21:12 lr: 0.000124 grad: 0.0626 (0.0725) loss: 0.8910 (0.8919) time: 0.4972 data: 0.3878 max mem: 9305 +Train: [11] [ 600/6250] eta: 0:19:50 lr: 0.000124 grad: 0.0658 (0.0715) loss: 0.8898 (0.8922) time: 0.1898 data: 0.0902 max mem: 9305 +Train: [11] [ 700/6250] eta: 0:19:08 lr: 0.000124 grad: 0.0609 (0.0707) loss: 0.8900 (0.8921) time: 0.2562 data: 0.1715 max mem: 9305 +Train: [11] [ 800/6250] eta: 0:18:23 lr: 0.000124 grad: 0.0661 (0.0698) loss: 0.8949 (0.8924) time: 0.1471 data: 0.0522 max mem: 9305 +Train: [11] [ 900/6250] eta: 0:17:54 lr: 0.000124 grad: 0.0663 (0.0693) loss: 0.8858 (0.8922) time: 0.1533 data: 0.0615 max mem: 9305 +Train: [11] [1000/6250] eta: 0:17:25 lr: 0.000124 grad: 0.0706 (0.0692) loss: 0.8931 (0.8922) time: 0.1836 data: 0.0901 max mem: 9305 +Train: [11] [1100/6250] eta: 0:16:49 lr: 0.000124 grad: 0.0616 (0.0691) loss: 0.8907 (0.8919) time: 0.1595 data: 0.0724 max mem: 9305 +Train: [11] [1200/6250] eta: 0:16:23 lr: 0.000124 grad: 0.0663 (0.0689) loss: 0.8854 (0.8919) time: 0.0995 data: 0.0002 max mem: 9305 +Train: [11] [1300/6250] eta: 0:15:56 lr: 0.000124 grad: 0.0629 (0.0687) loss: 0.8924 (0.8918) time: 0.1876 data: 0.0974 max mem: 9305 +Train: [11] [1400/6250] eta: 0:15:27 lr: 0.000124 grad: 0.0629 (0.0685) loss: 0.8929 (0.8916) time: 0.1562 data: 0.0603 max mem: 9305 +Train: [11] [1500/6250] eta: 0:14:59 lr: 0.000124 grad: 0.0652 (0.0684) loss: 0.8868 (0.8913) time: 0.1370 data: 0.0501 max mem: 9305 +Train: [11] [1600/6250] eta: 0:14:36 lr: 0.000124 grad: 0.0670 (0.0684) loss: 0.8864 (0.8910) time: 0.1699 data: 0.0820 max mem: 9305 +Train: [11] [1700/6250] eta: 0:14:12 lr: 0.000124 grad: 0.0672 (0.0686) loss: 0.8813 (0.8907) time: 0.1705 data: 0.0888 max mem: 9305 +Train: [11] [1800/6250] eta: 0:13:46 lr: 0.000124 grad: 0.0648 (0.0686) loss: 0.8859 (0.8904) time: 0.1437 data: 0.0559 max mem: 9305 +Train: [11] [1900/6250] eta: 0:13:21 lr: 0.000124 grad: 0.0689 (0.0686) loss: 0.8903 (0.8902) time: 0.1422 data: 0.0487 max mem: 9305 +Train: [11] [2000/6250] eta: 0:12:59 lr: 0.000124 grad: 0.0636 (0.0686) loss: 0.8897 (0.8900) time: 0.1677 data: 0.0702 max mem: 9305 +Train: [11] [2100/6250] eta: 0:12:37 lr: 0.000124 grad: 0.0662 (0.0685) loss: 0.8910 (0.8898) time: 0.1646 data: 0.0754 max mem: 9305 +Train: [11] [2200/6250] eta: 0:12:15 lr: 0.000124 grad: 0.0716 (0.0684) loss: 0.8882 (0.8896) time: 0.1646 data: 0.0672 max mem: 9305 +Train: [11] [2300/6250] eta: 0:11:56 lr: 0.000124 grad: 0.0657 (0.0683) loss: 0.8892 (0.8896) time: 0.2216 data: 0.1354 max mem: 9305 +Train: [11] [2400/6250] eta: 0:11:44 lr: 0.000124 grad: 0.0635 (0.0682) loss: 0.8870 (0.8895) time: 0.1196 data: 0.0003 max mem: 9305 +Train: [11] [2500/6250] eta: 0:11:28 lr: 0.000124 grad: 0.0639 (0.0681) loss: 0.8901 (0.8895) time: 0.2835 data: 0.1793 max mem: 9305 +Train: [11] [2600/6250] eta: 0:11:20 lr: 0.000124 grad: 0.0630 (0.0680) loss: 0.8847 (0.8895) time: 0.3598 data: 0.2413 max mem: 9305 +Train: [11] [2700/6250] eta: 0:10:59 lr: 0.000124 grad: 0.0627 (0.0679) loss: 0.8885 (0.8895) time: 0.1210 data: 0.0003 max mem: 9305 +Train: [11] [2800/6250] eta: 0:10:43 lr: 0.000124 grad: 0.0671 (0.0680) loss: 0.8912 (0.8895) time: 0.1547 data: 0.0345 max mem: 9305 +Train: [11] [2900/6250] eta: 0:10:23 lr: 0.000124 grad: 0.0663 (0.0679) loss: 0.8880 (0.8894) time: 0.1490 data: 0.0591 max mem: 9305 +Train: [11] [3000/6250] eta: 0:10:04 lr: 0.000124 grad: 0.0667 (0.0679) loss: 0.8894 (0.8894) time: 0.1411 data: 0.0486 max mem: 9305 +Train: [11] [3100/6250] eta: 0:09:47 lr: 0.000124 grad: 0.0658 (0.0679) loss: 0.8913 (0.8893) time: 0.2873 data: 0.2030 max mem: 9305 +Train: [11] [3200/6250] eta: 0:09:28 lr: 0.000124 grad: 0.0661 (0.0680) loss: 0.8835 (0.8893) time: 0.1464 data: 0.0355 max mem: 9305 +Train: [11] [3300/6250] eta: 0:09:10 lr: 0.000124 grad: 0.0658 (0.0680) loss: 0.8891 (0.8893) time: 0.1668 data: 0.0663 max mem: 9305 +Train: [11] [3400/6250] eta: 0:08:52 lr: 0.000124 grad: 0.0652 (0.0680) loss: 0.8849 (0.8892) time: 0.1379 data: 0.0383 max mem: 9305 +Train: [11] [3500/6250] eta: 0:08:35 lr: 0.000124 grad: 0.0673 (0.0680) loss: 0.8891 (0.8891) time: 0.1210 data: 0.0006 max mem: 9305 +Train: [11] [3600/6250] eta: 0:08:15 lr: 0.000124 grad: 0.0671 (0.0681) loss: 0.8893 (0.8891) time: 0.1570 data: 0.0699 max mem: 9305 +Train: [11] [3700/6250] eta: 0:07:54 lr: 0.000124 grad: 0.0660 (0.0680) loss: 0.8899 (0.8891) time: 0.1869 data: 0.0993 max mem: 9305 +Train: [11] [3800/6250] eta: 0:07:35 lr: 0.000124 grad: 0.0671 (0.0680) loss: 0.8871 (0.8891) time: 0.1218 data: 0.0193 max mem: 9305 +Train: [11] [3900/6250] eta: 0:07:17 lr: 0.000124 grad: 0.0656 (0.0679) loss: 0.8893 (0.8890) time: 0.1991 data: 0.1114 max mem: 9305 +Train: [11] [4000/6250] eta: 0:06:58 lr: 0.000123 grad: 0.0691 (0.0681) loss: 0.8915 (0.8890) time: 0.1365 data: 0.0343 max mem: 9305 +Train: [11] [4100/6250] eta: 0:06:40 lr: 0.000123 grad: 0.0691 (0.0681) loss: 0.8918 (0.8890) time: 0.2369 data: 0.1377 max mem: 9305 +Train: [11] [4200/6250] eta: 0:06:20 lr: 0.000123 grad: 0.0609 (0.0680) loss: 0.8899 (0.8890) time: 0.1804 data: 0.0882 max mem: 9305 +Train: [11] [4300/6250] eta: 0:06:05 lr: 0.000123 grad: 0.0681 (0.0680) loss: 0.8889 (0.8890) time: 0.1515 data: 0.0559 max mem: 9305 +Train: [11] [4400/6250] eta: 0:05:45 lr: 0.000123 grad: 0.0610 (0.0679) loss: 0.8929 (0.8891) time: 0.1688 data: 0.0771 max mem: 9305 +Train: [11] [4500/6250] eta: 0:05:27 lr: 0.000123 grad: 0.0590 (0.0679) loss: 0.8881 (0.8890) time: 0.3561 data: 0.2260 max mem: 9305 +Train: [11] [4600/6250] eta: 0:05:08 lr: 0.000123 grad: 0.0703 (0.0679) loss: 0.8873 (0.8890) time: 0.1860 data: 0.0892 max mem: 9305 +Train: [11] [4700/6250] eta: 0:04:49 lr: 0.000123 grad: 0.0648 (0.0679) loss: 0.8891 (0.8890) time: 0.1708 data: 0.0833 max mem: 9305 +Train: [11] [4800/6250] eta: 0:04:31 lr: 0.000123 grad: 0.0631 (0.0679) loss: 0.8912 (0.8890) time: 0.2405 data: 0.1497 max mem: 9305 +Train: [11] [4900/6250] eta: 0:04:13 lr: 0.000123 grad: 0.0630 (0.0678) loss: 0.8904 (0.8891) time: 0.2361 data: 0.1469 max mem: 9305 +Train: [11] [5000/6250] eta: 0:03:54 lr: 0.000123 grad: 0.0607 (0.0678) loss: 0.8936 (0.8891) time: 0.1875 data: 0.0946 max mem: 9305 +Train: [11] [5100/6250] eta: 0:03:35 lr: 0.000123 grad: 0.0652 (0.0678) loss: 0.8892 (0.8892) time: 0.2282 data: 0.1307 max mem: 9305 +Train: [11] [5200/6250] eta: 0:03:16 lr: 0.000123 grad: 0.0638 (0.0678) loss: 0.8888 (0.8892) time: 0.1878 data: 0.0773 max mem: 9305 +Train: [11] [5300/6250] eta: 0:02:58 lr: 0.000123 grad: 0.0596 (0.0677) loss: 0.8965 (0.8892) time: 0.2554 data: 0.1626 max mem: 9305 +Train: [11] [5400/6250] eta: 0:02:39 lr: 0.000123 grad: 0.0633 (0.0677) loss: 0.8902 (0.8893) time: 0.2038 data: 0.1089 max mem: 9305 +Train: [11] [5500/6250] eta: 0:02:21 lr: 0.000123 grad: 0.0646 (0.0676) loss: 0.8940 (0.8894) time: 0.3447 data: 0.2396 max mem: 9305 +Train: [11] [5600/6250] eta: 0:02:02 lr: 0.000123 grad: 0.0647 (0.0675) loss: 0.8919 (0.8894) time: 0.1608 data: 0.0648 max mem: 9305 +Train: [11] [5700/6250] eta: 0:01:43 lr: 0.000123 grad: 0.0651 (0.0675) loss: 0.8919 (0.8895) time: 0.1549 data: 0.0599 max mem: 9305 +Train: [11] [5800/6250] eta: 0:01:24 lr: 0.000123 grad: 0.0591 (0.0674) loss: 0.8923 (0.8895) time: 0.2737 data: 0.1668 max mem: 9305 +Train: [11] [5900/6250] eta: 0:01:06 lr: 0.000123 grad: 0.0622 (0.0673) loss: 0.8908 (0.8895) time: 0.1081 data: 0.0069 max mem: 9305 +Train: [11] [6000/6250] eta: 0:00:47 lr: 0.000123 grad: 0.0645 (0.0673) loss: 0.8900 (0.8896) time: 0.1123 data: 0.0002 max mem: 9305 +Train: [11] [6100/6250] eta: 0:00:28 lr: 0.000123 grad: 0.0623 (0.0672) loss: 0.8903 (0.8896) time: 0.1378 data: 0.0496 max mem: 9305 +Train: [11] [6200/6250] eta: 0:00:09 lr: 0.000123 grad: 0.0591 (0.0672) loss: 0.8926 (0.8897) time: 0.1472 data: 0.0585 max mem: 9305 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0663 (0.0672) loss: 0.8931 (0.8897) time: 0.1974 data: 0.1073 max mem: 9305 +Train: [11] Total time: 0:19:39 (0.1887 s / it) +Averaged stats: lr: 0.000123 grad: 0.0663 (0.0672) loss: 0.8931 (0.8897) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:05:39 loss: 0.9070 (0.9070) time: 5.4767 data: 5.4429 max mem: 9305 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8969 (0.8965) time: 0.1538 data: 0.1240 max mem: 9305 +Eval (hcp-train-subset): [11] Total time: 0:00:14 (0.2267 s / it) +Averaged stats (hcp-train-subset): loss: 0.8969 (0.8965) +Eval (hcp-val): [11] [ 0/62] eta: 0:05:33 loss: 0.8893 (0.8893) time: 5.3847 data: 5.3515 max mem: 9305 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8900 (0.8920) time: 0.1603 data: 0.1313 max mem: 9305 +Eval (hcp-val): [11] Total time: 0:00:15 (0.2472 s / it) +Averaged stats (hcp-val): loss: 0.8900 (0.8920) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [12] [ 0/6250] eta: 10:21:15 lr: 0.000123 grad: 0.1131 (0.1131) loss: 0.8691 (0.8691) time: 5.9640 data: 5.7369 max mem: 9305 +Train: [12] [ 100/6250] eta: 0:25:07 lr: 0.000123 grad: 0.0611 (0.0693) loss: 0.8917 (0.8916) time: 0.1855 data: 0.0737 max mem: 9305 +Train: [12] [ 200/6250] eta: 0:21:01 lr: 0.000123 grad: 0.0582 (0.0654) loss: 0.8932 (0.8934) time: 0.1577 data: 0.0559 max mem: 9305 +Train: [12] [ 300/6250] eta: 0:19:31 lr: 0.000123 grad: 0.0608 (0.0641) loss: 0.8930 (0.8930) time: 0.1729 data: 0.0762 max mem: 9305 +Train: [12] [ 400/6250] eta: 0:18:33 lr: 0.000123 grad: 0.0643 (0.0643) loss: 0.8809 (0.8916) time: 0.2036 data: 0.1088 max mem: 9305 +Train: [12] [ 500/6250] eta: 0:17:24 lr: 0.000123 grad: 0.0590 (0.0642) loss: 0.8862 (0.8907) time: 0.1578 data: 0.0635 max mem: 9305 +Train: [12] [ 600/6250] eta: 0:16:39 lr: 0.000123 grad: 0.0612 (0.0640) loss: 0.8954 (0.8904) time: 0.1664 data: 0.0752 max mem: 9305 +Train: [12] [ 700/6250] eta: 0:16:20 lr: 0.000123 grad: 0.0590 (0.0639) loss: 0.8900 (0.8901) time: 0.1208 data: 0.0029 max mem: 9305 +Train: [12] [ 800/6250] eta: 0:16:06 lr: 0.000123 grad: 0.0670 (0.0641) loss: 0.8840 (0.8896) time: 0.1943 data: 0.1045 max mem: 9305 +Train: [12] [ 900/6250] eta: 0:15:47 lr: 0.000123 grad: 0.0618 (0.0642) loss: 0.8894 (0.8894) time: 0.2004 data: 0.1136 max mem: 9305 +Train: [12] [1000/6250] eta: 0:15:23 lr: 0.000123 grad: 0.0662 (0.0643) loss: 0.8859 (0.8892) time: 0.1412 data: 0.0451 max mem: 9305 +Train: [12] [1100/6250] eta: 0:15:19 lr: 0.000123 grad: 0.0616 (0.0642) loss: 0.8906 (0.8891) time: 0.1887 data: 0.0847 max mem: 9305 +Train: [12] [1200/6250] eta: 0:14:55 lr: 0.000123 grad: 0.0631 (0.0643) loss: 0.8887 (0.8888) time: 0.1626 data: 0.0774 max mem: 9305 +Train: [12] [1300/6250] eta: 0:14:36 lr: 0.000123 grad: 0.0629 (0.0646) loss: 0.8865 (0.8886) time: 0.1923 data: 0.0964 max mem: 9305 +Train: [12] [1400/6250] eta: 0:14:18 lr: 0.000123 grad: 0.0666 (0.0646) loss: 0.8856 (0.8884) time: 0.2128 data: 0.1218 max mem: 9305 +Train: [12] [1500/6250] eta: 0:13:54 lr: 0.000123 grad: 0.0600 (0.0648) loss: 0.8918 (0.8883) time: 0.1396 data: 0.0458 max mem: 9305 +Train: [12] [1600/6250] eta: 0:13:33 lr: 0.000123 grad: 0.0635 (0.0650) loss: 0.8885 (0.8882) time: 0.1862 data: 0.0921 max mem: 9305 +Train: [12] [1700/6250] eta: 0:13:16 lr: 0.000123 grad: 0.0614 (0.0651) loss: 0.8885 (0.8881) time: 0.1791 data: 0.0940 max mem: 9305 +Train: [12] [1800/6250] eta: 0:12:56 lr: 0.000123 grad: 0.0599 (0.0650) loss: 0.8890 (0.8880) time: 0.1620 data: 0.0677 max mem: 9305 +Train: [12] [1900/6250] eta: 0:12:35 lr: 0.000123 grad: 0.0607 (0.0650) loss: 0.8928 (0.8881) time: 0.1275 data: 0.0352 max mem: 9305 +Train: [12] [2000/6250] eta: 0:12:19 lr: 0.000123 grad: 0.0617 (0.0650) loss: 0.8933 (0.8881) time: 0.2042 data: 0.1053 max mem: 9305 +Train: [12] [2100/6250] eta: 0:12:08 lr: 0.000123 grad: 0.0616 (0.0650) loss: 0.8927 (0.8882) time: 0.1804 data: 0.0858 max mem: 9305 +Train: [12] [2200/6250] eta: 0:12:01 lr: 0.000123 grad: 0.0628 (0.0651) loss: 0.8898 (0.8882) time: 0.1056 data: 0.0002 max mem: 9305 +Train: [12] [2300/6250] eta: 0:11:51 lr: 0.000123 grad: 0.0571 (0.0652) loss: 0.8927 (0.8882) time: 0.2176 data: 0.1021 max mem: 9305 +Train: [12] [2400/6250] eta: 0:11:41 lr: 0.000123 grad: 0.0586 (0.0651) loss: 0.8905 (0.8882) time: 0.1946 data: 0.0802 max mem: 9305 +Train: [12] [2500/6250] eta: 0:11:25 lr: 0.000123 grad: 0.0580 (0.0650) loss: 0.8926 (0.8883) time: 0.1242 data: 0.0276 max mem: 9305 +Train: [12] [2600/6250] eta: 0:11:07 lr: 0.000123 grad: 0.0571 (0.0650) loss: 0.8913 (0.8884) time: 0.1991 data: 0.1051 max mem: 9305 +Train: [12] [2700/6250] eta: 0:10:48 lr: 0.000123 grad: 0.0607 (0.0649) loss: 0.8901 (0.8885) time: 0.2082 data: 0.1192 max mem: 9305 +Train: [12] [2800/6250] eta: 0:10:28 lr: 0.000123 grad: 0.0636 (0.0649) loss: 0.8861 (0.8884) time: 0.1751 data: 0.0887 max mem: 9305 +Train: [12] [2900/6250] eta: 0:10:10 lr: 0.000123 grad: 0.0693 (0.0649) loss: 0.8831 (0.8883) time: 0.1979 data: 0.1065 max mem: 9305 +Train: [12] [3000/6250] eta: 0:09:55 lr: 0.000123 grad: 0.0659 (0.0650) loss: 0.8872 (0.8883) time: 0.2346 data: 0.1473 max mem: 9305 +Train: [12] [3100/6250] eta: 0:09:37 lr: 0.000123 grad: 0.0588 (0.0650) loss: 0.8955 (0.8883) time: 0.2262 data: 0.1312 max mem: 9305 +Train: [12] [3200/6250] eta: 0:09:18 lr: 0.000123 grad: 0.0617 (0.0651) loss: 0.8882 (0.8883) time: 0.1847 data: 0.0697 max mem: 9305 +Train: [12] [3300/6250] eta: 0:09:03 lr: 0.000123 grad: 0.0651 (0.0652) loss: 0.8824 (0.8882) time: 0.0963 data: 0.0002 max mem: 9305 +Train: [12] [3400/6250] eta: 0:08:44 lr: 0.000123 grad: 0.0648 (0.0652) loss: 0.8820 (0.8882) time: 0.1435 data: 0.0455 max mem: 9305 +Train: [12] [3500/6250] eta: 0:08:29 lr: 0.000123 grad: 0.0614 (0.0652) loss: 0.8875 (0.8882) time: 0.2600 data: 0.1351 max mem: 9305 +Train: [12] [3600/6250] eta: 0:08:09 lr: 0.000123 grad: 0.0610 (0.0653) loss: 0.8920 (0.8882) time: 0.1883 data: 0.0796 max mem: 9305 +Train: [12] [3700/6250] eta: 0:07:51 lr: 0.000123 grad: 0.0619 (0.0652) loss: 0.8865 (0.8882) time: 0.1679 data: 0.0792 max mem: 9305 +Train: [12] [3800/6250] eta: 0:07:32 lr: 0.000123 grad: 0.0613 (0.0652) loss: 0.8873 (0.8883) time: 0.2117 data: 0.1166 max mem: 9305 +Train: [12] [3900/6250] eta: 0:07:12 lr: 0.000123 grad: 0.0597 (0.0651) loss: 0.8931 (0.8883) time: 0.1504 data: 0.0644 max mem: 9305 +Train: [12] [4000/6250] eta: 0:06:56 lr: 0.000123 grad: 0.0609 (0.0650) loss: 0.8906 (0.8884) time: 0.2240 data: 0.0920 max mem: 9305 +Train: [12] [4100/6250] eta: 0:06:38 lr: 0.000123 grad: 0.0607 (0.0650) loss: 0.8889 (0.8884) time: 0.1478 data: 0.0517 max mem: 9305 +Train: [12] [4200/6250] eta: 0:06:19 lr: 0.000123 grad: 0.0609 (0.0650) loss: 0.8873 (0.8884) time: 0.2125 data: 0.1177 max mem: 9305 +Train: [12] [4300/6250] eta: 0:06:00 lr: 0.000123 grad: 0.0604 (0.0649) loss: 0.8904 (0.8884) time: 0.1314 data: 0.0382 max mem: 9305 +Train: [12] [4400/6250] eta: 0:05:43 lr: 0.000123 grad: 0.0653 (0.0649) loss: 0.8857 (0.8884) time: 0.1809 data: 0.0809 max mem: 9305 +Train: [12] [4500/6250] eta: 0:05:25 lr: 0.000123 grad: 0.0615 (0.0649) loss: 0.8854 (0.8884) time: 0.1834 data: 0.0896 max mem: 9305 +Train: [12] [4600/6250] eta: 0:05:05 lr: 0.000123 grad: 0.0606 (0.0648) loss: 0.8907 (0.8884) time: 0.1694 data: 0.0761 max mem: 9305 +Train: [12] [4700/6250] eta: 0:04:46 lr: 0.000123 grad: 0.0616 (0.0648) loss: 0.8863 (0.8884) time: 0.1933 data: 0.0970 max mem: 9305 +Train: [12] [4800/6250] eta: 0:04:28 lr: 0.000123 grad: 0.0623 (0.0647) loss: 0.8898 (0.8885) time: 0.1641 data: 0.0843 max mem: 9305 +Train: [12] [4900/6250] eta: 0:04:10 lr: 0.000123 grad: 0.0639 (0.0647) loss: 0.8923 (0.8885) time: 0.1601 data: 0.0802 max mem: 9305 +Train: [12] [5000/6250] eta: 0:03:51 lr: 0.000123 grad: 0.0586 (0.0647) loss: 0.8968 (0.8886) time: 0.1888 data: 0.0990 max mem: 9305 +Train: [12] [5100/6250] eta: 0:03:32 lr: 0.000123 grad: 0.0605 (0.0647) loss: 0.8937 (0.8887) time: 0.1981 data: 0.1021 max mem: 9305 +Train: [12] [5200/6250] eta: 0:03:14 lr: 0.000123 grad: 0.0581 (0.0646) loss: 0.8939 (0.8888) time: 0.1150 data: 0.0002 max mem: 9305 +Train: [12] [5300/6250] eta: 0:02:55 lr: 0.000123 grad: 0.0652 (0.0646) loss: 0.8900 (0.8888) time: 0.1475 data: 0.0456 max mem: 9305 +Train: [12] [5400/6250] eta: 0:02:37 lr: 0.000123 grad: 0.0638 (0.0646) loss: 0.8910 (0.8888) time: 0.1069 data: 0.0002 max mem: 9305 +Train: [12] [5500/6250] eta: 0:02:18 lr: 0.000123 grad: 0.0608 (0.0646) loss: 0.8919 (0.8889) time: 0.1465 data: 0.0502 max mem: 9305 +Train: [12] [5600/6250] eta: 0:02:00 lr: 0.000123 grad: 0.0599 (0.0646) loss: 0.8869 (0.8889) time: 0.3223 data: 0.2147 max mem: 9305 +Train: [12] [5700/6250] eta: 0:01:42 lr: 0.000123 grad: 0.0585 (0.0646) loss: 0.8897 (0.8889) time: 0.2146 data: 0.1166 max mem: 9305 +Train: [12] [5800/6250] eta: 0:01:23 lr: 0.000123 grad: 0.0601 (0.0645) loss: 0.8911 (0.8889) time: 0.1741 data: 0.0837 max mem: 9305 +Train: [12] [5900/6250] eta: 0:01:05 lr: 0.000123 grad: 0.0630 (0.0645) loss: 0.8881 (0.8889) time: 0.3799 data: 0.2688 max mem: 9305 +Train: [12] [6000/6250] eta: 0:00:46 lr: 0.000123 grad: 0.0606 (0.0645) loss: 0.8837 (0.8889) time: 0.1549 data: 0.0633 max mem: 9305 +Train: [12] [6100/6250] eta: 0:00:27 lr: 0.000123 grad: 0.0641 (0.0646) loss: 0.8898 (0.8889) time: 0.2312 data: 0.1231 max mem: 9305 +Train: [12] [6200/6250] eta: 0:00:09 lr: 0.000123 grad: 0.0642 (0.0646) loss: 0.8885 (0.8889) time: 0.1660 data: 0.0808 max mem: 9305 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0621 (0.0646) loss: 0.8870 (0.8889) time: 0.1322 data: 0.0417 max mem: 9305 +Train: [12] Total time: 0:19:26 (0.1866 s / it) +Averaged stats: lr: 0.000123 grad: 0.0621 (0.0646) loss: 0.8870 (0.8889) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:04:53 loss: 0.9050 (0.9050) time: 4.7285 data: 4.6406 max mem: 9305 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8977 (0.8961) time: 0.1512 data: 0.1226 max mem: 9305 +Eval (hcp-train-subset): [12] Total time: 0:00:14 (0.2393 s / it) +Averaged stats (hcp-train-subset): loss: 0.8977 (0.8961) +Eval (hcp-val): [12] [ 0/62] eta: 0:05:07 loss: 0.8871 (0.8871) time: 4.9532 data: 4.9199 max mem: 9305 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8883 (0.8899) time: 0.1376 data: 0.1085 max mem: 9305 +Eval (hcp-val): [12] Total time: 0:00:13 (0.2187 s / it) +Averaged stats (hcp-val): loss: 0.8883 (0.8899) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [13] [ 0/6250] eta: 6:58:27 lr: 0.000123 grad: 0.0900 (0.0900) loss: 0.8945 (0.8945) time: 4.0172 data: 3.8472 max mem: 9305 +Train: [13] [ 100/6250] eta: 0:23:19 lr: 0.000123 grad: 0.0617 (0.0702) loss: 0.8972 (0.9015) time: 0.2065 data: 0.0985 max mem: 9305 +Train: [13] [ 200/6250] eta: 0:19:47 lr: 0.000123 grad: 0.0607 (0.0692) loss: 0.8925 (0.8991) time: 0.1846 data: 0.0940 max mem: 9305 +Train: [13] [ 300/6250] eta: 0:18:41 lr: 0.000123 grad: 0.0602 (0.0670) loss: 0.8928 (0.8973) time: 0.1247 data: 0.0100 max mem: 9305 +Train: [13] [ 400/6250] eta: 0:18:10 lr: 0.000123 grad: 0.0607 (0.0657) loss: 0.8909 (0.8967) time: 0.1632 data: 0.0540 max mem: 9305 +Train: [13] [ 500/6250] eta: 0:17:21 lr: 0.000123 grad: 0.0658 (0.0651) loss: 0.8917 (0.8954) time: 0.1974 data: 0.1085 max mem: 9305 +Train: [13] [ 600/6250] eta: 0:16:34 lr: 0.000123 grad: 0.0609 (0.0647) loss: 0.8876 (0.8948) time: 0.1624 data: 0.0805 max mem: 9305 +Train: [13] [ 700/6250] eta: 0:16:22 lr: 0.000123 grad: 0.0578 (0.0644) loss: 0.8917 (0.8944) time: 0.2027 data: 0.1029 max mem: 9305 +Train: [13] [ 800/6250] eta: 0:15:55 lr: 0.000123 grad: 0.0597 (0.0646) loss: 0.8965 (0.8942) time: 0.1427 data: 0.0493 max mem: 9305 +Train: [13] [ 900/6250] eta: 0:15:32 lr: 0.000123 grad: 0.0590 (0.0647) loss: 0.8923 (0.8940) time: 0.1843 data: 0.0771 max mem: 9305 +Train: [13] [1000/6250] eta: 0:15:01 lr: 0.000123 grad: 0.0605 (0.0645) loss: 0.8901 (0.8938) time: 0.1406 data: 0.0552 max mem: 9305 +Train: [13] [1100/6250] eta: 0:14:32 lr: 0.000123 grad: 0.0609 (0.0642) loss: 0.8867 (0.8934) time: 0.1418 data: 0.0459 max mem: 9305 +Train: [13] [1200/6250] eta: 0:14:08 lr: 0.000123 grad: 0.0599 (0.0641) loss: 0.8916 (0.8932) time: 0.1509 data: 0.0626 max mem: 9305 +Train: [13] [1300/6250] eta: 0:13:52 lr: 0.000123 grad: 0.0632 (0.0641) loss: 0.8868 (0.8930) time: 0.1153 data: 0.0220 max mem: 9305 +Train: [13] [1400/6250] eta: 0:13:37 lr: 0.000123 grad: 0.0630 (0.0641) loss: 0.8880 (0.8928) time: 0.1888 data: 0.0999 max mem: 9305 +Train: [13] [1500/6250] eta: 0:13:23 lr: 0.000123 grad: 0.0590 (0.0640) loss: 0.8902 (0.8926) time: 0.2111 data: 0.1174 max mem: 9305 +Train: [13] [1600/6250] eta: 0:13:09 lr: 0.000123 grad: 0.0633 (0.0640) loss: 0.8902 (0.8925) time: 0.2020 data: 0.1129 max mem: 9305 +Train: [13] [1700/6250] eta: 0:12:56 lr: 0.000123 grad: 0.0597 (0.0639) loss: 0.8960 (0.8924) time: 0.1564 data: 0.0504 max mem: 9305 +Train: [13] [1800/6250] eta: 0:12:52 lr: 0.000123 grad: 0.0636 (0.0639) loss: 0.8895 (0.8922) time: 0.1753 data: 0.0460 max mem: 9305 +Train: [13] [1900/6250] eta: 0:12:42 lr: 0.000123 grad: 0.0616 (0.0639) loss: 0.8879 (0.8920) time: 0.1197 data: 0.0005 max mem: 9305 +Train: [13] [2000/6250] eta: 0:12:29 lr: 0.000123 grad: 0.0652 (0.0639) loss: 0.8939 (0.8919) time: 0.1431 data: 0.0406 max mem: 9305 +Train: [13] [2100/6250] eta: 0:12:15 lr: 0.000123 grad: 0.0598 (0.0639) loss: 0.8906 (0.8918) time: 0.1718 data: 0.0817 max mem: 9305 +Train: [13] [2200/6250] eta: 0:12:07 lr: 0.000123 grad: 0.0594 (0.0639) loss: 0.8877 (0.8916) time: 0.2619 data: 0.1425 max mem: 9305 +Train: [13] [2300/6250] eta: 0:11:48 lr: 0.000123 grad: 0.0626 (0.0639) loss: 0.8889 (0.8915) time: 0.2815 data: 0.1887 max mem: 9305 +Train: [13] [2400/6250] eta: 0:11:29 lr: 0.000123 grad: 0.0634 (0.0640) loss: 0.8886 (0.8913) time: 0.1910 data: 0.0514 max mem: 9305 +Train: [13] [2500/6250] eta: 0:11:13 lr: 0.000123 grad: 0.0593 (0.0640) loss: 0.8890 (0.8912) time: 0.1414 data: 0.0501 max mem: 9305 +Train: [13] [2600/6250] eta: 0:10:57 lr: 0.000123 grad: 0.0620 (0.0639) loss: 0.8880 (0.8912) time: 0.2362 data: 0.1466 max mem: 9305 +Train: [13] [2700/6250] eta: 0:10:36 lr: 0.000123 grad: 0.0600 (0.0638) loss: 0.8938 (0.8912) time: 0.1942 data: 0.1060 max mem: 9305 +Train: [13] [2800/6250] eta: 0:10:19 lr: 0.000123 grad: 0.0619 (0.0638) loss: 0.8897 (0.8911) time: 0.1467 data: 0.0494 max mem: 9305 +Train: [13] [2900/6250] eta: 0:10:01 lr: 0.000123 grad: 0.0631 (0.0638) loss: 0.8878 (0.8911) time: 0.1923 data: 0.0913 max mem: 9305 +Train: [13] [3000/6250] eta: 0:09:44 lr: 0.000123 grad: 0.0654 (0.0637) loss: 0.8903 (0.8911) time: 0.1571 data: 0.0619 max mem: 9305 +Train: [13] [3100/6250] eta: 0:09:28 lr: 0.000123 grad: 0.0607 (0.0637) loss: 0.8913 (0.8911) time: 0.2085 data: 0.1030 max mem: 9305 +Train: [13] [3200/6250] eta: 0:09:10 lr: 0.000123 grad: 0.0645 (0.0637) loss: 0.8914 (0.8911) time: 0.1414 data: 0.0395 max mem: 9305 +Train: [13] [3300/6250] eta: 0:08:52 lr: 0.000123 grad: 0.0633 (0.0637) loss: 0.8876 (0.8911) time: 0.1365 data: 0.0377 max mem: 9305 +Train: [13] [3400/6250] eta: 0:08:34 lr: 0.000123 grad: 0.0671 (0.0638) loss: 0.8878 (0.8909) time: 0.1619 data: 0.0569 max mem: 9305 +Train: [13] [3500/6250] eta: 0:08:16 lr: 0.000123 grad: 0.0619 (0.0638) loss: 0.8857 (0.8909) time: 0.1713 data: 0.0829 max mem: 9305 +Train: [13] [3600/6250] eta: 0:08:00 lr: 0.000123 grad: 0.0631 (0.0638) loss: 0.8881 (0.8908) time: 0.1751 data: 0.0878 max mem: 9305 +Train: [13] [3700/6250] eta: 0:07:42 lr: 0.000122 grad: 0.0604 (0.0638) loss: 0.8937 (0.8908) time: 0.2079 data: 0.1235 max mem: 9305 +Train: [13] [3800/6250] eta: 0:07:23 lr: 0.000122 grad: 0.0610 (0.0638) loss: 0.8883 (0.8907) time: 0.1690 data: 0.0856 max mem: 9305 +Train: [13] [3900/6250] eta: 0:07:07 lr: 0.000122 grad: 0.0631 (0.0639) loss: 0.8879 (0.8907) time: 0.2343 data: 0.1348 max mem: 9305 +Train: [13] [4000/6250] eta: 0:06:49 lr: 0.000122 grad: 0.0631 (0.0639) loss: 0.8914 (0.8907) time: 0.1998 data: 0.0949 max mem: 9305 +Train: [13] [4100/6250] eta: 0:06:31 lr: 0.000122 grad: 0.0619 (0.0640) loss: 0.8915 (0.8907) time: 0.1335 data: 0.0254 max mem: 9305 +Train: [13] [4200/6250] eta: 0:06:15 lr: 0.000122 grad: 0.0623 (0.0640) loss: 0.8833 (0.8906) time: 0.1257 data: 0.0003 max mem: 9305 +Train: [13] [4300/6250] eta: 0:05:57 lr: 0.000122 grad: 0.0676 (0.0640) loss: 0.8874 (0.8906) time: 0.2147 data: 0.1277 max mem: 9305 +Train: [13] [4400/6250] eta: 0:05:40 lr: 0.000122 grad: 0.0632 (0.0640) loss: 0.8870 (0.8905) time: 0.1431 data: 0.0631 max mem: 9305 +Train: [13] [4500/6250] eta: 0:05:20 lr: 0.000122 grad: 0.0609 (0.0640) loss: 0.8971 (0.8905) time: 0.1602 data: 0.0715 max mem: 9305 +Train: [13] [4600/6250] eta: 0:05:02 lr: 0.000122 grad: 0.0663 (0.0640) loss: 0.8876 (0.8905) time: 0.1888 data: 0.1024 max mem: 9305 +Train: [13] [4700/6250] eta: 0:04:43 lr: 0.000122 grad: 0.0670 (0.0641) loss: 0.8844 (0.8905) time: 0.2170 data: 0.1210 max mem: 9305 +Train: [13] [4800/6250] eta: 0:04:25 lr: 0.000122 grad: 0.0667 (0.0642) loss: 0.8908 (0.8904) time: 0.1535 data: 0.0662 max mem: 9305 +Train: [13] [4900/6250] eta: 0:04:08 lr: 0.000122 grad: 0.0624 (0.0642) loss: 0.8881 (0.8904) time: 0.1437 data: 0.0450 max mem: 9305 +Train: [13] [5000/6250] eta: 0:03:49 lr: 0.000122 grad: 0.0665 (0.0642) loss: 0.8900 (0.8903) time: 0.1565 data: 0.0358 max mem: 9305 +Train: [13] [5100/6250] eta: 0:03:30 lr: 0.000122 grad: 0.0711 (0.0643) loss: 0.8824 (0.8903) time: 0.1543 data: 0.0569 max mem: 9305 +Train: [13] [5200/6250] eta: 0:03:12 lr: 0.000122 grad: 0.0708 (0.0644) loss: 0.8859 (0.8902) time: 0.1840 data: 0.0861 max mem: 9305 +Train: [13] [5300/6250] eta: 0:02:53 lr: 0.000122 grad: 0.0648 (0.0645) loss: 0.8851 (0.8901) time: 0.2291 data: 0.1072 max mem: 9305 +Train: [13] [5400/6250] eta: 0:02:35 lr: 0.000122 grad: 0.0669 (0.0645) loss: 0.8859 (0.8900) time: 0.1163 data: 0.0082 max mem: 9305 +Train: [13] [5500/6250] eta: 0:02:17 lr: 0.000122 grad: 0.0615 (0.0646) loss: 0.8846 (0.8900) time: 0.1282 data: 0.0003 max mem: 9305 +Train: [13] [5600/6250] eta: 0:01:58 lr: 0.000122 grad: 0.0659 (0.0646) loss: 0.8856 (0.8899) time: 0.1531 data: 0.0688 max mem: 9305 +Train: [13] [5700/6250] eta: 0:01:40 lr: 0.000122 grad: 0.0648 (0.0647) loss: 0.8905 (0.8899) time: 0.3062 data: 0.1664 max mem: 9305 +Train: [13] [5800/6250] eta: 0:01:22 lr: 0.000122 grad: 0.0636 (0.0647) loss: 0.8843 (0.8898) time: 0.1323 data: 0.0435 max mem: 9305 +Train: [13] [5900/6250] eta: 0:01:04 lr: 0.000122 grad: 0.0708 (0.0648) loss: 0.8822 (0.8897) time: 0.1388 data: 0.0489 max mem: 9305 +Train: [13] [6000/6250] eta: 0:00:45 lr: 0.000122 grad: 0.0660 (0.0649) loss: 0.8828 (0.8896) time: 0.1219 data: 0.0321 max mem: 9305 +Train: [13] [6100/6250] eta: 0:00:27 lr: 0.000122 grad: 0.0634 (0.0649) loss: 0.8889 (0.8895) time: 0.1930 data: 0.1076 max mem: 9305 +Train: [13] [6200/6250] eta: 0:00:09 lr: 0.000122 grad: 0.0629 (0.0649) loss: 0.8863 (0.8895) time: 0.1607 data: 0.0740 max mem: 9305 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0682 (0.0650) loss: 0.8872 (0.8894) time: 0.2148 data: 0.1360 max mem: 9305 +Train: [13] Total time: 0:19:06 (0.1834 s / it) +Averaged stats: lr: 0.000122 grad: 0.0682 (0.0650) loss: 0.8872 (0.8894) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:04:28 loss: 0.9047 (0.9047) time: 4.3253 data: 4.2732 max mem: 9305 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8916 (0.8949) time: 0.1198 data: 0.0899 max mem: 9305 +Eval (hcp-train-subset): [13] Total time: 0:00:14 (0.2283 s / it) +Averaged stats (hcp-train-subset): loss: 0.8916 (0.8949) +Eval (hcp-val): [13] [ 0/62] eta: 0:06:10 loss: 0.8821 (0.8821) time: 5.9738 data: 5.9383 max mem: 9305 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8889 (0.8896) time: 0.1546 data: 0.1256 max mem: 9305 +Eval (hcp-val): [13] Total time: 0:00:15 (0.2456 s / it) +Averaged stats (hcp-val): loss: 0.8889 (0.8896) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [14] [ 0/6250] eta: 10:09:35 lr: 0.000122 grad: 0.1325 (0.1325) loss: 0.8920 (0.8920) time: 5.8520 data: 5.6968 max mem: 9305 +Train: [14] [ 100/6250] eta: 0:23:09 lr: 0.000122 grad: 0.0651 (0.0710) loss: 0.8909 (0.8896) time: 0.1363 data: 0.0299 max mem: 9305 +Train: [14] [ 200/6250] eta: 0:20:23 lr: 0.000122 grad: 0.0667 (0.0708) loss: 0.8879 (0.8874) time: 0.1901 data: 0.0960 max mem: 9305 +Train: [14] [ 300/6250] eta: 0:19:02 lr: 0.000122 grad: 0.0727 (0.0715) loss: 0.8853 (0.8863) time: 0.1958 data: 0.1049 max mem: 9305 +Train: [14] [ 400/6250] eta: 0:18:11 lr: 0.000122 grad: 0.0703 (0.0708) loss: 0.8838 (0.8863) time: 0.1571 data: 0.0636 max mem: 9305 +Train: [14] [ 500/6250] eta: 0:17:14 lr: 0.000122 grad: 0.0666 (0.0700) loss: 0.8851 (0.8864) time: 0.1549 data: 0.0719 max mem: 9305 +Train: [14] [ 600/6250] eta: 0:16:33 lr: 0.000122 grad: 0.0619 (0.0693) loss: 0.8865 (0.8869) time: 0.1960 data: 0.1069 max mem: 9305 +Train: [14] [ 700/6250] eta: 0:16:36 lr: 0.000122 grad: 0.0616 (0.0688) loss: 0.8908 (0.8874) time: 0.2333 data: 0.1326 max mem: 9305 +Train: [14] [ 800/6250] eta: 0:16:17 lr: 0.000122 grad: 0.0613 (0.0683) loss: 0.8864 (0.8874) time: 0.2029 data: 0.1082 max mem: 9305 +Train: [14] [ 900/6250] eta: 0:15:51 lr: 0.000122 grad: 0.0594 (0.0679) loss: 0.8915 (0.8875) time: 0.1716 data: 0.0786 max mem: 9305 +Train: [14] [1000/6250] eta: 0:15:40 lr: 0.000122 grad: 0.0614 (0.0675) loss: 0.8915 (0.8875) time: 0.1271 data: 0.0332 max mem: 9305 +Train: [14] [1100/6250] eta: 0:15:18 lr: 0.000122 grad: 0.0626 (0.0672) loss: 0.8918 (0.8875) time: 0.1563 data: 0.0575 max mem: 9305 +Train: [14] [1200/6250] eta: 0:14:53 lr: 0.000122 grad: 0.0622 (0.0671) loss: 0.8900 (0.8877) time: 0.1776 data: 0.0888 max mem: 9305 +Train: [14] [1300/6250] eta: 0:14:40 lr: 0.000122 grad: 0.0653 (0.0671) loss: 0.8888 (0.8876) time: 0.2091 data: 0.1309 max mem: 9305 +Train: [14] [1400/6250] eta: 0:14:25 lr: 0.000122 grad: 0.0622 (0.0671) loss: 0.8942 (0.8876) time: 0.1944 data: 0.0920 max mem: 9305 +Train: [14] [1500/6250] eta: 0:14:20 lr: 0.000122 grad: 0.0654 (0.0670) loss: 0.8846 (0.8876) time: 0.1838 data: 0.0679 max mem: 9305 +Train: [14] [1600/6250] eta: 0:14:04 lr: 0.000122 grad: 0.0649 (0.0670) loss: 0.8893 (0.8877) time: 0.2039 data: 0.1115 max mem: 9305 +Train: [14] [1700/6250] eta: 0:13:50 lr: 0.000122 grad: 0.0598 (0.0669) loss: 0.8815 (0.8877) time: 0.1988 data: 0.1156 max mem: 9305 +Train: [14] [1800/6250] eta: 0:13:30 lr: 0.000122 grad: 0.0622 (0.0668) loss: 0.8904 (0.8877) time: 0.1748 data: 0.0842 max mem: 9305 +Train: [14] [1900/6250] eta: 0:13:15 lr: 0.000122 grad: 0.0671 (0.0668) loss: 0.8835 (0.8876) time: 0.2207 data: 0.1163 max mem: 9305 +Train: [14] [2000/6250] eta: 0:13:07 lr: 0.000122 grad: 0.0631 (0.0667) loss: 0.8854 (0.8876) time: 0.1067 data: 0.0002 max mem: 9305 +Train: [14] [2100/6250] eta: 0:12:47 lr: 0.000122 grad: 0.0619 (0.0666) loss: 0.8860 (0.8876) time: 0.1931 data: 0.1042 max mem: 9305 +Train: [14] [2200/6250] eta: 0:12:27 lr: 0.000122 grad: 0.0626 (0.0667) loss: 0.8934 (0.8877) time: 0.1802 data: 0.0885 max mem: 9305 +Train: [14] [2300/6250] eta: 0:12:15 lr: 0.000122 grad: 0.0663 (0.0666) loss: 0.8890 (0.8877) time: 0.1147 data: 0.0002 max mem: 9305 +Train: [14] [2400/6250] eta: 0:11:55 lr: 0.000122 grad: 0.0630 (0.0667) loss: 0.8833 (0.8876) time: 0.2908 data: 0.1837 max mem: 9305 +Train: [14] [2500/6250] eta: 0:11:37 lr: 0.000122 grad: 0.0624 (0.0668) loss: 0.8844 (0.8875) time: 0.3127 data: 0.2058 max mem: 9305 +Train: [14] [2600/6250] eta: 0:11:14 lr: 0.000122 grad: 0.0665 (0.0668) loss: 0.8835 (0.8875) time: 0.2100 data: 0.1184 max mem: 9305 +Train: [14] [2700/6250] eta: 0:10:53 lr: 0.000122 grad: 0.0622 (0.0669) loss: 0.8883 (0.8874) time: 0.1597 data: 0.0691 max mem: 9305 +Train: [14] [2800/6250] eta: 0:10:33 lr: 0.000122 grad: 0.0653 (0.0670) loss: 0.8851 (0.8874) time: 0.2131 data: 0.1136 max mem: 9305 +Train: [14] [2900/6250] eta: 0:10:12 lr: 0.000122 grad: 0.0658 (0.0669) loss: 0.8853 (0.8873) time: 0.1606 data: 0.0693 max mem: 9305 +Train: [14] [3000/6250] eta: 0:09:53 lr: 0.000122 grad: 0.0671 (0.0669) loss: 0.8842 (0.8873) time: 0.1403 data: 0.0475 max mem: 9305 +Train: [14] [3100/6250] eta: 0:09:34 lr: 0.000122 grad: 0.0635 (0.0669) loss: 0.8865 (0.8872) time: 0.1626 data: 0.0650 max mem: 9305 +Train: [14] [3200/6250] eta: 0:09:19 lr: 0.000122 grad: 0.0655 (0.0669) loss: 0.8861 (0.8872) time: 0.3461 data: 0.2577 max mem: 9305 +Train: [14] [3300/6250] eta: 0:09:00 lr: 0.000122 grad: 0.0634 (0.0669) loss: 0.8921 (0.8872) time: 0.1466 data: 0.0470 max mem: 9305 +Train: [14] [3400/6250] eta: 0:08:44 lr: 0.000122 grad: 0.0674 (0.0668) loss: 0.8873 (0.8872) time: 0.3879 data: 0.3042 max mem: 9305 +Train: [14] [3500/6250] eta: 0:08:25 lr: 0.000122 grad: 0.0600 (0.0667) loss: 0.8865 (0.8872) time: 0.1637 data: 0.0679 max mem: 9305 +Train: [14] [3600/6250] eta: 0:08:06 lr: 0.000122 grad: 0.0665 (0.0666) loss: 0.8870 (0.8872) time: 0.1916 data: 0.0924 max mem: 9305 +Train: [14] [3700/6250] eta: 0:07:48 lr: 0.000122 grad: 0.0643 (0.0666) loss: 0.8888 (0.8873) time: 0.1905 data: 0.0967 max mem: 9305 +Train: [14] [3800/6250] eta: 0:07:30 lr: 0.000122 grad: 0.0746 (0.0668) loss: 0.8903 (0.8872) time: 0.2227 data: 0.1334 max mem: 9305 +Train: [14] [3900/6250] eta: 0:07:11 lr: 0.000122 grad: 0.0629 (0.0667) loss: 0.8845 (0.8873) time: 0.2007 data: 0.1126 max mem: 9305 +Train: [14] [4000/6250] eta: 0:06:53 lr: 0.000122 grad: 0.0637 (0.0667) loss: 0.8885 (0.8873) time: 0.1673 data: 0.0660 max mem: 9305 +Train: [14] [4100/6250] eta: 0:06:34 lr: 0.000122 grad: 0.0643 (0.0667) loss: 0.8860 (0.8873) time: 0.1698 data: 0.0896 max mem: 9305 +Train: [14] [4200/6250] eta: 0:06:16 lr: 0.000122 grad: 0.0629 (0.0667) loss: 0.8952 (0.8873) time: 0.1682 data: 0.0890 max mem: 9305 +Train: [14] [4300/6250] eta: 0:05:57 lr: 0.000122 grad: 0.0647 (0.0667) loss: 0.8882 (0.8873) time: 0.1800 data: 0.0926 max mem: 9305 +Train: [14] [4400/6250] eta: 0:05:39 lr: 0.000122 grad: 0.0652 (0.0667) loss: 0.8833 (0.8872) time: 0.1671 data: 0.0944 max mem: 9305 +Train: [14] [4500/6250] eta: 0:05:21 lr: 0.000122 grad: 0.0571 (0.0667) loss: 0.8881 (0.8872) time: 0.2103 data: 0.1306 max mem: 9305 +Train: [14] [4600/6250] eta: 0:05:03 lr: 0.000122 grad: 0.0637 (0.0666) loss: 0.8827 (0.8871) time: 0.1661 data: 0.0864 max mem: 9305 +Train: [14] [4700/6250] eta: 0:04:44 lr: 0.000122 grad: 0.0636 (0.0666) loss: 0.8804 (0.8871) time: 0.1979 data: 0.1073 max mem: 9305 +Train: [14] [4800/6250] eta: 0:04:27 lr: 0.000122 grad: 0.0614 (0.0665) loss: 0.8890 (0.8871) time: 0.1508 data: 0.0560 max mem: 9305 +Train: [14] [4900/6250] eta: 0:04:08 lr: 0.000122 grad: 0.0682 (0.0665) loss: 0.8884 (0.8871) time: 0.2029 data: 0.1124 max mem: 9305 +Train: [14] [5000/6250] eta: 0:03:49 lr: 0.000122 grad: 0.0612 (0.0664) loss: 0.8864 (0.8870) time: 0.1854 data: 0.0979 max mem: 9305 +Train: [14] [5100/6250] eta: 0:03:31 lr: 0.000122 grad: 0.0601 (0.0663) loss: 0.8904 (0.8870) time: 0.1680 data: 0.0636 max mem: 9305 +Train: [14] [5200/6250] eta: 0:03:13 lr: 0.000122 grad: 0.0636 (0.0663) loss: 0.8866 (0.8870) time: 0.2228 data: 0.1311 max mem: 9305 +Train: [14] [5300/6250] eta: 0:02:55 lr: 0.000122 grad: 0.0668 (0.0662) loss: 0.8831 (0.8870) time: 0.1335 data: 0.0003 max mem: 9305 +Train: [14] [5400/6250] eta: 0:02:36 lr: 0.000122 grad: 0.0612 (0.0662) loss: 0.8898 (0.8870) time: 0.1888 data: 0.0747 max mem: 9305 +Train: [14] [5500/6250] eta: 0:02:18 lr: 0.000122 grad: 0.0665 (0.0662) loss: 0.8852 (0.8870) time: 0.1458 data: 0.0406 max mem: 9305 +Train: [14] [5600/6250] eta: 0:01:59 lr: 0.000122 grad: 0.0603 (0.0661) loss: 0.8869 (0.8870) time: 0.1923 data: 0.1020 max mem: 9305 +Train: [14] [5700/6250] eta: 0:01:40 lr: 0.000122 grad: 0.0631 (0.0661) loss: 0.8831 (0.8869) time: 0.1646 data: 0.0739 max mem: 9305 +Train: [14] [5800/6250] eta: 0:01:22 lr: 0.000122 grad: 0.0629 (0.0661) loss: 0.8851 (0.8869) time: 0.1527 data: 0.0514 max mem: 9305 +Train: [14] [5900/6250] eta: 0:01:04 lr: 0.000122 grad: 0.0629 (0.0661) loss: 0.8817 (0.8869) time: 0.1521 data: 0.0597 max mem: 9305 +Train: [14] [6000/6250] eta: 0:00:45 lr: 0.000122 grad: 0.0621 (0.0661) loss: 0.8831 (0.8868) time: 0.1509 data: 0.0564 max mem: 9305 +Train: [14] [6100/6250] eta: 0:00:27 lr: 0.000122 grad: 0.0635 (0.0660) loss: 0.8793 (0.8868) time: 0.1510 data: 0.0582 max mem: 9305 +Train: [14] [6200/6250] eta: 0:00:09 lr: 0.000122 grad: 0.0647 (0.0661) loss: 0.8838 (0.8867) time: 0.1447 data: 0.0611 max mem: 9305 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0657 (0.0661) loss: 0.8886 (0.8867) time: 0.1763 data: 0.0942 max mem: 9305 +Train: [14] Total time: 0:19:03 (0.1830 s / it) +Averaged stats: lr: 0.000122 grad: 0.0657 (0.0661) loss: 0.8886 (0.8867) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:05:33 loss: 0.9037 (0.9037) time: 5.3793 data: 5.3422 max mem: 9305 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8954 (0.8941) time: 0.1322 data: 0.1013 max mem: 9305 +Eval (hcp-train-subset): [14] Total time: 0:00:14 (0.2405 s / it) +Averaged stats (hcp-train-subset): loss: 0.8954 (0.8941) +Making plots (hcp-train-subset): example=46 +Eval (hcp-val): [14] [ 0/62] eta: 0:05:10 loss: 0.8834 (0.8834) time: 5.0023 data: 4.9656 max mem: 9305 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8878 (0.8883) time: 0.1370 data: 0.1067 max mem: 9305 +Eval (hcp-val): [14] Total time: 0:00:15 (0.2460 s / it) +Averaged stats (hcp-val): loss: 0.8878 (0.8883) +Making plots (hcp-val): example=55 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [15] [ 0/6250] eta: 17:33:42 lr: 0.000122 grad: 0.1180 (0.1180) loss: 0.8837 (0.8837) time: 10.1156 data: 9.9830 max mem: 9305 +Train: [15] [ 100/6250] eta: 0:28:27 lr: 0.000122 grad: 0.0600 (0.0736) loss: 0.8823 (0.8900) time: 0.2389 data: 0.1364 max mem: 9305 +Train: [15] [ 200/6250] eta: 0:22:42 lr: 0.000122 grad: 0.0657 (0.0711) loss: 0.8798 (0.8864) time: 0.1739 data: 0.0865 max mem: 9305 +Train: [15] [ 300/6250] eta: 0:20:35 lr: 0.000122 grad: 0.0675 (0.0700) loss: 0.8824 (0.8853) time: 0.2067 data: 0.1123 max mem: 9305 +Train: [15] [ 400/6250] eta: 0:19:17 lr: 0.000122 grad: 0.0648 (0.0700) loss: 0.8784 (0.8842) time: 0.1661 data: 0.0736 max mem: 9305 +Train: [15] [ 500/6250] eta: 0:18:26 lr: 0.000122 grad: 0.0657 (0.0701) loss: 0.8885 (0.8838) time: 0.1883 data: 0.0969 max mem: 9305 +Train: [15] [ 600/6250] eta: 0:17:36 lr: 0.000122 grad: 0.0678 (0.0698) loss: 0.8828 (0.8837) time: 0.1460 data: 0.0452 max mem: 9305 +Train: [15] [ 700/6250] eta: 0:17:24 lr: 0.000122 grad: 0.0680 (0.0692) loss: 0.8785 (0.8833) time: 0.2428 data: 0.1550 max mem: 9305 +Train: [15] [ 800/6250] eta: 0:16:55 lr: 0.000122 grad: 0.0590 (0.0689) loss: 0.8839 (0.8834) time: 0.2413 data: 0.1531 max mem: 9305 +Train: [15] [ 900/6250] eta: 0:16:36 lr: 0.000122 grad: 0.0654 (0.0685) loss: 0.8829 (0.8833) time: 0.1985 data: 0.1087 max mem: 9305 +Train: [15] [1000/6250] eta: 0:16:32 lr: 0.000122 grad: 0.0632 (0.0683) loss: 0.8835 (0.8833) time: 0.2397 data: 0.1372 max mem: 9305 +Train: [15] [1100/6250] eta: 0:16:21 lr: 0.000121 grad: 0.0606 (0.0679) loss: 0.8849 (0.8833) time: 0.1238 data: 0.0003 max mem: 9305 +Train: [15] [1200/6250] eta: 0:16:11 lr: 0.000121 grad: 0.0656 (0.0676) loss: 0.8803 (0.8833) time: 0.1273 data: 0.0021 max mem: 9305 +Train: [15] [1300/6250] eta: 0:16:00 lr: 0.000121 grad: 0.0623 (0.0675) loss: 0.8815 (0.8833) time: 0.2467 data: 0.1560 max mem: 9305 +Train: [15] [1400/6250] eta: 0:15:43 lr: 0.000121 grad: 0.0627 (0.0674) loss: 0.8835 (0.8833) time: 0.2133 data: 0.1261 max mem: 9305 +Train: [15] [1500/6250] eta: 0:15:24 lr: 0.000121 grad: 0.0621 (0.0672) loss: 0.8810 (0.8833) time: 0.1559 data: 0.0611 max mem: 9305 +Train: [15] [1600/6250] eta: 0:15:06 lr: 0.000121 grad: 0.0661 (0.0671) loss: 0.8849 (0.8834) time: 0.1345 data: 0.0038 max mem: 9305 +Train: [15] [1700/6250] eta: 0:14:41 lr: 0.000121 grad: 0.0644 (0.0671) loss: 0.8781 (0.8834) time: 0.1815 data: 0.0827 max mem: 9305 +Train: [15] [1800/6250] eta: 0:14:20 lr: 0.000121 grad: 0.0650 (0.0671) loss: 0.8857 (0.8835) time: 0.1603 data: 0.0685 max mem: 9305 +Train: [15] [1900/6250] eta: 0:14:00 lr: 0.000121 grad: 0.0623 (0.0672) loss: 0.8835 (0.8834) time: 0.1720 data: 0.0756 max mem: 9305 +Train: [15] [2000/6250] eta: 0:13:37 lr: 0.000121 grad: 0.0663 (0.0676) loss: 0.8843 (0.8834) time: 0.1776 data: 0.0814 max mem: 9305 +Train: [15] [2100/6250] eta: 0:13:19 lr: 0.000121 grad: 0.0613 (0.0674) loss: 0.8879 (0.8834) time: 0.1139 data: 0.0003 max mem: 9305 +Train: [15] [2200/6250] eta: 0:12:57 lr: 0.000121 grad: 0.0617 (0.0674) loss: 0.8867 (0.8833) time: 0.1653 data: 0.0744 max mem: 9305 +Train: [15] [2300/6250] eta: 0:12:36 lr: 0.000121 grad: 0.0606 (0.0673) loss: 0.8843 (0.8834) time: 0.2014 data: 0.1015 max mem: 9305 +Train: [15] [2400/6250] eta: 0:12:16 lr: 0.000121 grad: 0.0669 (0.0672) loss: 0.8842 (0.8834) time: 0.1832 data: 0.0867 max mem: 9305 +Train: [15] [2500/6250] eta: 0:11:54 lr: 0.000121 grad: 0.0617 (0.0671) loss: 0.8800 (0.8834) time: 0.1593 data: 0.0715 max mem: 9305 +Train: [15] [2600/6250] eta: 0:11:33 lr: 0.000121 grad: 0.0592 (0.0670) loss: 0.8865 (0.8835) time: 0.1461 data: 0.0597 max mem: 9305 +Train: [15] [2700/6250] eta: 0:11:13 lr: 0.000121 grad: 0.0651 (0.0669) loss: 0.8833 (0.8835) time: 0.2089 data: 0.1210 max mem: 9305 +Train: [15] [2800/6250] eta: 0:10:51 lr: 0.000121 grad: 0.0633 (0.0669) loss: 0.8840 (0.8836) time: 0.1593 data: 0.0716 max mem: 9305 +Train: [15] [2900/6250] eta: 0:10:31 lr: 0.000121 grad: 0.0611 (0.0667) loss: 0.8856 (0.8836) time: 0.2020 data: 0.1109 max mem: 9305 +Train: [15] [3000/6250] eta: 0:10:16 lr: 0.000121 grad: 0.0612 (0.0667) loss: 0.8852 (0.8837) time: 0.2770 data: 0.1817 max mem: 9305 +Train: [15] [3100/6250] eta: 0:09:55 lr: 0.000121 grad: 0.0589 (0.0665) loss: 0.8879 (0.8839) time: 0.1438 data: 0.0478 max mem: 9305 +Train: [15] [3200/6250] eta: 0:09:38 lr: 0.000121 grad: 0.0634 (0.0665) loss: 0.8828 (0.8839) time: 0.1388 data: 0.0204 max mem: 9305 +Train: [15] [3300/6250] eta: 0:09:17 lr: 0.000121 grad: 0.0628 (0.0665) loss: 0.8871 (0.8840) time: 0.1419 data: 0.0607 max mem: 9305 +Train: [15] [3400/6250] eta: 0:09:00 lr: 0.000121 grad: 0.0651 (0.0665) loss: 0.8829 (0.8841) time: 0.1114 data: 0.0003 max mem: 9305 +Train: [15] [3500/6250] eta: 0:08:43 lr: 0.000121 grad: 0.0625 (0.0664) loss: 0.8846 (0.8841) time: 0.3663 data: 0.2574 max mem: 9305 +Train: [15] [3600/6250] eta: 0:08:22 lr: 0.000121 grad: 0.0667 (0.0664) loss: 0.8850 (0.8841) time: 0.1703 data: 0.0700 max mem: 9305 +Train: [15] [3700/6250] eta: 0:08:03 lr: 0.000121 grad: 0.0661 (0.0664) loss: 0.8844 (0.8841) time: 0.2120 data: 0.1085 max mem: 9305 +Train: [15] [3800/6250] eta: 0:07:45 lr: 0.000121 grad: 0.0626 (0.0664) loss: 0.8846 (0.8841) time: 0.2613 data: 0.1594 max mem: 9305 +Train: [15] [3900/6250] eta: 0:07:24 lr: 0.000121 grad: 0.0641 (0.0663) loss: 0.8829 (0.8841) time: 0.1403 data: 0.0509 max mem: 9305 +Train: [15] [4000/6250] eta: 0:07:07 lr: 0.000121 grad: 0.0665 (0.0663) loss: 0.8856 (0.8841) time: 0.2769 data: 0.1712 max mem: 9305 +Train: [15] [4100/6250] eta: 0:06:46 lr: 0.000121 grad: 0.0623 (0.0663) loss: 0.8822 (0.8841) time: 0.1910 data: 0.1106 max mem: 9305 +Train: [15] [4200/6250] eta: 0:06:27 lr: 0.000121 grad: 0.0652 (0.0664) loss: 0.8858 (0.8841) time: 0.1874 data: 0.1011 max mem: 9305 +Train: [15] [4300/6250] eta: 0:06:07 lr: 0.000121 grad: 0.0671 (0.0663) loss: 0.8826 (0.8841) time: 0.1615 data: 0.0800 max mem: 9305 +Train: [15] [4400/6250] eta: 0:05:48 lr: 0.000121 grad: 0.0659 (0.0663) loss: 0.8832 (0.8841) time: 0.2049 data: 0.1131 max mem: 9305 +Train: [15] [4500/6250] eta: 0:05:29 lr: 0.000121 grad: 0.0669 (0.0663) loss: 0.8883 (0.8842) time: 0.1678 data: 0.0837 max mem: 9305 +Train: [15] [4600/6250] eta: 0:05:10 lr: 0.000121 grad: 0.0598 (0.0664) loss: 0.8854 (0.8842) time: 0.2055 data: 0.1091 max mem: 9305 +Train: [15] [4700/6250] eta: 0:04:50 lr: 0.000121 grad: 0.0672 (0.0663) loss: 0.8858 (0.8842) time: 0.1763 data: 0.0816 max mem: 9305 +Train: [15] [4800/6250] eta: 0:04:31 lr: 0.000121 grad: 0.0694 (0.0664) loss: 0.8801 (0.8842) time: 0.1468 data: 0.0613 max mem: 9305 +Train: [15] [4900/6250] eta: 0:04:12 lr: 0.000121 grad: 0.0626 (0.0665) loss: 0.8791 (0.8842) time: 0.1396 data: 0.0427 max mem: 9305 +Train: [15] [5000/6250] eta: 0:03:53 lr: 0.000121 grad: 0.0653 (0.0664) loss: 0.8854 (0.8842) time: 0.1361 data: 0.0458 max mem: 9305 +Train: [15] [5100/6250] eta: 0:03:34 lr: 0.000121 grad: 0.0663 (0.0665) loss: 0.8839 (0.8842) time: 0.0978 data: 0.0003 max mem: 9305 +Train: [15] [5200/6250] eta: 0:03:15 lr: 0.000121 grad: 0.0676 (0.0665) loss: 0.8826 (0.8842) time: 0.1676 data: 0.0795 max mem: 9305 +Train: [15] [5300/6250] eta: 0:02:56 lr: 0.000121 grad: 0.0653 (0.0665) loss: 0.8824 (0.8842) time: 0.1053 data: 0.0002 max mem: 9305 +Train: [15] [5400/6250] eta: 0:02:37 lr: 0.000121 grad: 0.0667 (0.0665) loss: 0.8844 (0.8842) time: 0.1652 data: 0.0724 max mem: 9305 +Train: [15] [5500/6250] eta: 0:02:18 lr: 0.000121 grad: 0.0634 (0.0665) loss: 0.8858 (0.8842) time: 0.1357 data: 0.0511 max mem: 9305 +Train: [15] [5600/6250] eta: 0:02:00 lr: 0.000121 grad: 0.0656 (0.0666) loss: 0.8898 (0.8842) time: 0.1803 data: 0.0938 max mem: 9305 +Train: [15] [5700/6250] eta: 0:01:41 lr: 0.000121 grad: 0.0617 (0.0666) loss: 0.8825 (0.8841) time: 0.1593 data: 0.0676 max mem: 9305 +Train: [15] [5800/6250] eta: 0:01:22 lr: 0.000121 grad: 0.0637 (0.0666) loss: 0.8867 (0.8841) time: 0.1738 data: 0.0731 max mem: 9305 +Train: [15] [5900/6250] eta: 0:01:04 lr: 0.000121 grad: 0.0628 (0.0666) loss: 0.8807 (0.8841) time: 0.1516 data: 0.0619 max mem: 9305 +Train: [15] [6000/6250] eta: 0:00:45 lr: 0.000121 grad: 0.0686 (0.0666) loss: 0.8800 (0.8841) time: 0.1774 data: 0.0865 max mem: 9305 +Train: [15] [6100/6250] eta: 0:00:27 lr: 0.000121 grad: 0.0670 (0.0666) loss: 0.8855 (0.8841) time: 0.1860 data: 0.0968 max mem: 9305 +Train: [15] [6200/6250] eta: 0:00:09 lr: 0.000121 grad: 0.0648 (0.0666) loss: 0.8852 (0.8841) time: 0.0994 data: 0.0002 max mem: 9305 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.0624 (0.0666) loss: 0.8917 (0.8841) time: 0.1122 data: 0.0002 max mem: 9305 +Train: [15] Total time: 0:19:18 (0.1853 s / it) +Averaged stats: lr: 0.000121 grad: 0.0624 (0.0666) loss: 0.8917 (0.8841) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:05:50 loss: 0.9027 (0.9027) time: 5.6540 data: 5.6180 max mem: 9305 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8923 (0.8926) time: 0.1363 data: 0.1070 max mem: 9305 +Eval (hcp-train-subset): [15] Total time: 0:00:14 (0.2372 s / it) +Averaged stats (hcp-train-subset): loss: 0.8923 (0.8926) +Eval (hcp-val): [15] [ 0/62] eta: 0:05:12 loss: 0.8847 (0.8847) time: 5.0428 data: 5.0085 max mem: 9305 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8855 (0.8877) time: 0.1428 data: 0.1129 max mem: 9305 +Eval (hcp-val): [15] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (hcp-val): loss: 0.8855 (0.8877) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [16] [ 0/6250] eta: 10:42:32 lr: 0.000121 grad: 0.0957 (0.0957) loss: 0.8871 (0.8871) time: 6.1684 data: 6.0333 max mem: 9305 +Train: [16] [ 100/6250] eta: 0:22:28 lr: 0.000121 grad: 0.0690 (0.0821) loss: 0.8805 (0.8749) time: 0.1712 data: 0.0784 max mem: 9305 +Train: [16] [ 200/6250] eta: 0:20:11 lr: 0.000121 grad: 0.0635 (0.0779) loss: 0.8819 (0.8753) time: 0.1738 data: 0.0832 max mem: 9305 +Train: [16] [ 300/6250] eta: 0:18:59 lr: 0.000121 grad: 0.0700 (0.0773) loss: 0.8753 (0.8757) time: 0.1193 data: 0.0138 max mem: 9305 +Train: [16] [ 400/6250] eta: 0:18:29 lr: 0.000121 grad: 0.0642 (0.0761) loss: 0.8836 (0.8765) time: 0.1923 data: 0.0996 max mem: 9305 +Train: [16] [ 500/6250] eta: 0:17:42 lr: 0.000121 grad: 0.0647 (0.0747) loss: 0.8859 (0.8774) time: 0.2071 data: 0.1117 max mem: 9305 +Train: [16] [ 600/6250] eta: 0:17:07 lr: 0.000121 grad: 0.0646 (0.0737) loss: 0.8894 (0.8782) time: 0.1563 data: 0.0607 max mem: 9305 +Train: [16] [ 700/6250] eta: 0:16:32 lr: 0.000121 grad: 0.0639 (0.0727) loss: 0.8821 (0.8791) time: 0.1986 data: 0.1130 max mem: 9305 +Train: [16] [ 800/6250] eta: 0:16:52 lr: 0.000121 grad: 0.0643 (0.0718) loss: 0.8843 (0.8799) time: 0.2657 data: 0.1684 max mem: 9305 +Train: [16] [ 900/6250] eta: 0:16:28 lr: 0.000121 grad: 0.0656 (0.0712) loss: 0.8877 (0.8803) time: 0.1651 data: 0.0748 max mem: 9305 +Train: [16] [1000/6250] eta: 0:16:21 lr: 0.000121 grad: 0.0638 (0.0706) loss: 0.8828 (0.8807) time: 0.1307 data: 0.0148 max mem: 9305 +Train: [16] [1100/6250] eta: 0:16:19 lr: 0.000121 grad: 0.0643 (0.0702) loss: 0.8871 (0.8809) time: 0.3471 data: 0.2223 max mem: 9305 +Train: [16] [1200/6250] eta: 0:15:51 lr: 0.000121 grad: 0.0629 (0.0699) loss: 0.8836 (0.8810) time: 0.1696 data: 0.0858 max mem: 9305 +Train: [16] [1300/6250] eta: 0:15:38 lr: 0.000121 grad: 0.0629 (0.0698) loss: 0.8824 (0.8811) time: 0.1640 data: 0.0777 max mem: 9305 +Train: [16] [1400/6250] eta: 0:15:10 lr: 0.000121 grad: 0.0639 (0.0696) loss: 0.8845 (0.8812) time: 0.1420 data: 0.0495 max mem: 9305 +Train: [16] [1500/6250] eta: 0:14:48 lr: 0.000121 grad: 0.0644 (0.0695) loss: 0.8819 (0.8814) time: 0.1078 data: 0.0045 max mem: 9305 +Train: [16] [1600/6250] eta: 0:14:26 lr: 0.000121 grad: 0.0665 (0.0692) loss: 0.8809 (0.8814) time: 0.1824 data: 0.0873 max mem: 9305 +Train: [16] [1700/6250] eta: 0:14:08 lr: 0.000121 grad: 0.0601 (0.0690) loss: 0.8824 (0.8815) time: 0.1797 data: 0.0778 max mem: 9305 +Train: [16] [1800/6250] eta: 0:13:50 lr: 0.000121 grad: 0.0636 (0.0689) loss: 0.8854 (0.8814) time: 0.1725 data: 0.0685 max mem: 9305 +Train: [16] [1900/6250] eta: 0:13:31 lr: 0.000121 grad: 0.0639 (0.0688) loss: 0.8873 (0.8816) time: 0.1079 data: 0.0103 max mem: 9305 +Train: [16] [2000/6250] eta: 0:13:14 lr: 0.000121 grad: 0.0662 (0.0687) loss: 0.8834 (0.8817) time: 0.1749 data: 0.0867 max mem: 9305 +Train: [16] [2100/6250] eta: 0:12:51 lr: 0.000121 grad: 0.0641 (0.0686) loss: 0.8806 (0.8818) time: 0.2009 data: 0.1066 max mem: 9305 +Train: [16] [2200/6250] eta: 0:12:31 lr: 0.000121 grad: 0.0657 (0.0685) loss: 0.8843 (0.8819) time: 0.1800 data: 0.0915 max mem: 9305 +Train: [16] [2300/6250] eta: 0:12:15 lr: 0.000121 grad: 0.0637 (0.0684) loss: 0.8804 (0.8819) time: 0.1092 data: 0.0003 max mem: 9305 +Train: [16] [2400/6250] eta: 0:11:56 lr: 0.000121 grad: 0.0703 (0.0683) loss: 0.8792 (0.8819) time: 0.1134 data: 0.0142 max mem: 9305 +Train: [16] [2500/6250] eta: 0:11:38 lr: 0.000121 grad: 0.0637 (0.0683) loss: 0.8797 (0.8818) time: 0.2294 data: 0.0873 max mem: 9305 +Train: [16] [2600/6250] eta: 0:11:19 lr: 0.000121 grad: 0.0667 (0.0682) loss: 0.8817 (0.8818) time: 0.2356 data: 0.1383 max mem: 9305 +Train: [16] [2700/6250] eta: 0:11:00 lr: 0.000121 grad: 0.0670 (0.0680) loss: 0.8756 (0.8818) time: 0.2003 data: 0.1094 max mem: 9305 +Train: [16] [2800/6250] eta: 0:10:41 lr: 0.000121 grad: 0.0611 (0.0680) loss: 0.8898 (0.8818) time: 0.2082 data: 0.0892 max mem: 9305 +Train: [16] [2900/6250] eta: 0:10:23 lr: 0.000121 grad: 0.0631 (0.0681) loss: 0.8875 (0.8819) time: 0.1536 data: 0.0629 max mem: 9305 +Train: [16] [3000/6250] eta: 0:10:04 lr: 0.000121 grad: 0.0632 (0.0680) loss: 0.8869 (0.8819) time: 0.1891 data: 0.0846 max mem: 9305 +Train: [16] [3100/6250] eta: 0:09:49 lr: 0.000121 grad: 0.0623 (0.0679) loss: 0.8813 (0.8819) time: 0.2991 data: 0.1987 max mem: 9305 +Train: [16] [3200/6250] eta: 0:09:30 lr: 0.000121 grad: 0.0680 (0.0678) loss: 0.8838 (0.8820) time: 0.1986 data: 0.1094 max mem: 9305 +Train: [16] [3300/6250] eta: 0:09:14 lr: 0.000121 grad: 0.0620 (0.0678) loss: 0.8872 (0.8821) time: 0.1125 data: 0.0002 max mem: 9305 +Train: [16] [3400/6250] eta: 0:08:55 lr: 0.000121 grad: 0.0599 (0.0677) loss: 0.8880 (0.8822) time: 0.1550 data: 0.0549 max mem: 9305 +Train: [16] [3500/6250] eta: 0:08:35 lr: 0.000120 grad: 0.0665 (0.0677) loss: 0.8867 (0.8822) time: 0.2060 data: 0.0443 max mem: 9305 +Train: [16] [3600/6250] eta: 0:08:18 lr: 0.000120 grad: 0.0645 (0.0678) loss: 0.8846 (0.8822) time: 0.1330 data: 0.0345 max mem: 9305 +Train: [16] [3700/6250] eta: 0:07:58 lr: 0.000120 grad: 0.0664 (0.0678) loss: 0.8778 (0.8821) time: 0.1726 data: 0.0842 max mem: 9305 +Train: [16] [3800/6250] eta: 0:07:38 lr: 0.000120 grad: 0.0642 (0.0678) loss: 0.8787 (0.8821) time: 0.1689 data: 0.0772 max mem: 9305 +Train: [16] [3900/6250] eta: 0:07:19 lr: 0.000120 grad: 0.0636 (0.0678) loss: 0.8823 (0.8820) time: 0.1909 data: 0.1070 max mem: 9305 +Train: [16] [4000/6250] eta: 0:07:01 lr: 0.000120 grad: 0.0674 (0.0678) loss: 0.8816 (0.8820) time: 0.2355 data: 0.1563 max mem: 9305 +Train: [16] [4100/6250] eta: 0:06:42 lr: 0.000120 grad: 0.0683 (0.0678) loss: 0.8813 (0.8820) time: 0.1760 data: 0.0821 max mem: 9305 +Train: [16] [4200/6250] eta: 0:06:23 lr: 0.000120 grad: 0.0664 (0.0677) loss: 0.8828 (0.8820) time: 0.1532 data: 0.0747 max mem: 9305 +Train: [16] [4300/6250] eta: 0:06:04 lr: 0.000120 grad: 0.0638 (0.0677) loss: 0.8812 (0.8820) time: 0.1917 data: 0.0958 max mem: 9305 +Train: [16] [4400/6250] eta: 0:05:45 lr: 0.000120 grad: 0.0685 (0.0677) loss: 0.8798 (0.8819) time: 0.1700 data: 0.0890 max mem: 9305 +Train: [16] [4500/6250] eta: 0:05:26 lr: 0.000120 grad: 0.0678 (0.0677) loss: 0.8821 (0.8819) time: 0.1646 data: 0.0683 max mem: 9305 +Train: [16] [4600/6250] eta: 0:05:06 lr: 0.000120 grad: 0.0650 (0.0677) loss: 0.8807 (0.8819) time: 0.1555 data: 0.0631 max mem: 9305 +Train: [16] [4700/6250] eta: 0:04:47 lr: 0.000120 grad: 0.0644 (0.0677) loss: 0.8771 (0.8819) time: 0.1312 data: 0.0317 max mem: 9305 +Train: [16] [4800/6250] eta: 0:04:27 lr: 0.000120 grad: 0.0632 (0.0677) loss: 0.8809 (0.8819) time: 0.1238 data: 0.0211 max mem: 9305 +Train: [16] [4900/6250] eta: 0:04:08 lr: 0.000120 grad: 0.0637 (0.0676) loss: 0.8857 (0.8819) time: 0.1682 data: 0.0868 max mem: 9305 +Train: [16] [5000/6250] eta: 0:03:50 lr: 0.000120 grad: 0.0632 (0.0675) loss: 0.8844 (0.8819) time: 0.1935 data: 0.1088 max mem: 9305 +Train: [16] [5100/6250] eta: 0:03:31 lr: 0.000120 grad: 0.0661 (0.0675) loss: 0.8807 (0.8819) time: 0.1329 data: 0.0470 max mem: 9305 +Train: [16] [5200/6250] eta: 0:03:13 lr: 0.000120 grad: 0.0596 (0.0674) loss: 0.8831 (0.8819) time: 0.1407 data: 0.0404 max mem: 9305 +Train: [16] [5300/6250] eta: 0:02:54 lr: 0.000120 grad: 0.0638 (0.0674) loss: 0.8839 (0.8819) time: 0.1730 data: 0.0885 max mem: 9305 +Train: [16] [5400/6250] eta: 0:02:36 lr: 0.000120 grad: 0.0620 (0.0673) loss: 0.8811 (0.8819) time: 0.1968 data: 0.1071 max mem: 9305 +Train: [16] [5500/6250] eta: 0:02:18 lr: 0.000120 grad: 0.0651 (0.0673) loss: 0.8827 (0.8819) time: 0.1556 data: 0.0579 max mem: 9305 +Train: [16] [5600/6250] eta: 0:02:00 lr: 0.000120 grad: 0.0652 (0.0673) loss: 0.8854 (0.8820) time: 0.4291 data: 0.2929 max mem: 9305 +Train: [16] [5700/6250] eta: 0:01:41 lr: 0.000120 grad: 0.0635 (0.0672) loss: 0.8799 (0.8820) time: 0.2626 data: 0.1611 max mem: 9305 +Train: [16] [5800/6250] eta: 0:01:23 lr: 0.000120 grad: 0.0664 (0.0672) loss: 0.8806 (0.8821) time: 0.1372 data: 0.0475 max mem: 9305 +Train: [16] [5900/6250] eta: 0:01:04 lr: 0.000120 grad: 0.0717 (0.0672) loss: 0.8859 (0.8821) time: 0.2746 data: 0.1781 max mem: 9305 +Train: [16] [6000/6250] eta: 0:00:46 lr: 0.000120 grad: 0.0643 (0.0672) loss: 0.8816 (0.8822) time: 0.1375 data: 0.0385 max mem: 9305 +Train: [16] [6100/6250] eta: 0:00:27 lr: 0.000120 grad: 0.0661 (0.0672) loss: 0.8803 (0.8822) time: 0.1561 data: 0.0688 max mem: 9305 +Train: [16] [6200/6250] eta: 0:00:09 lr: 0.000120 grad: 0.0678 (0.0673) loss: 0.8783 (0.8822) time: 0.1517 data: 0.0667 max mem: 9305 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.0675 (0.0673) loss: 0.8782 (0.8822) time: 0.1352 data: 0.0505 max mem: 9305 +Train: [16] Total time: 0:19:17 (0.1853 s / it) +Averaged stats: lr: 0.000120 grad: 0.0675 (0.0673) loss: 0.8782 (0.8822) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:04:11 loss: 0.9029 (0.9029) time: 4.0568 data: 3.9908 max mem: 9305 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8899 (0.8907) time: 0.1377 data: 0.1088 max mem: 9305 +Eval (hcp-train-subset): [16] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-train-subset): loss: 0.8899 (0.8907) +Eval (hcp-val): [16] [ 0/62] eta: 0:05:23 loss: 0.8849 (0.8849) time: 5.2222 data: 5.1871 max mem: 9305 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8845 (0.8862) time: 0.1327 data: 0.1040 max mem: 9305 +Eval (hcp-val): [16] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-val): loss: 0.8845 (0.8862) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [17] [ 0/6250] eta: 10:04:27 lr: 0.000120 grad: 0.0910 (0.0910) loss: 0.9171 (0.9171) time: 5.8027 data: 5.6144 max mem: 9305 +Train: [17] [ 100/6250] eta: 0:24:36 lr: 0.000120 grad: 0.0682 (0.0666) loss: 0.8842 (0.8909) time: 0.2160 data: 0.1255 max mem: 9305 +Train: [17] [ 200/6250] eta: 0:20:04 lr: 0.000120 grad: 0.0651 (0.0671) loss: 0.8775 (0.8866) time: 0.1541 data: 0.0595 max mem: 9305 +Train: [17] [ 300/6250] eta: 0:18:41 lr: 0.000120 grad: 0.0610 (0.0674) loss: 0.8776 (0.8830) time: 0.1649 data: 0.0760 max mem: 9305 +Train: [17] [ 400/6250] eta: 0:17:43 lr: 0.000120 grad: 0.0666 (0.0677) loss: 0.8755 (0.8808) time: 0.1489 data: 0.0510 max mem: 9305 +Train: [17] [ 500/6250] eta: 0:17:02 lr: 0.000120 grad: 0.0649 (0.0682) loss: 0.8847 (0.8806) time: 0.1839 data: 0.0705 max mem: 9305 +Train: [17] [ 600/6250] eta: 0:17:21 lr: 0.000120 grad: 0.0631 (0.0679) loss: 0.8835 (0.8809) time: 0.3815 data: 0.2207 max mem: 9305 +Train: [17] [ 700/6250] eta: 0:16:47 lr: 0.000120 grad: 0.0646 (0.0675) loss: 0.8846 (0.8815) time: 0.1962 data: 0.1038 max mem: 9305 +Train: [17] [ 800/6250] eta: 0:16:52 lr: 0.000120 grad: 0.0662 (0.0671) loss: 0.8900 (0.8820) time: 0.3087 data: 0.2139 max mem: 9305 +Train: [17] [ 900/6250] eta: 0:16:28 lr: 0.000120 grad: 0.0577 (0.0666) loss: 0.8865 (0.8825) time: 0.1844 data: 0.0895 max mem: 9305 +Train: [17] [1000/6250] eta: 0:16:23 lr: 0.000120 grad: 0.0631 (0.0663) loss: 0.8888 (0.8829) time: 0.1280 data: 0.0418 max mem: 9305 +Train: [17] [1100/6250] eta: 0:16:06 lr: 0.000120 grad: 0.0634 (0.0660) loss: 0.8840 (0.8831) time: 0.2195 data: 0.1242 max mem: 9305 +Train: [17] [1200/6250] eta: 0:15:38 lr: 0.000120 grad: 0.0661 (0.0657) loss: 0.8786 (0.8833) time: 0.1928 data: 0.1064 max mem: 9305 +Train: [17] [1300/6250] eta: 0:15:11 lr: 0.000120 grad: 0.0597 (0.0654) loss: 0.8892 (0.8835) time: 0.1613 data: 0.0636 max mem: 9305 +Train: [17] [1400/6250] eta: 0:14:55 lr: 0.000120 grad: 0.0636 (0.0654) loss: 0.8828 (0.8835) time: 0.2216 data: 0.1216 max mem: 9305 +Train: [17] [1500/6250] eta: 0:14:33 lr: 0.000120 grad: 0.0625 (0.0653) loss: 0.8802 (0.8836) time: 0.1358 data: 0.0428 max mem: 9305 +Train: [17] [1600/6250] eta: 0:14:23 lr: 0.000120 grad: 0.0628 (0.0653) loss: 0.8801 (0.8836) time: 0.3492 data: 0.2096 max mem: 9305 +Train: [17] [1700/6250] eta: 0:13:55 lr: 0.000120 grad: 0.0655 (0.0654) loss: 0.8887 (0.8837) time: 0.1634 data: 0.0445 max mem: 9305 +Train: [17] [1800/6250] eta: 0:13:37 lr: 0.000120 grad: 0.0653 (0.0655) loss: 0.8824 (0.8836) time: 0.1264 data: 0.0385 max mem: 9305 +Train: [17] [1900/6250] eta: 0:13:13 lr: 0.000120 grad: 0.0595 (0.0654) loss: 0.8807 (0.8835) time: 0.1428 data: 0.0517 max mem: 9305 +Train: [17] [2000/6250] eta: 0:12:52 lr: 0.000120 grad: 0.0640 (0.0655) loss: 0.8816 (0.8834) time: 0.1774 data: 0.0631 max mem: 9305 +Train: [17] [2100/6250] eta: 0:12:34 lr: 0.000120 grad: 0.0631 (0.0655) loss: 0.8841 (0.8834) time: 0.2234 data: 0.1356 max mem: 9305 +Train: [17] [2200/6250] eta: 0:12:14 lr: 0.000120 grad: 0.0611 (0.0656) loss: 0.8832 (0.8834) time: 0.1852 data: 0.0908 max mem: 9305 +Train: [17] [2300/6250] eta: 0:12:01 lr: 0.000120 grad: 0.0640 (0.0656) loss: 0.8808 (0.8833) time: 0.1304 data: 0.0083 max mem: 9305 +Train: [17] [2400/6250] eta: 0:11:38 lr: 0.000120 grad: 0.0635 (0.0656) loss: 0.8818 (0.8833) time: 0.1705 data: 0.0789 max mem: 9305 +Train: [17] [2500/6250] eta: 0:11:19 lr: 0.000120 grad: 0.0633 (0.0656) loss: 0.8814 (0.8832) time: 0.1787 data: 0.0906 max mem: 9305 +Train: [17] [2600/6250] eta: 0:11:00 lr: 0.000120 grad: 0.0617 (0.0656) loss: 0.8834 (0.8832) time: 0.1647 data: 0.0705 max mem: 9305 +Train: [17] [2700/6250] eta: 0:10:40 lr: 0.000120 grad: 0.0596 (0.0656) loss: 0.8823 (0.8831) time: 0.1576 data: 0.0695 max mem: 9305 +Train: [17] [2800/6250] eta: 0:10:21 lr: 0.000120 grad: 0.0640 (0.0655) loss: 0.8762 (0.8830) time: 0.1622 data: 0.0646 max mem: 9305 +Train: [17] [2900/6250] eta: 0:10:02 lr: 0.000120 grad: 0.0649 (0.0656) loss: 0.8783 (0.8829) time: 0.1716 data: 0.0744 max mem: 9305 +Train: [17] [3000/6250] eta: 0:09:43 lr: 0.000120 grad: 0.0635 (0.0655) loss: 0.8811 (0.8828) time: 0.2329 data: 0.1422 max mem: 9305 +Train: [17] [3100/6250] eta: 0:09:23 lr: 0.000120 grad: 0.0627 (0.0655) loss: 0.8867 (0.8828) time: 0.1382 data: 0.0409 max mem: 9305 +Train: [17] [3200/6250] eta: 0:09:07 lr: 0.000120 grad: 0.0626 (0.0654) loss: 0.8815 (0.8828) time: 0.2389 data: 0.0923 max mem: 9305 +Train: [17] [3300/6250] eta: 0:08:51 lr: 0.000120 grad: 0.0632 (0.0654) loss: 0.8827 (0.8828) time: 0.2027 data: 0.0468 max mem: 9305 +Train: [17] [3400/6250] eta: 0:08:33 lr: 0.000120 grad: 0.0614 (0.0653) loss: 0.8812 (0.8828) time: 0.1855 data: 0.0916 max mem: 9305 +Train: [17] [3500/6250] eta: 0:08:14 lr: 0.000120 grad: 0.0617 (0.0653) loss: 0.8853 (0.8829) time: 0.1683 data: 0.0723 max mem: 9305 +Train: [17] [3600/6250] eta: 0:07:56 lr: 0.000120 grad: 0.0629 (0.0653) loss: 0.8801 (0.8828) time: 0.1587 data: 0.0687 max mem: 9305 +Train: [17] [3700/6250] eta: 0:07:39 lr: 0.000120 grad: 0.0619 (0.0652) loss: 0.8775 (0.8828) time: 0.2457 data: 0.1459 max mem: 9305 +Train: [17] [3800/6250] eta: 0:07:21 lr: 0.000120 grad: 0.0601 (0.0652) loss: 0.8806 (0.8829) time: 0.2373 data: 0.1313 max mem: 9305 +Train: [17] [3900/6250] eta: 0:07:05 lr: 0.000120 grad: 0.0650 (0.0652) loss: 0.8825 (0.8829) time: 0.0992 data: 0.0002 max mem: 9305 +Train: [17] [4000/6250] eta: 0:06:46 lr: 0.000120 grad: 0.0628 (0.0653) loss: 0.8812 (0.8828) time: 0.1791 data: 0.0919 max mem: 9305 +Train: [17] [4100/6250] eta: 0:06:26 lr: 0.000120 grad: 0.0636 (0.0653) loss: 0.8824 (0.8827) time: 0.1461 data: 0.0624 max mem: 9305 +Train: [17] [4200/6250] eta: 0:06:08 lr: 0.000120 grad: 0.0643 (0.0654) loss: 0.8806 (0.8827) time: 0.1760 data: 0.0823 max mem: 9305 +Train: [17] [4300/6250] eta: 0:05:50 lr: 0.000120 grad: 0.0669 (0.0654) loss: 0.8802 (0.8827) time: 0.1975 data: 0.1101 max mem: 9305 +Train: [17] [4400/6250] eta: 0:05:31 lr: 0.000120 grad: 0.0637 (0.0655) loss: 0.8759 (0.8826) time: 0.1754 data: 0.0788 max mem: 9305 +Train: [17] [4500/6250] eta: 0:05:12 lr: 0.000120 grad: 0.0714 (0.0656) loss: 0.8777 (0.8825) time: 0.1460 data: 0.0646 max mem: 9305 +Train: [17] [4600/6250] eta: 0:04:54 lr: 0.000120 grad: 0.0678 (0.0656) loss: 0.8819 (0.8825) time: 0.2002 data: 0.1041 max mem: 9305 +Train: [17] [4700/6250] eta: 0:04:36 lr: 0.000120 grad: 0.0633 (0.0656) loss: 0.8844 (0.8824) time: 0.1489 data: 0.0618 max mem: 9305 +Train: [17] [4800/6250] eta: 0:04:18 lr: 0.000120 grad: 0.0654 (0.0656) loss: 0.8774 (0.8824) time: 0.1492 data: 0.0591 max mem: 9305 +Train: [17] [4900/6250] eta: 0:04:00 lr: 0.000119 grad: 0.0604 (0.0656) loss: 0.8798 (0.8824) time: 0.1336 data: 0.0441 max mem: 9305 +Train: [17] [5000/6250] eta: 0:03:41 lr: 0.000119 grad: 0.0611 (0.0657) loss: 0.8888 (0.8824) time: 0.1682 data: 0.0729 max mem: 9305 +Train: [17] [5100/6250] eta: 0:03:23 lr: 0.000119 grad: 0.0666 (0.0658) loss: 0.8796 (0.8824) time: 0.1724 data: 0.0802 max mem: 9305 +Train: [17] [5200/6250] eta: 0:03:05 lr: 0.000119 grad: 0.0678 (0.0658) loss: 0.8825 (0.8824) time: 0.1362 data: 0.0506 max mem: 9305 +Train: [17] [5300/6250] eta: 0:02:47 lr: 0.000119 grad: 0.0641 (0.0658) loss: 0.8786 (0.8824) time: 0.1584 data: 0.0696 max mem: 9305 +Train: [17] [5400/6250] eta: 0:02:30 lr: 0.000119 grad: 0.0639 (0.0658) loss: 0.8779 (0.8824) time: 0.2226 data: 0.1229 max mem: 9305 +Train: [17] [5500/6250] eta: 0:02:12 lr: 0.000119 grad: 0.0606 (0.0658) loss: 0.8804 (0.8824) time: 0.2829 data: 0.1907 max mem: 9305 +Train: [17] [5600/6250] eta: 0:01:55 lr: 0.000119 grad: 0.0604 (0.0658) loss: 0.8841 (0.8824) time: 0.2184 data: 0.1163 max mem: 9305 +Train: [17] [5700/6250] eta: 0:01:37 lr: 0.000119 grad: 0.0645 (0.0657) loss: 0.8872 (0.8824) time: 0.1496 data: 0.0476 max mem: 9305 +Train: [17] [5800/6250] eta: 0:01:19 lr: 0.000119 grad: 0.0689 (0.0658) loss: 0.8851 (0.8824) time: 0.2436 data: 0.1519 max mem: 9305 +Train: [17] [5900/6250] eta: 0:01:01 lr: 0.000119 grad: 0.0616 (0.0658) loss: 0.8837 (0.8825) time: 0.2525 data: 0.1509 max mem: 9305 +Train: [17] [6000/6250] eta: 0:00:44 lr: 0.000119 grad: 0.0617 (0.0658) loss: 0.8875 (0.8825) time: 0.2137 data: 0.1205 max mem: 9305 +Train: [17] [6100/6250] eta: 0:00:26 lr: 0.000119 grad: 0.0612 (0.0657) loss: 0.8841 (0.8825) time: 0.1569 data: 0.0619 max mem: 9305 +Train: [17] [6200/6250] eta: 0:00:08 lr: 0.000119 grad: 0.0624 (0.0657) loss: 0.8846 (0.8826) time: 0.1277 data: 0.0413 max mem: 9305 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.0623 (0.0657) loss: 0.8832 (0.8826) time: 0.1685 data: 0.0769 max mem: 9305 +Train: [17] Total time: 0:18:27 (0.1773 s / it) +Averaged stats: lr: 0.000119 grad: 0.0623 (0.0657) loss: 0.8832 (0.8826) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:04:12 loss: 0.8990 (0.8990) time: 4.0737 data: 3.9768 max mem: 9305 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8913 (0.8911) time: 0.1221 data: 0.0932 max mem: 9305 +Eval (hcp-train-subset): [17] Total time: 0:00:14 (0.2367 s / it) +Averaged stats (hcp-train-subset): loss: 0.8913 (0.8911) +Eval (hcp-val): [17] [ 0/62] eta: 0:03:40 loss: 0.8837 (0.8837) time: 3.5579 data: 3.4699 max mem: 9305 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8851 (0.8866) time: 0.1546 data: 0.1260 max mem: 9305 +Eval (hcp-val): [17] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-val): loss: 0.8851 (0.8866) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [18] [ 0/6250] eta: 9:25:33 lr: 0.000119 grad: 0.0683 (0.0683) loss: 0.9106 (0.9106) time: 5.4294 data: 5.2251 max mem: 9305 +Train: [18] [ 100/6250] eta: 0:22:38 lr: 0.000119 grad: 0.0674 (0.0679) loss: 0.8850 (0.8903) time: 0.1771 data: 0.0758 max mem: 9305 +Train: [18] [ 200/6250] eta: 0:19:55 lr: 0.000119 grad: 0.0633 (0.0669) loss: 0.8870 (0.8875) time: 0.1396 data: 0.0317 max mem: 9305 +Train: [18] [ 300/6250] eta: 0:18:20 lr: 0.000119 grad: 0.0620 (0.0662) loss: 0.8811 (0.8870) time: 0.1691 data: 0.0585 max mem: 9305 +Train: [18] [ 400/6250] eta: 0:17:53 lr: 0.000119 grad: 0.0617 (0.0661) loss: 0.8839 (0.8869) time: 0.1776 data: 0.0873 max mem: 9305 +Train: [18] [ 500/6250] eta: 0:17:43 lr: 0.000119 grad: 0.0610 (0.0657) loss: 0.8844 (0.8869) time: 0.1927 data: 0.1040 max mem: 9305 +Train: [18] [ 600/6250] eta: 0:17:32 lr: 0.000119 grad: 0.0628 (0.0656) loss: 0.8853 (0.8868) time: 0.1967 data: 0.0964 max mem: 9305 +Train: [18] [ 700/6250] eta: 0:17:23 lr: 0.000119 grad: 0.0686 (0.0658) loss: 0.8865 (0.8865) time: 0.1861 data: 0.0777 max mem: 9305 +Train: [18] [ 800/6250] eta: 0:17:36 lr: 0.000119 grad: 0.0636 (0.0657) loss: 0.8815 (0.8860) time: 0.1328 data: 0.0004 max mem: 9305 +Train: [18] [ 900/6250] eta: 0:17:06 lr: 0.000119 grad: 0.0617 (0.0656) loss: 0.8849 (0.8858) time: 0.1809 data: 0.0891 max mem: 9305 +Train: [18] [1000/6250] eta: 0:16:48 lr: 0.000119 grad: 0.0628 (0.0653) loss: 0.8852 (0.8857) time: 0.1284 data: 0.0168 max mem: 9305 +Train: [18] [1100/6250] eta: 0:16:44 lr: 0.000119 grad: 0.0622 (0.0652) loss: 0.8841 (0.8856) time: 0.1504 data: 0.0496 max mem: 9305 +Train: [18] [1200/6250] eta: 0:16:36 lr: 0.000119 grad: 0.0634 (0.0653) loss: 0.8807 (0.8855) time: 0.1568 data: 0.0295 max mem: 9305 +Train: [18] [1300/6250] eta: 0:16:06 lr: 0.000119 grad: 0.0614 (0.0653) loss: 0.8857 (0.8853) time: 0.1229 data: 0.0230 max mem: 9305 +Train: [18] [1400/6250] eta: 0:15:39 lr: 0.000119 grad: 0.0672 (0.0653) loss: 0.8821 (0.8851) time: 0.1580 data: 0.0780 max mem: 9305 +Train: [18] [1500/6250] eta: 0:15:31 lr: 0.000119 grad: 0.0651 (0.0654) loss: 0.8810 (0.8848) time: 0.2174 data: 0.1260 max mem: 9305 +Train: [18] [1600/6250] eta: 0:14:58 lr: 0.000119 grad: 0.0644 (0.0655) loss: 0.8821 (0.8846) time: 0.1686 data: 0.0712 max mem: 9305 +Train: [18] [1700/6250] eta: 0:14:35 lr: 0.000119 grad: 0.0657 (0.0655) loss: 0.8806 (0.8843) time: 0.1544 data: 0.0628 max mem: 9305 +Train: [18] [1800/6250] eta: 0:14:12 lr: 0.000119 grad: 0.0659 (0.0654) loss: 0.8779 (0.8841) time: 0.1760 data: 0.0877 max mem: 9305 +Train: [18] [1900/6250] eta: 0:13:55 lr: 0.000119 grad: 0.0650 (0.0655) loss: 0.8777 (0.8838) time: 0.1026 data: 0.0065 max mem: 9305 +Train: [18] [2000/6250] eta: 0:13:31 lr: 0.000119 grad: 0.0637 (0.0655) loss: 0.8797 (0.8837) time: 0.1900 data: 0.0969 max mem: 9305 +Train: [18] [2100/6250] eta: 0:13:15 lr: 0.000119 grad: 0.0609 (0.0654) loss: 0.8875 (0.8836) time: 0.1170 data: 0.0002 max mem: 9305 +Train: [18] [2200/6250] eta: 0:12:56 lr: 0.000119 grad: 0.0599 (0.0654) loss: 0.8816 (0.8836) time: 0.2344 data: 0.1056 max mem: 9305 +Train: [18] [2300/6250] eta: 0:12:40 lr: 0.000119 grad: 0.0608 (0.0653) loss: 0.8826 (0.8835) time: 0.1225 data: 0.0003 max mem: 9305 +Train: [18] [2400/6250] eta: 0:12:17 lr: 0.000119 grad: 0.0636 (0.0653) loss: 0.8767 (0.8834) time: 0.1724 data: 0.0792 max mem: 9305 +Train: [18] [2500/6250] eta: 0:11:55 lr: 0.000119 grad: 0.0620 (0.0654) loss: 0.8805 (0.8833) time: 0.1722 data: 0.0624 max mem: 9305 +Train: [18] [2600/6250] eta: 0:11:34 lr: 0.000119 grad: 0.0634 (0.0653) loss: 0.8765 (0.8833) time: 0.1506 data: 0.0513 max mem: 9305 +Train: [18] [2700/6250] eta: 0:11:16 lr: 0.000119 grad: 0.0641 (0.0654) loss: 0.8777 (0.8832) time: 0.1511 data: 0.0605 max mem: 9305 +Train: [18] [2800/6250] eta: 0:10:56 lr: 0.000119 grad: 0.0616 (0.0654) loss: 0.8830 (0.8832) time: 0.1450 data: 0.0452 max mem: 9305 +Train: [18] [2900/6250] eta: 0:10:35 lr: 0.000119 grad: 0.0644 (0.0654) loss: 0.8803 (0.8831) time: 0.2040 data: 0.1108 max mem: 9305 +Train: [18] [3000/6250] eta: 0:10:15 lr: 0.000119 grad: 0.0640 (0.0655) loss: 0.8799 (0.8831) time: 0.1887 data: 0.0958 max mem: 9305 +Train: [18] [3100/6250] eta: 0:09:55 lr: 0.000119 grad: 0.0647 (0.0656) loss: 0.8783 (0.8830) time: 0.1808 data: 0.0811 max mem: 9305 +Train: [18] [3200/6250] eta: 0:09:40 lr: 0.000119 grad: 0.0753 (0.0659) loss: 0.8806 (0.8829) time: 0.1691 data: 0.0726 max mem: 9305 +Train: [18] [3300/6250] eta: 0:09:18 lr: 0.000119 grad: 0.0631 (0.0660) loss: 0.8837 (0.8829) time: 0.1654 data: 0.0776 max mem: 9305 +Train: [18] [3400/6250] eta: 0:08:59 lr: 0.000119 grad: 0.0686 (0.0661) loss: 0.8784 (0.8827) time: 0.1759 data: 0.0749 max mem: 9305 +Train: [18] [3500/6250] eta: 0:08:39 lr: 0.000119 grad: 0.0634 (0.0661) loss: 0.8817 (0.8826) time: 0.1880 data: 0.1023 max mem: 9305 +Train: [18] [3600/6250] eta: 0:08:20 lr: 0.000119 grad: 0.0638 (0.0661) loss: 0.8810 (0.8826) time: 0.2083 data: 0.1200 max mem: 9305 +Train: [18] [3700/6250] eta: 0:08:01 lr: 0.000119 grad: 0.0625 (0.0662) loss: 0.8844 (0.8825) time: 0.1849 data: 0.1004 max mem: 9305 +Train: [18] [3800/6250] eta: 0:07:42 lr: 0.000119 grad: 0.0674 (0.0662) loss: 0.8834 (0.8825) time: 0.2271 data: 0.1453 max mem: 9305 +Train: [18] [3900/6250] eta: 0:07:22 lr: 0.000119 grad: 0.0670 (0.0662) loss: 0.8771 (0.8825) time: 0.1680 data: 0.0791 max mem: 9305 +Train: [18] [4000/6250] eta: 0:07:02 lr: 0.000119 grad: 0.0669 (0.0662) loss: 0.8827 (0.8824) time: 0.1636 data: 0.0799 max mem: 9305 +Train: [18] [4100/6250] eta: 0:06:42 lr: 0.000119 grad: 0.0617 (0.0663) loss: 0.8781 (0.8824) time: 0.1448 data: 0.0516 max mem: 9305 +Train: [18] [4200/6250] eta: 0:06:23 lr: 0.000119 grad: 0.0673 (0.0663) loss: 0.8787 (0.8823) time: 0.1677 data: 0.0821 max mem: 9305 +Train: [18] [4300/6250] eta: 0:06:03 lr: 0.000119 grad: 0.0646 (0.0663) loss: 0.8812 (0.8822) time: 0.1712 data: 0.0907 max mem: 9305 +Train: [18] [4400/6250] eta: 0:05:44 lr: 0.000119 grad: 0.0658 (0.0664) loss: 0.8869 (0.8823) time: 0.1935 data: 0.1105 max mem: 9305 +Train: [18] [4500/6250] eta: 0:05:25 lr: 0.000119 grad: 0.0597 (0.0664) loss: 0.8839 (0.8823) time: 0.1652 data: 0.0686 max mem: 9305 +Train: [18] [4600/6250] eta: 0:05:05 lr: 0.000119 grad: 0.0628 (0.0664) loss: 0.8837 (0.8823) time: 0.1574 data: 0.0649 max mem: 9305 +Train: [18] [4700/6250] eta: 0:04:46 lr: 0.000119 grad: 0.0634 (0.0664) loss: 0.8818 (0.8823) time: 0.1162 data: 0.0164 max mem: 9305 +Train: [18] [4800/6250] eta: 0:04:26 lr: 0.000119 grad: 0.0638 (0.0664) loss: 0.8818 (0.8823) time: 0.1544 data: 0.0531 max mem: 9305 +Train: [18] [4900/6250] eta: 0:04:07 lr: 0.000119 grad: 0.0622 (0.0664) loss: 0.8837 (0.8823) time: 0.1609 data: 0.0667 max mem: 9305 +Train: [18] [5000/6250] eta: 0:03:48 lr: 0.000119 grad: 0.0677 (0.0664) loss: 0.8788 (0.8822) time: 0.2042 data: 0.1112 max mem: 9305 +Train: [18] [5100/6250] eta: 0:03:30 lr: 0.000119 grad: 0.0632 (0.0664) loss: 0.8797 (0.8822) time: 0.1624 data: 0.0702 max mem: 9305 +Train: [18] [5200/6250] eta: 0:03:12 lr: 0.000119 grad: 0.0645 (0.0663) loss: 0.8792 (0.8822) time: 0.1203 data: 0.0003 max mem: 9305 +Train: [18] [5300/6250] eta: 0:02:54 lr: 0.000119 grad: 0.0617 (0.0663) loss: 0.8819 (0.8822) time: 0.1307 data: 0.0363 max mem: 9305 +Train: [18] [5400/6250] eta: 0:02:35 lr: 0.000119 grad: 0.0627 (0.0663) loss: 0.8808 (0.8821) time: 0.1661 data: 0.0723 max mem: 9305 +Train: [18] [5500/6250] eta: 0:02:16 lr: 0.000119 grad: 0.0628 (0.0663) loss: 0.8768 (0.8821) time: 0.1690 data: 0.0773 max mem: 9305 +Train: [18] [5600/6250] eta: 0:01:58 lr: 0.000119 grad: 0.0610 (0.0662) loss: 0.8811 (0.8820) time: 0.0984 data: 0.0002 max mem: 9305 +Train: [18] [5700/6250] eta: 0:01:40 lr: 0.000119 grad: 0.0590 (0.0662) loss: 0.8848 (0.8820) time: 0.1905 data: 0.0965 max mem: 9305 +Train: [18] [5800/6250] eta: 0:01:21 lr: 0.000118 grad: 0.0623 (0.0662) loss: 0.8788 (0.8820) time: 0.1549 data: 0.0647 max mem: 9305 +Train: [18] [5900/6250] eta: 0:01:03 lr: 0.000118 grad: 0.0658 (0.0662) loss: 0.8792 (0.8820) time: 0.1487 data: 0.0502 max mem: 9305 +Train: [18] [6000/6250] eta: 0:00:45 lr: 0.000118 grad: 0.0596 (0.0662) loss: 0.8849 (0.8820) time: 0.1437 data: 0.0594 max mem: 9305 +Train: [18] [6100/6250] eta: 0:00:27 lr: 0.000118 grad: 0.0600 (0.0662) loss: 0.8834 (0.8820) time: 0.1578 data: 0.0636 max mem: 9305 +Train: [18] [6200/6250] eta: 0:00:09 lr: 0.000118 grad: 0.0655 (0.0662) loss: 0.8838 (0.8820) time: 0.1733 data: 0.0849 max mem: 9305 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.0647 (0.0662) loss: 0.8833 (0.8820) time: 0.1637 data: 0.0792 max mem: 9305 +Train: [18] Total time: 0:18:54 (0.1816 s / it) +Averaged stats: lr: 0.000118 grad: 0.0647 (0.0662) loss: 0.8833 (0.8820) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:05:46 loss: 0.9014 (0.9014) time: 5.5926 data: 5.5249 max mem: 9305 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8895 (0.8902) time: 0.1581 data: 0.1287 max mem: 9305 +Eval (hcp-train-subset): [18] Total time: 0:00:15 (0.2455 s / it) +Averaged stats (hcp-train-subset): loss: 0.8895 (0.8902) +Eval (hcp-val): [18] [ 0/62] eta: 0:03:59 loss: 0.8814 (0.8814) time: 3.8599 data: 3.7610 max mem: 9305 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8847 (0.8846) time: 0.1413 data: 0.1125 max mem: 9305 +Eval (hcp-val): [18] Total time: 0:00:14 (0.2289 s / it) +Averaged stats (hcp-val): loss: 0.8847 (0.8846) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [19] [ 0/6250] eta: 10:08:09 lr: 0.000118 grad: 0.2321 (0.2321) loss: 0.8819 (0.8819) time: 5.8383 data: 5.7042 max mem: 9305 +Train: [19] [ 100/6250] eta: 0:22:10 lr: 0.000118 grad: 0.0676 (0.0730) loss: 0.8798 (0.8920) time: 0.1508 data: 0.0470 max mem: 9305 +Train: [19] [ 200/6250] eta: 0:19:41 lr: 0.000118 grad: 0.0628 (0.0705) loss: 0.8793 (0.8863) time: 0.1521 data: 0.0548 max mem: 9305 +Train: [19] [ 300/6250] eta: 0:18:18 lr: 0.000118 grad: 0.0690 (0.0690) loss: 0.8758 (0.8846) time: 0.1870 data: 0.0947 max mem: 9305 +Train: [19] [ 400/6250] eta: 0:17:31 lr: 0.000118 grad: 0.0647 (0.0682) loss: 0.8836 (0.8841) time: 0.1502 data: 0.0438 max mem: 9305 +Train: [19] [ 500/6250] eta: 0:17:37 lr: 0.000118 grad: 0.0636 (0.0670) loss: 0.8862 (0.8841) time: 0.2519 data: 0.1398 max mem: 9305 +Train: [19] [ 600/6250] eta: 0:17:07 lr: 0.000118 grad: 0.0631 (0.0668) loss: 0.8833 (0.8837) time: 0.1561 data: 0.0562 max mem: 9305 +Train: [19] [ 700/6250] eta: 0:17:18 lr: 0.000118 grad: 0.0613 (0.0664) loss: 0.8865 (0.8836) time: 0.1197 data: 0.0003 max mem: 9305 +Train: [19] [ 800/6250] eta: 0:17:15 lr: 0.000118 grad: 0.0654 (0.0663) loss: 0.8800 (0.8834) time: 0.2403 data: 0.1258 max mem: 9305 +Train: [19] [ 900/6250] eta: 0:16:58 lr: 0.000118 grad: 0.0721 (0.0666) loss: 0.8790 (0.8830) time: 0.1790 data: 0.0620 max mem: 9305 +Train: [19] [1000/6250] eta: 0:16:31 lr: 0.000118 grad: 0.0609 (0.0667) loss: 0.8793 (0.8826) time: 0.1452 data: 0.0475 max mem: 9305 +Train: [19] [1100/6250] eta: 0:16:00 lr: 0.000118 grad: 0.0677 (0.0667) loss: 0.8749 (0.8823) time: 0.2018 data: 0.1124 max mem: 9305 +Train: [19] [1200/6250] eta: 0:15:30 lr: 0.000118 grad: 0.0692 (0.0667) loss: 0.8771 (0.8820) time: 0.1389 data: 0.0536 max mem: 9305 +Train: [19] [1300/6250] eta: 0:15:06 lr: 0.000118 grad: 0.0658 (0.0669) loss: 0.8768 (0.8817) time: 0.1526 data: 0.0661 max mem: 9305 +Train: [19] [1400/6250] eta: 0:14:47 lr: 0.000118 grad: 0.0620 (0.0672) loss: 0.8771 (0.8813) time: 0.1953 data: 0.1087 max mem: 9305 +Train: [19] [1500/6250] eta: 0:14:33 lr: 0.000118 grad: 0.0667 (0.0674) loss: 0.8789 (0.8811) time: 0.2160 data: 0.1203 max mem: 9305 +Train: [19] [1600/6250] eta: 0:14:14 lr: 0.000118 grad: 0.0633 (0.0674) loss: 0.8792 (0.8809) time: 0.2005 data: 0.1072 max mem: 9305 +Train: [19] [1700/6250] eta: 0:14:09 lr: 0.000118 grad: 0.0619 (0.0674) loss: 0.8812 (0.8808) time: 0.2877 data: 0.1835 max mem: 9305 +Train: [19] [1800/6250] eta: 0:13:54 lr: 0.000118 grad: 0.0645 (0.0674) loss: 0.8816 (0.8807) time: 0.2944 data: 0.2051 max mem: 9305 +Train: [19] [1900/6250] eta: 0:13:33 lr: 0.000118 grad: 0.0653 (0.0674) loss: 0.8855 (0.8807) time: 0.1490 data: 0.0037 max mem: 9305 +Train: [19] [2000/6250] eta: 0:13:20 lr: 0.000118 grad: 0.0692 (0.0675) loss: 0.8825 (0.8807) time: 0.1555 data: 0.0583 max mem: 9305 +Train: [19] [2100/6250] eta: 0:13:11 lr: 0.000118 grad: 0.0647 (0.0675) loss: 0.8850 (0.8807) time: 0.4408 data: 0.2757 max mem: 9305 +Train: [19] [2200/6250] eta: 0:12:44 lr: 0.000118 grad: 0.0680 (0.0674) loss: 0.8817 (0.8808) time: 0.1208 data: 0.0086 max mem: 9305 +Train: [19] [2300/6250] eta: 0:12:25 lr: 0.000118 grad: 0.0703 (0.0675) loss: 0.8802 (0.8808) time: 0.2069 data: 0.1169 max mem: 9305 +Train: [19] [2400/6250] eta: 0:12:03 lr: 0.000118 grad: 0.0646 (0.0675) loss: 0.8849 (0.8808) time: 0.1586 data: 0.0691 max mem: 9305 +Train: [19] [2500/6250] eta: 0:11:41 lr: 0.000118 grad: 0.0658 (0.0676) loss: 0.8798 (0.8807) time: 0.1987 data: 0.0858 max mem: 9305 +Train: [19] [2600/6250] eta: 0:11:21 lr: 0.000118 grad: 0.0671 (0.0676) loss: 0.8853 (0.8808) time: 0.1636 data: 0.0771 max mem: 9305 +Train: [19] [2700/6250] eta: 0:11:02 lr: 0.000118 grad: 0.0654 (0.0675) loss: 0.8820 (0.8808) time: 0.1649 data: 0.0806 max mem: 9305 +Train: [19] [2800/6250] eta: 0:10:43 lr: 0.000118 grad: 0.0665 (0.0675) loss: 0.8825 (0.8809) time: 0.1476 data: 0.0600 max mem: 9305 +Train: [19] [2900/6250] eta: 0:10:24 lr: 0.000118 grad: 0.0636 (0.0675) loss: 0.8845 (0.8810) time: 0.1450 data: 0.0612 max mem: 9305 +Train: [19] [3000/6250] eta: 0:10:07 lr: 0.000118 grad: 0.0631 (0.0674) loss: 0.8844 (0.8811) time: 0.2046 data: 0.1027 max mem: 9305 +Train: [19] [3100/6250] eta: 0:09:50 lr: 0.000118 grad: 0.0621 (0.0674) loss: 0.8829 (0.8811) time: 0.2780 data: 0.1892 max mem: 9305 +Train: [19] [3200/6250] eta: 0:09:29 lr: 0.000118 grad: 0.0668 (0.0674) loss: 0.8789 (0.8812) time: 0.1760 data: 0.0885 max mem: 9305 +Train: [19] [3300/6250] eta: 0:09:14 lr: 0.000118 grad: 0.0667 (0.0674) loss: 0.8864 (0.8812) time: 0.1264 data: 0.0220 max mem: 9305 +Train: [19] [3400/6250] eta: 0:08:53 lr: 0.000118 grad: 0.0618 (0.0673) loss: 0.8849 (0.8813) time: 0.1380 data: 0.0466 max mem: 9305 +Train: [19] [3500/6250] eta: 0:08:32 lr: 0.000118 grad: 0.0663 (0.0674) loss: 0.8893 (0.8813) time: 0.1270 data: 0.0365 max mem: 9305 +Train: [19] [3600/6250] eta: 0:08:12 lr: 0.000118 grad: 0.0635 (0.0674) loss: 0.8818 (0.8814) time: 0.1546 data: 0.0648 max mem: 9305 +Train: [19] [3700/6250] eta: 0:07:54 lr: 0.000118 grad: 0.0672 (0.0674) loss: 0.8826 (0.8815) time: 0.1926 data: 0.1036 max mem: 9305 +Train: [19] [3800/6250] eta: 0:07:33 lr: 0.000118 grad: 0.0597 (0.0674) loss: 0.8877 (0.8815) time: 0.1329 data: 0.0434 max mem: 9305 +Train: [19] [3900/6250] eta: 0:07:13 lr: 0.000118 grad: 0.0694 (0.0674) loss: 0.8868 (0.8817) time: 0.1502 data: 0.0676 max mem: 9305 +Train: [19] [4000/6250] eta: 0:06:53 lr: 0.000118 grad: 0.0594 (0.0674) loss: 0.8881 (0.8818) time: 0.1838 data: 0.0924 max mem: 9305 +Train: [19] [4100/6250] eta: 0:06:34 lr: 0.000118 grad: 0.0633 (0.0674) loss: 0.8816 (0.8818) time: 0.1527 data: 0.0558 max mem: 9305 +Train: [19] [4200/6250] eta: 0:06:15 lr: 0.000118 grad: 0.0695 (0.0675) loss: 0.8788 (0.8818) time: 0.1529 data: 0.0641 max mem: 9305 +Train: [19] [4300/6250] eta: 0:05:57 lr: 0.000118 grad: 0.0658 (0.0675) loss: 0.8802 (0.8818) time: 0.2234 data: 0.1359 max mem: 9305 +Train: [19] [4400/6250] eta: 0:05:37 lr: 0.000118 grad: 0.0645 (0.0675) loss: 0.8779 (0.8818) time: 0.1955 data: 0.0951 max mem: 9305 +Train: [19] [4500/6250] eta: 0:05:19 lr: 0.000118 grad: 0.0672 (0.0676) loss: 0.8806 (0.8818) time: 0.2005 data: 0.1088 max mem: 9305 +Train: [19] [4600/6250] eta: 0:05:01 lr: 0.000118 grad: 0.0649 (0.0676) loss: 0.8884 (0.8818) time: 0.1808 data: 0.0845 max mem: 9305 +Train: [19] [4700/6250] eta: 0:04:43 lr: 0.000118 grad: 0.0651 (0.0677) loss: 0.8731 (0.8817) time: 0.2600 data: 0.1673 max mem: 9305 +Train: [19] [4800/6250] eta: 0:04:26 lr: 0.000118 grad: 0.0637 (0.0677) loss: 0.8806 (0.8817) time: 0.4128 data: 0.2991 max mem: 9305 +Train: [19] [4900/6250] eta: 0:04:07 lr: 0.000118 grad: 0.0682 (0.0678) loss: 0.8789 (0.8816) time: 0.1924 data: 0.0996 max mem: 9305 +Train: [19] [5000/6250] eta: 0:03:48 lr: 0.000118 grad: 0.0712 (0.0679) loss: 0.8771 (0.8815) time: 0.1695 data: 0.0638 max mem: 9305 +Train: [19] [5100/6250] eta: 0:03:29 lr: 0.000118 grad: 0.0718 (0.0680) loss: 0.8770 (0.8814) time: 0.1510 data: 0.0554 max mem: 9305 +Train: [19] [5200/6250] eta: 0:03:11 lr: 0.000118 grad: 0.0680 (0.0680) loss: 0.8764 (0.8813) time: 0.2150 data: 0.1156 max mem: 9305 +Train: [19] [5300/6250] eta: 0:02:52 lr: 0.000118 grad: 0.0652 (0.0681) loss: 0.8728 (0.8812) time: 0.1274 data: 0.0268 max mem: 9305 +Train: [19] [5400/6250] eta: 0:02:34 lr: 0.000118 grad: 0.0684 (0.0681) loss: 0.8731 (0.8811) time: 0.1677 data: 0.0697 max mem: 9305 +Train: [19] [5500/6250] eta: 0:02:16 lr: 0.000118 grad: 0.0653 (0.0682) loss: 0.8702 (0.8810) time: 0.1595 data: 0.0711 max mem: 9305 +Train: [19] [5600/6250] eta: 0:01:57 lr: 0.000118 grad: 0.0681 (0.0682) loss: 0.8727 (0.8809) time: 0.1116 data: 0.0210 max mem: 9305 +Train: [19] [5700/6250] eta: 0:01:39 lr: 0.000118 grad: 0.0646 (0.0682) loss: 0.8787 (0.8808) time: 0.1802 data: 0.1036 max mem: 9305 +Train: [19] [5800/6250] eta: 0:01:21 lr: 0.000118 grad: 0.0649 (0.0683) loss: 0.8796 (0.8807) time: 0.1868 data: 0.0853 max mem: 9305 +Train: [19] [5900/6250] eta: 0:01:03 lr: 0.000118 grad: 0.0657 (0.0683) loss: 0.8791 (0.8806) time: 0.2363 data: 0.1371 max mem: 9305 +Train: [19] [6000/6250] eta: 0:00:45 lr: 0.000118 grad: 0.0680 (0.0684) loss: 0.8735 (0.8805) time: 0.1649 data: 0.0770 max mem: 9305 +Train: [19] [6100/6250] eta: 0:00:27 lr: 0.000117 grad: 0.0642 (0.0684) loss: 0.8709 (0.8804) time: 0.1514 data: 0.0639 max mem: 9305 +Train: [19] [6200/6250] eta: 0:00:09 lr: 0.000117 grad: 0.0686 (0.0685) loss: 0.8765 (0.8803) time: 0.1735 data: 0.0885 max mem: 9305 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.0736 (0.0685) loss: 0.8734 (0.8802) time: 0.1498 data: 0.0687 max mem: 9305 +Train: [19] Total time: 0:18:52 (0.1811 s / it) +Averaged stats: lr: 0.000117 grad: 0.0736 (0.0685) loss: 0.8734 (0.8802) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:06:15 loss: 0.9032 (0.9032) time: 6.0508 data: 6.0144 max mem: 9305 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8880 (0.8903) time: 0.2048 data: 0.1751 max mem: 9305 +Eval (hcp-train-subset): [19] Total time: 0:00:17 (0.2747 s / it) +Averaged stats (hcp-train-subset): loss: 0.8880 (0.8903) +Making plots (hcp-train-subset): example=6 +Eval (hcp-val): [19] [ 0/62] eta: 0:04:23 loss: 0.8802 (0.8802) time: 4.2561 data: 4.1653 max mem: 9305 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8835 (0.8854) time: 0.1811 data: 0.1497 max mem: 9305 +Eval (hcp-val): [19] Total time: 0:00:17 (0.2825 s / it) +Averaged stats (hcp-val): loss: 0.8835 (0.8854) +Making plots (hcp-val): example=51 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 16:35:56 lr: 0.000117 grad: 0.0724 (0.0724) loss: 0.9181 (0.9181) time: 9.5610 data: 9.4535 max mem: 9305 +Train: [20] [ 100/6250] eta: 0:29:12 lr: 0.000117 grad: 0.0641 (0.0721) loss: 0.8815 (0.8839) time: 0.1151 data: 0.0092 max mem: 9305 +Train: [20] [ 200/6250] eta: 0:25:09 lr: 0.000117 grad: 0.0703 (0.0733) loss: 0.8670 (0.8788) time: 0.2068 data: 0.1080 max mem: 9305 +Train: [20] [ 300/6250] eta: 0:23:15 lr: 0.000117 grad: 0.0705 (0.0739) loss: 0.8745 (0.8759) time: 0.2422 data: 0.1473 max mem: 9305 +Train: [20] [ 400/6250] eta: 0:22:17 lr: 0.000117 grad: 0.0681 (0.0736) loss: 0.8768 (0.8753) time: 0.2628 data: 0.1797 max mem: 9305 +Train: [20] [ 500/6250] eta: 0:22:22 lr: 0.000117 grad: 0.0692 (0.0731) loss: 0.8731 (0.8753) time: 0.5769 data: 0.4167 max mem: 9305 +Train: [20] [ 600/6250] eta: 0:21:37 lr: 0.000117 grad: 0.0678 (0.0728) loss: 0.8802 (0.8755) time: 0.2157 data: 0.1216 max mem: 9305 +Train: [20] [ 700/6250] eta: 0:21:21 lr: 0.000117 grad: 0.0645 (0.0721) loss: 0.8768 (0.8758) time: 0.3488 data: 0.2323 max mem: 9305 +Train: [20] [ 800/6250] eta: 0:20:20 lr: 0.000117 grad: 0.0679 (0.0713) loss: 0.8768 (0.8761) time: 0.1391 data: 0.0320 max mem: 9305 +Train: [20] [ 900/6250] eta: 0:19:46 lr: 0.000117 grad: 0.0661 (0.0708) loss: 0.8741 (0.8762) time: 0.2183 data: 0.1157 max mem: 9305 +Train: [20] [1000/6250] eta: 0:19:27 lr: 0.000117 grad: 0.0600 (0.0703) loss: 0.8757 (0.8763) time: 0.3580 data: 0.2502 max mem: 9305 +Train: [20] [1100/6250] eta: 0:18:50 lr: 0.000117 grad: 0.0651 (0.0701) loss: 0.8751 (0.8765) time: 0.1360 data: 0.0184 max mem: 9305 +Train: [20] [1200/6250] eta: 0:18:11 lr: 0.000117 grad: 0.0667 (0.0700) loss: 0.8769 (0.8765) time: 0.1682 data: 0.0667 max mem: 9305 +Train: [20] [1300/6250] eta: 0:17:39 lr: 0.000117 grad: 0.0650 (0.0698) loss: 0.8784 (0.8767) time: 0.1705 data: 0.0781 max mem: 9305 +Train: [20] [1400/6250] eta: 0:17:19 lr: 0.000117 grad: 0.0612 (0.0699) loss: 0.8797 (0.8768) time: 0.1200 data: 0.0006 max mem: 9305 +Train: [20] [1500/6250] eta: 0:16:47 lr: 0.000117 grad: 0.0665 (0.0699) loss: 0.8743 (0.8769) time: 0.1620 data: 0.0729 max mem: 9305 +Train: [20] [1600/6250] eta: 0:16:21 lr: 0.000117 grad: 0.0703 (0.0699) loss: 0.8791 (0.8770) time: 0.1601 data: 0.0654 max mem: 9305 +Train: [20] [1700/6250] eta: 0:16:00 lr: 0.000117 grad: 0.0691 (0.0700) loss: 0.8776 (0.8770) time: 0.3203 data: 0.2141 max mem: 9305 +Train: [20] [1800/6250] eta: 0:15:28 lr: 0.000117 grad: 0.0648 (0.0699) loss: 0.8814 (0.8770) time: 0.1860 data: 0.0931 max mem: 9305 +Train: [20] [1900/6250] eta: 0:15:10 lr: 0.000117 grad: 0.0645 (0.0699) loss: 0.8789 (0.8771) time: 0.2512 data: 0.1370 max mem: 9305 +Train: [20] [2000/6250] eta: 0:14:40 lr: 0.000117 grad: 0.0670 (0.0699) loss: 0.8767 (0.8771) time: 0.2144 data: 0.1353 max mem: 9305 +Train: [20] [2100/6250] eta: 0:14:18 lr: 0.000117 grad: 0.0684 (0.0699) loss: 0.8797 (0.8772) time: 0.1862 data: 0.0952 max mem: 9305 +Train: [20] [2200/6250] eta: 0:13:52 lr: 0.000117 grad: 0.0666 (0.0700) loss: 0.8754 (0.8773) time: 0.1653 data: 0.0765 max mem: 9305 +Train: [20] [2300/6250] eta: 0:13:27 lr: 0.000117 grad: 0.0662 (0.0701) loss: 0.8804 (0.8772) time: 0.1805 data: 0.0928 max mem: 9305 +Train: [20] [2400/6250] eta: 0:13:04 lr: 0.000117 grad: 0.0690 (0.0701) loss: 0.8731 (0.8771) time: 0.1950 data: 0.1076 max mem: 9305 +Train: [20] [2500/6250] eta: 0:12:45 lr: 0.000117 grad: 0.0678 (0.0702) loss: 0.8777 (0.8772) time: 0.1867 data: 0.0936 max mem: 9305 +Train: [20] [2600/6250] eta: 0:12:23 lr: 0.000117 grad: 0.0681 (0.0703) loss: 0.8758 (0.8772) time: 0.1426 data: 0.0302 max mem: 9305 +Train: [20] [2700/6250] eta: 0:12:07 lr: 0.000117 grad: 0.0710 (0.0703) loss: 0.8746 (0.8772) time: 0.2894 data: 0.1833 max mem: 9305 +Train: [20] [2800/6250] eta: 0:11:43 lr: 0.000117 grad: 0.0670 (0.0703) loss: 0.8792 (0.8773) time: 0.1905 data: 0.0957 max mem: 9305 +Train: [20] [2900/6250] eta: 0:11:19 lr: 0.000117 grad: 0.0694 (0.0703) loss: 0.8777 (0.8773) time: 0.1735 data: 0.0815 max mem: 9305 +Train: [20] [3000/6250] eta: 0:10:57 lr: 0.000117 grad: 0.0702 (0.0703) loss: 0.8721 (0.8773) time: 0.2455 data: 0.1675 max mem: 9305 +Train: [20] [3100/6250] eta: 0:10:33 lr: 0.000117 grad: 0.0661 (0.0702) loss: 0.8789 (0.8775) time: 0.1646 data: 0.0659 max mem: 9305 +Train: [20] [3200/6250] eta: 0:10:11 lr: 0.000117 grad: 0.0703 (0.0702) loss: 0.8770 (0.8775) time: 0.1075 data: 0.0002 max mem: 9305 +Train: [20] [3300/6250] eta: 0:09:48 lr: 0.000117 grad: 0.0643 (0.0702) loss: 0.8829 (0.8776) time: 0.1188 data: 0.0318 max mem: 9305 +Train: [20] [3400/6250] eta: 0:09:26 lr: 0.000117 grad: 0.0732 (0.0702) loss: 0.8809 (0.8777) time: 0.1789 data: 0.0879 max mem: 9305 +Train: [20] [3500/6250] eta: 0:09:04 lr: 0.000117 grad: 0.0635 (0.0702) loss: 0.8835 (0.8778) time: 0.1625 data: 0.0770 max mem: 9305 +Train: [20] [3600/6250] eta: 0:08:42 lr: 0.000117 grad: 0.0692 (0.0702) loss: 0.8787 (0.8778) time: 0.1582 data: 0.0736 max mem: 9305 +Train: [20] [3700/6250] eta: 0:08:20 lr: 0.000117 grad: 0.0683 (0.0701) loss: 0.8785 (0.8779) time: 0.1633 data: 0.0742 max mem: 9305 +Train: [20] [3800/6250] eta: 0:07:59 lr: 0.000117 grad: 0.0668 (0.0701) loss: 0.8762 (0.8779) time: 0.1401 data: 0.0449 max mem: 9305 +Train: [20] [3900/6250] eta: 0:07:39 lr: 0.000117 grad: 0.0667 (0.0701) loss: 0.8781 (0.8780) time: 0.2262 data: 0.1285 max mem: 9305 +Train: [20] [4000/6250] eta: 0:07:21 lr: 0.000117 grad: 0.0650 (0.0700) loss: 0.8856 (0.8780) time: 0.1270 data: 0.0069 max mem: 9305 +Train: [20] [4100/6250] eta: 0:07:03 lr: 0.000117 grad: 0.0680 (0.0700) loss: 0.8790 (0.8780) time: 0.1240 data: 0.0002 max mem: 9305 +Train: [20] [4200/6250] eta: 0:06:43 lr: 0.000117 grad: 0.0666 (0.0700) loss: 0.8757 (0.8780) time: 0.1850 data: 0.0682 max mem: 9305 +Train: [20] [4300/6250] eta: 0:06:23 lr: 0.000117 grad: 0.0672 (0.0699) loss: 0.8785 (0.8781) time: 0.1742 data: 0.0738 max mem: 9305 +Train: [20] [4400/6250] eta: 0:06:03 lr: 0.000117 grad: 0.0645 (0.0699) loss: 0.8766 (0.8781) time: 0.2992 data: 0.1879 max mem: 9305 +Train: [20] [4500/6250] eta: 0:05:43 lr: 0.000117 grad: 0.0665 (0.0699) loss: 0.8760 (0.8781) time: 0.1825 data: 0.0949 max mem: 9305 +Train: [20] [4600/6250] eta: 0:05:22 lr: 0.000117 grad: 0.0645 (0.0698) loss: 0.8776 (0.8781) time: 0.1110 data: 0.0035 max mem: 9305 +Train: [20] [4700/6250] eta: 0:05:02 lr: 0.000117 grad: 0.0699 (0.0698) loss: 0.8752 (0.8781) time: 0.1889 data: 0.1079 max mem: 9305 +Train: [20] [4800/6250] eta: 0:04:42 lr: 0.000117 grad: 0.0656 (0.0698) loss: 0.8736 (0.8781) time: 0.1593 data: 0.0811 max mem: 9305 +Train: [20] [4900/6250] eta: 0:04:22 lr: 0.000117 grad: 0.0685 (0.0698) loss: 0.8780 (0.8780) time: 0.1489 data: 0.0532 max mem: 9305 +Train: [20] [5000/6250] eta: 0:04:02 lr: 0.000117 grad: 0.0659 (0.0697) loss: 0.8786 (0.8780) time: 0.1621 data: 0.0728 max mem: 9305 +Train: [20] [5100/6250] eta: 0:03:42 lr: 0.000117 grad: 0.0682 (0.0697) loss: 0.8773 (0.8780) time: 0.2168 data: 0.1249 max mem: 9305 +Train: [20] [5200/6250] eta: 0:03:22 lr: 0.000117 grad: 0.0645 (0.0696) loss: 0.8776 (0.8781) time: 0.1743 data: 0.0764 max mem: 9305 +Train: [20] [5300/6250] eta: 0:03:03 lr: 0.000117 grad: 0.0606 (0.0696) loss: 0.8826 (0.8781) time: 0.1515 data: 0.0549 max mem: 9305 +Train: [20] [5400/6250] eta: 0:02:43 lr: 0.000117 grad: 0.0682 (0.0695) loss: 0.8777 (0.8781) time: 0.2026 data: 0.1143 max mem: 9305 +Train: [20] [5500/6250] eta: 0:02:24 lr: 0.000117 grad: 0.0643 (0.0695) loss: 0.8814 (0.8782) time: 0.1489 data: 0.0516 max mem: 9305 +Train: [20] [5600/6250] eta: 0:02:04 lr: 0.000117 grad: 0.0647 (0.0695) loss: 0.8822 (0.8782) time: 0.1814 data: 0.0895 max mem: 9305 +Train: [20] [5700/6250] eta: 0:01:45 lr: 0.000117 grad: 0.0639 (0.0694) loss: 0.8777 (0.8783) time: 0.1920 data: 0.1001 max mem: 9305 +Train: [20] [5800/6250] eta: 0:01:25 lr: 0.000117 grad: 0.0638 (0.0694) loss: 0.8830 (0.8784) time: 0.1949 data: 0.1090 max mem: 9305 +Train: [20] [5900/6250] eta: 0:01:06 lr: 0.000117 grad: 0.0701 (0.0694) loss: 0.8779 (0.8784) time: 0.2061 data: 0.1238 max mem: 9305 +Train: [20] [6000/6250] eta: 0:00:47 lr: 0.000116 grad: 0.0688 (0.0694) loss: 0.8755 (0.8784) time: 0.2397 data: 0.1439 max mem: 9305 +Train: [20] [6100/6250] eta: 0:00:28 lr: 0.000116 grad: 0.0673 (0.0694) loss: 0.8752 (0.8784) time: 0.1216 data: 0.0160 max mem: 9305 +Train: [20] [6200/6250] eta: 0:00:09 lr: 0.000116 grad: 0.0649 (0.0694) loss: 0.8771 (0.8784) time: 0.2010 data: 0.1074 max mem: 9305 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.0643 (0.0694) loss: 0.8804 (0.8784) time: 0.2397 data: 0.1501 max mem: 9305 +Train: [20] Total time: 0:20:01 (0.1922 s / it) +Averaged stats: lr: 0.000116 grad: 0.0643 (0.0694) loss: 0.8804 (0.8784) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:05:59 loss: 0.9011 (0.9011) time: 5.7966 data: 5.7593 max mem: 9305 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8899 (0.8902) time: 0.1488 data: 0.1199 max mem: 9305 +Eval (hcp-train-subset): [20] Total time: 0:00:16 (0.2676 s / it) +Averaged stats (hcp-train-subset): loss: 0.8899 (0.8902) +Eval (hcp-val): [20] [ 0/62] eta: 0:03:34 loss: 0.8862 (0.8862) time: 3.4534 data: 3.3313 max mem: 9305 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8841 (0.8857) time: 0.1598 data: 0.1305 max mem: 9305 +Eval (hcp-val): [20] Total time: 0:00:17 (0.2870 s / it) +Averaged stats (hcp-val): loss: 0.8841 (0.8857) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [21] [ 0/6250] eta: 12:30:05 lr: 0.000116 grad: 0.0619 (0.0619) loss: 0.9052 (0.9052) time: 7.2009 data: 6.9776 max mem: 9305 +Train: [21] [ 100/6250] eta: 0:29:00 lr: 0.000116 grad: 0.0686 (0.0749) loss: 0.8796 (0.8794) time: 0.2906 data: 0.1856 max mem: 9305 +Train: [21] [ 200/6250] eta: 0:28:13 lr: 0.000116 grad: 0.0684 (0.0726) loss: 0.8800 (0.8788) time: 0.5253 data: 0.4387 max mem: 9305 +Train: [21] [ 300/6250] eta: 0:25:13 lr: 0.000116 grad: 0.0644 (0.0726) loss: 0.8746 (0.8778) time: 0.3006 data: 0.1978 max mem: 9305 +Train: [21] [ 400/6250] eta: 0:24:13 lr: 0.000116 grad: 0.0662 (0.0711) loss: 0.8740 (0.8777) time: 0.1809 data: 0.0789 max mem: 9305 +Train: [21] [ 500/6250] eta: 0:22:46 lr: 0.000116 grad: 0.0654 (0.0703) loss: 0.8729 (0.8776) time: 0.2075 data: 0.1076 max mem: 9305 +Train: [21] [ 600/6250] eta: 0:22:08 lr: 0.000116 grad: 0.0660 (0.0699) loss: 0.8788 (0.8777) time: 0.1304 data: 0.0249 max mem: 9305 +Train: [21] [ 700/6250] eta: 0:21:49 lr: 0.000116 grad: 0.0675 (0.0701) loss: 0.8731 (0.8775) time: 0.4513 data: 0.2779 max mem: 9305 +Train: [21] [ 800/6250] eta: 0:20:34 lr: 0.000116 grad: 0.0692 (0.0698) loss: 0.8738 (0.8773) time: 0.1616 data: 0.0675 max mem: 9305 +Train: [21] [ 900/6250] eta: 0:19:54 lr: 0.000116 grad: 0.0659 (0.0698) loss: 0.8780 (0.8773) time: 0.2359 data: 0.1016 max mem: 9305 +Train: [21] [1000/6250] eta: 0:19:36 lr: 0.000116 grad: 0.0678 (0.0695) loss: 0.8748 (0.8775) time: 0.2007 data: 0.0859 max mem: 9305 +Train: [21] [1100/6250] eta: 0:19:15 lr: 0.000116 grad: 0.0696 (0.0696) loss: 0.8766 (0.8775) time: 0.4160 data: 0.2807 max mem: 9305 +Train: [21] [1200/6250] eta: 0:18:26 lr: 0.000116 grad: 0.0647 (0.0695) loss: 0.8782 (0.8776) time: 0.2217 data: 0.1272 max mem: 9305 +Train: [21] [1300/6250] eta: 0:17:44 lr: 0.000116 grad: 0.0646 (0.0693) loss: 0.8810 (0.8778) time: 0.1484 data: 0.0530 max mem: 9305 +Train: [21] [1400/6250] eta: 0:17:05 lr: 0.000116 grad: 0.0664 (0.0693) loss: 0.8811 (0.8779) time: 0.1366 data: 0.0480 max mem: 9305 +Train: [21] [1500/6250] eta: 0:16:32 lr: 0.000116 grad: 0.0690 (0.0693) loss: 0.8786 (0.8780) time: 0.1865 data: 0.0916 max mem: 9305 +Train: [21] [1600/6250] eta: 0:16:14 lr: 0.000116 grad: 0.0659 (0.0692) loss: 0.8767 (0.8780) time: 0.2450 data: 0.1304 max mem: 9305 +Train: [21] [1700/6250] eta: 0:15:41 lr: 0.000116 grad: 0.0658 (0.0693) loss: 0.8820 (0.8781) time: 0.1905 data: 0.0831 max mem: 9305 +Train: [21] [1800/6250] eta: 0:15:28 lr: 0.000116 grad: 0.0641 (0.0693) loss: 0.8780 (0.8780) time: 0.3349 data: 0.2251 max mem: 9305 +Train: [21] [1900/6250] eta: 0:15:06 lr: 0.000116 grad: 0.0640 (0.0694) loss: 0.8785 (0.8780) time: 0.3424 data: 0.2148 max mem: 9305 +Train: [21] [2000/6250] eta: 0:14:35 lr: 0.000116 grad: 0.0666 (0.0694) loss: 0.8798 (0.8780) time: 0.1875 data: 0.0959 max mem: 9305 +Train: [21] [2100/6250] eta: 0:14:12 lr: 0.000116 grad: 0.0692 (0.0695) loss: 0.8777 (0.8779) time: 0.1607 data: 0.0768 max mem: 9305 +Train: [21] [2200/6250] eta: 0:13:44 lr: 0.000116 grad: 0.0669 (0.0694) loss: 0.8813 (0.8779) time: 0.1979 data: 0.1165 max mem: 9305 +Train: [21] [2300/6250] eta: 0:13:33 lr: 0.000116 grad: 0.0705 (0.0694) loss: 0.8775 (0.8779) time: 0.1166 data: 0.0003 max mem: 9305 +Train: [21] [2400/6250] eta: 0:13:07 lr: 0.000116 grad: 0.0638 (0.0693) loss: 0.8805 (0.8779) time: 0.1437 data: 0.0467 max mem: 9305 +Train: [21] [2500/6250] eta: 0:12:43 lr: 0.000116 grad: 0.0676 (0.0693) loss: 0.8764 (0.8780) time: 0.2237 data: 0.1293 max mem: 9305 +Train: [21] [2600/6250] eta: 0:12:15 lr: 0.000116 grad: 0.0679 (0.0694) loss: 0.8801 (0.8780) time: 0.1285 data: 0.0352 max mem: 9305 +Train: [21] [2700/6250] eta: 0:11:49 lr: 0.000116 grad: 0.0706 (0.0694) loss: 0.8781 (0.8781) time: 0.1510 data: 0.0610 max mem: 9305 +Train: [21] [2800/6250] eta: 0:11:26 lr: 0.000116 grad: 0.0695 (0.0694) loss: 0.8858 (0.8781) time: 0.1645 data: 0.0823 max mem: 9305 +Train: [21] [2900/6250] eta: 0:11:03 lr: 0.000116 grad: 0.0643 (0.0693) loss: 0.8824 (0.8782) time: 0.1664 data: 0.0728 max mem: 9305 +Train: [21] [3000/6250] eta: 0:10:40 lr: 0.000116 grad: 0.0660 (0.0692) loss: 0.8802 (0.8783) time: 0.1465 data: 0.0579 max mem: 9305 +Train: [21] [3100/6250] eta: 0:10:18 lr: 0.000116 grad: 0.0715 (0.0692) loss: 0.8743 (0.8783) time: 0.2104 data: 0.1177 max mem: 9305 +Train: [21] [3200/6250] eta: 0:09:56 lr: 0.000116 grad: 0.0682 (0.0692) loss: 0.8781 (0.8783) time: 0.1674 data: 0.0739 max mem: 9305 +Train: [21] [3300/6250] eta: 0:09:35 lr: 0.000116 grad: 0.0650 (0.0692) loss: 0.8736 (0.8784) time: 0.2151 data: 0.1371 max mem: 9305 +Train: [21] [3400/6250] eta: 0:09:13 lr: 0.000116 grad: 0.0658 (0.0692) loss: 0.8840 (0.8784) time: 0.1790 data: 0.0958 max mem: 9305 +Train: [21] [3500/6250] eta: 0:08:54 lr: 0.000116 grad: 0.0687 (0.0692) loss: 0.8801 (0.8784) time: 0.2069 data: 0.1261 max mem: 9305 +Train: [21] [3600/6250] eta: 0:08:37 lr: 0.000116 grad: 0.0605 (0.0692) loss: 0.8733 (0.8784) time: 0.2463 data: 0.1537 max mem: 9305 +Train: [21] [3700/6250] eta: 0:08:22 lr: 0.000116 grad: 0.0683 (0.0692) loss: 0.8757 (0.8784) time: 0.4039 data: 0.3083 max mem: 9305 +Train: [21] [3800/6250] eta: 0:08:00 lr: 0.000116 grad: 0.0674 (0.0693) loss: 0.8750 (0.8784) time: 0.1859 data: 0.0926 max mem: 9305 +Train: [21] [3900/6250] eta: 0:07:39 lr: 0.000116 grad: 0.0696 (0.0693) loss: 0.8797 (0.8784) time: 0.1484 data: 0.0548 max mem: 9305 +Train: [21] [4000/6250] eta: 0:07:21 lr: 0.000116 grad: 0.0771 (0.0694) loss: 0.8755 (0.8783) time: 0.2028 data: 0.1120 max mem: 9305 +Train: [21] [4100/6250] eta: 0:07:01 lr: 0.000116 grad: 0.0688 (0.0695) loss: 0.8757 (0.8783) time: 0.1671 data: 0.0771 max mem: 9305 +Train: [21] [4200/6250] eta: 0:06:42 lr: 0.000116 grad: 0.0660 (0.0695) loss: 0.8738 (0.8783) time: 0.1415 data: 0.0411 max mem: 9305 +Train: [21] [4300/6250] eta: 0:06:21 lr: 0.000116 grad: 0.0670 (0.0695) loss: 0.8783 (0.8783) time: 0.1648 data: 0.0740 max mem: 9305 +Train: [21] [4400/6250] eta: 0:06:02 lr: 0.000116 grad: 0.0630 (0.0696) loss: 0.8823 (0.8783) time: 0.2814 data: 0.2018 max mem: 9305 +Train: [21] [4500/6250] eta: 0:05:41 lr: 0.000116 grad: 0.0694 (0.0696) loss: 0.8754 (0.8783) time: 0.1776 data: 0.0814 max mem: 9305 +Train: [21] [4600/6250] eta: 0:05:21 lr: 0.000116 grad: 0.0656 (0.0696) loss: 0.8830 (0.8783) time: 0.1252 data: 0.0308 max mem: 9305 +Train: [21] [4700/6250] eta: 0:05:03 lr: 0.000116 grad: 0.0663 (0.0696) loss: 0.8768 (0.8784) time: 0.1035 data: 0.0002 max mem: 9305 +Train: [21] [4800/6250] eta: 0:04:44 lr: 0.000116 grad: 0.0658 (0.0696) loss: 0.8767 (0.8784) time: 0.2920 data: 0.1753 max mem: 9305 +Train: [21] [4900/6250] eta: 0:04:24 lr: 0.000116 grad: 0.0639 (0.0695) loss: 0.8802 (0.8784) time: 0.2828 data: 0.1951 max mem: 9305 +Train: [21] [5000/6250] eta: 0:04:04 lr: 0.000116 grad: 0.0667 (0.0695) loss: 0.8800 (0.8785) time: 0.1856 data: 0.0881 max mem: 9305 +Train: [21] [5100/6250] eta: 0:03:44 lr: 0.000116 grad: 0.0610 (0.0695) loss: 0.8829 (0.8785) time: 0.1902 data: 0.0976 max mem: 9305 +Train: [21] [5200/6250] eta: 0:03:23 lr: 0.000116 grad: 0.0678 (0.0695) loss: 0.8828 (0.8786) time: 0.1714 data: 0.0818 max mem: 9305 +Train: [21] [5300/6250] eta: 0:03:03 lr: 0.000116 grad: 0.0691 (0.0695) loss: 0.8738 (0.8785) time: 0.1576 data: 0.0682 max mem: 9305 +Train: [21] [5400/6250] eta: 0:02:44 lr: 0.000116 grad: 0.0683 (0.0695) loss: 0.8792 (0.8786) time: 0.1602 data: 0.0705 max mem: 9305 +Train: [21] [5500/6250] eta: 0:02:25 lr: 0.000116 grad: 0.0712 (0.0696) loss: 0.8797 (0.8786) time: 0.2064 data: 0.0779 max mem: 9305 +Train: [21] [5600/6250] eta: 0:02:05 lr: 0.000115 grad: 0.0678 (0.0696) loss: 0.8801 (0.8786) time: 0.2502 data: 0.1498 max mem: 9305 +Train: [21] [5700/6250] eta: 0:01:46 lr: 0.000115 grad: 0.0667 (0.0696) loss: 0.8792 (0.8786) time: 0.1887 data: 0.0925 max mem: 9305 +Train: [21] [5800/6250] eta: 0:01:26 lr: 0.000115 grad: 0.0653 (0.0696) loss: 0.8801 (0.8786) time: 0.1889 data: 0.1011 max mem: 9305 +Train: [21] [5900/6250] eta: 0:01:07 lr: 0.000115 grad: 0.0668 (0.0696) loss: 0.8802 (0.8787) time: 0.1577 data: 0.0443 max mem: 9305 +Train: [21] [6000/6250] eta: 0:00:48 lr: 0.000115 grad: 0.0716 (0.0696) loss: 0.8793 (0.8787) time: 0.1274 data: 0.0183 max mem: 9305 +Train: [21] [6100/6250] eta: 0:00:28 lr: 0.000115 grad: 0.0648 (0.0697) loss: 0.8814 (0.8787) time: 0.1688 data: 0.0647 max mem: 9305 +Train: [21] [6200/6250] eta: 0:00:09 lr: 0.000115 grad: 0.0699 (0.0697) loss: 0.8786 (0.8787) time: 0.1997 data: 0.1071 max mem: 9305 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.0708 (0.0697) loss: 0.8766 (0.8787) time: 0.1821 data: 0.0713 max mem: 9305 +Train: [21] Total time: 0:20:14 (0.1944 s / it) +Averaged stats: lr: 0.000115 grad: 0.0708 (0.0697) loss: 0.8766 (0.8787) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:04:27 loss: 0.8986 (0.8986) time: 4.3159 data: 4.2401 max mem: 9305 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8871 (0.8879) time: 0.1540 data: 0.1254 max mem: 9305 +Eval (hcp-train-subset): [21] Total time: 0:00:15 (0.2440 s / it) +Averaged stats (hcp-train-subset): loss: 0.8871 (0.8879) +Eval (hcp-val): [21] [ 0/62] eta: 0:04:46 loss: 0.8817 (0.8817) time: 4.6168 data: 4.5492 max mem: 9305 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8824 (0.8841) time: 0.1314 data: 0.1023 max mem: 9305 +Eval (hcp-val): [21] Total time: 0:00:15 (0.2443 s / it) +Averaged stats (hcp-val): loss: 0.8824 (0.8841) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [22] [ 0/6250] eta: 12:54:59 lr: 0.000115 grad: 0.1301 (0.1301) loss: 0.8655 (0.8655) time: 7.4398 data: 7.3335 max mem: 9305 +Train: [22] [ 100/6250] eta: 0:26:07 lr: 0.000115 grad: 0.0606 (0.0742) loss: 0.8937 (0.8921) time: 0.2272 data: 0.1269 max mem: 9305 +Train: [22] [ 200/6250] eta: 0:23:25 lr: 0.000115 grad: 0.0651 (0.0712) loss: 0.8874 (0.8904) time: 0.2075 data: 0.0976 max mem: 9305 +Train: [22] [ 300/6250] eta: 0:21:31 lr: 0.000115 grad: 0.0646 (0.0700) loss: 0.8806 (0.8876) time: 0.1844 data: 0.0892 max mem: 9305 +Train: [22] [ 400/6250] eta: 0:21:25 lr: 0.000115 grad: 0.0619 (0.0703) loss: 0.8796 (0.8855) time: 0.2593 data: 0.1485 max mem: 9305 +Train: [22] [ 500/6250] eta: 0:20:00 lr: 0.000115 grad: 0.0614 (0.0694) loss: 0.8799 (0.8842) time: 0.1625 data: 0.0754 max mem: 9305 +Train: [22] [ 600/6250] eta: 0:19:36 lr: 0.000115 grad: 0.0652 (0.0686) loss: 0.8796 (0.8835) time: 0.2054 data: 0.0700 max mem: 9305 +Train: [22] [ 700/6250] eta: 0:20:03 lr: 0.000115 grad: 0.0613 (0.0683) loss: 0.8893 (0.8829) time: 0.1300 data: 0.0005 max mem: 9305 +Train: [22] [ 800/6250] eta: 0:19:30 lr: 0.000115 grad: 0.0618 (0.0678) loss: 0.8785 (0.8828) time: 0.2061 data: 0.0954 max mem: 9305 +Train: [22] [ 900/6250] eta: 0:19:13 lr: 0.000115 grad: 0.0595 (0.0672) loss: 0.8822 (0.8829) time: 0.3669 data: 0.2373 max mem: 9305 +Train: [22] [1000/6250] eta: 0:18:29 lr: 0.000115 grad: 0.0628 (0.0669) loss: 0.8829 (0.8828) time: 0.1614 data: 0.0694 max mem: 9305 +Train: [22] [1100/6250] eta: 0:18:10 lr: 0.000115 grad: 0.0655 (0.0668) loss: 0.8839 (0.8826) time: 0.3623 data: 0.1897 max mem: 9305 +Train: [22] [1200/6250] eta: 0:17:28 lr: 0.000115 grad: 0.0590 (0.0665) loss: 0.8825 (0.8824) time: 0.1715 data: 0.0786 max mem: 9305 +Train: [22] [1300/6250] eta: 0:17:10 lr: 0.000115 grad: 0.0627 (0.0666) loss: 0.8767 (0.8822) time: 0.1279 data: 0.0229 max mem: 9305 +Train: [22] [1400/6250] eta: 0:17:07 lr: 0.000115 grad: 0.0626 (0.0668) loss: 0.8752 (0.8820) time: 0.4307 data: 0.3224 max mem: 9305 +Train: [22] [1500/6250] eta: 0:16:23 lr: 0.000115 grad: 0.0638 (0.0666) loss: 0.8808 (0.8819) time: 0.1320 data: 0.0241 max mem: 9305 +Train: [22] [1600/6250] eta: 0:15:52 lr: 0.000115 grad: 0.0641 (0.0665) loss: 0.8811 (0.8817) time: 0.1849 data: 0.0939 max mem: 9305 +Train: [22] [1700/6250] eta: 0:15:33 lr: 0.000115 grad: 0.0626 (0.0665) loss: 0.8794 (0.8815) time: 0.3056 data: 0.1749 max mem: 9305 +Train: [22] [1800/6250] eta: 0:15:12 lr: 0.000115 grad: 0.0625 (0.0664) loss: 0.8763 (0.8813) time: 0.1031 data: 0.0003 max mem: 9305 +Train: [22] [1900/6250] eta: 0:14:39 lr: 0.000115 grad: 0.0661 (0.0664) loss: 0.8762 (0.8811) time: 0.1720 data: 0.0790 max mem: 9305 +Train: [22] [2000/6250] eta: 0:14:09 lr: 0.000115 grad: 0.0676 (0.0664) loss: 0.8731 (0.8809) time: 0.1592 data: 0.0771 max mem: 9305 +Train: [22] [2100/6250] eta: 0:13:42 lr: 0.000115 grad: 0.0633 (0.0665) loss: 0.8783 (0.8807) time: 0.1689 data: 0.0820 max mem: 9305 +Train: [22] [2200/6250] eta: 0:13:23 lr: 0.000115 grad: 0.0642 (0.0666) loss: 0.8776 (0.8806) time: 0.0966 data: 0.0004 max mem: 9305 +Train: [22] [2300/6250] eta: 0:12:57 lr: 0.000115 grad: 0.0666 (0.0667) loss: 0.8746 (0.8805) time: 0.1859 data: 0.0897 max mem: 9305 +Train: [22] [2400/6250] eta: 0:12:31 lr: 0.000115 grad: 0.0665 (0.0668) loss: 0.8781 (0.8803) time: 0.1328 data: 0.0471 max mem: 9305 +Train: [22] [2500/6250] eta: 0:12:07 lr: 0.000115 grad: 0.0642 (0.0668) loss: 0.8809 (0.8802) time: 0.1785 data: 0.0899 max mem: 9305 +Train: [22] [2600/6250] eta: 0:11:43 lr: 0.000115 grad: 0.0669 (0.0669) loss: 0.8789 (0.8801) time: 0.1623 data: 0.0770 max mem: 9305 +Train: [22] [2700/6250] eta: 0:11:19 lr: 0.000115 grad: 0.0667 (0.0670) loss: 0.8775 (0.8800) time: 0.1607 data: 0.0739 max mem: 9305 +Train: [22] [2800/6250] eta: 0:10:58 lr: 0.000115 grad: 0.0652 (0.0672) loss: 0.8743 (0.8798) time: 0.2054 data: 0.1183 max mem: 9305 +Train: [22] [2900/6250] eta: 0:10:35 lr: 0.000115 grad: 0.0650 (0.0672) loss: 0.8778 (0.8796) time: 0.1339 data: 0.0439 max mem: 9305 +Train: [22] [3000/6250] eta: 0:10:17 lr: 0.000115 grad: 0.0618 (0.0672) loss: 0.8817 (0.8796) time: 0.1620 data: 0.0754 max mem: 9305 +Train: [22] [3100/6250] eta: 0:10:00 lr: 0.000115 grad: 0.0747 (0.0674) loss: 0.8735 (0.8795) time: 0.1130 data: 0.0003 max mem: 9305 +Train: [22] [3200/6250] eta: 0:09:43 lr: 0.000115 grad: 0.0662 (0.0674) loss: 0.8727 (0.8794) time: 0.1044 data: 0.0003 max mem: 9305 +Train: [22] [3300/6250] eta: 0:09:23 lr: 0.000115 grad: 0.0700 (0.0675) loss: 0.8696 (0.8793) time: 0.1517 data: 0.0685 max mem: 9305 +Train: [22] [3400/6250] eta: 0:09:02 lr: 0.000115 grad: 0.0679 (0.0676) loss: 0.8772 (0.8793) time: 0.1742 data: 0.0910 max mem: 9305 +Train: [22] [3500/6250] eta: 0:08:44 lr: 0.000115 grad: 0.0658 (0.0677) loss: 0.8763 (0.8793) time: 0.2675 data: 0.1860 max mem: 9305 +Train: [22] [3600/6250] eta: 0:08:24 lr: 0.000115 grad: 0.0647 (0.0677) loss: 0.8803 (0.8792) time: 0.1583 data: 0.0550 max mem: 9305 +Train: [22] [3700/6250] eta: 0:08:04 lr: 0.000115 grad: 0.0686 (0.0678) loss: 0.8734 (0.8791) time: 0.1910 data: 0.1016 max mem: 9305 +Train: [22] [3800/6250] eta: 0:07:42 lr: 0.000115 grad: 0.0693 (0.0679) loss: 0.8755 (0.8791) time: 0.1436 data: 0.0549 max mem: 9305 +Train: [22] [3900/6250] eta: 0:07:22 lr: 0.000115 grad: 0.0656 (0.0679) loss: 0.8746 (0.8790) time: 0.1611 data: 0.0787 max mem: 9305 +Train: [22] [4000/6250] eta: 0:07:02 lr: 0.000115 grad: 0.0679 (0.0680) loss: 0.8768 (0.8789) time: 0.1867 data: 0.0799 max mem: 9305 +Train: [22] [4100/6250] eta: 0:06:42 lr: 0.000115 grad: 0.0698 (0.0680) loss: 0.8743 (0.8789) time: 0.1669 data: 0.0716 max mem: 9305 +Train: [22] [4200/6250] eta: 0:06:22 lr: 0.000115 grad: 0.0700 (0.0681) loss: 0.8794 (0.8789) time: 0.1328 data: 0.0507 max mem: 9305 +Train: [22] [4300/6250] eta: 0:06:03 lr: 0.000115 grad: 0.0701 (0.0682) loss: 0.8777 (0.8788) time: 0.2233 data: 0.1304 max mem: 9305 +Train: [22] [4400/6250] eta: 0:05:45 lr: 0.000115 grad: 0.0652 (0.0682) loss: 0.8752 (0.8788) time: 0.2722 data: 0.1680 max mem: 9305 +Train: [22] [4500/6250] eta: 0:05:26 lr: 0.000115 grad: 0.0726 (0.0683) loss: 0.8787 (0.8788) time: 0.1349 data: 0.0292 max mem: 9305 +Train: [22] [4600/6250] eta: 0:05:08 lr: 0.000115 grad: 0.0660 (0.0684) loss: 0.8777 (0.8787) time: 0.1986 data: 0.0995 max mem: 9305 +Train: [22] [4700/6250] eta: 0:04:48 lr: 0.000115 grad: 0.0692 (0.0684) loss: 0.8771 (0.8787) time: 0.1486 data: 0.0588 max mem: 9305 +Train: [22] [4800/6250] eta: 0:04:28 lr: 0.000115 grad: 0.0643 (0.0684) loss: 0.8741 (0.8787) time: 0.1363 data: 0.0435 max mem: 9305 +Train: [22] [4900/6250] eta: 0:04:09 lr: 0.000114 grad: 0.0716 (0.0684) loss: 0.8739 (0.8786) time: 0.1269 data: 0.0436 max mem: 9305 +Train: [22] [5000/6250] eta: 0:03:50 lr: 0.000114 grad: 0.0688 (0.0685) loss: 0.8786 (0.8785) time: 0.1936 data: 0.1072 max mem: 9305 +Train: [22] [5100/6250] eta: 0:03:32 lr: 0.000114 grad: 0.0695 (0.0686) loss: 0.8782 (0.8785) time: 0.2432 data: 0.1556 max mem: 9305 +Train: [22] [5200/6250] eta: 0:03:12 lr: 0.000114 grad: 0.0658 (0.0686) loss: 0.8753 (0.8785) time: 0.1418 data: 0.0541 max mem: 9305 +Train: [22] [5300/6250] eta: 0:02:54 lr: 0.000114 grad: 0.0701 (0.0686) loss: 0.8780 (0.8784) time: 0.1785 data: 0.0915 max mem: 9305 +Train: [22] [5400/6250] eta: 0:02:35 lr: 0.000114 grad: 0.0655 (0.0686) loss: 0.8771 (0.8784) time: 0.2442 data: 0.1533 max mem: 9305 +Train: [22] [5500/6250] eta: 0:02:18 lr: 0.000114 grad: 0.0672 (0.0686) loss: 0.8759 (0.8784) time: 0.2947 data: 0.2018 max mem: 9305 +Train: [22] [5600/6250] eta: 0:01:59 lr: 0.000114 grad: 0.0644 (0.0686) loss: 0.8779 (0.8783) time: 0.2461 data: 0.1628 max mem: 9305 +Train: [22] [5700/6250] eta: 0:01:41 lr: 0.000114 grad: 0.0654 (0.0687) loss: 0.8797 (0.8783) time: 0.1138 data: 0.0003 max mem: 9305 +Train: [22] [5800/6250] eta: 0:01:23 lr: 0.000114 grad: 0.0686 (0.0687) loss: 0.8784 (0.8783) time: 0.1115 data: 0.0124 max mem: 9305 +Train: [22] [5900/6250] eta: 0:01:04 lr: 0.000114 grad: 0.0661 (0.0687) loss: 0.8858 (0.8783) time: 0.2205 data: 0.1208 max mem: 9305 +Train: [22] [6000/6250] eta: 0:00:46 lr: 0.000114 grad: 0.0628 (0.0687) loss: 0.8834 (0.8783) time: 0.1903 data: 0.0745 max mem: 9305 +Train: [22] [6100/6250] eta: 0:00:27 lr: 0.000114 grad: 0.0666 (0.0688) loss: 0.8782 (0.8783) time: 0.1085 data: 0.0024 max mem: 9305 +Train: [22] [6200/6250] eta: 0:00:09 lr: 0.000114 grad: 0.0690 (0.0688) loss: 0.8832 (0.8783) time: 0.1665 data: 0.0753 max mem: 9305 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.0686 (0.0688) loss: 0.8802 (0.8783) time: 0.2009 data: 0.1099 max mem: 9305 +Train: [22] Total time: 0:19:24 (0.1863 s / it) +Averaged stats: lr: 0.000114 grad: 0.0686 (0.0688) loss: 0.8802 (0.8783) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:03:45 loss: 0.9008 (0.9008) time: 3.6340 data: 3.5431 max mem: 9305 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8885 (0.8874) time: 0.1770 data: 0.1477 max mem: 9305 +Eval (hcp-train-subset): [22] Total time: 0:00:15 (0.2471 s / it) +Averaged stats (hcp-train-subset): loss: 0.8885 (0.8874) +Eval (hcp-val): [22] [ 0/62] eta: 0:04:04 loss: 0.8772 (0.8772) time: 3.9499 data: 3.8629 max mem: 9305 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8842 (0.8836) time: 0.1711 data: 0.1424 max mem: 9305 +Eval (hcp-val): [22] Total time: 0:00:15 (0.2573 s / it) +Averaged stats (hcp-val): loss: 0.8842 (0.8836) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [23] [ 0/6250] eta: 7:20:52 lr: 0.000114 grad: 0.0505 (0.0505) loss: 0.8959 (0.8959) time: 4.2323 data: 4.0342 max mem: 9305 +Train: [23] [ 100/6250] eta: 0:24:58 lr: 0.000114 grad: 0.0627 (0.0734) loss: 0.8801 (0.8894) time: 0.2244 data: 0.1091 max mem: 9305 +Train: [23] [ 200/6250] eta: 0:21:51 lr: 0.000114 grad: 0.0644 (0.0695) loss: 0.8873 (0.8862) time: 0.1744 data: 0.0795 max mem: 9305 +Train: [23] [ 300/6250] eta: 0:21:53 lr: 0.000114 grad: 0.0664 (0.0683) loss: 0.8809 (0.8844) time: 0.2122 data: 0.1125 max mem: 9305 +Train: [23] [ 400/6250] eta: 0:20:36 lr: 0.000114 grad: 0.0671 (0.0677) loss: 0.8829 (0.8838) time: 0.1753 data: 0.0885 max mem: 9305 +Train: [23] [ 500/6250] eta: 0:20:33 lr: 0.000114 grad: 0.0678 (0.0676) loss: 0.8761 (0.8828) time: 0.3281 data: 0.2184 max mem: 9305 +Train: [23] [ 600/6250] eta: 0:19:35 lr: 0.000114 grad: 0.0689 (0.0678) loss: 0.8791 (0.8823) time: 0.1353 data: 0.0283 max mem: 9305 +Train: [23] [ 700/6250] eta: 0:19:28 lr: 0.000114 grad: 0.0687 (0.0682) loss: 0.8826 (0.8819) time: 0.1564 data: 0.0509 max mem: 9305 +Train: [23] [ 800/6250] eta: 0:18:36 lr: 0.000114 grad: 0.0699 (0.0680) loss: 0.8755 (0.8815) time: 0.1411 data: 0.0561 max mem: 9305 +Train: [23] [ 900/6250] eta: 0:17:57 lr: 0.000114 grad: 0.0698 (0.0682) loss: 0.8767 (0.8812) time: 0.1735 data: 0.0793 max mem: 9305 +Train: [23] [1000/6250] eta: 0:17:45 lr: 0.000114 grad: 0.0641 (0.0684) loss: 0.8834 (0.8809) time: 0.1170 data: 0.0197 max mem: 9305 +Train: [23] [1100/6250] eta: 0:17:13 lr: 0.000114 grad: 0.0669 (0.0686) loss: 0.8782 (0.8807) time: 0.2087 data: 0.1153 max mem: 9305 +Train: [23] [1200/6250] eta: 0:16:44 lr: 0.000114 grad: 0.0656 (0.0686) loss: 0.8750 (0.8805) time: 0.1541 data: 0.0742 max mem: 9305 +Train: [23] [1300/6250] eta: 0:16:16 lr: 0.000114 grad: 0.0717 (0.0687) loss: 0.8743 (0.8802) time: 0.1845 data: 0.0891 max mem: 9305 +Train: [23] [1400/6250] eta: 0:15:46 lr: 0.000114 grad: 0.0711 (0.0688) loss: 0.8761 (0.8799) time: 0.1704 data: 0.0782 max mem: 9305 +Train: [23] [1500/6250] eta: 0:15:20 lr: 0.000114 grad: 0.0637 (0.0688) loss: 0.8750 (0.8797) time: 0.1924 data: 0.0977 max mem: 9305 +Train: [23] [1600/6250] eta: 0:14:58 lr: 0.000114 grad: 0.0670 (0.0688) loss: 0.8781 (0.8796) time: 0.1592 data: 0.0681 max mem: 9305 +Train: [23] [1700/6250] eta: 0:14:35 lr: 0.000114 grad: 0.0673 (0.0688) loss: 0.8801 (0.8796) time: 0.1687 data: 0.0654 max mem: 9305 +Train: [23] [1800/6250] eta: 0:14:08 lr: 0.000114 grad: 0.0765 (0.0690) loss: 0.8735 (0.8794) time: 0.1812 data: 0.0936 max mem: 9305 +Train: [23] [1900/6250] eta: 0:13:46 lr: 0.000114 grad: 0.0679 (0.0693) loss: 0.8784 (0.8792) time: 0.1690 data: 0.0767 max mem: 9305 +Train: [23] [2000/6250] eta: 0:13:18 lr: 0.000114 grad: 0.0677 (0.0693) loss: 0.8769 (0.8790) time: 0.1847 data: 0.0960 max mem: 9305 +Train: [23] [2100/6250] eta: 0:12:53 lr: 0.000114 grad: 0.0694 (0.0694) loss: 0.8763 (0.8789) time: 0.1672 data: 0.0704 max mem: 9305 +Train: [23] [2200/6250] eta: 0:12:29 lr: 0.000114 grad: 0.0658 (0.0696) loss: 0.8776 (0.8789) time: 0.1689 data: 0.0702 max mem: 9305 +Train: [23] [2300/6250] eta: 0:12:08 lr: 0.000114 grad: 0.0708 (0.0696) loss: 0.8809 (0.8788) time: 0.1909 data: 0.0936 max mem: 9305 +Train: [23] [2400/6250] eta: 0:11:44 lr: 0.000114 grad: 0.0707 (0.0697) loss: 0.8747 (0.8786) time: 0.1184 data: 0.0123 max mem: 9305 +Train: [23] [2500/6250] eta: 0:11:23 lr: 0.000114 grad: 0.0704 (0.0698) loss: 0.8711 (0.8785) time: 0.1501 data: 0.0664 max mem: 9305 +Train: [23] [2600/6250] eta: 0:11:02 lr: 0.000114 grad: 0.0717 (0.0698) loss: 0.8737 (0.8784) time: 0.1557 data: 0.0760 max mem: 9305 +Train: [23] [2700/6250] eta: 0:10:41 lr: 0.000114 grad: 0.0691 (0.0698) loss: 0.8777 (0.8782) time: 0.1584 data: 0.0642 max mem: 9305 +Train: [23] [2800/6250] eta: 0:10:20 lr: 0.000114 grad: 0.0663 (0.0698) loss: 0.8750 (0.8781) time: 0.1227 data: 0.0349 max mem: 9305 +Train: [23] [2900/6250] eta: 0:10:00 lr: 0.000114 grad: 0.0709 (0.0699) loss: 0.8791 (0.8780) time: 0.1520 data: 0.0686 max mem: 9305 +Train: [23] [3000/6250] eta: 0:09:45 lr: 0.000114 grad: 0.0694 (0.0698) loss: 0.8817 (0.8780) time: 0.2341 data: 0.1457 max mem: 9305 +Train: [23] [3100/6250] eta: 0:09:28 lr: 0.000114 grad: 0.0672 (0.0698) loss: 0.8752 (0.8780) time: 0.1735 data: 0.0763 max mem: 9305 +Train: [23] [3200/6250] eta: 0:09:12 lr: 0.000114 grad: 0.0689 (0.0698) loss: 0.8726 (0.8780) time: 0.1425 data: 0.0551 max mem: 9305 +Train: [23] [3300/6250] eta: 0:08:53 lr: 0.000114 grad: 0.0684 (0.0699) loss: 0.8759 (0.8779) time: 0.1395 data: 0.0495 max mem: 9305 +Train: [23] [3400/6250] eta: 0:08:34 lr: 0.000114 grad: 0.0678 (0.0698) loss: 0.8768 (0.8779) time: 0.2048 data: 0.1116 max mem: 9305 +Train: [23] [3500/6250] eta: 0:08:17 lr: 0.000114 grad: 0.0692 (0.0699) loss: 0.8795 (0.8779) time: 0.2153 data: 0.1306 max mem: 9305 +Train: [23] [3600/6250] eta: 0:07:57 lr: 0.000114 grad: 0.0671 (0.0699) loss: 0.8797 (0.8779) time: 0.1899 data: 0.1030 max mem: 9305 +Train: [23] [3700/6250] eta: 0:07:38 lr: 0.000114 grad: 0.0712 (0.0699) loss: 0.8716 (0.8779) time: 0.1544 data: 0.0641 max mem: 9305 +Train: [23] [3800/6250] eta: 0:07:20 lr: 0.000114 grad: 0.0644 (0.0699) loss: 0.8829 (0.8779) time: 0.2013 data: 0.1157 max mem: 9305 +Train: [23] [3900/6250] eta: 0:07:01 lr: 0.000114 grad: 0.0630 (0.0699) loss: 0.8784 (0.8778) time: 0.1734 data: 0.0850 max mem: 9305 +Train: [23] [4000/6250] eta: 0:06:42 lr: 0.000113 grad: 0.0686 (0.0699) loss: 0.8783 (0.8778) time: 0.1902 data: 0.0905 max mem: 9305 +Train: [23] [4100/6250] eta: 0:06:23 lr: 0.000113 grad: 0.0700 (0.0699) loss: 0.8762 (0.8779) time: 0.1674 data: 0.0741 max mem: 9305 +Train: [23] [4200/6250] eta: 0:06:05 lr: 0.000113 grad: 0.0677 (0.0699) loss: 0.8795 (0.8779) time: 0.1821 data: 0.0871 max mem: 9305 +Train: [23] [4300/6250] eta: 0:05:47 lr: 0.000113 grad: 0.0683 (0.0699) loss: 0.8737 (0.8778) time: 0.2111 data: 0.0768 max mem: 9305 +Train: [23] [4400/6250] eta: 0:05:29 lr: 0.000113 grad: 0.0683 (0.0699) loss: 0.8784 (0.8778) time: 0.2003 data: 0.1064 max mem: 9305 +Train: [23] [4500/6250] eta: 0:05:10 lr: 0.000113 grad: 0.0703 (0.0700) loss: 0.8763 (0.8779) time: 0.1601 data: 0.0591 max mem: 9305 +Train: [23] [4600/6250] eta: 0:04:53 lr: 0.000113 grad: 0.0676 (0.0701) loss: 0.8786 (0.8779) time: 0.1433 data: 0.0551 max mem: 9305 +Train: [23] [4700/6250] eta: 0:04:34 lr: 0.000113 grad: 0.0700 (0.0701) loss: 0.8773 (0.8779) time: 0.1563 data: 0.0688 max mem: 9305 +Train: [23] [4800/6250] eta: 0:04:17 lr: 0.000113 grad: 0.0716 (0.0702) loss: 0.8781 (0.8780) time: 0.1854 data: 0.0912 max mem: 9305 +Train: [23] [4900/6250] eta: 0:03:59 lr: 0.000113 grad: 0.0681 (0.0702) loss: 0.8783 (0.8779) time: 0.1850 data: 0.0978 max mem: 9305 +Train: [23] [5000/6250] eta: 0:03:41 lr: 0.000113 grad: 0.0703 (0.0703) loss: 0.8863 (0.8779) time: 0.1472 data: 0.0463 max mem: 9305 +Train: [23] [5100/6250] eta: 0:03:22 lr: 0.000113 grad: 0.0684 (0.0703) loss: 0.8772 (0.8779) time: 0.1516 data: 0.0587 max mem: 9305 +Train: [23] [5200/6250] eta: 0:03:05 lr: 0.000113 grad: 0.0625 (0.0703) loss: 0.8819 (0.8780) time: 0.1891 data: 0.0878 max mem: 9305 +Train: [23] [5300/6250] eta: 0:02:47 lr: 0.000113 grad: 0.0652 (0.0702) loss: 0.8772 (0.8780) time: 0.1868 data: 0.0871 max mem: 9305 +Train: [23] [5400/6250] eta: 0:02:30 lr: 0.000113 grad: 0.0712 (0.0702) loss: 0.8784 (0.8780) time: 0.2631 data: 0.1064 max mem: 9305 +Train: [23] [5500/6250] eta: 0:02:13 lr: 0.000113 grad: 0.0674 (0.0702) loss: 0.8792 (0.8781) time: 0.2061 data: 0.0900 max mem: 9305 +Train: [23] [5600/6250] eta: 0:01:55 lr: 0.000113 grad: 0.0660 (0.0703) loss: 0.8774 (0.8781) time: 0.1562 data: 0.0613 max mem: 9305 +Train: [23] [5700/6250] eta: 0:01:37 lr: 0.000113 grad: 0.0710 (0.0703) loss: 0.8856 (0.8781) time: 0.2094 data: 0.0920 max mem: 9305 +Train: [23] [5800/6250] eta: 0:01:20 lr: 0.000113 grad: 0.0691 (0.0703) loss: 0.8766 (0.8781) time: 0.2005 data: 0.1127 max mem: 9305 +Train: [23] [5900/6250] eta: 0:01:02 lr: 0.000113 grad: 0.0664 (0.0703) loss: 0.8805 (0.8781) time: 0.4385 data: 0.3078 max mem: 9305 +Train: [23] [6000/6250] eta: 0:00:44 lr: 0.000113 grad: 0.0703 (0.0703) loss: 0.8769 (0.8781) time: 0.1466 data: 0.0530 max mem: 9305 +Train: [23] [6100/6250] eta: 0:00:26 lr: 0.000113 grad: 0.0654 (0.0704) loss: 0.8769 (0.8782) time: 0.1016 data: 0.0003 max mem: 9305 +Train: [23] [6200/6250] eta: 0:00:08 lr: 0.000113 grad: 0.0656 (0.0704) loss: 0.8813 (0.8782) time: 0.1720 data: 0.0799 max mem: 9305 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.0659 (0.0704) loss: 0.8827 (0.8782) time: 0.1504 data: 0.0610 max mem: 9305 +Train: [23] Total time: 0:18:46 (0.1802 s / it) +Averaged stats: lr: 0.000113 grad: 0.0659 (0.0704) loss: 0.8827 (0.8782) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:03:33 loss: 0.8960 (0.8960) time: 3.4499 data: 3.3088 max mem: 9305 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8852 (0.8877) time: 0.1635 data: 0.1350 max mem: 9305 +Eval (hcp-train-subset): [23] Total time: 0:00:15 (0.2477 s / it) +Averaged stats (hcp-train-subset): loss: 0.8852 (0.8877) +Eval (hcp-val): [23] [ 0/62] eta: 0:06:04 loss: 0.8828 (0.8828) time: 5.8719 data: 5.8224 max mem: 9305 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8826 (0.8837) time: 0.1663 data: 0.1377 max mem: 9305 +Eval (hcp-val): [23] Total time: 0:00:15 (0.2513 s / it) +Averaged stats (hcp-val): loss: 0.8826 (0.8837) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 11:09:46 lr: 0.000113 grad: 0.1228 (0.1228) loss: 0.8736 (0.8736) time: 6.4298 data: 6.2463 max mem: 9305 +Train: [24] [ 100/6250] eta: 0:25:27 lr: 0.000113 grad: 0.0703 (0.0784) loss: 0.8763 (0.8823) time: 0.2157 data: 0.1078 max mem: 9305 +Train: [24] [ 200/6250] eta: 0:22:11 lr: 0.000113 grad: 0.0731 (0.0768) loss: 0.8776 (0.8816) time: 0.1299 data: 0.0173 max mem: 9305 +Train: [24] [ 300/6250] eta: 0:21:15 lr: 0.000113 grad: 0.0720 (0.0759) loss: 0.8664 (0.8790) time: 0.2609 data: 0.1647 max mem: 9305 +Train: [24] [ 400/6250] eta: 0:20:46 lr: 0.000113 grad: 0.0757 (0.0764) loss: 0.8638 (0.8776) time: 0.1496 data: 0.0192 max mem: 9305 +Train: [24] [ 500/6250] eta: 0:19:41 lr: 0.000113 grad: 0.0739 (0.0760) loss: 0.8746 (0.8770) time: 0.1925 data: 0.1019 max mem: 9305 +Train: [24] [ 600/6250] eta: 0:19:45 lr: 0.000113 grad: 0.0765 (0.0761) loss: 0.8787 (0.8763) time: 0.3532 data: 0.2313 max mem: 9305 +Train: [24] [ 700/6250] eta: 0:18:47 lr: 0.000113 grad: 0.0680 (0.0760) loss: 0.8728 (0.8754) time: 0.1993 data: 0.1064 max mem: 9305 +Train: [24] [ 800/6250] eta: 0:18:48 lr: 0.000113 grad: 0.0696 (0.0761) loss: 0.8705 (0.8749) time: 0.2995 data: 0.2104 max mem: 9305 +Train: [24] [ 900/6250] eta: 0:17:54 lr: 0.000113 grad: 0.0651 (0.0757) loss: 0.8759 (0.8745) time: 0.1220 data: 0.0284 max mem: 9305 +Train: [24] [1000/6250] eta: 0:17:44 lr: 0.000113 grad: 0.0696 (0.0753) loss: 0.8705 (0.8745) time: 0.1145 data: 0.0003 max mem: 9305 +Train: [24] [1100/6250] eta: 0:17:02 lr: 0.000113 grad: 0.0682 (0.0749) loss: 0.8699 (0.8743) time: 0.1945 data: 0.1112 max mem: 9305 +Train: [24] [1200/6250] eta: 0:16:30 lr: 0.000113 grad: 0.0686 (0.0745) loss: 0.8653 (0.8740) time: 0.1686 data: 0.0752 max mem: 9305 +Train: [24] [1300/6250] eta: 0:15:58 lr: 0.000113 grad: 0.0680 (0.0742) loss: 0.8760 (0.8738) time: 0.1798 data: 0.0849 max mem: 9305 +Train: [24] [1400/6250] eta: 0:15:29 lr: 0.000113 grad: 0.0666 (0.0741) loss: 0.8750 (0.8738) time: 0.1544 data: 0.0663 max mem: 9305 +Train: [24] [1500/6250] eta: 0:15:02 lr: 0.000113 grad: 0.0736 (0.0741) loss: 0.8664 (0.8738) time: 0.1643 data: 0.0784 max mem: 9305 +Train: [24] [1600/6250] eta: 0:14:33 lr: 0.000113 grad: 0.0686 (0.0740) loss: 0.8723 (0.8736) time: 0.1678 data: 0.0746 max mem: 9305 +Train: [24] [1700/6250] eta: 0:14:08 lr: 0.000113 grad: 0.0741 (0.0742) loss: 0.8708 (0.8736) time: 0.1701 data: 0.0791 max mem: 9305 +Train: [24] [1800/6250] eta: 0:13:49 lr: 0.000113 grad: 0.0697 (0.0742) loss: 0.8760 (0.8736) time: 0.2100 data: 0.1162 max mem: 9305 +Train: [24] [1900/6250] eta: 0:13:26 lr: 0.000113 grad: 0.0687 (0.0739) loss: 0.8767 (0.8737) time: 0.2027 data: 0.1166 max mem: 9305 +Train: [24] [2000/6250] eta: 0:13:02 lr: 0.000113 grad: 0.0716 (0.0738) loss: 0.8697 (0.8739) time: 0.1210 data: 0.0356 max mem: 9305 +Train: [24] [2100/6250] eta: 0:12:44 lr: 0.000113 grad: 0.0701 (0.0737) loss: 0.8834 (0.8739) time: 0.2532 data: 0.1728 max mem: 9305 +Train: [24] [2200/6250] eta: 0:12:34 lr: 0.000113 grad: 0.0725 (0.0737) loss: 0.8736 (0.8739) time: 0.3842 data: 0.2480 max mem: 9305 +Train: [24] [2300/6250] eta: 0:12:14 lr: 0.000113 grad: 0.0673 (0.0736) loss: 0.8791 (0.8740) time: 0.1741 data: 0.0728 max mem: 9305 +Train: [24] [2400/6250] eta: 0:12:02 lr: 0.000113 grad: 0.0680 (0.0735) loss: 0.8795 (0.8741) time: 0.1199 data: 0.0003 max mem: 9305 +Train: [24] [2500/6250] eta: 0:11:41 lr: 0.000113 grad: 0.0671 (0.0734) loss: 0.8773 (0.8742) time: 0.1374 data: 0.0314 max mem: 9305 +Train: [24] [2600/6250] eta: 0:11:29 lr: 0.000113 grad: 0.0701 (0.0733) loss: 0.8795 (0.8743) time: 0.1234 data: 0.0003 max mem: 9305 +Train: [24] [2700/6250] eta: 0:11:08 lr: 0.000113 grad: 0.0685 (0.0732) loss: 0.8789 (0.8744) time: 0.1916 data: 0.1076 max mem: 9305 +Train: [24] [2800/6250] eta: 0:10:53 lr: 0.000113 grad: 0.0688 (0.0731) loss: 0.8777 (0.8745) time: 0.3029 data: 0.2069 max mem: 9305 +Train: [24] [2900/6250] eta: 0:10:31 lr: 0.000112 grad: 0.0643 (0.0730) loss: 0.8762 (0.8747) time: 0.1735 data: 0.0791 max mem: 9305 +Train: [24] [3000/6250] eta: 0:10:12 lr: 0.000112 grad: 0.0693 (0.0729) loss: 0.8796 (0.8747) time: 0.1728 data: 0.0797 max mem: 9305 +Train: [24] [3100/6250] eta: 0:09:52 lr: 0.000112 grad: 0.0689 (0.0729) loss: 0.8729 (0.8748) time: 0.1787 data: 0.0925 max mem: 9305 +Train: [24] [3200/6250] eta: 0:09:31 lr: 0.000112 grad: 0.0737 (0.0728) loss: 0.8740 (0.8748) time: 0.1766 data: 0.0977 max mem: 9305 +Train: [24] [3300/6250] eta: 0:09:11 lr: 0.000112 grad: 0.0661 (0.0728) loss: 0.8776 (0.8749) time: 0.1928 data: 0.1048 max mem: 9305 +Train: [24] [3400/6250] eta: 0:08:51 lr: 0.000112 grad: 0.0678 (0.0727) loss: 0.8793 (0.8750) time: 0.1763 data: 0.0759 max mem: 9305 +Train: [24] [3500/6250] eta: 0:08:32 lr: 0.000112 grad: 0.0672 (0.0727) loss: 0.8794 (0.8750) time: 0.1904 data: 0.1027 max mem: 9305 +Train: [24] [3600/6250] eta: 0:08:12 lr: 0.000112 grad: 0.0721 (0.0727) loss: 0.8746 (0.8750) time: 0.1672 data: 0.0673 max mem: 9305 +Train: [24] [3700/6250] eta: 0:07:52 lr: 0.000112 grad: 0.0710 (0.0728) loss: 0.8802 (0.8750) time: 0.2003 data: 0.0984 max mem: 9305 +Train: [24] [3800/6250] eta: 0:07:34 lr: 0.000112 grad: 0.0703 (0.0727) loss: 0.8663 (0.8749) time: 0.2252 data: 0.1289 max mem: 9305 +Train: [24] [3900/6250] eta: 0:07:19 lr: 0.000112 grad: 0.0754 (0.0728) loss: 0.8694 (0.8749) time: 0.4558 data: 0.3068 max mem: 9305 +Train: [24] [4000/6250] eta: 0:07:01 lr: 0.000112 grad: 0.0690 (0.0727) loss: 0.8704 (0.8748) time: 0.1529 data: 0.0478 max mem: 9305 +Train: [24] [4100/6250] eta: 0:06:40 lr: 0.000112 grad: 0.0758 (0.0728) loss: 0.8734 (0.8748) time: 0.1634 data: 0.0721 max mem: 9305 +Train: [24] [4200/6250] eta: 0:06:20 lr: 0.000112 grad: 0.0708 (0.0728) loss: 0.8730 (0.8747) time: 0.1566 data: 0.0662 max mem: 9305 +Train: [24] [4300/6250] eta: 0:06:00 lr: 0.000112 grad: 0.0744 (0.0729) loss: 0.8666 (0.8746) time: 0.1934 data: 0.0975 max mem: 9305 +Train: [24] [4400/6250] eta: 0:05:41 lr: 0.000112 grad: 0.0702 (0.0729) loss: 0.8722 (0.8746) time: 0.1975 data: 0.0983 max mem: 9305 +Train: [24] [4500/6250] eta: 0:05:22 lr: 0.000112 grad: 0.0728 (0.0730) loss: 0.8680 (0.8745) time: 0.1473 data: 0.0507 max mem: 9305 +Train: [24] [4600/6250] eta: 0:05:03 lr: 0.000112 grad: 0.0699 (0.0730) loss: 0.8754 (0.8745) time: 0.1714 data: 0.0753 max mem: 9305 +Train: [24] [4700/6250] eta: 0:04:44 lr: 0.000112 grad: 0.0716 (0.0730) loss: 0.8740 (0.8745) time: 0.1736 data: 0.0872 max mem: 9305 +Train: [24] [4800/6250] eta: 0:04:25 lr: 0.000112 grad: 0.0710 (0.0730) loss: 0.8671 (0.8745) time: 0.1813 data: 0.0904 max mem: 9305 +Train: [24] [4900/6250] eta: 0:04:07 lr: 0.000112 grad: 0.0753 (0.0731) loss: 0.8744 (0.8744) time: 0.2071 data: 0.1119 max mem: 9305 +Train: [24] [5000/6250] eta: 0:03:50 lr: 0.000112 grad: 0.0710 (0.0731) loss: 0.8758 (0.8744) time: 0.3722 data: 0.2722 max mem: 9305 +Train: [24] [5100/6250] eta: 0:03:32 lr: 0.000112 grad: 0.0732 (0.0732) loss: 0.8747 (0.8743) time: 0.3429 data: 0.2545 max mem: 9305 +Train: [24] [5200/6250] eta: 0:03:14 lr: 0.000112 grad: 0.0764 (0.0733) loss: 0.8705 (0.8743) time: 0.2798 data: 0.1350 max mem: 9305 +Train: [24] [5300/6250] eta: 0:02:55 lr: 0.000112 grad: 0.0772 (0.0733) loss: 0.8672 (0.8742) time: 0.2067 data: 0.1212 max mem: 9305 +Train: [24] [5400/6250] eta: 0:02:37 lr: 0.000112 grad: 0.0746 (0.0734) loss: 0.8669 (0.8741) time: 0.1796 data: 0.0942 max mem: 9305 +Train: [24] [5500/6250] eta: 0:02:18 lr: 0.000112 grad: 0.0728 (0.0734) loss: 0.8750 (0.8741) time: 0.1645 data: 0.0746 max mem: 9305 +Train: [24] [5600/6250] eta: 0:02:00 lr: 0.000112 grad: 0.0727 (0.0735) loss: 0.8752 (0.8740) time: 0.2665 data: 0.1657 max mem: 9305 +Train: [24] [5700/6250] eta: 0:01:41 lr: 0.000112 grad: 0.0717 (0.0735) loss: 0.8761 (0.8740) time: 0.1373 data: 0.0328 max mem: 9305 +Train: [24] [5800/6250] eta: 0:01:23 lr: 0.000112 grad: 0.0712 (0.0735) loss: 0.8698 (0.8740) time: 0.1476 data: 0.0549 max mem: 9305 +Train: [24] [5900/6250] eta: 0:01:04 lr: 0.000112 grad: 0.0742 (0.0736) loss: 0.8714 (0.8740) time: 0.1840 data: 0.0922 max mem: 9305 +Train: [24] [6000/6250] eta: 0:00:46 lr: 0.000112 grad: 0.0769 (0.0736) loss: 0.8722 (0.8739) time: 0.1594 data: 0.0706 max mem: 9305 +Train: [24] [6100/6250] eta: 0:00:27 lr: 0.000112 grad: 0.0709 (0.0737) loss: 0.8747 (0.8739) time: 0.1697 data: 0.0737 max mem: 9305 +Train: [24] [6200/6250] eta: 0:00:09 lr: 0.000112 grad: 0.0812 (0.0738) loss: 0.8681 (0.8739) time: 0.2941 data: 0.1757 max mem: 9305 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.0713 (0.0738) loss: 0.8726 (0.8738) time: 0.1131 data: 0.0002 max mem: 9305 +Train: [24] Total time: 0:19:18 (0.1854 s / it) +Averaged stats: lr: 0.000112 grad: 0.0713 (0.0738) loss: 0.8726 (0.8738) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:05:37 loss: 0.8973 (0.8973) time: 5.4486 data: 5.4113 max mem: 9305 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8874 (0.8875) time: 0.1411 data: 0.1124 max mem: 9305 +Eval (hcp-train-subset): [24] Total time: 0:00:14 (0.2369 s / it) +Averaged stats (hcp-train-subset): loss: 0.8874 (0.8875) +Making plots (hcp-train-subset): example=34 +Eval (hcp-val): [24] [ 0/62] eta: 0:05:55 loss: 0.8858 (0.8858) time: 5.7275 data: 5.6921 max mem: 9305 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8833 (0.8847) time: 0.1516 data: 0.1228 max mem: 9305 +Eval (hcp-val): [24] Total time: 0:00:17 (0.2825 s / it) +Averaged stats (hcp-val): loss: 0.8833 (0.8847) +Making plots (hcp-val): example=15 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [25] [ 0/6250] eta: 9:10:35 lr: 0.000112 grad: 0.1464 (0.1464) loss: 0.8986 (0.8986) time: 5.2857 data: 4.9496 max mem: 9305 +Train: [25] [ 100/6250] eta: 0:24:39 lr: 0.000112 grad: 0.0698 (0.0990) loss: 0.8875 (0.8867) time: 0.1912 data: 0.0849 max mem: 9305 +Train: [25] [ 200/6250] eta: 0:21:23 lr: 0.000112 grad: 0.0689 (0.0874) loss: 0.8773 (0.8820) time: 0.1660 data: 0.0616 max mem: 9305 +Train: [25] [ 300/6250] eta: 0:21:05 lr: 0.000112 grad: 0.0684 (0.0819) loss: 0.8791 (0.8817) time: 0.1429 data: 0.0242 max mem: 9305 +Train: [25] [ 400/6250] eta: 0:19:54 lr: 0.000112 grad: 0.0709 (0.0791) loss: 0.8726 (0.8796) time: 0.1363 data: 0.0336 max mem: 9305 +Train: [25] [ 500/6250] eta: 0:19:12 lr: 0.000112 grad: 0.0685 (0.0772) loss: 0.8757 (0.8788) time: 0.1229 data: 0.0260 max mem: 9305 +Train: [25] [ 600/6250] eta: 0:19:16 lr: 0.000112 grad: 0.0707 (0.0760) loss: 0.8702 (0.8781) time: 0.1120 data: 0.0003 max mem: 9305 +Train: [25] [ 700/6250] eta: 0:18:22 lr: 0.000112 grad: 0.0663 (0.0750) loss: 0.8795 (0.8778) time: 0.1736 data: 0.0703 max mem: 9305 +Train: [25] [ 800/6250] eta: 0:17:39 lr: 0.000112 grad: 0.0698 (0.0743) loss: 0.8755 (0.8775) time: 0.1672 data: 0.0705 max mem: 9305 +Train: [25] [ 900/6250] eta: 0:17:03 lr: 0.000112 grad: 0.0690 (0.0740) loss: 0.8746 (0.8772) time: 0.1905 data: 0.0956 max mem: 9305 +Train: [25] [1000/6250] eta: 0:16:36 lr: 0.000112 grad: 0.0682 (0.0735) loss: 0.8736 (0.8771) time: 0.1654 data: 0.0772 max mem: 9305 +Train: [25] [1100/6250] eta: 0:16:05 lr: 0.000112 grad: 0.0671 (0.0732) loss: 0.8754 (0.8770) time: 0.1821 data: 0.0915 max mem: 9305 +Train: [25] [1200/6250] eta: 0:15:36 lr: 0.000112 grad: 0.0651 (0.0730) loss: 0.8779 (0.8768) time: 0.1625 data: 0.0710 max mem: 9305 +Train: [25] [1300/6250] eta: 0:15:17 lr: 0.000112 grad: 0.0667 (0.0728) loss: 0.8790 (0.8767) time: 0.2161 data: 0.1358 max mem: 9305 +Train: [25] [1400/6250] eta: 0:14:49 lr: 0.000112 grad: 0.0703 (0.0726) loss: 0.8710 (0.8767) time: 0.1630 data: 0.0723 max mem: 9305 +Train: [25] [1500/6250] eta: 0:14:29 lr: 0.000112 grad: 0.0692 (0.0724) loss: 0.8747 (0.8766) time: 0.1847 data: 0.1008 max mem: 9305 +Train: [25] [1600/6250] eta: 0:14:08 lr: 0.000111 grad: 0.0657 (0.0725) loss: 0.8815 (0.8764) time: 0.1518 data: 0.0640 max mem: 9305 +Train: [25] [1700/6250] eta: 0:13:45 lr: 0.000111 grad: 0.0768 (0.0725) loss: 0.8739 (0.8763) time: 0.1714 data: 0.0836 max mem: 9305 +Train: [25] [1800/6250] eta: 0:13:25 lr: 0.000111 grad: 0.0693 (0.0726) loss: 0.8778 (0.8763) time: 0.1052 data: 0.0003 max mem: 9305 +Train: [25] [1900/6250] eta: 0:13:05 lr: 0.000111 grad: 0.0639 (0.0726) loss: 0.8753 (0.8764) time: 0.2153 data: 0.1309 max mem: 9305 +Train: [25] [2000/6250] eta: 0:12:58 lr: 0.000111 grad: 0.0693 (0.0727) loss: 0.8798 (0.8764) time: 0.1144 data: 0.0002 max mem: 9305 +Train: [25] [2100/6250] eta: 0:12:39 lr: 0.000111 grad: 0.0678 (0.0726) loss: 0.8765 (0.8764) time: 0.2072 data: 0.1178 max mem: 9305 +Train: [25] [2200/6250] eta: 0:12:20 lr: 0.000111 grad: 0.0649 (0.0726) loss: 0.8800 (0.8763) time: 0.1394 data: 0.0274 max mem: 9305 +Train: [25] [2300/6250] eta: 0:12:12 lr: 0.000111 grad: 0.0747 (0.0726) loss: 0.8778 (0.8762) time: 0.2670 data: 0.1628 max mem: 9305 +Train: [25] [2400/6250] eta: 0:11:55 lr: 0.000111 grad: 0.0712 (0.0727) loss: 0.8775 (0.8761) time: 0.2174 data: 0.1289 max mem: 9305 +Train: [25] [2500/6250] eta: 0:11:44 lr: 0.000111 grad: 0.0690 (0.0728) loss: 0.8765 (0.8760) time: 0.1054 data: 0.0003 max mem: 9305 +Train: [25] [2600/6250] eta: 0:11:29 lr: 0.000111 grad: 0.0759 (0.0727) loss: 0.8701 (0.8759) time: 0.1008 data: 0.0002 max mem: 9305 +Train: [25] [2700/6250] eta: 0:11:06 lr: 0.000111 grad: 0.0669 (0.0727) loss: 0.8765 (0.8758) time: 0.1597 data: 0.0730 max mem: 9305 +Train: [25] [2800/6250] eta: 0:10:54 lr: 0.000111 grad: 0.0674 (0.0726) loss: 0.8735 (0.8756) time: 0.3827 data: 0.2663 max mem: 9305 +Train: [25] [2900/6250] eta: 0:10:31 lr: 0.000111 grad: 0.0691 (0.0726) loss: 0.8777 (0.8756) time: 0.1618 data: 0.0607 max mem: 9305 +Train: [25] [3000/6250] eta: 0:10:10 lr: 0.000111 grad: 0.0723 (0.0726) loss: 0.8723 (0.8755) time: 0.1900 data: 0.0956 max mem: 9305 +Train: [25] [3100/6250] eta: 0:09:49 lr: 0.000111 grad: 0.0679 (0.0725) loss: 0.8738 (0.8754) time: 0.1585 data: 0.0700 max mem: 9305 +Train: [25] [3200/6250] eta: 0:09:29 lr: 0.000111 grad: 0.0720 (0.0725) loss: 0.8739 (0.8754) time: 0.1759 data: 0.0911 max mem: 9305 +Train: [25] [3300/6250] eta: 0:09:09 lr: 0.000111 grad: 0.0648 (0.0725) loss: 0.8782 (0.8754) time: 0.1970 data: 0.0982 max mem: 9305 +Train: [25] [3400/6250] eta: 0:08:49 lr: 0.000111 grad: 0.0719 (0.0725) loss: 0.8715 (0.8753) time: 0.1510 data: 0.0642 max mem: 9305 +Train: [25] [3500/6250] eta: 0:08:29 lr: 0.000111 grad: 0.0715 (0.0725) loss: 0.8743 (0.8752) time: 0.1617 data: 0.0725 max mem: 9305 +Train: [25] [3600/6250] eta: 0:08:08 lr: 0.000111 grad: 0.0732 (0.0726) loss: 0.8763 (0.8751) time: 0.1402 data: 0.0444 max mem: 9305 +Train: [25] [3700/6250] eta: 0:07:47 lr: 0.000111 grad: 0.0719 (0.0727) loss: 0.8690 (0.8750) time: 0.1617 data: 0.0650 max mem: 9305 +Train: [25] [3800/6250] eta: 0:07:27 lr: 0.000111 grad: 0.0679 (0.0727) loss: 0.8794 (0.8750) time: 0.1716 data: 0.0787 max mem: 9305 +Train: [25] [3900/6250] eta: 0:07:11 lr: 0.000111 grad: 0.0666 (0.0728) loss: 0.8829 (0.8751) time: 0.1586 data: 0.0659 max mem: 9305 +Train: [25] [4000/6250] eta: 0:06:52 lr: 0.000111 grad: 0.0739 (0.0729) loss: 0.8711 (0.8750) time: 0.1422 data: 0.0530 max mem: 9305 +Train: [25] [4100/6250] eta: 0:06:33 lr: 0.000111 grad: 0.0675 (0.0729) loss: 0.8727 (0.8749) time: 0.1622 data: 0.0701 max mem: 9305 +Train: [25] [4200/6250] eta: 0:06:13 lr: 0.000111 grad: 0.0702 (0.0730) loss: 0.8777 (0.8749) time: 0.1039 data: 0.0137 max mem: 9305 +Train: [25] [4300/6250] eta: 0:05:54 lr: 0.000111 grad: 0.0672 (0.0730) loss: 0.8742 (0.8749) time: 0.1152 data: 0.0241 max mem: 9305 +Train: [25] [4400/6250] eta: 0:05:35 lr: 0.000111 grad: 0.0703 (0.0730) loss: 0.8752 (0.8749) time: 0.1629 data: 0.0692 max mem: 9305 +Train: [25] [4500/6250] eta: 0:05:16 lr: 0.000111 grad: 0.0688 (0.0730) loss: 0.8787 (0.8749) time: 0.1681 data: 0.0802 max mem: 9305 +Train: [25] [4600/6250] eta: 0:04:57 lr: 0.000111 grad: 0.0701 (0.0730) loss: 0.8777 (0.8749) time: 0.1787 data: 0.0885 max mem: 9305 +Train: [25] [4700/6250] eta: 0:04:38 lr: 0.000111 grad: 0.0693 (0.0730) loss: 0.8785 (0.8749) time: 0.1523 data: 0.0684 max mem: 9305 +Train: [25] [4800/6250] eta: 0:04:20 lr: 0.000111 grad: 0.0738 (0.0730) loss: 0.8786 (0.8750) time: 0.2265 data: 0.1303 max mem: 9305 +Train: [25] [4900/6250] eta: 0:04:02 lr: 0.000111 grad: 0.0715 (0.0730) loss: 0.8795 (0.8750) time: 0.2039 data: 0.0861 max mem: 9305 +Train: [25] [5000/6250] eta: 0:03:45 lr: 0.000111 grad: 0.0682 (0.0730) loss: 0.8774 (0.8750) time: 0.1606 data: 0.0472 max mem: 9305 +Train: [25] [5100/6250] eta: 0:03:27 lr: 0.000111 grad: 0.0691 (0.0730) loss: 0.8802 (0.8750) time: 0.1604 data: 0.0277 max mem: 9305 +Train: [25] [5200/6250] eta: 0:03:09 lr: 0.000111 grad: 0.0721 (0.0730) loss: 0.8662 (0.8750) time: 0.1596 data: 0.0730 max mem: 9305 +Train: [25] [5300/6250] eta: 0:02:51 lr: 0.000111 grad: 0.0689 (0.0729) loss: 0.8777 (0.8750) time: 0.1497 data: 0.0348 max mem: 9305 +Train: [25] [5400/6250] eta: 0:02:33 lr: 0.000111 grad: 0.0725 (0.0729) loss: 0.8800 (0.8751) time: 0.1872 data: 0.0914 max mem: 9305 +Train: [25] [5500/6250] eta: 0:02:15 lr: 0.000111 grad: 0.0668 (0.0729) loss: 0.8742 (0.8751) time: 0.1451 data: 0.0451 max mem: 9305 +Train: [25] [5600/6250] eta: 0:01:57 lr: 0.000111 grad: 0.0724 (0.0729) loss: 0.8769 (0.8751) time: 0.1722 data: 0.0780 max mem: 9305 +Train: [25] [5700/6250] eta: 0:01:39 lr: 0.000111 grad: 0.0728 (0.0729) loss: 0.8733 (0.8750) time: 0.1664 data: 0.0803 max mem: 9305 +Train: [25] [5800/6250] eta: 0:01:21 lr: 0.000111 grad: 0.0715 (0.0730) loss: 0.8740 (0.8750) time: 0.1103 data: 0.0238 max mem: 9305 +Train: [25] [5900/6250] eta: 0:01:03 lr: 0.000111 grad: 0.0723 (0.0730) loss: 0.8716 (0.8749) time: 0.1551 data: 0.0675 max mem: 9305 +Train: [25] [6000/6250] eta: 0:00:44 lr: 0.000111 grad: 0.0693 (0.0730) loss: 0.8760 (0.8749) time: 0.1473 data: 0.0639 max mem: 9305 +Train: [25] [6100/6250] eta: 0:00:26 lr: 0.000111 grad: 0.0734 (0.0730) loss: 0.8660 (0.8748) time: 0.1697 data: 0.0911 max mem: 9305 +Train: [25] [6200/6250] eta: 0:00:08 lr: 0.000111 grad: 0.0648 (0.0730) loss: 0.8764 (0.8748) time: 0.1648 data: 0.0803 max mem: 9305 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.0732 (0.0731) loss: 0.8755 (0.8748) time: 0.1628 data: 0.0807 max mem: 9305 +Train: [25] Total time: 0:18:45 (0.1801 s / it) +Averaged stats: lr: 0.000111 grad: 0.0732 (0.0731) loss: 0.8755 (0.8748) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:05:09 loss: 0.8976 (0.8976) time: 4.9951 data: 4.9557 max mem: 9305 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8873 (0.8872) time: 0.1277 data: 0.0948 max mem: 9305 +Eval (hcp-train-subset): [25] Total time: 0:00:13 (0.2196 s / it) +Averaged stats (hcp-train-subset): loss: 0.8873 (0.8872) +Eval (hcp-val): [25] [ 0/62] eta: 0:05:26 loss: 0.8801 (0.8801) time: 5.2586 data: 5.2237 max mem: 9305 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8824 (0.8837) time: 0.1483 data: 0.1196 max mem: 9305 +Eval (hcp-val): [25] Total time: 0:00:14 (0.2298 s / it) +Averaged stats (hcp-val): loss: 0.8824 (0.8837) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [26] [ 0/6250] eta: 13:36:47 lr: 0.000111 grad: 0.0485 (0.0485) loss: 0.9074 (0.9074) time: 7.8413 data: 7.7022 max mem: 9305 +Train: [26] [ 100/6250] eta: 0:24:40 lr: 0.000111 grad: 0.0688 (0.0897) loss: 0.8808 (0.8802) time: 0.1817 data: 0.0842 max mem: 9305 +Train: [26] [ 200/6250] eta: 0:20:15 lr: 0.000110 grad: 0.0661 (0.0802) loss: 0.8812 (0.8778) time: 0.1517 data: 0.0530 max mem: 9305 +Train: [26] [ 300/6250] eta: 0:18:42 lr: 0.000110 grad: 0.0732 (0.0784) loss: 0.8783 (0.8766) time: 0.1551 data: 0.0643 max mem: 9305 +Train: [26] [ 400/6250] eta: 0:17:50 lr: 0.000110 grad: 0.0702 (0.0779) loss: 0.8743 (0.8764) time: 0.1467 data: 0.0509 max mem: 9305 +Train: [26] [ 500/6250] eta: 0:17:03 lr: 0.000110 grad: 0.0710 (0.0769) loss: 0.8778 (0.8762) time: 0.1586 data: 0.0677 max mem: 9305 +Train: [26] [ 600/6250] eta: 0:16:04 lr: 0.000110 grad: 0.0634 (0.0758) loss: 0.8799 (0.8764) time: 0.1571 data: 0.0670 max mem: 9305 +Train: [26] [ 700/6250] eta: 0:15:17 lr: 0.000110 grad: 0.0665 (0.0752) loss: 0.8814 (0.8765) time: 0.1333 data: 0.0416 max mem: 9305 +Train: [26] [ 800/6250] eta: 0:14:40 lr: 0.000110 grad: 0.0677 (0.0750) loss: 0.8701 (0.8766) time: 0.1364 data: 0.0448 max mem: 9305 +Train: [26] [ 900/6250] eta: 0:14:10 lr: 0.000110 grad: 0.0710 (0.0747) loss: 0.8750 (0.8767) time: 0.1429 data: 0.0501 max mem: 9305 +Train: [26] [1000/6250] eta: 0:13:38 lr: 0.000110 grad: 0.0670 (0.0744) loss: 0.8811 (0.8769) time: 0.1347 data: 0.0477 max mem: 9305 +Train: [26] [1100/6250] eta: 0:13:11 lr: 0.000110 grad: 0.0660 (0.0742) loss: 0.8825 (0.8769) time: 0.1357 data: 0.0529 max mem: 9305 +Train: [26] [1200/6250] eta: 0:12:47 lr: 0.000110 grad: 0.0691 (0.0740) loss: 0.8712 (0.8767) time: 0.1489 data: 0.0656 max mem: 9305 +Train: [26] [1300/6250] eta: 0:12:24 lr: 0.000110 grad: 0.0727 (0.0741) loss: 0.8728 (0.8763) time: 0.1288 data: 0.0390 max mem: 9305 +Train: [26] [1400/6250] eta: 0:12:02 lr: 0.000110 grad: 0.0670 (0.0742) loss: 0.8811 (0.8761) time: 0.1328 data: 0.0483 max mem: 9305 +Train: [26] [1500/6250] eta: 0:11:44 lr: 0.000110 grad: 0.0715 (0.0742) loss: 0.8794 (0.8761) time: 0.1301 data: 0.0401 max mem: 9305 +Train: [26] [1600/6250] eta: 0:11:24 lr: 0.000110 grad: 0.0762 (0.0741) loss: 0.8771 (0.8760) time: 0.1397 data: 0.0468 max mem: 9305 +Train: [26] [1700/6250] eta: 0:11:06 lr: 0.000110 grad: 0.0709 (0.0741) loss: 0.8758 (0.8759) time: 0.1563 data: 0.0688 max mem: 9305 +Train: [26] [1800/6250] eta: 0:10:49 lr: 0.000110 grad: 0.0705 (0.0742) loss: 0.8783 (0.8759) time: 0.0867 data: 0.0002 max mem: 9305 +Train: [26] [1900/6250] eta: 0:10:33 lr: 0.000110 grad: 0.0709 (0.0743) loss: 0.8758 (0.8759) time: 0.1459 data: 0.0617 max mem: 9305 +Train: [26] [2000/6250] eta: 0:10:16 lr: 0.000110 grad: 0.0673 (0.0745) loss: 0.8743 (0.8756) time: 0.1357 data: 0.0477 max mem: 9305 +Train: [26] [2100/6250] eta: 0:10:02 lr: 0.000110 grad: 0.0789 (0.0746) loss: 0.8688 (0.8755) time: 0.1420 data: 0.0540 max mem: 9305 +Train: [26] [2200/6250] eta: 0:09:49 lr: 0.000110 grad: 0.0733 (0.0747) loss: 0.8752 (0.8754) time: 0.1833 data: 0.1012 max mem: 9305 +Train: [26] [2300/6250] eta: 0:09:35 lr: 0.000110 grad: 0.0795 (0.0748) loss: 0.8716 (0.8754) time: 0.1587 data: 0.0743 max mem: 9305 +Train: [26] [2400/6250] eta: 0:09:24 lr: 0.000110 grad: 0.0722 (0.0749) loss: 0.8767 (0.8754) time: 0.1646 data: 0.0791 max mem: 9305 +Train: [26] [2500/6250] eta: 0:09:10 lr: 0.000110 grad: 0.0730 (0.0749) loss: 0.8755 (0.8754) time: 0.1630 data: 0.0825 max mem: 9305 +Train: [26] [2600/6250] eta: 0:08:59 lr: 0.000110 grad: 0.0702 (0.0749) loss: 0.8743 (0.8755) time: 0.1804 data: 0.0835 max mem: 9305 +Train: [26] [2700/6250] eta: 0:08:49 lr: 0.000110 grad: 0.0729 (0.0751) loss: 0.8790 (0.8755) time: 0.1916 data: 0.1089 max mem: 9305 +Train: [26] [2800/6250] eta: 0:08:38 lr: 0.000110 grad: 0.0802 (0.0752) loss: 0.8777 (0.8754) time: 0.1649 data: 0.0866 max mem: 9305 +Train: [26] [2900/6250] eta: 0:08:24 lr: 0.000110 grad: 0.0784 (0.0755) loss: 0.8765 (0.8753) time: 0.1582 data: 0.0827 max mem: 9305 +Train: [26] [3000/6250] eta: 0:08:11 lr: 0.000110 grad: 0.0762 (0.0757) loss: 0.8728 (0.8753) time: 0.1811 data: 0.0954 max mem: 9305 +Train: [26] [3100/6250] eta: 0:07:58 lr: 0.000110 grad: 0.0774 (0.0760) loss: 0.8668 (0.8752) time: 0.1846 data: 0.0985 max mem: 9305 +Train: [26] [3200/6250] eta: 0:07:44 lr: 0.000110 grad: 0.0699 (0.0760) loss: 0.8796 (0.8752) time: 0.1541 data: 0.0674 max mem: 9305 +Train: [26] [3300/6250] eta: 0:07:29 lr: 0.000110 grad: 0.0751 (0.0760) loss: 0.8730 (0.8751) time: 0.1506 data: 0.0601 max mem: 9305 +Train: [26] [3400/6250] eta: 0:07:14 lr: 0.000110 grad: 0.0766 (0.0759) loss: 0.8734 (0.8751) time: 0.1519 data: 0.0653 max mem: 9305 +Train: [26] [3500/6250] eta: 0:06:59 lr: 0.000110 grad: 0.0745 (0.0761) loss: 0.8685 (0.8750) time: 0.1490 data: 0.0585 max mem: 9305 +Train: [26] [3600/6250] eta: 0:06:42 lr: 0.000110 grad: 0.0750 (0.0761) loss: 0.8757 (0.8750) time: 0.1376 data: 0.0480 max mem: 9305 +Train: [26] [3700/6250] eta: 0:06:26 lr: 0.000110 grad: 0.0706 (0.0762) loss: 0.8710 (0.8749) time: 0.1553 data: 0.0676 max mem: 9305 +Train: [26] [3800/6250] eta: 0:06:09 lr: 0.000110 grad: 0.0780 (0.0763) loss: 0.8727 (0.8748) time: 0.1412 data: 0.0559 max mem: 9305 +Train: [26] [3900/6250] eta: 0:05:53 lr: 0.000110 grad: 0.0748 (0.0764) loss: 0.8659 (0.8747) time: 0.1343 data: 0.0469 max mem: 9305 +Train: [26] [4000/6250] eta: 0:05:38 lr: 0.000110 grad: 0.0792 (0.0765) loss: 0.8705 (0.8746) time: 0.1480 data: 0.0658 max mem: 9305 +Train: [26] [4100/6250] eta: 0:05:23 lr: 0.000110 grad: 0.0734 (0.0765) loss: 0.8722 (0.8745) time: 0.1913 data: 0.0842 max mem: 9305 +Train: [26] [4200/6250] eta: 0:05:09 lr: 0.000110 grad: 0.0797 (0.0764) loss: 0.8727 (0.8745) time: 0.1676 data: 0.0855 max mem: 9305 +Train: [26] [4300/6250] eta: 0:04:54 lr: 0.000110 grad: 0.0702 (0.0764) loss: 0.8724 (0.8745) time: 0.1556 data: 0.0734 max mem: 9305 +Train: [26] [4400/6250] eta: 0:04:39 lr: 0.000110 grad: 0.0735 (0.0764) loss: 0.8780 (0.8745) time: 0.1614 data: 0.0806 max mem: 9305 +Train: [26] [4500/6250] eta: 0:04:24 lr: 0.000110 grad: 0.0714 (0.0764) loss: 0.8744 (0.8745) time: 0.1348 data: 0.0514 max mem: 9305 +Train: [26] [4600/6250] eta: 0:04:09 lr: 0.000110 grad: 0.0727 (0.0764) loss: 0.8707 (0.8745) time: 0.1556 data: 0.0734 max mem: 9305 +Train: [26] [4700/6250] eta: 0:03:54 lr: 0.000110 grad: 0.0742 (0.0765) loss: 0.8694 (0.8744) time: 0.2217 data: 0.1361 max mem: 9305 +Train: [26] [4800/6250] eta: 0:03:39 lr: 0.000109 grad: 0.0806 (0.0765) loss: 0.8676 (0.8744) time: 0.1322 data: 0.0468 max mem: 9305 +Train: [26] [4900/6250] eta: 0:03:23 lr: 0.000109 grad: 0.0741 (0.0765) loss: 0.8725 (0.8743) time: 0.1357 data: 0.0494 max mem: 9305 +Train: [26] [5000/6250] eta: 0:03:08 lr: 0.000109 grad: 0.0810 (0.0767) loss: 0.8676 (0.8742) time: 0.1227 data: 0.0367 max mem: 9305 +Train: [26] [5100/6250] eta: 0:02:53 lr: 0.000109 grad: 0.0785 (0.0767) loss: 0.8683 (0.8741) time: 0.1562 data: 0.0736 max mem: 9305 +Train: [26] [5200/6250] eta: 0:02:38 lr: 0.000109 grad: 0.0724 (0.0768) loss: 0.8723 (0.8740) time: 0.1363 data: 0.0509 max mem: 9305 +Train: [26] [5300/6250] eta: 0:02:23 lr: 0.000109 grad: 0.0766 (0.0768) loss: 0.8744 (0.8740) time: 0.1787 data: 0.0919 max mem: 9305 +Train: [26] [5400/6250] eta: 0:02:08 lr: 0.000109 grad: 0.0792 (0.0769) loss: 0.8719 (0.8739) time: 0.1525 data: 0.0676 max mem: 9305 +Train: [26] [5500/6250] eta: 0:01:53 lr: 0.000109 grad: 0.0695 (0.0769) loss: 0.8784 (0.8739) time: 0.1034 data: 0.0002 max mem: 9305 +Train: [26] [5600/6250] eta: 0:01:39 lr: 0.000109 grad: 0.0780 (0.0770) loss: 0.8723 (0.8738) time: 0.1464 data: 0.0576 max mem: 9305 +Train: [26] [5700/6250] eta: 0:01:23 lr: 0.000109 grad: 0.0807 (0.0770) loss: 0.8663 (0.8737) time: 0.1452 data: 0.0587 max mem: 9305 +Train: [26] [5800/6250] eta: 0:01:08 lr: 0.000109 grad: 0.0694 (0.0771) loss: 0.8744 (0.8737) time: 0.1567 data: 0.0754 max mem: 9305 +Train: [26] [5900/6250] eta: 0:00:53 lr: 0.000109 grad: 0.0784 (0.0770) loss: 0.8714 (0.8737) time: 0.1462 data: 0.0537 max mem: 9305 +Train: [26] [6000/6250] eta: 0:00:38 lr: 0.000109 grad: 0.0785 (0.0771) loss: 0.8714 (0.8737) time: 0.1653 data: 0.0717 max mem: 9305 +Train: [26] [6100/6250] eta: 0:00:23 lr: 0.000109 grad: 0.0765 (0.0771) loss: 0.8684 (0.8736) time: 0.1434 data: 0.0633 max mem: 9305 +Train: [26] [6200/6250] eta: 0:00:07 lr: 0.000109 grad: 0.0760 (0.0772) loss: 0.8747 (0.8736) time: 0.1452 data: 0.0592 max mem: 9305 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.0800 (0.0772) loss: 0.8709 (0.8736) time: 0.1132 data: 0.0233 max mem: 9305 +Train: [26] Total time: 0:16:05 (0.1545 s / it) +Averaged stats: lr: 0.000109 grad: 0.0800 (0.0772) loss: 0.8709 (0.8736) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:05:56 loss: 0.8947 (0.8947) time: 5.7531 data: 5.7136 max mem: 9305 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8843 (0.8868) time: 0.1408 data: 0.1106 max mem: 9305 +Eval (hcp-train-subset): [26] Total time: 0:00:14 (0.2315 s / it) +Averaged stats (hcp-train-subset): loss: 0.8843 (0.8868) +Eval (hcp-val): [26] [ 0/62] eta: 0:04:32 loss: 0.8788 (0.8788) time: 4.3916 data: 4.3361 max mem: 9305 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8835 (0.8844) time: 0.1596 data: 0.1299 max mem: 9305 +Eval (hcp-val): [26] Total time: 0:00:14 (0.2337 s / it) +Averaged stats (hcp-val): loss: 0.8835 (0.8844) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [27] [ 0/6250] eta: 11:24:11 lr: 0.000109 grad: 0.1039 (0.1039) loss: 0.8359 (0.8359) time: 6.5683 data: 6.4563 max mem: 9305 +Train: [27] [ 100/6250] eta: 0:21:52 lr: 0.000109 grad: 0.0674 (0.0845) loss: 0.8797 (0.8812) time: 0.1557 data: 0.0596 max mem: 9305 +Train: [27] [ 200/6250] eta: 0:19:12 lr: 0.000109 grad: 0.0706 (0.0807) loss: 0.8758 (0.8778) time: 0.1891 data: 0.0977 max mem: 9305 +Train: [27] [ 300/6250] eta: 0:17:46 lr: 0.000109 grad: 0.0749 (0.0795) loss: 0.8741 (0.8767) time: 0.1751 data: 0.0925 max mem: 9305 +Train: [27] [ 400/6250] eta: 0:16:47 lr: 0.000109 grad: 0.0734 (0.0791) loss: 0.8811 (0.8762) time: 0.1523 data: 0.0610 max mem: 9305 +Train: [27] [ 500/6250] eta: 0:15:57 lr: 0.000109 grad: 0.0741 (0.0785) loss: 0.8773 (0.8764) time: 0.1318 data: 0.0309 max mem: 9305 +Train: [27] [ 600/6250] eta: 0:15:24 lr: 0.000109 grad: 0.0730 (0.0782) loss: 0.8732 (0.8764) time: 0.1484 data: 0.0515 max mem: 9305 +Train: [27] [ 700/6250] eta: 0:14:58 lr: 0.000109 grad: 0.0707 (0.0781) loss: 0.8714 (0.8758) time: 0.1660 data: 0.0799 max mem: 9305 +Train: [27] [ 800/6250] eta: 0:14:45 lr: 0.000109 grad: 0.0795 (0.0781) loss: 0.8710 (0.8752) time: 0.1878 data: 0.1055 max mem: 9305 +Train: [27] [ 900/6250] eta: 0:14:27 lr: 0.000109 grad: 0.0673 (0.0779) loss: 0.8742 (0.8751) time: 0.1682 data: 0.0852 max mem: 9305 +Train: [27] [1000/6250] eta: 0:14:05 lr: 0.000109 grad: 0.0770 (0.0779) loss: 0.8699 (0.8750) time: 0.1358 data: 0.0513 max mem: 9305 +Train: [27] [1100/6250] eta: 0:13:45 lr: 0.000109 grad: 0.0719 (0.0776) loss: 0.8732 (0.8748) time: 0.1416 data: 0.0551 max mem: 9305 +Train: [27] [1200/6250] eta: 0:13:20 lr: 0.000109 grad: 0.0694 (0.0776) loss: 0.8747 (0.8747) time: 0.1136 data: 0.0297 max mem: 9305 +Train: [27] [1300/6250] eta: 0:12:55 lr: 0.000109 grad: 0.0703 (0.0774) loss: 0.8732 (0.8746) time: 0.1278 data: 0.0399 max mem: 9305 +Train: [27] [1400/6250] eta: 0:12:33 lr: 0.000109 grad: 0.0762 (0.0776) loss: 0.8744 (0.8744) time: 0.1121 data: 0.0281 max mem: 9305 +Train: [27] [1500/6250] eta: 0:12:11 lr: 0.000109 grad: 0.0724 (0.0776) loss: 0.8757 (0.8743) time: 0.1393 data: 0.0457 max mem: 9305 +Train: [27] [1600/6250] eta: 0:11:55 lr: 0.000109 grad: 0.0736 (0.0775) loss: 0.8727 (0.8741) time: 0.1400 data: 0.0501 max mem: 9305 +Train: [27] [1700/6250] eta: 0:11:40 lr: 0.000109 grad: 0.0705 (0.0773) loss: 0.8748 (0.8741) time: 0.1630 data: 0.0789 max mem: 9305 +Train: [27] [1800/6250] eta: 0:11:21 lr: 0.000109 grad: 0.0774 (0.0773) loss: 0.8693 (0.8740) time: 0.1064 data: 0.0182 max mem: 9305 +Train: [27] [1900/6250] eta: 0:11:10 lr: 0.000109 grad: 0.0690 (0.0772) loss: 0.8765 (0.8739) time: 0.1614 data: 0.0667 max mem: 9305 +Train: [27] [2000/6250] eta: 0:10:57 lr: 0.000109 grad: 0.0718 (0.0770) loss: 0.8712 (0.8739) time: 0.2114 data: 0.1266 max mem: 9305 +Train: [27] [2100/6250] eta: 0:10:39 lr: 0.000109 grad: 0.0692 (0.0769) loss: 0.8776 (0.8738) time: 0.1339 data: 0.0495 max mem: 9305 +Train: [27] [2200/6250] eta: 0:10:26 lr: 0.000109 grad: 0.0744 (0.0768) loss: 0.8769 (0.8738) time: 0.2263 data: 0.1353 max mem: 9305 +Train: [27] [2300/6250] eta: 0:10:10 lr: 0.000109 grad: 0.0706 (0.0767) loss: 0.8745 (0.8739) time: 0.1554 data: 0.0744 max mem: 9305 +Train: [27] [2400/6250] eta: 0:09:55 lr: 0.000109 grad: 0.0752 (0.0768) loss: 0.8698 (0.8738) time: 0.1892 data: 0.1023 max mem: 9305 +Train: [27] [2500/6250] eta: 0:09:44 lr: 0.000109 grad: 0.0752 (0.0768) loss: 0.8733 (0.8738) time: 0.1751 data: 0.0983 max mem: 9305 +Train: [27] [2600/6250] eta: 0:09:32 lr: 0.000109 grad: 0.0758 (0.0768) loss: 0.8680 (0.8737) time: 0.1824 data: 0.0934 max mem: 9305 +Train: [27] [2700/6250] eta: 0:09:19 lr: 0.000109 grad: 0.0704 (0.0768) loss: 0.8722 (0.8738) time: 0.1684 data: 0.0788 max mem: 9305 +Train: [27] [2800/6250] eta: 0:09:07 lr: 0.000109 grad: 0.0723 (0.0769) loss: 0.8760 (0.8738) time: 0.1741 data: 0.0880 max mem: 9305 +Train: [27] [2900/6250] eta: 0:08:55 lr: 0.000109 grad: 0.0743 (0.0769) loss: 0.8679 (0.8737) time: 0.1348 data: 0.0475 max mem: 9305 +Train: [27] [3000/6250] eta: 0:08:40 lr: 0.000109 grad: 0.0762 (0.0769) loss: 0.8719 (0.8737) time: 0.1990 data: 0.1045 max mem: 9305 +Train: [27] [3100/6250] eta: 0:08:25 lr: 0.000108 grad: 0.0799 (0.0769) loss: 0.8765 (0.8737) time: 0.1823 data: 0.0928 max mem: 9305 +Train: [27] [3200/6250] eta: 0:08:08 lr: 0.000108 grad: 0.0727 (0.0769) loss: 0.8753 (0.8737) time: 0.1654 data: 0.0836 max mem: 9305 +Train: [27] [3300/6250] eta: 0:07:56 lr: 0.000108 grad: 0.0690 (0.0769) loss: 0.8741 (0.8737) time: 0.1130 data: 0.0003 max mem: 9305 +Train: [27] [3400/6250] eta: 0:07:38 lr: 0.000108 grad: 0.0705 (0.0769) loss: 0.8717 (0.8737) time: 0.1704 data: 0.0873 max mem: 9305 +Train: [27] [3500/6250] eta: 0:07:22 lr: 0.000108 grad: 0.0749 (0.0769) loss: 0.8724 (0.8737) time: 0.1642 data: 0.0827 max mem: 9305 +Train: [27] [3600/6250] eta: 0:07:05 lr: 0.000108 grad: 0.0791 (0.0770) loss: 0.8717 (0.8737) time: 0.1494 data: 0.0658 max mem: 9305 +Train: [27] [3700/6250] eta: 0:06:49 lr: 0.000108 grad: 0.0734 (0.0769) loss: 0.8760 (0.8736) time: 0.1403 data: 0.0610 max mem: 9305 +Train: [27] [3800/6250] eta: 0:06:33 lr: 0.000108 grad: 0.0729 (0.0769) loss: 0.8723 (0.8735) time: 0.1578 data: 0.0749 max mem: 9305 +Train: [27] [3900/6250] eta: 0:06:16 lr: 0.000108 grad: 0.0749 (0.0769) loss: 0.8704 (0.8735) time: 0.1199 data: 0.0331 max mem: 9305 +Train: [27] [4000/6250] eta: 0:06:00 lr: 0.000108 grad: 0.0795 (0.0770) loss: 0.8679 (0.8735) time: 0.1216 data: 0.0333 max mem: 9305 +Train: [27] [4100/6250] eta: 0:05:43 lr: 0.000108 grad: 0.0721 (0.0770) loss: 0.8798 (0.8735) time: 0.1254 data: 0.0463 max mem: 9305 +Train: [27] [4200/6250] eta: 0:05:26 lr: 0.000108 grad: 0.0722 (0.0771) loss: 0.8770 (0.8734) time: 0.1342 data: 0.0469 max mem: 9305 +Train: [27] [4300/6250] eta: 0:05:09 lr: 0.000108 grad: 0.0760 (0.0771) loss: 0.8729 (0.8734) time: 0.1346 data: 0.0524 max mem: 9305 +Train: [27] [4400/6250] eta: 0:04:53 lr: 0.000108 grad: 0.0742 (0.0771) loss: 0.8775 (0.8734) time: 0.1376 data: 0.0491 max mem: 9305 +Train: [27] [4500/6250] eta: 0:04:37 lr: 0.000108 grad: 0.0705 (0.0772) loss: 0.8715 (0.8733) time: 0.1543 data: 0.0567 max mem: 9305 +Train: [27] [4600/6250] eta: 0:04:21 lr: 0.000108 grad: 0.0704 (0.0772) loss: 0.8821 (0.8733) time: 0.1232 data: 0.0388 max mem: 9305 +Train: [27] [4700/6250] eta: 0:04:05 lr: 0.000108 grad: 0.0732 (0.0772) loss: 0.8743 (0.8733) time: 0.0918 data: 0.0005 max mem: 9305 +Train: [27] [4800/6250] eta: 0:03:50 lr: 0.000108 grad: 0.0764 (0.0772) loss: 0.8730 (0.8732) time: 0.1629 data: 0.0830 max mem: 9305 +Train: [27] [4900/6250] eta: 0:03:34 lr: 0.000108 grad: 0.0705 (0.0773) loss: 0.8735 (0.8732) time: 0.1324 data: 0.0475 max mem: 9305 +Train: [27] [5000/6250] eta: 0:03:18 lr: 0.000108 grad: 0.0725 (0.0773) loss: 0.8724 (0.8733) time: 0.1076 data: 0.0216 max mem: 9305 +Train: [27] [5100/6250] eta: 0:03:01 lr: 0.000108 grad: 0.0734 (0.0773) loss: 0.8760 (0.8733) time: 0.1097 data: 0.0251 max mem: 9305 +Train: [27] [5200/6250] eta: 0:02:45 lr: 0.000108 grad: 0.0746 (0.0773) loss: 0.8746 (0.8733) time: 0.1035 data: 0.0081 max mem: 9305 +Train: [27] [5300/6250] eta: 0:02:29 lr: 0.000108 grad: 0.0786 (0.0773) loss: 0.8725 (0.8733) time: 0.1439 data: 0.0619 max mem: 9305 +Train: [27] [5400/6250] eta: 0:02:13 lr: 0.000108 grad: 0.0716 (0.0773) loss: 0.8769 (0.8734) time: 0.1446 data: 0.0706 max mem: 9305 +Train: [27] [5500/6250] eta: 0:01:57 lr: 0.000108 grad: 0.0783 (0.0774) loss: 0.8758 (0.8733) time: 0.1355 data: 0.0528 max mem: 9305 +Train: [27] [5600/6250] eta: 0:01:42 lr: 0.000108 grad: 0.0826 (0.0775) loss: 0.8736 (0.8734) time: 0.1395 data: 0.0599 max mem: 9305 +Train: [27] [5700/6250] eta: 0:01:26 lr: 0.000108 grad: 0.0761 (0.0775) loss: 0.8721 (0.8734) time: 0.1432 data: 0.0601 max mem: 9305 +Train: [27] [5800/6250] eta: 0:01:10 lr: 0.000108 grad: 0.0785 (0.0776) loss: 0.8706 (0.8734) time: 0.1835 data: 0.1073 max mem: 9305 +Train: [27] [5900/6250] eta: 0:00:54 lr: 0.000108 grad: 0.0759 (0.0776) loss: 0.8720 (0.8733) time: 0.1309 data: 0.0458 max mem: 9305 +Train: [27] [6000/6250] eta: 0:00:39 lr: 0.000108 grad: 0.0798 (0.0777) loss: 0.8721 (0.8733) time: 0.1725 data: 0.0877 max mem: 9305 +Train: [27] [6100/6250] eta: 0:00:23 lr: 0.000108 grad: 0.0701 (0.0777) loss: 0.8720 (0.8732) time: 0.2688 data: 0.1649 max mem: 9305 +Train: [27] [6200/6250] eta: 0:00:07 lr: 0.000108 grad: 0.0784 (0.0778) loss: 0.8733 (0.8732) time: 0.0951 data: 0.0002 max mem: 9305 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.0750 (0.0778) loss: 0.8737 (0.8732) time: 0.1395 data: 0.0588 max mem: 9305 +Train: [27] Total time: 0:16:31 (0.1587 s / it) +Averaged stats: lr: 0.000108 grad: 0.0750 (0.0778) loss: 0.8737 (0.8732) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:04:52 loss: 0.8985 (0.8985) time: 4.7220 data: 4.6844 max mem: 9305 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8880 (0.8870) time: 0.1375 data: 0.1091 max mem: 9305 +Eval (hcp-train-subset): [27] Total time: 0:00:14 (0.2345 s / it) +Averaged stats (hcp-train-subset): loss: 0.8880 (0.8870) +Eval (hcp-val): [27] [ 0/62] eta: 0:04:30 loss: 0.8816 (0.8816) time: 4.3573 data: 4.2936 max mem: 9305 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8820 (0.8842) time: 0.1376 data: 0.1081 max mem: 9305 +Eval (hcp-val): [27] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (hcp-val): loss: 0.8820 (0.8842) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [28] [ 0/6250] eta: 7:15:27 lr: 0.000108 grad: 0.0667 (0.0667) loss: 0.9072 (0.9072) time: 4.1804 data: 3.8690 max mem: 9305 +Train: [28] [ 100/6250] eta: 0:22:11 lr: 0.000108 grad: 0.0742 (0.0843) loss: 0.8739 (0.8814) time: 0.1599 data: 0.0662 max mem: 9305 +Train: [28] [ 200/6250] eta: 0:18:51 lr: 0.000108 grad: 0.0737 (0.0818) loss: 0.8734 (0.8755) time: 0.1626 data: 0.0769 max mem: 9305 +Train: [28] [ 300/6250] eta: 0:17:38 lr: 0.000108 grad: 0.0710 (0.0801) loss: 0.8706 (0.8734) time: 0.1720 data: 0.0720 max mem: 9305 +Train: [28] [ 400/6250] eta: 0:16:43 lr: 0.000108 grad: 0.0751 (0.0796) loss: 0.8710 (0.8724) time: 0.1424 data: 0.0495 max mem: 9305 +Train: [28] [ 500/6250] eta: 0:15:56 lr: 0.000108 grad: 0.0697 (0.0783) loss: 0.8740 (0.8721) time: 0.1441 data: 0.0505 max mem: 9305 +Train: [28] [ 600/6250] eta: 0:15:44 lr: 0.000108 grad: 0.0730 (0.0773) loss: 0.8741 (0.8722) time: 0.2062 data: 0.1014 max mem: 9305 +Train: [28] [ 700/6250] eta: 0:15:06 lr: 0.000108 grad: 0.0749 (0.0769) loss: 0.8732 (0.8726) time: 0.1539 data: 0.0698 max mem: 9305 +Train: [28] [ 800/6250] eta: 0:14:42 lr: 0.000108 grad: 0.0709 (0.0768) loss: 0.8743 (0.8724) time: 0.1152 data: 0.0209 max mem: 9305 +Train: [28] [ 900/6250] eta: 0:14:23 lr: 0.000108 grad: 0.0714 (0.0766) loss: 0.8722 (0.8724) time: 0.1580 data: 0.0657 max mem: 9305 +Train: [28] [1000/6250] eta: 0:14:09 lr: 0.000108 grad: 0.0744 (0.0766) loss: 0.8697 (0.8722) time: 0.1426 data: 0.0625 max mem: 9305 +Train: [28] [1100/6250] eta: 0:13:50 lr: 0.000108 grad: 0.0762 (0.0766) loss: 0.8694 (0.8721) time: 0.1680 data: 0.0851 max mem: 9305 +Train: [28] [1200/6250] eta: 0:13:38 lr: 0.000108 grad: 0.0715 (0.0768) loss: 0.8733 (0.8719) time: 0.1901 data: 0.0834 max mem: 9305 +Train: [28] [1300/6250] eta: 0:13:18 lr: 0.000107 grad: 0.0745 (0.0768) loss: 0.8665 (0.8718) time: 0.1481 data: 0.0634 max mem: 9305 +Train: [28] [1400/6250] eta: 0:13:05 lr: 0.000107 grad: 0.0755 (0.0769) loss: 0.8675 (0.8715) time: 0.1253 data: 0.0214 max mem: 9305 +Train: [28] [1500/6250] eta: 0:12:39 lr: 0.000107 grad: 0.0754 (0.0771) loss: 0.8745 (0.8713) time: 0.1387 data: 0.0552 max mem: 9305 +Train: [28] [1600/6250] eta: 0:12:18 lr: 0.000107 grad: 0.0719 (0.0772) loss: 0.8745 (0.8712) time: 0.1176 data: 0.0205 max mem: 9305 +Train: [28] [1700/6250] eta: 0:11:56 lr: 0.000107 grad: 0.0735 (0.0773) loss: 0.8702 (0.8710) time: 0.1329 data: 0.0512 max mem: 9305 +Train: [28] [1800/6250] eta: 0:11:36 lr: 0.000107 grad: 0.0777 (0.0773) loss: 0.8676 (0.8710) time: 0.1269 data: 0.0423 max mem: 9305 +Train: [28] [1900/6250] eta: 0:11:21 lr: 0.000107 grad: 0.0759 (0.0773) loss: 0.8710 (0.8710) time: 0.1521 data: 0.0748 max mem: 9305 +Train: [28] [2000/6250] eta: 0:11:03 lr: 0.000107 grad: 0.0780 (0.0775) loss: 0.8703 (0.8710) time: 0.1443 data: 0.0614 max mem: 9305 +Train: [28] [2100/6250] eta: 0:10:46 lr: 0.000107 grad: 0.0722 (0.0775) loss: 0.8698 (0.8709) time: 0.1319 data: 0.0452 max mem: 9305 +Train: [28] [2200/6250] eta: 0:10:28 lr: 0.000107 grad: 0.0773 (0.0776) loss: 0.8682 (0.8708) time: 0.1529 data: 0.0647 max mem: 9305 +Train: [28] [2300/6250] eta: 0:10:10 lr: 0.000107 grad: 0.0772 (0.0777) loss: 0.8712 (0.8708) time: 0.1516 data: 0.0640 max mem: 9305 +Train: [28] [2400/6250] eta: 0:09:55 lr: 0.000107 grad: 0.0734 (0.0778) loss: 0.8739 (0.8706) time: 0.1664 data: 0.0909 max mem: 9305 +Train: [28] [2500/6250] eta: 0:09:40 lr: 0.000107 grad: 0.0771 (0.0778) loss: 0.8647 (0.8704) time: 0.1543 data: 0.0635 max mem: 9305 +Train: [28] [2600/6250] eta: 0:09:24 lr: 0.000107 grad: 0.0778 (0.0779) loss: 0.8642 (0.8703) time: 0.1249 data: 0.0448 max mem: 9305 +Train: [28] [2700/6250] eta: 0:09:08 lr: 0.000107 grad: 0.0856 (0.0780) loss: 0.8635 (0.8702) time: 0.1542 data: 0.0730 max mem: 9305 +Train: [28] [2800/6250] eta: 0:08:52 lr: 0.000107 grad: 0.0796 (0.0783) loss: 0.8724 (0.8700) time: 0.1595 data: 0.0747 max mem: 9305 +Train: [28] [2900/6250] eta: 0:08:35 lr: 0.000107 grad: 0.0777 (0.0784) loss: 0.8673 (0.8698) time: 0.1424 data: 0.0588 max mem: 9305 +Train: [28] [3000/6250] eta: 0:08:18 lr: 0.000107 grad: 0.0832 (0.0785) loss: 0.8648 (0.8697) time: 0.1499 data: 0.0686 max mem: 9305 +Train: [28] [3100/6250] eta: 0:08:00 lr: 0.000107 grad: 0.0784 (0.0786) loss: 0.8704 (0.8697) time: 0.1256 data: 0.0386 max mem: 9305 +Train: [28] [3200/6250] eta: 0:07:42 lr: 0.000107 grad: 0.0805 (0.0786) loss: 0.8665 (0.8697) time: 0.1292 data: 0.0416 max mem: 9305 +Train: [28] [3300/6250] eta: 0:07:26 lr: 0.000107 grad: 0.0791 (0.0787) loss: 0.8661 (0.8697) time: 0.1305 data: 0.0414 max mem: 9305 +Train: [28] [3400/6250] eta: 0:07:09 lr: 0.000107 grad: 0.0798 (0.0787) loss: 0.8689 (0.8697) time: 0.1418 data: 0.0552 max mem: 9305 +Train: [28] [3500/6250] eta: 0:06:54 lr: 0.000107 grad: 0.0757 (0.0788) loss: 0.8716 (0.8697) time: 0.1718 data: 0.0877 max mem: 9305 +Train: [28] [3600/6250] eta: 0:06:38 lr: 0.000107 grad: 0.0792 (0.0789) loss: 0.8718 (0.8697) time: 0.1430 data: 0.0528 max mem: 9305 +Train: [28] [3700/6250] eta: 0:06:22 lr: 0.000107 grad: 0.0735 (0.0789) loss: 0.8706 (0.8697) time: 0.1330 data: 0.0447 max mem: 9305 +Train: [28] [3800/6250] eta: 0:06:07 lr: 0.000107 grad: 0.0836 (0.0790) loss: 0.8637 (0.8697) time: 0.2100 data: 0.0866 max mem: 9305 +Train: [28] [3900/6250] eta: 0:05:52 lr: 0.000107 grad: 0.0852 (0.0792) loss: 0.8731 (0.8696) time: 0.1450 data: 0.0598 max mem: 9305 +Train: [28] [4000/6250] eta: 0:05:36 lr: 0.000107 grad: 0.0810 (0.0792) loss: 0.8700 (0.8697) time: 0.1137 data: 0.0253 max mem: 9305 +Train: [28] [4100/6250] eta: 0:05:22 lr: 0.000107 grad: 0.0726 (0.0793) loss: 0.8716 (0.8697) time: 0.0947 data: 0.0004 max mem: 9305 +Train: [28] [4200/6250] eta: 0:05:07 lr: 0.000107 grad: 0.0846 (0.0793) loss: 0.8704 (0.8697) time: 0.1571 data: 0.0697 max mem: 9305 +Train: [28] [4300/6250] eta: 0:04:52 lr: 0.000107 grad: 0.0744 (0.0794) loss: 0.8719 (0.8697) time: 0.1418 data: 0.0596 max mem: 9305 +Train: [28] [4400/6250] eta: 0:04:36 lr: 0.000107 grad: 0.0773 (0.0794) loss: 0.8729 (0.8698) time: 0.1228 data: 0.0422 max mem: 9305 +Train: [28] [4500/6250] eta: 0:04:21 lr: 0.000107 grad: 0.0759 (0.0795) loss: 0.8716 (0.8699) time: 0.1528 data: 0.0710 max mem: 9305 +Train: [28] [4600/6250] eta: 0:04:06 lr: 0.000107 grad: 0.0875 (0.0796) loss: 0.8685 (0.8699) time: 0.1380 data: 0.0569 max mem: 9305 +Train: [28] [4700/6250] eta: 0:03:51 lr: 0.000107 grad: 0.0751 (0.0796) loss: 0.8701 (0.8699) time: 0.1403 data: 0.0482 max mem: 9305 +Train: [28] [4800/6250] eta: 0:03:36 lr: 0.000107 grad: 0.0743 (0.0796) loss: 0.8739 (0.8699) time: 0.1483 data: 0.0659 max mem: 9305 +Train: [28] [4900/6250] eta: 0:03:21 lr: 0.000107 grad: 0.0716 (0.0796) loss: 0.8675 (0.8699) time: 0.1398 data: 0.0536 max mem: 9305 +Train: [28] [5000/6250] eta: 0:03:06 lr: 0.000107 grad: 0.0794 (0.0796) loss: 0.8740 (0.8700) time: 0.1908 data: 0.1097 max mem: 9305 +Train: [28] [5100/6250] eta: 0:02:51 lr: 0.000107 grad: 0.0800 (0.0797) loss: 0.8720 (0.8700) time: 0.1385 data: 0.0573 max mem: 9305 +Train: [28] [5200/6250] eta: 0:02:36 lr: 0.000107 grad: 0.0750 (0.0797) loss: 0.8769 (0.8701) time: 0.1367 data: 0.0599 max mem: 9305 +Train: [28] [5300/6250] eta: 0:02:21 lr: 0.000107 grad: 0.0771 (0.0797) loss: 0.8735 (0.8701) time: 0.1416 data: 0.0577 max mem: 9305 +Train: [28] [5400/6250] eta: 0:02:06 lr: 0.000107 grad: 0.0766 (0.0797) loss: 0.8707 (0.8701) time: 0.1498 data: 0.0624 max mem: 9305 +Train: [28] [5500/6250] eta: 0:01:51 lr: 0.000107 grad: 0.0789 (0.0797) loss: 0.8735 (0.8702) time: 0.1753 data: 0.0925 max mem: 9305 +Train: [28] [5600/6250] eta: 0:01:37 lr: 0.000106 grad: 0.0726 (0.0797) loss: 0.8703 (0.8703) time: 0.1960 data: 0.1065 max mem: 9305 +Train: [28] [5700/6250] eta: 0:01:22 lr: 0.000106 grad: 0.0753 (0.0797) loss: 0.8742 (0.8703) time: 0.2242 data: 0.1389 max mem: 9305 +Train: [28] [5800/6250] eta: 0:01:07 lr: 0.000106 grad: 0.0774 (0.0797) loss: 0.8725 (0.8703) time: 0.2003 data: 0.1113 max mem: 9305 +Train: [28] [5900/6250] eta: 0:00:52 lr: 0.000106 grad: 0.0705 (0.0796) loss: 0.8753 (0.8704) time: 0.1839 data: 0.0942 max mem: 9305 +Train: [28] [6000/6250] eta: 0:00:37 lr: 0.000106 grad: 0.0772 (0.0796) loss: 0.8700 (0.8704) time: 0.1478 data: 0.0653 max mem: 9305 +Train: [28] [6100/6250] eta: 0:00:22 lr: 0.000106 grad: 0.0798 (0.0797) loss: 0.8736 (0.8705) time: 0.1957 data: 0.1082 max mem: 9305 +Train: [28] [6200/6250] eta: 0:00:07 lr: 0.000106 grad: 0.0766 (0.0797) loss: 0.8715 (0.8705) time: 0.1421 data: 0.0635 max mem: 9305 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.0775 (0.0797) loss: 0.8760 (0.8705) time: 0.1401 data: 0.0578 max mem: 9305 +Train: [28] Total time: 0:15:41 (0.1507 s / it) +Averaged stats: lr: 0.000106 grad: 0.0775 (0.0797) loss: 0.8760 (0.8705) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:03:57 loss: 0.8954 (0.8954) time: 3.8244 data: 3.7413 max mem: 9305 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8863 (0.8866) time: 0.1284 data: 0.0999 max mem: 9305 +Eval (hcp-train-subset): [28] Total time: 0:00:13 (0.2180 s / it) +Averaged stats (hcp-train-subset): loss: 0.8863 (0.8866) +Eval (hcp-val): [28] [ 0/62] eta: 0:04:33 loss: 0.8832 (0.8832) time: 4.4112 data: 4.3760 max mem: 9305 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8830 (0.8841) time: 0.1204 data: 0.0899 max mem: 9305 +Eval (hcp-val): [28] Total time: 0:00:13 (0.2165 s / it) +Averaged stats (hcp-val): loss: 0.8830 (0.8841) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 6:50:17 lr: 0.000106 grad: 0.0371 (0.0371) loss: 0.9383 (0.9383) time: 3.9388 data: 3.6756 max mem: 9305 +Train: [29] [ 100/6250] eta: 0:21:37 lr: 0.000106 grad: 0.0701 (0.0860) loss: 0.8831 (0.8862) time: 0.1618 data: 0.0669 max mem: 9305 +Train: [29] [ 200/6250] eta: 0:18:09 lr: 0.000106 grad: 0.0758 (0.0850) loss: 0.8734 (0.8789) time: 0.1584 data: 0.0629 max mem: 9305 +Train: [29] [ 300/6250] eta: 0:16:50 lr: 0.000106 grad: 0.0733 (0.0829) loss: 0.8720 (0.8767) time: 0.1610 data: 0.0677 max mem: 9305 +Train: [29] [ 400/6250] eta: 0:16:03 lr: 0.000106 grad: 0.0739 (0.0816) loss: 0.8722 (0.8748) time: 0.1557 data: 0.0555 max mem: 9305 +Train: [29] [ 500/6250] eta: 0:15:22 lr: 0.000106 grad: 0.0725 (0.0809) loss: 0.8753 (0.8747) time: 0.1415 data: 0.0521 max mem: 9305 +Train: [29] [ 600/6250] eta: 0:14:48 lr: 0.000106 grad: 0.0771 (0.0802) loss: 0.8727 (0.8746) time: 0.1470 data: 0.0594 max mem: 9305 +Train: [29] [ 700/6250] eta: 0:14:28 lr: 0.000106 grad: 0.0730 (0.0796) loss: 0.8655 (0.8742) time: 0.1445 data: 0.0606 max mem: 9305 +Train: [29] [ 800/6250] eta: 0:14:07 lr: 0.000106 grad: 0.0721 (0.0791) loss: 0.8688 (0.8740) time: 0.1517 data: 0.0673 max mem: 9305 +Train: [29] [ 900/6250] eta: 0:14:05 lr: 0.000106 grad: 0.0693 (0.0785) loss: 0.8693 (0.8736) time: 0.1291 data: 0.0353 max mem: 9305 +Train: [29] [1000/6250] eta: 0:13:50 lr: 0.000106 grad: 0.0700 (0.0780) loss: 0.8726 (0.8734) time: 0.1280 data: 0.0393 max mem: 9305 +Train: [29] [1100/6250] eta: 0:13:31 lr: 0.000106 grad: 0.0728 (0.0777) loss: 0.8704 (0.8733) time: 0.1359 data: 0.0495 max mem: 9305 +Train: [29] [1200/6250] eta: 0:13:14 lr: 0.000106 grad: 0.0726 (0.0773) loss: 0.8680 (0.8731) time: 0.1281 data: 0.0389 max mem: 9305 +Train: [29] [1300/6250] eta: 0:13:02 lr: 0.000106 grad: 0.0733 (0.0772) loss: 0.8681 (0.8727) time: 0.1262 data: 0.0306 max mem: 9305 +Train: [29] [1400/6250] eta: 0:12:44 lr: 0.000106 grad: 0.0699 (0.0770) loss: 0.8703 (0.8726) time: 0.1288 data: 0.0443 max mem: 9305 +Train: [29] [1500/6250] eta: 0:12:24 lr: 0.000106 grad: 0.0779 (0.0771) loss: 0.8636 (0.8723) time: 0.1369 data: 0.0530 max mem: 9305 +Train: [29] [1600/6250] eta: 0:12:03 lr: 0.000106 grad: 0.0709 (0.0770) loss: 0.8649 (0.8722) time: 0.0983 data: 0.0147 max mem: 9305 +Train: [29] [1700/6250] eta: 0:11:47 lr: 0.000106 grad: 0.0707 (0.0770) loss: 0.8725 (0.8720) time: 0.1759 data: 0.0958 max mem: 9305 +Train: [29] [1800/6250] eta: 0:11:28 lr: 0.000106 grad: 0.0727 (0.0771) loss: 0.8709 (0.8718) time: 0.1326 data: 0.0544 max mem: 9305 +Train: [29] [1900/6250] eta: 0:11:14 lr: 0.000106 grad: 0.0739 (0.0771) loss: 0.8712 (0.8717) time: 0.1714 data: 0.0764 max mem: 9305 +Train: [29] [2000/6250] eta: 0:10:58 lr: 0.000106 grad: 0.0788 (0.0772) loss: 0.8649 (0.8715) time: 0.1150 data: 0.0263 max mem: 9305 +Train: [29] [2100/6250] eta: 0:10:42 lr: 0.000106 grad: 0.0742 (0.0775) loss: 0.8678 (0.8714) time: 0.1632 data: 0.0731 max mem: 9305 +Train: [29] [2200/6250] eta: 0:10:26 lr: 0.000106 grad: 0.0755 (0.0775) loss: 0.8699 (0.8712) time: 0.1372 data: 0.0473 max mem: 9305 +Train: [29] [2300/6250] eta: 0:10:12 lr: 0.000106 grad: 0.0735 (0.0776) loss: 0.8743 (0.8711) time: 0.1910 data: 0.0974 max mem: 9305 +Train: [29] [2400/6250] eta: 0:09:57 lr: 0.000106 grad: 0.0835 (0.0778) loss: 0.8670 (0.8710) time: 0.1465 data: 0.0603 max mem: 9305 +Train: [29] [2500/6250] eta: 0:09:41 lr: 0.000106 grad: 0.0781 (0.0778) loss: 0.8664 (0.8709) time: 0.1476 data: 0.0571 max mem: 9305 +Train: [29] [2600/6250] eta: 0:09:25 lr: 0.000106 grad: 0.0829 (0.0778) loss: 0.8670 (0.8709) time: 0.1431 data: 0.0639 max mem: 9305 +Train: [29] [2700/6250] eta: 0:09:09 lr: 0.000106 grad: 0.0858 (0.0780) loss: 0.8648 (0.8708) time: 0.1519 data: 0.0736 max mem: 9305 +Train: [29] [2800/6250] eta: 0:08:52 lr: 0.000106 grad: 0.0737 (0.0780) loss: 0.8694 (0.8709) time: 0.1505 data: 0.0600 max mem: 9305 +Train: [29] [2900/6250] eta: 0:08:37 lr: 0.000106 grad: 0.0762 (0.0780) loss: 0.8681 (0.8709) time: 0.1585 data: 0.0665 max mem: 9305 +Train: [29] [3000/6250] eta: 0:08:23 lr: 0.000106 grad: 0.0832 (0.0781) loss: 0.8685 (0.8708) time: 0.1628 data: 0.0761 max mem: 9305 +Train: [29] [3100/6250] eta: 0:08:06 lr: 0.000106 grad: 0.0841 (0.0782) loss: 0.8733 (0.8708) time: 0.1690 data: 0.0896 max mem: 9305 +Train: [29] [3200/6250] eta: 0:07:50 lr: 0.000106 grad: 0.0806 (0.0783) loss: 0.8644 (0.8708) time: 0.1417 data: 0.0612 max mem: 9305 +Train: [29] [3300/6250] eta: 0:07:34 lr: 0.000106 grad: 0.0809 (0.0783) loss: 0.8726 (0.8709) time: 0.1605 data: 0.0776 max mem: 9305 +Train: [29] [3400/6250] eta: 0:07:18 lr: 0.000106 grad: 0.0776 (0.0784) loss: 0.8716 (0.8709) time: 0.1376 data: 0.0404 max mem: 9305 +Train: [29] [3500/6250] eta: 0:07:01 lr: 0.000105 grad: 0.0744 (0.0785) loss: 0.8687 (0.8708) time: 0.1365 data: 0.0499 max mem: 9305 +Train: [29] [3600/6250] eta: 0:06:45 lr: 0.000105 grad: 0.0796 (0.0785) loss: 0.8695 (0.8709) time: 0.1435 data: 0.0560 max mem: 9305 +Train: [29] [3700/6250] eta: 0:06:29 lr: 0.000105 grad: 0.0768 (0.0786) loss: 0.8716 (0.8709) time: 0.1452 data: 0.0551 max mem: 9305 +Train: [29] [3800/6250] eta: 0:06:14 lr: 0.000105 grad: 0.0725 (0.0785) loss: 0.8741 (0.8710) time: 0.1822 data: 0.0978 max mem: 9305 +Train: [29] [3900/6250] eta: 0:05:59 lr: 0.000105 grad: 0.0750 (0.0785) loss: 0.8749 (0.8711) time: 0.1473 data: 0.0418 max mem: 9305 +Train: [29] [4000/6250] eta: 0:05:44 lr: 0.000105 grad: 0.0795 (0.0786) loss: 0.8759 (0.8712) time: 0.1417 data: 0.0456 max mem: 9305 +Train: [29] [4100/6250] eta: 0:05:30 lr: 0.000105 grad: 0.0738 (0.0787) loss: 0.8737 (0.8712) time: 0.1963 data: 0.1155 max mem: 9305 +Train: [29] [4200/6250] eta: 0:05:16 lr: 0.000105 grad: 0.0742 (0.0787) loss: 0.8714 (0.8713) time: 0.1171 data: 0.0210 max mem: 9305 +Train: [29] [4300/6250] eta: 0:05:00 lr: 0.000105 grad: 0.0778 (0.0787) loss: 0.8739 (0.8713) time: 0.1768 data: 0.0937 max mem: 9305 +Train: [29] [4400/6250] eta: 0:04:45 lr: 0.000105 grad: 0.0728 (0.0788) loss: 0.8754 (0.8714) time: 0.1886 data: 0.1000 max mem: 9305 +Train: [29] [4500/6250] eta: 0:04:30 lr: 0.000105 grad: 0.0741 (0.0788) loss: 0.8755 (0.8714) time: 0.1104 data: 0.0238 max mem: 9305 +Train: [29] [4600/6250] eta: 0:04:15 lr: 0.000105 grad: 0.0800 (0.0788) loss: 0.8714 (0.8715) time: 0.1784 data: 0.0867 max mem: 9305 +Train: [29] [4700/6250] eta: 0:03:59 lr: 0.000105 grad: 0.0704 (0.0788) loss: 0.8799 (0.8715) time: 0.1768 data: 0.0951 max mem: 9305 +Train: [29] [4800/6250] eta: 0:03:44 lr: 0.000105 grad: 0.0781 (0.0788) loss: 0.8794 (0.8716) time: 0.1446 data: 0.0630 max mem: 9305 +Train: [29] [4900/6250] eta: 0:03:28 lr: 0.000105 grad: 0.0755 (0.0789) loss: 0.8719 (0.8717) time: 0.1385 data: 0.0528 max mem: 9305 +Train: [29] [5000/6250] eta: 0:03:12 lr: 0.000105 grad: 0.0792 (0.0789) loss: 0.8754 (0.8717) time: 0.1754 data: 0.0936 max mem: 9305 +Train: [29] [5100/6250] eta: 0:02:56 lr: 0.000105 grad: 0.0759 (0.0789) loss: 0.8760 (0.8717) time: 0.1332 data: 0.0480 max mem: 9305 +Train: [29] [5200/6250] eta: 0:02:41 lr: 0.000105 grad: 0.0796 (0.0789) loss: 0.8712 (0.8717) time: 0.1327 data: 0.0415 max mem: 9305 +Train: [29] [5300/6250] eta: 0:02:25 lr: 0.000105 grad: 0.0738 (0.0789) loss: 0.8715 (0.8717) time: 0.1298 data: 0.0369 max mem: 9305 +Train: [29] [5400/6250] eta: 0:02:10 lr: 0.000105 grad: 0.0816 (0.0789) loss: 0.8693 (0.8717) time: 0.1308 data: 0.0284 max mem: 9305 +Train: [29] [5500/6250] eta: 0:01:54 lr: 0.000105 grad: 0.0788 (0.0789) loss: 0.8722 (0.8717) time: 0.1484 data: 0.0598 max mem: 9305 +Train: [29] [5600/6250] eta: 0:01:39 lr: 0.000105 grad: 0.0749 (0.0790) loss: 0.8734 (0.8717) time: 0.1617 data: 0.0793 max mem: 9305 +Train: [29] [5700/6250] eta: 0:01:24 lr: 0.000105 grad: 0.0737 (0.0789) loss: 0.8733 (0.8716) time: 0.1505 data: 0.0679 max mem: 9305 +Train: [29] [5800/6250] eta: 0:01:08 lr: 0.000105 grad: 0.0719 (0.0789) loss: 0.8755 (0.8717) time: 0.1542 data: 0.0771 max mem: 9305 +Train: [29] [5900/6250] eta: 0:00:53 lr: 0.000105 grad: 0.0733 (0.0789) loss: 0.8761 (0.8717) time: 0.1344 data: 0.0441 max mem: 9305 +Train: [29] [6000/6250] eta: 0:00:38 lr: 0.000105 grad: 0.0737 (0.0789) loss: 0.8748 (0.8718) time: 0.1768 data: 0.0796 max mem: 9305 +Train: [29] [6100/6250] eta: 0:00:22 lr: 0.000105 grad: 0.0784 (0.0789) loss: 0.8754 (0.8718) time: 0.1591 data: 0.0715 max mem: 9305 +Train: [29] [6200/6250] eta: 0:00:07 lr: 0.000105 grad: 0.0792 (0.0789) loss: 0.8720 (0.8719) time: 0.1597 data: 0.0443 max mem: 9305 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.0716 (0.0789) loss: 0.8737 (0.8719) time: 0.1012 data: 0.0002 max mem: 9305 +Train: [29] Total time: 0:16:10 (0.1553 s / it) +Averaged stats: lr: 0.000105 grad: 0.0716 (0.0789) loss: 0.8737 (0.8719) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:06:13 loss: 0.8991 (0.8991) time: 6.0183 data: 5.9806 max mem: 9305 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8887 (0.8881) time: 0.1240 data: 0.0956 max mem: 9305 +Eval (hcp-train-subset): [29] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-train-subset): loss: 0.8887 (0.8881) +Making plots (hcp-train-subset): example=10 +Eval (hcp-val): [29] [ 0/62] eta: 0:05:06 loss: 0.8848 (0.8848) time: 4.9487 data: 4.9128 max mem: 9305 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8826 (0.8844) time: 0.1465 data: 0.1167 max mem: 9305 +Eval (hcp-val): [29] Total time: 0:00:13 (0.2231 s / it) +Averaged stats (hcp-val): loss: 0.8826 (0.8844) +Making plots (hcp-val): example=53 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [30] [ 0/6250] eta: 8:11:41 lr: 0.000105 grad: 0.1082 (0.1082) loss: 0.8885 (0.8885) time: 4.7203 data: 4.4250 max mem: 9305 +Train: [30] [ 100/6250] eta: 0:21:31 lr: 0.000105 grad: 0.0794 (0.0851) loss: 0.8672 (0.8758) time: 0.1411 data: 0.0419 max mem: 9305 +Train: [30] [ 200/6250] eta: 0:18:26 lr: 0.000105 grad: 0.0735 (0.0857) loss: 0.8748 (0.8713) time: 0.1480 data: 0.0512 max mem: 9305 +Train: [30] [ 300/6250] eta: 0:17:35 lr: 0.000105 grad: 0.0764 (0.0842) loss: 0.8762 (0.8709) time: 0.1668 data: 0.0740 max mem: 9305 +Train: [30] [ 400/6250] eta: 0:16:47 lr: 0.000105 grad: 0.0664 (0.0823) loss: 0.8734 (0.8711) time: 0.1575 data: 0.0637 max mem: 9305 +Train: [30] [ 500/6250] eta: 0:15:58 lr: 0.000105 grad: 0.0722 (0.0819) loss: 0.8722 (0.8708) time: 0.1262 data: 0.0316 max mem: 9305 +Train: [30] [ 600/6250] eta: 0:15:28 lr: 0.000105 grad: 0.0726 (0.0814) loss: 0.8661 (0.8706) time: 0.1592 data: 0.0683 max mem: 9305 +Train: [30] [ 700/6250] eta: 0:14:58 lr: 0.000105 grad: 0.0871 (0.0809) loss: 0.8624 (0.8700) time: 0.1670 data: 0.0790 max mem: 9305 +Train: [30] [ 800/6250] eta: 0:14:30 lr: 0.000105 grad: 0.0744 (0.0804) loss: 0.8616 (0.8699) time: 0.1431 data: 0.0533 max mem: 9305 +Train: [30] [ 900/6250] eta: 0:14:25 lr: 0.000105 grad: 0.0744 (0.0799) loss: 0.8673 (0.8697) time: 0.2400 data: 0.1566 max mem: 9305 +Train: [30] [1000/6250] eta: 0:14:04 lr: 0.000105 grad: 0.0731 (0.0796) loss: 0.8657 (0.8697) time: 0.1305 data: 0.0004 max mem: 9305 +Train: [30] [1100/6250] eta: 0:13:53 lr: 0.000105 grad: 0.0742 (0.0790) loss: 0.8706 (0.8698) time: 0.1303 data: 0.0338 max mem: 9305 +Train: [30] [1200/6250] eta: 0:13:34 lr: 0.000105 grad: 0.0734 (0.0787) loss: 0.8701 (0.8699) time: 0.1354 data: 0.0486 max mem: 9305 +Train: [30] [1300/6250] eta: 0:13:11 lr: 0.000105 grad: 0.0791 (0.0786) loss: 0.8694 (0.8701) time: 0.1385 data: 0.0519 max mem: 9305 +Train: [30] [1400/6250] eta: 0:12:48 lr: 0.000104 grad: 0.0765 (0.0787) loss: 0.8630 (0.8702) time: 0.1360 data: 0.0575 max mem: 9305 +Train: [30] [1500/6250] eta: 0:12:26 lr: 0.000104 grad: 0.0754 (0.0787) loss: 0.8662 (0.8702) time: 0.1340 data: 0.0535 max mem: 9305 +Train: [30] [1600/6250] eta: 0:12:13 lr: 0.000104 grad: 0.0737 (0.0787) loss: 0.8732 (0.8702) time: 0.2253 data: 0.1339 max mem: 9305 +Train: [30] [1700/6250] eta: 0:11:51 lr: 0.000104 grad: 0.0745 (0.0786) loss: 0.8732 (0.8702) time: 0.1501 data: 0.0639 max mem: 9305 +Train: [30] [1800/6250] eta: 0:11:32 lr: 0.000104 grad: 0.0742 (0.0786) loss: 0.8725 (0.8702) time: 0.1597 data: 0.0799 max mem: 9305 +Train: [30] [1900/6250] eta: 0:11:15 lr: 0.000104 grad: 0.0801 (0.0785) loss: 0.8740 (0.8703) time: 0.1340 data: 0.0414 max mem: 9305 +Train: [30] [2000/6250] eta: 0:10:58 lr: 0.000104 grad: 0.0774 (0.0786) loss: 0.8729 (0.8703) time: 0.1293 data: 0.0388 max mem: 9305 +Train: [30] [2100/6250] eta: 0:10:48 lr: 0.000104 grad: 0.0795 (0.0786) loss: 0.8754 (0.8704) time: 0.1829 data: 0.1073 max mem: 9305 +Train: [30] [2200/6250] eta: 0:10:33 lr: 0.000104 grad: 0.0759 (0.0787) loss: 0.8739 (0.8705) time: 0.1581 data: 0.0787 max mem: 9305 +Train: [30] [2300/6250] eta: 0:10:18 lr: 0.000104 grad: 0.0737 (0.0787) loss: 0.8702 (0.8706) time: 0.1911 data: 0.1049 max mem: 9305 +Train: [30] [2400/6250] eta: 0:10:04 lr: 0.000104 grad: 0.0736 (0.0787) loss: 0.8742 (0.8707) time: 0.1643 data: 0.0742 max mem: 9305 +Train: [30] [2500/6250] eta: 0:09:48 lr: 0.000104 grad: 0.0726 (0.0787) loss: 0.8693 (0.8707) time: 0.1516 data: 0.0768 max mem: 9305 +Train: [30] [2600/6250] eta: 0:09:31 lr: 0.000104 grad: 0.0746 (0.0787) loss: 0.8705 (0.8707) time: 0.1431 data: 0.0570 max mem: 9305 +Train: [30] [2700/6250] eta: 0:09:13 lr: 0.000104 grad: 0.0784 (0.0787) loss: 0.8761 (0.8706) time: 0.1282 data: 0.0371 max mem: 9305 +Train: [30] [2800/6250] eta: 0:08:56 lr: 0.000104 grad: 0.0740 (0.0788) loss: 0.8693 (0.8706) time: 0.1573 data: 0.0717 max mem: 9305 +Train: [30] [2900/6250] eta: 0:08:37 lr: 0.000104 grad: 0.0743 (0.0789) loss: 0.8742 (0.8705) time: 0.1243 data: 0.0349 max mem: 9305 +Train: [30] [3000/6250] eta: 0:08:22 lr: 0.000104 grad: 0.0901 (0.0790) loss: 0.8626 (0.8704) time: 0.1454 data: 0.0581 max mem: 9305 +Train: [30] [3100/6250] eta: 0:08:08 lr: 0.000104 grad: 0.0839 (0.0793) loss: 0.8597 (0.8703) time: 0.1208 data: 0.0300 max mem: 9305 +Train: [30] [3200/6250] eta: 0:07:52 lr: 0.000104 grad: 0.0815 (0.0794) loss: 0.8687 (0.8702) time: 0.1467 data: 0.0557 max mem: 9305 +Train: [30] [3300/6250] eta: 0:07:37 lr: 0.000104 grad: 0.0790 (0.0796) loss: 0.8718 (0.8702) time: 0.1969 data: 0.1109 max mem: 9305 +Train: [30] [3400/6250] eta: 0:07:21 lr: 0.000104 grad: 0.0815 (0.0796) loss: 0.8688 (0.8701) time: 0.1665 data: 0.0720 max mem: 9305 +Train: [30] [3500/6250] eta: 0:07:06 lr: 0.000104 grad: 0.0826 (0.0798) loss: 0.8683 (0.8701) time: 0.1584 data: 0.0666 max mem: 9305 +Train: [30] [3600/6250] eta: 0:06:52 lr: 0.000104 grad: 0.0869 (0.0799) loss: 0.8673 (0.8700) time: 0.2263 data: 0.1334 max mem: 9305 +Train: [30] [3700/6250] eta: 0:06:36 lr: 0.000104 grad: 0.0788 (0.0800) loss: 0.8640 (0.8699) time: 0.1541 data: 0.0617 max mem: 9305 +Train: [30] [3800/6250] eta: 0:06:20 lr: 0.000104 grad: 0.0770 (0.0801) loss: 0.8637 (0.8698) time: 0.1200 data: 0.0386 max mem: 9305 +Train: [30] [3900/6250] eta: 0:06:04 lr: 0.000104 grad: 0.0850 (0.0803) loss: 0.8624 (0.8698) time: 0.1323 data: 0.0521 max mem: 9305 +Train: [30] [4000/6250] eta: 0:05:48 lr: 0.000104 grad: 0.0878 (0.0803) loss: 0.8663 (0.8698) time: 0.1487 data: 0.0636 max mem: 9305 +Train: [30] [4100/6250] eta: 0:05:32 lr: 0.000104 grad: 0.0829 (0.0804) loss: 0.8678 (0.8697) time: 0.1347 data: 0.0541 max mem: 9305 +Train: [30] [4200/6250] eta: 0:05:17 lr: 0.000104 grad: 0.0819 (0.0805) loss: 0.8667 (0.8697) time: 0.1883 data: 0.1042 max mem: 9305 +Train: [30] [4300/6250] eta: 0:05:01 lr: 0.000104 grad: 0.0778 (0.0805) loss: 0.8642 (0.8696) time: 0.1430 data: 0.0541 max mem: 9305 +Train: [30] [4400/6250] eta: 0:04:46 lr: 0.000104 grad: 0.0764 (0.0805) loss: 0.8661 (0.8696) time: 0.1449 data: 0.0613 max mem: 9305 +Train: [30] [4500/6250] eta: 0:04:30 lr: 0.000104 grad: 0.0792 (0.0806) loss: 0.8696 (0.8696) time: 0.1508 data: 0.0700 max mem: 9305 +Train: [30] [4600/6250] eta: 0:04:15 lr: 0.000104 grad: 0.0799 (0.0806) loss: 0.8686 (0.8696) time: 0.1280 data: 0.0410 max mem: 9305 +Train: [30] [4700/6250] eta: 0:03:59 lr: 0.000104 grad: 0.0744 (0.0805) loss: 0.8752 (0.8696) time: 0.1519 data: 0.0711 max mem: 9305 +Train: [30] [4800/6250] eta: 0:03:44 lr: 0.000104 grad: 0.0790 (0.0805) loss: 0.8679 (0.8697) time: 0.1253 data: 0.0383 max mem: 9305 +Train: [30] [4900/6250] eta: 0:03:28 lr: 0.000104 grad: 0.0723 (0.0805) loss: 0.8729 (0.8697) time: 0.1537 data: 0.0739 max mem: 9305 +Train: [30] [5000/6250] eta: 0:03:12 lr: 0.000104 grad: 0.0776 (0.0805) loss: 0.8757 (0.8698) time: 0.1531 data: 0.0712 max mem: 9305 +Train: [30] [5100/6250] eta: 0:02:57 lr: 0.000104 grad: 0.0771 (0.0805) loss: 0.8704 (0.8698) time: 0.1549 data: 0.0739 max mem: 9305 +Train: [30] [5200/6250] eta: 0:02:41 lr: 0.000104 grad: 0.0812 (0.0805) loss: 0.8709 (0.8698) time: 0.1620 data: 0.0726 max mem: 9305 +Train: [30] [5300/6250] eta: 0:02:26 lr: 0.000104 grad: 0.0853 (0.0806) loss: 0.8736 (0.8697) time: 0.1670 data: 0.0758 max mem: 9305 +Train: [30] [5400/6250] eta: 0:02:10 lr: 0.000103 grad: 0.0759 (0.0806) loss: 0.8733 (0.8698) time: 0.1460 data: 0.0611 max mem: 9305 +Train: [30] [5500/6250] eta: 0:01:55 lr: 0.000103 grad: 0.0781 (0.0806) loss: 0.8720 (0.8698) time: 0.1264 data: 0.0424 max mem: 9305 +Train: [30] [5600/6250] eta: 0:01:39 lr: 0.000103 grad: 0.0793 (0.0806) loss: 0.8700 (0.8698) time: 0.1350 data: 0.0541 max mem: 9305 +Train: [30] [5700/6250] eta: 0:01:24 lr: 0.000103 grad: 0.0721 (0.0806) loss: 0.8758 (0.8698) time: 0.1366 data: 0.0565 max mem: 9305 +Train: [30] [5800/6250] eta: 0:01:08 lr: 0.000103 grad: 0.0844 (0.0806) loss: 0.8658 (0.8698) time: 0.1424 data: 0.0626 max mem: 9305 +Train: [30] [5900/6250] eta: 0:00:53 lr: 0.000103 grad: 0.0843 (0.0807) loss: 0.8667 (0.8698) time: 0.1106 data: 0.0298 max mem: 9305 +Train: [30] [6000/6250] eta: 0:00:38 lr: 0.000103 grad: 0.0819 (0.0807) loss: 0.8687 (0.8698) time: 0.1524 data: 0.0740 max mem: 9305 +Train: [30] [6100/6250] eta: 0:00:22 lr: 0.000103 grad: 0.0755 (0.0808) loss: 0.8696 (0.8697) time: 0.1315 data: 0.0434 max mem: 9305 +Train: [30] [6200/6250] eta: 0:00:07 lr: 0.000103 grad: 0.0800 (0.0808) loss: 0.8738 (0.8698) time: 0.1250 data: 0.0443 max mem: 9305 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.0821 (0.0808) loss: 0.8701 (0.8697) time: 0.1218 data: 0.0356 max mem: 9305 +Train: [30] Total time: 0:15:52 (0.1524 s / it) +Averaged stats: lr: 0.000103 grad: 0.0821 (0.0808) loss: 0.8701 (0.8697) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:05:04 loss: 0.9012 (0.9012) time: 4.9146 data: 4.8806 max mem: 9305 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8871 (0.8879) time: 0.1318 data: 0.1020 max mem: 9305 +Eval (hcp-train-subset): [30] Total time: 0:00:13 (0.2236 s / it) +Averaged stats (hcp-train-subset): loss: 0.8871 (0.8879) +Eval (hcp-val): [30] [ 0/62] eta: 0:04:10 loss: 0.8780 (0.8780) time: 4.0437 data: 3.9891 max mem: 9305 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8821 (0.8832) time: 0.1603 data: 0.1322 max mem: 9305 +Eval (hcp-val): [30] Total time: 0:00:14 (0.2320 s / it) +Averaged stats (hcp-val): loss: 0.8821 (0.8832) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [31] [ 0/6250] eta: 8:34:32 lr: 0.000103 grad: 0.0499 (0.0499) loss: 0.8970 (0.8970) time: 4.9395 data: 4.6830 max mem: 9305 +Train: [31] [ 100/6250] eta: 0:21:26 lr: 0.000103 grad: 0.0781 (0.0937) loss: 0.8644 (0.8766) time: 0.1483 data: 0.0443 max mem: 9305 +Train: [31] [ 200/6250] eta: 0:18:39 lr: 0.000103 grad: 0.0790 (0.0904) loss: 0.8737 (0.8713) time: 0.1281 data: 0.0269 max mem: 9305 +Train: [31] [ 300/6250] eta: 0:17:04 lr: 0.000103 grad: 0.0761 (0.0892) loss: 0.8697 (0.8692) time: 0.1442 data: 0.0445 max mem: 9305 +Train: [31] [ 400/6250] eta: 0:16:11 lr: 0.000103 grad: 0.0837 (0.0884) loss: 0.8697 (0.8685) time: 0.1726 data: 0.0915 max mem: 9305 +Train: [31] [ 500/6250] eta: 0:15:15 lr: 0.000103 grad: 0.0765 (0.0867) loss: 0.8778 (0.8687) time: 0.1545 data: 0.0698 max mem: 9305 +Train: [31] [ 600/6250] eta: 0:14:55 lr: 0.000103 grad: 0.0680 (0.0857) loss: 0.8737 (0.8693) time: 0.1737 data: 0.0862 max mem: 9305 +Train: [31] [ 700/6250] eta: 0:14:51 lr: 0.000103 grad: 0.0782 (0.0853) loss: 0.8697 (0.8695) time: 0.2402 data: 0.1537 max mem: 9305 +Train: [31] [ 800/6250] eta: 0:14:37 lr: 0.000103 grad: 0.0712 (0.0845) loss: 0.8672 (0.8695) time: 0.1064 data: 0.0002 max mem: 9305 +Train: [31] [ 900/6250] eta: 0:14:21 lr: 0.000103 grad: 0.0811 (0.0841) loss: 0.8652 (0.8693) time: 0.1603 data: 0.0814 max mem: 9305 +Train: [31] [1000/6250] eta: 0:13:56 lr: 0.000103 grad: 0.0738 (0.0838) loss: 0.8681 (0.8695) time: 0.1846 data: 0.1006 max mem: 9305 +Train: [31] [1100/6250] eta: 0:13:37 lr: 0.000103 grad: 0.0711 (0.0835) loss: 0.8695 (0.8695) time: 0.2035 data: 0.1227 max mem: 9305 +Train: [31] [1200/6250] eta: 0:13:17 lr: 0.000103 grad: 0.0731 (0.0831) loss: 0.8713 (0.8695) time: 0.1689 data: 0.0768 max mem: 9305 +Train: [31] [1300/6250] eta: 0:13:04 lr: 0.000103 grad: 0.0736 (0.0828) loss: 0.8680 (0.8696) time: 0.1528 data: 0.0717 max mem: 9305 +Train: [31] [1400/6250] eta: 0:12:52 lr: 0.000103 grad: 0.0737 (0.0825) loss: 0.8723 (0.8695) time: 0.2223 data: 0.1326 max mem: 9305 +Train: [31] [1500/6250] eta: 0:12:26 lr: 0.000103 grad: 0.0775 (0.0823) loss: 0.8729 (0.8695) time: 0.1456 data: 0.0621 max mem: 9305 +Train: [31] [1600/6250] eta: 0:12:26 lr: 0.000103 grad: 0.0762 (0.0821) loss: 0.8651 (0.8695) time: 0.3719 data: 0.2694 max mem: 9305 +Train: [31] [1700/6250] eta: 0:12:08 lr: 0.000103 grad: 0.0736 (0.0820) loss: 0.8721 (0.8694) time: 0.1883 data: 0.0517 max mem: 9305 +Train: [31] [1800/6250] eta: 0:11:54 lr: 0.000103 grad: 0.0757 (0.0822) loss: 0.8713 (0.8693) time: 0.1707 data: 0.0768 max mem: 9305 +Train: [31] [1900/6250] eta: 0:11:36 lr: 0.000103 grad: 0.0729 (0.0821) loss: 0.8684 (0.8692) time: 0.1658 data: 0.0810 max mem: 9305 +Train: [31] [2000/6250] eta: 0:11:18 lr: 0.000103 grad: 0.0766 (0.0819) loss: 0.8570 (0.8691) time: 0.1550 data: 0.0749 max mem: 9305 +Train: [31] [2100/6250] eta: 0:11:01 lr: 0.000103 grad: 0.0840 (0.0820) loss: 0.8640 (0.8689) time: 0.1523 data: 0.0618 max mem: 9305 +Train: [31] [2200/6250] eta: 0:10:42 lr: 0.000103 grad: 0.0813 (0.0821) loss: 0.8686 (0.8689) time: 0.1396 data: 0.0541 max mem: 9305 +Train: [31] [2300/6250] eta: 0:10:22 lr: 0.000103 grad: 0.0757 (0.0822) loss: 0.8663 (0.8687) time: 0.1293 data: 0.0507 max mem: 9305 +Train: [31] [2400/6250] eta: 0:10:02 lr: 0.000103 grad: 0.0763 (0.0821) loss: 0.8730 (0.8686) time: 0.1300 data: 0.0423 max mem: 9305 +Train: [31] [2500/6250] eta: 0:09:42 lr: 0.000103 grad: 0.0786 (0.0822) loss: 0.8662 (0.8686) time: 0.1044 data: 0.0158 max mem: 9305 +Train: [31] [2600/6250] eta: 0:09:23 lr: 0.000103 grad: 0.0772 (0.0821) loss: 0.8683 (0.8686) time: 0.1198 data: 0.0359 max mem: 9305 +Train: [31] [2700/6250] eta: 0:09:04 lr: 0.000103 grad: 0.0749 (0.0823) loss: 0.8712 (0.8685) time: 0.1316 data: 0.0416 max mem: 9305 +Train: [31] [2800/6250] eta: 0:08:46 lr: 0.000103 grad: 0.0809 (0.0823) loss: 0.8642 (0.8685) time: 0.1081 data: 0.0265 max mem: 9305 +Train: [31] [2900/6250] eta: 0:08:28 lr: 0.000103 grad: 0.0732 (0.0823) loss: 0.8704 (0.8685) time: 0.1453 data: 0.0652 max mem: 9305 +Train: [31] [3000/6250] eta: 0:08:15 lr: 0.000103 grad: 0.0795 (0.0823) loss: 0.8749 (0.8686) time: 0.2875 data: 0.1957 max mem: 9305 +Train: [31] [3100/6250] eta: 0:07:56 lr: 0.000103 grad: 0.0782 (0.0823) loss: 0.8682 (0.8686) time: 0.1406 data: 0.0595 max mem: 9305 +Train: [31] [3200/6250] eta: 0:07:40 lr: 0.000102 grad: 0.0777 (0.0823) loss: 0.8675 (0.8686) time: 0.1311 data: 0.0528 max mem: 9305 +Train: [31] [3300/6250] eta: 0:07:23 lr: 0.000102 grad: 0.0751 (0.0822) loss: 0.8725 (0.8687) time: 0.1330 data: 0.0494 max mem: 9305 +Train: [31] [3400/6250] eta: 0:07:07 lr: 0.000102 grad: 0.0740 (0.0821) loss: 0.8712 (0.8688) time: 0.1697 data: 0.0845 max mem: 9305 +Train: [31] [3500/6250] eta: 0:06:51 lr: 0.000102 grad: 0.0787 (0.0820) loss: 0.8737 (0.8689) time: 0.1428 data: 0.0605 max mem: 9305 +Train: [31] [3600/6250] eta: 0:06:35 lr: 0.000102 grad: 0.0787 (0.0819) loss: 0.8690 (0.8689) time: 0.1379 data: 0.0568 max mem: 9305 +Train: [31] [3700/6250] eta: 0:06:21 lr: 0.000102 grad: 0.0711 (0.0818) loss: 0.8733 (0.8689) time: 0.2005 data: 0.1161 max mem: 9305 +Train: [31] [3800/6250] eta: 0:06:04 lr: 0.000102 grad: 0.0802 (0.0818) loss: 0.8702 (0.8690) time: 0.1184 data: 0.0348 max mem: 9305 +Train: [31] [3900/6250] eta: 0:05:49 lr: 0.000102 grad: 0.0839 (0.0817) loss: 0.8666 (0.8690) time: 0.1434 data: 0.0644 max mem: 9305 +Train: [31] [4000/6250] eta: 0:05:33 lr: 0.000102 grad: 0.0776 (0.0817) loss: 0.8708 (0.8690) time: 0.1454 data: 0.0648 max mem: 9305 +Train: [31] [4100/6250] eta: 0:05:17 lr: 0.000102 grad: 0.0768 (0.0816) loss: 0.8712 (0.8691) time: 0.1290 data: 0.0400 max mem: 9305 +Train: [31] [4200/6250] eta: 0:05:02 lr: 0.000102 grad: 0.0765 (0.0816) loss: 0.8622 (0.8690) time: 0.1330 data: 0.0503 max mem: 9305 +Train: [31] [4300/6250] eta: 0:04:47 lr: 0.000102 grad: 0.0796 (0.0816) loss: 0.8701 (0.8690) time: 0.1933 data: 0.1003 max mem: 9305 +Train: [31] [4400/6250] eta: 0:04:32 lr: 0.000102 grad: 0.0770 (0.0816) loss: 0.8663 (0.8690) time: 0.1330 data: 0.0493 max mem: 9305 +Train: [31] [4500/6250] eta: 0:04:16 lr: 0.000102 grad: 0.0794 (0.0816) loss: 0.8663 (0.8689) time: 0.1180 data: 0.0358 max mem: 9305 +Train: [31] [4600/6250] eta: 0:04:01 lr: 0.000102 grad: 0.0784 (0.0816) loss: 0.8635 (0.8688) time: 0.1335 data: 0.0511 max mem: 9305 +Train: [31] [4700/6250] eta: 0:03:46 lr: 0.000102 grad: 0.0841 (0.0816) loss: 0.8644 (0.8687) time: 0.1224 data: 0.0373 max mem: 9305 +Train: [31] [4800/6250] eta: 0:03:31 lr: 0.000102 grad: 0.0829 (0.0817) loss: 0.8575 (0.8686) time: 0.0896 data: 0.0002 max mem: 9305 +Train: [31] [4900/6250] eta: 0:03:16 lr: 0.000102 grad: 0.0740 (0.0817) loss: 0.8662 (0.8685) time: 0.0918 data: 0.0108 max mem: 9305 +Train: [31] [5000/6250] eta: 0:03:01 lr: 0.000102 grad: 0.0793 (0.0817) loss: 0.8618 (0.8685) time: 0.1371 data: 0.0427 max mem: 9305 +Train: [31] [5100/6250] eta: 0:02:47 lr: 0.000102 grad: 0.0766 (0.0817) loss: 0.8699 (0.8684) time: 0.1715 data: 0.0934 max mem: 9305 +Train: [31] [5200/6250] eta: 0:02:32 lr: 0.000102 grad: 0.0782 (0.0818) loss: 0.8675 (0.8683) time: 0.1169 data: 0.0335 max mem: 9305 +Train: [31] [5300/6250] eta: 0:02:17 lr: 0.000102 grad: 0.0868 (0.0818) loss: 0.8627 (0.8683) time: 0.1605 data: 0.0746 max mem: 9305 +Train: [31] [5400/6250] eta: 0:02:03 lr: 0.000102 grad: 0.0768 (0.0818) loss: 0.8678 (0.8683) time: 0.1427 data: 0.0649 max mem: 9305 +Train: [31] [5500/6250] eta: 0:01:48 lr: 0.000102 grad: 0.0829 (0.0818) loss: 0.8702 (0.8682) time: 0.1619 data: 0.0743 max mem: 9305 +Train: [31] [5600/6250] eta: 0:01:34 lr: 0.000102 grad: 0.0809 (0.0818) loss: 0.8653 (0.8682) time: 0.1505 data: 0.0338 max mem: 9305 +Train: [31] [5700/6250] eta: 0:01:19 lr: 0.000102 grad: 0.0806 (0.0819) loss: 0.8673 (0.8682) time: 0.1709 data: 0.0827 max mem: 9305 +Train: [31] [5800/6250] eta: 0:01:05 lr: 0.000102 grad: 0.0823 (0.0819) loss: 0.8640 (0.8682) time: 0.1430 data: 0.0672 max mem: 9305 +Train: [31] [5900/6250] eta: 0:00:50 lr: 0.000102 grad: 0.0810 (0.0819) loss: 0.8697 (0.8682) time: 0.1593 data: 0.0637 max mem: 9305 +Train: [31] [6000/6250] eta: 0:00:36 lr: 0.000102 grad: 0.0866 (0.0820) loss: 0.8649 (0.8681) time: 0.1316 data: 0.0395 max mem: 9305 +Train: [31] [6100/6250] eta: 0:00:21 lr: 0.000102 grad: 0.0777 (0.0821) loss: 0.8674 (0.8681) time: 0.1189 data: 0.0431 max mem: 9305 +Train: [31] [6200/6250] eta: 0:00:07 lr: 0.000102 grad: 0.0789 (0.0821) loss: 0.8725 (0.8681) time: 0.1290 data: 0.0478 max mem: 9305 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.0845 (0.0821) loss: 0.8577 (0.8681) time: 0.1551 data: 0.0785 max mem: 9305 +Train: [31] Total time: 0:15:10 (0.1457 s / it) +Averaged stats: lr: 0.000102 grad: 0.0845 (0.0821) loss: 0.8577 (0.8681) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:03:51 loss: 0.8955 (0.8955) time: 3.7408 data: 3.6534 max mem: 9305 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8857 (0.8868) time: 0.1160 data: 0.0878 max mem: 9305 +Eval (hcp-train-subset): [31] Total time: 0:00:13 (0.2104 s / it) +Averaged stats (hcp-train-subset): loss: 0.8857 (0.8868) +Eval (hcp-val): [31] [ 0/62] eta: 0:03:20 loss: 0.8772 (0.8772) time: 3.2261 data: 3.1275 max mem: 9305 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8807 (0.8830) time: 0.1310 data: 0.1003 max mem: 9305 +Eval (hcp-val): [31] Total time: 0:00:12 (0.2043 s / it) +Averaged stats (hcp-val): loss: 0.8807 (0.8830) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [32] [ 0/6250] eta: 8:01:46 lr: 0.000102 grad: 0.0540 (0.0540) loss: 0.9276 (0.9276) time: 4.6251 data: 4.3740 max mem: 9305 +Train: [32] [ 100/6250] eta: 0:19:19 lr: 0.000102 grad: 0.0732 (0.0779) loss: 0.8786 (0.8886) time: 0.1334 data: 0.0352 max mem: 9305 +Train: [32] [ 200/6250] eta: 0:16:31 lr: 0.000102 grad: 0.0708 (0.0783) loss: 0.8771 (0.8840) time: 0.1304 data: 0.0411 max mem: 9305 +Train: [32] [ 300/6250] eta: 0:15:15 lr: 0.000102 grad: 0.0733 (0.0771) loss: 0.8781 (0.8819) time: 0.1319 data: 0.0510 max mem: 9305 +Train: [32] [ 400/6250] eta: 0:14:11 lr: 0.000102 grad: 0.0766 (0.0763) loss: 0.8812 (0.8814) time: 0.1235 data: 0.0334 max mem: 9305 +Train: [32] [ 500/6250] eta: 0:13:25 lr: 0.000102 grad: 0.0765 (0.0762) loss: 0.8787 (0.8808) time: 0.1241 data: 0.0376 max mem: 9305 +Train: [32] [ 600/6250] eta: 0:12:52 lr: 0.000102 grad: 0.0728 (0.0761) loss: 0.8813 (0.8803) time: 0.1118 data: 0.0220 max mem: 9305 +Train: [32] [ 700/6250] eta: 0:12:22 lr: 0.000102 grad: 0.0745 (0.0762) loss: 0.8743 (0.8794) time: 0.1068 data: 0.0202 max mem: 9305 +Train: [32] [ 800/6250] eta: 0:12:00 lr: 0.000101 grad: 0.0732 (0.0762) loss: 0.8714 (0.8784) time: 0.1321 data: 0.0386 max mem: 9305 +Train: [32] [ 900/6250] eta: 0:11:46 lr: 0.000101 grad: 0.0724 (0.0762) loss: 0.8746 (0.8777) time: 0.1245 data: 0.0365 max mem: 9305 +Train: [32] [1000/6250] eta: 0:11:30 lr: 0.000101 grad: 0.0707 (0.0765) loss: 0.8724 (0.8773) time: 0.1358 data: 0.0492 max mem: 9305 +Train: [32] [1100/6250] eta: 0:11:17 lr: 0.000101 grad: 0.0729 (0.0771) loss: 0.8760 (0.8769) time: 0.1403 data: 0.0537 max mem: 9305 +Train: [32] [1200/6250] eta: 0:11:06 lr: 0.000101 grad: 0.0711 (0.0771) loss: 0.8694 (0.8765) time: 0.1144 data: 0.0311 max mem: 9305 +Train: [32] [1300/6250] eta: 0:10:57 lr: 0.000101 grad: 0.0758 (0.0774) loss: 0.8669 (0.8760) time: 0.1460 data: 0.0636 max mem: 9305 +Train: [32] [1400/6250] eta: 0:10:47 lr: 0.000101 grad: 0.0788 (0.0776) loss: 0.8692 (0.8754) time: 0.1530 data: 0.0648 max mem: 9305 +Train: [32] [1500/6250] eta: 0:10:36 lr: 0.000101 grad: 0.0774 (0.0779) loss: 0.8747 (0.8751) time: 0.1395 data: 0.0595 max mem: 9305 +Train: [32] [1600/6250] eta: 0:10:22 lr: 0.000101 grad: 0.0777 (0.0780) loss: 0.8718 (0.8747) time: 0.1305 data: 0.0432 max mem: 9305 +Train: [32] [1700/6250] eta: 0:10:14 lr: 0.000101 grad: 0.0831 (0.0781) loss: 0.8749 (0.8743) time: 0.1478 data: 0.0640 max mem: 9305 +Train: [32] [1800/6250] eta: 0:10:01 lr: 0.000101 grad: 0.0837 (0.0783) loss: 0.8644 (0.8739) time: 0.1266 data: 0.0454 max mem: 9305 +Train: [32] [1900/6250] eta: 0:09:50 lr: 0.000101 grad: 0.0834 (0.0784) loss: 0.8683 (0.8737) time: 0.1371 data: 0.0473 max mem: 9305 +Train: [32] [2000/6250] eta: 0:09:39 lr: 0.000101 grad: 0.0761 (0.0785) loss: 0.8692 (0.8735) time: 0.1599 data: 0.0767 max mem: 9305 +Train: [32] [2100/6250] eta: 0:09:24 lr: 0.000101 grad: 0.0770 (0.0784) loss: 0.8719 (0.8734) time: 0.1312 data: 0.0471 max mem: 9305 +Train: [32] [2200/6250] eta: 0:09:08 lr: 0.000101 grad: 0.0809 (0.0785) loss: 0.8667 (0.8733) time: 0.1335 data: 0.0488 max mem: 9305 +Train: [32] [2300/6250] eta: 0:08:52 lr: 0.000101 grad: 0.0750 (0.0786) loss: 0.8686 (0.8732) time: 0.1186 data: 0.0362 max mem: 9305 +Train: [32] [2400/6250] eta: 0:08:35 lr: 0.000101 grad: 0.0734 (0.0788) loss: 0.8698 (0.8731) time: 0.1246 data: 0.0396 max mem: 9305 +Train: [32] [2500/6250] eta: 0:08:19 lr: 0.000101 grad: 0.0762 (0.0790) loss: 0.8704 (0.8730) time: 0.1078 data: 0.0274 max mem: 9305 +Train: [32] [2600/6250] eta: 0:08:04 lr: 0.000101 grad: 0.0772 (0.0791) loss: 0.8706 (0.8729) time: 0.1252 data: 0.0423 max mem: 9305 +Train: [32] [2700/6250] eta: 0:07:49 lr: 0.000101 grad: 0.0807 (0.0794) loss: 0.8675 (0.8728) time: 0.1156 data: 0.0305 max mem: 9305 +Train: [32] [2800/6250] eta: 0:07:36 lr: 0.000101 grad: 0.0801 (0.0795) loss: 0.8719 (0.8726) time: 0.1309 data: 0.0513 max mem: 9305 +Train: [32] [2900/6250] eta: 0:07:22 lr: 0.000101 grad: 0.0816 (0.0796) loss: 0.8739 (0.8726) time: 0.1266 data: 0.0407 max mem: 9305 +Train: [32] [3000/6250] eta: 0:07:09 lr: 0.000101 grad: 0.0847 (0.0797) loss: 0.8719 (0.8725) time: 0.1339 data: 0.0540 max mem: 9305 +Train: [32] [3100/6250] eta: 0:06:57 lr: 0.000101 grad: 0.0883 (0.0798) loss: 0.8701 (0.8725) time: 0.1292 data: 0.0430 max mem: 9305 +Train: [32] [3200/6250] eta: 0:06:45 lr: 0.000101 grad: 0.0843 (0.0800) loss: 0.8707 (0.8724) time: 0.1160 data: 0.0339 max mem: 9305 +Train: [32] [3300/6250] eta: 0:06:32 lr: 0.000101 grad: 0.0806 (0.0802) loss: 0.8733 (0.8723) time: 0.1412 data: 0.0569 max mem: 9305 +Train: [32] [3400/6250] eta: 0:06:20 lr: 0.000101 grad: 0.0843 (0.0804) loss: 0.8653 (0.8721) time: 0.1596 data: 0.0629 max mem: 9305 +Train: [32] [3500/6250] eta: 0:06:07 lr: 0.000101 grad: 0.0872 (0.0806) loss: 0.8651 (0.8720) time: 0.1339 data: 0.0520 max mem: 9305 +Train: [32] [3600/6250] eta: 0:05:54 lr: 0.000101 grad: 0.0867 (0.0808) loss: 0.8664 (0.8718) time: 0.1323 data: 0.0403 max mem: 9305 +Train: [32] [3700/6250] eta: 0:05:40 lr: 0.000101 grad: 0.0835 (0.0810) loss: 0.8628 (0.8717) time: 0.1408 data: 0.0568 max mem: 9305 +Train: [32] [3800/6250] eta: 0:05:28 lr: 0.000101 grad: 0.0826 (0.0811) loss: 0.8648 (0.8715) time: 0.1432 data: 0.0669 max mem: 9305 +Train: [32] [3900/6250] eta: 0:05:14 lr: 0.000101 grad: 0.0866 (0.0813) loss: 0.8556 (0.8714) time: 0.1082 data: 0.0253 max mem: 9305 +Train: [32] [4000/6250] eta: 0:05:00 lr: 0.000101 grad: 0.0838 (0.0813) loss: 0.8722 (0.8712) time: 0.1291 data: 0.0442 max mem: 9305 +Train: [32] [4100/6250] eta: 0:04:47 lr: 0.000101 grad: 0.0772 (0.0814) loss: 0.8712 (0.8712) time: 0.1150 data: 0.0273 max mem: 9305 +Train: [32] [4200/6250] eta: 0:04:34 lr: 0.000101 grad: 0.0817 (0.0816) loss: 0.8669 (0.8710) time: 0.1355 data: 0.0483 max mem: 9305 +Train: [32] [4300/6250] eta: 0:04:21 lr: 0.000101 grad: 0.0835 (0.0817) loss: 0.8642 (0.8709) time: 0.1571 data: 0.0780 max mem: 9305 +Train: [32] [4400/6250] eta: 0:04:08 lr: 0.000101 grad: 0.0837 (0.0817) loss: 0.8620 (0.8708) time: 0.1547 data: 0.0712 max mem: 9305 +Train: [32] [4500/6250] eta: 0:03:55 lr: 0.000101 grad: 0.0852 (0.0818) loss: 0.8639 (0.8708) time: 0.1343 data: 0.0518 max mem: 9305 +Train: [32] [4600/6250] eta: 0:03:42 lr: 0.000101 grad: 0.0869 (0.0821) loss: 0.8665 (0.8707) time: 0.1425 data: 0.0550 max mem: 9305 +Train: [32] [4700/6250] eta: 0:03:29 lr: 0.000100 grad: 0.0821 (0.0822) loss: 0.8658 (0.8706) time: 0.1323 data: 0.0519 max mem: 9305 +Train: [32] [4800/6250] eta: 0:03:16 lr: 0.000100 grad: 0.0821 (0.0822) loss: 0.8717 (0.8706) time: 0.1495 data: 0.0658 max mem: 9305 +Train: [32] [4900/6250] eta: 0:03:03 lr: 0.000100 grad: 0.0803 (0.0822) loss: 0.8702 (0.8706) time: 0.1650 data: 0.0845 max mem: 9305 +Train: [32] [5000/6250] eta: 0:02:50 lr: 0.000100 grad: 0.0836 (0.0824) loss: 0.8697 (0.8706) time: 0.1382 data: 0.0537 max mem: 9305 +Train: [32] [5100/6250] eta: 0:02:36 lr: 0.000100 grad: 0.0809 (0.0824) loss: 0.8699 (0.8705) time: 0.1283 data: 0.0470 max mem: 9305 +Train: [32] [5200/6250] eta: 0:02:22 lr: 0.000100 grad: 0.0816 (0.0825) loss: 0.8721 (0.8705) time: 0.1389 data: 0.0522 max mem: 9305 +Train: [32] [5300/6250] eta: 0:02:09 lr: 0.000100 grad: 0.0824 (0.0825) loss: 0.8730 (0.8705) time: 0.1410 data: 0.0521 max mem: 9305 +Train: [32] [5400/6250] eta: 0:01:55 lr: 0.000100 grad: 0.0953 (0.0827) loss: 0.8606 (0.8704) time: 0.1768 data: 0.0979 max mem: 9305 +Train: [32] [5500/6250] eta: 0:01:41 lr: 0.000100 grad: 0.0863 (0.0828) loss: 0.8707 (0.8703) time: 0.1459 data: 0.0607 max mem: 9305 +Train: [32] [5600/6250] eta: 0:01:28 lr: 0.000100 grad: 0.0772 (0.0828) loss: 0.8710 (0.8703) time: 0.1262 data: 0.0421 max mem: 9305 +Train: [32] [5700/6250] eta: 0:01:14 lr: 0.000100 grad: 0.0823 (0.0829) loss: 0.8750 (0.8703) time: 0.1376 data: 0.0583 max mem: 9305 +Train: [32] [5800/6250] eta: 0:01:01 lr: 0.000100 grad: 0.0866 (0.0829) loss: 0.8706 (0.8702) time: 0.1409 data: 0.0628 max mem: 9305 +Train: [32] [5900/6250] eta: 0:00:47 lr: 0.000100 grad: 0.0849 (0.0830) loss: 0.8709 (0.8702) time: 0.1423 data: 0.0572 max mem: 9305 +Train: [32] [6000/6250] eta: 0:00:34 lr: 0.000100 grad: 0.0769 (0.0830) loss: 0.8745 (0.8702) time: 0.0960 data: 0.0151 max mem: 9305 +Train: [32] [6100/6250] eta: 0:00:20 lr: 0.000100 grad: 0.0787 (0.0830) loss: 0.8761 (0.8702) time: 0.1332 data: 0.0531 max mem: 9305 +Train: [32] [6200/6250] eta: 0:00:06 lr: 0.000100 grad: 0.0845 (0.0831) loss: 0.8682 (0.8702) time: 0.1429 data: 0.0617 max mem: 9305 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.0867 (0.0831) loss: 0.8698 (0.8702) time: 0.1437 data: 0.0598 max mem: 9305 +Train: [32] Total time: 0:14:15 (0.1368 s / it) +Averaged stats: lr: 0.000100 grad: 0.0867 (0.0831) loss: 0.8698 (0.8702) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:04:59 loss: 0.8921 (0.8921) time: 4.8253 data: 4.7908 max mem: 9305 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8856 (0.8861) time: 0.1187 data: 0.0905 max mem: 9305 +Eval (hcp-train-subset): [32] Total time: 0:00:13 (0.2158 s / it) +Averaged stats (hcp-train-subset): loss: 0.8856 (0.8861) +Eval (hcp-val): [32] [ 0/62] eta: 0:05:56 loss: 0.8821 (0.8821) time: 5.7507 data: 5.6916 max mem: 9305 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8826 (0.8834) time: 0.1423 data: 0.1139 max mem: 9305 +Eval (hcp-val): [32] Total time: 0:00:13 (0.2197 s / it) +Averaged stats (hcp-val): loss: 0.8826 (0.8834) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 9:37:39 lr: 0.000100 grad: 0.1877 (0.1877) loss: 0.9193 (0.9193) time: 5.5456 data: 5.4373 max mem: 9305 +Train: [33] [ 100/6250] eta: 0:21:14 lr: 0.000100 grad: 0.0756 (0.0762) loss: 0.8908 (0.8888) time: 0.1633 data: 0.0500 max mem: 9305 +Train: [33] [ 200/6250] eta: 0:18:02 lr: 0.000100 grad: 0.0827 (0.0803) loss: 0.8709 (0.8800) time: 0.1687 data: 0.0776 max mem: 9305 +Train: [33] [ 300/6250] eta: 0:16:47 lr: 0.000100 grad: 0.0788 (0.0808) loss: 0.8748 (0.8769) time: 0.1897 data: 0.0929 max mem: 9305 +Train: [33] [ 400/6250] eta: 0:15:48 lr: 0.000100 grad: 0.0798 (0.0807) loss: 0.8678 (0.8751) time: 0.1366 data: 0.0513 max mem: 9305 +Train: [33] [ 500/6250] eta: 0:15:08 lr: 0.000100 grad: 0.0809 (0.0811) loss: 0.8672 (0.8735) time: 0.1338 data: 0.0501 max mem: 9305 +Train: [33] [ 600/6250] eta: 0:14:35 lr: 0.000100 grad: 0.0764 (0.0813) loss: 0.8671 (0.8728) time: 0.1534 data: 0.0679 max mem: 9305 +Train: [33] [ 700/6250] eta: 0:14:04 lr: 0.000100 grad: 0.0782 (0.0811) loss: 0.8688 (0.8722) time: 0.1369 data: 0.0519 max mem: 9305 +Train: [33] [ 800/6250] eta: 0:14:04 lr: 0.000100 grad: 0.0765 (0.0812) loss: 0.8707 (0.8718) time: 0.1436 data: 0.0485 max mem: 9305 +Train: [33] [ 900/6250] eta: 0:13:42 lr: 0.000100 grad: 0.0758 (0.0811) loss: 0.8679 (0.8712) time: 0.1682 data: 0.0821 max mem: 9305 +Train: [33] [1000/6250] eta: 0:13:42 lr: 0.000100 grad: 0.0721 (0.0808) loss: 0.8676 (0.8708) time: 0.3192 data: 0.2425 max mem: 9305 +Train: [33] [1100/6250] eta: 0:13:07 lr: 0.000100 grad: 0.0838 (0.0811) loss: 0.8614 (0.8701) time: 0.1516 data: 0.0686 max mem: 9305 +Train: [33] [1200/6250] eta: 0:12:47 lr: 0.000100 grad: 0.0807 (0.0811) loss: 0.8650 (0.8695) time: 0.1389 data: 0.0593 max mem: 9305 +Train: [33] [1300/6250] eta: 0:12:29 lr: 0.000100 grad: 0.0758 (0.0810) loss: 0.8662 (0.8692) time: 0.1507 data: 0.0728 max mem: 9305 +Train: [33] [1400/6250] eta: 0:12:13 lr: 0.000100 grad: 0.0780 (0.0809) loss: 0.8717 (0.8691) time: 0.1366 data: 0.0525 max mem: 9305 +Train: [33] [1500/6250] eta: 0:12:01 lr: 0.000100 grad: 0.0812 (0.0809) loss: 0.8635 (0.8689) time: 0.1305 data: 0.0463 max mem: 9305 +Train: [33] [1600/6250] eta: 0:11:45 lr: 0.000100 grad: 0.0795 (0.0810) loss: 0.8686 (0.8686) time: 0.1380 data: 0.0599 max mem: 9305 +Train: [33] [1700/6250] eta: 0:11:29 lr: 0.000100 grad: 0.0873 (0.0813) loss: 0.8565 (0.8683) time: 0.1479 data: 0.0616 max mem: 9305 +Train: [33] [1800/6250] eta: 0:11:11 lr: 0.000100 grad: 0.0783 (0.0815) loss: 0.8580 (0.8681) time: 0.1287 data: 0.0442 max mem: 9305 +Train: [33] [1900/6250] eta: 0:10:52 lr: 0.000100 grad: 0.0825 (0.0816) loss: 0.8670 (0.8680) time: 0.1401 data: 0.0617 max mem: 9305 +Train: [33] [2000/6250] eta: 0:10:33 lr: 0.000100 grad: 0.0867 (0.0818) loss: 0.8685 (0.8678) time: 0.1275 data: 0.0440 max mem: 9305 +Train: [33] [2100/6250] eta: 0:10:13 lr: 0.000100 grad: 0.0816 (0.0821) loss: 0.8673 (0.8676) time: 0.1235 data: 0.0382 max mem: 9305 +Train: [33] [2200/6250] eta: 0:09:55 lr: 0.000099 grad: 0.0855 (0.0824) loss: 0.8622 (0.8674) time: 0.1168 data: 0.0215 max mem: 9305 +Train: [33] [2300/6250] eta: 0:09:37 lr: 0.000099 grad: 0.0870 (0.0827) loss: 0.8614 (0.8673) time: 0.1226 data: 0.0347 max mem: 9305 +Train: [33] [2400/6250] eta: 0:09:19 lr: 0.000099 grad: 0.0889 (0.0828) loss: 0.8706 (0.8673) time: 0.1387 data: 0.0584 max mem: 9305 +Train: [33] [2500/6250] eta: 0:09:03 lr: 0.000099 grad: 0.0771 (0.0829) loss: 0.8646 (0.8671) time: 0.1391 data: 0.0572 max mem: 9305 +Train: [33] [2600/6250] eta: 0:08:48 lr: 0.000099 grad: 0.0899 (0.0832) loss: 0.8620 (0.8670) time: 0.1440 data: 0.0632 max mem: 9305 +Train: [33] [2700/6250] eta: 0:08:34 lr: 0.000099 grad: 0.0872 (0.0836) loss: 0.8632 (0.8668) time: 0.1312 data: 0.0526 max mem: 9305 +Train: [33] [2800/6250] eta: 0:08:19 lr: 0.000099 grad: 0.0837 (0.0839) loss: 0.8682 (0.8668) time: 0.1351 data: 0.0530 max mem: 9305 +Train: [33] [2900/6250] eta: 0:08:04 lr: 0.000099 grad: 0.0823 (0.0840) loss: 0.8643 (0.8667) time: 0.1407 data: 0.0603 max mem: 9305 +Train: [33] [3000/6250] eta: 0:07:50 lr: 0.000099 grad: 0.0802 (0.0841) loss: 0.8638 (0.8666) time: 0.1569 data: 0.0786 max mem: 9305 +Train: [33] [3100/6250] eta: 0:07:36 lr: 0.000099 grad: 0.0788 (0.0841) loss: 0.8678 (0.8666) time: 0.1578 data: 0.0766 max mem: 9305 +Train: [33] [3200/6250] eta: 0:07:21 lr: 0.000099 grad: 0.0790 (0.0841) loss: 0.8701 (0.8665) time: 0.1095 data: 0.0274 max mem: 9305 +Train: [33] [3300/6250] eta: 0:07:06 lr: 0.000099 grad: 0.0806 (0.0842) loss: 0.8612 (0.8665) time: 0.1501 data: 0.0692 max mem: 9305 +Train: [33] [3400/6250] eta: 0:06:52 lr: 0.000099 grad: 0.0820 (0.0841) loss: 0.8627 (0.8665) time: 0.1330 data: 0.0497 max mem: 9305 +Train: [33] [3500/6250] eta: 0:06:37 lr: 0.000099 grad: 0.0758 (0.0842) loss: 0.8664 (0.8664) time: 0.1541 data: 0.0749 max mem: 9305 +Train: [33] [3600/6250] eta: 0:06:23 lr: 0.000099 grad: 0.0865 (0.0841) loss: 0.8589 (0.8663) time: 0.1129 data: 0.0304 max mem: 9305 +Train: [33] [3700/6250] eta: 0:06:08 lr: 0.000099 grad: 0.0811 (0.0841) loss: 0.8622 (0.8663) time: 0.1489 data: 0.0636 max mem: 9305 +Train: [33] [3800/6250] eta: 0:05:53 lr: 0.000099 grad: 0.0881 (0.0842) loss: 0.8653 (0.8663) time: 0.1525 data: 0.0655 max mem: 9305 +Train: [33] [3900/6250] eta: 0:05:38 lr: 0.000099 grad: 0.0793 (0.0842) loss: 0.8674 (0.8662) time: 0.1675 data: 0.0756 max mem: 9305 +Train: [33] [4000/6250] eta: 0:05:24 lr: 0.000099 grad: 0.0813 (0.0842) loss: 0.8656 (0.8662) time: 0.1621 data: 0.0764 max mem: 9305 +Train: [33] [4100/6250] eta: 0:05:10 lr: 0.000099 grad: 0.0747 (0.0842) loss: 0.8698 (0.8662) time: 0.1478 data: 0.0646 max mem: 9305 +Train: [33] [4200/6250] eta: 0:04:56 lr: 0.000099 grad: 0.0800 (0.0842) loss: 0.8701 (0.8662) time: 0.2011 data: 0.1175 max mem: 9305 +Train: [33] [4300/6250] eta: 0:04:41 lr: 0.000099 grad: 0.0821 (0.0842) loss: 0.8703 (0.8663) time: 0.1228 data: 0.0333 max mem: 9305 +Train: [33] [4400/6250] eta: 0:04:27 lr: 0.000099 grad: 0.0813 (0.0843) loss: 0.8717 (0.8663) time: 0.1813 data: 0.1043 max mem: 9305 +Train: [33] [4500/6250] eta: 0:04:12 lr: 0.000099 grad: 0.0851 (0.0843) loss: 0.8664 (0.8663) time: 0.1344 data: 0.0604 max mem: 9305 +Train: [33] [4600/6250] eta: 0:03:57 lr: 0.000099 grad: 0.0834 (0.0844) loss: 0.8610 (0.8663) time: 0.1342 data: 0.0531 max mem: 9305 +Train: [33] [4700/6250] eta: 0:03:42 lr: 0.000099 grad: 0.0798 (0.0844) loss: 0.8707 (0.8664) time: 0.1288 data: 0.0468 max mem: 9305 +Train: [33] [4800/6250] eta: 0:03:27 lr: 0.000099 grad: 0.0835 (0.0845) loss: 0.8677 (0.8664) time: 0.1176 data: 0.0346 max mem: 9305 +Train: [33] [4900/6250] eta: 0:03:13 lr: 0.000099 grad: 0.0778 (0.0845) loss: 0.8735 (0.8664) time: 0.1504 data: 0.0727 max mem: 9305 +Train: [33] [5000/6250] eta: 0:02:59 lr: 0.000099 grad: 0.0790 (0.0845) loss: 0.8678 (0.8664) time: 0.1040 data: 0.0194 max mem: 9305 +Train: [33] [5100/6250] eta: 0:02:44 lr: 0.000099 grad: 0.0770 (0.0846) loss: 0.8702 (0.8664) time: 0.1428 data: 0.0639 max mem: 9305 +Train: [33] [5200/6250] eta: 0:02:30 lr: 0.000099 grad: 0.0870 (0.0846) loss: 0.8594 (0.8664) time: 0.1269 data: 0.0406 max mem: 9305 +Train: [33] [5300/6250] eta: 0:02:16 lr: 0.000099 grad: 0.0916 (0.0847) loss: 0.8651 (0.8664) time: 0.1566 data: 0.0750 max mem: 9305 +Train: [33] [5400/6250] eta: 0:02:01 lr: 0.000099 grad: 0.0827 (0.0847) loss: 0.8633 (0.8664) time: 0.1304 data: 0.0492 max mem: 9305 +Train: [33] [5500/6250] eta: 0:01:47 lr: 0.000099 grad: 0.0779 (0.0848) loss: 0.8674 (0.8664) time: 0.2033 data: 0.1210 max mem: 9305 +Train: [33] [5600/6250] eta: 0:01:32 lr: 0.000099 grad: 0.0823 (0.0848) loss: 0.8664 (0.8664) time: 0.1260 data: 0.0376 max mem: 9305 +Train: [33] [5700/6250] eta: 0:01:18 lr: 0.000099 grad: 0.0795 (0.0848) loss: 0.8690 (0.8664) time: 0.1303 data: 0.0497 max mem: 9305 +Train: [33] [5800/6250] eta: 0:01:04 lr: 0.000099 grad: 0.0838 (0.0848) loss: 0.8642 (0.8664) time: 0.1474 data: 0.0644 max mem: 9305 +Train: [33] [5900/6250] eta: 0:00:50 lr: 0.000098 grad: 0.0811 (0.0848) loss: 0.8690 (0.8664) time: 0.2067 data: 0.1326 max mem: 9305 +Train: [33] [6000/6250] eta: 0:00:35 lr: 0.000098 grad: 0.0854 (0.0848) loss: 0.8706 (0.8664) time: 0.1380 data: 0.0591 max mem: 9305 +Train: [33] [6100/6250] eta: 0:00:21 lr: 0.000098 grad: 0.0858 (0.0849) loss: 0.8647 (0.8664) time: 0.1305 data: 0.0497 max mem: 9305 +Train: [33] [6200/6250] eta: 0:00:07 lr: 0.000098 grad: 0.0805 (0.0849) loss: 0.8663 (0.8664) time: 0.1430 data: 0.0603 max mem: 9305 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.0859 (0.0849) loss: 0.8623 (0.8664) time: 0.1214 data: 0.0383 max mem: 9305 +Train: [33] Total time: 0:15:00 (0.1442 s / it) +Averaged stats: lr: 0.000098 grad: 0.0859 (0.0849) loss: 0.8623 (0.8664) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:04:42 loss: 0.8993 (0.8993) time: 4.5584 data: 4.5234 max mem: 9305 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8857 (0.8857) time: 0.1335 data: 0.1050 max mem: 9305 +Eval (hcp-train-subset): [33] Total time: 0:00:13 (0.2123 s / it) +Averaged stats (hcp-train-subset): loss: 0.8857 (0.8857) +Eval (hcp-val): [33] [ 0/62] eta: 0:05:27 loss: 0.8769 (0.8769) time: 5.2892 data: 5.2553 max mem: 9305 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8814 (0.8844) time: 0.1371 data: 0.1071 max mem: 9305 +Eval (hcp-val): [33] Total time: 0:00:12 (0.2060 s / it) +Averaged stats (hcp-val): loss: 0.8814 (0.8844) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [34] [ 0/6250] eta: 9:45:15 lr: 0.000098 grad: 0.1157 (0.1157) loss: 0.8965 (0.8965) time: 5.6185 data: 5.5132 max mem: 9305 +Train: [34] [ 100/6250] eta: 0:19:38 lr: 0.000098 grad: 0.0747 (0.1132) loss: 0.8745 (0.8787) time: 0.1515 data: 0.0473 max mem: 9305 +Train: [34] [ 200/6250] eta: 0:17:00 lr: 0.000098 grad: 0.0805 (0.1001) loss: 0.8710 (0.8751) time: 0.1394 data: 0.0474 max mem: 9305 +Train: [34] [ 300/6250] eta: 0:15:46 lr: 0.000098 grad: 0.0946 (0.0965) loss: 0.8596 (0.8727) time: 0.1472 data: 0.0599 max mem: 9305 +Train: [34] [ 400/6250] eta: 0:14:53 lr: 0.000098 grad: 0.0857 (0.0951) loss: 0.8710 (0.8707) time: 0.1098 data: 0.0226 max mem: 9305 +Train: [34] [ 500/6250] eta: 0:14:10 lr: 0.000098 grad: 0.0791 (0.0947) loss: 0.8645 (0.8692) time: 0.1315 data: 0.0424 max mem: 9305 +Train: [34] [ 600/6250] eta: 0:13:34 lr: 0.000098 grad: 0.0959 (0.0947) loss: 0.8602 (0.8676) time: 0.1273 data: 0.0389 max mem: 9305 +Train: [34] [ 700/6250] eta: 0:13:02 lr: 0.000098 grad: 0.0831 (0.0941) loss: 0.8643 (0.8670) time: 0.1251 data: 0.0398 max mem: 9305 +Train: [34] [ 800/6250] eta: 0:12:37 lr: 0.000098 grad: 0.0842 (0.0935) loss: 0.8694 (0.8666) time: 0.1344 data: 0.0462 max mem: 9305 +Train: [34] [ 900/6250] eta: 0:12:19 lr: 0.000098 grad: 0.0780 (0.0927) loss: 0.8737 (0.8664) time: 0.1378 data: 0.0529 max mem: 9305 +Train: [34] [1000/6250] eta: 0:12:03 lr: 0.000098 grad: 0.0773 (0.0917) loss: 0.8641 (0.8665) time: 0.1210 data: 0.0393 max mem: 9305 +Train: [34] [1100/6250] eta: 0:12:00 lr: 0.000098 grad: 0.0860 (0.0912) loss: 0.8638 (0.8663) time: 0.1413 data: 0.0590 max mem: 9305 +Train: [34] [1200/6250] eta: 0:11:50 lr: 0.000098 grad: 0.0864 (0.0909) loss: 0.8658 (0.8661) time: 0.1492 data: 0.0653 max mem: 9305 +Train: [34] [1300/6250] eta: 0:11:36 lr: 0.000098 grad: 0.0787 (0.0909) loss: 0.8682 (0.8658) time: 0.1355 data: 0.0540 max mem: 9305 +Train: [34] [1400/6250] eta: 0:11:27 lr: 0.000098 grad: 0.0882 (0.0906) loss: 0.8621 (0.8655) time: 0.1435 data: 0.0565 max mem: 9305 +Train: [34] [1500/6250] eta: 0:11:17 lr: 0.000098 grad: 0.0856 (0.0905) loss: 0.8645 (0.8654) time: 0.1504 data: 0.0692 max mem: 9305 +Train: [34] [1600/6250] eta: 0:11:03 lr: 0.000098 grad: 0.0880 (0.0903) loss: 0.8617 (0.8653) time: 0.1218 data: 0.0407 max mem: 9305 +Train: [34] [1700/6250] eta: 0:10:47 lr: 0.000098 grad: 0.0850 (0.0902) loss: 0.8628 (0.8652) time: 0.1421 data: 0.0569 max mem: 9305 +Train: [34] [1800/6250] eta: 0:10:31 lr: 0.000098 grad: 0.0816 (0.0899) loss: 0.8568 (0.8652) time: 0.0928 data: 0.0002 max mem: 9305 +Train: [34] [1900/6250] eta: 0:10:15 lr: 0.000098 grad: 0.0809 (0.0899) loss: 0.8650 (0.8651) time: 0.1013 data: 0.0125 max mem: 9305 +Train: [34] [2000/6250] eta: 0:09:57 lr: 0.000098 grad: 0.0777 (0.0896) loss: 0.8630 (0.8652) time: 0.1182 data: 0.0339 max mem: 9305 +Train: [34] [2100/6250] eta: 0:09:44 lr: 0.000098 grad: 0.0880 (0.0897) loss: 0.8695 (0.8651) time: 0.1815 data: 0.1000 max mem: 9305 +Train: [34] [2200/6250] eta: 0:09:34 lr: 0.000098 grad: 0.0909 (0.0895) loss: 0.8692 (0.8652) time: 0.1348 data: 0.0554 max mem: 9305 +Train: [34] [2300/6250] eta: 0:09:24 lr: 0.000098 grad: 0.0851 (0.0895) loss: 0.8681 (0.8652) time: 0.1341 data: 0.0574 max mem: 9305 +Train: [34] [2400/6250] eta: 0:09:14 lr: 0.000098 grad: 0.0851 (0.0896) loss: 0.8613 (0.8650) time: 0.1517 data: 0.0730 max mem: 9305 +Train: [34] [2500/6250] eta: 0:09:03 lr: 0.000098 grad: 0.0849 (0.0896) loss: 0.8748 (0.8650) time: 0.1557 data: 0.0779 max mem: 9305 +Train: [34] [2600/6250] eta: 0:08:51 lr: 0.000098 grad: 0.0873 (0.0894) loss: 0.8677 (0.8651) time: 0.1627 data: 0.0830 max mem: 9305 +Train: [34] [2700/6250] eta: 0:08:38 lr: 0.000098 grad: 0.0863 (0.0894) loss: 0.8631 (0.8651) time: 0.1241 data: 0.0453 max mem: 9305 +Train: [34] [2800/6250] eta: 0:08:24 lr: 0.000098 grad: 0.0848 (0.0893) loss: 0.8645 (0.8651) time: 0.1657 data: 0.0773 max mem: 9305 +Train: [34] [2900/6250] eta: 0:08:08 lr: 0.000098 grad: 0.0870 (0.0894) loss: 0.8669 (0.8651) time: 0.1252 data: 0.0445 max mem: 9305 +Train: [34] [3000/6250] eta: 0:07:54 lr: 0.000098 grad: 0.0870 (0.0895) loss: 0.8663 (0.8651) time: 0.1504 data: 0.0546 max mem: 9305 +Train: [34] [3100/6250] eta: 0:07:40 lr: 0.000098 grad: 0.0850 (0.0893) loss: 0.8682 (0.8651) time: 0.1436 data: 0.0562 max mem: 9305 +Train: [34] [3200/6250] eta: 0:07:27 lr: 0.000098 grad: 0.0821 (0.0893) loss: 0.8683 (0.8651) time: 0.1205 data: 0.0372 max mem: 9305 +Train: [34] [3300/6250] eta: 0:07:11 lr: 0.000097 grad: 0.0794 (0.0892) loss: 0.8703 (0.8651) time: 0.1436 data: 0.0633 max mem: 9305 +Train: [34] [3400/6250] eta: 0:06:57 lr: 0.000097 grad: 0.0852 (0.0891) loss: 0.8680 (0.8652) time: 0.1515 data: 0.0634 max mem: 9305 +Train: [34] [3500/6250] eta: 0:06:44 lr: 0.000097 grad: 0.0836 (0.0890) loss: 0.8640 (0.8653) time: 0.0880 data: 0.0002 max mem: 9305 +Train: [34] [3600/6250] eta: 0:06:28 lr: 0.000097 grad: 0.0780 (0.0889) loss: 0.8741 (0.8654) time: 0.1343 data: 0.0573 max mem: 9305 +Train: [34] [3700/6250] eta: 0:06:13 lr: 0.000097 grad: 0.0803 (0.0887) loss: 0.8667 (0.8656) time: 0.1309 data: 0.0485 max mem: 9305 +Train: [34] [3800/6250] eta: 0:05:57 lr: 0.000097 grad: 0.0837 (0.0886) loss: 0.8740 (0.8657) time: 0.1350 data: 0.0505 max mem: 9305 +Train: [34] [3900/6250] eta: 0:05:42 lr: 0.000097 grad: 0.0819 (0.0885) loss: 0.8663 (0.8658) time: 0.1437 data: 0.0557 max mem: 9305 +Train: [34] [4000/6250] eta: 0:05:27 lr: 0.000097 grad: 0.0805 (0.0884) loss: 0.8728 (0.8660) time: 0.1148 data: 0.0294 max mem: 9305 +Train: [34] [4100/6250] eta: 0:05:12 lr: 0.000097 grad: 0.0789 (0.0883) loss: 0.8676 (0.8661) time: 0.1385 data: 0.0522 max mem: 9305 +Train: [34] [4200/6250] eta: 0:04:57 lr: 0.000097 grad: 0.0820 (0.0881) loss: 0.8716 (0.8662) time: 0.1463 data: 0.0635 max mem: 9305 +Train: [34] [4300/6250] eta: 0:04:42 lr: 0.000097 grad: 0.0823 (0.0880) loss: 0.8729 (0.8663) time: 0.1490 data: 0.0625 max mem: 9305 +Train: [34] [4400/6250] eta: 0:04:28 lr: 0.000097 grad: 0.0819 (0.0880) loss: 0.8733 (0.8664) time: 0.1701 data: 0.0884 max mem: 9305 +Train: [34] [4500/6250] eta: 0:04:13 lr: 0.000097 grad: 0.0837 (0.0879) loss: 0.8722 (0.8665) time: 0.1400 data: 0.0565 max mem: 9305 +Train: [34] [4600/6250] eta: 0:03:58 lr: 0.000097 grad: 0.0832 (0.0879) loss: 0.8652 (0.8664) time: 0.1465 data: 0.0634 max mem: 9305 +Train: [34] [4700/6250] eta: 0:03:44 lr: 0.000097 grad: 0.0847 (0.0879) loss: 0.8594 (0.8664) time: 0.1314 data: 0.0544 max mem: 9305 +Train: [34] [4800/6250] eta: 0:03:29 lr: 0.000097 grad: 0.0781 (0.0878) loss: 0.8698 (0.8665) time: 0.1345 data: 0.0532 max mem: 9305 +Train: [34] [4900/6250] eta: 0:03:14 lr: 0.000097 grad: 0.0831 (0.0878) loss: 0.8670 (0.8665) time: 0.1287 data: 0.0421 max mem: 9305 +Train: [34] [5000/6250] eta: 0:03:00 lr: 0.000097 grad: 0.0795 (0.0878) loss: 0.8705 (0.8665) time: 0.1320 data: 0.0517 max mem: 9305 +Train: [34] [5100/6250] eta: 0:02:45 lr: 0.000097 grad: 0.0923 (0.0878) loss: 0.8670 (0.8665) time: 0.1486 data: 0.0686 max mem: 9305 +Train: [34] [5200/6250] eta: 0:02:31 lr: 0.000097 grad: 0.0813 (0.0878) loss: 0.8669 (0.8664) time: 0.1361 data: 0.0458 max mem: 9305 +Train: [34] [5300/6250] eta: 0:02:16 lr: 0.000097 grad: 0.0859 (0.0878) loss: 0.8656 (0.8664) time: 0.1645 data: 0.0647 max mem: 9305 +Train: [34] [5400/6250] eta: 0:02:02 lr: 0.000097 grad: 0.0888 (0.0878) loss: 0.8657 (0.8664) time: 0.1559 data: 0.0746 max mem: 9305 +Train: [34] [5500/6250] eta: 0:01:48 lr: 0.000097 grad: 0.0840 (0.0878) loss: 0.8630 (0.8665) time: 0.3296 data: 0.2245 max mem: 9305 +Train: [34] [5600/6250] eta: 0:01:33 lr: 0.000097 grad: 0.0818 (0.0877) loss: 0.8734 (0.8665) time: 0.1438 data: 0.0673 max mem: 9305 +Train: [34] [5700/6250] eta: 0:01:19 lr: 0.000097 grad: 0.0829 (0.0877) loss: 0.8667 (0.8665) time: 0.1298 data: 0.0464 max mem: 9305 +Train: [34] [5800/6250] eta: 0:01:04 lr: 0.000097 grad: 0.0860 (0.0876) loss: 0.8674 (0.8666) time: 0.1443 data: 0.0650 max mem: 9305 +Train: [34] [5900/6250] eta: 0:00:50 lr: 0.000097 grad: 0.0851 (0.0876) loss: 0.8618 (0.8666) time: 0.1484 data: 0.0631 max mem: 9305 +Train: [34] [6000/6250] eta: 0:00:35 lr: 0.000097 grad: 0.0835 (0.0877) loss: 0.8694 (0.8666) time: 0.1511 data: 0.0692 max mem: 9305 +Train: [34] [6100/6250] eta: 0:00:21 lr: 0.000097 grad: 0.0889 (0.0876) loss: 0.8690 (0.8666) time: 0.1104 data: 0.0198 max mem: 9305 +Train: [34] [6200/6250] eta: 0:00:07 lr: 0.000097 grad: 0.0772 (0.0876) loss: 0.8701 (0.8667) time: 0.1318 data: 0.0498 max mem: 9305 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.0817 (0.0876) loss: 0.8640 (0.8666) time: 0.1261 data: 0.0439 max mem: 9305 +Train: [34] Total time: 0:15:01 (0.1442 s / it) +Averaged stats: lr: 0.000097 grad: 0.0817 (0.0876) loss: 0.8640 (0.8666) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:03:08 loss: 0.8956 (0.8956) time: 3.0349 data: 2.9600 max mem: 9305 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8886 (0.8871) time: 0.1077 data: 0.0791 max mem: 9305 +Eval (hcp-train-subset): [34] Total time: 0:00:12 (0.1966 s / it) +Averaged stats (hcp-train-subset): loss: 0.8886 (0.8871) +Making plots (hcp-train-subset): example=16 +Eval (hcp-val): [34] [ 0/62] eta: 0:03:51 loss: 0.8812 (0.8812) time: 3.7388 data: 3.6705 max mem: 9305 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8807 (0.8825) time: 0.1235 data: 0.0939 max mem: 9305 +Eval (hcp-val): [34] Total time: 0:00:12 (0.2037 s / it) +Averaged stats (hcp-val): loss: 0.8807 (0.8825) +Making plots (hcp-val): example=53 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [35] [ 0/6250] eta: 9:44:14 lr: 0.000097 grad: 0.1779 (0.1779) loss: 0.8821 (0.8821) time: 5.6087 data: 5.5048 max mem: 9305 +Train: [35] [ 100/6250] eta: 0:19:25 lr: 0.000097 grad: 0.0854 (0.0943) loss: 0.8685 (0.8734) time: 0.1277 data: 0.0339 max mem: 9305 +Train: [35] [ 200/6250] eta: 0:16:39 lr: 0.000097 grad: 0.0878 (0.0928) loss: 0.8617 (0.8700) time: 0.1226 data: 0.0323 max mem: 9305 +Train: [35] [ 300/6250] eta: 0:15:07 lr: 0.000097 grad: 0.0873 (0.0917) loss: 0.8649 (0.8677) time: 0.1226 data: 0.0399 max mem: 9305 +Train: [35] [ 400/6250] eta: 0:14:15 lr: 0.000097 grad: 0.0828 (0.0906) loss: 0.8579 (0.8658) time: 0.1162 data: 0.0299 max mem: 9305 +Train: [35] [ 500/6250] eta: 0:13:35 lr: 0.000097 grad: 0.0874 (0.0904) loss: 0.8635 (0.8642) time: 0.1164 data: 0.0165 max mem: 9305 +Train: [35] [ 600/6250] eta: 0:13:08 lr: 0.000097 grad: 0.0802 (0.0894) loss: 0.8715 (0.8642) time: 0.1217 data: 0.0340 max mem: 9305 +Train: [35] [ 700/6250] eta: 0:12:45 lr: 0.000096 grad: 0.0842 (0.0891) loss: 0.8631 (0.8642) time: 0.1382 data: 0.0477 max mem: 9305 +Train: [35] [ 800/6250] eta: 0:12:33 lr: 0.000096 grad: 0.0773 (0.0890) loss: 0.8677 (0.8639) time: 0.1696 data: 0.0879 max mem: 9305 +Train: [35] [ 900/6250] eta: 0:12:26 lr: 0.000096 grad: 0.0767 (0.0887) loss: 0.8641 (0.8640) time: 0.1310 data: 0.0490 max mem: 9305 +Train: [35] [1000/6250] eta: 0:12:14 lr: 0.000096 grad: 0.0757 (0.0882) loss: 0.8671 (0.8641) time: 0.1488 data: 0.0630 max mem: 9305 +Train: [35] [1100/6250] eta: 0:12:02 lr: 0.000096 grad: 0.0840 (0.0877) loss: 0.8644 (0.8642) time: 0.1641 data: 0.0882 max mem: 9305 +Train: [35] [1200/6250] eta: 0:11:42 lr: 0.000096 grad: 0.0851 (0.0873) loss: 0.8638 (0.8644) time: 0.1339 data: 0.0567 max mem: 9305 +Train: [35] [1300/6250] eta: 0:11:23 lr: 0.000096 grad: 0.0743 (0.0870) loss: 0.8688 (0.8646) time: 0.1121 data: 0.0318 max mem: 9305 +Train: [35] [1400/6250] eta: 0:11:04 lr: 0.000096 grad: 0.0752 (0.0866) loss: 0.8697 (0.8650) time: 0.1385 data: 0.0438 max mem: 9305 +Train: [35] [1500/6250] eta: 0:10:43 lr: 0.000096 grad: 0.0801 (0.0865) loss: 0.8639 (0.8649) time: 0.1142 data: 0.0310 max mem: 9305 +Train: [35] [1600/6250] eta: 0:10:25 lr: 0.000096 grad: 0.0822 (0.0863) loss: 0.8609 (0.8649) time: 0.1325 data: 0.0538 max mem: 9305 +Train: [35] [1700/6250] eta: 0:10:08 lr: 0.000096 grad: 0.0826 (0.0862) loss: 0.8648 (0.8650) time: 0.1237 data: 0.0396 max mem: 9305 +Train: [35] [1800/6250] eta: 0:09:54 lr: 0.000096 grad: 0.0745 (0.0861) loss: 0.8722 (0.8652) time: 0.1643 data: 0.0775 max mem: 9305 +Train: [35] [1900/6250] eta: 0:09:39 lr: 0.000096 grad: 0.0804 (0.0859) loss: 0.8658 (0.8653) time: 0.1391 data: 0.0528 max mem: 9305 +Train: [35] [2000/6250] eta: 0:09:25 lr: 0.000096 grad: 0.0808 (0.0858) loss: 0.8648 (0.8655) time: 0.1335 data: 0.0503 max mem: 9305 +Train: [35] [2100/6250] eta: 0:09:16 lr: 0.000096 grad: 0.0840 (0.0857) loss: 0.8752 (0.8656) time: 0.1734 data: 0.0868 max mem: 9305 +Train: [35] [2200/6250] eta: 0:09:04 lr: 0.000096 grad: 0.0770 (0.0856) loss: 0.8744 (0.8658) time: 0.1829 data: 0.1018 max mem: 9305 +Train: [35] [2300/6250] eta: 0:08:50 lr: 0.000096 grad: 0.0894 (0.0855) loss: 0.8608 (0.8659) time: 0.1264 data: 0.0378 max mem: 9305 +Train: [35] [2400/6250] eta: 0:08:44 lr: 0.000096 grad: 0.0788 (0.0854) loss: 0.8670 (0.8660) time: 0.2587 data: 0.1230 max mem: 9305 +Train: [35] [2500/6250] eta: 0:08:30 lr: 0.000096 grad: 0.0791 (0.0853) loss: 0.8722 (0.8661) time: 0.1293 data: 0.0485 max mem: 9305 +Train: [35] [2600/6250] eta: 0:08:16 lr: 0.000096 grad: 0.0810 (0.0852) loss: 0.8727 (0.8662) time: 0.1388 data: 0.0588 max mem: 9305 +Train: [35] [2700/6250] eta: 0:08:02 lr: 0.000096 grad: 0.0770 (0.0853) loss: 0.8675 (0.8662) time: 0.1188 data: 0.0346 max mem: 9305 +Train: [35] [2800/6250] eta: 0:07:48 lr: 0.000096 grad: 0.0824 (0.0853) loss: 0.8740 (0.8663) time: 0.1283 data: 0.0489 max mem: 9305 +Train: [35] [2900/6250] eta: 0:07:33 lr: 0.000096 grad: 0.0866 (0.0855) loss: 0.8663 (0.8663) time: 0.1322 data: 0.0553 max mem: 9305 +Train: [35] [3000/6250] eta: 0:07:19 lr: 0.000096 grad: 0.0825 (0.0855) loss: 0.8687 (0.8663) time: 0.1429 data: 0.0524 max mem: 9305 +Train: [35] [3100/6250] eta: 0:07:05 lr: 0.000096 grad: 0.0885 (0.0856) loss: 0.8633 (0.8662) time: 0.1229 data: 0.0363 max mem: 9305 +Train: [35] [3200/6250] eta: 0:06:51 lr: 0.000096 grad: 0.0824 (0.0856) loss: 0.8675 (0.8662) time: 0.1252 data: 0.0422 max mem: 9305 +Train: [35] [3300/6250] eta: 0:06:38 lr: 0.000096 grad: 0.0831 (0.0858) loss: 0.8692 (0.8661) time: 0.1404 data: 0.0598 max mem: 9305 +Train: [35] [3400/6250] eta: 0:06:25 lr: 0.000096 grad: 0.0823 (0.0859) loss: 0.8743 (0.8660) time: 0.1560 data: 0.0771 max mem: 9305 +Train: [35] [3500/6250] eta: 0:06:14 lr: 0.000096 grad: 0.0766 (0.0860) loss: 0.8734 (0.8660) time: 0.1109 data: 0.0270 max mem: 9305 +Train: [35] [3600/6250] eta: 0:06:00 lr: 0.000096 grad: 0.0915 (0.0863) loss: 0.8635 (0.8658) time: 0.1311 data: 0.0478 max mem: 9305 +Train: [35] [3700/6250] eta: 0:05:46 lr: 0.000096 grad: 0.0918 (0.0865) loss: 0.8633 (0.8657) time: 0.1249 data: 0.0442 max mem: 9305 +Train: [35] [3800/6250] eta: 0:05:33 lr: 0.000096 grad: 0.0895 (0.0866) loss: 0.8633 (0.8656) time: 0.1164 data: 0.0342 max mem: 9305 +Train: [35] [3900/6250] eta: 0:05:19 lr: 0.000096 grad: 0.0825 (0.0868) loss: 0.8720 (0.8655) time: 0.1214 data: 0.0379 max mem: 9305 +Train: [35] [4000/6250] eta: 0:05:05 lr: 0.000096 grad: 0.0816 (0.0868) loss: 0.8679 (0.8655) time: 0.1379 data: 0.0610 max mem: 9305 +Train: [35] [4100/6250] eta: 0:04:52 lr: 0.000096 grad: 0.0850 (0.0869) loss: 0.8663 (0.8655) time: 0.1591 data: 0.0720 max mem: 9305 +Train: [35] [4200/6250] eta: 0:04:38 lr: 0.000096 grad: 0.0778 (0.0869) loss: 0.8631 (0.8655) time: 0.1088 data: 0.0320 max mem: 9305 +Train: [35] [4300/6250] eta: 0:04:25 lr: 0.000095 grad: 0.0823 (0.0869) loss: 0.8723 (0.8655) time: 0.1512 data: 0.0686 max mem: 9305 +Train: [35] [4400/6250] eta: 0:04:11 lr: 0.000095 grad: 0.0797 (0.0869) loss: 0.8714 (0.8656) time: 0.1266 data: 0.0467 max mem: 9305 +Train: [35] [4500/6250] eta: 0:03:58 lr: 0.000095 grad: 0.0833 (0.0870) loss: 0.8610 (0.8656) time: 0.1277 data: 0.0504 max mem: 9305 +Train: [35] [4600/6250] eta: 0:03:44 lr: 0.000095 grad: 0.0813 (0.0870) loss: 0.8666 (0.8656) time: 0.1484 data: 0.0641 max mem: 9305 +Train: [35] [4700/6250] eta: 0:03:31 lr: 0.000095 grad: 0.0829 (0.0870) loss: 0.8672 (0.8656) time: 0.1460 data: 0.0613 max mem: 9305 +Train: [35] [4800/6250] eta: 0:03:18 lr: 0.000095 grad: 0.0825 (0.0871) loss: 0.8667 (0.8656) time: 0.1249 data: 0.0436 max mem: 9305 +Train: [35] [4900/6250] eta: 0:03:04 lr: 0.000095 grad: 0.0852 (0.0870) loss: 0.8674 (0.8657) time: 0.0937 data: 0.0003 max mem: 9305 +Train: [35] [5000/6250] eta: 0:02:50 lr: 0.000095 grad: 0.0821 (0.0872) loss: 0.8638 (0.8657) time: 0.1076 data: 0.0172 max mem: 9305 +Train: [35] [5100/6250] eta: 0:02:37 lr: 0.000095 grad: 0.0822 (0.0872) loss: 0.8729 (0.8658) time: 0.1154 data: 0.0320 max mem: 9305 +Train: [35] [5200/6250] eta: 0:02:23 lr: 0.000095 grad: 0.0833 (0.0872) loss: 0.8673 (0.8658) time: 0.1102 data: 0.0269 max mem: 9305 +Train: [35] [5300/6250] eta: 0:02:09 lr: 0.000095 grad: 0.0890 (0.0872) loss: 0.8642 (0.8658) time: 0.1380 data: 0.0571 max mem: 9305 +Train: [35] [5400/6250] eta: 0:01:56 lr: 0.000095 grad: 0.0768 (0.0873) loss: 0.8710 (0.8658) time: 0.1425 data: 0.0573 max mem: 9305 +Train: [35] [5500/6250] eta: 0:01:42 lr: 0.000095 grad: 0.0818 (0.0873) loss: 0.8655 (0.8658) time: 0.1508 data: 0.0732 max mem: 9305 +Train: [35] [5600/6250] eta: 0:01:29 lr: 0.000095 grad: 0.0878 (0.0873) loss: 0.8645 (0.8658) time: 0.1434 data: 0.0600 max mem: 9305 +Train: [35] [5700/6250] eta: 0:01:15 lr: 0.000095 grad: 0.0824 (0.0873) loss: 0.8683 (0.8658) time: 0.1505 data: 0.0685 max mem: 9305 +Train: [35] [5800/6250] eta: 0:01:01 lr: 0.000095 grad: 0.0803 (0.0873) loss: 0.8666 (0.8659) time: 0.1192 data: 0.0365 max mem: 9305 +Train: [35] [5900/6250] eta: 0:00:48 lr: 0.000095 grad: 0.0795 (0.0872) loss: 0.8616 (0.8659) time: 0.1476 data: 0.0526 max mem: 9305 +Train: [35] [6000/6250] eta: 0:00:34 lr: 0.000095 grad: 0.0742 (0.0871) loss: 0.8730 (0.8659) time: 0.1555 data: 0.0729 max mem: 9305 +Train: [35] [6100/6250] eta: 0:00:20 lr: 0.000095 grad: 0.0788 (0.0871) loss: 0.8712 (0.8660) time: 0.1321 data: 0.0484 max mem: 9305 +Train: [35] [6200/6250] eta: 0:00:06 lr: 0.000095 grad: 0.0901 (0.0871) loss: 0.8672 (0.8660) time: 0.1290 data: 0.0502 max mem: 9305 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.0871 (0.0871) loss: 0.8661 (0.8660) time: 0.1454 data: 0.0690 max mem: 9305 +Train: [35] Total time: 0:14:26 (0.1386 s / it) +Averaged stats: lr: 0.000095 grad: 0.0871 (0.0871) loss: 0.8661 (0.8660) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:04:58 loss: 0.9005 (0.9005) time: 4.8211 data: 4.7811 max mem: 9305 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8878 (0.8867) time: 0.1180 data: 0.0896 max mem: 9305 +Eval (hcp-train-subset): [35] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (hcp-train-subset): loss: 0.8878 (0.8867) +Eval (hcp-val): [35] [ 0/62] eta: 0:03:57 loss: 0.8781 (0.8781) time: 3.8360 data: 3.7263 max mem: 9305 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8797 (0.8833) time: 0.1260 data: 0.0974 max mem: 9305 +Eval (hcp-val): [35] Total time: 0:00:13 (0.2155 s / it) +Averaged stats (hcp-val): loss: 0.8797 (0.8833) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [36] [ 0/6250] eta: 8:54:50 lr: 0.000095 grad: 0.2450 (0.2450) loss: 0.8565 (0.8565) time: 5.1344 data: 5.0127 max mem: 9305 +Train: [36] [ 100/6250] eta: 0:20:25 lr: 0.000095 grad: 0.0809 (0.0924) loss: 0.8771 (0.8765) time: 0.1436 data: 0.0477 max mem: 9305 +Train: [36] [ 200/6250] eta: 0:17:38 lr: 0.000095 grad: 0.0780 (0.0921) loss: 0.8695 (0.8721) time: 0.1506 data: 0.0546 max mem: 9305 +Train: [36] [ 300/6250] eta: 0:16:10 lr: 0.000095 grad: 0.0849 (0.0902) loss: 0.8679 (0.8711) time: 0.1235 data: 0.0387 max mem: 9305 +Train: [36] [ 400/6250] eta: 0:15:11 lr: 0.000095 grad: 0.0805 (0.0892) loss: 0.8747 (0.8711) time: 0.1310 data: 0.0473 max mem: 9305 +Train: [36] [ 500/6250] eta: 0:14:26 lr: 0.000095 grad: 0.0900 (0.0892) loss: 0.8695 (0.8707) time: 0.1326 data: 0.0433 max mem: 9305 +Train: [36] [ 600/6250] eta: 0:14:06 lr: 0.000095 grad: 0.0783 (0.0884) loss: 0.8681 (0.8700) time: 0.1524 data: 0.0533 max mem: 9305 +Train: [36] [ 700/6250] eta: 0:13:47 lr: 0.000095 grad: 0.0858 (0.0885) loss: 0.8670 (0.8691) time: 0.1369 data: 0.0489 max mem: 9305 +Train: [36] [ 800/6250] eta: 0:13:31 lr: 0.000095 grad: 0.0824 (0.0882) loss: 0.8670 (0.8685) time: 0.1530 data: 0.0674 max mem: 9305 +Train: [36] [ 900/6250] eta: 0:13:12 lr: 0.000095 grad: 0.0798 (0.0884) loss: 0.8692 (0.8682) time: 0.1488 data: 0.0654 max mem: 9305 +Train: [36] [1000/6250] eta: 0:12:46 lr: 0.000095 grad: 0.0821 (0.0881) loss: 0.8694 (0.8681) time: 0.1284 data: 0.0505 max mem: 9305 +Train: [36] [1100/6250] eta: 0:12:19 lr: 0.000095 grad: 0.0810 (0.0879) loss: 0.8707 (0.8680) time: 0.1221 data: 0.0412 max mem: 9305 +Train: [36] [1200/6250] eta: 0:11:54 lr: 0.000095 grad: 0.0869 (0.0878) loss: 0.8617 (0.8678) time: 0.1155 data: 0.0342 max mem: 9305 +Train: [36] [1300/6250] eta: 0:11:32 lr: 0.000095 grad: 0.0797 (0.0876) loss: 0.8647 (0.8676) time: 0.1012 data: 0.0191 max mem: 9305 +Train: [36] [1400/6250] eta: 0:11:12 lr: 0.000095 grad: 0.0852 (0.0876) loss: 0.8644 (0.8674) time: 0.1366 data: 0.0565 max mem: 9305 +Train: [36] [1500/6250] eta: 0:10:56 lr: 0.000095 grad: 0.0820 (0.0875) loss: 0.8688 (0.8671) time: 0.1279 data: 0.0449 max mem: 9305 +Train: [36] [1600/6250] eta: 0:10:42 lr: 0.000094 grad: 0.0815 (0.0875) loss: 0.8647 (0.8668) time: 0.1371 data: 0.0528 max mem: 9305 +Train: [36] [1700/6250] eta: 0:10:28 lr: 0.000094 grad: 0.0786 (0.0873) loss: 0.8686 (0.8667) time: 0.1049 data: 0.0276 max mem: 9305 +Train: [36] [1800/6250] eta: 0:10:14 lr: 0.000094 grad: 0.0788 (0.0871) loss: 0.8739 (0.8666) time: 0.1054 data: 0.0242 max mem: 9305 +Train: [36] [1900/6250] eta: 0:10:03 lr: 0.000094 grad: 0.0763 (0.0868) loss: 0.8736 (0.8667) time: 0.1871 data: 0.1078 max mem: 9305 +Train: [36] [2000/6250] eta: 0:09:47 lr: 0.000094 grad: 0.0734 (0.0866) loss: 0.8716 (0.8668) time: 0.1423 data: 0.0572 max mem: 9305 +Train: [36] [2100/6250] eta: 0:09:32 lr: 0.000094 grad: 0.0754 (0.0864) loss: 0.8675 (0.8668) time: 0.1098 data: 0.0201 max mem: 9305 +Train: [36] [2200/6250] eta: 0:09:20 lr: 0.000094 grad: 0.0809 (0.0864) loss: 0.8687 (0.8666) time: 0.1363 data: 0.0464 max mem: 9305 +Train: [36] [2300/6250] eta: 0:09:06 lr: 0.000094 grad: 0.0799 (0.0863) loss: 0.8671 (0.8666) time: 0.1381 data: 0.0571 max mem: 9305 +Train: [36] [2400/6250] eta: 0:08:52 lr: 0.000094 grad: 0.0849 (0.0862) loss: 0.8619 (0.8666) time: 0.1162 data: 0.0326 max mem: 9305 +Train: [36] [2500/6250] eta: 0:08:38 lr: 0.000094 grad: 0.0814 (0.0861) loss: 0.8649 (0.8666) time: 0.1577 data: 0.0743 max mem: 9305 +Train: [36] [2600/6250] eta: 0:08:25 lr: 0.000094 grad: 0.0860 (0.0862) loss: 0.8668 (0.8666) time: 0.1371 data: 0.0580 max mem: 9305 +Train: [36] [2700/6250] eta: 0:08:12 lr: 0.000094 grad: 0.0810 (0.0862) loss: 0.8679 (0.8665) time: 0.1444 data: 0.0604 max mem: 9305 +Train: [36] [2800/6250] eta: 0:07:57 lr: 0.000094 grad: 0.0810 (0.0862) loss: 0.8673 (0.8665) time: 0.1236 data: 0.0384 max mem: 9305 +Train: [36] [2900/6250] eta: 0:07:45 lr: 0.000094 grad: 0.0798 (0.0862) loss: 0.8672 (0.8665) time: 0.2002 data: 0.1212 max mem: 9305 +Train: [36] [3000/6250] eta: 0:07:30 lr: 0.000094 grad: 0.0878 (0.0863) loss: 0.8672 (0.8665) time: 0.1420 data: 0.0570 max mem: 9305 +Train: [36] [3100/6250] eta: 0:07:18 lr: 0.000094 grad: 0.0767 (0.0863) loss: 0.8662 (0.8666) time: 0.1157 data: 0.0264 max mem: 9305 +Train: [36] [3200/6250] eta: 0:07:05 lr: 0.000094 grad: 0.0799 (0.0863) loss: 0.8620 (0.8665) time: 0.1836 data: 0.1052 max mem: 9305 +Train: [36] [3300/6250] eta: 0:06:51 lr: 0.000094 grad: 0.0866 (0.0865) loss: 0.8670 (0.8665) time: 0.1861 data: 0.0893 max mem: 9305 +Train: [36] [3400/6250] eta: 0:06:38 lr: 0.000094 grad: 0.0827 (0.0866) loss: 0.8680 (0.8664) time: 0.1519 data: 0.0656 max mem: 9305 +Train: [36] [3500/6250] eta: 0:06:24 lr: 0.000094 grad: 0.0804 (0.0867) loss: 0.8641 (0.8664) time: 0.1545 data: 0.0635 max mem: 9305 +Train: [36] [3600/6250] eta: 0:06:10 lr: 0.000094 grad: 0.0816 (0.0866) loss: 0.8629 (0.8664) time: 0.1393 data: 0.0559 max mem: 9305 +Train: [36] [3700/6250] eta: 0:05:57 lr: 0.000094 grad: 0.0834 (0.0867) loss: 0.8671 (0.8664) time: 0.1182 data: 0.0383 max mem: 9305 +Train: [36] [3800/6250] eta: 0:05:42 lr: 0.000094 grad: 0.0849 (0.0869) loss: 0.8641 (0.8664) time: 0.1425 data: 0.0593 max mem: 9305 +Train: [36] [3900/6250] eta: 0:05:28 lr: 0.000094 grad: 0.0840 (0.0870) loss: 0.8655 (0.8664) time: 0.1221 data: 0.0424 max mem: 9305 +Train: [36] [4000/6250] eta: 0:05:14 lr: 0.000094 grad: 0.0859 (0.0870) loss: 0.8665 (0.8664) time: 0.1221 data: 0.0446 max mem: 9305 +Train: [36] [4100/6250] eta: 0:04:59 lr: 0.000094 grad: 0.0833 (0.0870) loss: 0.8656 (0.8663) time: 0.1514 data: 0.0659 max mem: 9305 +Train: [36] [4200/6250] eta: 0:04:45 lr: 0.000094 grad: 0.0853 (0.0872) loss: 0.8680 (0.8663) time: 0.1354 data: 0.0566 max mem: 9305 +Train: [36] [4300/6250] eta: 0:04:31 lr: 0.000094 grad: 0.0814 (0.0872) loss: 0.8665 (0.8663) time: 0.1426 data: 0.0612 max mem: 9305 +Train: [36] [4400/6250] eta: 0:04:17 lr: 0.000094 grad: 0.0806 (0.0872) loss: 0.8650 (0.8663) time: 0.1748 data: 0.0941 max mem: 9305 +Train: [36] [4500/6250] eta: 0:04:03 lr: 0.000094 grad: 0.0857 (0.0873) loss: 0.8680 (0.8662) time: 0.1255 data: 0.0400 max mem: 9305 +Train: [36] [4600/6250] eta: 0:03:50 lr: 0.000094 grad: 0.0866 (0.0874) loss: 0.8659 (0.8662) time: 0.1906 data: 0.1121 max mem: 9305 +Train: [36] [4700/6250] eta: 0:03:35 lr: 0.000094 grad: 0.0894 (0.0874) loss: 0.8617 (0.8661) time: 0.1407 data: 0.0636 max mem: 9305 +Train: [36] [4800/6250] eta: 0:03:21 lr: 0.000094 grad: 0.0888 (0.0875) loss: 0.8660 (0.8660) time: 0.1294 data: 0.0522 max mem: 9305 +Train: [36] [4900/6250] eta: 0:03:07 lr: 0.000094 grad: 0.0861 (0.0876) loss: 0.8649 (0.8660) time: 0.1042 data: 0.0286 max mem: 9305 +Train: [36] [5000/6250] eta: 0:02:53 lr: 0.000094 grad: 0.0836 (0.0876) loss: 0.8715 (0.8660) time: 0.1348 data: 0.0508 max mem: 9305 +Train: [36] [5100/6250] eta: 0:02:39 lr: 0.000093 grad: 0.0887 (0.0876) loss: 0.8643 (0.8660) time: 0.1265 data: 0.0463 max mem: 9305 +Train: [36] [5200/6250] eta: 0:02:25 lr: 0.000093 grad: 0.0870 (0.0877) loss: 0.8660 (0.8660) time: 0.1478 data: 0.0584 max mem: 9305 +Train: [36] [5300/6250] eta: 0:02:12 lr: 0.000093 grad: 0.0863 (0.0878) loss: 0.8653 (0.8659) time: 0.2136 data: 0.0987 max mem: 9305 +Train: [36] [5400/6250] eta: 0:01:58 lr: 0.000093 grad: 0.0848 (0.0878) loss: 0.8660 (0.8659) time: 0.1515 data: 0.0678 max mem: 9305 +Train: [36] [5500/6250] eta: 0:01:44 lr: 0.000093 grad: 0.0880 (0.0878) loss: 0.8646 (0.8658) time: 0.1460 data: 0.0529 max mem: 9305 +Train: [36] [5600/6250] eta: 0:01:30 lr: 0.000093 grad: 0.0839 (0.0878) loss: 0.8572 (0.8658) time: 0.1299 data: 0.0494 max mem: 9305 +Train: [36] [5700/6250] eta: 0:01:16 lr: 0.000093 grad: 0.0855 (0.0878) loss: 0.8654 (0.8658) time: 0.1435 data: 0.0647 max mem: 9305 +Train: [36] [5800/6250] eta: 0:01:02 lr: 0.000093 grad: 0.0854 (0.0878) loss: 0.8630 (0.8658) time: 0.1279 data: 0.0478 max mem: 9305 +Train: [36] [5900/6250] eta: 0:00:48 lr: 0.000093 grad: 0.0881 (0.0878) loss: 0.8615 (0.8657) time: 0.1422 data: 0.0608 max mem: 9305 +Train: [36] [6000/6250] eta: 0:00:34 lr: 0.000093 grad: 0.0862 (0.0878) loss: 0.8606 (0.8658) time: 0.1420 data: 0.0649 max mem: 9305 +Train: [36] [6100/6250] eta: 0:00:20 lr: 0.000093 grad: 0.0834 (0.0878) loss: 0.8669 (0.8657) time: 0.1650 data: 0.0851 max mem: 9305 +Train: [36] [6200/6250] eta: 0:00:06 lr: 0.000093 grad: 0.0829 (0.0878) loss: 0.8651 (0.8657) time: 0.1279 data: 0.0397 max mem: 9305 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.0858 (0.0878) loss: 0.8611 (0.8657) time: 0.1349 data: 0.0317 max mem: 9305 +Train: [36] Total time: 0:14:35 (0.1401 s / it) +Averaged stats: lr: 0.000093 grad: 0.0858 (0.0878) loss: 0.8611 (0.8657) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:04:26 loss: 0.8921 (0.8921) time: 4.2961 data: 4.2536 max mem: 9305 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8862 (0.8860) time: 0.1176 data: 0.0886 max mem: 9305 +Eval (hcp-train-subset): [36] Total time: 0:00:12 (0.2086 s / it) +Averaged stats (hcp-train-subset): loss: 0.8862 (0.8860) +Eval (hcp-val): [36] [ 0/62] eta: 0:03:06 loss: 0.8795 (0.8795) time: 3.0013 data: 2.9222 max mem: 9305 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8810 (0.8827) time: 0.1398 data: 0.1095 max mem: 9305 +Eval (hcp-val): [36] Total time: 0:00:12 (0.2067 s / it) +Averaged stats (hcp-val): loss: 0.8810 (0.8827) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 9:05:19 lr: 0.000093 grad: 0.0829 (0.0829) loss: 0.8687 (0.8687) time: 5.2352 data: 5.0856 max mem: 9305 +Train: [37] [ 100/6250] eta: 0:20:12 lr: 0.000093 grad: 0.0840 (0.1066) loss: 0.8689 (0.8779) time: 0.1538 data: 0.0700 max mem: 9305 +Train: [37] [ 200/6250] eta: 0:17:26 lr: 0.000093 grad: 0.0896 (0.1058) loss: 0.8721 (0.8712) time: 0.1301 data: 0.0386 max mem: 9305 +Train: [37] [ 300/6250] eta: 0:15:52 lr: 0.000093 grad: 0.0808 (0.1002) loss: 0.8810 (0.8707) time: 0.1379 data: 0.0506 max mem: 9305 +Train: [37] [ 400/6250] eta: 0:15:09 lr: 0.000093 grad: 0.0844 (0.0960) loss: 0.8720 (0.8706) time: 0.1936 data: 0.0950 max mem: 9305 +Train: [37] [ 500/6250] eta: 0:14:39 lr: 0.000093 grad: 0.0758 (0.0927) loss: 0.8850 (0.8715) time: 0.1331 data: 0.0478 max mem: 9305 +Train: [37] [ 600/6250] eta: 0:14:11 lr: 0.000093 grad: 0.0796 (0.0908) loss: 0.8742 (0.8720) time: 0.1370 data: 0.0581 max mem: 9305 +Train: [37] [ 700/6250] eta: 0:13:52 lr: 0.000093 grad: 0.0782 (0.0897) loss: 0.8806 (0.8722) time: 0.1482 data: 0.0621 max mem: 9305 +Train: [37] [ 800/6250] eta: 0:13:32 lr: 0.000093 grad: 0.0767 (0.0889) loss: 0.8711 (0.8721) time: 0.1284 data: 0.0432 max mem: 9305 +Train: [37] [ 900/6250] eta: 0:13:15 lr: 0.000093 grad: 0.0835 (0.0884) loss: 0.8698 (0.8719) time: 0.1236 data: 0.0256 max mem: 9305 +Train: [37] [1000/6250] eta: 0:12:52 lr: 0.000093 grad: 0.0820 (0.0879) loss: 0.8629 (0.8718) time: 0.1122 data: 0.0279 max mem: 9305 +Train: [37] [1100/6250] eta: 0:12:27 lr: 0.000093 grad: 0.0841 (0.0876) loss: 0.8670 (0.8716) time: 0.1310 data: 0.0492 max mem: 9305 +Train: [37] [1200/6250] eta: 0:12:09 lr: 0.000093 grad: 0.0821 (0.0874) loss: 0.8705 (0.8713) time: 0.1432 data: 0.0596 max mem: 9305 +Train: [37] [1300/6250] eta: 0:11:54 lr: 0.000093 grad: 0.0775 (0.0873) loss: 0.8697 (0.8710) time: 0.1378 data: 0.0572 max mem: 9305 +Train: [37] [1400/6250] eta: 0:11:42 lr: 0.000093 grad: 0.0772 (0.0873) loss: 0.8557 (0.8704) time: 0.1707 data: 0.0684 max mem: 9305 +Train: [37] [1500/6250] eta: 0:11:37 lr: 0.000093 grad: 0.0857 (0.0871) loss: 0.8751 (0.8703) time: 0.1693 data: 0.0920 max mem: 9305 +Train: [37] [1600/6250] eta: 0:11:26 lr: 0.000093 grad: 0.0851 (0.0871) loss: 0.8644 (0.8699) time: 0.1918 data: 0.1132 max mem: 9305 +Train: [37] [1700/6250] eta: 0:11:16 lr: 0.000093 grad: 0.0825 (0.0871) loss: 0.8681 (0.8696) time: 0.1559 data: 0.0708 max mem: 9305 +Train: [37] [1800/6250] eta: 0:11:06 lr: 0.000093 grad: 0.0866 (0.0871) loss: 0.8603 (0.8692) time: 0.1448 data: 0.0666 max mem: 9305 +Train: [37] [1900/6250] eta: 0:10:56 lr: 0.000093 grad: 0.0857 (0.0872) loss: 0.8650 (0.8688) time: 0.1764 data: 0.0974 max mem: 9305 +Train: [37] [2000/6250] eta: 0:10:46 lr: 0.000093 grad: 0.0850 (0.0874) loss: 0.8623 (0.8686) time: 0.2333 data: 0.1555 max mem: 9305 +Train: [37] [2100/6250] eta: 0:10:30 lr: 0.000093 grad: 0.0795 (0.0873) loss: 0.8640 (0.8684) time: 0.1717 data: 0.0914 max mem: 9305 +Train: [37] [2200/6250] eta: 0:10:18 lr: 0.000093 grad: 0.0893 (0.0873) loss: 0.8638 (0.8682) time: 0.1855 data: 0.1080 max mem: 9305 +Train: [37] [2300/6250] eta: 0:09:58 lr: 0.000092 grad: 0.0858 (0.0874) loss: 0.8659 (0.8682) time: 0.1255 data: 0.0456 max mem: 9305 +Train: [37] [2400/6250] eta: 0:09:41 lr: 0.000092 grad: 0.0932 (0.0875) loss: 0.8647 (0.8680) time: 0.1601 data: 0.0739 max mem: 9305 +Train: [37] [2500/6250] eta: 0:09:27 lr: 0.000092 grad: 0.0914 (0.0877) loss: 0.8622 (0.8679) time: 0.1462 data: 0.0647 max mem: 9305 +Train: [37] [2600/6250] eta: 0:09:15 lr: 0.000092 grad: 0.0866 (0.0878) loss: 0.8647 (0.8678) time: 0.2391 data: 0.1425 max mem: 9305 +Train: [37] [2700/6250] eta: 0:08:57 lr: 0.000092 grad: 0.0883 (0.0881) loss: 0.8644 (0.8676) time: 0.1686 data: 0.0914 max mem: 9305 +Train: [37] [2800/6250] eta: 0:08:41 lr: 0.000092 grad: 0.0944 (0.0885) loss: 0.8657 (0.8675) time: 0.1627 data: 0.0686 max mem: 9305 +Train: [37] [2900/6250] eta: 0:08:23 lr: 0.000092 grad: 0.0917 (0.0887) loss: 0.8614 (0.8673) time: 0.1443 data: 0.0625 max mem: 9305 +Train: [37] [3000/6250] eta: 0:08:05 lr: 0.000092 grad: 0.0882 (0.0889) loss: 0.8603 (0.8672) time: 0.1262 data: 0.0438 max mem: 9305 +Train: [37] [3100/6250] eta: 0:07:52 lr: 0.000092 grad: 0.0817 (0.0889) loss: 0.8687 (0.8671) time: 0.2361 data: 0.1544 max mem: 9305 +Train: [37] [3200/6250] eta: 0:07:34 lr: 0.000092 grad: 0.0870 (0.0891) loss: 0.8657 (0.8669) time: 0.1272 data: 0.0449 max mem: 9305 +Train: [37] [3300/6250] eta: 0:07:18 lr: 0.000092 grad: 0.0830 (0.0891) loss: 0.8708 (0.8668) time: 0.1427 data: 0.0615 max mem: 9305 +Train: [37] [3400/6250] eta: 0:07:03 lr: 0.000092 grad: 0.0876 (0.0892) loss: 0.8676 (0.8666) time: 0.1321 data: 0.0497 max mem: 9305 +Train: [37] [3500/6250] eta: 0:06:48 lr: 0.000092 grad: 0.0905 (0.0892) loss: 0.8670 (0.8666) time: 0.1022 data: 0.0191 max mem: 9305 +Train: [37] [3600/6250] eta: 0:06:32 lr: 0.000092 grad: 0.0854 (0.0892) loss: 0.8690 (0.8666) time: 0.1138 data: 0.0274 max mem: 9305 +Train: [37] [3700/6250] eta: 0:06:17 lr: 0.000092 grad: 0.0952 (0.0893) loss: 0.8636 (0.8666) time: 0.1312 data: 0.0531 max mem: 9305 +Train: [37] [3800/6250] eta: 0:06:01 lr: 0.000092 grad: 0.0855 (0.0894) loss: 0.8660 (0.8666) time: 0.1390 data: 0.0575 max mem: 9305 +Train: [37] [3900/6250] eta: 0:05:46 lr: 0.000092 grad: 0.0800 (0.0894) loss: 0.8685 (0.8666) time: 0.1397 data: 0.0583 max mem: 9305 +Train: [37] [4000/6250] eta: 0:05:31 lr: 0.000092 grad: 0.0808 (0.0893) loss: 0.8802 (0.8666) time: 0.1275 data: 0.0434 max mem: 9305 +Train: [37] [4100/6250] eta: 0:05:17 lr: 0.000092 grad: 0.0854 (0.0893) loss: 0.8675 (0.8666) time: 0.1442 data: 0.0584 max mem: 9305 +Train: [37] [4200/6250] eta: 0:05:01 lr: 0.000092 grad: 0.0970 (0.0894) loss: 0.8617 (0.8665) time: 0.0904 data: 0.0079 max mem: 9305 +Train: [37] [4300/6250] eta: 0:04:47 lr: 0.000092 grad: 0.0818 (0.0894) loss: 0.8666 (0.8664) time: 0.1569 data: 0.0769 max mem: 9305 +Train: [37] [4400/6250] eta: 0:04:32 lr: 0.000092 grad: 0.0795 (0.0895) loss: 0.8773 (0.8664) time: 0.1561 data: 0.0658 max mem: 9305 +Train: [37] [4500/6250] eta: 0:04:17 lr: 0.000092 grad: 0.0820 (0.0895) loss: 0.8673 (0.8664) time: 0.1246 data: 0.0377 max mem: 9305 +Train: [37] [4600/6250] eta: 0:04:03 lr: 0.000092 grad: 0.0927 (0.0895) loss: 0.8669 (0.8664) time: 0.0918 data: 0.0002 max mem: 9305 +Train: [37] [4700/6250] eta: 0:03:48 lr: 0.000092 grad: 0.0866 (0.0895) loss: 0.8616 (0.8664) time: 0.1335 data: 0.0509 max mem: 9305 +Train: [37] [4800/6250] eta: 0:03:33 lr: 0.000092 grad: 0.0808 (0.0895) loss: 0.8755 (0.8664) time: 0.1122 data: 0.0261 max mem: 9305 +Train: [37] [4900/6250] eta: 0:03:18 lr: 0.000092 grad: 0.0858 (0.0894) loss: 0.8701 (0.8663) time: 0.1431 data: 0.0638 max mem: 9305 +Train: [37] [5000/6250] eta: 0:03:03 lr: 0.000092 grad: 0.0941 (0.0895) loss: 0.8651 (0.8663) time: 0.1108 data: 0.0226 max mem: 9305 +Train: [37] [5100/6250] eta: 0:02:48 lr: 0.000092 grad: 0.0894 (0.0895) loss: 0.8666 (0.8662) time: 0.1239 data: 0.0422 max mem: 9305 +Train: [37] [5200/6250] eta: 0:02:33 lr: 0.000092 grad: 0.0904 (0.0895) loss: 0.8747 (0.8662) time: 0.1335 data: 0.0536 max mem: 9305 +Train: [37] [5300/6250] eta: 0:02:18 lr: 0.000092 grad: 0.0817 (0.0895) loss: 0.8744 (0.8662) time: 0.1302 data: 0.0461 max mem: 9305 +Train: [37] [5400/6250] eta: 0:02:04 lr: 0.000092 grad: 0.0920 (0.0895) loss: 0.8635 (0.8662) time: 0.1556 data: 0.0765 max mem: 9305 +Train: [37] [5500/6250] eta: 0:01:49 lr: 0.000092 grad: 0.0863 (0.0895) loss: 0.8715 (0.8661) time: 0.1366 data: 0.0549 max mem: 9305 +Train: [37] [5600/6250] eta: 0:01:35 lr: 0.000092 grad: 0.0857 (0.0896) loss: 0.8685 (0.8662) time: 0.1615 data: 0.0826 max mem: 9305 +Train: [37] [5700/6250] eta: 0:01:20 lr: 0.000091 grad: 0.0816 (0.0897) loss: 0.8664 (0.8662) time: 0.1566 data: 0.0736 max mem: 9305 +Train: [37] [5800/6250] eta: 0:01:05 lr: 0.000091 grad: 0.0956 (0.0897) loss: 0.8648 (0.8661) time: 0.1489 data: 0.0727 max mem: 9305 +Train: [37] [5900/6250] eta: 0:00:51 lr: 0.000091 grad: 0.0779 (0.0897) loss: 0.8667 (0.8661) time: 0.0984 data: 0.0163 max mem: 9305 +Train: [37] [6000/6250] eta: 0:00:36 lr: 0.000091 grad: 0.0901 (0.0897) loss: 0.8671 (0.8662) time: 0.1334 data: 0.0477 max mem: 9305 +Train: [37] [6100/6250] eta: 0:00:21 lr: 0.000091 grad: 0.0859 (0.0897) loss: 0.8679 (0.8662) time: 0.1189 data: 0.0326 max mem: 9305 +Train: [37] [6200/6250] eta: 0:00:07 lr: 0.000091 grad: 0.0870 (0.0897) loss: 0.8696 (0.8662) time: 0.1485 data: 0.0666 max mem: 9305 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.0828 (0.0897) loss: 0.8643 (0.8662) time: 0.1238 data: 0.0391 max mem: 9305 +Train: [37] Total time: 0:15:15 (0.1465 s / it) +Averaged stats: lr: 0.000091 grad: 0.0828 (0.0897) loss: 0.8643 (0.8662) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:04:55 loss: 0.8961 (0.8961) time: 4.7675 data: 4.7289 max mem: 9305 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8835 (0.8857) time: 0.1267 data: 0.0971 max mem: 9305 +Eval (hcp-train-subset): [37] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (hcp-train-subset): loss: 0.8835 (0.8857) +Eval (hcp-val): [37] [ 0/62] eta: 0:03:16 loss: 0.8789 (0.8789) time: 3.1686 data: 3.0699 max mem: 9305 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8810 (0.8818) time: 0.1308 data: 0.1024 max mem: 9305 +Eval (hcp-val): [37] Total time: 0:00:13 (0.2120 s / it) +Averaged stats (hcp-val): loss: 0.8810 (0.8818) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 8:27:43 lr: 0.000091 grad: 0.3267 (0.3267) loss: 0.8460 (0.8460) time: 4.8741 data: 4.6275 max mem: 9305 +Train: [38] [ 100/6250] eta: 0:20:56 lr: 0.000091 grad: 0.0722 (0.0952) loss: 0.8830 (0.8825) time: 0.1527 data: 0.0669 max mem: 9305 +Train: [38] [ 200/6250] eta: 0:18:11 lr: 0.000091 grad: 0.0809 (0.0923) loss: 0.8696 (0.8768) time: 0.1638 data: 0.0770 max mem: 9305 +Train: [38] [ 300/6250] eta: 0:17:38 lr: 0.000091 grad: 0.0834 (0.0916) loss: 0.8620 (0.8736) time: 0.1939 data: 0.1005 max mem: 9305 +Train: [38] [ 400/6250] eta: 0:16:47 lr: 0.000091 grad: 0.0795 (0.0897) loss: 0.8691 (0.8726) time: 0.1606 data: 0.0672 max mem: 9305 +Train: [38] [ 500/6250] eta: 0:16:04 lr: 0.000091 grad: 0.0816 (0.0882) loss: 0.8713 (0.8723) time: 0.1730 data: 0.0808 max mem: 9305 +Train: [38] [ 600/6250] eta: 0:15:24 lr: 0.000091 grad: 0.0831 (0.0876) loss: 0.8718 (0.8720) time: 0.1590 data: 0.0728 max mem: 9305 +Train: [38] [ 700/6250] eta: 0:14:43 lr: 0.000091 grad: 0.0798 (0.0870) loss: 0.8684 (0.8718) time: 0.1362 data: 0.0512 max mem: 9305 +Train: [38] [ 800/6250] eta: 0:14:02 lr: 0.000091 grad: 0.0779 (0.0865) loss: 0.8754 (0.8719) time: 0.1243 data: 0.0351 max mem: 9305 +Train: [38] [ 900/6250] eta: 0:13:28 lr: 0.000091 grad: 0.0841 (0.0860) loss: 0.8665 (0.8719) time: 0.1222 data: 0.0346 max mem: 9305 +Train: [38] [1000/6250] eta: 0:12:55 lr: 0.000091 grad: 0.0888 (0.0860) loss: 0.8708 (0.8718) time: 0.1218 data: 0.0390 max mem: 9305 +Train: [38] [1100/6250] eta: 0:12:27 lr: 0.000091 grad: 0.0778 (0.0860) loss: 0.8675 (0.8716) time: 0.1280 data: 0.0488 max mem: 9305 +Train: [38] [1200/6250] eta: 0:11:58 lr: 0.000091 grad: 0.0826 (0.0859) loss: 0.8667 (0.8714) time: 0.1115 data: 0.0274 max mem: 9305 +Train: [38] [1300/6250] eta: 0:11:36 lr: 0.000091 grad: 0.0806 (0.0860) loss: 0.8674 (0.8713) time: 0.1190 data: 0.0360 max mem: 9305 +Train: [38] [1400/6250] eta: 0:11:22 lr: 0.000091 grad: 0.0815 (0.0859) loss: 0.8718 (0.8712) time: 0.1462 data: 0.0597 max mem: 9305 +Train: [38] [1500/6250] eta: 0:11:09 lr: 0.000091 grad: 0.0892 (0.0863) loss: 0.8683 (0.8711) time: 0.1334 data: 0.0505 max mem: 9305 +Train: [38] [1600/6250] eta: 0:10:54 lr: 0.000091 grad: 0.0939 (0.0865) loss: 0.8641 (0.8709) time: 0.1277 data: 0.0441 max mem: 9305 +Train: [38] [1700/6250] eta: 0:10:41 lr: 0.000091 grad: 0.0866 (0.0865) loss: 0.8661 (0.8706) time: 0.1395 data: 0.0515 max mem: 9305 +Train: [38] [1800/6250] eta: 0:10:28 lr: 0.000091 grad: 0.0903 (0.0865) loss: 0.8634 (0.8704) time: 0.1717 data: 0.0773 max mem: 9305 +Train: [38] [1900/6250] eta: 0:10:21 lr: 0.000091 grad: 0.0818 (0.0867) loss: 0.8701 (0.8702) time: 0.1274 data: 0.0450 max mem: 9305 +Train: [38] [2000/6250] eta: 0:10:05 lr: 0.000091 grad: 0.0901 (0.0869) loss: 0.8631 (0.8698) time: 0.1349 data: 0.0526 max mem: 9305 +Train: [38] [2100/6250] eta: 0:09:50 lr: 0.000091 grad: 0.0957 (0.0871) loss: 0.8617 (0.8696) time: 0.1421 data: 0.0518 max mem: 9305 +Train: [38] [2200/6250] eta: 0:09:36 lr: 0.000091 grad: 0.0862 (0.0872) loss: 0.8678 (0.8695) time: 0.0991 data: 0.0168 max mem: 9305 +Train: [38] [2300/6250] eta: 0:09:20 lr: 0.000091 grad: 0.0848 (0.0872) loss: 0.8641 (0.8693) time: 0.1204 data: 0.0359 max mem: 9305 +Train: [38] [2400/6250] eta: 0:09:03 lr: 0.000091 grad: 0.0902 (0.0874) loss: 0.8690 (0.8692) time: 0.1398 data: 0.0563 max mem: 9305 +Train: [38] [2500/6250] eta: 0:08:48 lr: 0.000091 grad: 0.0897 (0.0873) loss: 0.8669 (0.8691) time: 0.1456 data: 0.0637 max mem: 9305 +Train: [38] [2600/6250] eta: 0:08:33 lr: 0.000091 grad: 0.0884 (0.0874) loss: 0.8711 (0.8691) time: 0.1351 data: 0.0546 max mem: 9305 +Train: [38] [2700/6250] eta: 0:08:18 lr: 0.000091 grad: 0.0849 (0.0874) loss: 0.8690 (0.8691) time: 0.1316 data: 0.0472 max mem: 9305 +Train: [38] [2800/6250] eta: 0:08:04 lr: 0.000091 grad: 0.0834 (0.0874) loss: 0.8679 (0.8691) time: 0.1459 data: 0.0617 max mem: 9305 +Train: [38] [2900/6250] eta: 0:07:49 lr: 0.000090 grad: 0.0797 (0.0874) loss: 0.8721 (0.8691) time: 0.1227 data: 0.0406 max mem: 9305 +Train: [38] [3000/6250] eta: 0:07:35 lr: 0.000090 grad: 0.0856 (0.0876) loss: 0.8715 (0.8691) time: 0.1441 data: 0.0635 max mem: 9305 +Train: [38] [3100/6250] eta: 0:07:20 lr: 0.000090 grad: 0.0833 (0.0877) loss: 0.8698 (0.8691) time: 0.1302 data: 0.0448 max mem: 9305 +Train: [38] [3200/6250] eta: 0:07:05 lr: 0.000090 grad: 0.0902 (0.0878) loss: 0.8683 (0.8689) time: 0.1375 data: 0.0495 max mem: 9305 +Train: [38] [3300/6250] eta: 0:06:51 lr: 0.000090 grad: 0.0872 (0.0879) loss: 0.8633 (0.8689) time: 0.1376 data: 0.0526 max mem: 9305 +Train: [38] [3400/6250] eta: 0:06:37 lr: 0.000090 grad: 0.0907 (0.0880) loss: 0.8658 (0.8687) time: 0.1468 data: 0.0665 max mem: 9305 +Train: [38] [3500/6250] eta: 0:06:24 lr: 0.000090 grad: 0.0861 (0.0881) loss: 0.8699 (0.8687) time: 0.1686 data: 0.0848 max mem: 9305 +Train: [38] [3600/6250] eta: 0:06:10 lr: 0.000090 grad: 0.0894 (0.0882) loss: 0.8636 (0.8687) time: 0.1235 data: 0.0401 max mem: 9305 +Train: [38] [3700/6250] eta: 0:05:55 lr: 0.000090 grad: 0.0879 (0.0883) loss: 0.8686 (0.8686) time: 0.1338 data: 0.0498 max mem: 9305 +Train: [38] [3800/6250] eta: 0:05:42 lr: 0.000090 grad: 0.0813 (0.0883) loss: 0.8682 (0.8686) time: 0.1454 data: 0.0661 max mem: 9305 +Train: [38] [3900/6250] eta: 0:05:28 lr: 0.000090 grad: 0.0878 (0.0883) loss: 0.8673 (0.8686) time: 0.1666 data: 0.0875 max mem: 9305 +Train: [38] [4000/6250] eta: 0:05:14 lr: 0.000090 grad: 0.0830 (0.0884) loss: 0.8670 (0.8685) time: 0.1195 data: 0.0348 max mem: 9305 +Train: [38] [4100/6250] eta: 0:04:59 lr: 0.000090 grad: 0.0848 (0.0884) loss: 0.8718 (0.8685) time: 0.1066 data: 0.0232 max mem: 9305 +Train: [38] [4200/6250] eta: 0:04:45 lr: 0.000090 grad: 0.0866 (0.0884) loss: 0.8707 (0.8685) time: 0.1226 data: 0.0345 max mem: 9305 +Train: [38] [4300/6250] eta: 0:04:31 lr: 0.000090 grad: 0.0806 (0.0884) loss: 0.8669 (0.8684) time: 0.1723 data: 0.0782 max mem: 9305 +Train: [38] [4400/6250] eta: 0:04:17 lr: 0.000090 grad: 0.0827 (0.0884) loss: 0.8654 (0.8684) time: 0.1381 data: 0.0578 max mem: 9305 +Train: [38] [4500/6250] eta: 0:04:02 lr: 0.000090 grad: 0.0839 (0.0884) loss: 0.8657 (0.8684) time: 0.1224 data: 0.0290 max mem: 9305 +Train: [38] [4600/6250] eta: 0:03:48 lr: 0.000090 grad: 0.0830 (0.0884) loss: 0.8695 (0.8684) time: 0.1345 data: 0.0481 max mem: 9305 +Train: [38] [4700/6250] eta: 0:03:34 lr: 0.000090 grad: 0.0870 (0.0884) loss: 0.8739 (0.8683) time: 0.1156 data: 0.0356 max mem: 9305 +Train: [38] [4800/6250] eta: 0:03:21 lr: 0.000090 grad: 0.0848 (0.0883) loss: 0.8661 (0.8683) time: 0.1398 data: 0.0614 max mem: 9305 +Train: [38] [4900/6250] eta: 0:03:07 lr: 0.000090 grad: 0.0862 (0.0883) loss: 0.8692 (0.8684) time: 0.1373 data: 0.0543 max mem: 9305 +Train: [38] [5000/6250] eta: 0:02:53 lr: 0.000090 grad: 0.0816 (0.0882) loss: 0.8758 (0.8684) time: 0.1513 data: 0.0735 max mem: 9305 +Train: [38] [5100/6250] eta: 0:02:39 lr: 0.000090 grad: 0.0801 (0.0882) loss: 0.8701 (0.8684) time: 0.1457 data: 0.0654 max mem: 9305 +Train: [38] [5200/6250] eta: 0:02:25 lr: 0.000090 grad: 0.0836 (0.0881) loss: 0.8689 (0.8685) time: 0.1509 data: 0.0677 max mem: 9305 +Train: [38] [5300/6250] eta: 0:02:11 lr: 0.000090 grad: 0.0846 (0.0881) loss: 0.8689 (0.8685) time: 0.1332 data: 0.0497 max mem: 9305 +Train: [38] [5400/6250] eta: 0:01:57 lr: 0.000090 grad: 0.0850 (0.0881) loss: 0.8684 (0.8685) time: 0.1436 data: 0.0607 max mem: 9305 +Train: [38] [5500/6250] eta: 0:01:44 lr: 0.000090 grad: 0.0844 (0.0881) loss: 0.8742 (0.8685) time: 0.1300 data: 0.0466 max mem: 9305 +Train: [38] [5600/6250] eta: 0:01:30 lr: 0.000090 grad: 0.0808 (0.0881) loss: 0.8684 (0.8685) time: 0.1718 data: 0.0850 max mem: 9305 +Train: [38] [5700/6250] eta: 0:01:16 lr: 0.000090 grad: 0.0880 (0.0880) loss: 0.8677 (0.8686) time: 0.1354 data: 0.0502 max mem: 9305 +Train: [38] [5800/6250] eta: 0:01:02 lr: 0.000090 grad: 0.0845 (0.0880) loss: 0.8701 (0.8686) time: 0.1313 data: 0.0530 max mem: 9305 +Train: [38] [5900/6250] eta: 0:00:48 lr: 0.000090 grad: 0.0834 (0.0880) loss: 0.8754 (0.8687) time: 0.1331 data: 0.0518 max mem: 9305 +Train: [38] [6000/6250] eta: 0:00:34 lr: 0.000090 grad: 0.0825 (0.0880) loss: 0.8709 (0.8687) time: 0.1144 data: 0.0303 max mem: 9305 +Train: [38] [6100/6250] eta: 0:00:20 lr: 0.000090 grad: 0.0910 (0.0880) loss: 0.8671 (0.8687) time: 0.0968 data: 0.0071 max mem: 9305 +Train: [38] [6200/6250] eta: 0:00:06 lr: 0.000089 grad: 0.0896 (0.0880) loss: 0.8724 (0.8687) time: 0.1860 data: 0.1048 max mem: 9305 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.0919 (0.0880) loss: 0.8691 (0.8687) time: 0.1972 data: 0.1156 max mem: 9305 +Train: [38] Total time: 0:14:29 (0.1391 s / it) +Averaged stats: lr: 0.000089 grad: 0.0919 (0.0880) loss: 0.8691 (0.8687) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:04:32 loss: 0.8967 (0.8967) time: 4.3948 data: 4.3595 max mem: 9305 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8796 (0.8830) time: 0.1035 data: 0.0750 max mem: 9305 +Eval (hcp-train-subset): [38] Total time: 0:00:12 (0.2047 s / it) +Averaged stats (hcp-train-subset): loss: 0.8796 (0.8830) +Eval (hcp-val): [38] [ 0/62] eta: 0:03:26 loss: 0.8798 (0.8798) time: 3.3384 data: 3.2840 max mem: 9305 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8799 (0.8816) time: 0.1083 data: 0.0786 max mem: 9305 +Eval (hcp-val): [38] Total time: 0:00:12 (0.2018 s / it) +Averaged stats (hcp-val): loss: 0.8799 (0.8816) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [39] [ 0/6250] eta: 7:17:38 lr: 0.000089 grad: 0.2217 (0.2217) loss: 0.8742 (0.8742) time: 4.2013 data: 3.9933 max mem: 9305 +Train: [39] [ 100/6250] eta: 0:19:52 lr: 0.000089 grad: 0.0907 (0.0935) loss: 0.8650 (0.8780) time: 0.1942 data: 0.0929 max mem: 9305 +Train: [39] [ 200/6250] eta: 0:17:07 lr: 0.000089 grad: 0.0801 (0.0912) loss: 0.8711 (0.8725) time: 0.1391 data: 0.0528 max mem: 9305 +Train: [39] [ 300/6250] eta: 0:15:53 lr: 0.000089 grad: 0.0756 (0.0890) loss: 0.8719 (0.8715) time: 0.1226 data: 0.0337 max mem: 9305 +Train: [39] [ 400/6250] eta: 0:14:47 lr: 0.000089 grad: 0.0802 (0.0879) loss: 0.8723 (0.8714) time: 0.1300 data: 0.0430 max mem: 9305 +Train: [39] [ 500/6250] eta: 0:14:10 lr: 0.000089 grad: 0.0879 (0.0876) loss: 0.8730 (0.8713) time: 0.1096 data: 0.0188 max mem: 9305 +Train: [39] [ 600/6250] eta: 0:13:37 lr: 0.000089 grad: 0.0828 (0.0871) loss: 0.8726 (0.8715) time: 0.1261 data: 0.0337 max mem: 9305 +Train: [39] [ 700/6250] eta: 0:13:10 lr: 0.000089 grad: 0.0841 (0.0867) loss: 0.8676 (0.8712) time: 0.1261 data: 0.0378 max mem: 9305 +Train: [39] [ 800/6250] eta: 0:12:47 lr: 0.000089 grad: 0.0804 (0.0864) loss: 0.8669 (0.8710) time: 0.1228 data: 0.0107 max mem: 9305 +Train: [39] [ 900/6250] eta: 0:12:30 lr: 0.000089 grad: 0.0801 (0.0859) loss: 0.8665 (0.8704) time: 0.1211 data: 0.0178 max mem: 9305 +Train: [39] [1000/6250] eta: 0:12:09 lr: 0.000089 grad: 0.0815 (0.0858) loss: 0.8708 (0.8701) time: 0.1421 data: 0.0582 max mem: 9305 +Train: [39] [1100/6250] eta: 0:11:49 lr: 0.000089 grad: 0.0833 (0.0857) loss: 0.8650 (0.8697) time: 0.1233 data: 0.0344 max mem: 9305 +Train: [39] [1200/6250] eta: 0:11:33 lr: 0.000089 grad: 0.0767 (0.0857) loss: 0.8644 (0.8696) time: 0.1453 data: 0.0604 max mem: 9305 +Train: [39] [1300/6250] eta: 0:11:20 lr: 0.000089 grad: 0.0796 (0.0856) loss: 0.8707 (0.8694) time: 0.1515 data: 0.0725 max mem: 9305 +Train: [39] [1400/6250] eta: 0:11:10 lr: 0.000089 grad: 0.0899 (0.0857) loss: 0.8650 (0.8692) time: 0.1577 data: 0.0735 max mem: 9305 +Train: [39] [1500/6250] eta: 0:10:55 lr: 0.000089 grad: 0.0772 (0.0857) loss: 0.8670 (0.8689) time: 0.1552 data: 0.0772 max mem: 9305 +Train: [39] [1600/6250] eta: 0:10:41 lr: 0.000089 grad: 0.0879 (0.0859) loss: 0.8635 (0.8686) time: 0.1400 data: 0.0594 max mem: 9305 +Train: [39] [1700/6250] eta: 0:10:25 lr: 0.000089 grad: 0.0865 (0.0861) loss: 0.8595 (0.8683) time: 0.1247 data: 0.0381 max mem: 9305 +Train: [39] [1800/6250] eta: 0:10:13 lr: 0.000089 grad: 0.0884 (0.0861) loss: 0.8630 (0.8681) time: 0.1319 data: 0.0464 max mem: 9305 +Train: [39] [1900/6250] eta: 0:09:59 lr: 0.000089 grad: 0.0807 (0.0862) loss: 0.8628 (0.8679) time: 0.1265 data: 0.0367 max mem: 9305 +Train: [39] [2000/6250] eta: 0:09:45 lr: 0.000089 grad: 0.0877 (0.0865) loss: 0.8634 (0.8677) time: 0.1468 data: 0.0644 max mem: 9305 +Train: [39] [2100/6250] eta: 0:09:32 lr: 0.000089 grad: 0.0912 (0.0868) loss: 0.8639 (0.8675) time: 0.1269 data: 0.0405 max mem: 9305 +Train: [39] [2200/6250] eta: 0:09:19 lr: 0.000089 grad: 0.0881 (0.0871) loss: 0.8616 (0.8673) time: 0.1345 data: 0.0570 max mem: 9305 +Train: [39] [2300/6250] eta: 0:09:06 lr: 0.000089 grad: 0.0911 (0.0875) loss: 0.8570 (0.8670) time: 0.1447 data: 0.0564 max mem: 9305 +Train: [39] [2400/6250] eta: 0:08:52 lr: 0.000089 grad: 0.0917 (0.0877) loss: 0.8613 (0.8667) time: 0.1204 data: 0.0359 max mem: 9305 +Train: [39] [2500/6250] eta: 0:08:40 lr: 0.000089 grad: 0.0935 (0.0879) loss: 0.8573 (0.8664) time: 0.1298 data: 0.0483 max mem: 9305 +Train: [39] [2600/6250] eta: 0:08:26 lr: 0.000089 grad: 0.0880 (0.0881) loss: 0.8668 (0.8662) time: 0.1290 data: 0.0496 max mem: 9305 +Train: [39] [2700/6250] eta: 0:08:13 lr: 0.000089 grad: 0.0899 (0.0883) loss: 0.8599 (0.8660) time: 0.1307 data: 0.0314 max mem: 9305 +Train: [39] [2800/6250] eta: 0:08:01 lr: 0.000089 grad: 0.0855 (0.0884) loss: 0.8596 (0.8658) time: 0.1446 data: 0.0627 max mem: 9305 +Train: [39] [2900/6250] eta: 0:07:48 lr: 0.000089 grad: 0.0846 (0.0885) loss: 0.8640 (0.8656) time: 0.1324 data: 0.0534 max mem: 9305 +Train: [39] [3000/6250] eta: 0:07:34 lr: 0.000089 grad: 0.0889 (0.0886) loss: 0.8572 (0.8655) time: 0.0884 data: 0.0002 max mem: 9305 +Train: [39] [3100/6250] eta: 0:07:21 lr: 0.000089 grad: 0.0885 (0.0887) loss: 0.8613 (0.8653) time: 0.1415 data: 0.0594 max mem: 9305 +Train: [39] [3200/6250] eta: 0:07:06 lr: 0.000089 grad: 0.0897 (0.0888) loss: 0.8639 (0.8652) time: 0.1304 data: 0.0439 max mem: 9305 +Train: [39] [3300/6250] eta: 0:06:52 lr: 0.000088 grad: 0.0883 (0.0889) loss: 0.8711 (0.8651) time: 0.1401 data: 0.0566 max mem: 9305 +Train: [39] [3400/6250] eta: 0:06:38 lr: 0.000088 grad: 0.0862 (0.0890) loss: 0.8600 (0.8650) time: 0.1508 data: 0.0715 max mem: 9305 +Train: [39] [3500/6250] eta: 0:06:23 lr: 0.000088 grad: 0.0874 (0.0891) loss: 0.8673 (0.8650) time: 0.1241 data: 0.0406 max mem: 9305 +Train: [39] [3600/6250] eta: 0:06:09 lr: 0.000088 grad: 0.0935 (0.0892) loss: 0.8593 (0.8649) time: 0.1328 data: 0.0536 max mem: 9305 +Train: [39] [3700/6250] eta: 0:05:55 lr: 0.000088 grad: 0.0890 (0.0893) loss: 0.8596 (0.8648) time: 0.1324 data: 0.0465 max mem: 9305 +Train: [39] [3800/6250] eta: 0:05:41 lr: 0.000088 grad: 0.0883 (0.0894) loss: 0.8609 (0.8648) time: 0.1254 data: 0.0433 max mem: 9305 +Train: [39] [3900/6250] eta: 0:05:27 lr: 0.000088 grad: 0.0859 (0.0897) loss: 0.8633 (0.8647) time: 0.1534 data: 0.0703 max mem: 9305 +Train: [39] [4000/6250] eta: 0:05:12 lr: 0.000088 grad: 0.0866 (0.0898) loss: 0.8600 (0.8647) time: 0.1367 data: 0.0498 max mem: 9305 +Train: [39] [4100/6250] eta: 0:04:59 lr: 0.000088 grad: 0.0873 (0.0899) loss: 0.8650 (0.8646) time: 0.1600 data: 0.0756 max mem: 9305 +Train: [39] [4200/6250] eta: 0:04:45 lr: 0.000088 grad: 0.0924 (0.0900) loss: 0.8654 (0.8645) time: 0.1494 data: 0.0732 max mem: 9305 +Train: [39] [4300/6250] eta: 0:04:31 lr: 0.000088 grad: 0.0858 (0.0901) loss: 0.8644 (0.8645) time: 0.1148 data: 0.0345 max mem: 9305 +Train: [39] [4400/6250] eta: 0:04:17 lr: 0.000088 grad: 0.0929 (0.0902) loss: 0.8585 (0.8644) time: 0.1295 data: 0.0466 max mem: 9305 +Train: [39] [4500/6250] eta: 0:04:03 lr: 0.000088 grad: 0.0998 (0.0903) loss: 0.8545 (0.8644) time: 0.1682 data: 0.0887 max mem: 9305 +Train: [39] [4600/6250] eta: 0:03:48 lr: 0.000088 grad: 0.0918 (0.0904) loss: 0.8568 (0.8643) time: 0.1344 data: 0.0523 max mem: 9305 +Train: [39] [4700/6250] eta: 0:03:35 lr: 0.000088 grad: 0.0933 (0.0904) loss: 0.8634 (0.8643) time: 0.1388 data: 0.0543 max mem: 9305 +Train: [39] [4800/6250] eta: 0:03:21 lr: 0.000088 grad: 0.0921 (0.0905) loss: 0.8647 (0.8643) time: 0.1367 data: 0.0544 max mem: 9305 +Train: [39] [4900/6250] eta: 0:03:07 lr: 0.000088 grad: 0.0904 (0.0906) loss: 0.8560 (0.8642) time: 0.1121 data: 0.0252 max mem: 9305 +Train: [39] [5000/6250] eta: 0:02:53 lr: 0.000088 grad: 0.0980 (0.0907) loss: 0.8602 (0.8642) time: 0.0890 data: 0.0026 max mem: 9305 +Train: [39] [5100/6250] eta: 0:02:39 lr: 0.000088 grad: 0.0970 (0.0907) loss: 0.8659 (0.8641) time: 0.1217 data: 0.0342 max mem: 9305 +Train: [39] [5200/6250] eta: 0:02:25 lr: 0.000088 grad: 0.0915 (0.0907) loss: 0.8546 (0.8640) time: 0.1366 data: 0.0546 max mem: 9305 +Train: [39] [5300/6250] eta: 0:02:11 lr: 0.000088 grad: 0.0891 (0.0908) loss: 0.8575 (0.8640) time: 0.1297 data: 0.0527 max mem: 9305 +Train: [39] [5400/6250] eta: 0:01:57 lr: 0.000088 grad: 0.0872 (0.0908) loss: 0.8650 (0.8640) time: 0.1243 data: 0.0418 max mem: 9305 +Train: [39] [5500/6250] eta: 0:01:43 lr: 0.000088 grad: 0.0936 (0.0909) loss: 0.8654 (0.8640) time: 0.1484 data: 0.0686 max mem: 9305 +Train: [39] [5600/6250] eta: 0:01:29 lr: 0.000088 grad: 0.0901 (0.0909) loss: 0.8667 (0.8640) time: 0.1578 data: 0.0778 max mem: 9305 +Train: [39] [5700/6250] eta: 0:01:16 lr: 0.000088 grad: 0.0888 (0.0910) loss: 0.8631 (0.8640) time: 0.1589 data: 0.0708 max mem: 9305 +Train: [39] [5800/6250] eta: 0:01:02 lr: 0.000088 grad: 0.0860 (0.0910) loss: 0.8698 (0.8640) time: 0.1741 data: 0.0839 max mem: 9305 +Train: [39] [5900/6250] eta: 0:00:48 lr: 0.000088 grad: 0.0922 (0.0910) loss: 0.8639 (0.8640) time: 0.1270 data: 0.0372 max mem: 9305 +Train: [39] [6000/6250] eta: 0:00:34 lr: 0.000088 grad: 0.0879 (0.0911) loss: 0.8661 (0.8640) time: 0.1482 data: 0.0700 max mem: 9305 +Train: [39] [6100/6250] eta: 0:00:20 lr: 0.000088 grad: 0.0889 (0.0911) loss: 0.8656 (0.8640) time: 0.1556 data: 0.0779 max mem: 9305 +Train: [39] [6200/6250] eta: 0:00:06 lr: 0.000088 grad: 0.0891 (0.0911) loss: 0.8659 (0.8640) time: 0.1154 data: 0.0297 max mem: 9305 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.0983 (0.0912) loss: 0.8628 (0.8640) time: 0.1541 data: 0.0729 max mem: 9305 +Train: [39] Total time: 0:14:34 (0.1399 s / it) +Averaged stats: lr: 0.000088 grad: 0.0983 (0.0912) loss: 0.8628 (0.8640) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:04:20 loss: 0.8974 (0.8974) time: 4.1997 data: 4.1600 max mem: 9305 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8843 (0.8865) time: 0.1354 data: 0.1074 max mem: 9305 +Eval (hcp-train-subset): [39] Total time: 0:00:12 (0.2063 s / it) +Averaged stats (hcp-train-subset): loss: 0.8843 (0.8865) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [39] [ 0/62] eta: 0:04:24 loss: 0.8778 (0.8778) time: 4.2605 data: 4.1850 max mem: 9305 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8808 (0.8829) time: 0.1601 data: 0.1312 max mem: 9305 +Eval (hcp-val): [39] Total time: 0:00:14 (0.2362 s / it) +Averaged stats (hcp-val): loss: 0.8808 (0.8829) +Making plots (hcp-val): example=10 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 10:03:04 lr: 0.000088 grad: 0.0733 (0.0733) loss: 0.8988 (0.8988) time: 5.7895 data: 5.6719 max mem: 9305 +Train: [40] [ 100/6250] eta: 0:21:08 lr: 0.000088 grad: 0.0805 (0.0814) loss: 0.8739 (0.8802) time: 0.1857 data: 0.0928 max mem: 9305 +Train: [40] [ 200/6250] eta: 0:18:08 lr: 0.000088 grad: 0.0836 (0.0857) loss: 0.8727 (0.8769) time: 0.1594 data: 0.0578 max mem: 9305 +Train: [40] [ 300/6250] eta: 0:16:34 lr: 0.000088 grad: 0.0830 (0.0869) loss: 0.8727 (0.8737) time: 0.1366 data: 0.0488 max mem: 9305 +Train: [40] [ 400/6250] eta: 0:15:28 lr: 0.000087 grad: 0.0924 (0.0867) loss: 0.8671 (0.8723) time: 0.1405 data: 0.0456 max mem: 9305 +Train: [40] [ 500/6250] eta: 0:14:39 lr: 0.000087 grad: 0.0879 (0.0865) loss: 0.8701 (0.8719) time: 0.1174 data: 0.0214 max mem: 9305 +Train: [40] [ 600/6250] eta: 0:13:58 lr: 0.000087 grad: 0.0864 (0.0867) loss: 0.8634 (0.8713) time: 0.1190 data: 0.0340 max mem: 9305 +Train: [40] [ 700/6250] eta: 0:13:24 lr: 0.000087 grad: 0.0770 (0.0866) loss: 0.8755 (0.8709) time: 0.1197 data: 0.0259 max mem: 9305 +Train: [40] [ 800/6250] eta: 0:12:54 lr: 0.000087 grad: 0.0817 (0.0865) loss: 0.8682 (0.8708) time: 0.1191 data: 0.0281 max mem: 9305 +Train: [40] [ 900/6250] eta: 0:12:42 lr: 0.000087 grad: 0.0778 (0.0866) loss: 0.8733 (0.8709) time: 0.1423 data: 0.0652 max mem: 9305 +Train: [40] [1000/6250] eta: 0:12:42 lr: 0.000087 grad: 0.0746 (0.0860) loss: 0.8721 (0.8709) time: 0.1535 data: 0.0709 max mem: 9305 +Train: [40] [1100/6250] eta: 0:12:34 lr: 0.000087 grad: 0.0796 (0.0858) loss: 0.8699 (0.8711) time: 0.1567 data: 0.0792 max mem: 9305 +Train: [40] [1200/6250] eta: 0:12:22 lr: 0.000087 grad: 0.0805 (0.0857) loss: 0.8698 (0.8710) time: 0.1320 data: 0.0452 max mem: 9305 +Train: [40] [1300/6250] eta: 0:12:01 lr: 0.000087 grad: 0.0845 (0.0856) loss: 0.8703 (0.8708) time: 0.1305 data: 0.0483 max mem: 9305 +Train: [40] [1400/6250] eta: 0:11:41 lr: 0.000087 grad: 0.0821 (0.0857) loss: 0.8670 (0.8706) time: 0.1212 data: 0.0277 max mem: 9305 +Train: [40] [1500/6250] eta: 0:11:23 lr: 0.000087 grad: 0.0848 (0.0856) loss: 0.8694 (0.8705) time: 0.1389 data: 0.0539 max mem: 9305 +Train: [40] [1600/6250] eta: 0:11:07 lr: 0.000087 grad: 0.0915 (0.0858) loss: 0.8612 (0.8702) time: 0.1276 data: 0.0387 max mem: 9305 +Train: [40] [1700/6250] eta: 0:10:56 lr: 0.000087 grad: 0.0861 (0.0860) loss: 0.8681 (0.8701) time: 0.1577 data: 0.0715 max mem: 9305 +Train: [40] [1800/6250] eta: 0:10:40 lr: 0.000087 grad: 0.0862 (0.0860) loss: 0.8681 (0.8699) time: 0.1293 data: 0.0476 max mem: 9305 +Train: [40] [1900/6250] eta: 0:10:26 lr: 0.000087 grad: 0.0816 (0.0861) loss: 0.8691 (0.8699) time: 0.0920 data: 0.0002 max mem: 9305 +Train: [40] [2000/6250] eta: 0:10:13 lr: 0.000087 grad: 0.0830 (0.0862) loss: 0.8686 (0.8699) time: 0.1485 data: 0.0676 max mem: 9305 +Train: [40] [2100/6250] eta: 0:09:58 lr: 0.000087 grad: 0.0856 (0.0864) loss: 0.8686 (0.8697) time: 0.1373 data: 0.0587 max mem: 9305 +Train: [40] [2200/6250] eta: 0:09:41 lr: 0.000087 grad: 0.0879 (0.0865) loss: 0.8671 (0.8696) time: 0.1459 data: 0.0660 max mem: 9305 +Train: [40] [2300/6250] eta: 0:09:30 lr: 0.000087 grad: 0.0849 (0.0866) loss: 0.8678 (0.8694) time: 0.2027 data: 0.0796 max mem: 9305 +Train: [40] [2400/6250] eta: 0:09:15 lr: 0.000087 grad: 0.0883 (0.0868) loss: 0.8657 (0.8694) time: 0.1751 data: 0.0943 max mem: 9305 +Train: [40] [2500/6250] eta: 0:09:03 lr: 0.000087 grad: 0.0824 (0.0869) loss: 0.8585 (0.8692) time: 0.1401 data: 0.0594 max mem: 9305 +Train: [40] [2600/6250] eta: 0:08:46 lr: 0.000087 grad: 0.0811 (0.0870) loss: 0.8678 (0.8692) time: 0.1317 data: 0.0473 max mem: 9305 +Train: [40] [2700/6250] eta: 0:08:29 lr: 0.000087 grad: 0.0820 (0.0870) loss: 0.8708 (0.8692) time: 0.1317 data: 0.0514 max mem: 9305 +Train: [40] [2800/6250] eta: 0:08:14 lr: 0.000087 grad: 0.0826 (0.0869) loss: 0.8696 (0.8692) time: 0.1245 data: 0.0437 max mem: 9305 +Train: [40] [2900/6250] eta: 0:07:58 lr: 0.000087 grad: 0.0822 (0.0869) loss: 0.8680 (0.8692) time: 0.1391 data: 0.0590 max mem: 9305 +Train: [40] [3000/6250] eta: 0:07:44 lr: 0.000087 grad: 0.0847 (0.0869) loss: 0.8695 (0.8692) time: 0.1583 data: 0.0774 max mem: 9305 +Train: [40] [3100/6250] eta: 0:07:30 lr: 0.000087 grad: 0.0884 (0.0872) loss: 0.8641 (0.8691) time: 0.1350 data: 0.0531 max mem: 9305 +Train: [40] [3200/6250] eta: 0:07:15 lr: 0.000087 grad: 0.0866 (0.0872) loss: 0.8654 (0.8691) time: 0.1329 data: 0.0542 max mem: 9305 +Train: [40] [3300/6250] eta: 0:07:02 lr: 0.000087 grad: 0.0852 (0.0872) loss: 0.8705 (0.8691) time: 0.1740 data: 0.1007 max mem: 9305 +Train: [40] [3400/6250] eta: 0:06:48 lr: 0.000087 grad: 0.0867 (0.0872) loss: 0.8644 (0.8691) time: 0.1868 data: 0.1114 max mem: 9305 +Train: [40] [3500/6250] eta: 0:06:32 lr: 0.000087 grad: 0.0948 (0.0873) loss: 0.8657 (0.8692) time: 0.1497 data: 0.0666 max mem: 9305 +Train: [40] [3600/6250] eta: 0:06:19 lr: 0.000087 grad: 0.0863 (0.0873) loss: 0.8677 (0.8691) time: 0.1545 data: 0.0745 max mem: 9305 +Train: [40] [3700/6250] eta: 0:06:04 lr: 0.000086 grad: 0.0892 (0.0874) loss: 0.8632 (0.8690) time: 0.1547 data: 0.0700 max mem: 9305 +Train: [40] [3800/6250] eta: 0:05:49 lr: 0.000086 grad: 0.0859 (0.0875) loss: 0.8684 (0.8690) time: 0.1247 data: 0.0419 max mem: 9305 +Train: [40] [3900/6250] eta: 0:05:35 lr: 0.000086 grad: 0.0852 (0.0876) loss: 0.8703 (0.8689) time: 0.1528 data: 0.0770 max mem: 9305 +Train: [40] [4000/6250] eta: 0:05:20 lr: 0.000086 grad: 0.0908 (0.0876) loss: 0.8599 (0.8689) time: 0.1448 data: 0.0666 max mem: 9305 +Train: [40] [4100/6250] eta: 0:05:06 lr: 0.000086 grad: 0.0849 (0.0876) loss: 0.8681 (0.8688) time: 0.2020 data: 0.1016 max mem: 9305 +Train: [40] [4200/6250] eta: 0:04:53 lr: 0.000086 grad: 0.0943 (0.0878) loss: 0.8595 (0.8687) time: 0.2494 data: 0.1423 max mem: 9305 +Train: [40] [4300/6250] eta: 0:04:38 lr: 0.000086 grad: 0.0860 (0.0879) loss: 0.8671 (0.8687) time: 0.1395 data: 0.0533 max mem: 9305 +Train: [40] [4400/6250] eta: 0:04:23 lr: 0.000086 grad: 0.0892 (0.0880) loss: 0.8654 (0.8686) time: 0.1233 data: 0.0345 max mem: 9305 +Train: [40] [4500/6250] eta: 0:04:09 lr: 0.000086 grad: 0.0910 (0.0880) loss: 0.8615 (0.8685) time: 0.1383 data: 0.0599 max mem: 9305 +Train: [40] [4600/6250] eta: 0:03:54 lr: 0.000086 grad: 0.0872 (0.0881) loss: 0.8632 (0.8684) time: 0.1561 data: 0.0807 max mem: 9305 +Train: [40] [4700/6250] eta: 0:03:40 lr: 0.000086 grad: 0.0911 (0.0882) loss: 0.8617 (0.8683) time: 0.1140 data: 0.0322 max mem: 9305 +Train: [40] [4800/6250] eta: 0:03:25 lr: 0.000086 grad: 0.0895 (0.0882) loss: 0.8695 (0.8682) time: 0.1467 data: 0.0615 max mem: 9305 +Train: [40] [4900/6250] eta: 0:03:11 lr: 0.000086 grad: 0.0845 (0.0882) loss: 0.8621 (0.8682) time: 0.1303 data: 0.0469 max mem: 9305 +Train: [40] [5000/6250] eta: 0:02:57 lr: 0.000086 grad: 0.0887 (0.0883) loss: 0.8587 (0.8681) time: 0.1343 data: 0.0497 max mem: 9305 +Train: [40] [5100/6250] eta: 0:02:43 lr: 0.000086 grad: 0.0931 (0.0884) loss: 0.8635 (0.8679) time: 0.1342 data: 0.0562 max mem: 9305 +Train: [40] [5200/6250] eta: 0:02:29 lr: 0.000086 grad: 0.0936 (0.0885) loss: 0.8590 (0.8678) time: 0.1739 data: 0.0898 max mem: 9305 +Train: [40] [5300/6250] eta: 0:02:14 lr: 0.000086 grad: 0.0886 (0.0887) loss: 0.8581 (0.8677) time: 0.1303 data: 0.0498 max mem: 9305 +Train: [40] [5400/6250] eta: 0:02:00 lr: 0.000086 grad: 0.0873 (0.0887) loss: 0.8634 (0.8676) time: 0.1318 data: 0.0556 max mem: 9305 +Train: [40] [5500/6250] eta: 0:01:46 lr: 0.000086 grad: 0.0887 (0.0888) loss: 0.8674 (0.8675) time: 0.1325 data: 0.0515 max mem: 9305 +Train: [40] [5600/6250] eta: 0:01:32 lr: 0.000086 grad: 0.0917 (0.0890) loss: 0.8623 (0.8674) time: 0.1471 data: 0.0640 max mem: 9305 +Train: [40] [5700/6250] eta: 0:01:18 lr: 0.000086 grad: 0.0908 (0.0892) loss: 0.8616 (0.8672) time: 0.1519 data: 0.0693 max mem: 9305 +Train: [40] [5800/6250] eta: 0:01:03 lr: 0.000086 grad: 0.0872 (0.0893) loss: 0.8651 (0.8671) time: 0.1397 data: 0.0605 max mem: 9305 +Train: [40] [5900/6250] eta: 0:00:49 lr: 0.000086 grad: 0.0894 (0.0893) loss: 0.8589 (0.8670) time: 0.0960 data: 0.0043 max mem: 9305 +Train: [40] [6000/6250] eta: 0:00:35 lr: 0.000086 grad: 0.0855 (0.0894) loss: 0.8652 (0.8669) time: 0.1638 data: 0.0791 max mem: 9305 +Train: [40] [6100/6250] eta: 0:00:21 lr: 0.000086 grad: 0.0979 (0.0897) loss: 0.8618 (0.8668) time: 0.1160 data: 0.0317 max mem: 9305 +Train: [40] [6200/6250] eta: 0:00:07 lr: 0.000086 grad: 0.0937 (0.0898) loss: 0.8559 (0.8666) time: 0.1389 data: 0.0616 max mem: 9305 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.1043 (0.0899) loss: 0.8594 (0.8666) time: 0.1580 data: 0.0723 max mem: 9305 +Train: [40] Total time: 0:14:51 (0.1426 s / it) +Averaged stats: lr: 0.000086 grad: 0.1043 (0.0899) loss: 0.8594 (0.8666) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:04:20 loss: 0.8955 (0.8955) time: 4.1988 data: 4.1621 max mem: 9305 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8844 (0.8840) time: 0.1497 data: 0.1211 max mem: 9305 +Eval (hcp-train-subset): [40] Total time: 0:00:12 (0.2085 s / it) +Averaged stats (hcp-train-subset): loss: 0.8844 (0.8840) +Eval (hcp-val): [40] [ 0/62] eta: 0:03:05 loss: 0.8749 (0.8749) time: 2.9907 data: 2.9056 max mem: 9305 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8813 (0.8815) time: 0.1299 data: 0.1001 max mem: 9305 +Eval (hcp-val): [40] Total time: 0:00:13 (0.2111 s / it) +Averaged stats (hcp-val): loss: 0.8813 (0.8815) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 9:15:52 lr: 0.000086 grad: 0.0586 (0.0586) loss: 0.9032 (0.9032) time: 5.3364 data: 5.2084 max mem: 9305 +Train: [41] [ 100/6250] eta: 0:18:31 lr: 0.000086 grad: 0.1125 (0.1414) loss: 0.8553 (0.8737) time: 0.1464 data: 0.0513 max mem: 9305 +Train: [41] [ 200/6250] eta: 0:15:21 lr: 0.000086 grad: 0.0866 (0.1235) loss: 0.8702 (0.8692) time: 0.1186 data: 0.0276 max mem: 9305 +Train: [41] [ 300/6250] eta: 0:14:07 lr: 0.000086 grad: 0.0903 (0.1129) loss: 0.8633 (0.8680) time: 0.1233 data: 0.0312 max mem: 9305 +Train: [41] [ 400/6250] eta: 0:13:17 lr: 0.000086 grad: 0.0929 (0.1082) loss: 0.8618 (0.8673) time: 0.1145 data: 0.0277 max mem: 9305 +Train: [41] [ 500/6250] eta: 0:12:49 lr: 0.000086 grad: 0.0998 (0.1043) loss: 0.8648 (0.8673) time: 0.1051 data: 0.0131 max mem: 9305 +Train: [41] [ 600/6250] eta: 0:12:25 lr: 0.000086 grad: 0.0945 (0.1023) loss: 0.8654 (0.8668) time: 0.1218 data: 0.0282 max mem: 9305 +Train: [41] [ 700/6250] eta: 0:12:10 lr: 0.000085 grad: 0.0839 (0.1009) loss: 0.8686 (0.8667) time: 0.1405 data: 0.0540 max mem: 9305 +Train: [41] [ 800/6250] eta: 0:12:00 lr: 0.000085 grad: 0.0983 (0.1004) loss: 0.8601 (0.8663) time: 0.1114 data: 0.0224 max mem: 9305 +Train: [41] [ 900/6250] eta: 0:11:48 lr: 0.000085 grad: 0.0925 (0.1004) loss: 0.8634 (0.8657) time: 0.1348 data: 0.0555 max mem: 9305 +Train: [41] [1000/6250] eta: 0:11:34 lr: 0.000085 grad: 0.0926 (0.1000) loss: 0.8598 (0.8652) time: 0.1210 data: 0.0376 max mem: 9305 +Train: [41] [1100/6250] eta: 0:11:20 lr: 0.000085 grad: 0.0983 (0.0997) loss: 0.8712 (0.8647) time: 0.1322 data: 0.0514 max mem: 9305 +Train: [41] [1200/6250] eta: 0:11:09 lr: 0.000085 grad: 0.0901 (0.0993) loss: 0.8598 (0.8642) time: 0.1360 data: 0.0532 max mem: 9305 +Train: [41] [1300/6250] eta: 0:11:02 lr: 0.000085 grad: 0.0966 (0.0993) loss: 0.8664 (0.8639) time: 0.1445 data: 0.0694 max mem: 9305 +Train: [41] [1400/6250] eta: 0:10:49 lr: 0.000085 grad: 0.0932 (0.0990) loss: 0.8592 (0.8635) time: 0.1353 data: 0.0536 max mem: 9305 +Train: [41] [1500/6250] eta: 0:10:33 lr: 0.000085 grad: 0.0964 (0.0990) loss: 0.8586 (0.8631) time: 0.1179 data: 0.0354 max mem: 9305 +Train: [41] [1600/6250] eta: 0:10:19 lr: 0.000085 grad: 0.0939 (0.0990) loss: 0.8546 (0.8629) time: 0.1174 data: 0.0337 max mem: 9305 +Train: [41] [1700/6250] eta: 0:10:05 lr: 0.000085 grad: 0.0912 (0.0990) loss: 0.8553 (0.8626) time: 0.1199 data: 0.0340 max mem: 9305 +Train: [41] [1800/6250] eta: 0:09:52 lr: 0.000085 grad: 0.0918 (0.0991) loss: 0.8583 (0.8624) time: 0.1361 data: 0.0577 max mem: 9305 +Train: [41] [1900/6250] eta: 0:09:41 lr: 0.000085 grad: 0.0960 (0.0990) loss: 0.8638 (0.8622) time: 0.1064 data: 0.0179 max mem: 9305 +Train: [41] [2000/6250] eta: 0:09:29 lr: 0.000085 grad: 0.1027 (0.0993) loss: 0.8564 (0.8620) time: 0.1420 data: 0.0625 max mem: 9305 +Train: [41] [2100/6250] eta: 0:09:17 lr: 0.000085 grad: 0.0969 (0.0994) loss: 0.8530 (0.8618) time: 0.1471 data: 0.0658 max mem: 9305 +Train: [41] [2200/6250] eta: 0:09:03 lr: 0.000085 grad: 0.1011 (0.0996) loss: 0.8624 (0.8616) time: 0.1425 data: 0.0574 max mem: 9305 +Train: [41] [2300/6250] eta: 0:08:53 lr: 0.000085 grad: 0.0973 (0.0995) loss: 0.8533 (0.8615) time: 0.1149 data: 0.0345 max mem: 9305 +Train: [41] [2400/6250] eta: 0:08:41 lr: 0.000085 grad: 0.0924 (0.0994) loss: 0.8607 (0.8614) time: 0.1343 data: 0.0567 max mem: 9305 +Train: [41] [2500/6250] eta: 0:08:29 lr: 0.000085 grad: 0.0934 (0.0994) loss: 0.8622 (0.8613) time: 0.1415 data: 0.0544 max mem: 9305 +Train: [41] [2600/6250] eta: 0:08:16 lr: 0.000085 grad: 0.0916 (0.0993) loss: 0.8639 (0.8613) time: 0.1396 data: 0.0574 max mem: 9305 +Train: [41] [2700/6250] eta: 0:08:02 lr: 0.000085 grad: 0.1016 (0.0993) loss: 0.8577 (0.8613) time: 0.1435 data: 0.0533 max mem: 9305 +Train: [41] [2800/6250] eta: 0:07:48 lr: 0.000085 grad: 0.0929 (0.0992) loss: 0.8645 (0.8614) time: 0.1275 data: 0.0457 max mem: 9305 +Train: [41] [2900/6250] eta: 0:07:34 lr: 0.000085 grad: 0.0932 (0.0991) loss: 0.8638 (0.8614) time: 0.1199 data: 0.0357 max mem: 9305 +Train: [41] [3000/6250] eta: 0:07:21 lr: 0.000085 grad: 0.1000 (0.0990) loss: 0.8637 (0.8615) time: 0.1646 data: 0.0839 max mem: 9305 +Train: [41] [3100/6250] eta: 0:07:09 lr: 0.000085 grad: 0.0979 (0.0991) loss: 0.8612 (0.8616) time: 0.1079 data: 0.0012 max mem: 9305 +Train: [41] [3200/6250] eta: 0:06:55 lr: 0.000085 grad: 0.0904 (0.0991) loss: 0.8615 (0.8617) time: 0.1182 data: 0.0343 max mem: 9305 +Train: [41] [3300/6250] eta: 0:06:41 lr: 0.000085 grad: 0.0910 (0.0990) loss: 0.8644 (0.8617) time: 0.1423 data: 0.0643 max mem: 9305 +Train: [41] [3400/6250] eta: 0:06:28 lr: 0.000085 grad: 0.0882 (0.0989) loss: 0.8602 (0.8617) time: 0.1529 data: 0.0699 max mem: 9305 +Train: [41] [3500/6250] eta: 0:06:15 lr: 0.000085 grad: 0.0913 (0.0988) loss: 0.8605 (0.8617) time: 0.1303 data: 0.0487 max mem: 9305 +Train: [41] [3600/6250] eta: 0:06:01 lr: 0.000085 grad: 0.0946 (0.0990) loss: 0.8607 (0.8617) time: 0.1311 data: 0.0509 max mem: 9305 +Train: [41] [3700/6250] eta: 0:05:48 lr: 0.000085 grad: 0.0959 (0.0990) loss: 0.8676 (0.8618) time: 0.1382 data: 0.0485 max mem: 9305 +Train: [41] [3800/6250] eta: 0:05:33 lr: 0.000085 grad: 0.0947 (0.0989) loss: 0.8637 (0.8619) time: 0.1111 data: 0.0283 max mem: 9305 +Train: [41] [3900/6250] eta: 0:05:24 lr: 0.000084 grad: 0.0878 (0.0988) loss: 0.8606 (0.8620) time: 0.1604 data: 0.0736 max mem: 9305 +Train: [41] [4000/6250] eta: 0:05:09 lr: 0.000084 grad: 0.0904 (0.0987) loss: 0.8645 (0.8620) time: 0.1210 data: 0.0385 max mem: 9305 +Train: [41] [4100/6250] eta: 0:04:55 lr: 0.000084 grad: 0.0895 (0.0986) loss: 0.8607 (0.8620) time: 0.0990 data: 0.0178 max mem: 9305 +Train: [41] [4200/6250] eta: 0:04:41 lr: 0.000084 grad: 0.0949 (0.0985) loss: 0.8647 (0.8620) time: 0.1588 data: 0.0781 max mem: 9305 +Train: [41] [4300/6250] eta: 0:04:27 lr: 0.000084 grad: 0.0890 (0.0984) loss: 0.8683 (0.8620) time: 0.1439 data: 0.0611 max mem: 9305 +Train: [41] [4400/6250] eta: 0:04:13 lr: 0.000084 grad: 0.0896 (0.0983) loss: 0.8645 (0.8620) time: 0.1406 data: 0.0602 max mem: 9305 +Train: [41] [4500/6250] eta: 0:03:59 lr: 0.000084 grad: 0.0915 (0.0982) loss: 0.8629 (0.8620) time: 0.1375 data: 0.0559 max mem: 9305 +Train: [41] [4600/6250] eta: 0:03:46 lr: 0.000084 grad: 0.0884 (0.0982) loss: 0.8600 (0.8620) time: 0.1391 data: 0.0601 max mem: 9305 +Train: [41] [4700/6250] eta: 0:03:32 lr: 0.000084 grad: 0.0961 (0.0981) loss: 0.8611 (0.8620) time: 0.1438 data: 0.0578 max mem: 9305 +Train: [41] [4800/6250] eta: 0:03:19 lr: 0.000084 grad: 0.0919 (0.0980) loss: 0.8633 (0.8620) time: 0.1375 data: 0.0566 max mem: 9305 +Train: [41] [4900/6250] eta: 0:03:04 lr: 0.000084 grad: 0.0932 (0.0980) loss: 0.8556 (0.8620) time: 0.1139 data: 0.0214 max mem: 9305 +Train: [41] [5000/6250] eta: 0:02:50 lr: 0.000084 grad: 0.0900 (0.0979) loss: 0.8556 (0.8620) time: 0.1231 data: 0.0379 max mem: 9305 +Train: [41] [5100/6250] eta: 0:02:37 lr: 0.000084 grad: 0.0888 (0.0978) loss: 0.8626 (0.8619) time: 0.1442 data: 0.0654 max mem: 9305 +Train: [41] [5200/6250] eta: 0:02:23 lr: 0.000084 grad: 0.0923 (0.0977) loss: 0.8557 (0.8620) time: 0.1270 data: 0.0436 max mem: 9305 +Train: [41] [5300/6250] eta: 0:02:09 lr: 0.000084 grad: 0.0942 (0.0976) loss: 0.8612 (0.8620) time: 0.1212 data: 0.0388 max mem: 9305 +Train: [41] [5400/6250] eta: 0:01:55 lr: 0.000084 grad: 0.0849 (0.0975) loss: 0.8630 (0.8620) time: 0.1498 data: 0.0647 max mem: 9305 +Train: [41] [5500/6250] eta: 0:01:42 lr: 0.000084 grad: 0.0930 (0.0975) loss: 0.8624 (0.8619) time: 0.1255 data: 0.0385 max mem: 9305 +Train: [41] [5600/6250] eta: 0:01:28 lr: 0.000084 grad: 0.0960 (0.0975) loss: 0.8577 (0.8619) time: 0.1302 data: 0.0454 max mem: 9305 +Train: [41] [5700/6250] eta: 0:01:14 lr: 0.000084 grad: 0.0982 (0.0974) loss: 0.8597 (0.8618) time: 0.2515 data: 0.1589 max mem: 9305 +Train: [41] [5800/6250] eta: 0:01:01 lr: 0.000084 grad: 0.0892 (0.0974) loss: 0.8590 (0.8617) time: 0.1230 data: 0.0378 max mem: 9305 +Train: [41] [5900/6250] eta: 0:00:47 lr: 0.000084 grad: 0.0911 (0.0974) loss: 0.8591 (0.8617) time: 0.1325 data: 0.0505 max mem: 9305 +Train: [41] [6000/6250] eta: 0:00:34 lr: 0.000084 grad: 0.0905 (0.0973) loss: 0.8630 (0.8617) time: 0.1672 data: 0.0744 max mem: 9305 +Train: [41] [6100/6250] eta: 0:00:20 lr: 0.000084 grad: 0.0933 (0.0973) loss: 0.8588 (0.8616) time: 0.1534 data: 0.0668 max mem: 9305 +Train: [41] [6200/6250] eta: 0:00:06 lr: 0.000084 grad: 0.1004 (0.0972) loss: 0.8565 (0.8616) time: 0.1421 data: 0.0541 max mem: 9305 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.0894 (0.0972) loss: 0.8670 (0.8615) time: 0.1725 data: 0.0882 max mem: 9305 +Train: [41] Total time: 0:14:17 (0.1372 s / it) +Averaged stats: lr: 0.000084 grad: 0.0894 (0.0972) loss: 0.8670 (0.8615) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:05:46 loss: 0.8941 (0.8941) time: 5.5921 data: 5.5575 max mem: 9305 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8852 (0.8848) time: 0.0499 data: 0.0205 max mem: 9305 +Eval (hcp-train-subset): [41] Total time: 0:00:15 (0.2435 s / it) +Averaged stats (hcp-train-subset): loss: 0.8852 (0.8848) +Eval (hcp-val): [41] [ 0/62] eta: 0:10:04 loss: 0.8773 (0.8773) time: 9.7448 data: 9.7149 max mem: 9305 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8811 (0.8816) time: 0.0991 data: 0.0704 max mem: 9305 +Eval (hcp-val): [41] Total time: 0:00:15 (0.2552 s / it) +Averaged stats (hcp-val): loss: 0.8811 (0.8816) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [42] [ 0/6250] eta: 9:47:54 lr: 0.000084 grad: 0.0680 (0.0680) loss: 0.8948 (0.8948) time: 5.6439 data: 5.5495 max mem: 9305 +Train: [42] [ 100/6250] eta: 0:17:48 lr: 0.000084 grad: 0.0943 (0.1105) loss: 0.8690 (0.8709) time: 0.1234 data: 0.0292 max mem: 9305 +Train: [42] [ 200/6250] eta: 0:15:19 lr: 0.000084 grad: 0.0906 (0.1050) loss: 0.8650 (0.8670) time: 0.0987 data: 0.0030 max mem: 9305 +Train: [42] [ 300/6250] eta: 0:14:39 lr: 0.000084 grad: 0.0861 (0.1009) loss: 0.8728 (0.8671) time: 0.1529 data: 0.0610 max mem: 9305 +Train: [42] [ 400/6250] eta: 0:14:02 lr: 0.000084 grad: 0.0843 (0.0986) loss: 0.8672 (0.8674) time: 0.1470 data: 0.0505 max mem: 9305 +Train: [42] [ 500/6250] eta: 0:13:34 lr: 0.000084 grad: 0.0799 (0.0965) loss: 0.8653 (0.8674) time: 0.1191 data: 0.0307 max mem: 9305 +Train: [42] [ 600/6250] eta: 0:13:12 lr: 0.000084 grad: 0.0875 (0.0954) loss: 0.8654 (0.8676) time: 0.1264 data: 0.0465 max mem: 9305 +Train: [42] [ 700/6250] eta: 0:13:04 lr: 0.000084 grad: 0.0936 (0.0947) loss: 0.8640 (0.8675) time: 0.1345 data: 0.0416 max mem: 9305 +Train: [42] [ 800/6250] eta: 0:13:19 lr: 0.000084 grad: 0.0841 (0.0941) loss: 0.8731 (0.8674) time: 0.2185 data: 0.1378 max mem: 9305 +Train: [42] [ 900/6250] eta: 0:13:21 lr: 0.000083 grad: 0.0789 (0.0936) loss: 0.8688 (0.8673) time: 0.1809 data: 0.0943 max mem: 9305 +Train: [42] [1000/6250] eta: 0:13:27 lr: 0.000083 grad: 0.0878 (0.0933) loss: 0.8602 (0.8673) time: 0.1871 data: 0.1107 max mem: 9305 +Train: [42] [1100/6250] eta: 0:13:17 lr: 0.000083 grad: 0.0904 (0.0931) loss: 0.8610 (0.8670) time: 0.1541 data: 0.0677 max mem: 9305 +Train: [42] [1200/6250] eta: 0:13:07 lr: 0.000083 grad: 0.0835 (0.0929) loss: 0.8608 (0.8667) time: 0.1391 data: 0.0524 max mem: 9305 +Train: [42] [1300/6250] eta: 0:12:49 lr: 0.000083 grad: 0.0893 (0.0927) loss: 0.8671 (0.8667) time: 0.1291 data: 0.0392 max mem: 9305 +Train: [42] [1400/6250] eta: 0:12:29 lr: 0.000083 grad: 0.0902 (0.0926) loss: 0.8591 (0.8665) time: 0.1493 data: 0.0682 max mem: 9305 +Train: [42] [1500/6250] eta: 0:12:10 lr: 0.000083 grad: 0.0885 (0.0925) loss: 0.8688 (0.8664) time: 0.1279 data: 0.0443 max mem: 9305 +Train: [42] [1600/6250] eta: 0:11:51 lr: 0.000083 grad: 0.0857 (0.0924) loss: 0.8614 (0.8662) time: 0.1388 data: 0.0570 max mem: 9305 +Train: [42] [1700/6250] eta: 0:11:37 lr: 0.000083 grad: 0.0900 (0.0924) loss: 0.8710 (0.8662) time: 0.1493 data: 0.0614 max mem: 9305 +Train: [42] [1800/6250] eta: 0:11:21 lr: 0.000083 grad: 0.0916 (0.0924) loss: 0.8597 (0.8661) time: 0.1468 data: 0.0464 max mem: 9305 +Train: [42] [1900/6250] eta: 0:11:10 lr: 0.000083 grad: 0.0901 (0.0922) loss: 0.8663 (0.8660) time: 0.2691 data: 0.1731 max mem: 9305 +Train: [42] [2000/6250] eta: 0:10:51 lr: 0.000083 grad: 0.0921 (0.0922) loss: 0.8640 (0.8659) time: 0.2075 data: 0.1190 max mem: 9305 +Train: [42] [2100/6250] eta: 0:10:36 lr: 0.000083 grad: 0.0918 (0.0923) loss: 0.8646 (0.8657) time: 0.0944 data: 0.0002 max mem: 9305 +Train: [42] [2200/6250] eta: 0:10:17 lr: 0.000083 grad: 0.0903 (0.0923) loss: 0.8661 (0.8656) time: 0.1520 data: 0.0691 max mem: 9305 +Train: [42] [2300/6250] eta: 0:10:00 lr: 0.000083 grad: 0.0924 (0.0924) loss: 0.8592 (0.8656) time: 0.1146 data: 0.0211 max mem: 9305 +Train: [42] [2400/6250] eta: 0:09:43 lr: 0.000083 grad: 0.0907 (0.0924) loss: 0.8633 (0.8656) time: 0.1361 data: 0.0582 max mem: 9305 +Train: [42] [2500/6250] eta: 0:09:26 lr: 0.000083 grad: 0.0941 (0.0924) loss: 0.8694 (0.8657) time: 0.1543 data: 0.0639 max mem: 9305 +Train: [42] [2600/6250] eta: 0:09:11 lr: 0.000083 grad: 0.0849 (0.0924) loss: 0.8637 (0.8656) time: 0.1358 data: 0.0526 max mem: 9305 +Train: [42] [2700/6250] eta: 0:08:55 lr: 0.000083 grad: 0.0961 (0.0924) loss: 0.8709 (0.8656) time: 0.1247 data: 0.0439 max mem: 9305 +Train: [42] [2800/6250] eta: 0:08:38 lr: 0.000083 grad: 0.0868 (0.0925) loss: 0.8687 (0.8656) time: 0.1344 data: 0.0512 max mem: 9305 +Train: [42] [2900/6250] eta: 0:08:21 lr: 0.000083 grad: 0.0901 (0.0926) loss: 0.8701 (0.8655) time: 0.1481 data: 0.0654 max mem: 9305 +Train: [42] [3000/6250] eta: 0:08:07 lr: 0.000083 grad: 0.0905 (0.0926) loss: 0.8698 (0.8655) time: 0.1868 data: 0.1099 max mem: 9305 +Train: [42] [3100/6250] eta: 0:07:53 lr: 0.000083 grad: 0.0910 (0.0927) loss: 0.8680 (0.8655) time: 0.1480 data: 0.0654 max mem: 9305 +Train: [42] [3200/6250] eta: 0:07:37 lr: 0.000083 grad: 0.0956 (0.0928) loss: 0.8646 (0.8655) time: 0.1589 data: 0.0783 max mem: 9305 +Train: [42] [3300/6250] eta: 0:07:21 lr: 0.000083 grad: 0.0936 (0.0930) loss: 0.8617 (0.8654) time: 0.1397 data: 0.0560 max mem: 9305 +Train: [42] [3400/6250] eta: 0:07:05 lr: 0.000083 grad: 0.0965 (0.0931) loss: 0.8600 (0.8653) time: 0.1457 data: 0.0586 max mem: 9305 +Train: [42] [3500/6250] eta: 0:06:50 lr: 0.000083 grad: 0.0941 (0.0932) loss: 0.8661 (0.8653) time: 0.1564 data: 0.0735 max mem: 9305 +Train: [42] [3600/6250] eta: 0:06:35 lr: 0.000083 grad: 0.0925 (0.0932) loss: 0.8647 (0.8652) time: 0.1363 data: 0.0579 max mem: 9305 +Train: [42] [3700/6250] eta: 0:06:19 lr: 0.000083 grad: 0.0980 (0.0934) loss: 0.8609 (0.8652) time: 0.1407 data: 0.0579 max mem: 9305 +Train: [42] [3800/6250] eta: 0:06:03 lr: 0.000083 grad: 0.0883 (0.0934) loss: 0.8610 (0.8651) time: 0.1304 data: 0.0490 max mem: 9305 +Train: [42] [3900/6250] eta: 0:05:48 lr: 0.000083 grad: 0.0945 (0.0934) loss: 0.8655 (0.8650) time: 0.1289 data: 0.0458 max mem: 9305 +Train: [42] [4000/6250] eta: 0:05:32 lr: 0.000083 grad: 0.0909 (0.0935) loss: 0.8634 (0.8650) time: 0.1415 data: 0.0614 max mem: 9305 +Train: [42] [4100/6250] eta: 0:05:17 lr: 0.000082 grad: 0.0901 (0.0936) loss: 0.8567 (0.8650) time: 0.1307 data: 0.0493 max mem: 9305 +Train: [42] [4200/6250] eta: 0:05:02 lr: 0.000082 grad: 0.0901 (0.0936) loss: 0.8643 (0.8649) time: 0.1459 data: 0.0566 max mem: 9305 +Train: [42] [4300/6250] eta: 0:04:46 lr: 0.000082 grad: 0.0974 (0.0936) loss: 0.8575 (0.8648) time: 0.1297 data: 0.0487 max mem: 9305 +Train: [42] [4400/6250] eta: 0:04:31 lr: 0.000082 grad: 0.0900 (0.0936) loss: 0.8654 (0.8647) time: 0.1307 data: 0.0466 max mem: 9305 +Train: [42] [4500/6250] eta: 0:04:16 lr: 0.000082 grad: 0.0936 (0.0937) loss: 0.8600 (0.8646) time: 0.1372 data: 0.0566 max mem: 9305 +Train: [42] [4600/6250] eta: 0:04:01 lr: 0.000082 grad: 0.0897 (0.0938) loss: 0.8702 (0.8646) time: 0.1594 data: 0.0590 max mem: 9305 +Train: [42] [4700/6250] eta: 0:03:46 lr: 0.000082 grad: 0.0893 (0.0939) loss: 0.8656 (0.8646) time: 0.1017 data: 0.0215 max mem: 9305 +Train: [42] [4800/6250] eta: 0:03:31 lr: 0.000082 grad: 0.0917 (0.0939) loss: 0.8615 (0.8645) time: 0.1287 data: 0.0452 max mem: 9305 +Train: [42] [4900/6250] eta: 0:03:16 lr: 0.000082 grad: 0.0874 (0.0939) loss: 0.8727 (0.8646) time: 0.1555 data: 0.0794 max mem: 9305 +Train: [42] [5000/6250] eta: 0:03:01 lr: 0.000082 grad: 0.0884 (0.0939) loss: 0.8635 (0.8646) time: 0.1235 data: 0.0316 max mem: 9305 +Train: [42] [5100/6250] eta: 0:02:47 lr: 0.000082 grad: 0.0913 (0.0939) loss: 0.8654 (0.8646) time: 0.1695 data: 0.0905 max mem: 9305 +Train: [42] [5200/6250] eta: 0:02:32 lr: 0.000082 grad: 0.0892 (0.0939) loss: 0.8648 (0.8646) time: 0.1435 data: 0.0598 max mem: 9305 +Train: [42] [5300/6250] eta: 0:02:18 lr: 0.000082 grad: 0.0961 (0.0940) loss: 0.8700 (0.8646) time: 0.1836 data: 0.1026 max mem: 9305 +Train: [42] [5400/6250] eta: 0:02:03 lr: 0.000082 grad: 0.0931 (0.0941) loss: 0.8713 (0.8646) time: 0.1210 data: 0.0412 max mem: 9305 +Train: [42] [5500/6250] eta: 0:01:49 lr: 0.000082 grad: 0.0876 (0.0941) loss: 0.8630 (0.8646) time: 0.1492 data: 0.0718 max mem: 9305 +Train: [42] [5600/6250] eta: 0:01:34 lr: 0.000082 grad: 0.0869 (0.0941) loss: 0.8660 (0.8646) time: 0.1508 data: 0.0701 max mem: 9305 +Train: [42] [5700/6250] eta: 0:01:20 lr: 0.000082 grad: 0.0884 (0.0940) loss: 0.8661 (0.8646) time: 0.1404 data: 0.0599 max mem: 9305 +Train: [42] [5800/6250] eta: 0:01:05 lr: 0.000082 grad: 0.0925 (0.0941) loss: 0.8586 (0.8645) time: 0.1310 data: 0.0463 max mem: 9305 +Train: [42] [5900/6250] eta: 0:00:50 lr: 0.000082 grad: 0.0946 (0.0940) loss: 0.8674 (0.8645) time: 0.1187 data: 0.0404 max mem: 9305 +Train: [42] [6000/6250] eta: 0:00:36 lr: 0.000082 grad: 0.0897 (0.0941) loss: 0.8625 (0.8644) time: 0.1315 data: 0.0495 max mem: 9305 +Train: [42] [6100/6250] eta: 0:00:21 lr: 0.000082 grad: 0.0963 (0.0940) loss: 0.8618 (0.8644) time: 0.1234 data: 0.0430 max mem: 9305 +Train: [42] [6200/6250] eta: 0:00:07 lr: 0.000082 grad: 0.0905 (0.0940) loss: 0.8603 (0.8644) time: 0.1250 data: 0.0410 max mem: 9305 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.0927 (0.0940) loss: 0.8663 (0.8644) time: 0.1336 data: 0.0497 max mem: 9305 +Train: [42] Total time: 0:15:05 (0.1449 s / it) +Averaged stats: lr: 0.000082 grad: 0.0927 (0.0940) loss: 0.8663 (0.8644) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:04:23 loss: 0.8940 (0.8940) time: 4.2472 data: 4.2033 max mem: 9305 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8869 (0.8847) time: 0.1076 data: 0.0791 max mem: 9305 +Eval (hcp-train-subset): [42] Total time: 0:00:13 (0.2128 s / it) +Averaged stats (hcp-train-subset): loss: 0.8869 (0.8847) +Eval (hcp-val): [42] [ 0/62] eta: 0:03:06 loss: 0.8826 (0.8826) time: 3.0066 data: 2.9276 max mem: 9305 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8803 (0.8829) time: 0.1329 data: 0.1033 max mem: 9305 +Eval (hcp-val): [42] Total time: 0:00:12 (0.2019 s / it) +Averaged stats (hcp-val): loss: 0.8803 (0.8829) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [43] [ 0/6250] eta: 8:43:01 lr: 0.000082 grad: 0.0623 (0.0623) loss: 0.8879 (0.8879) time: 5.0210 data: 4.8787 max mem: 9305 +Train: [43] [ 100/6250] eta: 0:18:57 lr: 0.000082 grad: 0.0887 (0.1107) loss: 0.8701 (0.8745) time: 0.1293 data: 0.0423 max mem: 9305 +Train: [43] [ 200/6250] eta: 0:16:00 lr: 0.000082 grad: 0.0867 (0.1005) loss: 0.8760 (0.8718) time: 0.1437 data: 0.0548 max mem: 9305 +Train: [43] [ 300/6250] eta: 0:14:39 lr: 0.000082 grad: 0.0871 (0.0974) loss: 0.8637 (0.8688) time: 0.1250 data: 0.0368 max mem: 9305 +Train: [43] [ 400/6250] eta: 0:13:50 lr: 0.000082 grad: 0.0880 (0.0958) loss: 0.8620 (0.8672) time: 0.1132 data: 0.0191 max mem: 9305 +Train: [43] [ 500/6250] eta: 0:13:25 lr: 0.000082 grad: 0.0888 (0.0953) loss: 0.8638 (0.8662) time: 0.1337 data: 0.0453 max mem: 9305 +Train: [43] [ 600/6250] eta: 0:12:56 lr: 0.000082 grad: 0.0928 (0.0952) loss: 0.8663 (0.8654) time: 0.1206 data: 0.0340 max mem: 9305 +Train: [43] [ 700/6250] eta: 0:12:51 lr: 0.000082 grad: 0.0852 (0.0947) loss: 0.8659 (0.8649) time: 0.1467 data: 0.0554 max mem: 9305 +Train: [43] [ 800/6250] eta: 0:12:50 lr: 0.000082 grad: 0.0908 (0.0942) loss: 0.8634 (0.8645) time: 0.1424 data: 0.0584 max mem: 9305 +Train: [43] [ 900/6250] eta: 0:12:50 lr: 0.000082 grad: 0.0855 (0.0936) loss: 0.8614 (0.8640) time: 0.1534 data: 0.0657 max mem: 9305 +Train: [43] [1000/6250] eta: 0:12:45 lr: 0.000081 grad: 0.0907 (0.0934) loss: 0.8572 (0.8638) time: 0.2321 data: 0.1515 max mem: 9305 +Train: [43] [1100/6250] eta: 0:12:21 lr: 0.000081 grad: 0.0854 (0.0931) loss: 0.8628 (0.8634) time: 0.1472 data: 0.0638 max mem: 9305 +Train: [43] [1200/6250] eta: 0:12:01 lr: 0.000081 grad: 0.0880 (0.0929) loss: 0.8616 (0.8632) time: 0.1346 data: 0.0535 max mem: 9305 +Train: [43] [1300/6250] eta: 0:11:46 lr: 0.000081 grad: 0.0926 (0.0928) loss: 0.8535 (0.8629) time: 0.1439 data: 0.0631 max mem: 9305 +Train: [43] [1400/6250] eta: 0:11:41 lr: 0.000081 grad: 0.0876 (0.0928) loss: 0.8581 (0.8627) time: 0.0897 data: 0.0002 max mem: 9305 +Train: [43] [1500/6250] eta: 0:11:24 lr: 0.000081 grad: 0.0911 (0.0927) loss: 0.8585 (0.8625) time: 0.1528 data: 0.0711 max mem: 9305 +Train: [43] [1600/6250] eta: 0:11:07 lr: 0.000081 grad: 0.0913 (0.0928) loss: 0.8597 (0.8623) time: 0.1492 data: 0.0701 max mem: 9305 +Train: [43] [1700/6250] eta: 0:10:51 lr: 0.000081 grad: 0.0917 (0.0930) loss: 0.8569 (0.8622) time: 0.1397 data: 0.0583 max mem: 9305 +Train: [43] [1800/6250] eta: 0:10:34 lr: 0.000081 grad: 0.0946 (0.0931) loss: 0.8559 (0.8621) time: 0.1230 data: 0.0384 max mem: 9305 +Train: [43] [1900/6250] eta: 0:10:19 lr: 0.000081 grad: 0.0923 (0.0933) loss: 0.8580 (0.8621) time: 0.1460 data: 0.0647 max mem: 9305 +Train: [43] [2000/6250] eta: 0:10:03 lr: 0.000081 grad: 0.0887 (0.0933) loss: 0.8586 (0.8621) time: 0.1260 data: 0.0427 max mem: 9305 +Train: [43] [2100/6250] eta: 0:09:48 lr: 0.000081 grad: 0.0879 (0.0934) loss: 0.8626 (0.8621) time: 0.1527 data: 0.0671 max mem: 9305 +Train: [43] [2200/6250] eta: 0:09:35 lr: 0.000081 grad: 0.0904 (0.0935) loss: 0.8653 (0.8623) time: 0.1382 data: 0.0507 max mem: 9305 +Train: [43] [2300/6250] eta: 0:09:22 lr: 0.000081 grad: 0.0845 (0.0936) loss: 0.8683 (0.8623) time: 0.1514 data: 0.0696 max mem: 9305 +Train: [43] [2400/6250] eta: 0:09:07 lr: 0.000081 grad: 0.0944 (0.0938) loss: 0.8615 (0.8624) time: 0.1183 data: 0.0274 max mem: 9305 +Train: [43] [2500/6250] eta: 0:08:51 lr: 0.000081 grad: 0.0996 (0.0940) loss: 0.8644 (0.8623) time: 0.1325 data: 0.0535 max mem: 9305 +Train: [43] [2600/6250] eta: 0:08:38 lr: 0.000081 grad: 0.1001 (0.0942) loss: 0.8591 (0.8622) time: 0.1884 data: 0.1015 max mem: 9305 +Train: [43] [2700/6250] eta: 0:08:22 lr: 0.000081 grad: 0.0933 (0.0945) loss: 0.8581 (0.8621) time: 0.1518 data: 0.0730 max mem: 9305 +Train: [43] [2800/6250] eta: 0:08:07 lr: 0.000081 grad: 0.0965 (0.0947) loss: 0.8614 (0.8620) time: 0.1481 data: 0.0608 max mem: 9305 +Train: [43] [2900/6250] eta: 0:07:54 lr: 0.000081 grad: 0.0888 (0.0948) loss: 0.8643 (0.8620) time: 0.1803 data: 0.1004 max mem: 9305 +Train: [43] [3000/6250] eta: 0:07:40 lr: 0.000081 grad: 0.1005 (0.0948) loss: 0.8576 (0.8619) time: 0.1608 data: 0.0710 max mem: 9305 +Train: [43] [3100/6250] eta: 0:07:27 lr: 0.000081 grad: 0.0874 (0.0948) loss: 0.8656 (0.8619) time: 0.1399 data: 0.0493 max mem: 9305 +Train: [43] [3200/6250] eta: 0:07:12 lr: 0.000081 grad: 0.0907 (0.0948) loss: 0.8614 (0.8618) time: 0.1355 data: 0.0480 max mem: 9305 +Train: [43] [3300/6250] eta: 0:06:57 lr: 0.000081 grad: 0.0943 (0.0949) loss: 0.8664 (0.8618) time: 0.1424 data: 0.0638 max mem: 9305 +Train: [43] [3400/6250] eta: 0:06:42 lr: 0.000081 grad: 0.0935 (0.0949) loss: 0.8607 (0.8618) time: 0.1221 data: 0.0370 max mem: 9305 +Train: [43] [3500/6250] eta: 0:06:28 lr: 0.000081 grad: 0.0893 (0.0949) loss: 0.8609 (0.8619) time: 0.1506 data: 0.0660 max mem: 9305 +Train: [43] [3600/6250] eta: 0:06:14 lr: 0.000081 grad: 0.0848 (0.0948) loss: 0.8703 (0.8619) time: 0.1557 data: 0.0743 max mem: 9305 +Train: [43] [3700/6250] eta: 0:05:58 lr: 0.000081 grad: 0.0958 (0.0949) loss: 0.8652 (0.8620) time: 0.1402 data: 0.0623 max mem: 9305 +Train: [43] [3800/6250] eta: 0:05:44 lr: 0.000081 grad: 0.0924 (0.0949) loss: 0.8614 (0.8620) time: 0.1339 data: 0.0431 max mem: 9305 +Train: [43] [3900/6250] eta: 0:05:29 lr: 0.000081 grad: 0.0840 (0.0949) loss: 0.8630 (0.8620) time: 0.1259 data: 0.0449 max mem: 9305 +Train: [43] [4000/6250] eta: 0:05:15 lr: 0.000081 grad: 0.0916 (0.0948) loss: 0.8581 (0.8621) time: 0.1332 data: 0.0489 max mem: 9305 +Train: [43] [4100/6250] eta: 0:05:01 lr: 0.000081 grad: 0.0878 (0.0948) loss: 0.8623 (0.8622) time: 0.1915 data: 0.1056 max mem: 9305 +Train: [43] [4200/6250] eta: 0:04:47 lr: 0.000080 grad: 0.0991 (0.0948) loss: 0.8602 (0.8622) time: 0.1474 data: 0.0636 max mem: 9305 +Train: [43] [4300/6250] eta: 0:04:33 lr: 0.000080 grad: 0.0890 (0.0948) loss: 0.8622 (0.8623) time: 0.1330 data: 0.0533 max mem: 9305 +Train: [43] [4400/6250] eta: 0:04:19 lr: 0.000080 grad: 0.0931 (0.0948) loss: 0.8614 (0.8623) time: 0.1950 data: 0.1138 max mem: 9305 +Train: [43] [4500/6250] eta: 0:04:05 lr: 0.000080 grad: 0.0803 (0.0948) loss: 0.8708 (0.8623) time: 0.1182 data: 0.0299 max mem: 9305 +Train: [43] [4600/6250] eta: 0:03:51 lr: 0.000080 grad: 0.0941 (0.0947) loss: 0.8704 (0.8624) time: 0.1444 data: 0.0659 max mem: 9305 +Train: [43] [4700/6250] eta: 0:03:37 lr: 0.000080 grad: 0.0893 (0.0947) loss: 0.8665 (0.8624) time: 0.1460 data: 0.0632 max mem: 9305 +Train: [43] [4800/6250] eta: 0:03:23 lr: 0.000080 grad: 0.0893 (0.0947) loss: 0.8616 (0.8624) time: 0.1446 data: 0.0510 max mem: 9305 +Train: [43] [4900/6250] eta: 0:03:09 lr: 0.000080 grad: 0.0877 (0.0947) loss: 0.8673 (0.8624) time: 0.1257 data: 0.0457 max mem: 9305 +Train: [43] [5000/6250] eta: 0:02:55 lr: 0.000080 grad: 0.0903 (0.0946) loss: 0.8646 (0.8624) time: 0.1352 data: 0.0565 max mem: 9305 +Train: [43] [5100/6250] eta: 0:02:41 lr: 0.000080 grad: 0.0942 (0.0946) loss: 0.8593 (0.8624) time: 0.1407 data: 0.0539 max mem: 9305 +Train: [43] [5200/6250] eta: 0:02:27 lr: 0.000080 grad: 0.0866 (0.0946) loss: 0.8593 (0.8624) time: 0.1402 data: 0.0625 max mem: 9305 +Train: [43] [5300/6250] eta: 0:02:13 lr: 0.000080 grad: 0.0953 (0.0946) loss: 0.8579 (0.8623) time: 0.1431 data: 0.0538 max mem: 9305 +Train: [43] [5400/6250] eta: 0:01:59 lr: 0.000080 grad: 0.1001 (0.0947) loss: 0.8558 (0.8623) time: 0.1595 data: 0.0814 max mem: 9305 +Train: [43] [5500/6250] eta: 0:01:45 lr: 0.000080 grad: 0.0881 (0.0946) loss: 0.8610 (0.8622) time: 0.1346 data: 0.0517 max mem: 9305 +Train: [43] [5600/6250] eta: 0:01:31 lr: 0.000080 grad: 0.0913 (0.0946) loss: 0.8518 (0.8622) time: 0.1432 data: 0.0578 max mem: 9305 +Train: [43] [5700/6250] eta: 0:01:17 lr: 0.000080 grad: 0.0888 (0.0946) loss: 0.8599 (0.8622) time: 0.1339 data: 0.0511 max mem: 9305 +Train: [43] [5800/6250] eta: 0:01:03 lr: 0.000080 grad: 0.0922 (0.0946) loss: 0.8619 (0.8622) time: 0.1258 data: 0.0452 max mem: 9305 +Train: [43] [5900/6250] eta: 0:00:48 lr: 0.000080 grad: 0.0948 (0.0947) loss: 0.8638 (0.8621) time: 0.1336 data: 0.0534 max mem: 9305 +Train: [43] [6000/6250] eta: 0:00:34 lr: 0.000080 grad: 0.0925 (0.0947) loss: 0.8628 (0.8621) time: 0.1187 data: 0.0355 max mem: 9305 +Train: [43] [6100/6250] eta: 0:00:20 lr: 0.000080 grad: 0.0973 (0.0948) loss: 0.8583 (0.8621) time: 0.1225 data: 0.0335 max mem: 9305 +Train: [43] [6200/6250] eta: 0:00:06 lr: 0.000080 grad: 0.0938 (0.0948) loss: 0.8623 (0.8621) time: 0.1380 data: 0.0524 max mem: 9305 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.0948 (0.0948) loss: 0.8627 (0.8621) time: 0.1313 data: 0.0463 max mem: 9305 +Train: [43] Total time: 0:14:33 (0.1397 s / it) +Averaged stats: lr: 0.000080 grad: 0.0948 (0.0948) loss: 0.8627 (0.8621) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:03:14 loss: 0.8974 (0.8974) time: 3.1300 data: 3.0325 max mem: 9305 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8836 (0.8851) time: 0.1313 data: 0.1014 max mem: 9305 +Eval (hcp-train-subset): [43] Total time: 0:00:12 (0.2080 s / it) +Averaged stats (hcp-train-subset): loss: 0.8836 (0.8851) +Eval (hcp-val): [43] [ 0/62] eta: 0:03:52 loss: 0.8763 (0.8763) time: 3.7489 data: 3.6910 max mem: 9305 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8810 (0.8820) time: 0.1218 data: 0.0920 max mem: 9305 +Eval (hcp-val): [43] Total time: 0:00:12 (0.1999 s / it) +Averaged stats (hcp-val): loss: 0.8810 (0.8820) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [44] [ 0/6250] eta: 8:29:53 lr: 0.000080 grad: 0.1812 (0.1812) loss: 0.9013 (0.9013) time: 4.8949 data: 4.7445 max mem: 9305 +Train: [44] [ 100/6250] eta: 0:17:51 lr: 0.000080 grad: 0.0830 (0.1173) loss: 0.8783 (0.8755) time: 0.1318 data: 0.0303 max mem: 9305 +Train: [44] [ 200/6250] eta: 0:15:23 lr: 0.000080 grad: 0.0917 (0.1091) loss: 0.8690 (0.8714) time: 0.1350 data: 0.0431 max mem: 9305 +Train: [44] [ 300/6250] eta: 0:14:05 lr: 0.000080 grad: 0.1000 (0.1064) loss: 0.8544 (0.8681) time: 0.1138 data: 0.0244 max mem: 9305 +Train: [44] [ 400/6250] eta: 0:13:21 lr: 0.000080 grad: 0.0939 (0.1049) loss: 0.8613 (0.8664) time: 0.1243 data: 0.0363 max mem: 9305 +Train: [44] [ 500/6250] eta: 0:12:43 lr: 0.000080 grad: 0.0875 (0.1036) loss: 0.8692 (0.8657) time: 0.0990 data: 0.0025 max mem: 9305 +Train: [44] [ 600/6250] eta: 0:12:23 lr: 0.000080 grad: 0.0832 (0.1019) loss: 0.8574 (0.8651) time: 0.1413 data: 0.0526 max mem: 9305 +Train: [44] [ 700/6250] eta: 0:12:14 lr: 0.000080 grad: 0.0884 (0.1002) loss: 0.8668 (0.8650) time: 0.1560 data: 0.0739 max mem: 9305 +Train: [44] [ 800/6250] eta: 0:12:02 lr: 0.000080 grad: 0.0892 (0.0989) loss: 0.8685 (0.8649) time: 0.1280 data: 0.0448 max mem: 9305 +Train: [44] [ 900/6250] eta: 0:11:54 lr: 0.000080 grad: 0.0851 (0.0979) loss: 0.8684 (0.8649) time: 0.1591 data: 0.0712 max mem: 9305 +Train: [44] [1000/6250] eta: 0:11:59 lr: 0.000080 grad: 0.0950 (0.0974) loss: 0.8695 (0.8648) time: 0.0813 data: 0.0002 max mem: 9305 +Train: [44] [1100/6250] eta: 0:11:38 lr: 0.000079 grad: 0.0918 (0.0971) loss: 0.8631 (0.8645) time: 0.1015 data: 0.0215 max mem: 9305 +Train: [44] [1200/6250] eta: 0:11:23 lr: 0.000079 grad: 0.0855 (0.0966) loss: 0.8624 (0.8643) time: 0.0946 data: 0.0131 max mem: 9305 +Train: [44] [1300/6250] eta: 0:11:08 lr: 0.000079 grad: 0.0851 (0.0964) loss: 0.8657 (0.8640) time: 0.1359 data: 0.0551 max mem: 9305 +Train: [44] [1400/6250] eta: 0:10:57 lr: 0.000079 grad: 0.0896 (0.0964) loss: 0.8656 (0.8639) time: 0.1320 data: 0.0425 max mem: 9305 +Train: [44] [1500/6250] eta: 0:10:42 lr: 0.000079 grad: 0.0931 (0.0961) loss: 0.8545 (0.8637) time: 0.1395 data: 0.0546 max mem: 9305 +Train: [44] [1600/6250] eta: 0:10:29 lr: 0.000079 grad: 0.0884 (0.0960) loss: 0.8604 (0.8637) time: 0.1326 data: 0.0536 max mem: 9305 +Train: [44] [1700/6250] eta: 0:10:17 lr: 0.000079 grad: 0.0849 (0.0958) loss: 0.8644 (0.8636) time: 0.1350 data: 0.0544 max mem: 9305 +Train: [44] [1800/6250] eta: 0:10:05 lr: 0.000079 grad: 0.0915 (0.0959) loss: 0.8582 (0.8636) time: 0.1506 data: 0.0697 max mem: 9305 +Train: [44] [1900/6250] eta: 0:09:52 lr: 0.000079 grad: 0.0911 (0.0959) loss: 0.8604 (0.8634) time: 0.1426 data: 0.0614 max mem: 9305 +Train: [44] [2000/6250] eta: 0:09:39 lr: 0.000079 grad: 0.0898 (0.0959) loss: 0.8624 (0.8634) time: 0.1426 data: 0.0632 max mem: 9305 +Train: [44] [2100/6250] eta: 0:09:27 lr: 0.000079 grad: 0.0860 (0.0959) loss: 0.8652 (0.8634) time: 0.1545 data: 0.0765 max mem: 9305 +Train: [44] [2200/6250] eta: 0:09:13 lr: 0.000079 grad: 0.0845 (0.0957) loss: 0.8647 (0.8634) time: 0.1431 data: 0.0558 max mem: 9305 +Train: [44] [2300/6250] eta: 0:08:59 lr: 0.000079 grad: 0.0885 (0.0956) loss: 0.8666 (0.8634) time: 0.1257 data: 0.0407 max mem: 9305 +Train: [44] [2400/6250] eta: 0:08:45 lr: 0.000079 grad: 0.0939 (0.0956) loss: 0.8607 (0.8633) time: 0.1351 data: 0.0452 max mem: 9305 +Train: [44] [2500/6250] eta: 0:08:34 lr: 0.000079 grad: 0.0901 (0.0955) loss: 0.8579 (0.8633) time: 0.1199 data: 0.0406 max mem: 9305 +Train: [44] [2600/6250] eta: 0:08:20 lr: 0.000079 grad: 0.0927 (0.0955) loss: 0.8582 (0.8632) time: 0.1477 data: 0.0636 max mem: 9305 +Train: [44] [2700/6250] eta: 0:08:05 lr: 0.000079 grad: 0.0916 (0.0956) loss: 0.8582 (0.8631) time: 0.1354 data: 0.0523 max mem: 9305 +Train: [44] [2800/6250] eta: 0:07:51 lr: 0.000079 grad: 0.0955 (0.0958) loss: 0.8581 (0.8629) time: 0.1143 data: 0.0241 max mem: 9305 +Train: [44] [2900/6250] eta: 0:07:37 lr: 0.000079 grad: 0.0899 (0.0960) loss: 0.8576 (0.8628) time: 0.1394 data: 0.0578 max mem: 9305 +Train: [44] [3000/6250] eta: 0:07:24 lr: 0.000079 grad: 0.0883 (0.0962) loss: 0.8613 (0.8626) time: 0.1245 data: 0.0481 max mem: 9305 +Train: [44] [3100/6250] eta: 0:07:10 lr: 0.000079 grad: 0.0926 (0.0962) loss: 0.8496 (0.8624) time: 0.1039 data: 0.0149 max mem: 9305 +Train: [44] [3200/6250] eta: 0:06:56 lr: 0.000079 grad: 0.1044 (0.0965) loss: 0.8477 (0.8621) time: 0.1373 data: 0.0571 max mem: 9305 +Train: [44] [3300/6250] eta: 0:06:41 lr: 0.000079 grad: 0.0925 (0.0966) loss: 0.8621 (0.8619) time: 0.1170 data: 0.0299 max mem: 9305 +Train: [44] [3400/6250] eta: 0:06:28 lr: 0.000079 grad: 0.0925 (0.0967) loss: 0.8571 (0.8618) time: 0.1419 data: 0.0627 max mem: 9305 +Train: [44] [3500/6250] eta: 0:06:14 lr: 0.000079 grad: 0.0944 (0.0970) loss: 0.8572 (0.8616) time: 0.1348 data: 0.0485 max mem: 9305 +Train: [44] [3600/6250] eta: 0:06:00 lr: 0.000079 grad: 0.0980 (0.0970) loss: 0.8504 (0.8615) time: 0.1555 data: 0.0777 max mem: 9305 +Train: [44] [3700/6250] eta: 0:05:46 lr: 0.000079 grad: 0.1000 (0.0971) loss: 0.8567 (0.8613) time: 0.1319 data: 0.0465 max mem: 9305 +Train: [44] [3800/6250] eta: 0:05:32 lr: 0.000079 grad: 0.0956 (0.0972) loss: 0.8596 (0.8613) time: 0.1151 data: 0.0378 max mem: 9305 +Train: [44] [3900/6250] eta: 0:05:18 lr: 0.000079 grad: 0.0937 (0.0973) loss: 0.8591 (0.8611) time: 0.1287 data: 0.0473 max mem: 9305 +Train: [44] [4000/6250] eta: 0:05:04 lr: 0.000079 grad: 0.0986 (0.0974) loss: 0.8575 (0.8610) time: 0.1339 data: 0.0453 max mem: 9305 +Train: [44] [4100/6250] eta: 0:04:50 lr: 0.000079 grad: 0.0984 (0.0975) loss: 0.8582 (0.8609) time: 0.1191 data: 0.0318 max mem: 9305 +Train: [44] [4200/6250] eta: 0:04:37 lr: 0.000078 grad: 0.1037 (0.0976) loss: 0.8542 (0.8608) time: 0.1246 data: 0.0352 max mem: 9305 +Train: [44] [4300/6250] eta: 0:04:23 lr: 0.000078 grad: 0.1039 (0.0978) loss: 0.8569 (0.8607) time: 0.1134 data: 0.0279 max mem: 9305 +Train: [44] [4400/6250] eta: 0:04:10 lr: 0.000078 grad: 0.1046 (0.0979) loss: 0.8597 (0.8607) time: 0.1362 data: 0.0587 max mem: 9305 +Train: [44] [4500/6250] eta: 0:03:57 lr: 0.000078 grad: 0.0950 (0.0981) loss: 0.8606 (0.8606) time: 0.1561 data: 0.0724 max mem: 9305 +Train: [44] [4600/6250] eta: 0:03:43 lr: 0.000078 grad: 0.0936 (0.0982) loss: 0.8541 (0.8606) time: 0.1275 data: 0.0470 max mem: 9305 +Train: [44] [4700/6250] eta: 0:03:30 lr: 0.000078 grad: 0.1003 (0.0983) loss: 0.8586 (0.8605) time: 0.1264 data: 0.0456 max mem: 9305 +Train: [44] [4800/6250] eta: 0:03:16 lr: 0.000078 grad: 0.1005 (0.0983) loss: 0.8542 (0.8605) time: 0.1304 data: 0.0448 max mem: 9305 +Train: [44] [4900/6250] eta: 0:03:02 lr: 0.000078 grad: 0.0962 (0.0984) loss: 0.8512 (0.8604) time: 0.1438 data: 0.0568 max mem: 9305 +Train: [44] [5000/6250] eta: 0:02:49 lr: 0.000078 grad: 0.0910 (0.0984) loss: 0.8615 (0.8604) time: 0.1459 data: 0.0612 max mem: 9305 +Train: [44] [5100/6250] eta: 0:02:35 lr: 0.000078 grad: 0.1013 (0.0985) loss: 0.8570 (0.8604) time: 0.1386 data: 0.0578 max mem: 9305 +Train: [44] [5200/6250] eta: 0:02:22 lr: 0.000078 grad: 0.1049 (0.0985) loss: 0.8517 (0.8603) time: 0.1554 data: 0.0671 max mem: 9305 +Train: [44] [5300/6250] eta: 0:02:09 lr: 0.000078 grad: 0.0985 (0.0986) loss: 0.8567 (0.8603) time: 0.1649 data: 0.0840 max mem: 9305 +Train: [44] [5400/6250] eta: 0:01:55 lr: 0.000078 grad: 0.0934 (0.0986) loss: 0.8610 (0.8603) time: 0.1315 data: 0.0476 max mem: 9305 +Train: [44] [5500/6250] eta: 0:01:42 lr: 0.000078 grad: 0.0959 (0.0986) loss: 0.8662 (0.8602) time: 0.1480 data: 0.0683 max mem: 9305 +Train: [44] [5600/6250] eta: 0:01:28 lr: 0.000078 grad: 0.0956 (0.0986) loss: 0.8584 (0.8602) time: 0.1204 data: 0.0386 max mem: 9305 +Train: [44] [5700/6250] eta: 0:01:14 lr: 0.000078 grad: 0.1083 (0.0987) loss: 0.8606 (0.8602) time: 0.1235 data: 0.0412 max mem: 9305 +Train: [44] [5800/6250] eta: 0:01:01 lr: 0.000078 grad: 0.0925 (0.0988) loss: 0.8655 (0.8601) time: 0.1232 data: 0.0414 max mem: 9305 +Train: [44] [5900/6250] eta: 0:00:47 lr: 0.000078 grad: 0.0928 (0.0988) loss: 0.8607 (0.8601) time: 0.1182 data: 0.0365 max mem: 9305 +Train: [44] [6000/6250] eta: 0:00:33 lr: 0.000078 grad: 0.0942 (0.0987) loss: 0.8534 (0.8601) time: 0.1369 data: 0.0534 max mem: 9305 +Train: [44] [6100/6250] eta: 0:00:20 lr: 0.000078 grad: 0.0965 (0.0988) loss: 0.8528 (0.8601) time: 0.1380 data: 0.0524 max mem: 9305 +Train: [44] [6200/6250] eta: 0:00:06 lr: 0.000078 grad: 0.0912 (0.0987) loss: 0.8553 (0.8601) time: 0.1330 data: 0.0465 max mem: 9305 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.0885 (0.0987) loss: 0.8649 (0.8601) time: 0.1084 data: 0.0232 max mem: 9305 +Train: [44] Total time: 0:14:07 (0.1356 s / it) +Averaged stats: lr: 0.000078 grad: 0.0885 (0.0987) loss: 0.8649 (0.8601) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:04:14 loss: 0.8954 (0.8954) time: 4.1061 data: 3.9889 max mem: 9305 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8821 (0.8831) time: 0.1174 data: 0.0889 max mem: 9305 +Eval (hcp-train-subset): [44] Total time: 0:00:13 (0.2175 s / it) +Averaged stats (hcp-train-subset): loss: 0.8821 (0.8831) +Making plots (hcp-train-subset): example=14 +Eval (hcp-val): [44] [ 0/62] eta: 0:04:46 loss: 0.8805 (0.8805) time: 4.6277 data: 4.5937 max mem: 9305 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8801 (0.8816) time: 0.1273 data: 0.0977 max mem: 9305 +Eval (hcp-val): [44] Total time: 0:00:12 (0.2080 s / it) +Averaged stats (hcp-val): loss: 0.8801 (0.8816) +Making plots (hcp-val): example=45 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [45] [ 0/6250] eta: 8:38:05 lr: 0.000078 grad: 0.2189 (0.2189) loss: 0.8763 (0.8763) time: 4.9736 data: 4.6911 max mem: 9305 +Train: [45] [ 100/6250] eta: 0:20:17 lr: 0.000078 grad: 0.0962 (0.0920) loss: 0.8719 (0.8850) time: 0.1507 data: 0.0573 max mem: 9305 +Train: [45] [ 200/6250] eta: 0:17:25 lr: 0.000078 grad: 0.0982 (0.0954) loss: 0.8684 (0.8794) time: 0.1386 data: 0.0515 max mem: 9305 +Train: [45] [ 300/6250] eta: 0:16:24 lr: 0.000078 grad: 0.0965 (0.0959) loss: 0.8718 (0.8756) time: 0.1511 data: 0.0618 max mem: 9305 +Train: [45] [ 400/6250] eta: 0:15:19 lr: 0.000078 grad: 0.0977 (0.0970) loss: 0.8647 (0.8726) time: 0.1128 data: 0.0156 max mem: 9305 +Train: [45] [ 500/6250] eta: 0:14:40 lr: 0.000078 grad: 0.0982 (0.0976) loss: 0.8626 (0.8706) time: 0.1225 data: 0.0289 max mem: 9305 +Train: [45] [ 600/6250] eta: 0:14:05 lr: 0.000078 grad: 0.0981 (0.0972) loss: 0.8645 (0.8695) time: 0.1449 data: 0.0591 max mem: 9305 +Train: [45] [ 700/6250] eta: 0:13:46 lr: 0.000078 grad: 0.0936 (0.0975) loss: 0.8634 (0.8686) time: 0.1482 data: 0.0461 max mem: 9305 +Train: [45] [ 800/6250] eta: 0:13:51 lr: 0.000078 grad: 0.0902 (0.0969) loss: 0.8644 (0.8682) time: 0.2021 data: 0.1175 max mem: 9305 +Train: [45] [ 900/6250] eta: 0:13:42 lr: 0.000078 grad: 0.0907 (0.0967) loss: 0.8677 (0.8680) time: 0.1084 data: 0.0197 max mem: 9305 +Train: [45] [1000/6250] eta: 0:13:33 lr: 0.000078 grad: 0.0960 (0.0968) loss: 0.8681 (0.8674) time: 0.1918 data: 0.1090 max mem: 9305 +Train: [45] [1100/6250] eta: 0:13:33 lr: 0.000077 grad: 0.1005 (0.0970) loss: 0.8612 (0.8668) time: 0.2536 data: 0.1741 max mem: 9305 +Train: [45] [1200/6250] eta: 0:13:14 lr: 0.000077 grad: 0.0892 (0.0972) loss: 0.8651 (0.8662) time: 0.1650 data: 0.0826 max mem: 9305 +Train: [45] [1300/6250] eta: 0:13:04 lr: 0.000077 grad: 0.0923 (0.0974) loss: 0.8611 (0.8656) time: 0.1892 data: 0.1109 max mem: 9305 +Train: [45] [1400/6250] eta: 0:12:54 lr: 0.000077 grad: 0.0946 (0.0973) loss: 0.8555 (0.8654) time: 0.1962 data: 0.1209 max mem: 9305 +Train: [45] [1500/6250] eta: 0:12:36 lr: 0.000077 grad: 0.1018 (0.0976) loss: 0.8572 (0.8649) time: 0.1737 data: 0.0951 max mem: 9305 +Train: [45] [1600/6250] eta: 0:12:16 lr: 0.000077 grad: 0.1027 (0.0976) loss: 0.8644 (0.8646) time: 0.1527 data: 0.0634 max mem: 9305 +Train: [45] [1700/6250] eta: 0:12:02 lr: 0.000077 grad: 0.0979 (0.0978) loss: 0.8583 (0.8643) time: 0.1818 data: 0.1010 max mem: 9305 +Train: [45] [1800/6250] eta: 0:11:42 lr: 0.000077 grad: 0.0916 (0.0979) loss: 0.8623 (0.8641) time: 0.1168 data: 0.0378 max mem: 9305 +Train: [45] [1900/6250] eta: 0:11:28 lr: 0.000077 grad: 0.1058 (0.0982) loss: 0.8491 (0.8639) time: 0.2441 data: 0.1175 max mem: 9305 +Train: [45] [2000/6250] eta: 0:11:11 lr: 0.000077 grad: 0.0977 (0.0982) loss: 0.8598 (0.8637) time: 0.1464 data: 0.0544 max mem: 9305 +Train: [45] [2100/6250] eta: 0:10:57 lr: 0.000077 grad: 0.1069 (0.0984) loss: 0.8504 (0.8634) time: 0.0959 data: 0.0002 max mem: 9305 +Train: [45] [2200/6250] eta: 0:10:33 lr: 0.000077 grad: 0.0983 (0.0984) loss: 0.8662 (0.8632) time: 0.1189 data: 0.0408 max mem: 9305 +Train: [45] [2300/6250] eta: 0:10:13 lr: 0.000077 grad: 0.0954 (0.0987) loss: 0.8557 (0.8629) time: 0.1211 data: 0.0357 max mem: 9305 +Train: [45] [2400/6250] eta: 0:09:57 lr: 0.000077 grad: 0.0995 (0.0988) loss: 0.8591 (0.8627) time: 0.1563 data: 0.0722 max mem: 9305 +Train: [45] [2500/6250] eta: 0:09:38 lr: 0.000077 grad: 0.0910 (0.0988) loss: 0.8642 (0.8625) time: 0.1404 data: 0.0557 max mem: 9305 +Train: [45] [2600/6250] eta: 0:09:23 lr: 0.000077 grad: 0.0913 (0.0989) loss: 0.8699 (0.8624) time: 0.1798 data: 0.0959 max mem: 9305 +Train: [45] [2700/6250] eta: 0:09:04 lr: 0.000077 grad: 0.0853 (0.0988) loss: 0.8667 (0.8624) time: 0.1423 data: 0.0637 max mem: 9305 +Train: [45] [2800/6250] eta: 0:08:48 lr: 0.000077 grad: 0.1000 (0.0988) loss: 0.8615 (0.8624) time: 0.1247 data: 0.0452 max mem: 9305 +Train: [45] [2900/6250] eta: 0:08:32 lr: 0.000077 grad: 0.0978 (0.0988) loss: 0.8571 (0.8623) time: 0.1627 data: 0.0853 max mem: 9305 +Train: [45] [3000/6250] eta: 0:08:16 lr: 0.000077 grad: 0.0876 (0.0989) loss: 0.8638 (0.8624) time: 0.1437 data: 0.0633 max mem: 9305 +Train: [45] [3100/6250] eta: 0:08:02 lr: 0.000077 grad: 0.1003 (0.0990) loss: 0.8628 (0.8624) time: 0.1526 data: 0.0766 max mem: 9305 +Train: [45] [3200/6250] eta: 0:07:47 lr: 0.000077 grad: 0.1037 (0.0991) loss: 0.8632 (0.8624) time: 0.1359 data: 0.0562 max mem: 9305 +Train: [45] [3300/6250] eta: 0:07:30 lr: 0.000077 grad: 0.1082 (0.0992) loss: 0.8558 (0.8623) time: 0.1483 data: 0.0682 max mem: 9305 +Train: [45] [3400/6250] eta: 0:07:14 lr: 0.000077 grad: 0.0985 (0.0992) loss: 0.8565 (0.8623) time: 0.1283 data: 0.0463 max mem: 9305 +Train: [45] [3500/6250] eta: 0:06:59 lr: 0.000077 grad: 0.0933 (0.0992) loss: 0.8601 (0.8622) time: 0.1879 data: 0.0854 max mem: 9305 +Train: [45] [3600/6250] eta: 0:06:44 lr: 0.000077 grad: 0.1018 (0.0992) loss: 0.8615 (0.8623) time: 0.1506 data: 0.0748 max mem: 9305 +Train: [45] [3700/6250] eta: 0:06:28 lr: 0.000077 grad: 0.0930 (0.0991) loss: 0.8593 (0.8623) time: 0.1388 data: 0.0539 max mem: 9305 +Train: [45] [3800/6250] eta: 0:06:12 lr: 0.000077 grad: 0.0992 (0.0991) loss: 0.8643 (0.8623) time: 0.1332 data: 0.0546 max mem: 9305 +Train: [45] [3900/6250] eta: 0:05:56 lr: 0.000077 grad: 0.0945 (0.0991) loss: 0.8667 (0.8624) time: 0.0990 data: 0.0199 max mem: 9305 +Train: [45] [4000/6250] eta: 0:05:40 lr: 0.000077 grad: 0.0917 (0.0992) loss: 0.8701 (0.8624) time: 0.1026 data: 0.0153 max mem: 9305 +Train: [45] [4100/6250] eta: 0:05:24 lr: 0.000077 grad: 0.1017 (0.0993) loss: 0.8625 (0.8625) time: 0.1616 data: 0.0846 max mem: 9305 +Train: [45] [4200/6250] eta: 0:05:08 lr: 0.000076 grad: 0.1008 (0.0992) loss: 0.8641 (0.8625) time: 0.1440 data: 0.0622 max mem: 9305 +Train: [45] [4300/6250] eta: 0:04:53 lr: 0.000076 grad: 0.0983 (0.0993) loss: 0.8641 (0.8625) time: 0.1457 data: 0.0538 max mem: 9305 +Train: [45] [4400/6250] eta: 0:04:38 lr: 0.000076 grad: 0.0939 (0.0993) loss: 0.8579 (0.8625) time: 0.1616 data: 0.0779 max mem: 9305 +Train: [45] [4500/6250] eta: 0:04:22 lr: 0.000076 grad: 0.1050 (0.0993) loss: 0.8610 (0.8625) time: 0.1289 data: 0.0402 max mem: 9305 +Train: [45] [4600/6250] eta: 0:04:07 lr: 0.000076 grad: 0.0957 (0.0994) loss: 0.8648 (0.8625) time: 0.1480 data: 0.0697 max mem: 9305 +Train: [45] [4700/6250] eta: 0:03:52 lr: 0.000076 grad: 0.0944 (0.0993) loss: 0.8652 (0.8625) time: 0.1296 data: 0.0501 max mem: 9305 +Train: [45] [4800/6250] eta: 0:03:37 lr: 0.000076 grad: 0.0940 (0.0993) loss: 0.8608 (0.8625) time: 0.1636 data: 0.0712 max mem: 9305 +Train: [45] [4900/6250] eta: 0:03:21 lr: 0.000076 grad: 0.0975 (0.0993) loss: 0.8598 (0.8625) time: 0.1219 data: 0.0423 max mem: 9305 +Train: [45] [5000/6250] eta: 0:03:07 lr: 0.000076 grad: 0.0914 (0.0993) loss: 0.8694 (0.8626) time: 0.1475 data: 0.0671 max mem: 9305 +Train: [45] [5100/6250] eta: 0:02:52 lr: 0.000076 grad: 0.1017 (0.0993) loss: 0.8593 (0.8625) time: 0.1415 data: 0.0585 max mem: 9305 +Train: [45] [5200/6250] eta: 0:02:36 lr: 0.000076 grad: 0.0918 (0.0993) loss: 0.8644 (0.8625) time: 0.1392 data: 0.0516 max mem: 9305 +Train: [45] [5300/6250] eta: 0:02:21 lr: 0.000076 grad: 0.0964 (0.0993) loss: 0.8623 (0.8625) time: 0.1249 data: 0.0325 max mem: 9305 +Train: [45] [5400/6250] eta: 0:02:06 lr: 0.000076 grad: 0.0929 (0.0993) loss: 0.8608 (0.8625) time: 0.1298 data: 0.0474 max mem: 9305 +Train: [45] [5500/6250] eta: 0:01:51 lr: 0.000076 grad: 0.0986 (0.0993) loss: 0.8548 (0.8625) time: 0.1080 data: 0.0179 max mem: 9305 +Train: [45] [5600/6250] eta: 0:01:36 lr: 0.000076 grad: 0.0976 (0.0993) loss: 0.8636 (0.8625) time: 0.1389 data: 0.0560 max mem: 9305 +Train: [45] [5700/6250] eta: 0:01:21 lr: 0.000076 grad: 0.0920 (0.0993) loss: 0.8622 (0.8625) time: 0.1199 data: 0.0375 max mem: 9305 +Train: [45] [5800/6250] eta: 0:01:06 lr: 0.000076 grad: 0.1034 (0.0994) loss: 0.8669 (0.8625) time: 0.1530 data: 0.0717 max mem: 9305 +Train: [45] [5900/6250] eta: 0:00:51 lr: 0.000076 grad: 0.1043 (0.0994) loss: 0.8622 (0.8624) time: 0.1531 data: 0.0700 max mem: 9305 +Train: [45] [6000/6250] eta: 0:00:36 lr: 0.000076 grad: 0.0984 (0.0994) loss: 0.8566 (0.8625) time: 0.1236 data: 0.0477 max mem: 9305 +Train: [45] [6100/6250] eta: 0:00:22 lr: 0.000076 grad: 0.0990 (0.0994) loss: 0.8625 (0.8624) time: 0.1648 data: 0.0885 max mem: 9305 +Train: [45] [6200/6250] eta: 0:00:07 lr: 0.000076 grad: 0.0981 (0.0995) loss: 0.8593 (0.8624) time: 0.1460 data: 0.0659 max mem: 9305 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.0991 (0.0995) loss: 0.8582 (0.8624) time: 0.1408 data: 0.0588 max mem: 9305 +Train: [45] Total time: 0:15:25 (0.1482 s / it) +Averaged stats: lr: 0.000076 grad: 0.0991 (0.0995) loss: 0.8582 (0.8624) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:03:20 loss: 0.8913 (0.8913) time: 3.2383 data: 3.1546 max mem: 9305 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8819 (0.8820) time: 0.1008 data: 0.0718 max mem: 9305 +Eval (hcp-train-subset): [45] Total time: 0:00:13 (0.2163 s / it) +Averaged stats (hcp-train-subset): loss: 0.8819 (0.8820) +Eval (hcp-val): [45] [ 0/62] eta: 0:05:27 loss: 0.8754 (0.8754) time: 5.2758 data: 5.2424 max mem: 9305 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8796 (0.8806) time: 0.1179 data: 0.0884 max mem: 9305 +Eval (hcp-val): [45] Total time: 0:00:12 (0.2084 s / it) +Averaged stats (hcp-val): loss: 0.8796 (0.8806) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 9:12:28 lr: 0.000076 grad: 0.1105 (0.1105) loss: 0.8337 (0.8337) time: 5.3037 data: 5.1944 max mem: 9305 +Train: [46] [ 100/6250] eta: 0:19:28 lr: 0.000076 grad: 0.0910 (0.0994) loss: 0.8603 (0.8741) time: 0.1376 data: 0.0390 max mem: 9305 +Train: [46] [ 200/6250] eta: 0:16:25 lr: 0.000076 grad: 0.0832 (0.0961) loss: 0.8691 (0.8705) time: 0.1074 data: 0.0152 max mem: 9305 +Train: [46] [ 300/6250] eta: 0:15:06 lr: 0.000076 grad: 0.0823 (0.0933) loss: 0.8692 (0.8706) time: 0.1315 data: 0.0381 max mem: 9305 +Train: [46] [ 400/6250] eta: 0:14:10 lr: 0.000076 grad: 0.0898 (0.0927) loss: 0.8725 (0.8709) time: 0.1287 data: 0.0406 max mem: 9305 +Train: [46] [ 500/6250] eta: 0:13:33 lr: 0.000076 grad: 0.0795 (0.0926) loss: 0.8664 (0.8703) time: 0.1294 data: 0.0415 max mem: 9305 +Train: [46] [ 600/6250] eta: 0:13:03 lr: 0.000076 grad: 0.0850 (0.0931) loss: 0.8702 (0.8702) time: 0.1217 data: 0.0285 max mem: 9305 +Train: [46] [ 700/6250] eta: 0:12:51 lr: 0.000076 grad: 0.0815 (0.0928) loss: 0.8733 (0.8702) time: 0.1428 data: 0.0610 max mem: 9305 +Train: [46] [ 800/6250] eta: 0:12:41 lr: 0.000076 grad: 0.0834 (0.0921) loss: 0.8715 (0.8701) time: 0.1743 data: 0.0923 max mem: 9305 +Train: [46] [ 900/6250] eta: 0:12:31 lr: 0.000076 grad: 0.0741 (0.0913) loss: 0.8741 (0.8700) time: 0.1317 data: 0.0534 max mem: 9305 +Train: [46] [1000/6250] eta: 0:12:16 lr: 0.000076 grad: 0.0823 (0.0909) loss: 0.8689 (0.8702) time: 0.1236 data: 0.0391 max mem: 9305 +Train: [46] [1100/6250] eta: 0:12:11 lr: 0.000075 grad: 0.0856 (0.0906) loss: 0.8688 (0.8700) time: 0.1019 data: 0.0215 max mem: 9305 +Train: [46] [1200/6250] eta: 0:11:53 lr: 0.000075 grad: 0.0895 (0.0904) loss: 0.8674 (0.8697) time: 0.1271 data: 0.0501 max mem: 9305 +Train: [46] [1300/6250] eta: 0:11:35 lr: 0.000075 grad: 0.0821 (0.0905) loss: 0.8642 (0.8695) time: 0.1385 data: 0.0572 max mem: 9305 +Train: [46] [1400/6250] eta: 0:11:20 lr: 0.000075 grad: 0.0869 (0.0905) loss: 0.8674 (0.8693) time: 0.1373 data: 0.0528 max mem: 9305 +Train: [46] [1500/6250] eta: 0:11:04 lr: 0.000075 grad: 0.0917 (0.0906) loss: 0.8655 (0.8690) time: 0.1261 data: 0.0420 max mem: 9305 +Train: [46] [1600/6250] eta: 0:10:52 lr: 0.000075 grad: 0.0876 (0.0907) loss: 0.8635 (0.8688) time: 0.1586 data: 0.0812 max mem: 9305 +Train: [46] [1700/6250] eta: 0:10:36 lr: 0.000075 grad: 0.0939 (0.0908) loss: 0.8624 (0.8685) time: 0.1328 data: 0.0492 max mem: 9305 +Train: [46] [1800/6250] eta: 0:10:19 lr: 0.000075 grad: 0.0922 (0.0908) loss: 0.8636 (0.8682) time: 0.1320 data: 0.0503 max mem: 9305 +Train: [46] [1900/6250] eta: 0:10:03 lr: 0.000075 grad: 0.0904 (0.0910) loss: 0.8655 (0.8680) time: 0.1338 data: 0.0506 max mem: 9305 +Train: [46] [2000/6250] eta: 0:09:51 lr: 0.000075 grad: 0.1001 (0.0911) loss: 0.8561 (0.8676) time: 0.1186 data: 0.0256 max mem: 9305 +Train: [46] [2100/6250] eta: 0:09:37 lr: 0.000075 grad: 0.0973 (0.0913) loss: 0.8660 (0.8673) time: 0.1523 data: 0.0715 max mem: 9305 +Train: [46] [2200/6250] eta: 0:09:25 lr: 0.000075 grad: 0.0882 (0.0915) loss: 0.8645 (0.8671) time: 0.1668 data: 0.0792 max mem: 9305 +Train: [46] [2300/6250] eta: 0:09:13 lr: 0.000075 grad: 0.0941 (0.0917) loss: 0.8570 (0.8666) time: 0.1559 data: 0.0741 max mem: 9305 +Train: [46] [2400/6250] eta: 0:09:00 lr: 0.000075 grad: 0.0892 (0.0917) loss: 0.8621 (0.8664) time: 0.1336 data: 0.0433 max mem: 9305 +Train: [46] [2500/6250] eta: 0:08:47 lr: 0.000075 grad: 0.0871 (0.0918) loss: 0.8609 (0.8663) time: 0.1582 data: 0.0750 max mem: 9305 +Train: [46] [2600/6250] eta: 0:08:34 lr: 0.000075 grad: 0.0917 (0.0920) loss: 0.8619 (0.8660) time: 0.1491 data: 0.0630 max mem: 9305 +Train: [46] [2700/6250] eta: 0:08:21 lr: 0.000075 grad: 0.0921 (0.0921) loss: 0.8627 (0.8658) time: 0.1442 data: 0.0630 max mem: 9305 +Train: [46] [2800/6250] eta: 0:08:07 lr: 0.000075 grad: 0.0929 (0.0922) loss: 0.8580 (0.8657) time: 0.1529 data: 0.0675 max mem: 9305 +Train: [46] [2900/6250] eta: 0:07:52 lr: 0.000075 grad: 0.0984 (0.0923) loss: 0.8536 (0.8654) time: 0.1880 data: 0.1055 max mem: 9305 +Train: [46] [3000/6250] eta: 0:07:38 lr: 0.000075 grad: 0.0907 (0.0924) loss: 0.8629 (0.8653) time: 0.1283 data: 0.0476 max mem: 9305 +Train: [46] [3100/6250] eta: 0:07:23 lr: 0.000075 grad: 0.0917 (0.0926) loss: 0.8587 (0.8651) time: 0.1442 data: 0.0679 max mem: 9305 +Train: [46] [3200/6250] eta: 0:07:09 lr: 0.000075 grad: 0.0912 (0.0927) loss: 0.8633 (0.8649) time: 0.1300 data: 0.0505 max mem: 9305 +Train: [46] [3300/6250] eta: 0:06:54 lr: 0.000075 grad: 0.0982 (0.0928) loss: 0.8533 (0.8648) time: 0.1388 data: 0.0527 max mem: 9305 +Train: [46] [3400/6250] eta: 0:06:39 lr: 0.000075 grad: 0.0917 (0.0930) loss: 0.8619 (0.8646) time: 0.1309 data: 0.0474 max mem: 9305 +Train: [46] [3500/6250] eta: 0:06:25 lr: 0.000075 grad: 0.0957 (0.0931) loss: 0.8530 (0.8645) time: 0.1424 data: 0.0618 max mem: 9305 +Train: [46] [3600/6250] eta: 0:06:11 lr: 0.000075 grad: 0.0938 (0.0932) loss: 0.8634 (0.8643) time: 0.1404 data: 0.0597 max mem: 9305 +Train: [46] [3700/6250] eta: 0:05:57 lr: 0.000075 grad: 0.0924 (0.0934) loss: 0.8525 (0.8641) time: 0.1477 data: 0.0668 max mem: 9305 +Train: [46] [3800/6250] eta: 0:05:43 lr: 0.000075 grad: 0.0971 (0.0935) loss: 0.8579 (0.8639) time: 0.0911 data: 0.0007 max mem: 9305 +Train: [46] [3900/6250] eta: 0:05:29 lr: 0.000075 grad: 0.1022 (0.0937) loss: 0.8561 (0.8637) time: 0.1533 data: 0.0716 max mem: 9305 +Train: [46] [4000/6250] eta: 0:05:15 lr: 0.000075 grad: 0.0937 (0.0939) loss: 0.8588 (0.8635) time: 0.1302 data: 0.0504 max mem: 9305 +Train: [46] [4100/6250] eta: 0:05:01 lr: 0.000075 grad: 0.0970 (0.0941) loss: 0.8534 (0.8633) time: 0.1365 data: 0.0609 max mem: 9305 +Train: [46] [4200/6250] eta: 0:04:47 lr: 0.000074 grad: 0.0968 (0.0942) loss: 0.8582 (0.8632) time: 0.1346 data: 0.0494 max mem: 9305 +Train: [46] [4300/6250] eta: 0:04:32 lr: 0.000074 grad: 0.0942 (0.0943) loss: 0.8601 (0.8630) time: 0.1285 data: 0.0486 max mem: 9305 +Train: [46] [4400/6250] eta: 0:04:18 lr: 0.000074 grad: 0.0874 (0.0944) loss: 0.8572 (0.8628) time: 0.1243 data: 0.0430 max mem: 9305 +Train: [46] [4500/6250] eta: 0:04:05 lr: 0.000074 grad: 0.0909 (0.0945) loss: 0.8585 (0.8627) time: 0.1254 data: 0.0353 max mem: 9305 +Train: [46] [4600/6250] eta: 0:03:51 lr: 0.000074 grad: 0.0894 (0.0945) loss: 0.8644 (0.8626) time: 0.1260 data: 0.0384 max mem: 9305 +Train: [46] [4700/6250] eta: 0:03:37 lr: 0.000074 grad: 0.0897 (0.0945) loss: 0.8598 (0.8626) time: 0.1699 data: 0.0865 max mem: 9305 +Train: [46] [4800/6250] eta: 0:03:24 lr: 0.000074 grad: 0.0913 (0.0946) loss: 0.8629 (0.8625) time: 0.1502 data: 0.0686 max mem: 9305 +Train: [46] [4900/6250] eta: 0:03:09 lr: 0.000074 grad: 0.0904 (0.0947) loss: 0.8622 (0.8624) time: 0.1137 data: 0.0322 max mem: 9305 +Train: [46] [5000/6250] eta: 0:02:55 lr: 0.000074 grad: 0.0872 (0.0946) loss: 0.8616 (0.8625) time: 0.1324 data: 0.0406 max mem: 9305 +Train: [46] [5100/6250] eta: 0:02:41 lr: 0.000074 grad: 0.0907 (0.0946) loss: 0.8610 (0.8625) time: 0.1298 data: 0.0506 max mem: 9305 +Train: [46] [5200/6250] eta: 0:02:27 lr: 0.000074 grad: 0.0929 (0.0946) loss: 0.8628 (0.8625) time: 0.1321 data: 0.0489 max mem: 9305 +Train: [46] [5300/6250] eta: 0:02:12 lr: 0.000074 grad: 0.0976 (0.0947) loss: 0.8691 (0.8625) time: 0.1196 data: 0.0338 max mem: 9305 +Train: [46] [5400/6250] eta: 0:01:58 lr: 0.000074 grad: 0.0910 (0.0947) loss: 0.8647 (0.8625) time: 0.1276 data: 0.0435 max mem: 9305 +Train: [46] [5500/6250] eta: 0:01:44 lr: 0.000074 grad: 0.1030 (0.0947) loss: 0.8632 (0.8626) time: 0.1343 data: 0.0450 max mem: 9305 +Train: [46] [5600/6250] eta: 0:01:30 lr: 0.000074 grad: 0.0981 (0.0948) loss: 0.8659 (0.8626) time: 0.1547 data: 0.0757 max mem: 9305 +Train: [46] [5700/6250] eta: 0:01:16 lr: 0.000074 grad: 0.0956 (0.0948) loss: 0.8616 (0.8626) time: 0.1683 data: 0.0602 max mem: 9305 +Train: [46] [5800/6250] eta: 0:01:03 lr: 0.000074 grad: 0.0881 (0.0949) loss: 0.8675 (0.8625) time: 0.1609 data: 0.0624 max mem: 9305 +Train: [46] [5900/6250] eta: 0:00:49 lr: 0.000074 grad: 0.0917 (0.0949) loss: 0.8565 (0.8625) time: 0.1388 data: 0.0551 max mem: 9305 +Train: [46] [6000/6250] eta: 0:00:35 lr: 0.000074 grad: 0.0985 (0.0951) loss: 0.8578 (0.8624) time: 0.2027 data: 0.0955 max mem: 9305 +Train: [46] [6100/6250] eta: 0:00:21 lr: 0.000074 grad: 0.0990 (0.0952) loss: 0.8624 (0.8624) time: 0.1827 data: 0.0596 max mem: 9305 +Train: [46] [6200/6250] eta: 0:00:07 lr: 0.000074 grad: 0.0993 (0.0953) loss: 0.8589 (0.8623) time: 0.1126 data: 0.0321 max mem: 9305 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.1030 (0.0953) loss: 0.8647 (0.8623) time: 0.1394 data: 0.0486 max mem: 9305 +Train: [46] Total time: 0:14:46 (0.1418 s / it) +Averaged stats: lr: 0.000074 grad: 0.1030 (0.0953) loss: 0.8647 (0.8623) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:04:43 loss: 0.8960 (0.8960) time: 4.5801 data: 4.5452 max mem: 9305 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8832 (0.8832) time: 0.1244 data: 0.0959 max mem: 9305 +Eval (hcp-train-subset): [46] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (hcp-train-subset): loss: 0.8832 (0.8832) +Eval (hcp-val): [46] [ 0/62] eta: 0:03:25 loss: 0.8748 (0.8748) time: 3.3189 data: 3.2330 max mem: 9305 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8796 (0.8809) time: 0.1336 data: 0.1049 max mem: 9305 +Eval (hcp-val): [46] Total time: 0:00:13 (0.2101 s / it) +Averaged stats (hcp-val): loss: 0.8796 (0.8809) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 9:21:35 lr: 0.000074 grad: 0.1106 (0.1106) loss: 0.8972 (0.8972) time: 5.3913 data: 5.2445 max mem: 9305 +Train: [47] [ 100/6250] eta: 0:20:07 lr: 0.000074 grad: 0.0929 (0.0980) loss: 0.8774 (0.8804) time: 0.1269 data: 0.0229 max mem: 9305 +Train: [47] [ 200/6250] eta: 0:17:13 lr: 0.000074 grad: 0.0881 (0.0976) loss: 0.8756 (0.8760) time: 0.1356 data: 0.0299 max mem: 9305 +Train: [47] [ 300/6250] eta: 0:15:49 lr: 0.000074 grad: 0.0889 (0.0971) loss: 0.8606 (0.8729) time: 0.1547 data: 0.0695 max mem: 9305 +Train: [47] [ 400/6250] eta: 0:15:00 lr: 0.000074 grad: 0.0950 (0.0958) loss: 0.8494 (0.8708) time: 0.1489 data: 0.0546 max mem: 9305 +Train: [47] [ 500/6250] eta: 0:14:13 lr: 0.000074 grad: 0.0920 (0.0962) loss: 0.8587 (0.8684) time: 0.1406 data: 0.0502 max mem: 9305 +Train: [47] [ 600/6250] eta: 0:13:36 lr: 0.000074 grad: 0.0976 (0.0962) loss: 0.8591 (0.8668) time: 0.1230 data: 0.0332 max mem: 9305 +Train: [47] [ 700/6250] eta: 0:13:02 lr: 0.000074 grad: 0.0894 (0.0968) loss: 0.8549 (0.8655) time: 0.1239 data: 0.0347 max mem: 9305 +Train: [47] [ 800/6250] eta: 0:12:41 lr: 0.000074 grad: 0.0965 (0.0971) loss: 0.8614 (0.8646) time: 0.1089 data: 0.0226 max mem: 9305 +Train: [47] [ 900/6250] eta: 0:12:45 lr: 0.000074 grad: 0.0940 (0.0968) loss: 0.8596 (0.8640) time: 0.1988 data: 0.1160 max mem: 9305 +Train: [47] [1000/6250] eta: 0:12:31 lr: 0.000073 grad: 0.0901 (0.0965) loss: 0.8552 (0.8635) time: 0.1541 data: 0.0737 max mem: 9305 +Train: [47] [1100/6250] eta: 0:12:20 lr: 0.000073 grad: 0.0951 (0.0965) loss: 0.8563 (0.8630) time: 0.1464 data: 0.0617 max mem: 9305 +Train: [47] [1200/6250] eta: 0:12:07 lr: 0.000073 grad: 0.0942 (0.0964) loss: 0.8609 (0.8626) time: 0.1448 data: 0.0634 max mem: 9305 +Train: [47] [1300/6250] eta: 0:11:49 lr: 0.000073 grad: 0.1003 (0.0966) loss: 0.8564 (0.8623) time: 0.1313 data: 0.0401 max mem: 9305 +Train: [47] [1400/6250] eta: 0:11:47 lr: 0.000073 grad: 0.0942 (0.0967) loss: 0.8596 (0.8620) time: 0.2502 data: 0.1651 max mem: 9305 +Train: [47] [1500/6250] eta: 0:11:28 lr: 0.000073 grad: 0.0976 (0.0967) loss: 0.8549 (0.8617) time: 0.1446 data: 0.0601 max mem: 9305 +Train: [47] [1600/6250] eta: 0:11:18 lr: 0.000073 grad: 0.0958 (0.0970) loss: 0.8577 (0.8615) time: 0.1041 data: 0.0003 max mem: 9305 +Train: [47] [1700/6250] eta: 0:11:00 lr: 0.000073 grad: 0.0955 (0.0969) loss: 0.8546 (0.8613) time: 0.1439 data: 0.0591 max mem: 9305 +Train: [47] [1800/6250] eta: 0:10:50 lr: 0.000073 grad: 0.0982 (0.0971) loss: 0.8551 (0.8611) time: 0.1869 data: 0.0989 max mem: 9305 +Train: [47] [1900/6250] eta: 0:10:43 lr: 0.000073 grad: 0.0860 (0.0971) loss: 0.8625 (0.8609) time: 0.0830 data: 0.0002 max mem: 9305 +Train: [47] [2000/6250] eta: 0:10:22 lr: 0.000073 grad: 0.0959 (0.0972) loss: 0.8527 (0.8607) time: 0.1369 data: 0.0533 max mem: 9305 +Train: [47] [2100/6250] eta: 0:10:04 lr: 0.000073 grad: 0.0985 (0.0975) loss: 0.8509 (0.8605) time: 0.1209 data: 0.0387 max mem: 9305 +Train: [47] [2200/6250] eta: 0:09:48 lr: 0.000073 grad: 0.0950 (0.0975) loss: 0.8569 (0.8604) time: 0.1380 data: 0.0567 max mem: 9305 +Train: [47] [2300/6250] eta: 0:09:32 lr: 0.000073 grad: 0.0982 (0.0978) loss: 0.8618 (0.8602) time: 0.1357 data: 0.0515 max mem: 9305 +Train: [47] [2400/6250] eta: 0:09:17 lr: 0.000073 grad: 0.1010 (0.0979) loss: 0.8595 (0.8602) time: 0.1395 data: 0.0565 max mem: 9305 +Train: [47] [2500/6250] eta: 0:09:00 lr: 0.000073 grad: 0.1007 (0.0981) loss: 0.8541 (0.8600) time: 0.1308 data: 0.0448 max mem: 9305 +Train: [47] [2600/6250] eta: 0:08:45 lr: 0.000073 grad: 0.0983 (0.0981) loss: 0.8692 (0.8600) time: 0.1545 data: 0.0616 max mem: 9305 +Train: [47] [2700/6250] eta: 0:08:29 lr: 0.000073 grad: 0.0880 (0.0982) loss: 0.8624 (0.8600) time: 0.1300 data: 0.0472 max mem: 9305 +Train: [47] [2800/6250] eta: 0:08:13 lr: 0.000073 grad: 0.0943 (0.0984) loss: 0.8660 (0.8599) time: 0.1286 data: 0.0472 max mem: 9305 +Train: [47] [2900/6250] eta: 0:07:58 lr: 0.000073 grad: 0.1067 (0.0985) loss: 0.8598 (0.8597) time: 0.1872 data: 0.1022 max mem: 9305 +Train: [47] [3000/6250] eta: 0:07:42 lr: 0.000073 grad: 0.0921 (0.0986) loss: 0.8560 (0.8596) time: 0.1304 data: 0.0518 max mem: 9305 +Train: [47] [3100/6250] eta: 0:07:26 lr: 0.000073 grad: 0.0991 (0.0987) loss: 0.8621 (0.8595) time: 0.1241 data: 0.0307 max mem: 9305 +Train: [47] [3200/6250] eta: 0:07:12 lr: 0.000073 grad: 0.1022 (0.0988) loss: 0.8621 (0.8594) time: 0.1282 data: 0.0475 max mem: 9305 +Train: [47] [3300/6250] eta: 0:06:57 lr: 0.000073 grad: 0.0982 (0.0989) loss: 0.8492 (0.8594) time: 0.1425 data: 0.0590 max mem: 9305 +Train: [47] [3400/6250] eta: 0:06:42 lr: 0.000073 grad: 0.0945 (0.0990) loss: 0.8610 (0.8594) time: 0.1294 data: 0.0498 max mem: 9305 +Train: [47] [3500/6250] eta: 0:06:28 lr: 0.000073 grad: 0.0963 (0.0989) loss: 0.8569 (0.8595) time: 0.1514 data: 0.0715 max mem: 9305 +Train: [47] [3600/6250] eta: 0:06:14 lr: 0.000073 grad: 0.0936 (0.0989) loss: 0.8594 (0.8595) time: 0.1430 data: 0.0601 max mem: 9305 +Train: [47] [3700/6250] eta: 0:06:00 lr: 0.000073 grad: 0.1028 (0.0990) loss: 0.8537 (0.8595) time: 0.1741 data: 0.0941 max mem: 9305 +Train: [47] [3800/6250] eta: 0:05:46 lr: 0.000073 grad: 0.0992 (0.0991) loss: 0.8567 (0.8595) time: 0.1494 data: 0.0657 max mem: 9305 +Train: [47] [3900/6250] eta: 0:05:31 lr: 0.000073 grad: 0.0974 (0.0991) loss: 0.8528 (0.8595) time: 0.1398 data: 0.0553 max mem: 9305 +Train: [47] [4000/6250] eta: 0:05:17 lr: 0.000073 grad: 0.0984 (0.0991) loss: 0.8621 (0.8595) time: 0.1541 data: 0.0637 max mem: 9305 +Train: [47] [4100/6250] eta: 0:05:02 lr: 0.000072 grad: 0.0987 (0.0991) loss: 0.8554 (0.8595) time: 0.1420 data: 0.0590 max mem: 9305 +Train: [47] [4200/6250] eta: 0:04:47 lr: 0.000072 grad: 0.0938 (0.0992) loss: 0.8663 (0.8596) time: 0.1386 data: 0.0523 max mem: 9305 +Train: [47] [4300/6250] eta: 0:04:34 lr: 0.000072 grad: 0.0954 (0.0992) loss: 0.8641 (0.8596) time: 0.1348 data: 0.0427 max mem: 9305 +Train: [47] [4400/6250] eta: 0:04:20 lr: 0.000072 grad: 0.0921 (0.0993) loss: 0.8641 (0.8597) time: 0.1510 data: 0.0704 max mem: 9305 +Train: [47] [4500/6250] eta: 0:04:06 lr: 0.000072 grad: 0.0992 (0.0993) loss: 0.8523 (0.8597) time: 0.1554 data: 0.0668 max mem: 9305 +Train: [47] [4600/6250] eta: 0:03:52 lr: 0.000072 grad: 0.0928 (0.0993) loss: 0.8621 (0.8597) time: 0.1599 data: 0.0750 max mem: 9305 +Train: [47] [4700/6250] eta: 0:03:38 lr: 0.000072 grad: 0.1023 (0.0992) loss: 0.8579 (0.8597) time: 0.1225 data: 0.0353 max mem: 9305 +Train: [47] [4800/6250] eta: 0:03:23 lr: 0.000072 grad: 0.0955 (0.0993) loss: 0.8626 (0.8598) time: 0.1183 data: 0.0341 max mem: 9305 +Train: [47] [4900/6250] eta: 0:03:09 lr: 0.000072 grad: 0.0944 (0.0992) loss: 0.8625 (0.8598) time: 0.1275 data: 0.0497 max mem: 9305 +Train: [47] [5000/6250] eta: 0:02:54 lr: 0.000072 grad: 0.0925 (0.0993) loss: 0.8546 (0.8598) time: 0.1361 data: 0.0437 max mem: 9305 +Train: [47] [5100/6250] eta: 0:02:40 lr: 0.000072 grad: 0.0955 (0.0992) loss: 0.8561 (0.8598) time: 0.1180 data: 0.0323 max mem: 9305 +Train: [47] [5200/6250] eta: 0:02:25 lr: 0.000072 grad: 0.0924 (0.0992) loss: 0.8670 (0.8598) time: 0.1100 data: 0.0256 max mem: 9305 +Train: [47] [5300/6250] eta: 0:02:11 lr: 0.000072 grad: 0.0976 (0.0991) loss: 0.8600 (0.8599) time: 0.1096 data: 0.0169 max mem: 9305 +Train: [47] [5400/6250] eta: 0:01:57 lr: 0.000072 grad: 0.0946 (0.0991) loss: 0.8582 (0.8599) time: 0.1203 data: 0.0424 max mem: 9305 +Train: [47] [5500/6250] eta: 0:01:43 lr: 0.000072 grad: 0.0968 (0.0992) loss: 0.8582 (0.8599) time: 0.1316 data: 0.0476 max mem: 9305 +Train: [47] [5600/6250] eta: 0:01:29 lr: 0.000072 grad: 0.0919 (0.0992) loss: 0.8696 (0.8599) time: 0.1377 data: 0.0554 max mem: 9305 +Train: [47] [5700/6250] eta: 0:01:15 lr: 0.000072 grad: 0.1001 (0.0993) loss: 0.8582 (0.8598) time: 0.1367 data: 0.0531 max mem: 9305 +Train: [47] [5800/6250] eta: 0:01:02 lr: 0.000072 grad: 0.1011 (0.0994) loss: 0.8561 (0.8598) time: 0.1845 data: 0.1056 max mem: 9305 +Train: [47] [5900/6250] eta: 0:00:48 lr: 0.000072 grad: 0.0943 (0.0994) loss: 0.8621 (0.8598) time: 0.1288 data: 0.0384 max mem: 9305 +Train: [47] [6000/6250] eta: 0:00:34 lr: 0.000072 grad: 0.0966 (0.0995) loss: 0.8639 (0.8598) time: 0.1277 data: 0.0352 max mem: 9305 +Train: [47] [6100/6250] eta: 0:00:20 lr: 0.000072 grad: 0.1045 (0.0996) loss: 0.8606 (0.8598) time: 0.1211 data: 0.0340 max mem: 9305 +Train: [47] [6200/6250] eta: 0:00:06 lr: 0.000072 grad: 0.0998 (0.0997) loss: 0.8535 (0.8598) time: 0.1364 data: 0.0449 max mem: 9305 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.0984 (0.0997) loss: 0.8579 (0.8598) time: 0.1174 data: 0.0389 max mem: 9305 +Train: [47] Total time: 0:14:27 (0.1387 s / it) +Averaged stats: lr: 0.000072 grad: 0.0984 (0.0997) loss: 0.8579 (0.8598) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:04:45 loss: 0.8967 (0.8967) time: 4.6122 data: 4.5768 max mem: 9305 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8820 (0.8802) time: 0.0999 data: 0.0715 max mem: 9305 +Eval (hcp-train-subset): [47] Total time: 0:00:13 (0.2106 s / it) +Averaged stats (hcp-train-subset): loss: 0.8820 (0.8802) +Eval (hcp-val): [47] [ 0/62] eta: 0:03:30 loss: 0.8776 (0.8776) time: 3.3881 data: 3.3197 max mem: 9305 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8782 (0.8800) time: 0.1307 data: 0.0997 max mem: 9305 +Eval (hcp-val): [47] Total time: 0:00:12 (0.2058 s / it) +Averaged stats (hcp-val): loss: 0.8782 (0.8800) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [48] [ 0/6250] eta: 7:08:36 lr: 0.000072 grad: 0.0875 (0.0875) loss: 0.9038 (0.9038) time: 4.1147 data: 3.8438 max mem: 9305 +Train: [48] [ 100/6250] eta: 0:18:58 lr: 0.000072 grad: 0.0921 (0.1043) loss: 0.8731 (0.8822) time: 0.1442 data: 0.0535 max mem: 9305 +Train: [48] [ 200/6250] eta: 0:16:22 lr: 0.000072 grad: 0.0902 (0.1045) loss: 0.8610 (0.8749) time: 0.1516 data: 0.0612 max mem: 9305 +Train: [48] [ 300/6250] eta: 0:15:15 lr: 0.000072 grad: 0.0961 (0.1041) loss: 0.8606 (0.8706) time: 0.1137 data: 0.0233 max mem: 9305 +Train: [48] [ 400/6250] eta: 0:14:16 lr: 0.000072 grad: 0.0980 (0.1028) loss: 0.8637 (0.8681) time: 0.1215 data: 0.0311 max mem: 9305 +Train: [48] [ 500/6250] eta: 0:13:36 lr: 0.000072 grad: 0.0866 (0.1010) loss: 0.8650 (0.8673) time: 0.1236 data: 0.0329 max mem: 9305 +Train: [48] [ 600/6250] eta: 0:13:11 lr: 0.000072 grad: 0.0930 (0.0999) loss: 0.8588 (0.8663) time: 0.1216 data: 0.0343 max mem: 9305 +Train: [48] [ 700/6250] eta: 0:12:47 lr: 0.000072 grad: 0.0983 (0.0991) loss: 0.8575 (0.8658) time: 0.1433 data: 0.0618 max mem: 9305 +Train: [48] [ 800/6250] eta: 0:12:27 lr: 0.000072 grad: 0.0906 (0.0989) loss: 0.8613 (0.8651) time: 0.1262 data: 0.0443 max mem: 9305 +Train: [48] [ 900/6250] eta: 0:12:13 lr: 0.000071 grad: 0.0897 (0.0983) loss: 0.8618 (0.8648) time: 0.1368 data: 0.0485 max mem: 9305 +Train: [48] [1000/6250] eta: 0:11:56 lr: 0.000071 grad: 0.0860 (0.0977) loss: 0.8672 (0.8646) time: 0.1313 data: 0.0526 max mem: 9305 +Train: [48] [1100/6250] eta: 0:11:44 lr: 0.000071 grad: 0.0960 (0.0975) loss: 0.8582 (0.8642) time: 0.1529 data: 0.0471 max mem: 9305 +Train: [48] [1200/6250] eta: 0:11:40 lr: 0.000071 grad: 0.0935 (0.0975) loss: 0.8538 (0.8640) time: 0.1413 data: 0.0339 max mem: 9305 +Train: [48] [1300/6250] eta: 0:11:22 lr: 0.000071 grad: 0.0888 (0.0975) loss: 0.8595 (0.8637) time: 0.1371 data: 0.0560 max mem: 9305 +Train: [48] [1400/6250] eta: 0:11:08 lr: 0.000071 grad: 0.0913 (0.0976) loss: 0.8577 (0.8633) time: 0.1407 data: 0.0416 max mem: 9305 +Train: [48] [1500/6250] eta: 0:10:54 lr: 0.000071 grad: 0.0888 (0.0978) loss: 0.8592 (0.8629) time: 0.1197 data: 0.0340 max mem: 9305 +Train: [48] [1600/6250] eta: 0:10:36 lr: 0.000071 grad: 0.1025 (0.0980) loss: 0.8597 (0.8627) time: 0.1370 data: 0.0555 max mem: 9305 +Train: [48] [1700/6250] eta: 0:10:23 lr: 0.000071 grad: 0.0905 (0.0980) loss: 0.8675 (0.8625) time: 0.1459 data: 0.0498 max mem: 9305 +Train: [48] [1800/6250] eta: 0:10:09 lr: 0.000071 grad: 0.0959 (0.0980) loss: 0.8621 (0.8623) time: 0.1326 data: 0.0519 max mem: 9305 +Train: [48] [1900/6250] eta: 0:09:55 lr: 0.000071 grad: 0.0978 (0.0983) loss: 0.8547 (0.8619) time: 0.1340 data: 0.0398 max mem: 9305 +Train: [48] [2000/6250] eta: 0:09:42 lr: 0.000071 grad: 0.0953 (0.0981) loss: 0.8589 (0.8620) time: 0.1624 data: 0.0741 max mem: 9305 +Train: [48] [2100/6250] eta: 0:09:29 lr: 0.000071 grad: 0.0924 (0.0980) loss: 0.8616 (0.8619) time: 0.1367 data: 0.0470 max mem: 9305 +Train: [48] [2200/6250] eta: 0:09:16 lr: 0.000071 grad: 0.0949 (0.0980) loss: 0.8590 (0.8618) time: 0.0941 data: 0.0066 max mem: 9305 +Train: [48] [2300/6250] eta: 0:09:02 lr: 0.000071 grad: 0.0937 (0.0979) loss: 0.8607 (0.8618) time: 0.0915 data: 0.0002 max mem: 9305 +Train: [48] [2400/6250] eta: 0:08:50 lr: 0.000071 grad: 0.0892 (0.0978) loss: 0.8616 (0.8616) time: 0.1514 data: 0.0658 max mem: 9305 +Train: [48] [2500/6250] eta: 0:08:39 lr: 0.000071 grad: 0.0974 (0.0979) loss: 0.8648 (0.8615) time: 0.2277 data: 0.1439 max mem: 9305 +Train: [48] [2600/6250] eta: 0:08:23 lr: 0.000071 grad: 0.0937 (0.0979) loss: 0.8647 (0.8615) time: 0.0985 data: 0.0065 max mem: 9305 +Train: [48] [2700/6250] eta: 0:08:09 lr: 0.000071 grad: 0.0969 (0.0980) loss: 0.8619 (0.8614) time: 0.1296 data: 0.0497 max mem: 9305 +Train: [48] [2800/6250] eta: 0:07:55 lr: 0.000071 grad: 0.0886 (0.0980) loss: 0.8599 (0.8614) time: 0.1276 data: 0.0402 max mem: 9305 +Train: [48] [2900/6250] eta: 0:07:42 lr: 0.000071 grad: 0.0892 (0.0981) loss: 0.8705 (0.8615) time: 0.1655 data: 0.0785 max mem: 9305 +Train: [48] [3000/6250] eta: 0:07:30 lr: 0.000071 grad: 0.0961 (0.0981) loss: 0.8642 (0.8615) time: 0.0948 data: 0.0002 max mem: 9305 +Train: [48] [3100/6250] eta: 0:07:15 lr: 0.000071 grad: 0.0902 (0.0982) loss: 0.8633 (0.8616) time: 0.1020 data: 0.0070 max mem: 9305 +Train: [48] [3200/6250] eta: 0:07:01 lr: 0.000071 grad: 0.0990 (0.0983) loss: 0.8646 (0.8616) time: 0.1223 data: 0.0315 max mem: 9305 +Train: [48] [3300/6250] eta: 0:06:47 lr: 0.000071 grad: 0.0992 (0.0984) loss: 0.8620 (0.8616) time: 0.1291 data: 0.0534 max mem: 9305 +Train: [48] [3400/6250] eta: 0:06:33 lr: 0.000071 grad: 0.0983 (0.0985) loss: 0.8563 (0.8615) time: 0.1213 data: 0.0443 max mem: 9305 +Train: [48] [3500/6250] eta: 0:06:19 lr: 0.000071 grad: 0.0981 (0.0986) loss: 0.8622 (0.8615) time: 0.1248 data: 0.0394 max mem: 9305 +Train: [48] [3600/6250] eta: 0:06:05 lr: 0.000071 grad: 0.1014 (0.0988) loss: 0.8554 (0.8614) time: 0.1398 data: 0.0533 max mem: 9305 +Train: [48] [3700/6250] eta: 0:05:51 lr: 0.000071 grad: 0.0962 (0.0989) loss: 0.8653 (0.8613) time: 0.1322 data: 0.0490 max mem: 9305 +Train: [48] [3800/6250] eta: 0:05:37 lr: 0.000071 grad: 0.0989 (0.0991) loss: 0.8622 (0.8613) time: 0.1583 data: 0.0681 max mem: 9305 +Train: [48] [3900/6250] eta: 0:05:23 lr: 0.000070 grad: 0.1064 (0.0993) loss: 0.8596 (0.8611) time: 0.1378 data: 0.0555 max mem: 9305 +Train: [48] [4000/6250] eta: 0:05:09 lr: 0.000070 grad: 0.1067 (0.0994) loss: 0.8491 (0.8610) time: 0.1275 data: 0.0448 max mem: 9305 +Train: [48] [4100/6250] eta: 0:04:57 lr: 0.000070 grad: 0.1057 (0.0995) loss: 0.8480 (0.8608) time: 0.1533 data: 0.0676 max mem: 9305 +Train: [48] [4200/6250] eta: 0:04:43 lr: 0.000070 grad: 0.0973 (0.0997) loss: 0.8629 (0.8606) time: 0.1368 data: 0.0577 max mem: 9305 +Train: [48] [4300/6250] eta: 0:04:29 lr: 0.000070 grad: 0.1125 (0.0999) loss: 0.8434 (0.8604) time: 0.1300 data: 0.0481 max mem: 9305 +Train: [48] [4400/6250] eta: 0:04:15 lr: 0.000070 grad: 0.1048 (0.1001) loss: 0.8578 (0.8602) time: 0.1502 data: 0.0649 max mem: 9305 +Train: [48] [4500/6250] eta: 0:04:01 lr: 0.000070 grad: 0.1020 (0.1001) loss: 0.8469 (0.8601) time: 0.1250 data: 0.0350 max mem: 9305 +Train: [48] [4600/6250] eta: 0:03:47 lr: 0.000070 grad: 0.1021 (0.1003) loss: 0.8511 (0.8599) time: 0.1304 data: 0.0505 max mem: 9305 +Train: [48] [4700/6250] eta: 0:03:33 lr: 0.000070 grad: 0.0995 (0.1003) loss: 0.8580 (0.8598) time: 0.1358 data: 0.0516 max mem: 9305 +Train: [48] [4800/6250] eta: 0:03:18 lr: 0.000070 grad: 0.0970 (0.1004) loss: 0.8577 (0.8597) time: 0.1245 data: 0.0421 max mem: 9305 +Train: [48] [4900/6250] eta: 0:03:04 lr: 0.000070 grad: 0.1013 (0.1005) loss: 0.8529 (0.8596) time: 0.1044 data: 0.0169 max mem: 9305 +Train: [48] [5000/6250] eta: 0:02:50 lr: 0.000070 grad: 0.0943 (0.1005) loss: 0.8656 (0.8596) time: 0.1183 data: 0.0261 max mem: 9305 +Train: [48] [5100/6250] eta: 0:02:36 lr: 0.000070 grad: 0.1010 (0.1006) loss: 0.8584 (0.8595) time: 0.1355 data: 0.0521 max mem: 9305 +Train: [48] [5200/6250] eta: 0:02:22 lr: 0.000070 grad: 0.0945 (0.1007) loss: 0.8536 (0.8595) time: 0.1457 data: 0.0593 max mem: 9305 +Train: [48] [5300/6250] eta: 0:02:08 lr: 0.000070 grad: 0.0973 (0.1007) loss: 0.8626 (0.8595) time: 0.1253 data: 0.0446 max mem: 9305 +Train: [48] [5400/6250] eta: 0:01:55 lr: 0.000070 grad: 0.0946 (0.1007) loss: 0.8623 (0.8595) time: 0.1205 data: 0.0394 max mem: 9305 +Train: [48] [5500/6250] eta: 0:01:41 lr: 0.000070 grad: 0.1015 (0.1006) loss: 0.8562 (0.8595) time: 0.1382 data: 0.0497 max mem: 9305 +Train: [48] [5600/6250] eta: 0:01:28 lr: 0.000070 grad: 0.1087 (0.1007) loss: 0.8578 (0.8596) time: 0.1244 data: 0.0416 max mem: 9305 +Train: [48] [5700/6250] eta: 0:01:14 lr: 0.000070 grad: 0.1038 (0.1008) loss: 0.8588 (0.8596) time: 0.1304 data: 0.0485 max mem: 9305 +Train: [48] [5800/6250] eta: 0:01:00 lr: 0.000070 grad: 0.0994 (0.1008) loss: 0.8660 (0.8597) time: 0.1270 data: 0.0446 max mem: 9305 +Train: [48] [5900/6250] eta: 0:00:47 lr: 0.000070 grad: 0.1002 (0.1009) loss: 0.8578 (0.8597) time: 0.1859 data: 0.1044 max mem: 9305 +Train: [48] [6000/6250] eta: 0:00:33 lr: 0.000070 grad: 0.0963 (0.1009) loss: 0.8640 (0.8598) time: 0.1213 data: 0.0373 max mem: 9305 +Train: [48] [6100/6250] eta: 0:00:20 lr: 0.000070 grad: 0.1043 (0.1010) loss: 0.8561 (0.8597) time: 0.1417 data: 0.0589 max mem: 9305 +Train: [48] [6200/6250] eta: 0:00:06 lr: 0.000070 grad: 0.1002 (0.1010) loss: 0.8599 (0.8597) time: 0.1205 data: 0.0335 max mem: 9305 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.1127 (0.1010) loss: 0.8533 (0.8597) time: 0.1605 data: 0.0757 max mem: 9305 +Train: [48] Total time: 0:14:08 (0.1357 s / it) +Averaged stats: lr: 0.000070 grad: 0.1127 (0.1010) loss: 0.8533 (0.8597) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:04:07 loss: 0.8977 (0.8977) time: 3.9939 data: 3.9093 max mem: 9305 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8810 (0.8818) time: 0.1152 data: 0.0869 max mem: 9305 +Eval (hcp-train-subset): [48] Total time: 0:00:12 (0.2082 s / it) +Averaged stats (hcp-train-subset): loss: 0.8810 (0.8818) +Eval (hcp-val): [48] [ 0/62] eta: 0:04:17 loss: 0.8760 (0.8760) time: 4.1484 data: 4.0548 max mem: 9305 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8792 (0.8803) time: 0.1195 data: 0.0910 max mem: 9305 +Eval (hcp-val): [48] Total time: 0:00:12 (0.2034 s / it) +Averaged stats (hcp-val): loss: 0.8792 (0.8803) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [49] [ 0/6250] eta: 9:30:50 lr: 0.000070 grad: 0.1248 (0.1248) loss: 0.8769 (0.8769) time: 5.4801 data: 5.3503 max mem: 9305 +Train: [49] [ 100/6250] eta: 0:19:53 lr: 0.000070 grad: 0.0955 (0.1055) loss: 0.8690 (0.8747) time: 0.1564 data: 0.0665 max mem: 9305 +Train: [49] [ 200/6250] eta: 0:17:09 lr: 0.000070 grad: 0.0911 (0.1023) loss: 0.8661 (0.8725) time: 0.1294 data: 0.0320 max mem: 9305 +Train: [49] [ 300/6250] eta: 0:15:50 lr: 0.000070 grad: 0.0932 (0.1009) loss: 0.8692 (0.8704) time: 0.1498 data: 0.0629 max mem: 9305 +Train: [49] [ 400/6250] eta: 0:15:01 lr: 0.000070 grad: 0.0937 (0.0992) loss: 0.8694 (0.8694) time: 0.1204 data: 0.0282 max mem: 9305 +Train: [49] [ 500/6250] eta: 0:14:20 lr: 0.000070 grad: 0.1021 (0.0985) loss: 0.8613 (0.8684) time: 0.1205 data: 0.0345 max mem: 9305 +Train: [49] [ 600/6250] eta: 0:13:45 lr: 0.000070 grad: 0.0982 (0.0980) loss: 0.8648 (0.8677) time: 0.1268 data: 0.0399 max mem: 9305 +Train: [49] [ 700/6250] eta: 0:13:40 lr: 0.000069 grad: 0.0924 (0.0977) loss: 0.8698 (0.8675) time: 0.1884 data: 0.0826 max mem: 9305 +Train: [49] [ 800/6250] eta: 0:13:34 lr: 0.000069 grad: 0.0910 (0.0973) loss: 0.8626 (0.8672) time: 0.1310 data: 0.0349 max mem: 9305 +Train: [49] [ 900/6250] eta: 0:13:29 lr: 0.000069 grad: 0.0931 (0.0968) loss: 0.8655 (0.8670) time: 0.1944 data: 0.0996 max mem: 9305 +Train: [49] [1000/6250] eta: 0:13:22 lr: 0.000069 grad: 0.0888 (0.0967) loss: 0.8674 (0.8667) time: 0.1611 data: 0.0812 max mem: 9305 +Train: [49] [1100/6250] eta: 0:13:16 lr: 0.000069 grad: 0.0984 (0.0965) loss: 0.8659 (0.8664) time: 0.1682 data: 0.0753 max mem: 9305 +Train: [49] [1200/6250] eta: 0:13:05 lr: 0.000069 grad: 0.0925 (0.0963) loss: 0.8589 (0.8660) time: 0.0932 data: 0.0100 max mem: 9305 +Train: [49] [1300/6250] eta: 0:12:52 lr: 0.000069 grad: 0.0932 (0.0962) loss: 0.8543 (0.8657) time: 0.1695 data: 0.0897 max mem: 9305 +Train: [49] [1400/6250] eta: 0:12:40 lr: 0.000069 grad: 0.0943 (0.0965) loss: 0.8594 (0.8653) time: 0.1515 data: 0.0658 max mem: 9305 +Train: [49] [1500/6250] eta: 0:12:22 lr: 0.000069 grad: 0.0966 (0.0967) loss: 0.8564 (0.8647) time: 0.1683 data: 0.0883 max mem: 9305 +Train: [49] [1600/6250] eta: 0:12:05 lr: 0.000069 grad: 0.0992 (0.0970) loss: 0.8517 (0.8644) time: 0.1797 data: 0.0956 max mem: 9305 +Train: [49] [1700/6250] eta: 0:11:45 lr: 0.000069 grad: 0.0944 (0.0974) loss: 0.8626 (0.8640) time: 0.1597 data: 0.0676 max mem: 9305 +Train: [49] [1800/6250] eta: 0:11:26 lr: 0.000069 grad: 0.0979 (0.0974) loss: 0.8621 (0.8639) time: 0.1452 data: 0.0553 max mem: 9305 +Train: [49] [1900/6250] eta: 0:11:12 lr: 0.000069 grad: 0.0924 (0.0975) loss: 0.8570 (0.8638) time: 0.2083 data: 0.1133 max mem: 9305 +Train: [49] [2000/6250] eta: 0:10:52 lr: 0.000069 grad: 0.1020 (0.0976) loss: 0.8580 (0.8636) time: 0.1241 data: 0.0397 max mem: 9305 +Train: [49] [2100/6250] eta: 0:10:35 lr: 0.000069 grad: 0.0977 (0.0977) loss: 0.8662 (0.8634) time: 0.1609 data: 0.0740 max mem: 9305 +Train: [49] [2200/6250] eta: 0:10:18 lr: 0.000069 grad: 0.0830 (0.0978) loss: 0.8669 (0.8633) time: 0.1302 data: 0.0446 max mem: 9305 +Train: [49] [2300/6250] eta: 0:10:02 lr: 0.000069 grad: 0.0934 (0.0979) loss: 0.8584 (0.8630) time: 0.1479 data: 0.0626 max mem: 9305 +Train: [49] [2400/6250] eta: 0:09:44 lr: 0.000069 grad: 0.0948 (0.0981) loss: 0.8642 (0.8628) time: 0.1334 data: 0.0532 max mem: 9305 +Train: [49] [2500/6250] eta: 0:09:26 lr: 0.000069 grad: 0.0985 (0.0981) loss: 0.8628 (0.8627) time: 0.1490 data: 0.0658 max mem: 9305 +Train: [49] [2600/6250] eta: 0:09:08 lr: 0.000069 grad: 0.0899 (0.0981) loss: 0.8595 (0.8627) time: 0.1443 data: 0.0625 max mem: 9305 +Train: [49] [2700/6250] eta: 0:08:51 lr: 0.000069 grad: 0.0894 (0.0982) loss: 0.8632 (0.8626) time: 0.1325 data: 0.0401 max mem: 9305 +Train: [49] [2800/6250] eta: 0:08:34 lr: 0.000069 grad: 0.1053 (0.0984) loss: 0.8552 (0.8625) time: 0.1235 data: 0.0395 max mem: 9305 +Train: [49] [2900/6250] eta: 0:08:17 lr: 0.000069 grad: 0.0978 (0.0984) loss: 0.8553 (0.8623) time: 0.1194 data: 0.0368 max mem: 9305 +Train: [49] [3000/6250] eta: 0:08:01 lr: 0.000069 grad: 0.1085 (0.0985) loss: 0.8583 (0.8622) time: 0.1241 data: 0.0369 max mem: 9305 +Train: [49] [3100/6250] eta: 0:07:45 lr: 0.000069 grad: 0.1041 (0.0986) loss: 0.8567 (0.8621) time: 0.1190 data: 0.0352 max mem: 9305 +Train: [49] [3200/6250] eta: 0:07:28 lr: 0.000069 grad: 0.1040 (0.0988) loss: 0.8487 (0.8619) time: 0.1269 data: 0.0432 max mem: 9305 +Train: [49] [3300/6250] eta: 0:07:12 lr: 0.000069 grad: 0.0938 (0.0989) loss: 0.8560 (0.8617) time: 0.1333 data: 0.0504 max mem: 9305 +Train: [49] [3400/6250] eta: 0:06:57 lr: 0.000069 grad: 0.0918 (0.0989) loss: 0.8606 (0.8616) time: 0.1142 data: 0.0305 max mem: 9305 +Train: [49] [3500/6250] eta: 0:06:41 lr: 0.000069 grad: 0.1032 (0.0990) loss: 0.8575 (0.8615) time: 0.1216 data: 0.0427 max mem: 9305 +Train: [49] [3600/6250] eta: 0:06:26 lr: 0.000069 grad: 0.1005 (0.0991) loss: 0.8625 (0.8613) time: 0.1267 data: 0.0485 max mem: 9305 +Train: [49] [3700/6250] eta: 0:06:10 lr: 0.000069 grad: 0.0927 (0.0992) loss: 0.8647 (0.8613) time: 0.1040 data: 0.0194 max mem: 9305 +Train: [49] [3800/6250] eta: 0:05:55 lr: 0.000068 grad: 0.0916 (0.0993) loss: 0.8590 (0.8612) time: 0.1195 data: 0.0354 max mem: 9305 +Train: [49] [3900/6250] eta: 0:05:41 lr: 0.000068 grad: 0.0977 (0.0994) loss: 0.8580 (0.8611) time: 0.1574 data: 0.0609 max mem: 9305 +Train: [49] [4000/6250] eta: 0:05:26 lr: 0.000068 grad: 0.1054 (0.0995) loss: 0.8545 (0.8610) time: 0.1185 data: 0.0379 max mem: 9305 +Train: [49] [4100/6250] eta: 0:05:12 lr: 0.000068 grad: 0.0941 (0.0996) loss: 0.8596 (0.8609) time: 0.1570 data: 0.0690 max mem: 9305 +Train: [49] [4200/6250] eta: 0:04:58 lr: 0.000068 grad: 0.1035 (0.0996) loss: 0.8560 (0.8607) time: 0.1470 data: 0.0661 max mem: 9305 +Train: [49] [4300/6250] eta: 0:04:44 lr: 0.000068 grad: 0.1009 (0.0997) loss: 0.8559 (0.8606) time: 0.1559 data: 0.0762 max mem: 9305 +Train: [49] [4400/6250] eta: 0:04:30 lr: 0.000068 grad: 0.1027 (0.0998) loss: 0.8642 (0.8605) time: 0.1378 data: 0.0515 max mem: 9305 +Train: [49] [4500/6250] eta: 0:04:15 lr: 0.000068 grad: 0.0964 (0.0999) loss: 0.8600 (0.8605) time: 0.1292 data: 0.0374 max mem: 9305 +Train: [49] [4600/6250] eta: 0:04:00 lr: 0.000068 grad: 0.0995 (0.0998) loss: 0.8579 (0.8604) time: 0.1431 data: 0.0598 max mem: 9305 +Train: [49] [4700/6250] eta: 0:03:45 lr: 0.000068 grad: 0.0983 (0.0999) loss: 0.8618 (0.8604) time: 0.1285 data: 0.0443 max mem: 9305 +Train: [49] [4800/6250] eta: 0:03:30 lr: 0.000068 grad: 0.0939 (0.0999) loss: 0.8599 (0.8604) time: 0.1146 data: 0.0271 max mem: 9305 +Train: [49] [4900/6250] eta: 0:03:15 lr: 0.000068 grad: 0.0987 (0.0999) loss: 0.8588 (0.8604) time: 0.1457 data: 0.0633 max mem: 9305 +Train: [49] [5000/6250] eta: 0:03:00 lr: 0.000068 grad: 0.0916 (0.1000) loss: 0.8572 (0.8604) time: 0.1400 data: 0.0629 max mem: 9305 +Train: [49] [5100/6250] eta: 0:02:46 lr: 0.000068 grad: 0.0904 (0.1000) loss: 0.8635 (0.8604) time: 0.1501 data: 0.0468 max mem: 9305 +Train: [49] [5200/6250] eta: 0:02:32 lr: 0.000068 grad: 0.0923 (0.1000) loss: 0.8617 (0.8603) time: 0.2071 data: 0.1255 max mem: 9305 +Train: [49] [5300/6250] eta: 0:02:18 lr: 0.000068 grad: 0.0984 (0.1001) loss: 0.8576 (0.8603) time: 0.1608 data: 0.0726 max mem: 9305 +Train: [49] [5400/6250] eta: 0:02:03 lr: 0.000068 grad: 0.0945 (0.1000) loss: 0.8556 (0.8603) time: 0.1567 data: 0.0743 max mem: 9305 +Train: [49] [5500/6250] eta: 0:01:49 lr: 0.000068 grad: 0.0886 (0.1000) loss: 0.8595 (0.8603) time: 0.1642 data: 0.0858 max mem: 9305 +Train: [49] [5600/6250] eta: 0:01:34 lr: 0.000068 grad: 0.0978 (0.1000) loss: 0.8590 (0.8604) time: 0.1732 data: 0.0959 max mem: 9305 +Train: [49] [5700/6250] eta: 0:01:20 lr: 0.000068 grad: 0.0966 (0.1000) loss: 0.8627 (0.8603) time: 0.1744 data: 0.0918 max mem: 9305 +Train: [49] [5800/6250] eta: 0:01:05 lr: 0.000068 grad: 0.0930 (0.1001) loss: 0.8655 (0.8604) time: 0.1413 data: 0.0560 max mem: 9305 +Train: [49] [5900/6250] eta: 0:00:51 lr: 0.000068 grad: 0.0973 (0.1001) loss: 0.8624 (0.8604) time: 0.1586 data: 0.0647 max mem: 9305 +Train: [49] [6000/6250] eta: 0:00:36 lr: 0.000068 grad: 0.0994 (0.1001) loss: 0.8585 (0.8604) time: 0.1559 data: 0.0769 max mem: 9305 +Train: [49] [6100/6250] eta: 0:00:22 lr: 0.000068 grad: 0.1084 (0.1002) loss: 0.8576 (0.8604) time: 0.1289 data: 0.0439 max mem: 9305 +Train: [49] [6200/6250] eta: 0:00:07 lr: 0.000068 grad: 0.0914 (0.1002) loss: 0.8689 (0.8605) time: 0.1330 data: 0.0509 max mem: 9305 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.0960 (0.1001) loss: 0.8555 (0.8605) time: 0.1336 data: 0.0486 max mem: 9305 +Train: [49] Total time: 0:15:20 (0.1473 s / it) +Averaged stats: lr: 0.000068 grad: 0.0960 (0.1001) loss: 0.8555 (0.8605) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:03:23 loss: 0.8938 (0.8938) time: 3.2874 data: 3.2148 max mem: 9305 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8768 (0.8821) time: 0.1341 data: 0.1050 max mem: 9305 +Eval (hcp-train-subset): [49] Total time: 0:00:13 (0.2111 s / it) +Averaged stats (hcp-train-subset): loss: 0.8768 (0.8821) +Making plots (hcp-train-subset): example=33 +Eval (hcp-val): [49] [ 0/62] eta: 0:03:25 loss: 0.8765 (0.8765) time: 3.3099 data: 3.2242 max mem: 9305 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8792 (0.8809) time: 0.1450 data: 0.1147 max mem: 9305 +Eval (hcp-val): [49] Total time: 0:00:13 (0.2178 s / it) +Averaged stats (hcp-val): loss: 0.8792 (0.8809) +Making plots (hcp-val): example=47 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [50] [ 0/6250] eta: 10:05:30 lr: 0.000068 grad: 0.0887 (0.0887) loss: 0.8929 (0.8929) time: 5.8129 data: 5.7143 max mem: 9305 +Train: [50] [ 100/6250] eta: 0:19:33 lr: 0.000068 grad: 0.0971 (0.0967) loss: 0.8759 (0.8793) time: 0.1275 data: 0.0289 max mem: 9305 +Train: [50] [ 200/6250] eta: 0:16:16 lr: 0.000068 grad: 0.0978 (0.1011) loss: 0.8562 (0.8736) time: 0.1176 data: 0.0271 max mem: 9305 +Train: [50] [ 300/6250] eta: 0:14:48 lr: 0.000068 grad: 0.1057 (0.1014) loss: 0.8624 (0.8705) time: 0.1121 data: 0.0168 max mem: 9305 +Train: [50] [ 400/6250] eta: 0:13:52 lr: 0.000068 grad: 0.0982 (0.1006) loss: 0.8599 (0.8691) time: 0.1232 data: 0.0372 max mem: 9305 +Train: [50] [ 500/6250] eta: 0:13:08 lr: 0.000067 grad: 0.0923 (0.0999) loss: 0.8672 (0.8681) time: 0.1139 data: 0.0255 max mem: 9305 +Train: [50] [ 600/6250] eta: 0:12:38 lr: 0.000067 grad: 0.0929 (0.0991) loss: 0.8618 (0.8673) time: 0.1255 data: 0.0404 max mem: 9305 +Train: [50] [ 700/6250] eta: 0:12:14 lr: 0.000067 grad: 0.0908 (0.0983) loss: 0.8702 (0.8671) time: 0.1159 data: 0.0297 max mem: 9305 +Train: [50] [ 800/6250] eta: 0:12:09 lr: 0.000067 grad: 0.0872 (0.0976) loss: 0.8661 (0.8670) time: 0.1647 data: 0.0753 max mem: 9305 +Train: [50] [ 900/6250] eta: 0:12:00 lr: 0.000067 grad: 0.0990 (0.0973) loss: 0.8625 (0.8668) time: 0.1455 data: 0.0618 max mem: 9305 +Train: [50] [1000/6250] eta: 0:11:45 lr: 0.000067 grad: 0.0899 (0.0970) loss: 0.8670 (0.8667) time: 0.1222 data: 0.0361 max mem: 9305 +Train: [50] [1100/6250] eta: 0:11:30 lr: 0.000067 grad: 0.0915 (0.0967) loss: 0.8678 (0.8667) time: 0.1330 data: 0.0559 max mem: 9305 +Train: [50] [1200/6250] eta: 0:11:17 lr: 0.000067 grad: 0.0936 (0.0966) loss: 0.8663 (0.8665) time: 0.1369 data: 0.0523 max mem: 9305 +Train: [50] [1300/6250] eta: 0:11:04 lr: 0.000067 grad: 0.0906 (0.0967) loss: 0.8660 (0.8663) time: 0.1438 data: 0.0679 max mem: 9305 +Train: [50] [1400/6250] eta: 0:10:54 lr: 0.000067 grad: 0.0944 (0.0968) loss: 0.8603 (0.8660) time: 0.1529 data: 0.0702 max mem: 9305 +Train: [50] [1500/6250] eta: 0:10:40 lr: 0.000067 grad: 0.0922 (0.0968) loss: 0.8626 (0.8657) time: 0.0971 data: 0.0081 max mem: 9305 +Train: [50] [1600/6250] eta: 0:10:29 lr: 0.000067 grad: 0.0962 (0.0969) loss: 0.8649 (0.8654) time: 0.1327 data: 0.0377 max mem: 9305 +Train: [50] [1700/6250] eta: 0:10:14 lr: 0.000067 grad: 0.1028 (0.0971) loss: 0.8589 (0.8650) time: 0.1436 data: 0.0494 max mem: 9305 +Train: [50] [1800/6250] eta: 0:10:03 lr: 0.000067 grad: 0.0934 (0.0973) loss: 0.8687 (0.8649) time: 0.1429 data: 0.0589 max mem: 9305 +Train: [50] [1900/6250] eta: 0:09:49 lr: 0.000067 grad: 0.0936 (0.0974) loss: 0.8645 (0.8648) time: 0.1184 data: 0.0365 max mem: 9305 +Train: [50] [2000/6250] eta: 0:09:35 lr: 0.000067 grad: 0.0944 (0.0975) loss: 0.8667 (0.8646) time: 0.1321 data: 0.0529 max mem: 9305 +Train: [50] [2100/6250] eta: 0:09:21 lr: 0.000067 grad: 0.1004 (0.0975) loss: 0.8638 (0.8645) time: 0.1387 data: 0.0534 max mem: 9305 +Train: [50] [2200/6250] eta: 0:09:07 lr: 0.000067 grad: 0.0987 (0.0976) loss: 0.8584 (0.8644) time: 0.1341 data: 0.0538 max mem: 9305 +Train: [50] [2300/6250] eta: 0:08:53 lr: 0.000067 grad: 0.0930 (0.0977) loss: 0.8651 (0.8643) time: 0.1156 data: 0.0302 max mem: 9305 +Train: [50] [2400/6250] eta: 0:08:40 lr: 0.000067 grad: 0.0951 (0.0977) loss: 0.8687 (0.8642) time: 0.1568 data: 0.0729 max mem: 9305 +Train: [50] [2500/6250] eta: 0:08:25 lr: 0.000067 grad: 0.0905 (0.0977) loss: 0.8665 (0.8642) time: 0.1227 data: 0.0426 max mem: 9305 +Train: [50] [2600/6250] eta: 0:08:14 lr: 0.000067 grad: 0.1000 (0.0979) loss: 0.8607 (0.8641) time: 0.1394 data: 0.0595 max mem: 9305 +Train: [50] [2700/6250] eta: 0:08:01 lr: 0.000067 grad: 0.0887 (0.0978) loss: 0.8684 (0.8642) time: 0.1433 data: 0.0660 max mem: 9305 +Train: [50] [2800/6250] eta: 0:07:49 lr: 0.000067 grad: 0.0901 (0.0978) loss: 0.8675 (0.8642) time: 0.1155 data: 0.0343 max mem: 9305 +Train: [50] [2900/6250] eta: 0:07:36 lr: 0.000067 grad: 0.0948 (0.0979) loss: 0.8676 (0.8641) time: 0.1369 data: 0.0606 max mem: 9305 +Train: [50] [3000/6250] eta: 0:07:22 lr: 0.000067 grad: 0.0927 (0.0980) loss: 0.8596 (0.8641) time: 0.1295 data: 0.0484 max mem: 9305 +Train: [50] [3100/6250] eta: 0:07:09 lr: 0.000067 grad: 0.0936 (0.0980) loss: 0.8629 (0.8641) time: 0.1723 data: 0.0887 max mem: 9305 +Train: [50] [3200/6250] eta: 0:06:58 lr: 0.000067 grad: 0.0945 (0.0980) loss: 0.8686 (0.8641) time: 0.1496 data: 0.0699 max mem: 9305 +Train: [50] [3300/6250] eta: 0:06:44 lr: 0.000067 grad: 0.0958 (0.0981) loss: 0.8612 (0.8641) time: 0.1349 data: 0.0501 max mem: 9305 +Train: [50] [3400/6250] eta: 0:06:30 lr: 0.000067 grad: 0.0985 (0.0982) loss: 0.8638 (0.8640) time: 0.1222 data: 0.0365 max mem: 9305 +Train: [50] [3500/6250] eta: 0:06:17 lr: 0.000067 grad: 0.0994 (0.0983) loss: 0.8621 (0.8640) time: 0.1563 data: 0.0749 max mem: 9305 +Train: [50] [3600/6250] eta: 0:06:04 lr: 0.000066 grad: 0.0915 (0.0983) loss: 0.8676 (0.8640) time: 0.1237 data: 0.0404 max mem: 9305 +Train: [50] [3700/6250] eta: 0:05:51 lr: 0.000066 grad: 0.0978 (0.0983) loss: 0.8644 (0.8640) time: 0.2027 data: 0.1248 max mem: 9305 +Train: [50] [3800/6250] eta: 0:05:38 lr: 0.000066 grad: 0.0981 (0.0983) loss: 0.8653 (0.8640) time: 0.1456 data: 0.0650 max mem: 9305 +Train: [50] [3900/6250] eta: 0:05:25 lr: 0.000066 grad: 0.0913 (0.0983) loss: 0.8584 (0.8639) time: 0.1569 data: 0.0799 max mem: 9305 +Train: [50] [4000/6250] eta: 0:05:11 lr: 0.000066 grad: 0.0986 (0.0983) loss: 0.8606 (0.8639) time: 0.1383 data: 0.0519 max mem: 9305 +Train: [50] [4100/6250] eta: 0:04:57 lr: 0.000066 grad: 0.0915 (0.0983) loss: 0.8694 (0.8639) time: 0.1427 data: 0.0584 max mem: 9305 +Train: [50] [4200/6250] eta: 0:04:43 lr: 0.000066 grad: 0.1069 (0.0984) loss: 0.8519 (0.8638) time: 0.1280 data: 0.0415 max mem: 9305 +Train: [50] [4300/6250] eta: 0:04:29 lr: 0.000066 grad: 0.0982 (0.0986) loss: 0.8619 (0.8637) time: 0.1262 data: 0.0392 max mem: 9305 +Train: [50] [4400/6250] eta: 0:04:15 lr: 0.000066 grad: 0.1037 (0.0987) loss: 0.8609 (0.8636) time: 0.1290 data: 0.0500 max mem: 9305 +Train: [50] [4500/6250] eta: 0:04:00 lr: 0.000066 grad: 0.1008 (0.0988) loss: 0.8605 (0.8635) time: 0.1282 data: 0.0384 max mem: 9305 +Train: [50] [4600/6250] eta: 0:03:46 lr: 0.000066 grad: 0.1058 (0.0989) loss: 0.8552 (0.8635) time: 0.1275 data: 0.0425 max mem: 9305 +Train: [50] [4700/6250] eta: 0:03:31 lr: 0.000066 grad: 0.1012 (0.0991) loss: 0.8608 (0.8634) time: 0.1240 data: 0.0409 max mem: 9305 +Train: [50] [4800/6250] eta: 0:03:17 lr: 0.000066 grad: 0.0970 (0.0992) loss: 0.8636 (0.8633) time: 0.1209 data: 0.0341 max mem: 9305 +Train: [50] [4900/6250] eta: 0:03:03 lr: 0.000066 grad: 0.0977 (0.0993) loss: 0.8595 (0.8632) time: 0.1327 data: 0.0462 max mem: 9305 +Train: [50] [5000/6250] eta: 0:02:49 lr: 0.000066 grad: 0.0986 (0.0995) loss: 0.8654 (0.8631) time: 0.1160 data: 0.0359 max mem: 9305 +Train: [50] [5100/6250] eta: 0:02:35 lr: 0.000066 grad: 0.0931 (0.0996) loss: 0.8685 (0.8631) time: 0.1560 data: 0.0751 max mem: 9305 +Train: [50] [5200/6250] eta: 0:02:22 lr: 0.000066 grad: 0.0963 (0.0997) loss: 0.8605 (0.8630) time: 0.1452 data: 0.0638 max mem: 9305 +Train: [50] [5300/6250] eta: 0:02:08 lr: 0.000066 grad: 0.0982 (0.0997) loss: 0.8605 (0.8630) time: 0.1312 data: 0.0485 max mem: 9305 +Train: [50] [5400/6250] eta: 0:01:54 lr: 0.000066 grad: 0.1099 (0.0998) loss: 0.8614 (0.8630) time: 0.1316 data: 0.0459 max mem: 9305 +Train: [50] [5500/6250] eta: 0:01:41 lr: 0.000066 grad: 0.0945 (0.0998) loss: 0.8566 (0.8629) time: 0.1860 data: 0.0947 max mem: 9305 +Train: [50] [5600/6250] eta: 0:01:27 lr: 0.000066 grad: 0.0935 (0.0999) loss: 0.8579 (0.8629) time: 0.1194 data: 0.0368 max mem: 9305 +Train: [50] [5700/6250] eta: 0:01:14 lr: 0.000066 grad: 0.1015 (0.1000) loss: 0.8645 (0.8628) time: 0.1296 data: 0.0471 max mem: 9305 +Train: [50] [5800/6250] eta: 0:01:00 lr: 0.000066 grad: 0.0938 (0.1000) loss: 0.8674 (0.8627) time: 0.1246 data: 0.0432 max mem: 9305 +Train: [50] [5900/6250] eta: 0:00:47 lr: 0.000066 grad: 0.1001 (0.1001) loss: 0.8538 (0.8627) time: 0.1521 data: 0.0622 max mem: 9305 +Train: [50] [6000/6250] eta: 0:00:33 lr: 0.000066 grad: 0.0971 (0.1001) loss: 0.8590 (0.8626) time: 0.1062 data: 0.0255 max mem: 9305 +Train: [50] [6100/6250] eta: 0:00:20 lr: 0.000066 grad: 0.1069 (0.1002) loss: 0.8588 (0.8625) time: 0.1289 data: 0.0371 max mem: 9305 +Train: [50] [6200/6250] eta: 0:00:06 lr: 0.000066 grad: 0.0888 (0.1003) loss: 0.8582 (0.8624) time: 0.1332 data: 0.0513 max mem: 9305 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.0988 (0.1003) loss: 0.8596 (0.8624) time: 0.1466 data: 0.0595 max mem: 9305 +Train: [50] Total time: 0:14:12 (0.1364 s / it) +Averaged stats: lr: 0.000066 grad: 0.0988 (0.1003) loss: 0.8596 (0.8624) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:03:33 loss: 0.8994 (0.8994) time: 3.4412 data: 3.3301 max mem: 9305 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8806 (0.8826) time: 0.1408 data: 0.1121 max mem: 9305 +Eval (hcp-train-subset): [50] Total time: 0:00:13 (0.2228 s / it) +Averaged stats (hcp-train-subset): loss: 0.8806 (0.8826) +Eval (hcp-val): [50] [ 0/62] eta: 0:05:36 loss: 0.8771 (0.8771) time: 5.4253 data: 5.3923 max mem: 9305 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8784 (0.8801) time: 0.1119 data: 0.0835 max mem: 9305 +Eval (hcp-val): [50] Total time: 0:00:12 (0.2060 s / it) +Averaged stats (hcp-val): loss: 0.8784 (0.8801) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 6:46:52 lr: 0.000066 grad: 0.0751 (0.0751) loss: 0.8984 (0.8984) time: 3.9060 data: 3.6056 max mem: 9305 +Train: [51] [ 100/6250] eta: 0:19:48 lr: 0.000066 grad: 0.1016 (0.1300) loss: 0.8637 (0.8720) time: 0.1453 data: 0.0548 max mem: 9305 +Train: [51] [ 200/6250] eta: 0:16:55 lr: 0.000066 grad: 0.1022 (0.1187) loss: 0.8660 (0.8690) time: 0.1460 data: 0.0490 max mem: 9305 +Train: [51] [ 300/6250] eta: 0:15:15 lr: 0.000065 grad: 0.0895 (0.1123) loss: 0.8688 (0.8679) time: 0.1199 data: 0.0317 max mem: 9305 +Train: [51] [ 400/6250] eta: 0:14:16 lr: 0.000065 grad: 0.0971 (0.1087) loss: 0.8718 (0.8677) time: 0.1100 data: 0.0165 max mem: 9305 +Train: [51] [ 500/6250] eta: 0:13:38 lr: 0.000065 grad: 0.0888 (0.1058) loss: 0.8625 (0.8676) time: 0.1361 data: 0.0523 max mem: 9305 +Train: [51] [ 600/6250] eta: 0:13:01 lr: 0.000065 grad: 0.0967 (0.1040) loss: 0.8608 (0.8678) time: 0.1126 data: 0.0199 max mem: 9305 +Train: [51] [ 700/6250] eta: 0:12:37 lr: 0.000065 grad: 0.0910 (0.1023) loss: 0.8734 (0.8681) time: 0.1293 data: 0.0368 max mem: 9305 +Train: [51] [ 800/6250] eta: 0:12:23 lr: 0.000065 grad: 0.0898 (0.1012) loss: 0.8734 (0.8685) time: 0.1534 data: 0.0639 max mem: 9305 +Train: [51] [ 900/6250] eta: 0:12:08 lr: 0.000065 grad: 0.0908 (0.1008) loss: 0.8697 (0.8684) time: 0.1336 data: 0.0498 max mem: 9305 +Train: [51] [1000/6250] eta: 0:11:55 lr: 0.000065 grad: 0.0932 (0.1003) loss: 0.8674 (0.8682) time: 0.1483 data: 0.0643 max mem: 9305 +Train: [51] [1100/6250] eta: 0:11:40 lr: 0.000065 grad: 0.0970 (0.1001) loss: 0.8607 (0.8679) time: 0.1190 data: 0.0380 max mem: 9305 +Train: [51] [1200/6250] eta: 0:11:29 lr: 0.000065 grad: 0.0969 (0.0998) loss: 0.8689 (0.8677) time: 0.1574 data: 0.0709 max mem: 9305 +Train: [51] [1300/6250] eta: 0:11:20 lr: 0.000065 grad: 0.0956 (0.0996) loss: 0.8668 (0.8676) time: 0.1409 data: 0.0584 max mem: 9305 +Train: [51] [1400/6250] eta: 0:11:04 lr: 0.000065 grad: 0.1005 (0.0998) loss: 0.8559 (0.8672) time: 0.1357 data: 0.0525 max mem: 9305 +Train: [51] [1500/6250] eta: 0:10:49 lr: 0.000065 grad: 0.0968 (0.0996) loss: 0.8661 (0.8670) time: 0.1543 data: 0.0718 max mem: 9305 +Train: [51] [1600/6250] eta: 0:10:36 lr: 0.000065 grad: 0.0953 (0.0995) loss: 0.8612 (0.8668) time: 0.1401 data: 0.0584 max mem: 9305 +Train: [51] [1700/6250] eta: 0:10:23 lr: 0.000065 grad: 0.0964 (0.0995) loss: 0.8641 (0.8665) time: 0.1482 data: 0.0658 max mem: 9305 +Train: [51] [1800/6250] eta: 0:10:11 lr: 0.000065 grad: 0.1018 (0.0996) loss: 0.8612 (0.8662) time: 0.1186 data: 0.0333 max mem: 9305 +Train: [51] [1900/6250] eta: 0:09:59 lr: 0.000065 grad: 0.0956 (0.0997) loss: 0.8608 (0.8659) time: 0.1601 data: 0.0728 max mem: 9305 +Train: [51] [2000/6250] eta: 0:09:44 lr: 0.000065 grad: 0.1008 (0.0997) loss: 0.8574 (0.8658) time: 0.1195 data: 0.0297 max mem: 9305 +Train: [51] [2100/6250] eta: 0:09:31 lr: 0.000065 grad: 0.1028 (0.0997) loss: 0.8663 (0.8656) time: 0.1532 data: 0.0693 max mem: 9305 +Train: [51] [2200/6250] eta: 0:09:16 lr: 0.000065 grad: 0.0993 (0.0998) loss: 0.8648 (0.8654) time: 0.1532 data: 0.0743 max mem: 9305 +Train: [51] [2300/6250] eta: 0:09:04 lr: 0.000065 grad: 0.1007 (0.1000) loss: 0.8614 (0.8652) time: 0.1625 data: 0.0828 max mem: 9305 +Train: [51] [2400/6250] eta: 0:08:50 lr: 0.000065 grad: 0.1012 (0.1001) loss: 0.8621 (0.8651) time: 0.1658 data: 0.0771 max mem: 9305 +Train: [51] [2500/6250] eta: 0:08:37 lr: 0.000065 grad: 0.1033 (0.1003) loss: 0.8645 (0.8649) time: 0.1360 data: 0.0503 max mem: 9305 +Train: [51] [2600/6250] eta: 0:08:24 lr: 0.000065 grad: 0.0988 (0.1005) loss: 0.8613 (0.8648) time: 0.1309 data: 0.0476 max mem: 9305 +Train: [51] [2700/6250] eta: 0:08:09 lr: 0.000065 grad: 0.0982 (0.1005) loss: 0.8583 (0.8646) time: 0.1395 data: 0.0499 max mem: 9305 +Train: [51] [2800/6250] eta: 0:07:55 lr: 0.000065 grad: 0.1037 (0.1007) loss: 0.8641 (0.8644) time: 0.1111 data: 0.0233 max mem: 9305 +Train: [51] [2900/6250] eta: 0:07:41 lr: 0.000065 grad: 0.0972 (0.1007) loss: 0.8650 (0.8644) time: 0.1175 data: 0.0327 max mem: 9305 +Train: [51] [3000/6250] eta: 0:07:27 lr: 0.000065 grad: 0.0953 (0.1009) loss: 0.8651 (0.8643) time: 0.1454 data: 0.0598 max mem: 9305 +Train: [51] [3100/6250] eta: 0:07:13 lr: 0.000065 grad: 0.0979 (0.1010) loss: 0.8664 (0.8643) time: 0.1321 data: 0.0464 max mem: 9305 +Train: [51] [3200/6250] eta: 0:07:00 lr: 0.000065 grad: 0.1043 (0.1011) loss: 0.8583 (0.8642) time: 0.1183 data: 0.0382 max mem: 9305 +Train: [51] [3300/6250] eta: 0:06:46 lr: 0.000065 grad: 0.1007 (0.1012) loss: 0.8602 (0.8641) time: 0.1464 data: 0.0644 max mem: 9305 +Train: [51] [3400/6250] eta: 0:06:32 lr: 0.000064 grad: 0.1015 (0.1013) loss: 0.8595 (0.8640) time: 0.1614 data: 0.0795 max mem: 9305 +Train: [51] [3500/6250] eta: 0:06:19 lr: 0.000064 grad: 0.1056 (0.1014) loss: 0.8574 (0.8638) time: 0.1730 data: 0.0901 max mem: 9305 +Train: [51] [3600/6250] eta: 0:06:06 lr: 0.000064 grad: 0.1027 (0.1016) loss: 0.8601 (0.8637) time: 0.1322 data: 0.0439 max mem: 9305 +Train: [51] [3700/6250] eta: 0:05:53 lr: 0.000064 grad: 0.1027 (0.1017) loss: 0.8620 (0.8635) time: 0.1505 data: 0.0653 max mem: 9305 +Train: [51] [3800/6250] eta: 0:05:39 lr: 0.000064 grad: 0.1018 (0.1017) loss: 0.8611 (0.8635) time: 0.1514 data: 0.0655 max mem: 9305 +Train: [51] [3900/6250] eta: 0:05:25 lr: 0.000064 grad: 0.0996 (0.1018) loss: 0.8585 (0.8634) time: 0.1275 data: 0.0448 max mem: 9305 +Train: [51] [4000/6250] eta: 0:05:11 lr: 0.000064 grad: 0.1007 (0.1018) loss: 0.8606 (0.8633) time: 0.1312 data: 0.0494 max mem: 9305 +Train: [51] [4100/6250] eta: 0:04:56 lr: 0.000064 grad: 0.1011 (0.1019) loss: 0.8644 (0.8633) time: 0.1170 data: 0.0243 max mem: 9305 +Train: [51] [4200/6250] eta: 0:04:41 lr: 0.000064 grad: 0.1045 (0.1020) loss: 0.8634 (0.8633) time: 0.1268 data: 0.0410 max mem: 9305 +Train: [51] [4300/6250] eta: 0:04:27 lr: 0.000064 grad: 0.1075 (0.1020) loss: 0.8620 (0.8632) time: 0.1192 data: 0.0376 max mem: 9305 +Train: [51] [4400/6250] eta: 0:04:12 lr: 0.000064 grad: 0.0972 (0.1021) loss: 0.8617 (0.8631) time: 0.1274 data: 0.0434 max mem: 9305 +Train: [51] [4500/6250] eta: 0:03:58 lr: 0.000064 grad: 0.1072 (0.1022) loss: 0.8579 (0.8630) time: 0.1053 data: 0.0206 max mem: 9305 +Train: [51] [4600/6250] eta: 0:03:44 lr: 0.000064 grad: 0.1047 (0.1025) loss: 0.8607 (0.8629) time: 0.1257 data: 0.0423 max mem: 9305 +Train: [51] [4700/6250] eta: 0:03:31 lr: 0.000064 grad: 0.0997 (0.1025) loss: 0.8585 (0.8629) time: 0.1427 data: 0.0609 max mem: 9305 +Train: [51] [4800/6250] eta: 0:03:17 lr: 0.000064 grad: 0.1047 (0.1026) loss: 0.8582 (0.8628) time: 0.1502 data: 0.0678 max mem: 9305 +Train: [51] [4900/6250] eta: 0:03:04 lr: 0.000064 grad: 0.1013 (0.1027) loss: 0.8593 (0.8627) time: 0.1375 data: 0.0465 max mem: 9305 +Train: [51] [5000/6250] eta: 0:02:50 lr: 0.000064 grad: 0.1006 (0.1028) loss: 0.8651 (0.8627) time: 0.1390 data: 0.0490 max mem: 9305 +Train: [51] [5100/6250] eta: 0:02:36 lr: 0.000064 grad: 0.1069 (0.1028) loss: 0.8572 (0.8625) time: 0.1347 data: 0.0531 max mem: 9305 +Train: [51] [5200/6250] eta: 0:02:23 lr: 0.000064 grad: 0.0871 (0.1028) loss: 0.8637 (0.8625) time: 0.1393 data: 0.0607 max mem: 9305 +Train: [51] [5300/6250] eta: 0:02:09 lr: 0.000064 grad: 0.1084 (0.1029) loss: 0.8572 (0.8625) time: 0.1279 data: 0.0387 max mem: 9305 +Train: [51] [5400/6250] eta: 0:01:55 lr: 0.000064 grad: 0.0995 (0.1029) loss: 0.8628 (0.8625) time: 0.1296 data: 0.0488 max mem: 9305 +Train: [51] [5500/6250] eta: 0:01:42 lr: 0.000064 grad: 0.0996 (0.1029) loss: 0.8573 (0.8625) time: 0.1519 data: 0.0635 max mem: 9305 +Train: [51] [5600/6250] eta: 0:01:28 lr: 0.000064 grad: 0.0971 (0.1029) loss: 0.8675 (0.8625) time: 0.1399 data: 0.0503 max mem: 9305 +Train: [51] [5700/6250] eta: 0:01:15 lr: 0.000064 grad: 0.0955 (0.1029) loss: 0.8605 (0.8625) time: 0.1916 data: 0.1142 max mem: 9305 +Train: [51] [5800/6250] eta: 0:01:01 lr: 0.000064 grad: 0.1005 (0.1029) loss: 0.8648 (0.8624) time: 0.1377 data: 0.0572 max mem: 9305 +Train: [51] [5900/6250] eta: 0:00:47 lr: 0.000064 grad: 0.0959 (0.1029) loss: 0.8589 (0.8624) time: 0.1492 data: 0.0676 max mem: 9305 +Train: [51] [6000/6250] eta: 0:00:34 lr: 0.000064 grad: 0.0942 (0.1028) loss: 0.8602 (0.8624) time: 0.1794 data: 0.0943 max mem: 9305 +Train: [51] [6100/6250] eta: 0:00:20 lr: 0.000064 grad: 0.1021 (0.1028) loss: 0.8557 (0.8623) time: 0.1224 data: 0.0319 max mem: 9305 +Train: [51] [6200/6250] eta: 0:00:06 lr: 0.000064 grad: 0.0977 (0.1028) loss: 0.8606 (0.8623) time: 0.1227 data: 0.0420 max mem: 9305 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.0971 (0.1028) loss: 0.8628 (0.8623) time: 0.1426 data: 0.0594 max mem: 9305 +Train: [51] Total time: 0:14:23 (0.1381 s / it) +Averaged stats: lr: 0.000064 grad: 0.0971 (0.1028) loss: 0.8628 (0.8623) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:03:19 loss: 0.8884 (0.8884) time: 3.2199 data: 3.1362 max mem: 9305 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8783 (0.8793) time: 0.1270 data: 0.0968 max mem: 9305 +Eval (hcp-train-subset): [51] Total time: 0:00:13 (0.2134 s / it) +Averaged stats (hcp-train-subset): loss: 0.8783 (0.8793) +Eval (hcp-val): [51] [ 0/62] eta: 0:03:11 loss: 0.8848 (0.8848) time: 3.0937 data: 3.0246 max mem: 9305 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8778 (0.8805) time: 0.1439 data: 0.1132 max mem: 9305 +Eval (hcp-val): [51] Total time: 0:00:13 (0.2134 s / it) +Averaged stats (hcp-val): loss: 0.8778 (0.8805) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 9:36:34 lr: 0.000064 grad: 0.0877 (0.0877) loss: 0.9168 (0.9168) time: 5.5351 data: 5.4466 max mem: 9305 +Train: [52] [ 100/6250] eta: 0:20:11 lr: 0.000063 grad: 0.0808 (0.0953) loss: 0.8797 (0.8773) time: 0.1498 data: 0.0374 max mem: 9305 +Train: [52] [ 200/6250] eta: 0:17:33 lr: 0.000063 grad: 0.0887 (0.0934) loss: 0.8691 (0.8751) time: 0.1337 data: 0.0343 max mem: 9305 +Train: [52] [ 300/6250] eta: 0:16:11 lr: 0.000063 grad: 0.0898 (0.0943) loss: 0.8638 (0.8725) time: 0.1536 data: 0.0629 max mem: 9305 +Train: [52] [ 400/6250] eta: 0:15:17 lr: 0.000063 grad: 0.0892 (0.0951) loss: 0.8681 (0.8704) time: 0.1357 data: 0.0469 max mem: 9305 +Train: [52] [ 500/6250] eta: 0:14:39 lr: 0.000063 grad: 0.1024 (0.0960) loss: 0.8556 (0.8684) time: 0.1399 data: 0.0532 max mem: 9305 +Train: [52] [ 600/6250] eta: 0:14:06 lr: 0.000063 grad: 0.0917 (0.0961) loss: 0.8690 (0.8677) time: 0.1527 data: 0.0568 max mem: 9305 +Train: [52] [ 700/6250] eta: 0:14:11 lr: 0.000063 grad: 0.0918 (0.0966) loss: 0.8681 (0.8672) time: 0.1732 data: 0.0747 max mem: 9305 +Train: [52] [ 800/6250] eta: 0:13:49 lr: 0.000063 grad: 0.0909 (0.0969) loss: 0.8632 (0.8670) time: 0.1385 data: 0.0539 max mem: 9305 +Train: [52] [ 900/6250] eta: 0:13:32 lr: 0.000063 grad: 0.0943 (0.0966) loss: 0.8644 (0.8671) time: 0.1526 data: 0.0531 max mem: 9305 +Train: [52] [1000/6250] eta: 0:13:26 lr: 0.000063 grad: 0.0968 (0.0965) loss: 0.8637 (0.8671) time: 0.1000 data: 0.0043 max mem: 9305 +Train: [52] [1100/6250] eta: 0:13:00 lr: 0.000063 grad: 0.0886 (0.0962) loss: 0.8734 (0.8672) time: 0.1426 data: 0.0639 max mem: 9305 +Train: [52] [1200/6250] eta: 0:12:46 lr: 0.000063 grad: 0.0975 (0.0963) loss: 0.8634 (0.8671) time: 0.1450 data: 0.0642 max mem: 9305 +Train: [52] [1300/6250] eta: 0:12:25 lr: 0.000063 grad: 0.1024 (0.0965) loss: 0.8573 (0.8668) time: 0.1323 data: 0.0525 max mem: 9305 +Train: [52] [1400/6250] eta: 0:12:08 lr: 0.000063 grad: 0.1006 (0.0968) loss: 0.8570 (0.8664) time: 0.1518 data: 0.0625 max mem: 9305 +Train: [52] [1500/6250] eta: 0:11:56 lr: 0.000063 grad: 0.1079 (0.0972) loss: 0.8551 (0.8658) time: 0.1492 data: 0.0690 max mem: 9305 +Train: [52] [1600/6250] eta: 0:11:40 lr: 0.000063 grad: 0.1018 (0.0975) loss: 0.8604 (0.8656) time: 0.1692 data: 0.0896 max mem: 9305 +Train: [52] [1700/6250] eta: 0:11:24 lr: 0.000063 grad: 0.0977 (0.0976) loss: 0.8564 (0.8653) time: 0.1768 data: 0.0944 max mem: 9305 +Train: [52] [1800/6250] eta: 0:11:10 lr: 0.000063 grad: 0.0923 (0.0978) loss: 0.8658 (0.8651) time: 0.1558 data: 0.0729 max mem: 9305 +Train: [52] [1900/6250] eta: 0:10:57 lr: 0.000063 grad: 0.1014 (0.0980) loss: 0.8599 (0.8647) time: 0.1529 data: 0.0701 max mem: 9305 +Train: [52] [2000/6250] eta: 0:10:43 lr: 0.000063 grad: 0.0963 (0.0982) loss: 0.8641 (0.8644) time: 0.1347 data: 0.0510 max mem: 9305 +Train: [52] [2100/6250] eta: 0:10:28 lr: 0.000063 grad: 0.1026 (0.0984) loss: 0.8599 (0.8641) time: 0.0995 data: 0.0107 max mem: 9305 +Train: [52] [2200/6250] eta: 0:10:13 lr: 0.000063 grad: 0.0992 (0.0987) loss: 0.8640 (0.8638) time: 0.1432 data: 0.0606 max mem: 9305 +Train: [52] [2300/6250] eta: 0:09:57 lr: 0.000063 grad: 0.1018 (0.0989) loss: 0.8565 (0.8636) time: 0.1493 data: 0.0643 max mem: 9305 +Train: [52] [2400/6250] eta: 0:09:43 lr: 0.000063 grad: 0.1049 (0.0991) loss: 0.8540 (0.8634) time: 0.1231 data: 0.0380 max mem: 9305 +Train: [52] [2500/6250] eta: 0:09:29 lr: 0.000063 grad: 0.1046 (0.0993) loss: 0.8539 (0.8631) time: 0.1357 data: 0.0535 max mem: 9305 +Train: [52] [2600/6250] eta: 0:09:15 lr: 0.000063 grad: 0.1027 (0.0996) loss: 0.8535 (0.8628) time: 0.1758 data: 0.0915 max mem: 9305 +Train: [52] [2700/6250] eta: 0:09:02 lr: 0.000063 grad: 0.1005 (0.0996) loss: 0.8518 (0.8627) time: 0.1443 data: 0.0627 max mem: 9305 +Train: [52] [2800/6250] eta: 0:08:47 lr: 0.000063 grad: 0.0941 (0.0997) loss: 0.8570 (0.8625) time: 0.1493 data: 0.0650 max mem: 9305 +Train: [52] [2900/6250] eta: 0:08:32 lr: 0.000063 grad: 0.0946 (0.0998) loss: 0.8601 (0.8623) time: 0.1828 data: 0.1048 max mem: 9305 +Train: [52] [3000/6250] eta: 0:08:14 lr: 0.000063 grad: 0.1000 (0.1000) loss: 0.8539 (0.8622) time: 0.1474 data: 0.0655 max mem: 9305 +Train: [52] [3100/6250] eta: 0:07:57 lr: 0.000063 grad: 0.1062 (0.1001) loss: 0.8597 (0.8621) time: 0.1201 data: 0.0393 max mem: 9305 +Train: [52] [3200/6250] eta: 0:07:41 lr: 0.000062 grad: 0.0998 (0.1003) loss: 0.8616 (0.8620) time: 0.1448 data: 0.0605 max mem: 9305 +Train: [52] [3300/6250] eta: 0:07:27 lr: 0.000062 grad: 0.0987 (0.1004) loss: 0.8673 (0.8619) time: 0.1300 data: 0.0534 max mem: 9305 +Train: [52] [3400/6250] eta: 0:07:11 lr: 0.000062 grad: 0.1034 (0.1005) loss: 0.8547 (0.8618) time: 0.1494 data: 0.0679 max mem: 9305 +Train: [52] [3500/6250] eta: 0:06:57 lr: 0.000062 grad: 0.1021 (0.1007) loss: 0.8550 (0.8617) time: 0.1618 data: 0.0844 max mem: 9305 +Train: [52] [3600/6250] eta: 0:06:42 lr: 0.000062 grad: 0.1077 (0.1008) loss: 0.8531 (0.8615) time: 0.1591 data: 0.0686 max mem: 9305 +Train: [52] [3700/6250] eta: 0:06:27 lr: 0.000062 grad: 0.1026 (0.1010) loss: 0.8578 (0.8614) time: 0.1536 data: 0.0711 max mem: 9305 +Train: [52] [3800/6250] eta: 0:06:11 lr: 0.000062 grad: 0.1075 (0.1011) loss: 0.8554 (0.8613) time: 0.1292 data: 0.0479 max mem: 9305 +Train: [52] [3900/6250] eta: 0:05:55 lr: 0.000062 grad: 0.0907 (0.1012) loss: 0.8637 (0.8612) time: 0.1400 data: 0.0459 max mem: 9305 +Train: [52] [4000/6250] eta: 0:05:39 lr: 0.000062 grad: 0.0920 (0.1012) loss: 0.8538 (0.8611) time: 0.1427 data: 0.0562 max mem: 9305 +Train: [52] [4100/6250] eta: 0:05:23 lr: 0.000062 grad: 0.0955 (0.1013) loss: 0.8613 (0.8610) time: 0.1173 data: 0.0319 max mem: 9305 +Train: [52] [4200/6250] eta: 0:05:06 lr: 0.000062 grad: 0.0952 (0.1013) loss: 0.8587 (0.8609) time: 0.1073 data: 0.0253 max mem: 9305 +Train: [52] [4300/6250] eta: 0:04:50 lr: 0.000062 grad: 0.0983 (0.1014) loss: 0.8557 (0.8608) time: 0.1135 data: 0.0213 max mem: 9305 +Train: [52] [4400/6250] eta: 0:04:35 lr: 0.000062 grad: 0.1045 (0.1016) loss: 0.8513 (0.8607) time: 0.1357 data: 0.0577 max mem: 9305 +Train: [52] [4500/6250] eta: 0:04:20 lr: 0.000062 grad: 0.0982 (0.1016) loss: 0.8632 (0.8606) time: 0.1566 data: 0.0748 max mem: 9305 +Train: [52] [4600/6250] eta: 0:04:05 lr: 0.000062 grad: 0.1038 (0.1017) loss: 0.8509 (0.8605) time: 0.1298 data: 0.0500 max mem: 9305 +Train: [52] [4700/6250] eta: 0:03:50 lr: 0.000062 grad: 0.1048 (0.1019) loss: 0.8522 (0.8604) time: 0.1501 data: 0.0671 max mem: 9305 +Train: [52] [4800/6250] eta: 0:03:35 lr: 0.000062 grad: 0.1004 (0.1020) loss: 0.8493 (0.8602) time: 0.1171 data: 0.0222 max mem: 9305 +Train: [52] [4900/6250] eta: 0:03:20 lr: 0.000062 grad: 0.1098 (0.1022) loss: 0.8493 (0.8601) time: 0.1548 data: 0.0725 max mem: 9305 +Train: [52] [5000/6250] eta: 0:03:05 lr: 0.000062 grad: 0.1064 (0.1023) loss: 0.8450 (0.8600) time: 0.1429 data: 0.0557 max mem: 9305 +Train: [52] [5100/6250] eta: 0:02:50 lr: 0.000062 grad: 0.1069 (0.1025) loss: 0.8602 (0.8599) time: 0.1116 data: 0.0298 max mem: 9305 +Train: [52] [5200/6250] eta: 0:02:35 lr: 0.000062 grad: 0.1018 (0.1026) loss: 0.8570 (0.8598) time: 0.1119 data: 0.0241 max mem: 9305 +Train: [52] [5300/6250] eta: 0:02:21 lr: 0.000062 grad: 0.1090 (0.1028) loss: 0.8527 (0.8596) time: 0.1549 data: 0.0616 max mem: 9305 +Train: [52] [5400/6250] eta: 0:02:06 lr: 0.000062 grad: 0.1108 (0.1028) loss: 0.8537 (0.8595) time: 0.1074 data: 0.0002 max mem: 9305 +Train: [52] [5500/6250] eta: 0:01:51 lr: 0.000062 grad: 0.1041 (0.1030) loss: 0.8547 (0.8594) time: 0.1531 data: 0.0699 max mem: 9305 +Train: [52] [5600/6250] eta: 0:01:36 lr: 0.000062 grad: 0.1039 (0.1031) loss: 0.8561 (0.8594) time: 0.1596 data: 0.0681 max mem: 9305 +Train: [52] [5700/6250] eta: 0:01:21 lr: 0.000062 grad: 0.1149 (0.1032) loss: 0.8552 (0.8593) time: 0.1505 data: 0.0678 max mem: 9305 +Train: [52] [5800/6250] eta: 0:01:06 lr: 0.000062 grad: 0.1061 (0.1032) loss: 0.8655 (0.8592) time: 0.1647 data: 0.0840 max mem: 9305 +Train: [52] [5900/6250] eta: 0:00:51 lr: 0.000062 grad: 0.1007 (0.1033) loss: 0.8510 (0.8592) time: 0.1437 data: 0.0570 max mem: 9305 +Train: [52] [6000/6250] eta: 0:00:37 lr: 0.000062 grad: 0.1007 (0.1033) loss: 0.8577 (0.8592) time: 0.1400 data: 0.0568 max mem: 9305 +Train: [52] [6100/6250] eta: 0:00:22 lr: 0.000062 grad: 0.0986 (0.1033) loss: 0.8644 (0.8592) time: 0.0831 data: 0.0002 max mem: 9305 +Train: [52] [6200/6250] eta: 0:00:07 lr: 0.000061 grad: 0.1024 (0.1033) loss: 0.8621 (0.8592) time: 0.1326 data: 0.0510 max mem: 9305 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.1084 (0.1034) loss: 0.8619 (0.8592) time: 0.1176 data: 0.0355 max mem: 9305 +Train: [52] Total time: 0:15:32 (0.1491 s / it) +Averaged stats: lr: 0.000061 grad: 0.1084 (0.1034) loss: 0.8619 (0.8592) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:04:02 loss: 0.8908 (0.8908) time: 3.9048 data: 3.8269 max mem: 9305 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8815 (0.8819) time: 0.1214 data: 0.0922 max mem: 9305 +Eval (hcp-train-subset): [52] Total time: 0:00:13 (0.2177 s / it) +Averaged stats (hcp-train-subset): loss: 0.8815 (0.8819) +Eval (hcp-val): [52] [ 0/62] eta: 0:03:34 loss: 0.8776 (0.8776) time: 3.4631 data: 3.3902 max mem: 9305 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8796 (0.8805) time: 0.1236 data: 0.0949 max mem: 9305 +Eval (hcp-val): [52] Total time: 0:00:12 (0.2053 s / it) +Averaged stats (hcp-val): loss: 0.8796 (0.8805) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [53] [ 0/6250] eta: 8:29:55 lr: 0.000061 grad: 0.1499 (0.1499) loss: 0.8872 (0.8872) time: 4.8953 data: 4.7175 max mem: 9305 +Train: [53] [ 100/6250] eta: 0:19:43 lr: 0.000061 grad: 0.0808 (0.0997) loss: 0.8792 (0.8779) time: 0.1375 data: 0.0398 max mem: 9305 +Train: [53] [ 200/6250] eta: 0:16:41 lr: 0.000061 grad: 0.0848 (0.0985) loss: 0.8748 (0.8747) time: 0.1457 data: 0.0516 max mem: 9305 +Train: [53] [ 300/6250] eta: 0:15:21 lr: 0.000061 grad: 0.0868 (0.0964) loss: 0.8764 (0.8738) time: 0.1358 data: 0.0458 max mem: 9305 +Train: [53] [ 400/6250] eta: 0:14:19 lr: 0.000061 grad: 0.0900 (0.0968) loss: 0.8631 (0.8723) time: 0.1091 data: 0.0163 max mem: 9305 +Train: [53] [ 500/6250] eta: 0:13:35 lr: 0.000061 grad: 0.0866 (0.0969) loss: 0.8656 (0.8711) time: 0.1292 data: 0.0404 max mem: 9305 +Train: [53] [ 600/6250] eta: 0:13:02 lr: 0.000061 grad: 0.0914 (0.0976) loss: 0.8673 (0.8708) time: 0.1252 data: 0.0427 max mem: 9305 +Train: [53] [ 700/6250] eta: 0:12:35 lr: 0.000061 grad: 0.0945 (0.0974) loss: 0.8639 (0.8704) time: 0.1114 data: 0.0290 max mem: 9305 +Train: [53] [ 800/6250] eta: 0:12:19 lr: 0.000061 grad: 0.0942 (0.0974) loss: 0.8689 (0.8701) time: 0.1420 data: 0.0530 max mem: 9305 +Train: [53] [ 900/6250] eta: 0:12:06 lr: 0.000061 grad: 0.0915 (0.0972) loss: 0.8666 (0.8699) time: 0.1434 data: 0.0533 max mem: 9305 +Train: [53] [1000/6250] eta: 0:11:55 lr: 0.000061 grad: 0.0964 (0.0970) loss: 0.8629 (0.8695) time: 0.1544 data: 0.0700 max mem: 9305 +Train: [53] [1100/6250] eta: 0:11:38 lr: 0.000061 grad: 0.0949 (0.0972) loss: 0.8611 (0.8690) time: 0.1457 data: 0.0601 max mem: 9305 +Train: [53] [1200/6250] eta: 0:11:20 lr: 0.000061 grad: 0.1087 (0.0977) loss: 0.8530 (0.8681) time: 0.1334 data: 0.0579 max mem: 9305 +Train: [53] [1300/6250] eta: 0:11:05 lr: 0.000061 grad: 0.1040 (0.0982) loss: 0.8583 (0.8673) time: 0.1472 data: 0.0654 max mem: 9305 +Train: [53] [1400/6250] eta: 0:10:52 lr: 0.000061 grad: 0.1012 (0.0984) loss: 0.8605 (0.8669) time: 0.1216 data: 0.0401 max mem: 9305 +Train: [53] [1500/6250] eta: 0:10:38 lr: 0.000061 grad: 0.0963 (0.0985) loss: 0.8610 (0.8665) time: 0.1144 data: 0.0314 max mem: 9305 +Train: [53] [1600/6250] eta: 0:10:25 lr: 0.000061 grad: 0.0969 (0.0988) loss: 0.8599 (0.8661) time: 0.1372 data: 0.0567 max mem: 9305 +Train: [53] [1700/6250] eta: 0:10:22 lr: 0.000061 grad: 0.0956 (0.0990) loss: 0.8574 (0.8657) time: 0.1251 data: 0.0280 max mem: 9305 +Train: [53] [1800/6250] eta: 0:10:06 lr: 0.000061 grad: 0.0964 (0.0992) loss: 0.8658 (0.8654) time: 0.1337 data: 0.0534 max mem: 9305 +Train: [53] [1900/6250] eta: 0:09:51 lr: 0.000061 grad: 0.1021 (0.0997) loss: 0.8641 (0.8650) time: 0.1427 data: 0.0574 max mem: 9305 +Train: [53] [2000/6250] eta: 0:09:38 lr: 0.000061 grad: 0.0982 (0.0999) loss: 0.8614 (0.8647) time: 0.1331 data: 0.0518 max mem: 9305 +Train: [53] [2100/6250] eta: 0:09:25 lr: 0.000061 grad: 0.0961 (0.1001) loss: 0.8632 (0.8645) time: 0.1617 data: 0.0765 max mem: 9305 +Train: [53] [2200/6250] eta: 0:09:10 lr: 0.000061 grad: 0.1019 (0.1003) loss: 0.8617 (0.8642) time: 0.1309 data: 0.0493 max mem: 9305 +Train: [53] [2300/6250] eta: 0:08:56 lr: 0.000061 grad: 0.1089 (0.1007) loss: 0.8547 (0.8638) time: 0.1147 data: 0.0263 max mem: 9305 +Train: [53] [2400/6250] eta: 0:08:42 lr: 0.000061 grad: 0.0980 (0.1009) loss: 0.8582 (0.8635) time: 0.1275 data: 0.0281 max mem: 9305 +Train: [53] [2500/6250] eta: 0:08:28 lr: 0.000061 grad: 0.1021 (0.1011) loss: 0.8588 (0.8632) time: 0.1350 data: 0.0511 max mem: 9305 +Train: [53] [2600/6250] eta: 0:08:14 lr: 0.000061 grad: 0.1018 (0.1012) loss: 0.8631 (0.8631) time: 0.1372 data: 0.0517 max mem: 9305 +Train: [53] [2700/6250] eta: 0:07:59 lr: 0.000061 grad: 0.1024 (0.1014) loss: 0.8517 (0.8628) time: 0.1333 data: 0.0485 max mem: 9305 +Train: [53] [2800/6250] eta: 0:07:45 lr: 0.000061 grad: 0.1036 (0.1016) loss: 0.8561 (0.8625) time: 0.1362 data: 0.0586 max mem: 9305 +Train: [53] [2900/6250] eta: 0:07:32 lr: 0.000061 grad: 0.1010 (0.1018) loss: 0.8549 (0.8622) time: 0.1317 data: 0.0455 max mem: 9305 +Train: [53] [3000/6250] eta: 0:07:22 lr: 0.000060 grad: 0.1047 (0.1020) loss: 0.8430 (0.8618) time: 0.2284 data: 0.1538 max mem: 9305 +Train: [53] [3100/6250] eta: 0:07:09 lr: 0.000060 grad: 0.1104 (0.1022) loss: 0.8550 (0.8616) time: 0.1698 data: 0.0904 max mem: 9305 +Train: [53] [3200/6250] eta: 0:06:55 lr: 0.000060 grad: 0.1071 (0.1024) loss: 0.8548 (0.8614) time: 0.1398 data: 0.0582 max mem: 9305 +Train: [53] [3300/6250] eta: 0:06:42 lr: 0.000060 grad: 0.1018 (0.1025) loss: 0.8553 (0.8611) time: 0.1465 data: 0.0587 max mem: 9305 +Train: [53] [3400/6250] eta: 0:06:30 lr: 0.000060 grad: 0.0999 (0.1026) loss: 0.8556 (0.8609) time: 0.1465 data: 0.0551 max mem: 9305 +Train: [53] [3500/6250] eta: 0:06:16 lr: 0.000060 grad: 0.1012 (0.1027) loss: 0.8558 (0.8609) time: 0.1556 data: 0.0668 max mem: 9305 +Train: [53] [3600/6250] eta: 0:06:03 lr: 0.000060 grad: 0.0982 (0.1028) loss: 0.8588 (0.8607) time: 0.1310 data: 0.0463 max mem: 9305 +Train: [53] [3700/6250] eta: 0:05:49 lr: 0.000060 grad: 0.1110 (0.1030) loss: 0.8614 (0.8607) time: 0.1318 data: 0.0448 max mem: 9305 +Train: [53] [3800/6250] eta: 0:05:34 lr: 0.000060 grad: 0.0987 (0.1030) loss: 0.8597 (0.8606) time: 0.1129 data: 0.0266 max mem: 9305 +Train: [53] [3900/6250] eta: 0:05:19 lr: 0.000060 grad: 0.1000 (0.1030) loss: 0.8603 (0.8606) time: 0.1217 data: 0.0385 max mem: 9305 +Train: [53] [4000/6250] eta: 0:05:05 lr: 0.000060 grad: 0.1024 (0.1030) loss: 0.8699 (0.8606) time: 0.1271 data: 0.0415 max mem: 9305 +Train: [53] [4100/6250] eta: 0:04:51 lr: 0.000060 grad: 0.1077 (0.1031) loss: 0.8582 (0.8606) time: 0.1407 data: 0.0545 max mem: 9305 +Train: [53] [4200/6250] eta: 0:04:37 lr: 0.000060 grad: 0.1016 (0.1032) loss: 0.8579 (0.8605) time: 0.1176 data: 0.0278 max mem: 9305 +Train: [53] [4300/6250] eta: 0:04:24 lr: 0.000060 grad: 0.1039 (0.1032) loss: 0.8509 (0.8604) time: 0.1444 data: 0.0586 max mem: 9305 +Train: [53] [4400/6250] eta: 0:04:10 lr: 0.000060 grad: 0.1074 (0.1033) loss: 0.8501 (0.8604) time: 0.1542 data: 0.0684 max mem: 9305 +Train: [53] [4500/6250] eta: 0:03:57 lr: 0.000060 grad: 0.0967 (0.1033) loss: 0.8634 (0.8604) time: 0.1044 data: 0.0148 max mem: 9305 +Train: [53] [4600/6250] eta: 0:03:43 lr: 0.000060 grad: 0.0998 (0.1033) loss: 0.8625 (0.8604) time: 0.1257 data: 0.0283 max mem: 9305 +Train: [53] [4700/6250] eta: 0:03:30 lr: 0.000060 grad: 0.1013 (0.1033) loss: 0.8610 (0.8604) time: 0.1298 data: 0.0456 max mem: 9305 +Train: [53] [4800/6250] eta: 0:03:17 lr: 0.000060 grad: 0.0987 (0.1033) loss: 0.8575 (0.8604) time: 0.1331 data: 0.0500 max mem: 9305 +Train: [53] [4900/6250] eta: 0:03:03 lr: 0.000060 grad: 0.1072 (0.1034) loss: 0.8658 (0.8603) time: 0.1270 data: 0.0459 max mem: 9305 +Train: [53] [5000/6250] eta: 0:02:49 lr: 0.000060 grad: 0.1041 (0.1034) loss: 0.8629 (0.8603) time: 0.1327 data: 0.0509 max mem: 9305 +Train: [53] [5100/6250] eta: 0:02:36 lr: 0.000060 grad: 0.1050 (0.1034) loss: 0.8589 (0.8603) time: 0.1726 data: 0.0700 max mem: 9305 +Train: [53] [5200/6250] eta: 0:02:23 lr: 0.000060 grad: 0.0987 (0.1035) loss: 0.8627 (0.8603) time: 0.1092 data: 0.0002 max mem: 9305 +Train: [53] [5300/6250] eta: 0:02:09 lr: 0.000060 grad: 0.0991 (0.1035) loss: 0.8568 (0.8603) time: 0.1269 data: 0.0391 max mem: 9305 +Train: [53] [5400/6250] eta: 0:01:55 lr: 0.000060 grad: 0.1077 (0.1035) loss: 0.8576 (0.8603) time: 0.1168 data: 0.0305 max mem: 9305 +Train: [53] [5500/6250] eta: 0:01:41 lr: 0.000060 grad: 0.1065 (0.1037) loss: 0.8547 (0.8602) time: 0.1123 data: 0.0306 max mem: 9305 +Train: [53] [5600/6250] eta: 0:01:28 lr: 0.000060 grad: 0.1008 (0.1037) loss: 0.8577 (0.8602) time: 0.1111 data: 0.0287 max mem: 9305 +Train: [53] [5700/6250] eta: 0:01:14 lr: 0.000060 grad: 0.1039 (0.1037) loss: 0.8607 (0.8601) time: 0.1326 data: 0.0482 max mem: 9305 +Train: [53] [5800/6250] eta: 0:01:01 lr: 0.000060 grad: 0.1020 (0.1038) loss: 0.8548 (0.8601) time: 0.1456 data: 0.0634 max mem: 9305 +Train: [53] [5900/6250] eta: 0:00:47 lr: 0.000060 grad: 0.1052 (0.1038) loss: 0.8578 (0.8600) time: 0.1575 data: 0.0736 max mem: 9305 +Train: [53] [6000/6250] eta: 0:00:34 lr: 0.000059 grad: 0.1001 (0.1038) loss: 0.8562 (0.8600) time: 0.1215 data: 0.0353 max mem: 9305 +Train: [53] [6100/6250] eta: 0:00:20 lr: 0.000059 grad: 0.1056 (0.1039) loss: 0.8466 (0.8599) time: 0.1233 data: 0.0448 max mem: 9305 +Train: [53] [6200/6250] eta: 0:00:06 lr: 0.000059 grad: 0.1079 (0.1040) loss: 0.8550 (0.8598) time: 0.1600 data: 0.0686 max mem: 9305 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.1089 (0.1040) loss: 0.8597 (0.8598) time: 0.1406 data: 0.0463 max mem: 9305 +Train: [53] Total time: 0:14:16 (0.1370 s / it) +Averaged stats: lr: 0.000059 grad: 0.1089 (0.1040) loss: 0.8597 (0.8598) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:05:11 loss: 0.8933 (0.8933) time: 5.0281 data: 4.9892 max mem: 9305 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8780 (0.8809) time: 0.1319 data: 0.1036 max mem: 9305 +Eval (hcp-train-subset): [53] Total time: 0:00:13 (0.2104 s / it) +Averaged stats (hcp-train-subset): loss: 0.8780 (0.8809) +Eval (hcp-val): [53] [ 0/62] eta: 0:04:49 loss: 0.8786 (0.8786) time: 4.6715 data: 4.6109 max mem: 9305 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8785 (0.8800) time: 0.1473 data: 0.1189 max mem: 9305 +Eval (hcp-val): [53] Total time: 0:00:14 (0.2274 s / it) +Averaged stats (hcp-val): loss: 0.8785 (0.8800) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 7:30:42 lr: 0.000059 grad: 0.2118 (0.2118) loss: 0.8592 (0.8592) time: 4.3269 data: 4.0576 max mem: 9305 +Train: [54] [ 100/6250] eta: 0:20:21 lr: 0.000059 grad: 0.1068 (0.1264) loss: 0.8697 (0.8783) time: 0.1482 data: 0.0471 max mem: 9305 +Train: [54] [ 200/6250] eta: 0:17:55 lr: 0.000059 grad: 0.1045 (0.1167) loss: 0.8595 (0.8724) time: 0.1546 data: 0.0584 max mem: 9305 +Train: [54] [ 300/6250] eta: 0:16:40 lr: 0.000059 grad: 0.0996 (0.1130) loss: 0.8624 (0.8688) time: 0.1448 data: 0.0518 max mem: 9305 +Train: [54] [ 400/6250] eta: 0:15:46 lr: 0.000059 grad: 0.1022 (0.1103) loss: 0.8576 (0.8673) time: 0.1504 data: 0.0616 max mem: 9305 +Train: [54] [ 500/6250] eta: 0:15:04 lr: 0.000059 grad: 0.0925 (0.1091) loss: 0.8596 (0.8654) time: 0.1305 data: 0.0334 max mem: 9305 +Train: [54] [ 600/6250] eta: 0:14:29 lr: 0.000059 grad: 0.1021 (0.1088) loss: 0.8553 (0.8638) time: 0.1345 data: 0.0478 max mem: 9305 +Train: [54] [ 700/6250] eta: 0:14:02 lr: 0.000059 grad: 0.0966 (0.1075) loss: 0.8597 (0.8632) time: 0.1451 data: 0.0479 max mem: 9305 +Train: [54] [ 800/6250] eta: 0:13:44 lr: 0.000059 grad: 0.0962 (0.1065) loss: 0.8619 (0.8628) time: 0.1717 data: 0.0811 max mem: 9305 +Train: [54] [ 900/6250] eta: 0:13:36 lr: 0.000059 grad: 0.0998 (0.1060) loss: 0.8652 (0.8628) time: 0.1642 data: 0.0734 max mem: 9305 +Train: [54] [1000/6250] eta: 0:13:38 lr: 0.000059 grad: 0.0972 (0.1052) loss: 0.8648 (0.8630) time: 0.1760 data: 0.0934 max mem: 9305 +Train: [54] [1100/6250] eta: 0:13:25 lr: 0.000059 grad: 0.1011 (0.1049) loss: 0.8579 (0.8629) time: 0.1801 data: 0.1032 max mem: 9305 +Train: [54] [1200/6250] eta: 0:13:11 lr: 0.000059 grad: 0.1042 (0.1048) loss: 0.8572 (0.8628) time: 0.1733 data: 0.0890 max mem: 9305 +Train: [54] [1300/6250] eta: 0:12:59 lr: 0.000059 grad: 0.0952 (0.1048) loss: 0.8669 (0.8626) time: 0.1348 data: 0.0533 max mem: 9305 +Train: [54] [1400/6250] eta: 0:12:44 lr: 0.000059 grad: 0.1043 (0.1048) loss: 0.8616 (0.8625) time: 0.1483 data: 0.0629 max mem: 9305 +Train: [54] [1500/6250] eta: 0:12:25 lr: 0.000059 grad: 0.1038 (0.1051) loss: 0.8590 (0.8623) time: 0.1517 data: 0.0676 max mem: 9305 +Train: [54] [1600/6250] eta: 0:12:05 lr: 0.000059 grad: 0.1035 (0.1052) loss: 0.8576 (0.8621) time: 0.1458 data: 0.0633 max mem: 9305 +Train: [54] [1700/6250] eta: 0:11:46 lr: 0.000059 grad: 0.1007 (0.1052) loss: 0.8638 (0.8620) time: 0.1689 data: 0.0831 max mem: 9305 +Train: [54] [1800/6250] eta: 0:11:35 lr: 0.000059 grad: 0.0917 (0.1051) loss: 0.8604 (0.8619) time: 0.2744 data: 0.1745 max mem: 9305 +Train: [54] [1900/6250] eta: 0:11:13 lr: 0.000059 grad: 0.0980 (0.1050) loss: 0.8642 (0.8619) time: 0.1101 data: 0.0108 max mem: 9305 +Train: [54] [2000/6250] eta: 0:10:57 lr: 0.000059 grad: 0.0989 (0.1049) loss: 0.8642 (0.8619) time: 0.1779 data: 0.0948 max mem: 9305 +Train: [54] [2100/6250] eta: 0:10:44 lr: 0.000059 grad: 0.0970 (0.1047) loss: 0.8570 (0.8620) time: 0.1543 data: 0.0744 max mem: 9305 +Train: [54] [2200/6250] eta: 0:10:26 lr: 0.000059 grad: 0.1055 (0.1046) loss: 0.8592 (0.8620) time: 0.1411 data: 0.0569 max mem: 9305 +Train: [54] [2300/6250] eta: 0:10:09 lr: 0.000059 grad: 0.0940 (0.1046) loss: 0.8667 (0.8620) time: 0.1674 data: 0.0794 max mem: 9305 +Train: [54] [2400/6250] eta: 0:09:51 lr: 0.000059 grad: 0.0991 (0.1046) loss: 0.8673 (0.8620) time: 0.1467 data: 0.0636 max mem: 9305 +Train: [54] [2500/6250] eta: 0:09:32 lr: 0.000059 grad: 0.0957 (0.1045) loss: 0.8676 (0.8620) time: 0.1048 data: 0.0173 max mem: 9305 +Train: [54] [2600/6250] eta: 0:09:15 lr: 0.000059 grad: 0.1093 (0.1044) loss: 0.8610 (0.8620) time: 0.1613 data: 0.0815 max mem: 9305 +Train: [54] [2700/6250] eta: 0:08:59 lr: 0.000059 grad: 0.0945 (0.1043) loss: 0.8624 (0.8620) time: 0.1363 data: 0.0569 max mem: 9305 +Train: [54] [2800/6250] eta: 0:08:46 lr: 0.000058 grad: 0.0979 (0.1043) loss: 0.8625 (0.8620) time: 0.0950 data: 0.0002 max mem: 9305 +Train: [54] [2900/6250] eta: 0:08:30 lr: 0.000058 grad: 0.0999 (0.1042) loss: 0.8629 (0.8619) time: 0.1519 data: 0.0674 max mem: 9305 +Train: [54] [3000/6250] eta: 0:08:14 lr: 0.000058 grad: 0.1046 (0.1042) loss: 0.8552 (0.8618) time: 0.1527 data: 0.0683 max mem: 9305 +Train: [54] [3100/6250] eta: 0:07:58 lr: 0.000058 grad: 0.1025 (0.1043) loss: 0.8556 (0.8618) time: 0.1494 data: 0.0630 max mem: 9305 +Train: [54] [3200/6250] eta: 0:07:43 lr: 0.000058 grad: 0.1045 (0.1044) loss: 0.8575 (0.8617) time: 0.1606 data: 0.0735 max mem: 9305 +Train: [54] [3300/6250] eta: 0:07:27 lr: 0.000058 grad: 0.0971 (0.1045) loss: 0.8614 (0.8616) time: 0.1710 data: 0.0883 max mem: 9305 +Train: [54] [3400/6250] eta: 0:07:10 lr: 0.000058 grad: 0.1217 (0.1046) loss: 0.8572 (0.8615) time: 0.1320 data: 0.0483 max mem: 9305 +Train: [54] [3500/6250] eta: 0:06:53 lr: 0.000058 grad: 0.0989 (0.1046) loss: 0.8587 (0.8614) time: 0.1230 data: 0.0398 max mem: 9305 +Train: [54] [3600/6250] eta: 0:06:36 lr: 0.000058 grad: 0.1069 (0.1047) loss: 0.8606 (0.8613) time: 0.0991 data: 0.0165 max mem: 9305 +Train: [54] [3700/6250] eta: 0:06:18 lr: 0.000058 grad: 0.0972 (0.1047) loss: 0.8633 (0.8612) time: 0.1122 data: 0.0213 max mem: 9305 +Train: [54] [3800/6250] eta: 0:06:01 lr: 0.000058 grad: 0.0981 (0.1047) loss: 0.8604 (0.8611) time: 0.1229 data: 0.0322 max mem: 9305 +Train: [54] [3900/6250] eta: 0:05:46 lr: 0.000058 grad: 0.0983 (0.1047) loss: 0.8617 (0.8611) time: 0.1466 data: 0.0617 max mem: 9305 +Train: [54] [4000/6250] eta: 0:05:32 lr: 0.000058 grad: 0.1027 (0.1048) loss: 0.8597 (0.8610) time: 0.1734 data: 0.0906 max mem: 9305 +Train: [54] [4100/6250] eta: 0:05:19 lr: 0.000058 grad: 0.1010 (0.1049) loss: 0.8605 (0.8609) time: 0.1556 data: 0.0744 max mem: 9305 +Train: [54] [4200/6250] eta: 0:05:05 lr: 0.000058 grad: 0.1038 (0.1049) loss: 0.8607 (0.8608) time: 0.1819 data: 0.1055 max mem: 9305 +Train: [54] [4300/6250] eta: 0:04:51 lr: 0.000058 grad: 0.0990 (0.1049) loss: 0.8626 (0.8607) time: 0.1240 data: 0.0360 max mem: 9305 +Train: [54] [4400/6250] eta: 0:04:36 lr: 0.000058 grad: 0.1016 (0.1050) loss: 0.8583 (0.8607) time: 0.1507 data: 0.0698 max mem: 9305 +Train: [54] [4500/6250] eta: 0:04:22 lr: 0.000058 grad: 0.1053 (0.1050) loss: 0.8513 (0.8606) time: 0.1323 data: 0.0505 max mem: 9305 +Train: [54] [4600/6250] eta: 0:04:07 lr: 0.000058 grad: 0.1083 (0.1053) loss: 0.8615 (0.8606) time: 0.1757 data: 0.0886 max mem: 9305 +Train: [54] [4700/6250] eta: 0:03:52 lr: 0.000058 grad: 0.1095 (0.1053) loss: 0.8583 (0.8606) time: 0.1281 data: 0.0506 max mem: 9305 +Train: [54] [4800/6250] eta: 0:03:37 lr: 0.000058 grad: 0.1011 (0.1054) loss: 0.8571 (0.8605) time: 0.1409 data: 0.0638 max mem: 9305 +Train: [54] [4900/6250] eta: 0:03:22 lr: 0.000058 grad: 0.1041 (0.1055) loss: 0.8626 (0.8605) time: 0.1548 data: 0.0765 max mem: 9305 +Train: [54] [5000/6250] eta: 0:03:06 lr: 0.000058 grad: 0.0976 (0.1055) loss: 0.8667 (0.8605) time: 0.1447 data: 0.0620 max mem: 9305 +Train: [54] [5100/6250] eta: 0:02:52 lr: 0.000058 grad: 0.1021 (0.1055) loss: 0.8545 (0.8605) time: 0.1371 data: 0.0410 max mem: 9305 +Train: [54] [5200/6250] eta: 0:02:36 lr: 0.000058 grad: 0.0990 (0.1055) loss: 0.8543 (0.8605) time: 0.1177 data: 0.0359 max mem: 9305 +Train: [54] [5300/6250] eta: 0:02:21 lr: 0.000058 grad: 0.1050 (0.1055) loss: 0.8645 (0.8605) time: 0.1433 data: 0.0593 max mem: 9305 +Train: [54] [5400/6250] eta: 0:02:06 lr: 0.000058 grad: 0.1001 (0.1055) loss: 0.8601 (0.8605) time: 0.1401 data: 0.0572 max mem: 9305 +Train: [54] [5500/6250] eta: 0:01:51 lr: 0.000058 grad: 0.1040 (0.1055) loss: 0.8611 (0.8606) time: 0.0980 data: 0.0002 max mem: 9305 +Train: [54] [5600/6250] eta: 0:01:36 lr: 0.000058 grad: 0.0960 (0.1055) loss: 0.8647 (0.8606) time: 0.1545 data: 0.0695 max mem: 9305 +Train: [54] [5700/6250] eta: 0:01:21 lr: 0.000058 grad: 0.0979 (0.1054) loss: 0.8629 (0.8606) time: 0.1340 data: 0.0466 max mem: 9305 +Train: [54] [5800/6250] eta: 0:01:07 lr: 0.000057 grad: 0.0978 (0.1054) loss: 0.8614 (0.8606) time: 0.2641 data: 0.1738 max mem: 9305 +Train: [54] [5900/6250] eta: 0:00:52 lr: 0.000057 grad: 0.1060 (0.1055) loss: 0.8612 (0.8606) time: 0.2355 data: 0.1490 max mem: 9305 +Train: [54] [6000/6250] eta: 0:00:37 lr: 0.000057 grad: 0.1102 (0.1055) loss: 0.8588 (0.8606) time: 0.1192 data: 0.0403 max mem: 9305 +Train: [54] [6100/6250] eta: 0:00:22 lr: 0.000057 grad: 0.1058 (0.1055) loss: 0.8621 (0.8606) time: 0.1393 data: 0.0584 max mem: 9305 +Train: [54] [6200/6250] eta: 0:00:07 lr: 0.000057 grad: 0.1083 (0.1056) loss: 0.8535 (0.8606) time: 0.1102 data: 0.0223 max mem: 9305 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1144 (0.1057) loss: 0.8566 (0.8606) time: 0.1319 data: 0.0496 max mem: 9305 +Train: [54] Total time: 0:15:32 (0.1492 s / it) +Averaged stats: lr: 0.000057 grad: 0.1144 (0.1057) loss: 0.8566 (0.8606) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:03:39 loss: 0.8917 (0.8917) time: 3.5425 data: 3.4614 max mem: 9305 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8766 (0.8798) time: 0.1180 data: 0.0891 max mem: 9305 +Eval (hcp-train-subset): [54] Total time: 0:00:13 (0.2160 s / it) +Averaged stats (hcp-train-subset): loss: 0.8766 (0.8798) +Making plots (hcp-train-subset): example=12 +Eval (hcp-val): [54] [ 0/62] eta: 0:04:32 loss: 0.8763 (0.8763) time: 4.3894 data: 4.3547 max mem: 9305 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8768 (0.8787) time: 0.1510 data: 0.1221 max mem: 9305 +Eval (hcp-val): [54] Total time: 0:00:13 (0.2139 s / it) +Averaged stats (hcp-val): loss: 0.8768 (0.8787) +Making plots (hcp-val): example=36 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [55] [ 0/6250] eta: 7:36:07 lr: 0.000057 grad: 0.3801 (0.3801) loss: 0.8825 (0.8825) time: 4.3788 data: 4.0980 max mem: 9305 +Train: [55] [ 100/6250] eta: 0:19:21 lr: 0.000057 grad: 0.1005 (0.1257) loss: 0.8664 (0.8705) time: 0.1372 data: 0.0416 max mem: 9305 +Train: [55] [ 200/6250] eta: 0:16:21 lr: 0.000057 grad: 0.1070 (0.1171) loss: 0.8623 (0.8646) time: 0.1391 data: 0.0306 max mem: 9305 +Train: [55] [ 300/6250] eta: 0:15:09 lr: 0.000057 grad: 0.0930 (0.1116) loss: 0.8691 (0.8627) time: 0.1433 data: 0.0603 max mem: 9305 +Train: [55] [ 400/6250] eta: 0:14:20 lr: 0.000057 grad: 0.1044 (0.1098) loss: 0.8573 (0.8612) time: 0.1414 data: 0.0533 max mem: 9305 +Train: [55] [ 500/6250] eta: 0:13:40 lr: 0.000057 grad: 0.0996 (0.1093) loss: 0.8615 (0.8606) time: 0.1478 data: 0.0641 max mem: 9305 +Train: [55] [ 600/6250] eta: 0:13:06 lr: 0.000057 grad: 0.1024 (0.1083) loss: 0.8562 (0.8605) time: 0.1214 data: 0.0316 max mem: 9305 +Train: [55] [ 700/6250] eta: 0:12:41 lr: 0.000057 grad: 0.0940 (0.1073) loss: 0.8679 (0.8609) time: 0.1346 data: 0.0379 max mem: 9305 +Train: [55] [ 800/6250] eta: 0:12:27 lr: 0.000057 grad: 0.1012 (0.1066) loss: 0.8600 (0.8612) time: 0.1340 data: 0.0514 max mem: 9305 +Train: [55] [ 900/6250] eta: 0:12:08 lr: 0.000057 grad: 0.0934 (0.1061) loss: 0.8656 (0.8614) time: 0.1291 data: 0.0422 max mem: 9305 +Train: [55] [1000/6250] eta: 0:11:53 lr: 0.000057 grad: 0.1046 (0.1066) loss: 0.8647 (0.8613) time: 0.1326 data: 0.0499 max mem: 9305 +Train: [55] [1100/6250] eta: 0:11:42 lr: 0.000057 grad: 0.1022 (0.1063) loss: 0.8566 (0.8613) time: 0.1578 data: 0.0782 max mem: 9305 +Train: [55] [1200/6250] eta: 0:11:27 lr: 0.000057 grad: 0.1020 (0.1059) loss: 0.8569 (0.8611) time: 0.1328 data: 0.0473 max mem: 9305 +Train: [55] [1300/6250] eta: 0:11:10 lr: 0.000057 grad: 0.1000 (0.1059) loss: 0.8603 (0.8609) time: 0.1355 data: 0.0509 max mem: 9305 +Train: [55] [1400/6250] eta: 0:10:55 lr: 0.000057 grad: 0.1027 (0.1057) loss: 0.8602 (0.8607) time: 0.1501 data: 0.0699 max mem: 9305 +Train: [55] [1500/6250] eta: 0:10:40 lr: 0.000057 grad: 0.0973 (0.1056) loss: 0.8673 (0.8607) time: 0.1466 data: 0.0586 max mem: 9305 +Train: [55] [1600/6250] eta: 0:10:29 lr: 0.000057 grad: 0.1095 (0.1055) loss: 0.8562 (0.8605) time: 0.1154 data: 0.0297 max mem: 9305 +Train: [55] [1700/6250] eta: 0:10:14 lr: 0.000057 grad: 0.0942 (0.1056) loss: 0.8593 (0.8603) time: 0.1339 data: 0.0525 max mem: 9305 +Train: [55] [1800/6250] eta: 0:09:58 lr: 0.000057 grad: 0.0962 (0.1057) loss: 0.8659 (0.8603) time: 0.1153 data: 0.0348 max mem: 9305 +Train: [55] [1900/6250] eta: 0:09:43 lr: 0.000057 grad: 0.0995 (0.1055) loss: 0.8593 (0.8603) time: 0.1298 data: 0.0495 max mem: 9305 +Train: [55] [2000/6250] eta: 0:09:30 lr: 0.000057 grad: 0.0985 (0.1053) loss: 0.8622 (0.8602) time: 0.1429 data: 0.0655 max mem: 9305 +Train: [55] [2100/6250] eta: 0:09:17 lr: 0.000057 grad: 0.1027 (0.1052) loss: 0.8596 (0.8602) time: 0.0976 data: 0.0148 max mem: 9305 +Train: [55] [2200/6250] eta: 0:09:04 lr: 0.000057 grad: 0.0913 (0.1053) loss: 0.8632 (0.8602) time: 0.1377 data: 0.0515 max mem: 9305 +Train: [55] [2300/6250] eta: 0:08:52 lr: 0.000057 grad: 0.1020 (0.1054) loss: 0.8552 (0.8599) time: 0.1597 data: 0.0708 max mem: 9305 +Train: [55] [2400/6250] eta: 0:08:41 lr: 0.000057 grad: 0.1100 (0.1054) loss: 0.8597 (0.8598) time: 0.1656 data: 0.0867 max mem: 9305 +Train: [55] [2500/6250] eta: 0:08:26 lr: 0.000057 grad: 0.1020 (0.1055) loss: 0.8563 (0.8595) time: 0.1009 data: 0.0163 max mem: 9305 +Train: [55] [2600/6250] eta: 0:08:14 lr: 0.000056 grad: 0.1021 (0.1057) loss: 0.8553 (0.8592) time: 0.1374 data: 0.0501 max mem: 9305 +Train: [55] [2700/6250] eta: 0:08:02 lr: 0.000056 grad: 0.1101 (0.1059) loss: 0.8503 (0.8591) time: 0.1399 data: 0.0561 max mem: 9305 +Train: [55] [2800/6250] eta: 0:07:49 lr: 0.000056 grad: 0.1097 (0.1060) loss: 0.8577 (0.8590) time: 0.1375 data: 0.0550 max mem: 9305 +Train: [55] [2900/6250] eta: 0:07:38 lr: 0.000056 grad: 0.1101 (0.1062) loss: 0.8571 (0.8589) time: 0.1346 data: 0.0420 max mem: 9305 +Train: [55] [3000/6250] eta: 0:07:26 lr: 0.000056 grad: 0.1085 (0.1064) loss: 0.8610 (0.8587) time: 0.1479 data: 0.0726 max mem: 9305 +Train: [55] [3100/6250] eta: 0:07:13 lr: 0.000056 grad: 0.1037 (0.1065) loss: 0.8578 (0.8586) time: 0.1352 data: 0.0471 max mem: 9305 +Train: [55] [3200/6250] eta: 0:06:59 lr: 0.000056 grad: 0.1082 (0.1066) loss: 0.8557 (0.8585) time: 0.1409 data: 0.0558 max mem: 9305 +Train: [55] [3300/6250] eta: 0:06:45 lr: 0.000056 grad: 0.1068 (0.1067) loss: 0.8606 (0.8584) time: 0.1091 data: 0.0184 max mem: 9305 +Train: [55] [3400/6250] eta: 0:06:30 lr: 0.000056 grad: 0.1029 (0.1068) loss: 0.8558 (0.8583) time: 0.1095 data: 0.0275 max mem: 9305 +Train: [55] [3500/6250] eta: 0:06:16 lr: 0.000056 grad: 0.1021 (0.1070) loss: 0.8548 (0.8583) time: 0.1635 data: 0.0678 max mem: 9305 +Train: [55] [3600/6250] eta: 0:06:07 lr: 0.000056 grad: 0.1021 (0.1071) loss: 0.8662 (0.8583) time: 0.3260 data: 0.2250 max mem: 9305 +Train: [55] [3700/6250] eta: 0:05:53 lr: 0.000056 grad: 0.1087 (0.1073) loss: 0.8548 (0.8582) time: 0.1119 data: 0.0281 max mem: 9305 +Train: [55] [3800/6250] eta: 0:05:40 lr: 0.000056 grad: 0.1097 (0.1073) loss: 0.8497 (0.8582) time: 0.1361 data: 0.0552 max mem: 9305 +Train: [55] [3900/6250] eta: 0:05:25 lr: 0.000056 grad: 0.1066 (0.1075) loss: 0.8607 (0.8581) time: 0.1158 data: 0.0266 max mem: 9305 +Train: [55] [4000/6250] eta: 0:05:11 lr: 0.000056 grad: 0.1058 (0.1075) loss: 0.8539 (0.8582) time: 0.1244 data: 0.0455 max mem: 9305 +Train: [55] [4100/6250] eta: 0:04:58 lr: 0.000056 grad: 0.1027 (0.1075) loss: 0.8570 (0.8581) time: 0.1801 data: 0.1027 max mem: 9305 +Train: [55] [4200/6250] eta: 0:04:44 lr: 0.000056 grad: 0.1076 (0.1076) loss: 0.8567 (0.8581) time: 0.1630 data: 0.0716 max mem: 9305 +Train: [55] [4300/6250] eta: 0:04:30 lr: 0.000056 grad: 0.0996 (0.1076) loss: 0.8584 (0.8581) time: 0.1438 data: 0.0522 max mem: 9305 +Train: [55] [4400/6250] eta: 0:04:16 lr: 0.000056 grad: 0.1085 (0.1076) loss: 0.8638 (0.8580) time: 0.1453 data: 0.0651 max mem: 9305 +Train: [55] [4500/6250] eta: 0:04:01 lr: 0.000056 grad: 0.1102 (0.1077) loss: 0.8597 (0.8580) time: 0.1422 data: 0.0595 max mem: 9305 +Train: [55] [4600/6250] eta: 0:03:47 lr: 0.000056 grad: 0.1090 (0.1077) loss: 0.8552 (0.8580) time: 0.1484 data: 0.0549 max mem: 9305 +Train: [55] [4700/6250] eta: 0:03:34 lr: 0.000056 grad: 0.1108 (0.1078) loss: 0.8526 (0.8580) time: 0.1666 data: 0.0866 max mem: 9305 +Train: [55] [4800/6250] eta: 0:03:20 lr: 0.000056 grad: 0.0949 (0.1077) loss: 0.8589 (0.8580) time: 0.1557 data: 0.0781 max mem: 9305 +Train: [55] [4900/6250] eta: 0:03:06 lr: 0.000056 grad: 0.1097 (0.1078) loss: 0.8554 (0.8580) time: 0.1207 data: 0.0371 max mem: 9305 +Train: [55] [5000/6250] eta: 0:02:52 lr: 0.000056 grad: 0.1130 (0.1078) loss: 0.8573 (0.8580) time: 0.1531 data: 0.0727 max mem: 9305 +Train: [55] [5100/6250] eta: 0:02:38 lr: 0.000056 grad: 0.1067 (0.1078) loss: 0.8667 (0.8581) time: 0.1461 data: 0.0622 max mem: 9305 +Train: [55] [5200/6250] eta: 0:02:24 lr: 0.000056 grad: 0.1059 (0.1079) loss: 0.8662 (0.8581) time: 0.0856 data: 0.0005 max mem: 9305 +Train: [55] [5300/6250] eta: 0:02:11 lr: 0.000056 grad: 0.1040 (0.1078) loss: 0.8640 (0.8582) time: 0.1460 data: 0.0695 max mem: 9305 +Train: [55] [5400/6250] eta: 0:01:57 lr: 0.000056 grad: 0.1034 (0.1078) loss: 0.8611 (0.8582) time: 0.1050 data: 0.0211 max mem: 9305 +Train: [55] [5500/6250] eta: 0:01:43 lr: 0.000056 grad: 0.0927 (0.1078) loss: 0.8632 (0.8583) time: 0.1388 data: 0.0565 max mem: 9305 +Train: [55] [5600/6250] eta: 0:01:29 lr: 0.000055 grad: 0.1101 (0.1078) loss: 0.8535 (0.8583) time: 0.1380 data: 0.0593 max mem: 9305 +Train: [55] [5700/6250] eta: 0:01:15 lr: 0.000055 grad: 0.1071 (0.1078) loss: 0.8548 (0.8583) time: 0.1275 data: 0.0507 max mem: 9305 +Train: [55] [5800/6250] eta: 0:01:02 lr: 0.000055 grad: 0.1063 (0.1079) loss: 0.8547 (0.8584) time: 0.1329 data: 0.0468 max mem: 9305 +Train: [55] [5900/6250] eta: 0:00:48 lr: 0.000055 grad: 0.1058 (0.1078) loss: 0.8606 (0.8584) time: 0.1481 data: 0.0681 max mem: 9305 +Train: [55] [6000/6250] eta: 0:00:34 lr: 0.000055 grad: 0.1088 (0.1079) loss: 0.8573 (0.8584) time: 0.1436 data: 0.0631 max mem: 9305 +Train: [55] [6100/6250] eta: 0:00:20 lr: 0.000055 grad: 0.1065 (0.1080) loss: 0.8581 (0.8584) time: 0.1284 data: 0.0515 max mem: 9305 +Train: [55] [6200/6250] eta: 0:00:06 lr: 0.000055 grad: 0.1166 (0.1080) loss: 0.8510 (0.8584) time: 0.1923 data: 0.1116 max mem: 9305 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.0994 (0.1080) loss: 0.8613 (0.8583) time: 0.1552 data: 0.0715 max mem: 9305 +Train: [55] Total time: 0:14:28 (0.1390 s / it) +Averaged stats: lr: 0.000055 grad: 0.0994 (0.1080) loss: 0.8613 (0.8583) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:05:43 loss: 0.8833 (0.8833) time: 5.5427 data: 5.5084 max mem: 9305 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8778 (0.8799) time: 0.1374 data: 0.1089 max mem: 9305 +Eval (hcp-train-subset): [55] Total time: 0:00:13 (0.2212 s / it) +Averaged stats (hcp-train-subset): loss: 0.8778 (0.8799) +Eval (hcp-val): [55] [ 0/62] eta: 0:03:38 loss: 0.8753 (0.8753) time: 3.5322 data: 3.4560 max mem: 9305 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8777 (0.8790) time: 0.1494 data: 0.1194 max mem: 9305 +Eval (hcp-val): [55] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (hcp-val): loss: 0.8777 (0.8790) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [56] [ 0/6250] eta: 10:26:43 lr: 0.000055 grad: 0.3261 (0.3261) loss: 0.8874 (0.8874) time: 6.0166 data: 5.9039 max mem: 9305 +Train: [56] [ 100/6250] eta: 0:21:01 lr: 0.000055 grad: 0.1006 (0.1153) loss: 0.8777 (0.8809) time: 0.1486 data: 0.0629 max mem: 9305 +Train: [56] [ 200/6250] eta: 0:18:14 lr: 0.000055 grad: 0.0928 (0.1072) loss: 0.8711 (0.8781) time: 0.1406 data: 0.0531 max mem: 9305 +Train: [56] [ 300/6250] eta: 0:16:49 lr: 0.000055 grad: 0.1037 (0.1061) loss: 0.8678 (0.8752) time: 0.1352 data: 0.0470 max mem: 9305 +Train: [56] [ 400/6250] eta: 0:15:44 lr: 0.000055 grad: 0.1030 (0.1064) loss: 0.8633 (0.8727) time: 0.1194 data: 0.0289 max mem: 9305 +Train: [56] [ 500/6250] eta: 0:14:55 lr: 0.000055 grad: 0.1032 (0.1062) loss: 0.8646 (0.8712) time: 0.1374 data: 0.0516 max mem: 9305 +Train: [56] [ 600/6250] eta: 0:14:19 lr: 0.000055 grad: 0.0950 (0.1056) loss: 0.8656 (0.8700) time: 0.1583 data: 0.0742 max mem: 9305 +Train: [56] [ 700/6250] eta: 0:14:03 lr: 0.000055 grad: 0.0974 (0.1052) loss: 0.8618 (0.8692) time: 0.1766 data: 0.0839 max mem: 9305 +Train: [56] [ 800/6250] eta: 0:13:50 lr: 0.000055 grad: 0.0945 (0.1047) loss: 0.8670 (0.8684) time: 0.1662 data: 0.0815 max mem: 9305 +Train: [56] [ 900/6250] eta: 0:13:49 lr: 0.000055 grad: 0.1017 (0.1044) loss: 0.8626 (0.8677) time: 0.1694 data: 0.0840 max mem: 9305 +Train: [56] [1000/6250] eta: 0:13:41 lr: 0.000055 grad: 0.0976 (0.1040) loss: 0.8641 (0.8671) time: 0.1588 data: 0.0698 max mem: 9305 +Train: [56] [1100/6250] eta: 0:13:30 lr: 0.000055 grad: 0.1051 (0.1037) loss: 0.8611 (0.8668) time: 0.1827 data: 0.1032 max mem: 9305 +Train: [56] [1200/6250] eta: 0:13:08 lr: 0.000055 grad: 0.1038 (0.1036) loss: 0.8571 (0.8662) time: 0.1431 data: 0.0616 max mem: 9305 +Train: [56] [1300/6250] eta: 0:12:52 lr: 0.000055 grad: 0.1029 (0.1038) loss: 0.8589 (0.8657) time: 0.1932 data: 0.1090 max mem: 9305 +Train: [56] [1400/6250] eta: 0:12:27 lr: 0.000055 grad: 0.1032 (0.1039) loss: 0.8587 (0.8653) time: 0.1363 data: 0.0489 max mem: 9305 +Train: [56] [1500/6250] eta: 0:12:08 lr: 0.000055 grad: 0.1029 (0.1042) loss: 0.8504 (0.8646) time: 0.1482 data: 0.0669 max mem: 9305 +Train: [56] [1600/6250] eta: 0:11:51 lr: 0.000055 grad: 0.0977 (0.1044) loss: 0.8594 (0.8641) time: 0.1412 data: 0.0527 max mem: 9305 +Train: [56] [1700/6250] eta: 0:11:41 lr: 0.000055 grad: 0.1056 (0.1047) loss: 0.8571 (0.8635) time: 0.1563 data: 0.0350 max mem: 9305 +Train: [56] [1800/6250] eta: 0:11:31 lr: 0.000055 grad: 0.1124 (0.1052) loss: 0.8495 (0.8630) time: 0.1979 data: 0.1012 max mem: 9305 +Train: [56] [1900/6250] eta: 0:11:09 lr: 0.000055 grad: 0.1093 (0.1055) loss: 0.8541 (0.8626) time: 0.1383 data: 0.0495 max mem: 9305 +Train: [56] [2000/6250] eta: 0:10:53 lr: 0.000055 grad: 0.1002 (0.1057) loss: 0.8514 (0.8622) time: 0.1519 data: 0.0699 max mem: 9305 +Train: [56] [2100/6250] eta: 0:10:35 lr: 0.000055 grad: 0.1047 (0.1059) loss: 0.8554 (0.8619) time: 0.1501 data: 0.0610 max mem: 9305 +Train: [56] [2200/6250] eta: 0:10:16 lr: 0.000055 grad: 0.1037 (0.1061) loss: 0.8592 (0.8616) time: 0.1019 data: 0.0048 max mem: 9305 +Train: [56] [2300/6250] eta: 0:10:00 lr: 0.000055 grad: 0.1066 (0.1062) loss: 0.8515 (0.8613) time: 0.1524 data: 0.0479 max mem: 9305 +Train: [56] [2400/6250] eta: 0:09:42 lr: 0.000054 grad: 0.1046 (0.1062) loss: 0.8590 (0.8610) time: 0.1218 data: 0.0402 max mem: 9305 +Train: [56] [2500/6250] eta: 0:09:26 lr: 0.000054 grad: 0.1024 (0.1062) loss: 0.8571 (0.8608) time: 0.1316 data: 0.0550 max mem: 9305 +Train: [56] [2600/6250] eta: 0:09:10 lr: 0.000054 grad: 0.1030 (0.1063) loss: 0.8551 (0.8606) time: 0.1393 data: 0.0538 max mem: 9305 +Train: [56] [2700/6250] eta: 0:08:54 lr: 0.000054 grad: 0.1070 (0.1064) loss: 0.8609 (0.8603) time: 0.1560 data: 0.0724 max mem: 9305 +Train: [56] [2800/6250] eta: 0:08:38 lr: 0.000054 grad: 0.1045 (0.1065) loss: 0.8542 (0.8602) time: 0.1434 data: 0.0602 max mem: 9305 +Train: [56] [2900/6250] eta: 0:08:22 lr: 0.000054 grad: 0.1012 (0.1065) loss: 0.8577 (0.8601) time: 0.1545 data: 0.0762 max mem: 9305 +Train: [56] [3000/6250] eta: 0:08:04 lr: 0.000054 grad: 0.1023 (0.1066) loss: 0.8547 (0.8600) time: 0.1419 data: 0.0539 max mem: 9305 +Train: [56] [3100/6250] eta: 0:07:46 lr: 0.000054 grad: 0.1090 (0.1066) loss: 0.8499 (0.8600) time: 0.1012 data: 0.0184 max mem: 9305 +Train: [56] [3200/6250] eta: 0:07:28 lr: 0.000054 grad: 0.1049 (0.1066) loss: 0.8534 (0.8599) time: 0.1040 data: 0.0175 max mem: 9305 +Train: [56] [3300/6250] eta: 0:07:11 lr: 0.000054 grad: 0.1093 (0.1066) loss: 0.8476 (0.8598) time: 0.0974 data: 0.0104 max mem: 9305 +Train: [56] [3400/6250] eta: 0:06:53 lr: 0.000054 grad: 0.1085 (0.1067) loss: 0.8530 (0.8598) time: 0.1208 data: 0.0338 max mem: 9305 +Train: [56] [3500/6250] eta: 0:06:38 lr: 0.000054 grad: 0.1061 (0.1067) loss: 0.8595 (0.8597) time: 0.1421 data: 0.0595 max mem: 9305 +Train: [56] [3600/6250] eta: 0:06:23 lr: 0.000054 grad: 0.1054 (0.1069) loss: 0.8630 (0.8598) time: 0.1224 data: 0.0440 max mem: 9305 +Train: [56] [3700/6250] eta: 0:06:07 lr: 0.000054 grad: 0.1029 (0.1069) loss: 0.8610 (0.8598) time: 0.1304 data: 0.0472 max mem: 9305 +Train: [56] [3800/6250] eta: 0:05:52 lr: 0.000054 grad: 0.1063 (0.1070) loss: 0.8506 (0.8598) time: 0.1105 data: 0.0306 max mem: 9305 +Train: [56] [3900/6250] eta: 0:05:37 lr: 0.000054 grad: 0.1050 (0.1071) loss: 0.8563 (0.8598) time: 0.1246 data: 0.0372 max mem: 9305 +Train: [56] [4000/6250] eta: 0:05:22 lr: 0.000054 grad: 0.1089 (0.1071) loss: 0.8602 (0.8599) time: 0.1459 data: 0.0655 max mem: 9305 +Train: [56] [4100/6250] eta: 0:05:08 lr: 0.000054 grad: 0.0989 (0.1070) loss: 0.8636 (0.8599) time: 0.1721 data: 0.0834 max mem: 9305 +Train: [56] [4200/6250] eta: 0:04:54 lr: 0.000054 grad: 0.1023 (0.1070) loss: 0.8649 (0.8600) time: 0.1499 data: 0.0679 max mem: 9305 +Train: [56] [4300/6250] eta: 0:04:39 lr: 0.000054 grad: 0.1022 (0.1069) loss: 0.8609 (0.8600) time: 0.1389 data: 0.0585 max mem: 9305 +Train: [56] [4400/6250] eta: 0:04:24 lr: 0.000054 grad: 0.1088 (0.1069) loss: 0.8654 (0.8601) time: 0.1107 data: 0.0287 max mem: 9305 +Train: [56] [4500/6250] eta: 0:04:10 lr: 0.000054 grad: 0.0940 (0.1069) loss: 0.8621 (0.8601) time: 0.1279 data: 0.0489 max mem: 9305 +Train: [56] [4600/6250] eta: 0:03:55 lr: 0.000054 grad: 0.1015 (0.1069) loss: 0.8636 (0.8601) time: 0.1326 data: 0.0502 max mem: 9305 +Train: [56] [4700/6250] eta: 0:03:41 lr: 0.000054 grad: 0.1056 (0.1068) loss: 0.8612 (0.8602) time: 0.1200 data: 0.0413 max mem: 9305 +Train: [56] [4800/6250] eta: 0:03:26 lr: 0.000054 grad: 0.1066 (0.1067) loss: 0.8651 (0.8602) time: 0.1230 data: 0.0456 max mem: 9305 +Train: [56] [4900/6250] eta: 0:03:12 lr: 0.000054 grad: 0.1065 (0.1067) loss: 0.8613 (0.8603) time: 0.1110 data: 0.0340 max mem: 9305 +Train: [56] [5000/6250] eta: 0:02:57 lr: 0.000054 grad: 0.1022 (0.1066) loss: 0.8566 (0.8603) time: 0.1168 data: 0.0354 max mem: 9305 +Train: [56] [5100/6250] eta: 0:02:43 lr: 0.000054 grad: 0.1099 (0.1067) loss: 0.8619 (0.8603) time: 0.1348 data: 0.0530 max mem: 9305 +Train: [56] [5200/6250] eta: 0:02:28 lr: 0.000054 grad: 0.0985 (0.1067) loss: 0.8604 (0.8604) time: 0.1294 data: 0.0400 max mem: 9305 +Train: [56] [5300/6250] eta: 0:02:14 lr: 0.000054 grad: 0.0994 (0.1066) loss: 0.8687 (0.8604) time: 0.1487 data: 0.0659 max mem: 9305 +Train: [56] [5400/6250] eta: 0:02:00 lr: 0.000054 grad: 0.1094 (0.1066) loss: 0.8547 (0.8604) time: 0.1237 data: 0.0399 max mem: 9305 +Train: [56] [5500/6250] eta: 0:01:45 lr: 0.000053 grad: 0.1006 (0.1066) loss: 0.8624 (0.8605) time: 0.1336 data: 0.0542 max mem: 9305 +Train: [56] [5600/6250] eta: 0:01:31 lr: 0.000053 grad: 0.1063 (0.1066) loss: 0.8558 (0.8604) time: 0.1288 data: 0.0483 max mem: 9305 +Train: [56] [5700/6250] eta: 0:01:17 lr: 0.000053 grad: 0.1006 (0.1066) loss: 0.8577 (0.8604) time: 0.0945 data: 0.0047 max mem: 9305 +Train: [56] [5800/6250] eta: 0:01:03 lr: 0.000053 grad: 0.1086 (0.1067) loss: 0.8625 (0.8604) time: 0.1297 data: 0.0496 max mem: 9305 +Train: [56] [5900/6250] eta: 0:00:49 lr: 0.000053 grad: 0.1115 (0.1068) loss: 0.8556 (0.8604) time: 0.1391 data: 0.0554 max mem: 9305 +Train: [56] [6000/6250] eta: 0:00:35 lr: 0.000053 grad: 0.1098 (0.1069) loss: 0.8622 (0.8603) time: 0.1455 data: 0.0625 max mem: 9305 +Train: [56] [6100/6250] eta: 0:00:21 lr: 0.000053 grad: 0.1064 (0.1070) loss: 0.8552 (0.8603) time: 0.1314 data: 0.0468 max mem: 9305 +Train: [56] [6200/6250] eta: 0:00:07 lr: 0.000053 grad: 0.1125 (0.1071) loss: 0.8604 (0.8603) time: 0.1526 data: 0.0643 max mem: 9305 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.1083 (0.1072) loss: 0.8587 (0.8602) time: 0.0901 data: 0.0073 max mem: 9305 +Train: [56] Total time: 0:14:46 (0.1418 s / it) +Averaged stats: lr: 0.000053 grad: 0.1083 (0.1072) loss: 0.8587 (0.8602) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:04:32 loss: 0.8857 (0.8857) time: 4.3941 data: 4.3604 max mem: 9305 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8777 (0.8790) time: 0.1219 data: 0.0927 max mem: 9305 +Eval (hcp-train-subset): [56] Total time: 0:00:11 (0.1925 s / it) +Averaged stats (hcp-train-subset): loss: 0.8777 (0.8790) +Eval (hcp-val): [56] [ 0/62] eta: 0:03:15 loss: 0.8786 (0.8786) time: 3.1492 data: 3.1005 max mem: 9305 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8772 (0.8799) time: 0.1655 data: 0.1361 max mem: 9305 +Eval (hcp-val): [56] Total time: 0:00:13 (0.2149 s / it) +Averaged stats (hcp-val): loss: 0.8772 (0.8799) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [57] [ 0/6250] eta: 10:11:31 lr: 0.000053 grad: 0.1155 (0.1155) loss: 0.8866 (0.8866) time: 5.8706 data: 5.7570 max mem: 9305 +Train: [57] [ 100/6250] eta: 0:19:39 lr: 0.000053 grad: 0.1007 (0.1191) loss: 0.8823 (0.8717) time: 0.1418 data: 0.0476 max mem: 9305 +Train: [57] [ 200/6250] eta: 0:17:07 lr: 0.000053 grad: 0.1181 (0.1154) loss: 0.8628 (0.8678) time: 0.1311 data: 0.0356 max mem: 9305 +Train: [57] [ 300/6250] eta: 0:15:38 lr: 0.000053 grad: 0.1082 (0.1155) loss: 0.8685 (0.8657) time: 0.1244 data: 0.0331 max mem: 9305 +Train: [57] [ 400/6250] eta: 0:14:42 lr: 0.000053 grad: 0.1081 (0.1153) loss: 0.8629 (0.8649) time: 0.1218 data: 0.0354 max mem: 9305 +Train: [57] [ 500/6250] eta: 0:13:55 lr: 0.000053 grad: 0.1077 (0.1145) loss: 0.8598 (0.8639) time: 0.1104 data: 0.0255 max mem: 9305 +Train: [57] [ 600/6250] eta: 0:13:20 lr: 0.000053 grad: 0.1004 (0.1138) loss: 0.8636 (0.8635) time: 0.1199 data: 0.0303 max mem: 9305 +Train: [57] [ 700/6250] eta: 0:12:58 lr: 0.000053 grad: 0.1034 (0.1129) loss: 0.8590 (0.8631) time: 0.1276 data: 0.0412 max mem: 9305 +Train: [57] [ 800/6250] eta: 0:12:36 lr: 0.000053 grad: 0.1003 (0.1122) loss: 0.8618 (0.8631) time: 0.1159 data: 0.0242 max mem: 9305 +Train: [57] [ 900/6250] eta: 0:12:21 lr: 0.000053 grad: 0.1105 (0.1115) loss: 0.8626 (0.8627) time: 0.1606 data: 0.0528 max mem: 9305 +Train: [57] [1000/6250] eta: 0:12:04 lr: 0.000053 grad: 0.1014 (0.1110) loss: 0.8619 (0.8625) time: 0.1407 data: 0.0611 max mem: 9305 +Train: [57] [1100/6250] eta: 0:11:46 lr: 0.000053 grad: 0.1052 (0.1108) loss: 0.8598 (0.8623) time: 0.1255 data: 0.0436 max mem: 9305 +Train: [57] [1200/6250] eta: 0:11:31 lr: 0.000053 grad: 0.1089 (0.1107) loss: 0.8511 (0.8620) time: 0.1563 data: 0.0742 max mem: 9305 +Train: [57] [1300/6250] eta: 0:11:14 lr: 0.000053 grad: 0.0979 (0.1104) loss: 0.8602 (0.8618) time: 0.1383 data: 0.0549 max mem: 9305 +Train: [57] [1400/6250] eta: 0:10:59 lr: 0.000053 grad: 0.1077 (0.1102) loss: 0.8598 (0.8617) time: 0.1133 data: 0.0306 max mem: 9305 +Train: [57] [1500/6250] eta: 0:10:44 lr: 0.000053 grad: 0.1049 (0.1102) loss: 0.8615 (0.8618) time: 0.1326 data: 0.0498 max mem: 9305 +Train: [57] [1600/6250] eta: 0:10:29 lr: 0.000053 grad: 0.0999 (0.1099) loss: 0.8631 (0.8618) time: 0.1275 data: 0.0470 max mem: 9305 +Train: [57] [1700/6250] eta: 0:10:14 lr: 0.000053 grad: 0.0988 (0.1097) loss: 0.8627 (0.8619) time: 0.1290 data: 0.0439 max mem: 9305 +Train: [57] [1800/6250] eta: 0:10:00 lr: 0.000053 grad: 0.1035 (0.1094) loss: 0.8623 (0.8620) time: 0.1246 data: 0.0415 max mem: 9305 +Train: [57] [1900/6250] eta: 0:09:47 lr: 0.000053 grad: 0.1049 (0.1093) loss: 0.8619 (0.8618) time: 0.1366 data: 0.0553 max mem: 9305 +Train: [57] [2000/6250] eta: 0:09:34 lr: 0.000053 grad: 0.0991 (0.1091) loss: 0.8617 (0.8618) time: 0.1369 data: 0.0546 max mem: 9305 +Train: [57] [2100/6250] eta: 0:09:27 lr: 0.000053 grad: 0.1002 (0.1091) loss: 0.8632 (0.8618) time: 0.2221 data: 0.1394 max mem: 9305 +Train: [57] [2200/6250] eta: 0:09:09 lr: 0.000053 grad: 0.0998 (0.1090) loss: 0.8642 (0.8618) time: 0.1353 data: 0.0557 max mem: 9305 +Train: [57] [2300/6250] eta: 0:08:57 lr: 0.000052 grad: 0.1103 (0.1090) loss: 0.8564 (0.8617) time: 0.1652 data: 0.0880 max mem: 9305 +Train: [57] [2400/6250] eta: 0:08:46 lr: 0.000052 grad: 0.1083 (0.1088) loss: 0.8614 (0.8616) time: 0.1499 data: 0.0735 max mem: 9305 +Train: [57] [2500/6250] eta: 0:08:32 lr: 0.000052 grad: 0.1084 (0.1088) loss: 0.8624 (0.8614) time: 0.1374 data: 0.0553 max mem: 9305 +Train: [57] [2600/6250] eta: 0:08:19 lr: 0.000052 grad: 0.1099 (0.1089) loss: 0.8606 (0.8613) time: 0.1418 data: 0.0572 max mem: 9305 +Train: [57] [2700/6250] eta: 0:08:06 lr: 0.000052 grad: 0.1125 (0.1090) loss: 0.8579 (0.8611) time: 0.1363 data: 0.0538 max mem: 9305 +Train: [57] [2800/6250] eta: 0:07:50 lr: 0.000052 grad: 0.1113 (0.1090) loss: 0.8495 (0.8609) time: 0.1254 data: 0.0400 max mem: 9305 +Train: [57] [2900/6250] eta: 0:07:35 lr: 0.000052 grad: 0.1115 (0.1090) loss: 0.8477 (0.8607) time: 0.1198 data: 0.0329 max mem: 9305 +Train: [57] [3000/6250] eta: 0:07:20 lr: 0.000052 grad: 0.1051 (0.1091) loss: 0.8611 (0.8606) time: 0.1096 data: 0.0234 max mem: 9305 +Train: [57] [3100/6250] eta: 0:07:05 lr: 0.000052 grad: 0.1166 (0.1092) loss: 0.8520 (0.8604) time: 0.1338 data: 0.0476 max mem: 9305 +Train: [57] [3200/6250] eta: 0:06:50 lr: 0.000052 grad: 0.1010 (0.1091) loss: 0.8573 (0.8603) time: 0.1253 data: 0.0456 max mem: 9305 +Train: [57] [3300/6250] eta: 0:06:35 lr: 0.000052 grad: 0.1011 (0.1091) loss: 0.8576 (0.8602) time: 0.0897 data: 0.0002 max mem: 9305 +Train: [57] [3400/6250] eta: 0:06:21 lr: 0.000052 grad: 0.1096 (0.1091) loss: 0.8581 (0.8601) time: 0.1352 data: 0.0548 max mem: 9305 +Train: [57] [3500/6250] eta: 0:06:07 lr: 0.000052 grad: 0.1070 (0.1092) loss: 0.8548 (0.8601) time: 0.1248 data: 0.0448 max mem: 9305 +Train: [57] [3600/6250] eta: 0:05:54 lr: 0.000052 grad: 0.1086 (0.1092) loss: 0.8541 (0.8600) time: 0.1198 data: 0.0322 max mem: 9305 +Train: [57] [3700/6250] eta: 0:05:42 lr: 0.000052 grad: 0.1049 (0.1091) loss: 0.8558 (0.8600) time: 0.1136 data: 0.0228 max mem: 9305 +Train: [57] [3800/6250] eta: 0:05:29 lr: 0.000052 grad: 0.1031 (0.1091) loss: 0.8619 (0.8601) time: 0.1272 data: 0.0469 max mem: 9305 +Train: [57] [3900/6250] eta: 0:05:16 lr: 0.000052 grad: 0.1064 (0.1092) loss: 0.8550 (0.8600) time: 0.1714 data: 0.0892 max mem: 9305 +Train: [57] [4000/6250] eta: 0:05:03 lr: 0.000052 grad: 0.1040 (0.1093) loss: 0.8563 (0.8599) time: 0.1451 data: 0.0647 max mem: 9305 +Train: [57] [4100/6250] eta: 0:04:50 lr: 0.000052 grad: 0.1067 (0.1093) loss: 0.8603 (0.8599) time: 0.1638 data: 0.0810 max mem: 9305 +Train: [57] [4200/6250] eta: 0:04:37 lr: 0.000052 grad: 0.1113 (0.1094) loss: 0.8606 (0.8599) time: 0.1256 data: 0.0454 max mem: 9305 +Train: [57] [4300/6250] eta: 0:04:24 lr: 0.000052 grad: 0.1062 (0.1094) loss: 0.8638 (0.8599) time: 0.1462 data: 0.0606 max mem: 9305 +Train: [57] [4400/6250] eta: 0:04:11 lr: 0.000052 grad: 0.1124 (0.1095) loss: 0.8571 (0.8598) time: 0.1466 data: 0.0720 max mem: 9305 +Train: [57] [4500/6250] eta: 0:03:57 lr: 0.000052 grad: 0.1097 (0.1096) loss: 0.8644 (0.8598) time: 0.1353 data: 0.0468 max mem: 9305 +Train: [57] [4600/6250] eta: 0:03:44 lr: 0.000052 grad: 0.1054 (0.1096) loss: 0.8587 (0.8598) time: 0.1483 data: 0.0659 max mem: 9305 +Train: [57] [4700/6250] eta: 0:03:31 lr: 0.000052 grad: 0.1085 (0.1097) loss: 0.8528 (0.8597) time: 0.1135 data: 0.0316 max mem: 9305 +Train: [57] [4800/6250] eta: 0:03:17 lr: 0.000052 grad: 0.1086 (0.1098) loss: 0.8575 (0.8596) time: 0.1277 data: 0.0488 max mem: 9305 +Train: [57] [4900/6250] eta: 0:03:04 lr: 0.000052 grad: 0.1019 (0.1099) loss: 0.8577 (0.8595) time: 0.1288 data: 0.0433 max mem: 9305 +Train: [57] [5000/6250] eta: 0:02:51 lr: 0.000052 grad: 0.1023 (0.1099) loss: 0.8560 (0.8595) time: 0.1428 data: 0.0625 max mem: 9305 +Train: [57] [5100/6250] eta: 0:02:37 lr: 0.000052 grad: 0.0978 (0.1099) loss: 0.8658 (0.8595) time: 0.1336 data: 0.0546 max mem: 9305 +Train: [57] [5200/6250] eta: 0:02:23 lr: 0.000052 grad: 0.1059 (0.1099) loss: 0.8618 (0.8595) time: 0.1248 data: 0.0459 max mem: 9305 +Train: [57] [5300/6250] eta: 0:02:10 lr: 0.000052 grad: 0.1058 (0.1099) loss: 0.8600 (0.8595) time: 0.1318 data: 0.0509 max mem: 9305 +Train: [57] [5400/6250] eta: 0:01:56 lr: 0.000051 grad: 0.1095 (0.1100) loss: 0.8575 (0.8594) time: 0.1183 data: 0.0370 max mem: 9305 +Train: [57] [5500/6250] eta: 0:01:42 lr: 0.000051 grad: 0.0994 (0.1100) loss: 0.8606 (0.8594) time: 0.1239 data: 0.0306 max mem: 9305 +Train: [57] [5600/6250] eta: 0:01:29 lr: 0.000051 grad: 0.0995 (0.1100) loss: 0.8628 (0.8594) time: 0.1308 data: 0.0480 max mem: 9305 +Train: [57] [5700/6250] eta: 0:01:15 lr: 0.000051 grad: 0.1067 (0.1100) loss: 0.8553 (0.8594) time: 0.1189 data: 0.0350 max mem: 9305 +Train: [57] [5800/6250] eta: 0:01:01 lr: 0.000051 grad: 0.1071 (0.1099) loss: 0.8641 (0.8594) time: 0.1192 data: 0.0346 max mem: 9305 +Train: [57] [5900/6250] eta: 0:00:47 lr: 0.000051 grad: 0.1016 (0.1099) loss: 0.8642 (0.8594) time: 0.1359 data: 0.0519 max mem: 9305 +Train: [57] [6000/6250] eta: 0:00:34 lr: 0.000051 grad: 0.1030 (0.1098) loss: 0.8586 (0.8594) time: 0.1342 data: 0.0528 max mem: 9305 +Train: [57] [6100/6250] eta: 0:00:20 lr: 0.000051 grad: 0.1012 (0.1098) loss: 0.8631 (0.8594) time: 0.1293 data: 0.0496 max mem: 9305 +Train: [57] [6200/6250] eta: 0:00:06 lr: 0.000051 grad: 0.1049 (0.1097) loss: 0.8562 (0.8594) time: 0.1383 data: 0.0580 max mem: 9305 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1069 (0.1097) loss: 0.8624 (0.8594) time: 0.1214 data: 0.0394 max mem: 9305 +Train: [57] Total time: 0:14:19 (0.1374 s / it) +Averaged stats: lr: 0.000051 grad: 0.1069 (0.1097) loss: 0.8624 (0.8594) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:04:16 loss: 0.8908 (0.8908) time: 4.1381 data: 4.0644 max mem: 9305 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8788 (0.8773) time: 0.1170 data: 0.0866 max mem: 9305 +Eval (hcp-train-subset): [57] Total time: 0:00:13 (0.2124 s / it) +Averaged stats (hcp-train-subset): loss: 0.8788 (0.8773) +Eval (hcp-val): [57] [ 0/62] eta: 0:03:16 loss: 0.8742 (0.8742) time: 3.1679 data: 3.0942 max mem: 9305 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8780 (0.8799) time: 0.1156 data: 0.0862 max mem: 9305 +Eval (hcp-val): [57] Total time: 0:00:12 (0.2041 s / it) +Averaged stats (hcp-val): loss: 0.8780 (0.8799) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [58] [ 0/6250] eta: 9:57:55 lr: 0.000051 grad: 0.1080 (0.1080) loss: 0.8861 (0.8861) time: 5.7401 data: 5.6421 max mem: 9305 +Train: [58] [ 100/6250] eta: 0:20:05 lr: 0.000051 grad: 0.1062 (0.1350) loss: 0.8748 (0.8760) time: 0.1619 data: 0.0749 max mem: 9305 +Train: [58] [ 200/6250] eta: 0:17:11 lr: 0.000051 grad: 0.1019 (0.1226) loss: 0.8526 (0.8676) time: 0.1092 data: 0.0145 max mem: 9305 +Train: [58] [ 300/6250] eta: 0:15:49 lr: 0.000051 grad: 0.0949 (0.1163) loss: 0.8575 (0.8647) time: 0.0951 data: 0.0002 max mem: 9305 +Train: [58] [ 400/6250] eta: 0:14:43 lr: 0.000051 grad: 0.1052 (0.1130) loss: 0.8525 (0.8629) time: 0.1209 data: 0.0402 max mem: 9305 +Train: [58] [ 500/6250] eta: 0:13:58 lr: 0.000051 grad: 0.1028 (0.1107) loss: 0.8547 (0.8619) time: 0.1203 data: 0.0355 max mem: 9305 +Train: [58] [ 600/6250] eta: 0:13:22 lr: 0.000051 grad: 0.1013 (0.1096) loss: 0.8632 (0.8613) time: 0.1287 data: 0.0328 max mem: 9305 +Train: [58] [ 700/6250] eta: 0:12:52 lr: 0.000051 grad: 0.0943 (0.1086) loss: 0.8620 (0.8612) time: 0.1228 data: 0.0298 max mem: 9305 +Train: [58] [ 800/6250] eta: 0:12:44 lr: 0.000051 grad: 0.1044 (0.1086) loss: 0.8587 (0.8607) time: 0.1564 data: 0.0696 max mem: 9305 +Train: [58] [ 900/6250] eta: 0:12:43 lr: 0.000051 grad: 0.0949 (0.1078) loss: 0.8619 (0.8607) time: 0.1948 data: 0.1079 max mem: 9305 +Train: [58] [1000/6250] eta: 0:12:47 lr: 0.000051 grad: 0.0977 (0.1073) loss: 0.8649 (0.8608) time: 0.1992 data: 0.1164 max mem: 9305 +Train: [58] [1100/6250] eta: 0:12:43 lr: 0.000051 grad: 0.0981 (0.1070) loss: 0.8615 (0.8608) time: 0.1703 data: 0.0871 max mem: 9305 +Train: [58] [1200/6250] eta: 0:12:34 lr: 0.000051 grad: 0.1014 (0.1067) loss: 0.8595 (0.8609) time: 0.1265 data: 0.0311 max mem: 9305 +Train: [58] [1300/6250] eta: 0:12:24 lr: 0.000051 grad: 0.1000 (0.1066) loss: 0.8615 (0.8609) time: 0.1698 data: 0.0913 max mem: 9305 +Train: [58] [1400/6250] eta: 0:12:09 lr: 0.000051 grad: 0.1101 (0.1072) loss: 0.8619 (0.8607) time: 0.1614 data: 0.0794 max mem: 9305 +Train: [58] [1500/6250] eta: 0:11:53 lr: 0.000051 grad: 0.1059 (0.1074) loss: 0.8618 (0.8608) time: 0.1390 data: 0.0597 max mem: 9305 +Train: [58] [1600/6250] eta: 0:11:37 lr: 0.000051 grad: 0.1130 (0.1076) loss: 0.8579 (0.8606) time: 0.1528 data: 0.0693 max mem: 9305 +Train: [58] [1700/6250] eta: 0:11:19 lr: 0.000051 grad: 0.1122 (0.1078) loss: 0.8581 (0.8605) time: 0.1464 data: 0.0589 max mem: 9305 +Train: [58] [1800/6250] eta: 0:11:05 lr: 0.000051 grad: 0.1048 (0.1080) loss: 0.8571 (0.8605) time: 0.1223 data: 0.0324 max mem: 9305 +Train: [58] [1900/6250] eta: 0:10:49 lr: 0.000051 grad: 0.1113 (0.1084) loss: 0.8576 (0.8602) time: 0.1075 data: 0.0165 max mem: 9305 +Train: [58] [2000/6250] eta: 0:10:30 lr: 0.000051 grad: 0.1178 (0.1087) loss: 0.8534 (0.8600) time: 0.1288 data: 0.0442 max mem: 9305 +Train: [58] [2100/6250] eta: 0:10:15 lr: 0.000051 grad: 0.1072 (0.1088) loss: 0.8598 (0.8598) time: 0.1481 data: 0.0645 max mem: 9305 +Train: [58] [2200/6250] eta: 0:10:02 lr: 0.000050 grad: 0.1093 (0.1090) loss: 0.8585 (0.8596) time: 0.2125 data: 0.1345 max mem: 9305 +Train: [58] [2300/6250] eta: 0:09:45 lr: 0.000050 grad: 0.1103 (0.1092) loss: 0.8592 (0.8596) time: 0.1317 data: 0.0557 max mem: 9305 +Train: [58] [2400/6250] eta: 0:09:30 lr: 0.000050 grad: 0.1121 (0.1094) loss: 0.8492 (0.8594) time: 0.1320 data: 0.0491 max mem: 9305 +Train: [58] [2500/6250] eta: 0:09:15 lr: 0.000050 grad: 0.1079 (0.1096) loss: 0.8706 (0.8595) time: 0.1570 data: 0.0733 max mem: 9305 +Train: [58] [2600/6250] eta: 0:09:00 lr: 0.000050 grad: 0.1069 (0.1097) loss: 0.8617 (0.8595) time: 0.1332 data: 0.0466 max mem: 9305 +Train: [58] [2700/6250] eta: 0:08:45 lr: 0.000050 grad: 0.1126 (0.1098) loss: 0.8511 (0.8593) time: 0.1482 data: 0.0642 max mem: 9305 +Train: [58] [2800/6250] eta: 0:08:29 lr: 0.000050 grad: 0.1139 (0.1101) loss: 0.8561 (0.8591) time: 0.1428 data: 0.0610 max mem: 9305 +Train: [58] [2900/6250] eta: 0:08:12 lr: 0.000050 grad: 0.1186 (0.1104) loss: 0.8529 (0.8588) time: 0.1202 data: 0.0335 max mem: 9305 +Train: [58] [3000/6250] eta: 0:07:54 lr: 0.000050 grad: 0.1061 (0.1106) loss: 0.8546 (0.8587) time: 0.1202 data: 0.0358 max mem: 9305 +Train: [58] [3100/6250] eta: 0:07:37 lr: 0.000050 grad: 0.1139 (0.1109) loss: 0.8516 (0.8584) time: 0.1142 data: 0.0342 max mem: 9305 +Train: [58] [3200/6250] eta: 0:07:21 lr: 0.000050 grad: 0.1143 (0.1110) loss: 0.8498 (0.8582) time: 0.1093 data: 0.0277 max mem: 9305 +Train: [58] [3300/6250] eta: 0:07:05 lr: 0.000050 grad: 0.1076 (0.1111) loss: 0.8565 (0.8581) time: 0.1233 data: 0.0413 max mem: 9305 +Train: [58] [3400/6250] eta: 0:06:48 lr: 0.000050 grad: 0.1131 (0.1113) loss: 0.8493 (0.8580) time: 0.1255 data: 0.0378 max mem: 9305 +Train: [58] [3500/6250] eta: 0:06:33 lr: 0.000050 grad: 0.1128 (0.1114) loss: 0.8556 (0.8578) time: 0.1238 data: 0.0442 max mem: 9305 +Train: [58] [3600/6250] eta: 0:06:18 lr: 0.000050 grad: 0.1114 (0.1114) loss: 0.8525 (0.8578) time: 0.1303 data: 0.0501 max mem: 9305 +Train: [58] [3700/6250] eta: 0:06:02 lr: 0.000050 grad: 0.1151 (0.1114) loss: 0.8608 (0.8577) time: 0.1232 data: 0.0373 max mem: 9305 +Train: [58] [3800/6250] eta: 0:05:47 lr: 0.000050 grad: 0.1091 (0.1114) loss: 0.8575 (0.8577) time: 0.1475 data: 0.0647 max mem: 9305 +Train: [58] [3900/6250] eta: 0:05:32 lr: 0.000050 grad: 0.1108 (0.1114) loss: 0.8577 (0.8577) time: 0.1515 data: 0.0708 max mem: 9305 +Train: [58] [4000/6250] eta: 0:05:18 lr: 0.000050 grad: 0.1124 (0.1115) loss: 0.8603 (0.8576) time: 0.1233 data: 0.0400 max mem: 9305 +Train: [58] [4100/6250] eta: 0:05:04 lr: 0.000050 grad: 0.1070 (0.1115) loss: 0.8577 (0.8576) time: 0.1295 data: 0.0430 max mem: 9305 +Train: [58] [4200/6250] eta: 0:04:49 lr: 0.000050 grad: 0.1159 (0.1116) loss: 0.8585 (0.8575) time: 0.1249 data: 0.0462 max mem: 9305 +Train: [58] [4300/6250] eta: 0:04:35 lr: 0.000050 grad: 0.1089 (0.1117) loss: 0.8494 (0.8574) time: 0.1509 data: 0.0661 max mem: 9305 +Train: [58] [4400/6250] eta: 0:04:21 lr: 0.000050 grad: 0.1111 (0.1118) loss: 0.8474 (0.8572) time: 0.1397 data: 0.0567 max mem: 9305 +Train: [58] [4500/6250] eta: 0:04:08 lr: 0.000050 grad: 0.1120 (0.1118) loss: 0.8530 (0.8571) time: 0.0997 data: 0.0112 max mem: 9305 +Train: [58] [4600/6250] eta: 0:03:54 lr: 0.000050 grad: 0.1175 (0.1119) loss: 0.8545 (0.8570) time: 0.1230 data: 0.0450 max mem: 9305 +Train: [58] [4700/6250] eta: 0:03:39 lr: 0.000050 grad: 0.1029 (0.1120) loss: 0.8554 (0.8569) time: 0.1226 data: 0.0309 max mem: 9305 +Train: [58] [4800/6250] eta: 0:03:25 lr: 0.000050 grad: 0.1177 (0.1121) loss: 0.8470 (0.8567) time: 0.1477 data: 0.0686 max mem: 9305 +Train: [58] [4900/6250] eta: 0:03:11 lr: 0.000050 grad: 0.1080 (0.1122) loss: 0.8505 (0.8566) time: 0.1634 data: 0.0804 max mem: 9305 +Train: [58] [5000/6250] eta: 0:02:57 lr: 0.000050 grad: 0.1160 (0.1123) loss: 0.8555 (0.8565) time: 0.1556 data: 0.0775 max mem: 9305 +Train: [58] [5100/6250] eta: 0:02:43 lr: 0.000050 grad: 0.1122 (0.1123) loss: 0.8553 (0.8564) time: 0.1024 data: 0.0226 max mem: 9305 +Train: [58] [5200/6250] eta: 0:02:29 lr: 0.000050 grad: 0.1045 (0.1124) loss: 0.8550 (0.8563) time: 0.1873 data: 0.1047 max mem: 9305 +Train: [58] [5300/6250] eta: 0:02:14 lr: 0.000049 grad: 0.1105 (0.1124) loss: 0.8554 (0.8563) time: 0.1536 data: 0.0794 max mem: 9305 +Train: [58] [5400/6250] eta: 0:02:00 lr: 0.000049 grad: 0.1044 (0.1125) loss: 0.8559 (0.8563) time: 0.1335 data: 0.0534 max mem: 9305 +Train: [58] [5500/6250] eta: 0:01:46 lr: 0.000049 grad: 0.1176 (0.1126) loss: 0.8566 (0.8563) time: 0.1340 data: 0.0494 max mem: 9305 +Train: [58] [5600/6250] eta: 0:01:32 lr: 0.000049 grad: 0.1206 (0.1128) loss: 0.8539 (0.8562) time: 0.1385 data: 0.0598 max mem: 9305 +Train: [58] [5700/6250] eta: 0:01:18 lr: 0.000049 grad: 0.1105 (0.1129) loss: 0.8488 (0.8561) time: 0.1462 data: 0.0552 max mem: 9305 +Train: [58] [5800/6250] eta: 0:01:03 lr: 0.000049 grad: 0.1145 (0.1130) loss: 0.8547 (0.8560) time: 0.2240 data: 0.1398 max mem: 9305 +Train: [58] [5900/6250] eta: 0:00:49 lr: 0.000049 grad: 0.1111 (0.1132) loss: 0.8522 (0.8560) time: 0.1301 data: 0.0505 max mem: 9305 +Train: [58] [6000/6250] eta: 0:00:35 lr: 0.000049 grad: 0.1294 (0.1134) loss: 0.8517 (0.8558) time: 0.1620 data: 0.0823 max mem: 9305 +Train: [58] [6100/6250] eta: 0:00:21 lr: 0.000049 grad: 0.1136 (0.1135) loss: 0.8485 (0.8557) time: 0.1394 data: 0.0616 max mem: 9305 +Train: [58] [6200/6250] eta: 0:00:07 lr: 0.000049 grad: 0.1187 (0.1136) loss: 0.8552 (0.8556) time: 0.1372 data: 0.0526 max mem: 9305 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.1050 (0.1136) loss: 0.8525 (0.8556) time: 0.1597 data: 0.0807 max mem: 9305 +Train: [58] Total time: 0:14:54 (0.1431 s / it) +Averaged stats: lr: 0.000049 grad: 0.1050 (0.1136) loss: 0.8525 (0.8556) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:04:50 loss: 0.8904 (0.8904) time: 4.6919 data: 4.6550 max mem: 9305 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8778 (0.8766) time: 0.1485 data: 0.1199 max mem: 9305 +Eval (hcp-train-subset): [58] Total time: 0:00:14 (0.2275 s / it) +Averaged stats (hcp-train-subset): loss: 0.8778 (0.8766) +Eval (hcp-val): [58] [ 0/62] eta: 0:04:35 loss: 0.8789 (0.8789) time: 4.4455 data: 4.3819 max mem: 9305 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8762 (0.8784) time: 0.1486 data: 0.1201 max mem: 9305 +Eval (hcp-val): [58] Total time: 0:00:14 (0.2298 s / it) +Averaged stats (hcp-val): loss: 0.8762 (0.8784) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [59] [ 0/6250] eta: 7:50:03 lr: 0.000049 grad: 0.0630 (0.0630) loss: 0.8952 (0.8952) time: 4.5126 data: 4.1961 max mem: 9305 +Train: [59] [ 100/6250] eta: 0:20:37 lr: 0.000049 grad: 0.0893 (0.1021) loss: 0.8757 (0.8775) time: 0.1541 data: 0.0598 max mem: 9305 +Train: [59] [ 200/6250] eta: 0:18:09 lr: 0.000049 grad: 0.1040 (0.1028) loss: 0.8672 (0.8752) time: 0.1341 data: 0.0456 max mem: 9305 +Train: [59] [ 300/6250] eta: 0:16:32 lr: 0.000049 grad: 0.0949 (0.1029) loss: 0.8706 (0.8712) time: 0.1322 data: 0.0389 max mem: 9305 +Train: [59] [ 400/6250] eta: 0:15:38 lr: 0.000049 grad: 0.0995 (0.1029) loss: 0.8710 (0.8694) time: 0.1576 data: 0.0671 max mem: 9305 +Train: [59] [ 500/6250] eta: 0:14:57 lr: 0.000049 grad: 0.0914 (0.1020) loss: 0.8741 (0.8691) time: 0.1219 data: 0.0311 max mem: 9305 +Train: [59] [ 600/6250] eta: 0:14:22 lr: 0.000049 grad: 0.1020 (0.1020) loss: 0.8675 (0.8685) time: 0.1325 data: 0.0444 max mem: 9305 +Train: [59] [ 700/6250] eta: 0:13:56 lr: 0.000049 grad: 0.0990 (0.1020) loss: 0.8671 (0.8679) time: 0.1553 data: 0.0628 max mem: 9305 +Train: [59] [ 800/6250] eta: 0:13:44 lr: 0.000049 grad: 0.1078 (0.1025) loss: 0.8678 (0.8674) time: 0.1849 data: 0.0918 max mem: 9305 +Train: [59] [ 900/6250] eta: 0:13:27 lr: 0.000049 grad: 0.1000 (0.1031) loss: 0.8620 (0.8667) time: 0.1346 data: 0.0486 max mem: 9305 +Train: [59] [1000/6250] eta: 0:13:02 lr: 0.000049 grad: 0.1061 (0.1034) loss: 0.8598 (0.8659) time: 0.1396 data: 0.0576 max mem: 9305 +Train: [59] [1100/6250] eta: 0:12:44 lr: 0.000049 grad: 0.1052 (0.1037) loss: 0.8597 (0.8651) time: 0.1549 data: 0.0734 max mem: 9305 +Train: [59] [1200/6250] eta: 0:12:24 lr: 0.000049 grad: 0.1081 (0.1040) loss: 0.8503 (0.8644) time: 0.1405 data: 0.0567 max mem: 9305 +Train: [59] [1300/6250] eta: 0:12:03 lr: 0.000049 grad: 0.1079 (0.1045) loss: 0.8545 (0.8638) time: 0.1282 data: 0.0459 max mem: 9305 +Train: [59] [1400/6250] eta: 0:11:45 lr: 0.000049 grad: 0.1107 (0.1053) loss: 0.8498 (0.8630) time: 0.1276 data: 0.0426 max mem: 9305 +Train: [59] [1500/6250] eta: 0:11:29 lr: 0.000049 grad: 0.1089 (0.1058) loss: 0.8559 (0.8624) time: 0.1489 data: 0.0683 max mem: 9305 +Train: [59] [1600/6250] eta: 0:11:13 lr: 0.000049 grad: 0.1075 (0.1062) loss: 0.8588 (0.8618) time: 0.1614 data: 0.0715 max mem: 9305 +Train: [59] [1700/6250] eta: 0:10:58 lr: 0.000049 grad: 0.1080 (0.1066) loss: 0.8572 (0.8613) time: 0.1540 data: 0.0704 max mem: 9305 +Train: [59] [1800/6250] eta: 0:10:45 lr: 0.000049 grad: 0.1097 (0.1070) loss: 0.8548 (0.8608) time: 0.1490 data: 0.0616 max mem: 9305 +Train: [59] [1900/6250] eta: 0:10:29 lr: 0.000049 grad: 0.1141 (0.1076) loss: 0.8421 (0.8601) time: 0.1247 data: 0.0472 max mem: 9305 +Train: [59] [2000/6250] eta: 0:10:12 lr: 0.000049 grad: 0.1034 (0.1080) loss: 0.8603 (0.8597) time: 0.1431 data: 0.0579 max mem: 9305 +Train: [59] [2100/6250] eta: 0:09:59 lr: 0.000048 grad: 0.1138 (0.1083) loss: 0.8573 (0.8595) time: 0.1518 data: 0.0684 max mem: 9305 +Train: [59] [2200/6250] eta: 0:09:44 lr: 0.000048 grad: 0.1121 (0.1085) loss: 0.8563 (0.8592) time: 0.1457 data: 0.0601 max mem: 9305 +Train: [59] [2300/6250] eta: 0:09:29 lr: 0.000048 grad: 0.1103 (0.1089) loss: 0.8585 (0.8590) time: 0.1424 data: 0.0609 max mem: 9305 +Train: [59] [2400/6250] eta: 0:09:13 lr: 0.000048 grad: 0.1139 (0.1092) loss: 0.8540 (0.8588) time: 0.1365 data: 0.0432 max mem: 9305 +Train: [59] [2500/6250] eta: 0:08:56 lr: 0.000048 grad: 0.1174 (0.1095) loss: 0.8518 (0.8586) time: 0.0962 data: 0.0087 max mem: 9305 +Train: [59] [2600/6250] eta: 0:08:40 lr: 0.000048 grad: 0.1071 (0.1097) loss: 0.8566 (0.8586) time: 0.1162 data: 0.0322 max mem: 9305 +Train: [59] [2700/6250] eta: 0:08:22 lr: 0.000048 grad: 0.1166 (0.1100) loss: 0.8534 (0.8585) time: 0.1182 data: 0.0342 max mem: 9305 +Train: [59] [2800/6250] eta: 0:08:05 lr: 0.000048 grad: 0.1075 (0.1102) loss: 0.8586 (0.8584) time: 0.1033 data: 0.0164 max mem: 9305 +Train: [59] [2900/6250] eta: 0:07:49 lr: 0.000048 grad: 0.1156 (0.1103) loss: 0.8565 (0.8584) time: 0.1303 data: 0.0465 max mem: 9305 +Train: [59] [3000/6250] eta: 0:07:33 lr: 0.000048 grad: 0.1098 (0.1106) loss: 0.8605 (0.8584) time: 0.1264 data: 0.0384 max mem: 9305 +Train: [59] [3100/6250] eta: 0:07:18 lr: 0.000048 grad: 0.1152 (0.1107) loss: 0.8503 (0.8583) time: 0.1088 data: 0.0226 max mem: 9305 +Train: [59] [3200/6250] eta: 0:07:03 lr: 0.000048 grad: 0.1104 (0.1108) loss: 0.8551 (0.8582) time: 0.1224 data: 0.0392 max mem: 9305 +Train: [59] [3300/6250] eta: 0:06:51 lr: 0.000048 grad: 0.1185 (0.1111) loss: 0.8584 (0.8582) time: 0.2476 data: 0.1629 max mem: 9305 +Train: [59] [3400/6250] eta: 0:06:36 lr: 0.000048 grad: 0.1163 (0.1113) loss: 0.8579 (0.8581) time: 0.1307 data: 0.0484 max mem: 9305 +Train: [59] [3500/6250] eta: 0:06:21 lr: 0.000048 grad: 0.1117 (0.1114) loss: 0.8557 (0.8581) time: 0.1338 data: 0.0494 max mem: 9305 +Train: [59] [3600/6250] eta: 0:06:07 lr: 0.000048 grad: 0.1161 (0.1115) loss: 0.8497 (0.8581) time: 0.1383 data: 0.0562 max mem: 9305 +Train: [59] [3700/6250] eta: 0:05:53 lr: 0.000048 grad: 0.1157 (0.1115) loss: 0.8584 (0.8581) time: 0.1513 data: 0.0743 max mem: 9305 +Train: [59] [3800/6250] eta: 0:05:39 lr: 0.000048 grad: 0.1105 (0.1117) loss: 0.8561 (0.8581) time: 0.1352 data: 0.0457 max mem: 9305 +Train: [59] [3900/6250] eta: 0:05:25 lr: 0.000048 grad: 0.1077 (0.1117) loss: 0.8664 (0.8581) time: 0.1374 data: 0.0539 max mem: 9305 +Train: [59] [4000/6250] eta: 0:05:11 lr: 0.000048 grad: 0.1149 (0.1118) loss: 0.8560 (0.8582) time: 0.1421 data: 0.0564 max mem: 9305 +Train: [59] [4100/6250] eta: 0:04:57 lr: 0.000048 grad: 0.1124 (0.1119) loss: 0.8562 (0.8581) time: 0.1496 data: 0.0688 max mem: 9305 +Train: [59] [4200/6250] eta: 0:04:43 lr: 0.000048 grad: 0.1060 (0.1119) loss: 0.8610 (0.8582) time: 0.1271 data: 0.0470 max mem: 9305 +Train: [59] [4300/6250] eta: 0:04:30 lr: 0.000048 grad: 0.1117 (0.1120) loss: 0.8549 (0.8582) time: 0.1488 data: 0.0698 max mem: 9305 +Train: [59] [4400/6250] eta: 0:04:16 lr: 0.000048 grad: 0.1068 (0.1121) loss: 0.8572 (0.8582) time: 0.1337 data: 0.0501 max mem: 9305 +Train: [59] [4500/6250] eta: 0:04:03 lr: 0.000048 grad: 0.1079 (0.1121) loss: 0.8624 (0.8582) time: 0.1801 data: 0.0964 max mem: 9305 +Train: [59] [4600/6250] eta: 0:03:49 lr: 0.000048 grad: 0.1048 (0.1122) loss: 0.8542 (0.8581) time: 0.1251 data: 0.0460 max mem: 9305 +Train: [59] [4700/6250] eta: 0:03:35 lr: 0.000048 grad: 0.1089 (0.1123) loss: 0.8643 (0.8580) time: 0.1317 data: 0.0480 max mem: 9305 +Train: [59] [4800/6250] eta: 0:03:21 lr: 0.000048 grad: 0.1075 (0.1124) loss: 0.8559 (0.8580) time: 0.1223 data: 0.0405 max mem: 9305 +Train: [59] [4900/6250] eta: 0:03:07 lr: 0.000048 grad: 0.1108 (0.1124) loss: 0.8595 (0.8580) time: 0.1421 data: 0.0614 max mem: 9305 +Train: [59] [5000/6250] eta: 0:02:53 lr: 0.000048 grad: 0.1146 (0.1125) loss: 0.8561 (0.8579) time: 0.0921 data: 0.0002 max mem: 9305 +Train: [59] [5100/6250] eta: 0:02:39 lr: 0.000048 grad: 0.1147 (0.1126) loss: 0.8535 (0.8578) time: 0.1418 data: 0.0519 max mem: 9305 +Train: [59] [5200/6250] eta: 0:02:26 lr: 0.000047 grad: 0.1097 (0.1127) loss: 0.8564 (0.8578) time: 0.3280 data: 0.2251 max mem: 9305 +Train: [59] [5300/6250] eta: 0:02:12 lr: 0.000047 grad: 0.1131 (0.1128) loss: 0.8586 (0.8577) time: 0.1411 data: 0.0565 max mem: 9305 +Train: [59] [5400/6250] eta: 0:01:58 lr: 0.000047 grad: 0.1150 (0.1129) loss: 0.8519 (0.8576) time: 0.1782 data: 0.0952 max mem: 9305 +Train: [59] [5500/6250] eta: 0:01:44 lr: 0.000047 grad: 0.1202 (0.1130) loss: 0.8505 (0.8576) time: 0.1419 data: 0.0575 max mem: 9305 +Train: [59] [5600/6250] eta: 0:01:30 lr: 0.000047 grad: 0.1138 (0.1132) loss: 0.8546 (0.8575) time: 0.1344 data: 0.0500 max mem: 9305 +Train: [59] [5700/6250] eta: 0:01:16 lr: 0.000047 grad: 0.1102 (0.1133) loss: 0.8583 (0.8574) time: 0.1580 data: 0.0773 max mem: 9305 +Train: [59] [5800/6250] eta: 0:01:02 lr: 0.000047 grad: 0.1191 (0.1135) loss: 0.8469 (0.8573) time: 0.1403 data: 0.0574 max mem: 9305 +Train: [59] [5900/6250] eta: 0:00:48 lr: 0.000047 grad: 0.1174 (0.1135) loss: 0.8493 (0.8572) time: 0.1461 data: 0.0623 max mem: 9305 +Train: [59] [6000/6250] eta: 0:00:34 lr: 0.000047 grad: 0.1201 (0.1137) loss: 0.8529 (0.8571) time: 0.1606 data: 0.0816 max mem: 9305 +Train: [59] [6100/6250] eta: 0:00:20 lr: 0.000047 grad: 0.1179 (0.1138) loss: 0.8543 (0.8571) time: 0.1329 data: 0.0435 max mem: 9305 +Train: [59] [6200/6250] eta: 0:00:06 lr: 0.000047 grad: 0.1200 (0.1139) loss: 0.8519 (0.8571) time: 0.1575 data: 0.0786 max mem: 9305 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.1132 (0.1139) loss: 0.8581 (0.8571) time: 0.1358 data: 0.0544 max mem: 9305 +Train: [59] Total time: 0:14:37 (0.1404 s / it) +Averaged stats: lr: 0.000047 grad: 0.1132 (0.1139) loss: 0.8581 (0.8571) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:06:12 loss: 0.8837 (0.8837) time: 6.0131 data: 5.9777 max mem: 9305 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8742 (0.8757) time: 0.1304 data: 0.1018 max mem: 9305 +Eval (hcp-train-subset): [59] Total time: 0:00:13 (0.2168 s / it) +Averaged stats (hcp-train-subset): loss: 0.8742 (0.8757) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [59] [ 0/62] eta: 0:04:22 loss: 0.8777 (0.8777) time: 4.2345 data: 4.1898 max mem: 9305 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8758 (0.8778) time: 0.1239 data: 0.0940 max mem: 9305 +Eval (hcp-val): [59] Total time: 0:00:12 (0.2085 s / it) +Averaged stats (hcp-val): loss: 0.8758 (0.8778) +Making plots (hcp-val): example=59 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 8:18:06 lr: 0.000047 grad: 0.0807 (0.0807) loss: 0.8859 (0.8859) time: 4.7819 data: 4.5137 max mem: 9305 +Train: [60] [ 100/6250] eta: 0:20:23 lr: 0.000047 grad: 0.1044 (0.1418) loss: 0.8705 (0.8682) time: 0.1732 data: 0.0780 max mem: 9305 +Train: [60] [ 200/6250] eta: 0:16:40 lr: 0.000047 grad: 0.1107 (0.1291) loss: 0.8627 (0.8646) time: 0.1271 data: 0.0354 max mem: 9305 +Train: [60] [ 300/6250] eta: 0:15:05 lr: 0.000047 grad: 0.1072 (0.1242) loss: 0.8650 (0.8651) time: 0.1002 data: 0.0040 max mem: 9305 +Train: [60] [ 400/6250] eta: 0:14:20 lr: 0.000047 grad: 0.1127 (0.1210) loss: 0.8555 (0.8649) time: 0.1358 data: 0.0473 max mem: 9305 +Train: [60] [ 500/6250] eta: 0:13:51 lr: 0.000047 grad: 0.1079 (0.1194) loss: 0.8625 (0.8640) time: 0.1408 data: 0.0527 max mem: 9305 +Train: [60] [ 600/6250] eta: 0:13:13 lr: 0.000047 grad: 0.1203 (0.1185) loss: 0.8508 (0.8632) time: 0.1175 data: 0.0311 max mem: 9305 +Train: [60] [ 700/6250] eta: 0:12:45 lr: 0.000047 grad: 0.1215 (0.1187) loss: 0.8505 (0.8621) time: 0.1139 data: 0.0056 max mem: 9305 +Train: [60] [ 800/6250] eta: 0:12:19 lr: 0.000047 grad: 0.1184 (0.1186) loss: 0.8456 (0.8612) time: 0.1205 data: 0.0236 max mem: 9305 +Train: [60] [ 900/6250] eta: 0:12:01 lr: 0.000047 grad: 0.1061 (0.1181) loss: 0.8623 (0.8606) time: 0.1399 data: 0.0519 max mem: 9305 +Train: [60] [1000/6250] eta: 0:11:43 lr: 0.000047 grad: 0.1151 (0.1180) loss: 0.8545 (0.8602) time: 0.1402 data: 0.0570 max mem: 9305 +Train: [60] [1100/6250] eta: 0:11:31 lr: 0.000047 grad: 0.1109 (0.1178) loss: 0.8629 (0.8597) time: 0.1534 data: 0.0722 max mem: 9305 +Train: [60] [1200/6250] eta: 0:11:22 lr: 0.000047 grad: 0.1128 (0.1174) loss: 0.8554 (0.8594) time: 0.1499 data: 0.0639 max mem: 9305 +Train: [60] [1300/6250] eta: 0:11:18 lr: 0.000047 grad: 0.1103 (0.1174) loss: 0.8518 (0.8589) time: 0.1829 data: 0.1015 max mem: 9305 +Train: [60] [1400/6250] eta: 0:11:10 lr: 0.000047 grad: 0.1126 (0.1170) loss: 0.8571 (0.8586) time: 0.1600 data: 0.0814 max mem: 9305 +Train: [60] [1500/6250] eta: 0:11:01 lr: 0.000047 grad: 0.1110 (0.1169) loss: 0.8586 (0.8584) time: 0.1500 data: 0.0635 max mem: 9305 +Train: [60] [1600/6250] eta: 0:10:53 lr: 0.000047 grad: 0.1155 (0.1166) loss: 0.8504 (0.8581) time: 0.1677 data: 0.0747 max mem: 9305 +Train: [60] [1700/6250] eta: 0:10:45 lr: 0.000047 grad: 0.1056 (0.1164) loss: 0.8593 (0.8581) time: 0.1522 data: 0.0637 max mem: 9305 +Train: [60] [1800/6250] eta: 0:10:37 lr: 0.000047 grad: 0.1065 (0.1163) loss: 0.8546 (0.8580) time: 0.1836 data: 0.0941 max mem: 9305 +Train: [60] [1900/6250] eta: 0:10:26 lr: 0.000047 grad: 0.1052 (0.1162) loss: 0.8546 (0.8579) time: 0.1809 data: 0.0926 max mem: 9305 +Train: [60] [2000/6250] eta: 0:10:15 lr: 0.000047 grad: 0.1107 (0.1162) loss: 0.8562 (0.8578) time: 0.1791 data: 0.0914 max mem: 9305 +Train: [60] [2100/6250] eta: 0:10:01 lr: 0.000046 grad: 0.1085 (0.1161) loss: 0.8637 (0.8579) time: 0.1661 data: 0.0856 max mem: 9305 +Train: [60] [2200/6250] eta: 0:09:51 lr: 0.000046 grad: 0.1097 (0.1160) loss: 0.8559 (0.8580) time: 0.1930 data: 0.1071 max mem: 9305 +Train: [60] [2300/6250] eta: 0:09:37 lr: 0.000046 grad: 0.1125 (0.1159) loss: 0.8554 (0.8580) time: 0.1386 data: 0.0417 max mem: 9305 +Train: [60] [2400/6250] eta: 0:09:22 lr: 0.000046 grad: 0.1064 (0.1158) loss: 0.8543 (0.8579) time: 0.1500 data: 0.0656 max mem: 9305 +Train: [60] [2500/6250] eta: 0:09:09 lr: 0.000046 grad: 0.1153 (0.1157) loss: 0.8635 (0.8579) time: 0.1359 data: 0.0416 max mem: 9305 +Train: [60] [2600/6250] eta: 0:08:53 lr: 0.000046 grad: 0.1097 (0.1156) loss: 0.8620 (0.8580) time: 0.1393 data: 0.0585 max mem: 9305 +Train: [60] [2700/6250] eta: 0:08:38 lr: 0.000046 grad: 0.1087 (0.1156) loss: 0.8616 (0.8580) time: 0.1645 data: 0.0785 max mem: 9305 +Train: [60] [2800/6250] eta: 0:08:23 lr: 0.000046 grad: 0.1116 (0.1157) loss: 0.8580 (0.8580) time: 0.1333 data: 0.0378 max mem: 9305 +Train: [60] [2900/6250] eta: 0:08:08 lr: 0.000046 grad: 0.1136 (0.1157) loss: 0.8587 (0.8580) time: 0.1358 data: 0.0424 max mem: 9305 +Train: [60] [3000/6250] eta: 0:07:56 lr: 0.000046 grad: 0.1205 (0.1158) loss: 0.8586 (0.8580) time: 0.1789 data: 0.1022 max mem: 9305 +Train: [60] [3100/6250] eta: 0:07:43 lr: 0.000046 grad: 0.1064 (0.1157) loss: 0.8626 (0.8580) time: 0.1658 data: 0.0834 max mem: 9305 +Train: [60] [3200/6250] eta: 0:07:30 lr: 0.000046 grad: 0.1225 (0.1158) loss: 0.8509 (0.8579) time: 0.1727 data: 0.0831 max mem: 9305 +Train: [60] [3300/6250] eta: 0:07:16 lr: 0.000046 grad: 0.1141 (0.1158) loss: 0.8534 (0.8579) time: 0.1455 data: 0.0573 max mem: 9305 +Train: [60] [3400/6250] eta: 0:07:02 lr: 0.000046 grad: 0.1100 (0.1159) loss: 0.8554 (0.8579) time: 0.1601 data: 0.0765 max mem: 9305 +Train: [60] [3500/6250] eta: 0:06:47 lr: 0.000046 grad: 0.1080 (0.1159) loss: 0.8611 (0.8579) time: 0.1715 data: 0.0897 max mem: 9305 +Train: [60] [3600/6250] eta: 0:06:33 lr: 0.000046 grad: 0.1233 (0.1159) loss: 0.8534 (0.8579) time: 0.1810 data: 0.1003 max mem: 9305 +Train: [60] [3700/6250] eta: 0:06:19 lr: 0.000046 grad: 0.1173 (0.1160) loss: 0.8555 (0.8579) time: 0.1699 data: 0.0811 max mem: 9305 +Train: [60] [3800/6250] eta: 0:06:04 lr: 0.000046 grad: 0.1042 (0.1160) loss: 0.8573 (0.8579) time: 0.1531 data: 0.0728 max mem: 9305 +Train: [60] [3900/6250] eta: 0:05:49 lr: 0.000046 grad: 0.1165 (0.1160) loss: 0.8537 (0.8578) time: 0.1652 data: 0.0788 max mem: 9305 +Train: [60] [4000/6250] eta: 0:05:36 lr: 0.000046 grad: 0.1116 (0.1160) loss: 0.8592 (0.8577) time: 0.1877 data: 0.1005 max mem: 9305 +Train: [60] [4100/6250] eta: 0:05:21 lr: 0.000046 grad: 0.1069 (0.1160) loss: 0.8559 (0.8577) time: 0.1532 data: 0.0583 max mem: 9305 +Train: [60] [4200/6250] eta: 0:05:06 lr: 0.000046 grad: 0.1116 (0.1160) loss: 0.8497 (0.8577) time: 0.1388 data: 0.0509 max mem: 9305 +Train: [60] [4300/6250] eta: 0:04:52 lr: 0.000046 grad: 0.1084 (0.1160) loss: 0.8582 (0.8576) time: 0.1567 data: 0.0556 max mem: 9305 +Train: [60] [4400/6250] eta: 0:04:37 lr: 0.000046 grad: 0.1073 (0.1159) loss: 0.8604 (0.8575) time: 0.1546 data: 0.0731 max mem: 9305 +Train: [60] [4500/6250] eta: 0:04:22 lr: 0.000046 grad: 0.1095 (0.1160) loss: 0.8584 (0.8574) time: 0.1922 data: 0.1015 max mem: 9305 +Train: [60] [4600/6250] eta: 0:04:08 lr: 0.000046 grad: 0.1091 (0.1160) loss: 0.8552 (0.8573) time: 0.1666 data: 0.0743 max mem: 9305 +Train: [60] [4700/6250] eta: 0:03:53 lr: 0.000046 grad: 0.1052 (0.1159) loss: 0.8423 (0.8572) time: 0.1990 data: 0.0976 max mem: 9305 +Train: [60] [4800/6250] eta: 0:03:38 lr: 0.000046 grad: 0.1094 (0.1159) loss: 0.8562 (0.8572) time: 0.1382 data: 0.0484 max mem: 9305 +Train: [60] [4900/6250] eta: 0:03:23 lr: 0.000046 grad: 0.1069 (0.1160) loss: 0.8506 (0.8571) time: 0.1525 data: 0.0639 max mem: 9305 +Train: [60] [5000/6250] eta: 0:03:08 lr: 0.000046 grad: 0.1108 (0.1160) loss: 0.8499 (0.8571) time: 0.1590 data: 0.0757 max mem: 9305 +Train: [60] [5100/6250] eta: 0:02:53 lr: 0.000046 grad: 0.1055 (0.1160) loss: 0.8610 (0.8570) time: 0.1381 data: 0.0513 max mem: 9305 +Train: [60] [5200/6250] eta: 0:02:38 lr: 0.000045 grad: 0.1130 (0.1160) loss: 0.8514 (0.8570) time: 0.1367 data: 0.0503 max mem: 9305 +Train: [60] [5300/6250] eta: 0:02:23 lr: 0.000045 grad: 0.1122 (0.1161) loss: 0.8516 (0.8569) time: 0.1408 data: 0.0554 max mem: 9305 +Train: [60] [5400/6250] eta: 0:02:08 lr: 0.000045 grad: 0.1153 (0.1161) loss: 0.8588 (0.8569) time: 0.1495 data: 0.0668 max mem: 9305 +Train: [60] [5500/6250] eta: 0:01:52 lr: 0.000045 grad: 0.1055 (0.1160) loss: 0.8528 (0.8569) time: 0.1440 data: 0.0601 max mem: 9305 +Train: [60] [5600/6250] eta: 0:01:37 lr: 0.000045 grad: 0.1070 (0.1160) loss: 0.8537 (0.8569) time: 0.1476 data: 0.0646 max mem: 9305 +Train: [60] [5700/6250] eta: 0:01:22 lr: 0.000045 grad: 0.1139 (0.1160) loss: 0.8519 (0.8568) time: 0.1405 data: 0.0584 max mem: 9305 +Train: [60] [5800/6250] eta: 0:01:07 lr: 0.000045 grad: 0.1098 (0.1160) loss: 0.8550 (0.8567) time: 0.1406 data: 0.0502 max mem: 9305 +Train: [60] [5900/6250] eta: 0:00:52 lr: 0.000045 grad: 0.1152 (0.1161) loss: 0.8547 (0.8567) time: 0.1450 data: 0.0595 max mem: 9305 +Train: [60] [6000/6250] eta: 0:00:37 lr: 0.000045 grad: 0.1067 (0.1161) loss: 0.8503 (0.8566) time: 0.1360 data: 0.0462 max mem: 9305 +Train: [60] [6100/6250] eta: 0:00:22 lr: 0.000045 grad: 0.1178 (0.1161) loss: 0.8501 (0.8566) time: 0.1309 data: 0.0370 max mem: 9305 +Train: [60] [6200/6250] eta: 0:00:07 lr: 0.000045 grad: 0.1176 (0.1162) loss: 0.8511 (0.8565) time: 0.1800 data: 0.0893 max mem: 9305 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1179 (0.1163) loss: 0.8472 (0.8564) time: 0.1610 data: 0.0503 max mem: 9305 +Train: [60] Total time: 0:15:51 (0.1523 s / it) +Averaged stats: lr: 0.000045 grad: 0.1179 (0.1163) loss: 0.8472 (0.8564) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:06:03 loss: 0.8897 (0.8897) time: 5.8585 data: 5.8229 max mem: 9305 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8766 (0.8770) time: 0.1564 data: 0.1275 max mem: 9305 +Eval (hcp-train-subset): [60] Total time: 0:00:14 (0.2320 s / it) +Averaged stats (hcp-train-subset): loss: 0.8766 (0.8770) +Eval (hcp-val): [60] [ 0/62] eta: 0:05:00 loss: 0.8744 (0.8744) time: 4.8407 data: 4.8068 max mem: 9305 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.8767 (0.8778) time: 0.1143 data: 0.0852 max mem: 9305 +Eval (hcp-val): [60] Total time: 0:00:16 (0.2712 s / it) +Averaged stats (hcp-val): loss: 0.8767 (0.8778) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [61] [ 0/6250] eta: 11:27:49 lr: 0.000045 grad: 0.1004 (0.1004) loss: 0.8674 (0.8674) time: 6.6031 data: 6.5017 max mem: 9305 +Train: [61] [ 100/6250] eta: 0:22:45 lr: 0.000045 grad: 0.1088 (0.1068) loss: 0.8656 (0.8754) time: 0.1705 data: 0.0658 max mem: 9305 +Train: [61] [ 200/6250] eta: 0:19:48 lr: 0.000045 grad: 0.1087 (0.1179) loss: 0.8586 (0.8683) time: 0.1843 data: 0.0803 max mem: 9305 +Train: [61] [ 300/6250] eta: 0:18:22 lr: 0.000045 grad: 0.1029 (0.1165) loss: 0.8579 (0.8652) time: 0.1889 data: 0.0845 max mem: 9305 +Train: [61] [ 400/6250] eta: 0:17:27 lr: 0.000045 grad: 0.1145 (0.1158) loss: 0.8477 (0.8630) time: 0.1579 data: 0.0700 max mem: 9305 +Train: [61] [ 500/6250] eta: 0:16:34 lr: 0.000045 grad: 0.1090 (0.1148) loss: 0.8578 (0.8618) time: 0.1493 data: 0.0495 max mem: 9305 +Train: [61] [ 600/6250] eta: 0:16:13 lr: 0.000045 grad: 0.1058 (0.1140) loss: 0.8569 (0.8612) time: 0.1783 data: 0.0888 max mem: 9305 +Train: [61] [ 700/6250] eta: 0:15:42 lr: 0.000045 grad: 0.1155 (0.1136) loss: 0.8588 (0.8607) time: 0.1268 data: 0.0333 max mem: 9305 +Train: [61] [ 800/6250] eta: 0:15:31 lr: 0.000045 grad: 0.1088 (0.1136) loss: 0.8541 (0.8606) time: 0.1993 data: 0.1005 max mem: 9305 +Train: [61] [ 900/6250] eta: 0:15:07 lr: 0.000045 grad: 0.1016 (0.1130) loss: 0.8739 (0.8608) time: 0.1441 data: 0.0532 max mem: 9305 +Train: [61] [1000/6250] eta: 0:14:40 lr: 0.000045 grad: 0.1097 (0.1128) loss: 0.8621 (0.8609) time: 0.1572 data: 0.0683 max mem: 9305 +Train: [61] [1100/6250] eta: 0:14:19 lr: 0.000045 grad: 0.1152 (0.1128) loss: 0.8495 (0.8607) time: 0.1651 data: 0.0702 max mem: 9305 +Train: [61] [1200/6250] eta: 0:14:05 lr: 0.000045 grad: 0.1207 (0.1132) loss: 0.8552 (0.8604) time: 0.1779 data: 0.0912 max mem: 9305 +Train: [61] [1300/6250] eta: 0:13:44 lr: 0.000045 grad: 0.1091 (0.1131) loss: 0.8530 (0.8603) time: 0.1684 data: 0.0805 max mem: 9305 +Train: [61] [1400/6250] eta: 0:13:26 lr: 0.000045 grad: 0.1153 (0.1134) loss: 0.8457 (0.8598) time: 0.1294 data: 0.0366 max mem: 9305 +Train: [61] [1500/6250] eta: 0:13:13 lr: 0.000045 grad: 0.1051 (0.1135) loss: 0.8553 (0.8595) time: 0.1545 data: 0.0550 max mem: 9305 +Train: [61] [1600/6250] eta: 0:12:55 lr: 0.000045 grad: 0.1140 (0.1137) loss: 0.8578 (0.8592) time: 0.1757 data: 0.0826 max mem: 9305 +Train: [61] [1700/6250] eta: 0:12:41 lr: 0.000045 grad: 0.1137 (0.1140) loss: 0.8492 (0.8589) time: 0.1618 data: 0.0730 max mem: 9305 +Train: [61] [1800/6250] eta: 0:12:22 lr: 0.000045 grad: 0.1118 (0.1141) loss: 0.8563 (0.8586) time: 0.1701 data: 0.0843 max mem: 9305 +Train: [61] [1900/6250] eta: 0:12:03 lr: 0.000045 grad: 0.1147 (0.1143) loss: 0.8557 (0.8583) time: 0.1660 data: 0.0828 max mem: 9305 +Train: [61] [2000/6250] eta: 0:11:47 lr: 0.000045 grad: 0.1100 (0.1144) loss: 0.8518 (0.8581) time: 0.1615 data: 0.0748 max mem: 9305 +Train: [61] [2100/6250] eta: 0:11:32 lr: 0.000044 grad: 0.1149 (0.1144) loss: 0.8500 (0.8580) time: 0.1818 data: 0.0954 max mem: 9305 +Train: [61] [2200/6250] eta: 0:11:14 lr: 0.000044 grad: 0.1128 (0.1145) loss: 0.8488 (0.8578) time: 0.1780 data: 0.0893 max mem: 9305 +Train: [61] [2300/6250] eta: 0:10:57 lr: 0.000044 grad: 0.1084 (0.1145) loss: 0.8567 (0.8576) time: 0.1381 data: 0.0435 max mem: 9305 +Train: [61] [2400/6250] eta: 0:10:41 lr: 0.000044 grad: 0.1080 (0.1144) loss: 0.8576 (0.8575) time: 0.1582 data: 0.0662 max mem: 9305 +Train: [61] [2500/6250] eta: 0:10:23 lr: 0.000044 grad: 0.1101 (0.1144) loss: 0.8569 (0.8574) time: 0.1666 data: 0.0808 max mem: 9305 +Train: [61] [2600/6250] eta: 0:10:06 lr: 0.000044 grad: 0.0994 (0.1142) loss: 0.8591 (0.8573) time: 0.1393 data: 0.0498 max mem: 9305 +Train: [61] [2700/6250] eta: 0:09:49 lr: 0.000044 grad: 0.1228 (0.1142) loss: 0.8572 (0.8573) time: 0.1800 data: 0.0930 max mem: 9305 +Train: [61] [2800/6250] eta: 0:09:32 lr: 0.000044 grad: 0.1155 (0.1142) loss: 0.8489 (0.8572) time: 0.1689 data: 0.0840 max mem: 9305 +Train: [61] [2900/6250] eta: 0:09:15 lr: 0.000044 grad: 0.1150 (0.1144) loss: 0.8516 (0.8570) time: 0.1749 data: 0.0925 max mem: 9305 +Train: [61] [3000/6250] eta: 0:09:01 lr: 0.000044 grad: 0.1146 (0.1144) loss: 0.8485 (0.8569) time: 0.1421 data: 0.0568 max mem: 9305 +Train: [61] [3100/6250] eta: 0:08:43 lr: 0.000044 grad: 0.1118 (0.1144) loss: 0.8560 (0.8568) time: 0.1516 data: 0.0670 max mem: 9305 +Train: [61] [3200/6250] eta: 0:08:28 lr: 0.000044 grad: 0.1114 (0.1144) loss: 0.8549 (0.8568) time: 0.2017 data: 0.1158 max mem: 9305 +Train: [61] [3300/6250] eta: 0:08:10 lr: 0.000044 grad: 0.1126 (0.1145) loss: 0.8562 (0.8567) time: 0.1596 data: 0.0748 max mem: 9305 +Train: [61] [3400/6250] eta: 0:07:54 lr: 0.000044 grad: 0.1102 (0.1145) loss: 0.8602 (0.8567) time: 0.1605 data: 0.0751 max mem: 9305 +Train: [61] [3500/6250] eta: 0:07:36 lr: 0.000044 grad: 0.1155 (0.1146) loss: 0.8545 (0.8567) time: 0.1467 data: 0.0610 max mem: 9305 +Train: [61] [3600/6250] eta: 0:07:19 lr: 0.000044 grad: 0.1050 (0.1146) loss: 0.8539 (0.8566) time: 0.1771 data: 0.0892 max mem: 9305 +Train: [61] [3700/6250] eta: 0:07:04 lr: 0.000044 grad: 0.1125 (0.1147) loss: 0.8522 (0.8565) time: 0.1813 data: 0.0981 max mem: 9305 +Train: [61] [3800/6250] eta: 0:06:47 lr: 0.000044 grad: 0.1168 (0.1148) loss: 0.8545 (0.8564) time: 0.1715 data: 0.0822 max mem: 9305 +Train: [61] [3900/6250] eta: 0:06:30 lr: 0.000044 grad: 0.1110 (0.1150) loss: 0.8537 (0.8564) time: 0.1417 data: 0.0557 max mem: 9305 +Train: [61] [4000/6250] eta: 0:06:12 lr: 0.000044 grad: 0.1111 (0.1151) loss: 0.8574 (0.8563) time: 0.1563 data: 0.0705 max mem: 9305 +Train: [61] [4100/6250] eta: 0:05:55 lr: 0.000044 grad: 0.1208 (0.1152) loss: 0.8527 (0.8563) time: 0.1473 data: 0.0540 max mem: 9305 +Train: [61] [4200/6250] eta: 0:05:38 lr: 0.000044 grad: 0.1096 (0.1153) loss: 0.8591 (0.8563) time: 0.1588 data: 0.0732 max mem: 9305 +Train: [61] [4300/6250] eta: 0:05:22 lr: 0.000044 grad: 0.1144 (0.1153) loss: 0.8600 (0.8563) time: 0.1639 data: 0.0746 max mem: 9305 +Train: [61] [4400/6250] eta: 0:05:06 lr: 0.000044 grad: 0.1146 (0.1153) loss: 0.8590 (0.8563) time: 0.1751 data: 0.0880 max mem: 9305 +Train: [61] [4500/6250] eta: 0:04:49 lr: 0.000044 grad: 0.1142 (0.1154) loss: 0.8570 (0.8563) time: 0.1465 data: 0.0521 max mem: 9305 +Train: [61] [4600/6250] eta: 0:04:32 lr: 0.000044 grad: 0.1052 (0.1154) loss: 0.8669 (0.8564) time: 0.1319 data: 0.0390 max mem: 9305 +Train: [61] [4700/6250] eta: 0:04:15 lr: 0.000044 grad: 0.1182 (0.1154) loss: 0.8578 (0.8564) time: 0.1671 data: 0.0712 max mem: 9305 +Train: [61] [4800/6250] eta: 0:03:59 lr: 0.000044 grad: 0.1113 (0.1155) loss: 0.8602 (0.8564) time: 0.1920 data: 0.1053 max mem: 9305 +Train: [61] [4900/6250] eta: 0:03:42 lr: 0.000044 grad: 0.1168 (0.1155) loss: 0.8579 (0.8564) time: 0.1566 data: 0.0709 max mem: 9305 +Train: [61] [5000/6250] eta: 0:03:26 lr: 0.000044 grad: 0.1122 (0.1155) loss: 0.8606 (0.8564) time: 0.2493 data: 0.1670 max mem: 9305 +Train: [61] [5100/6250] eta: 0:03:09 lr: 0.000044 grad: 0.1172 (0.1156) loss: 0.8596 (0.8564) time: 0.1108 data: 0.0206 max mem: 9305 +Train: [61] [5200/6250] eta: 0:02:52 lr: 0.000044 grad: 0.1130 (0.1158) loss: 0.8595 (0.8565) time: 0.1468 data: 0.0468 max mem: 9305 +Train: [61] [5300/6250] eta: 0:02:36 lr: 0.000043 grad: 0.1305 (0.1160) loss: 0.8535 (0.8565) time: 0.1372 data: 0.0457 max mem: 9305 +Train: [61] [5400/6250] eta: 0:02:19 lr: 0.000043 grad: 0.1116 (0.1161) loss: 0.8538 (0.8565) time: 0.1515 data: 0.0644 max mem: 9305 +Train: [61] [5500/6250] eta: 0:02:03 lr: 0.000043 grad: 0.1220 (0.1166) loss: 0.8553 (0.8565) time: 0.1618 data: 0.0707 max mem: 9305 +Train: [61] [5600/6250] eta: 0:01:46 lr: 0.000043 grad: 0.1168 (0.1167) loss: 0.8566 (0.8564) time: 0.1178 data: 0.0250 max mem: 9305 +Train: [61] [5700/6250] eta: 0:01:30 lr: 0.000043 grad: 0.1336 (0.1169) loss: 0.8517 (0.8564) time: 0.1683 data: 0.0577 max mem: 9305 +Train: [61] [5800/6250] eta: 0:01:13 lr: 0.000043 grad: 0.1151 (0.1170) loss: 0.8545 (0.8563) time: 0.1463 data: 0.0600 max mem: 9305 +Train: [61] [5900/6250] eta: 0:00:57 lr: 0.000043 grad: 0.1200 (0.1171) loss: 0.8554 (0.8564) time: 0.1522 data: 0.0543 max mem: 9305 +Train: [61] [6000/6250] eta: 0:00:40 lr: 0.000043 grad: 0.1157 (0.1172) loss: 0.8575 (0.8563) time: 0.1309 data: 0.0499 max mem: 9305 +Train: [61] [6100/6250] eta: 0:00:24 lr: 0.000043 grad: 0.1173 (0.1173) loss: 0.8524 (0.8563) time: 0.1511 data: 0.0716 max mem: 9305 +Train: [61] [6200/6250] eta: 0:00:08 lr: 0.000043 grad: 0.1145 (0.1174) loss: 0.8615 (0.8563) time: 0.1338 data: 0.0516 max mem: 9305 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1136 (0.1175) loss: 0.8539 (0.8563) time: 0.1252 data: 0.0404 max mem: 9305 +Train: [61] Total time: 0:16:58 (0.1629 s / it) +Averaged stats: lr: 0.000043 grad: 0.1136 (0.1175) loss: 0.8539 (0.8563) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:05:13 loss: 0.8892 (0.8892) time: 5.0620 data: 5.0282 max mem: 9305 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8755 (0.8755) time: 0.1599 data: 0.1310 max mem: 9305 +Eval (hcp-train-subset): [61] Total time: 0:00:14 (0.2304 s / it) +Averaged stats (hcp-train-subset): loss: 0.8755 (0.8755) +Eval (hcp-val): [61] [ 0/62] eta: 0:05:12 loss: 0.8758 (0.8758) time: 5.0346 data: 4.9734 max mem: 9305 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8772 (0.8786) time: 0.1564 data: 0.1271 max mem: 9305 +Eval (hcp-val): [61] Total time: 0:00:16 (0.2620 s / it) +Averaged stats (hcp-val): loss: 0.8772 (0.8786) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [62] [ 0/6250] eta: 10:24:52 lr: 0.000043 grad: 0.0875 (0.0875) loss: 0.8880 (0.8880) time: 5.9988 data: 5.6980 max mem: 9305 +Train: [62] [ 100/6250] eta: 0:24:03 lr: 0.000043 grad: 0.1112 (0.1220) loss: 0.8743 (0.8815) time: 0.1640 data: 0.0475 max mem: 9305 +Train: [62] [ 200/6250] eta: 0:20:37 lr: 0.000043 grad: 0.1096 (0.1166) loss: 0.8690 (0.8744) time: 0.1841 data: 0.0830 max mem: 9305 +Train: [62] [ 300/6250] eta: 0:19:27 lr: 0.000043 grad: 0.0989 (0.1141) loss: 0.8602 (0.8712) time: 0.2371 data: 0.1423 max mem: 9305 +Train: [62] [ 400/6250] eta: 0:17:57 lr: 0.000043 grad: 0.1006 (0.1127) loss: 0.8667 (0.8698) time: 0.1835 data: 0.0900 max mem: 9305 +Train: [62] [ 500/6250] eta: 0:17:29 lr: 0.000043 grad: 0.1011 (0.1104) loss: 0.8635 (0.8686) time: 0.1150 data: 0.0003 max mem: 9305 +Train: [62] [ 600/6250] eta: 0:16:54 lr: 0.000043 grad: 0.0970 (0.1100) loss: 0.8671 (0.8680) time: 0.1743 data: 0.0827 max mem: 9305 +Train: [62] [ 700/6250] eta: 0:16:31 lr: 0.000043 grad: 0.0992 (0.1095) loss: 0.8577 (0.8677) time: 0.1618 data: 0.0633 max mem: 9305 +Train: [62] [ 800/6250] eta: 0:16:09 lr: 0.000043 grad: 0.1081 (0.1095) loss: 0.8639 (0.8669) time: 0.1569 data: 0.0643 max mem: 9305 +Train: [62] [ 900/6250] eta: 0:15:55 lr: 0.000043 grad: 0.1022 (0.1094) loss: 0.8624 (0.8667) time: 0.1833 data: 0.0853 max mem: 9305 +Train: [62] [1000/6250] eta: 0:15:28 lr: 0.000043 grad: 0.1064 (0.1092) loss: 0.8658 (0.8665) time: 0.1468 data: 0.0665 max mem: 9305 +Train: [62] [1100/6250] eta: 0:14:59 lr: 0.000043 grad: 0.1029 (0.1087) loss: 0.8633 (0.8663) time: 0.1097 data: 0.0081 max mem: 9305 +Train: [62] [1200/6250] eta: 0:14:35 lr: 0.000043 grad: 0.1016 (0.1086) loss: 0.8645 (0.8660) time: 0.1527 data: 0.0616 max mem: 9305 +Train: [62] [1300/6250] eta: 0:14:10 lr: 0.000043 grad: 0.1051 (0.1085) loss: 0.8582 (0.8655) time: 0.1593 data: 0.0725 max mem: 9305 +Train: [62] [1400/6250] eta: 0:13:50 lr: 0.000043 grad: 0.1068 (0.1088) loss: 0.8570 (0.8651) time: 0.1451 data: 0.0522 max mem: 9305 +Train: [62] [1500/6250] eta: 0:13:39 lr: 0.000043 grad: 0.1051 (0.1096) loss: 0.8625 (0.8647) time: 0.2119 data: 0.1199 max mem: 9305 +Train: [62] [1600/6250] eta: 0:13:22 lr: 0.000043 grad: 0.1186 (0.1098) loss: 0.8585 (0.8643) time: 0.1632 data: 0.0657 max mem: 9305 +Train: [62] [1700/6250] eta: 0:13:04 lr: 0.000043 grad: 0.1144 (0.1099) loss: 0.8574 (0.8639) time: 0.1870 data: 0.1022 max mem: 9305 +Train: [62] [1800/6250] eta: 0:12:53 lr: 0.000043 grad: 0.0999 (0.1100) loss: 0.8669 (0.8637) time: 0.1817 data: 0.0937 max mem: 9305 +Train: [62] [1900/6250] eta: 0:12:39 lr: 0.000043 grad: 0.1109 (0.1102) loss: 0.8585 (0.8634) time: 0.1194 data: 0.0127 max mem: 9305 +Train: [62] [2000/6250] eta: 0:12:21 lr: 0.000043 grad: 0.1080 (0.1104) loss: 0.8600 (0.8631) time: 0.1644 data: 0.0734 max mem: 9305 +Train: [62] [2100/6250] eta: 0:12:00 lr: 0.000043 grad: 0.1155 (0.1107) loss: 0.8571 (0.8630) time: 0.1515 data: 0.0596 max mem: 9305 +Train: [62] [2200/6250] eta: 0:11:44 lr: 0.000042 grad: 0.1081 (0.1111) loss: 0.8589 (0.8627) time: 0.1343 data: 0.0342 max mem: 9305 +Train: [62] [2300/6250] eta: 0:11:26 lr: 0.000042 grad: 0.1137 (0.1112) loss: 0.8601 (0.8624) time: 0.1814 data: 0.0585 max mem: 9305 +Train: [62] [2400/6250] eta: 0:11:07 lr: 0.000042 grad: 0.1083 (0.1112) loss: 0.8613 (0.8623) time: 0.1433 data: 0.0512 max mem: 9305 +Train: [62] [2500/6250] eta: 0:10:54 lr: 0.000042 grad: 0.1174 (0.1114) loss: 0.8516 (0.8621) time: 0.1568 data: 0.0287 max mem: 9305 +Train: [62] [2600/6250] eta: 0:10:39 lr: 0.000042 grad: 0.1135 (0.1117) loss: 0.8533 (0.8619) time: 0.1620 data: 0.0378 max mem: 9305 +Train: [62] [2700/6250] eta: 0:10:19 lr: 0.000042 grad: 0.1209 (0.1119) loss: 0.8520 (0.8616) time: 0.1514 data: 0.0652 max mem: 9305 +Train: [62] [2800/6250] eta: 0:09:57 lr: 0.000042 grad: 0.1135 (0.1120) loss: 0.8510 (0.8614) time: 0.1413 data: 0.0596 max mem: 9305 +Train: [62] [2900/6250] eta: 0:09:39 lr: 0.000042 grad: 0.1078 (0.1120) loss: 0.8585 (0.8612) time: 0.1319 data: 0.0387 max mem: 9305 +Train: [62] [3000/6250] eta: 0:09:21 lr: 0.000042 grad: 0.1137 (0.1121) loss: 0.8578 (0.8610) time: 0.1399 data: 0.0506 max mem: 9305 +Train: [62] [3100/6250] eta: 0:09:02 lr: 0.000042 grad: 0.1264 (0.1123) loss: 0.8509 (0.8608) time: 0.1255 data: 0.0398 max mem: 9305 +Train: [62] [3200/6250] eta: 0:08:43 lr: 0.000042 grad: 0.1167 (0.1126) loss: 0.8566 (0.8606) time: 0.1515 data: 0.0663 max mem: 9305 +Train: [62] [3300/6250] eta: 0:08:26 lr: 0.000042 grad: 0.1179 (0.1126) loss: 0.8540 (0.8605) time: 0.1672 data: 0.0749 max mem: 9305 +Train: [62] [3400/6250] eta: 0:08:07 lr: 0.000042 grad: 0.1123 (0.1127) loss: 0.8557 (0.8603) time: 0.1655 data: 0.0723 max mem: 9305 +Train: [62] [3500/6250] eta: 0:07:50 lr: 0.000042 grad: 0.1147 (0.1128) loss: 0.8601 (0.8602) time: 0.2383 data: 0.1524 max mem: 9305 +Train: [62] [3600/6250] eta: 0:07:31 lr: 0.000042 grad: 0.1125 (0.1131) loss: 0.8596 (0.8601) time: 0.1431 data: 0.0577 max mem: 9305 +Train: [62] [3700/6250] eta: 0:07:12 lr: 0.000042 grad: 0.1135 (0.1133) loss: 0.8547 (0.8600) time: 0.1505 data: 0.0687 max mem: 9305 +Train: [62] [3800/6250] eta: 0:06:54 lr: 0.000042 grad: 0.1129 (0.1133) loss: 0.8567 (0.8599) time: 0.1573 data: 0.0556 max mem: 9305 +Train: [62] [3900/6250] eta: 0:06:36 lr: 0.000042 grad: 0.1118 (0.1135) loss: 0.8582 (0.8597) time: 0.1145 data: 0.0248 max mem: 9305 +Train: [62] [4000/6250] eta: 0:06:19 lr: 0.000042 grad: 0.1141 (0.1137) loss: 0.8568 (0.8595) time: 0.1434 data: 0.0511 max mem: 9305 +Train: [62] [4100/6250] eta: 0:06:01 lr: 0.000042 grad: 0.1213 (0.1139) loss: 0.8503 (0.8593) time: 0.1089 data: 0.0095 max mem: 9305 +Train: [62] [4200/6250] eta: 0:05:44 lr: 0.000042 grad: 0.1125 (0.1140) loss: 0.8619 (0.8592) time: 0.1743 data: 0.0875 max mem: 9305 +Train: [62] [4300/6250] eta: 0:05:27 lr: 0.000042 grad: 0.1190 (0.1142) loss: 0.8554 (0.8591) time: 0.1237 data: 0.0324 max mem: 9305 +Train: [62] [4400/6250] eta: 0:05:11 lr: 0.000042 grad: 0.1122 (0.1143) loss: 0.8527 (0.8590) time: 0.1532 data: 0.0664 max mem: 9305 +Train: [62] [4500/6250] eta: 0:04:54 lr: 0.000042 grad: 0.1087 (0.1144) loss: 0.8586 (0.8589) time: 0.1540 data: 0.0663 max mem: 9305 +Train: [62] [4600/6250] eta: 0:04:38 lr: 0.000042 grad: 0.1224 (0.1146) loss: 0.8501 (0.8588) time: 0.1876 data: 0.0862 max mem: 9305 +Train: [62] [4700/6250] eta: 0:04:22 lr: 0.000042 grad: 0.1178 (0.1146) loss: 0.8514 (0.8588) time: 0.2648 data: 0.0878 max mem: 9305 +Train: [62] [4800/6250] eta: 0:04:05 lr: 0.000042 grad: 0.1062 (0.1146) loss: 0.8629 (0.8587) time: 0.1574 data: 0.0571 max mem: 9305 +Train: [62] [4900/6250] eta: 0:03:48 lr: 0.000042 grad: 0.1167 (0.1146) loss: 0.8581 (0.8587) time: 0.1672 data: 0.0873 max mem: 9305 +Train: [62] [5000/6250] eta: 0:03:32 lr: 0.000042 grad: 0.1179 (0.1147) loss: 0.8558 (0.8586) time: 0.1662 data: 0.0829 max mem: 9305 +Train: [62] [5100/6250] eta: 0:03:15 lr: 0.000042 grad: 0.1140 (0.1148) loss: 0.8605 (0.8585) time: 0.1601 data: 0.0757 max mem: 9305 +Train: [62] [5200/6250] eta: 0:02:57 lr: 0.000042 grad: 0.1116 (0.1148) loss: 0.8551 (0.8585) time: 0.1450 data: 0.0561 max mem: 9305 +Train: [62] [5300/6250] eta: 0:02:40 lr: 0.000042 grad: 0.1057 (0.1148) loss: 0.8617 (0.8584) time: 0.1413 data: 0.0481 max mem: 9305 +Train: [62] [5400/6250] eta: 0:02:23 lr: 0.000041 grad: 0.1116 (0.1148) loss: 0.8540 (0.8584) time: 0.1404 data: 0.0596 max mem: 9305 +Train: [62] [5500/6250] eta: 0:02:06 lr: 0.000041 grad: 0.1166 (0.1148) loss: 0.8605 (0.8583) time: 0.1436 data: 0.0562 max mem: 9305 +Train: [62] [5600/6250] eta: 0:01:49 lr: 0.000041 grad: 0.1141 (0.1149) loss: 0.8487 (0.8583) time: 0.1490 data: 0.0652 max mem: 9305 +Train: [62] [5700/6250] eta: 0:01:32 lr: 0.000041 grad: 0.1104 (0.1149) loss: 0.8601 (0.8583) time: 0.1700 data: 0.0714 max mem: 9305 +Train: [62] [5800/6250] eta: 0:01:15 lr: 0.000041 grad: 0.1122 (0.1149) loss: 0.8563 (0.8583) time: 0.1167 data: 0.0280 max mem: 9305 +Train: [62] [5900/6250] eta: 0:00:58 lr: 0.000041 grad: 0.1063 (0.1149) loss: 0.8615 (0.8582) time: 0.1372 data: 0.0530 max mem: 9305 +Train: [62] [6000/6250] eta: 0:00:41 lr: 0.000041 grad: 0.1142 (0.1150) loss: 0.8499 (0.8582) time: 0.1461 data: 0.0597 max mem: 9305 +Train: [62] [6100/6250] eta: 0:00:24 lr: 0.000041 grad: 0.1100 (0.1150) loss: 0.8624 (0.8582) time: 0.1398 data: 0.0489 max mem: 9305 +Train: [62] [6200/6250] eta: 0:00:08 lr: 0.000041 grad: 0.1145 (0.1150) loss: 0.8546 (0.8582) time: 0.1601 data: 0.0793 max mem: 9305 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1104 (0.1150) loss: 0.8595 (0.8582) time: 0.1770 data: 0.0887 max mem: 9305 +Train: [62] Total time: 0:17:28 (0.1678 s / it) +Averaged stats: lr: 0.000041 grad: 0.1104 (0.1150) loss: 0.8595 (0.8582) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:05:30 loss: 0.8908 (0.8908) time: 5.3243 data: 5.2581 max mem: 9305 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.8723 (0.8750) time: 0.1442 data: 0.1154 max mem: 9305 +Eval (hcp-train-subset): [62] Total time: 0:00:16 (0.2614 s / it) +Averaged stats (hcp-train-subset): loss: 0.8723 (0.8750) +Eval (hcp-val): [62] [ 0/62] eta: 0:05:24 loss: 0.8763 (0.8763) time: 5.2321 data: 5.1905 max mem: 9305 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8755 (0.8777) time: 0.1495 data: 0.1203 max mem: 9305 +Eval (hcp-val): [62] Total time: 0:00:15 (0.2478 s / it) +Averaged stats (hcp-val): loss: 0.8755 (0.8777) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 8:16:26 lr: 0.000041 grad: 0.0843 (0.0843) loss: 0.8751 (0.8751) time: 4.7659 data: 4.4501 max mem: 9305 +Train: [63] [ 100/6250] eta: 0:26:52 lr: 0.000041 grad: 0.1095 (0.1167) loss: 0.8647 (0.8682) time: 0.1339 data: 0.0003 max mem: 9305 +Train: [63] [ 200/6250] eta: 0:22:50 lr: 0.000041 grad: 0.1094 (0.1241) loss: 0.8676 (0.8638) time: 0.2329 data: 0.1197 max mem: 9305 +Train: [63] [ 300/6250] eta: 0:21:03 lr: 0.000041 grad: 0.1112 (0.1221) loss: 0.8661 (0.8630) time: 0.1981 data: 0.0944 max mem: 9305 +Train: [63] [ 400/6250] eta: 0:19:54 lr: 0.000041 grad: 0.1164 (0.1200) loss: 0.8616 (0.8633) time: 0.1708 data: 0.0705 max mem: 9305 +Train: [63] [ 500/6250] eta: 0:18:52 lr: 0.000041 grad: 0.1045 (0.1182) loss: 0.8643 (0.8639) time: 0.1783 data: 0.0800 max mem: 9305 +Train: [63] [ 600/6250] eta: 0:18:07 lr: 0.000041 grad: 0.1119 (0.1168) loss: 0.8641 (0.8642) time: 0.1459 data: 0.0434 max mem: 9305 +Train: [63] [ 700/6250] eta: 0:17:40 lr: 0.000041 grad: 0.1140 (0.1162) loss: 0.8657 (0.8642) time: 0.1748 data: 0.0865 max mem: 9305 +Train: [63] [ 800/6250] eta: 0:17:02 lr: 0.000041 grad: 0.1059 (0.1156) loss: 0.8664 (0.8641) time: 0.1569 data: 0.0615 max mem: 9305 +Train: [63] [ 900/6250] eta: 0:16:36 lr: 0.000041 grad: 0.1103 (0.1153) loss: 0.8570 (0.8636) time: 0.1765 data: 0.0802 max mem: 9305 +Train: [63] [1000/6250] eta: 0:16:09 lr: 0.000041 grad: 0.1142 (0.1154) loss: 0.8575 (0.8630) time: 0.1373 data: 0.0399 max mem: 9305 +Train: [63] [1100/6250] eta: 0:15:37 lr: 0.000041 grad: 0.1041 (0.1154) loss: 0.8569 (0.8624) time: 0.1476 data: 0.0593 max mem: 9305 +Train: [63] [1200/6250] eta: 0:15:10 lr: 0.000041 grad: 0.1097 (0.1156) loss: 0.8575 (0.8616) time: 0.1566 data: 0.0693 max mem: 9305 +Train: [63] [1300/6250] eta: 0:14:42 lr: 0.000041 grad: 0.1079 (0.1156) loss: 0.8564 (0.8612) time: 0.1453 data: 0.0601 max mem: 9305 +Train: [63] [1400/6250] eta: 0:14:25 lr: 0.000041 grad: 0.1161 (0.1155) loss: 0.8548 (0.8607) time: 0.1765 data: 0.0951 max mem: 9305 +Train: [63] [1500/6250] eta: 0:14:06 lr: 0.000041 grad: 0.1117 (0.1157) loss: 0.8604 (0.8604) time: 0.1859 data: 0.0975 max mem: 9305 +Train: [63] [1600/6250] eta: 0:13:43 lr: 0.000041 grad: 0.1169 (0.1158) loss: 0.8614 (0.8602) time: 0.1714 data: 0.0811 max mem: 9305 +Train: [63] [1700/6250] eta: 0:13:21 lr: 0.000041 grad: 0.1214 (0.1161) loss: 0.8568 (0.8600) time: 0.1084 data: 0.0179 max mem: 9305 +Train: [63] [1800/6250] eta: 0:13:01 lr: 0.000041 grad: 0.1175 (0.1163) loss: 0.8608 (0.8599) time: 0.1679 data: 0.0817 max mem: 9305 +Train: [63] [1900/6250] eta: 0:12:43 lr: 0.000041 grad: 0.1158 (0.1166) loss: 0.8541 (0.8598) time: 0.1793 data: 0.0854 max mem: 9305 +Train: [63] [2000/6250] eta: 0:12:24 lr: 0.000041 grad: 0.1133 (0.1169) loss: 0.8608 (0.8595) time: 0.1709 data: 0.0783 max mem: 9305 +Train: [63] [2100/6250] eta: 0:12:04 lr: 0.000041 grad: 0.1223 (0.1172) loss: 0.8537 (0.8593) time: 0.1926 data: 0.1005 max mem: 9305 +Train: [63] [2200/6250] eta: 0:11:46 lr: 0.000041 grad: 0.1158 (0.1173) loss: 0.8553 (0.8591) time: 0.1852 data: 0.0961 max mem: 9305 +Train: [63] [2300/6250] eta: 0:11:28 lr: 0.000041 grad: 0.1152 (0.1175) loss: 0.8578 (0.8590) time: 0.1730 data: 0.0696 max mem: 9305 +Train: [63] [2400/6250] eta: 0:11:09 lr: 0.000040 grad: 0.1132 (0.1176) loss: 0.8591 (0.8589) time: 0.1737 data: 0.0899 max mem: 9305 +Train: [63] [2500/6250] eta: 0:10:53 lr: 0.000040 grad: 0.1231 (0.1178) loss: 0.8479 (0.8587) time: 0.1945 data: 0.1036 max mem: 9305 +Train: [63] [2600/6250] eta: 0:10:36 lr: 0.000040 grad: 0.1164 (0.1182) loss: 0.8528 (0.8585) time: 0.1776 data: 0.0876 max mem: 9305 +Train: [63] [2700/6250] eta: 0:10:19 lr: 0.000040 grad: 0.1164 (0.1184) loss: 0.8557 (0.8583) time: 0.2316 data: 0.1399 max mem: 9305 +Train: [63] [2800/6250] eta: 0:09:58 lr: 0.000040 grad: 0.1212 (0.1185) loss: 0.8578 (0.8583) time: 0.1633 data: 0.0708 max mem: 9305 +Train: [63] [2900/6250] eta: 0:09:38 lr: 0.000040 grad: 0.1181 (0.1186) loss: 0.8508 (0.8581) time: 0.1281 data: 0.0421 max mem: 9305 +Train: [63] [3000/6250] eta: 0:09:19 lr: 0.000040 grad: 0.1142 (0.1187) loss: 0.8522 (0.8581) time: 0.1350 data: 0.0477 max mem: 9305 +Train: [63] [3100/6250] eta: 0:09:00 lr: 0.000040 grad: 0.1223 (0.1188) loss: 0.8493 (0.8581) time: 0.1552 data: 0.0690 max mem: 9305 +Train: [63] [3200/6250] eta: 0:08:41 lr: 0.000040 grad: 0.1158 (0.1190) loss: 0.8587 (0.8580) time: 0.1546 data: 0.0641 max mem: 9305 +Train: [63] [3300/6250] eta: 0:08:23 lr: 0.000040 grad: 0.1149 (0.1191) loss: 0.8519 (0.8578) time: 0.1495 data: 0.0597 max mem: 9305 +Train: [63] [3400/6250] eta: 0:08:04 lr: 0.000040 grad: 0.1177 (0.1191) loss: 0.8529 (0.8578) time: 0.1344 data: 0.0452 max mem: 9305 +Train: [63] [3500/6250] eta: 0:07:46 lr: 0.000040 grad: 0.1190 (0.1192) loss: 0.8489 (0.8577) time: 0.1299 data: 0.0401 max mem: 9305 +Train: [63] [3600/6250] eta: 0:07:27 lr: 0.000040 grad: 0.1131 (0.1192) loss: 0.8481 (0.8575) time: 0.1488 data: 0.0549 max mem: 9305 +Train: [63] [3700/6250] eta: 0:07:10 lr: 0.000040 grad: 0.1142 (0.1192) loss: 0.8622 (0.8575) time: 0.1484 data: 0.0651 max mem: 9305 +Train: [63] [3800/6250] eta: 0:06:52 lr: 0.000040 grad: 0.1189 (0.1191) loss: 0.8544 (0.8574) time: 0.1404 data: 0.0482 max mem: 9305 +Train: [63] [3900/6250] eta: 0:06:34 lr: 0.000040 grad: 0.1186 (0.1191) loss: 0.8528 (0.8574) time: 0.1490 data: 0.0591 max mem: 9305 +Train: [63] [4000/6250] eta: 0:06:17 lr: 0.000040 grad: 0.1143 (0.1191) loss: 0.8578 (0.8574) time: 0.1376 data: 0.0459 max mem: 9305 +Train: [63] [4100/6250] eta: 0:06:00 lr: 0.000040 grad: 0.1161 (0.1192) loss: 0.8559 (0.8573) time: 0.1542 data: 0.0667 max mem: 9305 +Train: [63] [4200/6250] eta: 0:05:42 lr: 0.000040 grad: 0.1213 (0.1192) loss: 0.8521 (0.8573) time: 0.1358 data: 0.0497 max mem: 9305 +Train: [63] [4300/6250] eta: 0:05:25 lr: 0.000040 grad: 0.1240 (0.1193) loss: 0.8544 (0.8572) time: 0.1712 data: 0.0767 max mem: 9305 +Train: [63] [4400/6250] eta: 0:05:08 lr: 0.000040 grad: 0.1084 (0.1193) loss: 0.8615 (0.8572) time: 0.1400 data: 0.0594 max mem: 9305 +Train: [63] [4500/6250] eta: 0:04:51 lr: 0.000040 grad: 0.1259 (0.1193) loss: 0.8573 (0.8572) time: 0.1589 data: 0.0712 max mem: 9305 +Train: [63] [4600/6250] eta: 0:04:35 lr: 0.000040 grad: 0.1135 (0.1194) loss: 0.8558 (0.8571) time: 0.2354 data: 0.1479 max mem: 9305 +Train: [63] [4700/6250] eta: 0:04:18 lr: 0.000040 grad: 0.1184 (0.1194) loss: 0.8584 (0.8570) time: 0.1414 data: 0.0445 max mem: 9305 +Train: [63] [4800/6250] eta: 0:04:02 lr: 0.000040 grad: 0.1088 (0.1194) loss: 0.8570 (0.8569) time: 0.1432 data: 0.0579 max mem: 9305 +Train: [63] [4900/6250] eta: 0:03:47 lr: 0.000040 grad: 0.1175 (0.1194) loss: 0.8564 (0.8569) time: 0.4761 data: 0.3545 max mem: 9305 +Train: [63] [5000/6250] eta: 0:03:29 lr: 0.000040 grad: 0.1178 (0.1195) loss: 0.8469 (0.8568) time: 0.1483 data: 0.0580 max mem: 9305 +Train: [63] [5100/6250] eta: 0:03:12 lr: 0.000040 grad: 0.1156 (0.1194) loss: 0.8584 (0.8568) time: 0.1430 data: 0.0558 max mem: 9305 +Train: [63] [5200/6250] eta: 0:02:56 lr: 0.000040 grad: 0.1075 (0.1193) loss: 0.8589 (0.8568) time: 0.1400 data: 0.0533 max mem: 9305 +Train: [63] [5300/6250] eta: 0:02:39 lr: 0.000040 grad: 0.1173 (0.1192) loss: 0.8584 (0.8569) time: 0.1468 data: 0.0612 max mem: 9305 +Train: [63] [5400/6250] eta: 0:02:22 lr: 0.000040 grad: 0.1160 (0.1192) loss: 0.8603 (0.8569) time: 0.1462 data: 0.0463 max mem: 9305 +Train: [63] [5500/6250] eta: 0:02:05 lr: 0.000040 grad: 0.1176 (0.1192) loss: 0.8618 (0.8569) time: 0.1567 data: 0.0668 max mem: 9305 +Train: [63] [5600/6250] eta: 0:01:48 lr: 0.000039 grad: 0.1240 (0.1192) loss: 0.8536 (0.8569) time: 0.1535 data: 0.0695 max mem: 9305 +Train: [63] [5700/6250] eta: 0:01:31 lr: 0.000039 grad: 0.1195 (0.1192) loss: 0.8548 (0.8569) time: 0.1552 data: 0.0691 max mem: 9305 +Train: [63] [5800/6250] eta: 0:01:14 lr: 0.000039 grad: 0.1115 (0.1192) loss: 0.8586 (0.8568) time: 0.1458 data: 0.0490 max mem: 9305 +Train: [63] [5900/6250] eta: 0:00:58 lr: 0.000039 grad: 0.1223 (0.1193) loss: 0.8491 (0.8568) time: 0.1501 data: 0.0665 max mem: 9305 +Train: [63] [6000/6250] eta: 0:00:41 lr: 0.000039 grad: 0.1246 (0.1194) loss: 0.8534 (0.8567) time: 0.1399 data: 0.0461 max mem: 9305 +Train: [63] [6100/6250] eta: 0:00:24 lr: 0.000039 grad: 0.1183 (0.1194) loss: 0.8525 (0.8566) time: 0.1345 data: 0.0411 max mem: 9305 +Train: [63] [6200/6250] eta: 0:00:08 lr: 0.000039 grad: 0.1186 (0.1194) loss: 0.8547 (0.8566) time: 0.1200 data: 0.0358 max mem: 9305 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1234 (0.1194) loss: 0.8545 (0.8565) time: 0.1191 data: 0.0308 max mem: 9305 +Train: [63] Total time: 0:17:15 (0.1657 s / it) +Averaged stats: lr: 0.000039 grad: 0.1234 (0.1194) loss: 0.8545 (0.8565) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:05:16 loss: 0.8881 (0.8881) time: 5.1010 data: 5.0678 max mem: 9305 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.8736 (0.8763) time: 0.1643 data: 0.1345 max mem: 9305 +Eval (hcp-train-subset): [63] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-train-subset): loss: 0.8736 (0.8763) +Eval (hcp-val): [63] [ 0/62] eta: 0:04:16 loss: 0.8725 (0.8725) time: 4.1348 data: 4.0448 max mem: 9305 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8751 (0.8780) time: 0.1207 data: 0.0920 max mem: 9305 +Eval (hcp-val): [63] Total time: 0:00:15 (0.2486 s / it) +Averaged stats (hcp-val): loss: 0.8751 (0.8780) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 9:15:44 lr: 0.000039 grad: 0.4321 (0.4321) loss: 0.8026 (0.8026) time: 5.3350 data: 5.0377 max mem: 9305 +Train: [64] [ 100/6250] eta: 0:23:29 lr: 0.000039 grad: 0.1264 (0.1404) loss: 0.8715 (0.8736) time: 0.1774 data: 0.0693 max mem: 9305 +Train: [64] [ 200/6250] eta: 0:20:25 lr: 0.000039 grad: 0.1233 (0.1350) loss: 0.8546 (0.8685) time: 0.1902 data: 0.0874 max mem: 9305 +Train: [64] [ 300/6250] eta: 0:19:42 lr: 0.000039 grad: 0.1269 (0.1310) loss: 0.8548 (0.8652) time: 0.1358 data: 0.0131 max mem: 9305 +Train: [64] [ 400/6250] eta: 0:19:16 lr: 0.000039 grad: 0.1140 (0.1297) loss: 0.8545 (0.8627) time: 0.2252 data: 0.1087 max mem: 9305 +Train: [64] [ 500/6250] eta: 0:18:30 lr: 0.000039 grad: 0.1106 (0.1288) loss: 0.8571 (0.8604) time: 0.1498 data: 0.0393 max mem: 9305 +Train: [64] [ 600/6250] eta: 0:17:57 lr: 0.000039 grad: 0.1146 (0.1273) loss: 0.8555 (0.8594) time: 0.1178 data: 0.0002 max mem: 9305 +Train: [64] [ 700/6250] eta: 0:17:22 lr: 0.000039 grad: 0.1347 (0.1267) loss: 0.8485 (0.8582) time: 0.1818 data: 0.0688 max mem: 9305 +Train: [64] [ 800/6250] eta: 0:16:46 lr: 0.000039 grad: 0.1173 (0.1258) loss: 0.8544 (0.8574) time: 0.1762 data: 0.0854 max mem: 9305 +Train: [64] [ 900/6250] eta: 0:16:27 lr: 0.000039 grad: 0.1170 (0.1256) loss: 0.8585 (0.8570) time: 0.2443 data: 0.1421 max mem: 9305 +Train: [64] [1000/6250] eta: 0:16:03 lr: 0.000039 grad: 0.1238 (0.1253) loss: 0.8522 (0.8567) time: 0.1530 data: 0.0696 max mem: 9305 +Train: [64] [1100/6250] eta: 0:15:36 lr: 0.000039 grad: 0.1165 (0.1252) loss: 0.8516 (0.8562) time: 0.1702 data: 0.0741 max mem: 9305 +Train: [64] [1200/6250] eta: 0:15:15 lr: 0.000039 grad: 0.1195 (0.1248) loss: 0.8549 (0.8559) time: 0.1983 data: 0.1131 max mem: 9305 +Train: [64] [1300/6250] eta: 0:14:50 lr: 0.000039 grad: 0.1197 (0.1246) loss: 0.8496 (0.8555) time: 0.1562 data: 0.0713 max mem: 9305 +Train: [64] [1400/6250] eta: 0:14:25 lr: 0.000039 grad: 0.1187 (0.1242) loss: 0.8556 (0.8555) time: 0.1469 data: 0.0647 max mem: 9305 +Train: [64] [1500/6250] eta: 0:14:02 lr: 0.000039 grad: 0.1213 (0.1241) loss: 0.8562 (0.8553) time: 0.1577 data: 0.0682 max mem: 9305 +Train: [64] [1600/6250] eta: 0:13:41 lr: 0.000039 grad: 0.1117 (0.1239) loss: 0.8560 (0.8552) time: 0.1031 data: 0.0170 max mem: 9305 +Train: [64] [1700/6250] eta: 0:13:19 lr: 0.000039 grad: 0.1121 (0.1236) loss: 0.8614 (0.8552) time: 0.1446 data: 0.0615 max mem: 9305 +Train: [64] [1800/6250] eta: 0:12:56 lr: 0.000039 grad: 0.1133 (0.1233) loss: 0.8564 (0.8553) time: 0.1714 data: 0.0743 max mem: 9305 +Train: [64] [1900/6250] eta: 0:12:37 lr: 0.000039 grad: 0.1149 (0.1230) loss: 0.8588 (0.8553) time: 0.1596 data: 0.0712 max mem: 9305 +Train: [64] [2000/6250] eta: 0:12:18 lr: 0.000039 grad: 0.1205 (0.1229) loss: 0.8549 (0.8552) time: 0.1899 data: 0.0999 max mem: 9305 +Train: [64] [2100/6250] eta: 0:11:56 lr: 0.000039 grad: 0.1162 (0.1226) loss: 0.8562 (0.8552) time: 0.1488 data: 0.0557 max mem: 9305 +Train: [64] [2200/6250] eta: 0:11:37 lr: 0.000039 grad: 0.1168 (0.1224) loss: 0.8525 (0.8551) time: 0.1050 data: 0.0002 max mem: 9305 +Train: [64] [2300/6250] eta: 0:11:17 lr: 0.000039 grad: 0.1123 (0.1222) loss: 0.8501 (0.8550) time: 0.1523 data: 0.0635 max mem: 9305 +Train: [64] [2400/6250] eta: 0:10:59 lr: 0.000039 grad: 0.1186 (0.1221) loss: 0.8447 (0.8549) time: 0.1912 data: 0.0990 max mem: 9305 +Train: [64] [2500/6250] eta: 0:10:39 lr: 0.000039 grad: 0.1048 (0.1220) loss: 0.8599 (0.8549) time: 0.1866 data: 0.0833 max mem: 9305 +Train: [64] [2600/6250] eta: 0:10:20 lr: 0.000039 grad: 0.1158 (0.1221) loss: 0.8570 (0.8549) time: 0.1404 data: 0.0482 max mem: 9305 +Train: [64] [2700/6250] eta: 0:10:02 lr: 0.000038 grad: 0.1144 (0.1221) loss: 0.8586 (0.8549) time: 0.1849 data: 0.0839 max mem: 9305 +Train: [64] [2800/6250] eta: 0:09:44 lr: 0.000038 grad: 0.1156 (0.1221) loss: 0.8574 (0.8548) time: 0.1490 data: 0.0617 max mem: 9305 +Train: [64] [2900/6250] eta: 0:09:24 lr: 0.000038 grad: 0.1157 (0.1219) loss: 0.8513 (0.8548) time: 0.1527 data: 0.0650 max mem: 9305 +Train: [64] [3000/6250] eta: 0:09:09 lr: 0.000038 grad: 0.1193 (0.1219) loss: 0.8600 (0.8547) time: 0.1973 data: 0.0992 max mem: 9305 +Train: [64] [3100/6250] eta: 0:08:52 lr: 0.000038 grad: 0.1191 (0.1218) loss: 0.8551 (0.8546) time: 0.1912 data: 0.1087 max mem: 9305 +Train: [64] [3200/6250] eta: 0:08:36 lr: 0.000038 grad: 0.1157 (0.1218) loss: 0.8503 (0.8546) time: 0.2638 data: 0.1707 max mem: 9305 +Train: [64] [3300/6250] eta: 0:08:19 lr: 0.000038 grad: 0.1168 (0.1219) loss: 0.8553 (0.8546) time: 0.2021 data: 0.1078 max mem: 9305 +Train: [64] [3400/6250] eta: 0:08:01 lr: 0.000038 grad: 0.1203 (0.1218) loss: 0.8554 (0.8546) time: 0.1856 data: 0.0582 max mem: 9305 +Train: [64] [3500/6250] eta: 0:07:44 lr: 0.000038 grad: 0.1109 (0.1217) loss: 0.8579 (0.8547) time: 0.1359 data: 0.0453 max mem: 9305 +Train: [64] [3600/6250] eta: 0:07:28 lr: 0.000038 grad: 0.1174 (0.1216) loss: 0.8570 (0.8547) time: 0.1035 data: 0.0002 max mem: 9305 +Train: [64] [3700/6250] eta: 0:07:10 lr: 0.000038 grad: 0.1217 (0.1216) loss: 0.8511 (0.8548) time: 0.1795 data: 0.0909 max mem: 9305 +Train: [64] [3800/6250] eta: 0:06:53 lr: 0.000038 grad: 0.1242 (0.1216) loss: 0.8510 (0.8548) time: 0.1416 data: 0.0524 max mem: 9305 +Train: [64] [3900/6250] eta: 0:06:35 lr: 0.000038 grad: 0.1201 (0.1217) loss: 0.8580 (0.8548) time: 0.0994 data: 0.0002 max mem: 9305 +Train: [64] [4000/6250] eta: 0:06:17 lr: 0.000038 grad: 0.1242 (0.1217) loss: 0.8498 (0.8548) time: 0.1370 data: 0.0300 max mem: 9305 +Train: [64] [4100/6250] eta: 0:05:59 lr: 0.000038 grad: 0.1240 (0.1217) loss: 0.8563 (0.8548) time: 0.1454 data: 0.0545 max mem: 9305 +Train: [64] [4200/6250] eta: 0:05:42 lr: 0.000038 grad: 0.1162 (0.1218) loss: 0.8570 (0.8549) time: 0.2139 data: 0.1177 max mem: 9305 +Train: [64] [4300/6250] eta: 0:05:25 lr: 0.000038 grad: 0.1209 (0.1219) loss: 0.8554 (0.8549) time: 0.1529 data: 0.0663 max mem: 9305 +Train: [64] [4400/6250] eta: 0:05:08 lr: 0.000038 grad: 0.1220 (0.1219) loss: 0.8521 (0.8549) time: 0.1525 data: 0.0624 max mem: 9305 +Train: [64] [4500/6250] eta: 0:04:51 lr: 0.000038 grad: 0.1180 (0.1220) loss: 0.8530 (0.8549) time: 0.1532 data: 0.0707 max mem: 9305 +Train: [64] [4600/6250] eta: 0:04:34 lr: 0.000038 grad: 0.1302 (0.1220) loss: 0.8566 (0.8549) time: 0.1801 data: 0.0929 max mem: 9305 +Train: [64] [4700/6250] eta: 0:04:17 lr: 0.000038 grad: 0.1189 (0.1221) loss: 0.8592 (0.8549) time: 0.1541 data: 0.0683 max mem: 9305 +Train: [64] [4800/6250] eta: 0:04:00 lr: 0.000038 grad: 0.1239 (0.1222) loss: 0.8533 (0.8549) time: 0.1806 data: 0.0917 max mem: 9305 +Train: [64] [4900/6250] eta: 0:03:45 lr: 0.000038 grad: 0.1202 (0.1222) loss: 0.8477 (0.8549) time: 0.3536 data: 0.2442 max mem: 9305 +Train: [64] [5000/6250] eta: 0:03:28 lr: 0.000038 grad: 0.1261 (0.1223) loss: 0.8530 (0.8548) time: 0.1074 data: 0.0005 max mem: 9305 +Train: [64] [5100/6250] eta: 0:03:12 lr: 0.000038 grad: 0.1236 (0.1224) loss: 0.8533 (0.8547) time: 0.1489 data: 0.0520 max mem: 9305 +Train: [64] [5200/6250] eta: 0:02:55 lr: 0.000038 grad: 0.1172 (0.1225) loss: 0.8547 (0.8546) time: 0.1525 data: 0.0571 max mem: 9305 +Train: [64] [5300/6250] eta: 0:02:39 lr: 0.000038 grad: 0.1180 (0.1224) loss: 0.8483 (0.8547) time: 0.1477 data: 0.0559 max mem: 9305 +Train: [64] [5400/6250] eta: 0:02:22 lr: 0.000038 grad: 0.1196 (0.1225) loss: 0.8460 (0.8546) time: 0.1565 data: 0.0615 max mem: 9305 +Train: [64] [5500/6250] eta: 0:02:05 lr: 0.000038 grad: 0.1220 (0.1225) loss: 0.8558 (0.8546) time: 0.1426 data: 0.0546 max mem: 9305 +Train: [64] [5600/6250] eta: 0:01:48 lr: 0.000038 grad: 0.1232 (0.1225) loss: 0.8521 (0.8547) time: 0.2168 data: 0.1203 max mem: 9305 +Train: [64] [5700/6250] eta: 0:01:31 lr: 0.000038 grad: 0.1065 (0.1225) loss: 0.8586 (0.8547) time: 0.1543 data: 0.0673 max mem: 9305 +Train: [64] [5800/6250] eta: 0:01:14 lr: 0.000038 grad: 0.1166 (0.1226) loss: 0.8540 (0.8547) time: 0.1353 data: 0.0462 max mem: 9305 +Train: [64] [5900/6250] eta: 0:00:58 lr: 0.000037 grad: 0.1141 (0.1225) loss: 0.8585 (0.8547) time: 0.1293 data: 0.0454 max mem: 9305 +Train: [64] [6000/6250] eta: 0:00:41 lr: 0.000037 grad: 0.1277 (0.1225) loss: 0.8512 (0.8548) time: 0.1772 data: 0.0888 max mem: 9305 +Train: [64] [6100/6250] eta: 0:00:24 lr: 0.000037 grad: 0.1197 (0.1226) loss: 0.8594 (0.8548) time: 0.1898 data: 0.0995 max mem: 9305 +Train: [64] [6200/6250] eta: 0:00:08 lr: 0.000037 grad: 0.1251 (0.1226) loss: 0.8519 (0.8549) time: 0.1496 data: 0.0627 max mem: 9305 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1265 (0.1227) loss: 0.8517 (0.8549) time: 0.1440 data: 0.0541 max mem: 9305 +Train: [64] Total time: 0:17:22 (0.1668 s / it) +Averaged stats: lr: 0.000037 grad: 0.1265 (0.1227) loss: 0.8517 (0.8549) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:04:10 loss: 0.8868 (0.8868) time: 4.0359 data: 3.9678 max mem: 9305 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.8736 (0.8739) time: 0.1587 data: 0.1293 max mem: 9305 +Eval (hcp-train-subset): [64] Total time: 0:00:15 (0.2469 s / it) +Averaged stats (hcp-train-subset): loss: 0.8736 (0.8739) +Making plots (hcp-train-subset): example=28 +Eval (hcp-val): [64] [ 0/62] eta: 0:06:46 loss: 0.8755 (0.8755) time: 6.5586 data: 6.5207 max mem: 9305 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8747 (0.8776) time: 0.1800 data: 0.1484 max mem: 9305 +Eval (hcp-val): [64] Total time: 0:00:16 (0.2635 s / it) +Averaged stats (hcp-val): loss: 0.8747 (0.8776) +Making plots (hcp-val): example=43 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [65] [ 0/6250] eta: 10:28:48 lr: 0.000037 grad: 0.2624 (0.2624) loss: 0.8773 (0.8773) time: 6.0366 data: 5.6638 max mem: 9305 +Train: [65] [ 100/6250] eta: 0:28:12 lr: 0.000037 grad: 0.1224 (0.1478) loss: 0.8621 (0.8670) time: 0.1618 data: 0.0422 max mem: 9305 +Train: [65] [ 200/6250] eta: 0:24:07 lr: 0.000037 grad: 0.1200 (0.1439) loss: 0.8509 (0.8607) time: 0.2379 data: 0.1179 max mem: 9305 +Train: [65] [ 300/6250] eta: 0:22:05 lr: 0.000037 grad: 0.1093 (0.1363) loss: 0.8601 (0.8614) time: 0.1817 data: 0.0781 max mem: 9305 +Train: [65] [ 400/6250] eta: 0:20:23 lr: 0.000037 grad: 0.1099 (0.1336) loss: 0.8604 (0.8609) time: 0.1531 data: 0.0434 max mem: 9305 +Train: [65] [ 500/6250] eta: 0:19:17 lr: 0.000037 grad: 0.1208 (0.1314) loss: 0.8561 (0.8607) time: 0.1463 data: 0.0516 max mem: 9305 +Train: [65] [ 600/6250] eta: 0:18:18 lr: 0.000037 grad: 0.1154 (0.1296) loss: 0.8576 (0.8607) time: 0.1468 data: 0.0551 max mem: 9305 +Train: [65] [ 700/6250] eta: 0:17:48 lr: 0.000037 grad: 0.1121 (0.1285) loss: 0.8596 (0.8606) time: 0.2025 data: 0.0776 max mem: 9305 +Train: [65] [ 800/6250] eta: 0:17:23 lr: 0.000037 grad: 0.1176 (0.1272) loss: 0.8528 (0.8605) time: 0.1067 data: 0.0003 max mem: 9305 +Train: [65] [ 900/6250] eta: 0:16:46 lr: 0.000037 grad: 0.1149 (0.1262) loss: 0.8568 (0.8604) time: 0.1564 data: 0.0531 max mem: 9305 +Train: [65] [1000/6250] eta: 0:16:44 lr: 0.000037 grad: 0.1214 (0.1259) loss: 0.8594 (0.8603) time: 0.3449 data: 0.2360 max mem: 9305 +Train: [65] [1100/6250] eta: 0:16:06 lr: 0.000037 grad: 0.1319 (0.1262) loss: 0.8525 (0.8598) time: 0.1386 data: 0.0600 max mem: 9305 +Train: [65] [1200/6250] eta: 0:15:42 lr: 0.000037 grad: 0.1171 (0.1265) loss: 0.8554 (0.8591) time: 0.1672 data: 0.0696 max mem: 9305 +Train: [65] [1300/6250] eta: 0:15:16 lr: 0.000037 grad: 0.1222 (0.1264) loss: 0.8508 (0.8585) time: 0.1586 data: 0.0613 max mem: 9305 +Train: [65] [1400/6250] eta: 0:14:48 lr: 0.000037 grad: 0.1258 (0.1268) loss: 0.8522 (0.8579) time: 0.1569 data: 0.0641 max mem: 9305 +Train: [65] [1500/6250] eta: 0:14:25 lr: 0.000037 grad: 0.1317 (0.1271) loss: 0.8489 (0.8574) time: 0.1557 data: 0.0566 max mem: 9305 +Train: [65] [1600/6250] eta: 0:13:57 lr: 0.000037 grad: 0.1281 (0.1275) loss: 0.8492 (0.8567) time: 0.1386 data: 0.0525 max mem: 9305 +Train: [65] [1700/6250] eta: 0:13:33 lr: 0.000037 grad: 0.1221 (0.1274) loss: 0.8554 (0.8564) time: 0.1489 data: 0.0571 max mem: 9305 +Train: [65] [1800/6250] eta: 0:13:08 lr: 0.000037 grad: 0.1168 (0.1274) loss: 0.8565 (0.8561) time: 0.1758 data: 0.0954 max mem: 9305 +Train: [65] [1900/6250] eta: 0:12:46 lr: 0.000037 grad: 0.1262 (0.1274) loss: 0.8566 (0.8557) time: 0.1551 data: 0.0697 max mem: 9305 +Train: [65] [2000/6250] eta: 0:12:30 lr: 0.000037 grad: 0.1275 (0.1274) loss: 0.8562 (0.8554) time: 0.1988 data: 0.1131 max mem: 9305 +Train: [65] [2100/6250] eta: 0:12:15 lr: 0.000037 grad: 0.1216 (0.1272) loss: 0.8534 (0.8553) time: 0.1632 data: 0.0735 max mem: 9305 +Train: [65] [2200/6250] eta: 0:11:56 lr: 0.000037 grad: 0.1308 (0.1271) loss: 0.8441 (0.8551) time: 0.1308 data: 0.0289 max mem: 9305 +Train: [65] [2300/6250] eta: 0:11:41 lr: 0.000037 grad: 0.1188 (0.1270) loss: 0.8571 (0.8550) time: 0.1747 data: 0.0407 max mem: 9305 +Train: [65] [2400/6250] eta: 0:11:31 lr: 0.000037 grad: 0.1186 (0.1268) loss: 0.8538 (0.8549) time: 0.3736 data: 0.2783 max mem: 9305 +Train: [65] [2500/6250] eta: 0:11:10 lr: 0.000037 grad: 0.1301 (0.1269) loss: 0.8445 (0.8548) time: 0.1258 data: 0.0314 max mem: 9305 +Train: [65] [2600/6250] eta: 0:10:50 lr: 0.000037 grad: 0.1158 (0.1267) loss: 0.8584 (0.8547) time: 0.1406 data: 0.0308 max mem: 9305 +Train: [65] [2700/6250] eta: 0:10:31 lr: 0.000037 grad: 0.1297 (0.1268) loss: 0.8516 (0.8545) time: 0.1364 data: 0.0426 max mem: 9305 +Train: [65] [2800/6250] eta: 0:10:14 lr: 0.000037 grad: 0.1247 (0.1268) loss: 0.8511 (0.8544) time: 0.2176 data: 0.1402 max mem: 9305 +Train: [65] [2900/6250] eta: 0:09:55 lr: 0.000037 grad: 0.1233 (0.1268) loss: 0.8485 (0.8542) time: 0.1906 data: 0.0971 max mem: 9305 +Train: [65] [3000/6250] eta: 0:09:36 lr: 0.000036 grad: 0.1218 (0.1266) loss: 0.8494 (0.8542) time: 0.1380 data: 0.0448 max mem: 9305 +Train: [65] [3100/6250] eta: 0:09:19 lr: 0.000036 grad: 0.1206 (0.1264) loss: 0.8495 (0.8541) time: 0.1578 data: 0.0624 max mem: 9305 +Train: [65] [3200/6250] eta: 0:09:00 lr: 0.000036 grad: 0.1180 (0.1263) loss: 0.8563 (0.8541) time: 0.1584 data: 0.0768 max mem: 9305 +Train: [65] [3300/6250] eta: 0:08:41 lr: 0.000036 grad: 0.1306 (0.1263) loss: 0.8527 (0.8541) time: 0.1655 data: 0.0770 max mem: 9305 +Train: [65] [3400/6250] eta: 0:08:24 lr: 0.000036 grad: 0.1170 (0.1262) loss: 0.8557 (0.8542) time: 0.1528 data: 0.0582 max mem: 9305 +Train: [65] [3500/6250] eta: 0:08:05 lr: 0.000036 grad: 0.1306 (0.1262) loss: 0.8456 (0.8541) time: 0.1044 data: 0.0053 max mem: 9305 +Train: [65] [3600/6250] eta: 0:07:46 lr: 0.000036 grad: 0.1313 (0.1261) loss: 0.8555 (0.8541) time: 0.1783 data: 0.0996 max mem: 9305 +Train: [65] [3700/6250] eta: 0:07:28 lr: 0.000036 grad: 0.1247 (0.1261) loss: 0.8510 (0.8542) time: 0.1916 data: 0.1076 max mem: 9305 +Train: [65] [3800/6250] eta: 0:07:09 lr: 0.000036 grad: 0.1210 (0.1261) loss: 0.8550 (0.8542) time: 0.1838 data: 0.1027 max mem: 9305 +Train: [65] [3900/6250] eta: 0:06:51 lr: 0.000036 grad: 0.1259 (0.1260) loss: 0.8577 (0.8543) time: 0.1195 data: 0.0328 max mem: 9305 +Train: [65] [4000/6250] eta: 0:06:32 lr: 0.000036 grad: 0.1252 (0.1259) loss: 0.8561 (0.8543) time: 0.1708 data: 0.0923 max mem: 9305 +Train: [65] [4100/6250] eta: 0:06:13 lr: 0.000036 grad: 0.1187 (0.1259) loss: 0.8561 (0.8543) time: 0.1113 data: 0.0132 max mem: 9305 +Train: [65] [4200/6250] eta: 0:05:55 lr: 0.000036 grad: 0.1121 (0.1257) loss: 0.8659 (0.8545) time: 0.1259 data: 0.0387 max mem: 9305 +Train: [65] [4300/6250] eta: 0:05:36 lr: 0.000036 grad: 0.1082 (0.1255) loss: 0.8609 (0.8546) time: 0.1572 data: 0.0729 max mem: 9305 +Train: [65] [4400/6250] eta: 0:05:18 lr: 0.000036 grad: 0.1180 (0.1253) loss: 0.8642 (0.8547) time: 0.1576 data: 0.0478 max mem: 9305 +Train: [65] [4500/6250] eta: 0:05:00 lr: 0.000036 grad: 0.1163 (0.1252) loss: 0.8593 (0.8548) time: 0.1589 data: 0.0634 max mem: 9305 +Train: [65] [4600/6250] eta: 0:04:43 lr: 0.000036 grad: 0.1189 (0.1250) loss: 0.8620 (0.8549) time: 0.1580 data: 0.0409 max mem: 9305 +Train: [65] [4700/6250] eta: 0:04:26 lr: 0.000036 grad: 0.1239 (0.1250) loss: 0.8582 (0.8550) time: 0.1615 data: 0.0774 max mem: 9305 +Train: [65] [4800/6250] eta: 0:04:09 lr: 0.000036 grad: 0.1139 (0.1249) loss: 0.8652 (0.8550) time: 0.1781 data: 0.0804 max mem: 9305 +Train: [65] [4900/6250] eta: 0:03:52 lr: 0.000036 grad: 0.1187 (0.1248) loss: 0.8627 (0.8551) time: 0.1658 data: 0.0667 max mem: 9305 +Train: [65] [5000/6250] eta: 0:03:34 lr: 0.000036 grad: 0.1121 (0.1247) loss: 0.8586 (0.8552) time: 0.2068 data: 0.1092 max mem: 9305 +Train: [65] [5100/6250] eta: 0:03:17 lr: 0.000036 grad: 0.1238 (0.1246) loss: 0.8626 (0.8552) time: 0.1502 data: 0.0651 max mem: 9305 +Train: [65] [5200/6250] eta: 0:03:00 lr: 0.000036 grad: 0.1183 (0.1246) loss: 0.8576 (0.8552) time: 0.1458 data: 0.0598 max mem: 9305 +Train: [65] [5300/6250] eta: 0:02:43 lr: 0.000036 grad: 0.1182 (0.1246) loss: 0.8600 (0.8552) time: 0.1560 data: 0.0714 max mem: 9305 +Train: [65] [5400/6250] eta: 0:02:26 lr: 0.000036 grad: 0.1225 (0.1246) loss: 0.8553 (0.8552) time: 0.0993 data: 0.0003 max mem: 9305 +Train: [65] [5500/6250] eta: 0:02:09 lr: 0.000036 grad: 0.1164 (0.1246) loss: 0.8555 (0.8552) time: 0.1603 data: 0.0810 max mem: 9305 +Train: [65] [5600/6250] eta: 0:01:51 lr: 0.000036 grad: 0.1141 (0.1245) loss: 0.8567 (0.8551) time: 0.1538 data: 0.0687 max mem: 9305 +Train: [65] [5700/6250] eta: 0:01:34 lr: 0.000036 grad: 0.1140 (0.1245) loss: 0.8565 (0.8551) time: 0.1600 data: 0.0755 max mem: 9305 +Train: [65] [5800/6250] eta: 0:01:17 lr: 0.000036 grad: 0.1150 (0.1244) loss: 0.8588 (0.8551) time: 0.1496 data: 0.0669 max mem: 9305 +Train: [65] [5900/6250] eta: 0:00:59 lr: 0.000036 grad: 0.1187 (0.1244) loss: 0.8621 (0.8552) time: 0.1335 data: 0.0437 max mem: 9305 +Train: [65] [6000/6250] eta: 0:00:42 lr: 0.000036 grad: 0.1139 (0.1243) loss: 0.8562 (0.8552) time: 0.1373 data: 0.0421 max mem: 9305 +Train: [65] [6100/6250] eta: 0:00:25 lr: 0.000036 grad: 0.1234 (0.1243) loss: 0.8549 (0.8552) time: 0.1585 data: 0.0728 max mem: 9305 +Train: [65] [6200/6250] eta: 0:00:08 lr: 0.000036 grad: 0.1260 (0.1243) loss: 0.8569 (0.8553) time: 0.1720 data: 0.0854 max mem: 9305 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1178 (0.1243) loss: 0.8577 (0.8553) time: 0.1722 data: 0.0809 max mem: 9305 +Train: [65] Total time: 0:17:51 (0.1715 s / it) +Averaged stats: lr: 0.000036 grad: 0.1178 (0.1243) loss: 0.8577 (0.8553) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:05:07 loss: 0.8857 (0.8857) time: 4.9658 data: 4.8721 max mem: 9305 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.8719 (0.8752) time: 0.1611 data: 0.1300 max mem: 9305 +Eval (hcp-train-subset): [65] Total time: 0:00:17 (0.2760 s / it) +Averaged stats (hcp-train-subset): loss: 0.8719 (0.8752) +Eval (hcp-val): [65] [ 0/62] eta: 0:06:49 loss: 0.8744 (0.8744) time: 6.6093 data: 6.5725 max mem: 9305 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8756 (0.8774) time: 0.1830 data: 0.1543 max mem: 9305 +Eval (hcp-val): [65] Total time: 0:00:17 (0.2791 s / it) +Averaged stats (hcp-val): loss: 0.8756 (0.8774) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [66] [ 0/6250] eta: 9:54:41 lr: 0.000036 grad: 0.1433 (0.1433) loss: 0.8798 (0.8798) time: 5.7091 data: 5.3780 max mem: 9305 +Train: [66] [ 100/6250] eta: 0:26:49 lr: 0.000035 grad: 0.1177 (0.1420) loss: 0.8665 (0.8679) time: 0.1970 data: 0.0872 max mem: 9305 +Train: [66] [ 200/6250] eta: 0:24:01 lr: 0.000035 grad: 0.1201 (0.1316) loss: 0.8562 (0.8632) time: 0.2839 data: 0.1821 max mem: 9305 +Train: [66] [ 300/6250] eta: 0:22:27 lr: 0.000035 grad: 0.1316 (0.1295) loss: 0.8504 (0.8615) time: 0.2158 data: 0.1215 max mem: 9305 +Train: [66] [ 400/6250] eta: 0:20:48 lr: 0.000035 grad: 0.1171 (0.1290) loss: 0.8462 (0.8593) time: 0.1420 data: 0.0361 max mem: 9305 +Train: [66] [ 500/6250] eta: 0:20:06 lr: 0.000035 grad: 0.1135 (0.1283) loss: 0.8612 (0.8589) time: 0.1314 data: 0.0005 max mem: 9305 +Train: [66] [ 600/6250] eta: 0:19:14 lr: 0.000035 grad: 0.1140 (0.1271) loss: 0.8592 (0.8587) time: 0.1809 data: 0.0800 max mem: 9305 +Train: [66] [ 700/6250] eta: 0:19:42 lr: 0.000035 grad: 0.1093 (0.1266) loss: 0.8625 (0.8587) time: 0.2263 data: 0.1052 max mem: 9305 +Train: [66] [ 800/6250] eta: 0:19:08 lr: 0.000035 grad: 0.1126 (0.1260) loss: 0.8611 (0.8584) time: 0.1794 data: 0.0761 max mem: 9305 +Train: [66] [ 900/6250] eta: 0:18:22 lr: 0.000035 grad: 0.1266 (0.1257) loss: 0.8439 (0.8579) time: 0.1917 data: 0.0982 max mem: 9305 +Train: [66] [1000/6250] eta: 0:17:30 lr: 0.000035 grad: 0.1218 (0.1254) loss: 0.8491 (0.8575) time: 0.1472 data: 0.0647 max mem: 9305 +Train: [66] [1100/6250] eta: 0:16:53 lr: 0.000035 grad: 0.1170 (0.1251) loss: 0.8572 (0.8571) time: 0.1859 data: 0.0866 max mem: 9305 +Train: [66] [1200/6250] eta: 0:16:18 lr: 0.000035 grad: 0.1181 (0.1247) loss: 0.8541 (0.8568) time: 0.1518 data: 0.0641 max mem: 9305 +Train: [66] [1300/6250] eta: 0:15:41 lr: 0.000035 grad: 0.1280 (0.1249) loss: 0.8515 (0.8564) time: 0.1558 data: 0.0674 max mem: 9305 +Train: [66] [1400/6250] eta: 0:15:09 lr: 0.000035 grad: 0.1225 (0.1248) loss: 0.8508 (0.8562) time: 0.1606 data: 0.0733 max mem: 9305 +Train: [66] [1500/6250] eta: 0:14:37 lr: 0.000035 grad: 0.1358 (0.1248) loss: 0.8515 (0.8560) time: 0.1529 data: 0.0653 max mem: 9305 +Train: [66] [1600/6250] eta: 0:14:05 lr: 0.000035 grad: 0.1212 (0.1246) loss: 0.8494 (0.8558) time: 0.1399 data: 0.0467 max mem: 9305 +Train: [66] [1700/6250] eta: 0:13:33 lr: 0.000035 grad: 0.1180 (0.1245) loss: 0.8494 (0.8556) time: 0.1322 data: 0.0492 max mem: 9305 +Train: [66] [1800/6250] eta: 0:13:00 lr: 0.000035 grad: 0.1271 (0.1244) loss: 0.8496 (0.8555) time: 0.1157 data: 0.0306 max mem: 9305 +Train: [66] [1900/6250] eta: 0:12:37 lr: 0.000035 grad: 0.1251 (0.1245) loss: 0.8530 (0.8554) time: 0.1646 data: 0.0787 max mem: 9305 +Train: [66] [2000/6250] eta: 0:12:15 lr: 0.000035 grad: 0.1190 (0.1246) loss: 0.8557 (0.8553) time: 0.1618 data: 0.0647 max mem: 9305 +Train: [66] [2100/6250] eta: 0:11:56 lr: 0.000035 grad: 0.1185 (0.1244) loss: 0.8578 (0.8553) time: 0.1038 data: 0.0003 max mem: 9305 +Train: [66] [2200/6250] eta: 0:11:39 lr: 0.000035 grad: 0.1229 (0.1243) loss: 0.8476 (0.8553) time: 0.2019 data: 0.1115 max mem: 9305 +Train: [66] [2300/6250] eta: 0:11:22 lr: 0.000035 grad: 0.1285 (0.1244) loss: 0.8482 (0.8552) time: 0.1798 data: 0.0678 max mem: 9305 +Train: [66] [2400/6250] eta: 0:11:07 lr: 0.000035 grad: 0.1152 (0.1243) loss: 0.8560 (0.8551) time: 0.1559 data: 0.0567 max mem: 9305 +Train: [66] [2500/6250] eta: 0:10:47 lr: 0.000035 grad: 0.1127 (0.1241) loss: 0.8588 (0.8552) time: 0.1810 data: 0.0845 max mem: 9305 +Train: [66] [2600/6250] eta: 0:10:33 lr: 0.000035 grad: 0.1226 (0.1240) loss: 0.8553 (0.8553) time: 0.1191 data: 0.0325 max mem: 9305 +Train: [66] [2700/6250] eta: 0:10:12 lr: 0.000035 grad: 0.1182 (0.1241) loss: 0.8632 (0.8554) time: 0.1431 data: 0.0561 max mem: 9305 +Train: [66] [2800/6250] eta: 0:09:54 lr: 0.000035 grad: 0.1214 (0.1240) loss: 0.8595 (0.8554) time: 0.1616 data: 0.0792 max mem: 9305 +Train: [66] [2900/6250] eta: 0:09:35 lr: 0.000035 grad: 0.1219 (0.1241) loss: 0.8576 (0.8554) time: 0.1645 data: 0.0806 max mem: 9305 +Train: [66] [3000/6250] eta: 0:09:17 lr: 0.000035 grad: 0.1221 (0.1242) loss: 0.8549 (0.8554) time: 0.1897 data: 0.0793 max mem: 9305 +Train: [66] [3100/6250] eta: 0:09:04 lr: 0.000035 grad: 0.1283 (0.1242) loss: 0.8563 (0.8553) time: 0.1417 data: 0.0440 max mem: 9305 +Train: [66] [3200/6250] eta: 0:08:47 lr: 0.000035 grad: 0.1256 (0.1242) loss: 0.8527 (0.8553) time: 0.2367 data: 0.1384 max mem: 9305 +Train: [66] [3300/6250] eta: 0:08:27 lr: 0.000035 grad: 0.1203 (0.1242) loss: 0.8545 (0.8552) time: 0.1571 data: 0.0664 max mem: 9305 +Train: [66] [3400/6250] eta: 0:08:08 lr: 0.000035 grad: 0.1235 (0.1242) loss: 0.8558 (0.8551) time: 0.1450 data: 0.0642 max mem: 9305 +Train: [66] [3500/6250] eta: 0:07:51 lr: 0.000034 grad: 0.1214 (0.1242) loss: 0.8569 (0.8551) time: 0.1652 data: 0.0789 max mem: 9305 +Train: [66] [3600/6250] eta: 0:07:32 lr: 0.000034 grad: 0.1174 (0.1242) loss: 0.8587 (0.8551) time: 0.1603 data: 0.0671 max mem: 9305 +Train: [66] [3700/6250] eta: 0:07:14 lr: 0.000034 grad: 0.1170 (0.1242) loss: 0.8564 (0.8551) time: 0.1330 data: 0.0412 max mem: 9305 +Train: [66] [3800/6250] eta: 0:06:56 lr: 0.000034 grad: 0.1202 (0.1243) loss: 0.8598 (0.8551) time: 0.1427 data: 0.0440 max mem: 9305 +Train: [66] [3900/6250] eta: 0:06:38 lr: 0.000034 grad: 0.1206 (0.1244) loss: 0.8551 (0.8550) time: 0.1555 data: 0.0721 max mem: 9305 +Train: [66] [4000/6250] eta: 0:06:20 lr: 0.000034 grad: 0.1174 (0.1245) loss: 0.8533 (0.8550) time: 0.1798 data: 0.0931 max mem: 9305 +Train: [66] [4100/6250] eta: 0:06:02 lr: 0.000034 grad: 0.1155 (0.1245) loss: 0.8608 (0.8550) time: 0.1645 data: 0.0703 max mem: 9305 +Train: [66] [4200/6250] eta: 0:05:45 lr: 0.000034 grad: 0.1278 (0.1246) loss: 0.8481 (0.8550) time: 0.1534 data: 0.0657 max mem: 9305 +Train: [66] [4300/6250] eta: 0:05:28 lr: 0.000034 grad: 0.1258 (0.1247) loss: 0.8526 (0.8550) time: 0.1580 data: 0.0724 max mem: 9305 +Train: [66] [4400/6250] eta: 0:05:10 lr: 0.000034 grad: 0.1173 (0.1247) loss: 0.8520 (0.8549) time: 0.1673 data: 0.0830 max mem: 9305 +Train: [66] [4500/6250] eta: 0:04:53 lr: 0.000034 grad: 0.1185 (0.1248) loss: 0.8481 (0.8549) time: 0.1381 data: 0.0483 max mem: 9305 +Train: [66] [4600/6250] eta: 0:04:35 lr: 0.000034 grad: 0.1320 (0.1249) loss: 0.8533 (0.8549) time: 0.1450 data: 0.0515 max mem: 9305 +Train: [66] [4700/6250] eta: 0:04:18 lr: 0.000034 grad: 0.1225 (0.1250) loss: 0.8543 (0.8548) time: 0.1554 data: 0.0550 max mem: 9305 +Train: [66] [4800/6250] eta: 0:04:02 lr: 0.000034 grad: 0.1120 (0.1250) loss: 0.8596 (0.8548) time: 0.1807 data: 0.0319 max mem: 9305 +Train: [66] [4900/6250] eta: 0:03:46 lr: 0.000034 grad: 0.1227 (0.1251) loss: 0.8539 (0.8548) time: 0.1027 data: 0.0002 max mem: 9305 +Train: [66] [5000/6250] eta: 0:03:29 lr: 0.000034 grad: 0.1150 (0.1251) loss: 0.8586 (0.8548) time: 0.2026 data: 0.1060 max mem: 9305 +Train: [66] [5100/6250] eta: 0:03:13 lr: 0.000034 grad: 0.1205 (0.1251) loss: 0.8565 (0.8549) time: 0.1448 data: 0.0616 max mem: 9305 +Train: [66] [5200/6250] eta: 0:02:57 lr: 0.000034 grad: 0.1223 (0.1251) loss: 0.8585 (0.8549) time: 0.3194 data: 0.2236 max mem: 9305 +Train: [66] [5300/6250] eta: 0:02:40 lr: 0.000034 grad: 0.1153 (0.1250) loss: 0.8668 (0.8550) time: 0.2734 data: 0.1778 max mem: 9305 +Train: [66] [5400/6250] eta: 0:02:23 lr: 0.000034 grad: 0.1195 (0.1250) loss: 0.8607 (0.8550) time: 0.1720 data: 0.0816 max mem: 9305 +Train: [66] [5500/6250] eta: 0:02:06 lr: 0.000034 grad: 0.1267 (0.1251) loss: 0.8538 (0.8550) time: 0.1740 data: 0.0894 max mem: 9305 +Train: [66] [5600/6250] eta: 0:01:49 lr: 0.000034 grad: 0.1302 (0.1251) loss: 0.8509 (0.8549) time: 0.1521 data: 0.0618 max mem: 9305 +Train: [66] [5700/6250] eta: 0:01:32 lr: 0.000034 grad: 0.1228 (0.1252) loss: 0.8576 (0.8549) time: 0.1473 data: 0.0571 max mem: 9305 +Train: [66] [5800/6250] eta: 0:01:15 lr: 0.000034 grad: 0.1223 (0.1252) loss: 0.8509 (0.8549) time: 0.1444 data: 0.0453 max mem: 9305 +Train: [66] [5900/6250] eta: 0:00:58 lr: 0.000034 grad: 0.1174 (0.1252) loss: 0.8509 (0.8549) time: 0.1642 data: 0.0829 max mem: 9305 +Train: [66] [6000/6250] eta: 0:00:41 lr: 0.000034 grad: 0.1213 (0.1252) loss: 0.8540 (0.8550) time: 0.1552 data: 0.0746 max mem: 9305 +Train: [66] [6100/6250] eta: 0:00:25 lr: 0.000034 grad: 0.1171 (0.1252) loss: 0.8580 (0.8550) time: 0.1521 data: 0.0597 max mem: 9305 +Train: [66] [6200/6250] eta: 0:00:08 lr: 0.000034 grad: 0.1343 (0.1252) loss: 0.8450 (0.8550) time: 0.1511 data: 0.0616 max mem: 9305 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1292 (0.1253) loss: 0.8466 (0.8550) time: 0.1626 data: 0.0858 max mem: 9305 +Train: [66] Total time: 0:17:30 (0.1681 s / it) +Averaged stats: lr: 0.000034 grad: 0.1292 (0.1253) loss: 0.8466 (0.8550) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:06:11 loss: 0.8866 (0.8866) time: 5.9952 data: 5.9559 max mem: 9305 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.8732 (0.8747) time: 0.1490 data: 0.1199 max mem: 9305 +Eval (hcp-train-subset): [66] Total time: 0:00:15 (0.2502 s / it) +Averaged stats (hcp-train-subset): loss: 0.8732 (0.8747) +Eval (hcp-val): [66] [ 0/62] eta: 0:06:09 loss: 0.8729 (0.8729) time: 5.9518 data: 5.9154 max mem: 9305 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.8761 (0.8778) time: 0.1554 data: 0.1267 max mem: 9305 +Eval (hcp-val): [66] Total time: 0:00:15 (0.2512 s / it) +Averaged stats (hcp-val): loss: 0.8761 (0.8778) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [67] [ 0/6250] eta: 10:48:08 lr: 0.000034 grad: 0.1029 (0.1029) loss: 0.8935 (0.8935) time: 6.2222 data: 5.8427 max mem: 9305 +Train: [67] [ 100/6250] eta: 0:26:41 lr: 0.000034 grad: 0.1164 (0.1500) loss: 0.8732 (0.8803) time: 0.1432 data: 0.0121 max mem: 9305 +Train: [67] [ 200/6250] eta: 0:21:50 lr: 0.000034 grad: 0.1325 (0.1392) loss: 0.8531 (0.8717) time: 0.2062 data: 0.1091 max mem: 9305 +Train: [67] [ 300/6250] eta: 0:21:19 lr: 0.000034 grad: 0.1233 (0.1328) loss: 0.8560 (0.8681) time: 0.2079 data: 0.0930 max mem: 9305 +Train: [67] [ 400/6250] eta: 0:20:36 lr: 0.000034 grad: 0.1135 (0.1289) loss: 0.8596 (0.8660) time: 0.2214 data: 0.1247 max mem: 9305 +Train: [67] [ 500/6250] eta: 0:19:50 lr: 0.000034 grad: 0.1124 (0.1266) loss: 0.8580 (0.8650) time: 0.1081 data: 0.0002 max mem: 9305 +Train: [67] [ 600/6250] eta: 0:19:12 lr: 0.000033 grad: 0.1192 (0.1252) loss: 0.8605 (0.8640) time: 0.1507 data: 0.0003 max mem: 9305 +Train: [67] [ 700/6250] eta: 0:18:54 lr: 0.000033 grad: 0.1172 (0.1244) loss: 0.8555 (0.8629) time: 0.1842 data: 0.0870 max mem: 9305 +Train: [67] [ 800/6250] eta: 0:18:06 lr: 0.000033 grad: 0.1147 (0.1234) loss: 0.8554 (0.8622) time: 0.1708 data: 0.0741 max mem: 9305 +Train: [67] [ 900/6250] eta: 0:17:28 lr: 0.000033 grad: 0.1087 (0.1228) loss: 0.8623 (0.8614) time: 0.1542 data: 0.0661 max mem: 9305 +Train: [67] [1000/6250] eta: 0:17:02 lr: 0.000033 grad: 0.1060 (0.1222) loss: 0.8544 (0.8607) time: 0.1911 data: 0.0741 max mem: 9305 +Train: [67] [1100/6250] eta: 0:16:51 lr: 0.000033 grad: 0.1182 (0.1222) loss: 0.8540 (0.8600) time: 0.3208 data: 0.1732 max mem: 9305 +Train: [67] [1200/6250] eta: 0:16:07 lr: 0.000033 grad: 0.1142 (0.1220) loss: 0.8581 (0.8594) time: 0.1284 data: 0.0382 max mem: 9305 +Train: [67] [1300/6250] eta: 0:15:43 lr: 0.000033 grad: 0.1194 (0.1217) loss: 0.8549 (0.8589) time: 0.1926 data: 0.0955 max mem: 9305 +Train: [67] [1400/6250] eta: 0:15:16 lr: 0.000033 grad: 0.1101 (0.1217) loss: 0.8572 (0.8586) time: 0.2160 data: 0.1312 max mem: 9305 +Train: [67] [1500/6250] eta: 0:14:45 lr: 0.000033 grad: 0.1196 (0.1216) loss: 0.8497 (0.8582) time: 0.1553 data: 0.0703 max mem: 9305 +Train: [67] [1600/6250] eta: 0:14:21 lr: 0.000033 grad: 0.1150 (0.1215) loss: 0.8547 (0.8581) time: 0.1299 data: 0.0329 max mem: 9305 +Train: [67] [1700/6250] eta: 0:13:54 lr: 0.000033 grad: 0.1145 (0.1215) loss: 0.8507 (0.8579) time: 0.1250 data: 0.0407 max mem: 9305 +Train: [67] [1800/6250] eta: 0:13:37 lr: 0.000033 grad: 0.1094 (0.1215) loss: 0.8548 (0.8579) time: 0.1368 data: 0.0406 max mem: 9305 +Train: [67] [1900/6250] eta: 0:13:19 lr: 0.000033 grad: 0.1272 (0.1216) loss: 0.8481 (0.8576) time: 0.2463 data: 0.0826 max mem: 9305 +Train: [67] [2000/6250] eta: 0:13:00 lr: 0.000033 grad: 0.1152 (0.1216) loss: 0.8618 (0.8576) time: 0.1680 data: 0.0798 max mem: 9305 +Train: [67] [2100/6250] eta: 0:12:44 lr: 0.000033 grad: 0.1192 (0.1216) loss: 0.8578 (0.8575) time: 0.2040 data: 0.1198 max mem: 9305 +Train: [67] [2200/6250] eta: 0:12:20 lr: 0.000033 grad: 0.1185 (0.1218) loss: 0.8534 (0.8573) time: 0.1524 data: 0.0655 max mem: 9305 +Train: [67] [2300/6250] eta: 0:12:03 lr: 0.000033 grad: 0.1226 (0.1220) loss: 0.8553 (0.8571) time: 0.1972 data: 0.0768 max mem: 9305 +Train: [67] [2400/6250] eta: 0:11:49 lr: 0.000033 grad: 0.1251 (0.1224) loss: 0.8448 (0.8568) time: 0.1940 data: 0.0895 max mem: 9305 +Train: [67] [2500/6250] eta: 0:11:37 lr: 0.000033 grad: 0.1366 (0.1227) loss: 0.8504 (0.8567) time: 0.1440 data: 0.0063 max mem: 9305 +Train: [67] [2600/6250] eta: 0:11:21 lr: 0.000033 grad: 0.1296 (0.1228) loss: 0.8521 (0.8565) time: 0.3113 data: 0.1963 max mem: 9305 +Train: [67] [2700/6250] eta: 0:10:58 lr: 0.000033 grad: 0.1245 (0.1231) loss: 0.8511 (0.8563) time: 0.1864 data: 0.0936 max mem: 9305 +Train: [67] [2800/6250] eta: 0:10:38 lr: 0.000033 grad: 0.1214 (0.1232) loss: 0.8542 (0.8563) time: 0.1468 data: 0.0585 max mem: 9305 +Train: [67] [2900/6250] eta: 0:10:28 lr: 0.000033 grad: 0.1278 (0.1235) loss: 0.8513 (0.8560) time: 0.1803 data: 0.0845 max mem: 9305 +Train: [67] [3000/6250] eta: 0:10:04 lr: 0.000033 grad: 0.1243 (0.1236) loss: 0.8580 (0.8560) time: 0.1653 data: 0.0799 max mem: 9305 +Train: [67] [3100/6250] eta: 0:09:44 lr: 0.000033 grad: 0.1321 (0.1238) loss: 0.8464 (0.8558) time: 0.1533 data: 0.0671 max mem: 9305 +Train: [67] [3200/6250] eta: 0:09:22 lr: 0.000033 grad: 0.1253 (0.1240) loss: 0.8489 (0.8557) time: 0.1704 data: 0.0794 max mem: 9305 +Train: [67] [3300/6250] eta: 0:09:02 lr: 0.000033 grad: 0.1199 (0.1243) loss: 0.8517 (0.8555) time: 0.1141 data: 0.0260 max mem: 9305 +Train: [67] [3400/6250] eta: 0:08:42 lr: 0.000033 grad: 0.1302 (0.1247) loss: 0.8477 (0.8553) time: 0.1718 data: 0.0899 max mem: 9305 +Train: [67] [3500/6250] eta: 0:08:22 lr: 0.000033 grad: 0.1376 (0.1250) loss: 0.8475 (0.8552) time: 0.1695 data: 0.0809 max mem: 9305 +Train: [67] [3600/6250] eta: 0:08:03 lr: 0.000033 grad: 0.1175 (0.1251) loss: 0.8535 (0.8551) time: 0.1663 data: 0.0827 max mem: 9305 +Train: [67] [3700/6250] eta: 0:07:42 lr: 0.000033 grad: 0.1351 (0.1254) loss: 0.8421 (0.8549) time: 0.1502 data: 0.0550 max mem: 9305 +Train: [67] [3800/6250] eta: 0:07:23 lr: 0.000033 grad: 0.1261 (0.1257) loss: 0.8547 (0.8548) time: 0.1747 data: 0.0846 max mem: 9305 +Train: [67] [3900/6250] eta: 0:07:03 lr: 0.000033 grad: 0.1227 (0.1258) loss: 0.8579 (0.8547) time: 0.1550 data: 0.0673 max mem: 9305 +Train: [67] [4000/6250] eta: 0:06:45 lr: 0.000032 grad: 0.1229 (0.1259) loss: 0.8537 (0.8547) time: 0.2410 data: 0.1604 max mem: 9305 +Train: [67] [4100/6250] eta: 0:06:26 lr: 0.000032 grad: 0.1126 (0.1260) loss: 0.8537 (0.8548) time: 0.3076 data: 0.2229 max mem: 9305 +Train: [67] [4200/6250] eta: 0:06:06 lr: 0.000032 grad: 0.1376 (0.1262) loss: 0.8578 (0.8547) time: 0.1425 data: 0.0553 max mem: 9305 +Train: [67] [4300/6250] eta: 0:05:47 lr: 0.000032 grad: 0.1242 (0.1262) loss: 0.8535 (0.8548) time: 0.1523 data: 0.0652 max mem: 9305 +Train: [67] [4400/6250] eta: 0:05:29 lr: 0.000032 grad: 0.1261 (0.1262) loss: 0.8522 (0.8548) time: 0.1793 data: 0.0947 max mem: 9305 +Train: [67] [4500/6250] eta: 0:05:10 lr: 0.000032 grad: 0.1247 (0.1262) loss: 0.8517 (0.8548) time: 0.0903 data: 0.0002 max mem: 9305 +Train: [67] [4600/6250] eta: 0:04:52 lr: 0.000032 grad: 0.1214 (0.1262) loss: 0.8586 (0.8549) time: 0.1798 data: 0.1002 max mem: 9305 +Train: [67] [4700/6250] eta: 0:04:33 lr: 0.000032 grad: 0.1186 (0.1262) loss: 0.8540 (0.8549) time: 0.1140 data: 0.0239 max mem: 9305 +Train: [67] [4800/6250] eta: 0:04:15 lr: 0.000032 grad: 0.1139 (0.1262) loss: 0.8566 (0.8550) time: 0.1463 data: 0.0622 max mem: 9305 +Train: [67] [4900/6250] eta: 0:03:58 lr: 0.000032 grad: 0.1213 (0.1262) loss: 0.8543 (0.8549) time: 0.0982 data: 0.0002 max mem: 9305 +Train: [67] [5000/6250] eta: 0:03:42 lr: 0.000032 grad: 0.1298 (0.1262) loss: 0.8551 (0.8550) time: 0.1543 data: 0.0530 max mem: 9305 +Train: [67] [5100/6250] eta: 0:03:24 lr: 0.000032 grad: 0.1179 (0.1262) loss: 0.8523 (0.8550) time: 0.3034 data: 0.1518 max mem: 9305 +Train: [67] [5200/6250] eta: 0:03:07 lr: 0.000032 grad: 0.1188 (0.1263) loss: 0.8586 (0.8549) time: 0.2281 data: 0.1319 max mem: 9305 +Train: [67] [5300/6250] eta: 0:02:49 lr: 0.000032 grad: 0.1173 (0.1263) loss: 0.8575 (0.8549) time: 0.1672 data: 0.0769 max mem: 9305 +Train: [67] [5400/6250] eta: 0:02:31 lr: 0.000032 grad: 0.1337 (0.1264) loss: 0.8534 (0.8549) time: 0.1006 data: 0.0003 max mem: 9305 +Train: [67] [5500/6250] eta: 0:02:13 lr: 0.000032 grad: 0.1197 (0.1264) loss: 0.8573 (0.8548) time: 0.1474 data: 0.0554 max mem: 9305 +Train: [67] [5600/6250] eta: 0:01:56 lr: 0.000032 grad: 0.1197 (0.1264) loss: 0.8539 (0.8548) time: 0.1339 data: 0.0511 max mem: 9305 +Train: [67] [5700/6250] eta: 0:01:38 lr: 0.000032 grad: 0.1150 (0.1265) loss: 0.8515 (0.8547) time: 0.1632 data: 0.0786 max mem: 9305 +Train: [67] [5800/6250] eta: 0:01:20 lr: 0.000032 grad: 0.1251 (0.1265) loss: 0.8492 (0.8547) time: 0.1492 data: 0.0660 max mem: 9305 +Train: [67] [5900/6250] eta: 0:01:02 lr: 0.000032 grad: 0.1331 (0.1266) loss: 0.8515 (0.8546) time: 0.1667 data: 0.0808 max mem: 9305 +Train: [67] [6000/6250] eta: 0:00:44 lr: 0.000032 grad: 0.1279 (0.1267) loss: 0.8499 (0.8546) time: 0.1573 data: 0.0647 max mem: 9305 +Train: [67] [6100/6250] eta: 0:00:26 lr: 0.000032 grad: 0.1281 (0.1269) loss: 0.8413 (0.8545) time: 0.1743 data: 0.0904 max mem: 9305 +Train: [67] [6200/6250] eta: 0:00:08 lr: 0.000032 grad: 0.1338 (0.1270) loss: 0.8463 (0.8544) time: 0.1091 data: 0.0207 max mem: 9305 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1324 (0.1271) loss: 0.8403 (0.8544) time: 0.1333 data: 0.0462 max mem: 9305 +Train: [67] Total time: 0:18:27 (0.1772 s / it) +Averaged stats: lr: 0.000032 grad: 0.1324 (0.1271) loss: 0.8403 (0.8544) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:05:23 loss: 0.8839 (0.8839) time: 5.2116 data: 5.1740 max mem: 9305 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.8725 (0.8734) time: 0.1273 data: 0.0985 max mem: 9305 +Eval (hcp-train-subset): [67] Total time: 0:00:15 (0.2433 s / it) +Averaged stats (hcp-train-subset): loss: 0.8725 (0.8734) +Eval (hcp-val): [67] [ 0/62] eta: 0:05:06 loss: 0.8745 (0.8745) time: 4.9477 data: 4.9128 max mem: 9305 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.8757 (0.8774) time: 0.1357 data: 0.1066 max mem: 9305 +Eval (hcp-val): [67] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-val): loss: 0.8757 (0.8774) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [68] [ 0/6250] eta: 12:11:42 lr: 0.000032 grad: 0.2575 (0.2575) loss: 0.8375 (0.8375) time: 7.0243 data: 6.9085 max mem: 9305 +Train: [68] [ 100/6250] eta: 0:23:25 lr: 0.000032 grad: 0.1391 (0.1553) loss: 0.8647 (0.8655) time: 0.1764 data: 0.0706 max mem: 9305 +Train: [68] [ 200/6250] eta: 0:20:59 lr: 0.000032 grad: 0.1384 (0.1479) loss: 0.8631 (0.8627) time: 0.1976 data: 0.0648 max mem: 9305 +Train: [68] [ 300/6250] eta: 0:19:47 lr: 0.000032 grad: 0.1284 (0.1445) loss: 0.8553 (0.8609) time: 0.1713 data: 0.0549 max mem: 9305 +Train: [68] [ 400/6250] eta: 0:18:52 lr: 0.000032 grad: 0.1257 (0.1404) loss: 0.8591 (0.8590) time: 0.1704 data: 0.0737 max mem: 9305 +Train: [68] [ 500/6250] eta: 0:18:27 lr: 0.000032 grad: 0.1319 (0.1402) loss: 0.8534 (0.8575) time: 0.2205 data: 0.0484 max mem: 9305 +Train: [68] [ 600/6250] eta: 0:18:28 lr: 0.000032 grad: 0.1261 (0.1378) loss: 0.8605 (0.8574) time: 0.1853 data: 0.0872 max mem: 9305 +Train: [68] [ 700/6250] eta: 0:17:48 lr: 0.000032 grad: 0.1230 (0.1361) loss: 0.8526 (0.8570) time: 0.1762 data: 0.0850 max mem: 9305 +Train: [68] [ 800/6250] eta: 0:17:18 lr: 0.000032 grad: 0.1155 (0.1345) loss: 0.8642 (0.8574) time: 0.1779 data: 0.0838 max mem: 9305 +Train: [68] [ 900/6250] eta: 0:16:54 lr: 0.000032 grad: 0.1199 (0.1335) loss: 0.8562 (0.8573) time: 0.1570 data: 0.0611 max mem: 9305 +Train: [68] [1000/6250] eta: 0:16:24 lr: 0.000032 grad: 0.1373 (0.1326) loss: 0.8499 (0.8573) time: 0.1727 data: 0.0786 max mem: 9305 +Train: [68] [1100/6250] eta: 0:15:53 lr: 0.000032 grad: 0.1182 (0.1321) loss: 0.8527 (0.8570) time: 0.2025 data: 0.1076 max mem: 9305 +Train: [68] [1200/6250] eta: 0:15:25 lr: 0.000032 grad: 0.1219 (0.1314) loss: 0.8536 (0.8570) time: 0.1623 data: 0.0766 max mem: 9305 +Train: [68] [1300/6250] eta: 0:15:00 lr: 0.000031 grad: 0.1211 (0.1313) loss: 0.8572 (0.8568) time: 0.1900 data: 0.0902 max mem: 9305 +Train: [68] [1400/6250] eta: 0:14:44 lr: 0.000031 grad: 0.1242 (0.1309) loss: 0.8504 (0.8567) time: 0.2050 data: 0.1198 max mem: 9305 +Train: [68] [1500/6250] eta: 0:14:19 lr: 0.000031 grad: 0.1278 (0.1305) loss: 0.8572 (0.8567) time: 0.1792 data: 0.0966 max mem: 9305 +Train: [68] [1600/6250] eta: 0:14:02 lr: 0.000031 grad: 0.1256 (0.1300) loss: 0.8493 (0.8567) time: 0.2510 data: 0.1633 max mem: 9305 +Train: [68] [1700/6250] eta: 0:13:34 lr: 0.000031 grad: 0.1293 (0.1298) loss: 0.8509 (0.8565) time: 0.1912 data: 0.1020 max mem: 9305 +Train: [68] [1800/6250] eta: 0:13:14 lr: 0.000031 grad: 0.1141 (0.1294) loss: 0.8612 (0.8565) time: 0.1934 data: 0.0970 max mem: 9305 +Train: [68] [1900/6250] eta: 0:12:59 lr: 0.000031 grad: 0.1215 (0.1292) loss: 0.8619 (0.8566) time: 0.1073 data: 0.0003 max mem: 9305 +Train: [68] [2000/6250] eta: 0:12:34 lr: 0.000031 grad: 0.1236 (0.1290) loss: 0.8577 (0.8566) time: 0.1471 data: 0.0523 max mem: 9305 +Train: [68] [2100/6250] eta: 0:12:15 lr: 0.000031 grad: 0.1104 (0.1287) loss: 0.8523 (0.8565) time: 0.1689 data: 0.0715 max mem: 9305 +Train: [68] [2200/6250] eta: 0:11:53 lr: 0.000031 grad: 0.1189 (0.1285) loss: 0.8580 (0.8565) time: 0.1537 data: 0.0623 max mem: 9305 +Train: [68] [2300/6250] eta: 0:11:33 lr: 0.000031 grad: 0.1134 (0.1282) loss: 0.8599 (0.8566) time: 0.1374 data: 0.0504 max mem: 9305 +Train: [68] [2400/6250] eta: 0:11:20 lr: 0.000031 grad: 0.1234 (0.1280) loss: 0.8452 (0.8565) time: 0.1884 data: 0.0879 max mem: 9305 +Train: [68] [2500/6250] eta: 0:10:59 lr: 0.000031 grad: 0.1193 (0.1279) loss: 0.8445 (0.8564) time: 0.1647 data: 0.0678 max mem: 9305 +Train: [68] [2600/6250] eta: 0:10:49 lr: 0.000031 grad: 0.1225 (0.1278) loss: 0.8550 (0.8563) time: 0.3331 data: 0.2262 max mem: 9305 +Train: [68] [2700/6250] eta: 0:10:27 lr: 0.000031 grad: 0.1220 (0.1278) loss: 0.8535 (0.8561) time: 0.1813 data: 0.0879 max mem: 9305 +Train: [68] [2800/6250] eta: 0:10:20 lr: 0.000031 grad: 0.1248 (0.1277) loss: 0.8520 (0.8560) time: 0.1174 data: 0.0003 max mem: 9305 +Train: [68] [2900/6250] eta: 0:09:59 lr: 0.000031 grad: 0.1250 (0.1277) loss: 0.8517 (0.8559) time: 0.1707 data: 0.0765 max mem: 9305 +Train: [68] [3000/6250] eta: 0:09:44 lr: 0.000031 grad: 0.1267 (0.1275) loss: 0.8524 (0.8558) time: 0.0950 data: 0.0002 max mem: 9305 +Train: [68] [3100/6250] eta: 0:09:25 lr: 0.000031 grad: 0.1233 (0.1275) loss: 0.8557 (0.8558) time: 0.2067 data: 0.1198 max mem: 9305 +Train: [68] [3200/6250] eta: 0:09:07 lr: 0.000031 grad: 0.1144 (0.1275) loss: 0.8575 (0.8557) time: 0.2477 data: 0.1662 max mem: 9305 +Train: [68] [3300/6250] eta: 0:08:52 lr: 0.000031 grad: 0.1185 (0.1276) loss: 0.8610 (0.8557) time: 0.1044 data: 0.0004 max mem: 9305 +Train: [68] [3400/6250] eta: 0:08:32 lr: 0.000031 grad: 0.1246 (0.1276) loss: 0.8491 (0.8556) time: 0.2061 data: 0.1154 max mem: 9305 +Train: [68] [3500/6250] eta: 0:08:15 lr: 0.000031 grad: 0.1308 (0.1275) loss: 0.8498 (0.8555) time: 0.2293 data: 0.1271 max mem: 9305 +Train: [68] [3600/6250] eta: 0:07:56 lr: 0.000031 grad: 0.1197 (0.1275) loss: 0.8546 (0.8554) time: 0.1766 data: 0.0915 max mem: 9305 +Train: [68] [3700/6250] eta: 0:07:36 lr: 0.000031 grad: 0.1225 (0.1276) loss: 0.8565 (0.8553) time: 0.1724 data: 0.0862 max mem: 9305 +Train: [68] [3800/6250] eta: 0:07:17 lr: 0.000031 grad: 0.1245 (0.1276) loss: 0.8582 (0.8553) time: 0.1408 data: 0.0500 max mem: 9305 +Train: [68] [3900/6250] eta: 0:06:58 lr: 0.000031 grad: 0.1177 (0.1276) loss: 0.8641 (0.8553) time: 0.1710 data: 0.0733 max mem: 9305 +Train: [68] [4000/6250] eta: 0:06:38 lr: 0.000031 grad: 0.1144 (0.1276) loss: 0.8612 (0.8553) time: 0.1378 data: 0.0441 max mem: 9305 +Train: [68] [4100/6250] eta: 0:06:19 lr: 0.000031 grad: 0.1263 (0.1277) loss: 0.8514 (0.8552) time: 0.1482 data: 0.0650 max mem: 9305 +Train: [68] [4200/6250] eta: 0:06:00 lr: 0.000031 grad: 0.1328 (0.1278) loss: 0.8509 (0.8552) time: 0.2308 data: 0.1460 max mem: 9305 +Train: [68] [4300/6250] eta: 0:05:41 lr: 0.000031 grad: 0.1259 (0.1279) loss: 0.8535 (0.8552) time: 0.1801 data: 0.0907 max mem: 9305 +Train: [68] [4400/6250] eta: 0:05:21 lr: 0.000031 grad: 0.1256 (0.1278) loss: 0.8497 (0.8552) time: 0.1380 data: 0.0528 max mem: 9305 +Train: [68] [4500/6250] eta: 0:05:03 lr: 0.000031 grad: 0.1216 (0.1278) loss: 0.8548 (0.8552) time: 0.1407 data: 0.0485 max mem: 9305 +Train: [68] [4600/6250] eta: 0:04:45 lr: 0.000031 grad: 0.1252 (0.1278) loss: 0.8562 (0.8552) time: 0.1496 data: 0.0586 max mem: 9305 +Train: [68] [4700/6250] eta: 0:04:27 lr: 0.000031 grad: 0.1273 (0.1278) loss: 0.8611 (0.8553) time: 0.1755 data: 0.0927 max mem: 9305 +Train: [68] [4800/6250] eta: 0:04:09 lr: 0.000030 grad: 0.1227 (0.1277) loss: 0.8602 (0.8554) time: 0.1195 data: 0.0387 max mem: 9305 +Train: [68] [4900/6250] eta: 0:03:51 lr: 0.000030 grad: 0.1213 (0.1277) loss: 0.8618 (0.8554) time: 0.1322 data: 0.0506 max mem: 9305 +Train: [68] [5000/6250] eta: 0:03:33 lr: 0.000030 grad: 0.1330 (0.1277) loss: 0.8546 (0.8554) time: 0.1640 data: 0.0774 max mem: 9305 +Train: [68] [5100/6250] eta: 0:03:16 lr: 0.000030 grad: 0.1281 (0.1277) loss: 0.8569 (0.8555) time: 0.1698 data: 0.0653 max mem: 9305 +Train: [68] [5200/6250] eta: 0:02:58 lr: 0.000030 grad: 0.1240 (0.1277) loss: 0.8553 (0.8555) time: 0.1696 data: 0.0837 max mem: 9305 +Train: [68] [5300/6250] eta: 0:02:41 lr: 0.000030 grad: 0.1226 (0.1277) loss: 0.8571 (0.8555) time: 0.1385 data: 0.0316 max mem: 9305 +Train: [68] [5400/6250] eta: 0:02:23 lr: 0.000030 grad: 0.1206 (0.1277) loss: 0.8567 (0.8555) time: 0.1477 data: 0.0636 max mem: 9305 +Train: [68] [5500/6250] eta: 0:02:06 lr: 0.000030 grad: 0.1248 (0.1278) loss: 0.8583 (0.8555) time: 0.1914 data: 0.0867 max mem: 9305 +Train: [68] [5600/6250] eta: 0:01:49 lr: 0.000030 grad: 0.1318 (0.1278) loss: 0.8584 (0.8555) time: 0.0977 data: 0.0119 max mem: 9305 +Train: [68] [5700/6250] eta: 0:01:32 lr: 0.000030 grad: 0.1282 (0.1279) loss: 0.8586 (0.8555) time: 0.1270 data: 0.0435 max mem: 9305 +Train: [68] [5800/6250] eta: 0:01:15 lr: 0.000030 grad: 0.1326 (0.1279) loss: 0.8531 (0.8555) time: 0.1330 data: 0.0476 max mem: 9305 +Train: [68] [5900/6250] eta: 0:00:58 lr: 0.000030 grad: 0.1290 (0.1279) loss: 0.8579 (0.8555) time: 0.1448 data: 0.0624 max mem: 9305 +Train: [68] [6000/6250] eta: 0:00:41 lr: 0.000030 grad: 0.1298 (0.1280) loss: 0.8511 (0.8555) time: 0.1441 data: 0.0619 max mem: 9305 +Train: [68] [6100/6250] eta: 0:00:24 lr: 0.000030 grad: 0.1198 (0.1280) loss: 0.8543 (0.8555) time: 0.1417 data: 0.0598 max mem: 9305 +Train: [68] [6200/6250] eta: 0:00:08 lr: 0.000030 grad: 0.1298 (0.1280) loss: 0.8499 (0.8555) time: 0.1296 data: 0.0407 max mem: 9305 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1257 (0.1280) loss: 0.8584 (0.8555) time: 0.1908 data: 0.1031 max mem: 9305 +Train: [68] Total time: 0:17:22 (0.1668 s / it) +Averaged stats: lr: 0.000030 grad: 0.1257 (0.1280) loss: 0.8584 (0.8555) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:04:02 loss: 0.8879 (0.8879) time: 3.9133 data: 3.8465 max mem: 9305 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.8742 (0.8762) time: 0.1195 data: 0.0905 max mem: 9305 +Eval (hcp-train-subset): [68] Total time: 0:00:14 (0.2360 s / it) +Averaged stats (hcp-train-subset): loss: 0.8742 (0.8762) +Eval (hcp-val): [68] [ 0/62] eta: 0:05:11 loss: 0.8740 (0.8740) time: 5.0183 data: 4.9766 max mem: 9305 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.8762 (0.8767) time: 0.1482 data: 0.1193 max mem: 9305 +Eval (hcp-val): [68] Total time: 0:00:14 (0.2393 s / it) +Averaged stats (hcp-val): loss: 0.8762 (0.8767) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [69] [ 0/6250] eta: 7:27:52 lr: 0.000030 grad: 0.0952 (0.0952) loss: 0.9037 (0.9037) time: 4.2997 data: 4.0551 max mem: 9305 +Train: [69] [ 100/6250] eta: 0:20:48 lr: 0.000030 grad: 0.1126 (0.1491) loss: 0.8785 (0.8776) time: 0.1851 data: 0.0860 max mem: 9305 +Train: [69] [ 200/6250] eta: 0:18:27 lr: 0.000030 grad: 0.1226 (0.1461) loss: 0.8622 (0.8700) time: 0.1677 data: 0.0546 max mem: 9305 +Train: [69] [ 300/6250] eta: 0:17:30 lr: 0.000030 grad: 0.1320 (0.1432) loss: 0.8602 (0.8648) time: 0.1482 data: 0.0574 max mem: 9305 +Train: [69] [ 400/6250] eta: 0:17:38 lr: 0.000030 grad: 0.1199 (0.1402) loss: 0.8601 (0.8627) time: 0.1706 data: 0.0665 max mem: 9305 +Train: [69] [ 500/6250] eta: 0:17:07 lr: 0.000030 grad: 0.1218 (0.1374) loss: 0.8622 (0.8619) time: 0.1916 data: 0.0917 max mem: 9305 +Train: [69] [ 600/6250] eta: 0:16:32 lr: 0.000030 grad: 0.1192 (0.1347) loss: 0.8578 (0.8615) time: 0.1703 data: 0.0742 max mem: 9305 +Train: [69] [ 700/6250] eta: 0:15:54 lr: 0.000030 grad: 0.1158 (0.1331) loss: 0.8583 (0.8612) time: 0.1588 data: 0.0722 max mem: 9305 +Train: [69] [ 800/6250] eta: 0:15:19 lr: 0.000030 grad: 0.1131 (0.1320) loss: 0.8585 (0.8610) time: 0.1426 data: 0.0540 max mem: 9305 +Train: [69] [ 900/6250] eta: 0:15:02 lr: 0.000030 grad: 0.1162 (0.1313) loss: 0.8613 (0.8607) time: 0.1491 data: 0.0576 max mem: 9305 +Train: [69] [1000/6250] eta: 0:14:38 lr: 0.000030 grad: 0.1234 (0.1304) loss: 0.8631 (0.8607) time: 0.1682 data: 0.0827 max mem: 9305 +Train: [69] [1100/6250] eta: 0:14:22 lr: 0.000030 grad: 0.1210 (0.1296) loss: 0.8587 (0.8606) time: 0.1443 data: 0.0596 max mem: 9305 +Train: [69] [1200/6250] eta: 0:13:57 lr: 0.000030 grad: 0.1190 (0.1292) loss: 0.8523 (0.8604) time: 0.1383 data: 0.0516 max mem: 9305 +Train: [69] [1300/6250] eta: 0:13:45 lr: 0.000030 grad: 0.1222 (0.1289) loss: 0.8633 (0.8604) time: 0.2719 data: 0.1663 max mem: 9305 +Train: [69] [1400/6250] eta: 0:13:25 lr: 0.000030 grad: 0.1153 (0.1285) loss: 0.8560 (0.8602) time: 0.1200 data: 0.0410 max mem: 9305 +Train: [69] [1500/6250] eta: 0:12:58 lr: 0.000030 grad: 0.1172 (0.1282) loss: 0.8602 (0.8601) time: 0.1356 data: 0.0471 max mem: 9305 +Train: [69] [1600/6250] eta: 0:12:35 lr: 0.000030 grad: 0.1188 (0.1280) loss: 0.8515 (0.8600) time: 0.1445 data: 0.0635 max mem: 9305 +Train: [69] [1700/6250] eta: 0:12:13 lr: 0.000030 grad: 0.1260 (0.1279) loss: 0.8591 (0.8599) time: 0.1504 data: 0.0650 max mem: 9305 +Train: [69] [1800/6250] eta: 0:11:50 lr: 0.000030 grad: 0.1178 (0.1277) loss: 0.8500 (0.8597) time: 0.1469 data: 0.0600 max mem: 9305 +Train: [69] [1900/6250] eta: 0:11:28 lr: 0.000030 grad: 0.1142 (0.1275) loss: 0.8571 (0.8595) time: 0.1148 data: 0.0285 max mem: 9305 +Train: [69] [2000/6250] eta: 0:11:11 lr: 0.000030 grad: 0.1155 (0.1273) loss: 0.8567 (0.8595) time: 0.1294 data: 0.0455 max mem: 9305 +Train: [69] [2100/6250] eta: 0:10:52 lr: 0.000029 grad: 0.1227 (0.1273) loss: 0.8602 (0.8596) time: 0.1432 data: 0.0592 max mem: 9305 +Train: [69] [2200/6250] eta: 0:10:33 lr: 0.000029 grad: 0.1269 (0.1274) loss: 0.8550 (0.8594) time: 0.1234 data: 0.0443 max mem: 9305 +Train: [69] [2300/6250] eta: 0:10:15 lr: 0.000029 grad: 0.1246 (0.1275) loss: 0.8592 (0.8593) time: 0.1324 data: 0.0475 max mem: 9305 +Train: [69] [2400/6250] eta: 0:09:57 lr: 0.000029 grad: 0.1272 (0.1275) loss: 0.8569 (0.8591) time: 0.1778 data: 0.0968 max mem: 9305 +Train: [69] [2500/6250] eta: 0:09:38 lr: 0.000029 grad: 0.1240 (0.1275) loss: 0.8581 (0.8591) time: 0.1254 data: 0.0390 max mem: 9305 +Train: [69] [2600/6250] eta: 0:09:21 lr: 0.000029 grad: 0.1221 (0.1277) loss: 0.8596 (0.8589) time: 0.1437 data: 0.0590 max mem: 9305 +Train: [69] [2700/6250] eta: 0:09:02 lr: 0.000029 grad: 0.1223 (0.1277) loss: 0.8551 (0.8588) time: 0.1173 data: 0.0326 max mem: 9305 +Train: [69] [2800/6250] eta: 0:08:46 lr: 0.000029 grad: 0.1286 (0.1279) loss: 0.8570 (0.8585) time: 0.1594 data: 0.0789 max mem: 9305 +Train: [69] [2900/6250] eta: 0:08:29 lr: 0.000029 grad: 0.1287 (0.1280) loss: 0.8506 (0.8583) time: 0.1534 data: 0.0686 max mem: 9305 +Train: [69] [3000/6250] eta: 0:08:14 lr: 0.000029 grad: 0.1213 (0.1282) loss: 0.8503 (0.8580) time: 0.1364 data: 0.0465 max mem: 9305 +Train: [69] [3100/6250] eta: 0:07:58 lr: 0.000029 grad: 0.1315 (0.1282) loss: 0.8494 (0.8579) time: 0.1468 data: 0.0686 max mem: 9305 +Train: [69] [3200/6250] eta: 0:07:45 lr: 0.000029 grad: 0.1261 (0.1283) loss: 0.8390 (0.8576) time: 0.1723 data: 0.0909 max mem: 9305 +Train: [69] [3300/6250] eta: 0:07:28 lr: 0.000029 grad: 0.1319 (0.1284) loss: 0.8503 (0.8573) time: 0.1317 data: 0.0461 max mem: 9305 +Train: [69] [3400/6250] eta: 0:07:13 lr: 0.000029 grad: 0.1290 (0.1285) loss: 0.8467 (0.8571) time: 0.1512 data: 0.0701 max mem: 9305 +Train: [69] [3500/6250] eta: 0:06:58 lr: 0.000029 grad: 0.1310 (0.1286) loss: 0.8546 (0.8570) time: 0.2242 data: 0.1381 max mem: 9305 +Train: [69] [3600/6250] eta: 0:06:44 lr: 0.000029 grad: 0.1303 (0.1287) loss: 0.8449 (0.8568) time: 0.0817 data: 0.0002 max mem: 9305 +Train: [69] [3700/6250] eta: 0:06:27 lr: 0.000029 grad: 0.1265 (0.1287) loss: 0.8451 (0.8567) time: 0.1549 data: 0.0708 max mem: 9305 +Train: [69] [3800/6250] eta: 0:06:11 lr: 0.000029 grad: 0.1357 (0.1288) loss: 0.8481 (0.8565) time: 0.1411 data: 0.0461 max mem: 9305 +Train: [69] [3900/6250] eta: 0:05:55 lr: 0.000029 grad: 0.1205 (0.1288) loss: 0.8505 (0.8564) time: 0.1306 data: 0.0497 max mem: 9305 +Train: [69] [4000/6250] eta: 0:05:39 lr: 0.000029 grad: 0.1198 (0.1288) loss: 0.8569 (0.8563) time: 0.1396 data: 0.0599 max mem: 9305 +Train: [69] [4100/6250] eta: 0:05:23 lr: 0.000029 grad: 0.1254 (0.1288) loss: 0.8510 (0.8562) time: 0.1420 data: 0.0560 max mem: 9305 +Train: [69] [4200/6250] eta: 0:05:08 lr: 0.000029 grad: 0.1339 (0.1290) loss: 0.8416 (0.8560) time: 0.0922 data: 0.0002 max mem: 9305 +Train: [69] [4300/6250] eta: 0:04:53 lr: 0.000029 grad: 0.1290 (0.1290) loss: 0.8548 (0.8559) time: 0.1403 data: 0.0533 max mem: 9305 +Train: [69] [4400/6250] eta: 0:04:37 lr: 0.000029 grad: 0.1193 (0.1290) loss: 0.8578 (0.8558) time: 0.1446 data: 0.0663 max mem: 9305 +Train: [69] [4500/6250] eta: 0:04:22 lr: 0.000029 grad: 0.1276 (0.1292) loss: 0.8530 (0.8557) time: 0.1399 data: 0.0607 max mem: 9305 +Train: [69] [4600/6250] eta: 0:04:07 lr: 0.000029 grad: 0.1318 (0.1293) loss: 0.8526 (0.8557) time: 0.1592 data: 0.0781 max mem: 9305 +Train: [69] [4700/6250] eta: 0:03:52 lr: 0.000029 grad: 0.1358 (0.1294) loss: 0.8526 (0.8557) time: 0.1406 data: 0.0583 max mem: 9305 +Train: [69] [4800/6250] eta: 0:03:37 lr: 0.000029 grad: 0.1215 (0.1294) loss: 0.8614 (0.8557) time: 0.1357 data: 0.0509 max mem: 9305 +Train: [69] [4900/6250] eta: 0:03:22 lr: 0.000029 grad: 0.1260 (0.1294) loss: 0.8510 (0.8557) time: 0.1784 data: 0.0953 max mem: 9305 +Train: [69] [5000/6250] eta: 0:03:07 lr: 0.000029 grad: 0.1246 (0.1295) loss: 0.8562 (0.8556) time: 0.1886 data: 0.1069 max mem: 9305 +Train: [69] [5100/6250] eta: 0:02:52 lr: 0.000029 grad: 0.1171 (0.1295) loss: 0.8533 (0.8556) time: 0.1467 data: 0.0632 max mem: 9305 +Train: [69] [5200/6250] eta: 0:02:37 lr: 0.000029 grad: 0.1216 (0.1296) loss: 0.8593 (0.8556) time: 0.1816 data: 0.1071 max mem: 9305 +Train: [69] [5300/6250] eta: 0:02:22 lr: 0.000029 grad: 0.1284 (0.1297) loss: 0.8468 (0.8555) time: 0.1104 data: 0.0273 max mem: 9305 +Train: [69] [5400/6250] eta: 0:02:06 lr: 0.000029 grad: 0.1314 (0.1298) loss: 0.8493 (0.8554) time: 0.1409 data: 0.0558 max mem: 9305 +Train: [69] [5500/6250] eta: 0:01:51 lr: 0.000029 grad: 0.1262 (0.1299) loss: 0.8522 (0.8554) time: 0.1519 data: 0.0703 max mem: 9305 +Train: [69] [5600/6250] eta: 0:01:36 lr: 0.000028 grad: 0.1303 (0.1299) loss: 0.8506 (0.8553) time: 0.1501 data: 0.0652 max mem: 9305 +Train: [69] [5700/6250] eta: 0:01:21 lr: 0.000028 grad: 0.1364 (0.1300) loss: 0.8530 (0.8553) time: 0.1821 data: 0.0846 max mem: 9305 +Train: [69] [5800/6250] eta: 0:01:07 lr: 0.000028 grad: 0.1355 (0.1300) loss: 0.8497 (0.8552) time: 0.2013 data: 0.1140 max mem: 9305 +Train: [69] [5900/6250] eta: 0:00:52 lr: 0.000028 grad: 0.1336 (0.1301) loss: 0.8555 (0.8552) time: 0.2013 data: 0.1132 max mem: 9305 +Train: [69] [6000/6250] eta: 0:00:37 lr: 0.000028 grad: 0.1327 (0.1302) loss: 0.8466 (0.8551) time: 0.1850 data: 0.0617 max mem: 9305 +Train: [69] [6100/6250] eta: 0:00:22 lr: 0.000028 grad: 0.1366 (0.1302) loss: 0.8459 (0.8551) time: 0.1623 data: 0.0761 max mem: 9305 +Train: [69] [6200/6250] eta: 0:00:07 lr: 0.000028 grad: 0.1448 (0.1304) loss: 0.8463 (0.8551) time: 0.1270 data: 0.0338 max mem: 9305 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.1275 (0.1304) loss: 0.8569 (0.8551) time: 0.1639 data: 0.0779 max mem: 9305 +Train: [69] Total time: 0:15:41 (0.1506 s / it) +Averaged stats: lr: 0.000028 grad: 0.1275 (0.1304) loss: 0.8569 (0.8551) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:03:56 loss: 0.8868 (0.8868) time: 3.8087 data: 3.7412 max mem: 9305 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.8699 (0.8723) time: 0.1348 data: 0.1041 max mem: 9305 +Eval (hcp-train-subset): [69] Total time: 0:00:13 (0.2235 s / it) +Averaged stats (hcp-train-subset): loss: 0.8699 (0.8723) +Making plots (hcp-train-subset): example=47 +Eval (hcp-val): [69] [ 0/62] eta: 0:05:45 loss: 0.8739 (0.8739) time: 5.5727 data: 5.5357 max mem: 9305 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.8747 (0.8770) time: 0.1398 data: 0.1097 max mem: 9305 +Eval (hcp-val): [69] Total time: 0:00:15 (0.2480 s / it) +Averaged stats (hcp-val): loss: 0.8747 (0.8770) +Making plots (hcp-val): example=54 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [70] [ 0/6250] eta: 12:56:55 lr: 0.000028 grad: 0.4417 (0.4417) loss: 0.7957 (0.7957) time: 7.4585 data: 7.3618 max mem: 9305 +Train: [70] [ 100/6250] eta: 0:24:44 lr: 0.000028 grad: 0.1488 (0.1495) loss: 0.8422 (0.8663) time: 0.1787 data: 0.0741 max mem: 9305 +Train: [70] [ 200/6250] eta: 0:19:17 lr: 0.000028 grad: 0.1273 (0.1499) loss: 0.8548 (0.8591) time: 0.1256 data: 0.0338 max mem: 9305 +Train: [70] [ 300/6250] eta: 0:17:27 lr: 0.000028 grad: 0.1274 (0.1478) loss: 0.8528 (0.8562) time: 0.1440 data: 0.0510 max mem: 9305 +Train: [70] [ 400/6250] eta: 0:16:28 lr: 0.000028 grad: 0.1295 (0.1439) loss: 0.8516 (0.8550) time: 0.1499 data: 0.0672 max mem: 9305 +Train: [70] [ 500/6250] eta: 0:15:40 lr: 0.000028 grad: 0.1246 (0.1416) loss: 0.8539 (0.8550) time: 0.1458 data: 0.0607 max mem: 9305 +Train: [70] [ 600/6250] eta: 0:14:49 lr: 0.000028 grad: 0.1174 (0.1393) loss: 0.8521 (0.8547) time: 0.1302 data: 0.0364 max mem: 9305 +Train: [70] [ 700/6250] eta: 0:14:06 lr: 0.000028 grad: 0.1306 (0.1380) loss: 0.8524 (0.8545) time: 0.1201 data: 0.0272 max mem: 9305 +Train: [70] [ 800/6250] eta: 0:13:31 lr: 0.000028 grad: 0.1201 (0.1372) loss: 0.8508 (0.8542) time: 0.1230 data: 0.0365 max mem: 9305 +Train: [70] [ 900/6250] eta: 0:13:02 lr: 0.000028 grad: 0.1175 (0.1365) loss: 0.8583 (0.8537) time: 0.1286 data: 0.0402 max mem: 9305 +Train: [70] [1000/6250] eta: 0:12:39 lr: 0.000028 grad: 0.1195 (0.1359) loss: 0.8569 (0.8536) time: 0.1320 data: 0.0460 max mem: 9305 +Train: [70] [1100/6250] eta: 0:12:18 lr: 0.000028 grad: 0.1250 (0.1354) loss: 0.8501 (0.8534) time: 0.1378 data: 0.0574 max mem: 9305 +Train: [70] [1200/6250] eta: 0:12:06 lr: 0.000028 grad: 0.1306 (0.1348) loss: 0.8483 (0.8534) time: 0.1608 data: 0.0754 max mem: 9305 +Train: [70] [1300/6250] eta: 0:11:46 lr: 0.000028 grad: 0.1236 (0.1342) loss: 0.8537 (0.8534) time: 0.1510 data: 0.0655 max mem: 9305 +Train: [70] [1400/6250] eta: 0:11:35 lr: 0.000028 grad: 0.1323 (0.1339) loss: 0.8513 (0.8534) time: 0.1500 data: 0.0673 max mem: 9305 +Train: [70] [1500/6250] eta: 0:11:25 lr: 0.000028 grad: 0.1230 (0.1336) loss: 0.8541 (0.8533) time: 0.1217 data: 0.0364 max mem: 9305 +Train: [70] [1600/6250] eta: 0:11:10 lr: 0.000028 grad: 0.1273 (0.1332) loss: 0.8484 (0.8531) time: 0.1269 data: 0.0441 max mem: 9305 +Train: [70] [1700/6250] eta: 0:10:56 lr: 0.000028 grad: 0.1225 (0.1331) loss: 0.8524 (0.8532) time: 0.1363 data: 0.0582 max mem: 9305 +Train: [70] [1800/6250] eta: 0:10:40 lr: 0.000028 grad: 0.1304 (0.1328) loss: 0.8526 (0.8533) time: 0.1418 data: 0.0599 max mem: 9305 +Train: [70] [1900/6250] eta: 0:10:24 lr: 0.000028 grad: 0.1251 (0.1327) loss: 0.8513 (0.8533) time: 0.1436 data: 0.0633 max mem: 9305 +Train: [70] [2000/6250] eta: 0:10:07 lr: 0.000028 grad: 0.1165 (0.1323) loss: 0.8592 (0.8535) time: 0.1256 data: 0.0366 max mem: 9305 +Train: [70] [2100/6250] eta: 0:09:53 lr: 0.000028 grad: 0.1144 (0.1322) loss: 0.8636 (0.8535) time: 0.1399 data: 0.0492 max mem: 9305 +Train: [70] [2200/6250] eta: 0:09:36 lr: 0.000028 grad: 0.1172 (0.1320) loss: 0.8629 (0.8537) time: 0.1527 data: 0.0686 max mem: 9305 +Train: [70] [2300/6250] eta: 0:09:27 lr: 0.000028 grad: 0.1314 (0.1318) loss: 0.8581 (0.8539) time: 0.2590 data: 0.1674 max mem: 9305 +Train: [70] [2400/6250] eta: 0:09:09 lr: 0.000028 grad: 0.1278 (0.1316) loss: 0.8585 (0.8540) time: 0.1409 data: 0.0598 max mem: 9305 +Train: [70] [2500/6250] eta: 0:08:55 lr: 0.000028 grad: 0.1305 (0.1317) loss: 0.8601 (0.8541) time: 0.1707 data: 0.0697 max mem: 9305 +Train: [70] [2600/6250] eta: 0:08:43 lr: 0.000028 grad: 0.1274 (0.1317) loss: 0.8609 (0.8541) time: 0.1498 data: 0.0735 max mem: 9305 +Train: [70] [2700/6250] eta: 0:08:28 lr: 0.000028 grad: 0.1180 (0.1315) loss: 0.8622 (0.8541) time: 0.1167 data: 0.0339 max mem: 9305 +Train: [70] [2800/6250] eta: 0:08:13 lr: 0.000028 grad: 0.1270 (0.1315) loss: 0.8597 (0.8541) time: 0.1252 data: 0.0463 max mem: 9305 +Train: [70] [2900/6250] eta: 0:07:58 lr: 0.000028 grad: 0.1234 (0.1315) loss: 0.8537 (0.8542) time: 0.1203 data: 0.0330 max mem: 9305 +Train: [70] [3000/6250] eta: 0:07:43 lr: 0.000027 grad: 0.1214 (0.1313) loss: 0.8561 (0.8542) time: 0.1367 data: 0.0530 max mem: 9305 +Train: [70] [3100/6250] eta: 0:07:29 lr: 0.000027 grad: 0.1328 (0.1312) loss: 0.8518 (0.8542) time: 0.1467 data: 0.0662 max mem: 9305 +Train: [70] [3200/6250] eta: 0:07:16 lr: 0.000027 grad: 0.1286 (0.1310) loss: 0.8621 (0.8543) time: 0.1177 data: 0.0295 max mem: 9305 +Train: [70] [3300/6250] eta: 0:07:02 lr: 0.000027 grad: 0.1282 (0.1309) loss: 0.8571 (0.8543) time: 0.1666 data: 0.0838 max mem: 9305 +Train: [70] [3400/6250] eta: 0:06:48 lr: 0.000027 grad: 0.1181 (0.1309) loss: 0.8587 (0.8543) time: 0.1362 data: 0.0577 max mem: 9305 +Train: [70] [3500/6250] eta: 0:06:34 lr: 0.000027 grad: 0.1350 (0.1309) loss: 0.8492 (0.8543) time: 0.1569 data: 0.0768 max mem: 9305 +Train: [70] [3600/6250] eta: 0:06:20 lr: 0.000027 grad: 0.1346 (0.1308) loss: 0.8529 (0.8543) time: 0.1393 data: 0.0521 max mem: 9305 +Train: [70] [3700/6250] eta: 0:06:06 lr: 0.000027 grad: 0.1244 (0.1308) loss: 0.8588 (0.8544) time: 0.1232 data: 0.0425 max mem: 9305 +Train: [70] [3800/6250] eta: 0:05:54 lr: 0.000027 grad: 0.1233 (0.1306) loss: 0.8546 (0.8544) time: 0.2617 data: 0.1751 max mem: 9305 +Train: [70] [3900/6250] eta: 0:05:38 lr: 0.000027 grad: 0.1242 (0.1306) loss: 0.8544 (0.8544) time: 0.1239 data: 0.0448 max mem: 9305 +Train: [70] [4000/6250] eta: 0:05:23 lr: 0.000027 grad: 0.1229 (0.1305) loss: 0.8528 (0.8544) time: 0.1430 data: 0.0621 max mem: 9305 +Train: [70] [4100/6250] eta: 0:05:09 lr: 0.000027 grad: 0.1151 (0.1304) loss: 0.8638 (0.8544) time: 0.1477 data: 0.0704 max mem: 9305 +Train: [70] [4200/6250] eta: 0:04:55 lr: 0.000027 grad: 0.1153 (0.1304) loss: 0.8573 (0.8545) time: 0.1419 data: 0.0443 max mem: 9305 +Train: [70] [4300/6250] eta: 0:04:42 lr: 0.000027 grad: 0.1219 (0.1304) loss: 0.8505 (0.8545) time: 0.1395 data: 0.0246 max mem: 9305 +Train: [70] [4400/6250] eta: 0:04:28 lr: 0.000027 grad: 0.1236 (0.1303) loss: 0.8580 (0.8546) time: 0.1336 data: 0.0467 max mem: 9305 +Train: [70] [4500/6250] eta: 0:04:12 lr: 0.000027 grad: 0.1205 (0.1302) loss: 0.8546 (0.8546) time: 0.1219 data: 0.0329 max mem: 9305 +Train: [70] [4600/6250] eta: 0:03:57 lr: 0.000027 grad: 0.1243 (0.1301) loss: 0.8522 (0.8546) time: 0.1320 data: 0.0495 max mem: 9305 +Train: [70] [4700/6250] eta: 0:03:43 lr: 0.000027 grad: 0.1331 (0.1301) loss: 0.8571 (0.8546) time: 0.1274 data: 0.0368 max mem: 9305 +Train: [70] [4800/6250] eta: 0:03:28 lr: 0.000027 grad: 0.1229 (0.1301) loss: 0.8539 (0.8546) time: 0.1192 data: 0.0337 max mem: 9305 +Train: [70] [4900/6250] eta: 0:03:13 lr: 0.000027 grad: 0.1300 (0.1301) loss: 0.8509 (0.8546) time: 0.1285 data: 0.0381 max mem: 9305 +Train: [70] [5000/6250] eta: 0:02:59 lr: 0.000027 grad: 0.1238 (0.1302) loss: 0.8588 (0.8546) time: 0.1323 data: 0.0460 max mem: 9305 +Train: [70] [5100/6250] eta: 0:02:44 lr: 0.000027 grad: 0.1191 (0.1302) loss: 0.8590 (0.8546) time: 0.1416 data: 0.0602 max mem: 9305 +Train: [70] [5200/6250] eta: 0:02:29 lr: 0.000027 grad: 0.1321 (0.1303) loss: 0.8521 (0.8545) time: 0.1318 data: 0.0511 max mem: 9305 +Train: [70] [5300/6250] eta: 0:02:15 lr: 0.000027 grad: 0.1343 (0.1304) loss: 0.8594 (0.8545) time: 0.1576 data: 0.0777 max mem: 9305 +Train: [70] [5400/6250] eta: 0:02:01 lr: 0.000027 grad: 0.1313 (0.1306) loss: 0.8541 (0.8545) time: 0.1391 data: 0.0571 max mem: 9305 +Train: [70] [5500/6250] eta: 0:01:46 lr: 0.000027 grad: 0.1336 (0.1308) loss: 0.8509 (0.8544) time: 0.1524 data: 0.0708 max mem: 9305 +Train: [70] [5600/6250] eta: 0:01:32 lr: 0.000027 grad: 0.1314 (0.1308) loss: 0.8550 (0.8544) time: 0.1250 data: 0.0380 max mem: 9305 +Train: [70] [5700/6250] eta: 0:01:18 lr: 0.000027 grad: 0.1275 (0.1309) loss: 0.8587 (0.8544) time: 0.1506 data: 0.0699 max mem: 9305 +Train: [70] [5800/6250] eta: 0:01:04 lr: 0.000027 grad: 0.1294 (0.1309) loss: 0.8527 (0.8544) time: 0.1511 data: 0.0738 max mem: 9305 +Train: [70] [5900/6250] eta: 0:00:49 lr: 0.000027 grad: 0.1351 (0.1310) loss: 0.8521 (0.8543) time: 0.1203 data: 0.0417 max mem: 9305 +Train: [70] [6000/6250] eta: 0:00:35 lr: 0.000027 grad: 0.1330 (0.1311) loss: 0.8561 (0.8544) time: 0.1291 data: 0.0504 max mem: 9305 +Train: [70] [6100/6250] eta: 0:00:21 lr: 0.000027 grad: 0.1303 (0.1311) loss: 0.8543 (0.8543) time: 0.1490 data: 0.0576 max mem: 9305 +Train: [70] [6200/6250] eta: 0:00:07 lr: 0.000027 grad: 0.1370 (0.1312) loss: 0.8560 (0.8543) time: 0.1528 data: 0.0631 max mem: 9305 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.1366 (0.1312) loss: 0.8536 (0.8543) time: 0.1362 data: 0.0185 max mem: 9305 +Train: [70] Total time: 0:14:55 (0.1432 s / it) +Averaged stats: lr: 0.000027 grad: 0.1366 (0.1312) loss: 0.8536 (0.8543) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:03:42 loss: 0.8821 (0.8821) time: 3.5913 data: 3.5270 max mem: 9305 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.8719 (0.8719) time: 0.1650 data: 0.1359 max mem: 9305 +Eval (hcp-train-subset): [70] Total time: 0:00:13 (0.2201 s / it) +Averaged stats (hcp-train-subset): loss: 0.8719 (0.8719) +Eval (hcp-val): [70] [ 0/62] eta: 0:05:19 loss: 0.8764 (0.8764) time: 5.1469 data: 5.1131 max mem: 9305 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.8763 (0.8767) time: 0.1378 data: 0.1081 max mem: 9305 +Eval (hcp-val): [70] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (hcp-val): loss: 0.8763 (0.8767) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 10:42:56 lr: 0.000027 grad: 0.1053 (0.1053) loss: 0.9079 (0.9079) time: 6.1723 data: 6.0153 max mem: 9305 +Train: [71] [ 100/6250] eta: 0:21:02 lr: 0.000027 grad: 0.1572 (0.1774) loss: 0.8512 (0.8556) time: 0.1603 data: 0.0692 max mem: 9305 +Train: [71] [ 200/6250] eta: 0:18:05 lr: 0.000027 grad: 0.1530 (0.1736) loss: 0.8336 (0.8482) time: 0.1581 data: 0.0583 max mem: 9305 +Train: [71] [ 300/6250] eta: 0:16:29 lr: 0.000027 grad: 0.1348 (0.1698) loss: 0.8395 (0.8457) time: 0.1060 data: 0.0133 max mem: 9305 +Train: [71] [ 400/6250] eta: 0:15:08 lr: 0.000026 grad: 0.1323 (0.1638) loss: 0.8424 (0.8445) time: 0.1226 data: 0.0341 max mem: 9305 +Train: [71] [ 500/6250] eta: 0:14:16 lr: 0.000026 grad: 0.1218 (0.1579) loss: 0.8463 (0.8451) time: 0.1169 data: 0.0167 max mem: 9305 +Train: [71] [ 600/6250] eta: 0:13:44 lr: 0.000026 grad: 0.1346 (0.1530) loss: 0.8441 (0.8458) time: 0.1337 data: 0.0415 max mem: 9305 +Train: [71] [ 700/6250] eta: 0:13:20 lr: 0.000026 grad: 0.1351 (0.1502) loss: 0.8407 (0.8462) time: 0.1349 data: 0.0394 max mem: 9305 +Train: [71] [ 800/6250] eta: 0:13:00 lr: 0.000026 grad: 0.1252 (0.1479) loss: 0.8503 (0.8466) time: 0.1400 data: 0.0555 max mem: 9305 +Train: [71] [ 900/6250] eta: 0:12:45 lr: 0.000026 grad: 0.1301 (0.1465) loss: 0.8489 (0.8469) time: 0.1497 data: 0.0625 max mem: 9305 +Train: [71] [1000/6250] eta: 0:12:44 lr: 0.000026 grad: 0.1239 (0.1450) loss: 0.8461 (0.8472) time: 0.1894 data: 0.0964 max mem: 9305 +Train: [71] [1100/6250] eta: 0:12:45 lr: 0.000026 grad: 0.1238 (0.1442) loss: 0.8520 (0.8475) time: 0.2435 data: 0.1311 max mem: 9305 +Train: [71] [1200/6250] eta: 0:12:25 lr: 0.000026 grad: 0.1315 (0.1428) loss: 0.8508 (0.8479) time: 0.1548 data: 0.0720 max mem: 9305 +Train: [71] [1300/6250] eta: 0:12:14 lr: 0.000026 grad: 0.1333 (0.1421) loss: 0.8499 (0.8483) time: 0.1100 data: 0.0201 max mem: 9305 +Train: [71] [1400/6250] eta: 0:11:59 lr: 0.000026 grad: 0.1263 (0.1413) loss: 0.8507 (0.8485) time: 0.1372 data: 0.0466 max mem: 9305 +Train: [71] [1500/6250] eta: 0:11:45 lr: 0.000026 grad: 0.1338 (0.1404) loss: 0.8524 (0.8488) time: 0.1482 data: 0.0621 max mem: 9305 +Train: [71] [1600/6250] eta: 0:11:27 lr: 0.000026 grad: 0.1269 (0.1397) loss: 0.8523 (0.8490) time: 0.1523 data: 0.0705 max mem: 9305 +Train: [71] [1700/6250] eta: 0:11:12 lr: 0.000026 grad: 0.1317 (0.1393) loss: 0.8480 (0.8491) time: 0.1587 data: 0.0769 max mem: 9305 +Train: [71] [1800/6250] eta: 0:10:58 lr: 0.000026 grad: 0.1275 (0.1390) loss: 0.8482 (0.8492) time: 0.1508 data: 0.0671 max mem: 9305 +Train: [71] [1900/6250] eta: 0:10:44 lr: 0.000026 grad: 0.1348 (0.1389) loss: 0.8472 (0.8493) time: 0.1425 data: 0.0653 max mem: 9305 +Train: [71] [2000/6250] eta: 0:10:28 lr: 0.000026 grad: 0.1373 (0.1387) loss: 0.8530 (0.8494) time: 0.1170 data: 0.0332 max mem: 9305 +Train: [71] [2100/6250] eta: 0:10:12 lr: 0.000026 grad: 0.1294 (0.1385) loss: 0.8561 (0.8495) time: 0.1532 data: 0.0725 max mem: 9305 +Train: [71] [2200/6250] eta: 0:09:56 lr: 0.000026 grad: 0.1303 (0.1384) loss: 0.8487 (0.8495) time: 0.1430 data: 0.0608 max mem: 9305 +Train: [71] [2300/6250] eta: 0:09:44 lr: 0.000026 grad: 0.1276 (0.1383) loss: 0.8462 (0.8495) time: 0.2250 data: 0.1448 max mem: 9305 +Train: [71] [2400/6250] eta: 0:09:28 lr: 0.000026 grad: 0.1336 (0.1383) loss: 0.8493 (0.8495) time: 0.1446 data: 0.0570 max mem: 9305 +Train: [71] [2500/6250] eta: 0:09:17 lr: 0.000026 grad: 0.1287 (0.1383) loss: 0.8534 (0.8496) time: 0.1790 data: 0.0751 max mem: 9305 +Train: [71] [2600/6250] eta: 0:09:03 lr: 0.000026 grad: 0.1310 (0.1383) loss: 0.8523 (0.8496) time: 0.1373 data: 0.0004 max mem: 9305 +Train: [71] [2700/6250] eta: 0:08:48 lr: 0.000026 grad: 0.1384 (0.1382) loss: 0.8544 (0.8496) time: 0.1389 data: 0.0568 max mem: 9305 +Train: [71] [2800/6250] eta: 0:08:32 lr: 0.000026 grad: 0.1356 (0.1380) loss: 0.8518 (0.8496) time: 0.1227 data: 0.0425 max mem: 9305 +Train: [71] [2900/6250] eta: 0:08:15 lr: 0.000026 grad: 0.1382 (0.1380) loss: 0.8396 (0.8496) time: 0.1256 data: 0.0442 max mem: 9305 +Train: [71] [3000/6250] eta: 0:08:00 lr: 0.000026 grad: 0.1274 (0.1378) loss: 0.8516 (0.8496) time: 0.1834 data: 0.0704 max mem: 9305 +Train: [71] [3100/6250] eta: 0:07:47 lr: 0.000026 grad: 0.1213 (0.1377) loss: 0.8563 (0.8496) time: 0.1269 data: 0.0440 max mem: 9305 +Train: [71] [3200/6250] eta: 0:07:30 lr: 0.000026 grad: 0.1304 (0.1376) loss: 0.8481 (0.8497) time: 0.1258 data: 0.0444 max mem: 9305 +Train: [71] [3300/6250] eta: 0:07:14 lr: 0.000026 grad: 0.1281 (0.1374) loss: 0.8530 (0.8497) time: 0.1298 data: 0.0466 max mem: 9305 +Train: [71] [3400/6250] eta: 0:07:00 lr: 0.000026 grad: 0.1232 (0.1372) loss: 0.8524 (0.8497) time: 0.1970 data: 0.0958 max mem: 9305 +Train: [71] [3500/6250] eta: 0:06:44 lr: 0.000026 grad: 0.1320 (0.1370) loss: 0.8514 (0.8498) time: 0.1229 data: 0.0441 max mem: 9305 +Train: [71] [3600/6250] eta: 0:06:28 lr: 0.000026 grad: 0.1251 (0.1368) loss: 0.8508 (0.8498) time: 0.1151 data: 0.0299 max mem: 9305 +Train: [71] [3700/6250] eta: 0:06:16 lr: 0.000026 grad: 0.1285 (0.1366) loss: 0.8540 (0.8499) time: 0.2722 data: 0.1755 max mem: 9305 +Train: [71] [3800/6250] eta: 0:06:01 lr: 0.000026 grad: 0.1322 (0.1364) loss: 0.8485 (0.8500) time: 0.1338 data: 0.0524 max mem: 9305 +Train: [71] [3900/6250] eta: 0:05:45 lr: 0.000026 grad: 0.1313 (0.1364) loss: 0.8533 (0.8500) time: 0.1536 data: 0.0698 max mem: 9305 +Train: [71] [4000/6250] eta: 0:05:31 lr: 0.000026 grad: 0.1291 (0.1361) loss: 0.8548 (0.8501) time: 0.1500 data: 0.0616 max mem: 9305 +Train: [71] [4100/6250] eta: 0:05:15 lr: 0.000026 grad: 0.1258 (0.1360) loss: 0.8499 (0.8502) time: 0.1380 data: 0.0524 max mem: 9305 +Train: [71] [4200/6250] eta: 0:05:00 lr: 0.000025 grad: 0.1311 (0.1359) loss: 0.8548 (0.8503) time: 0.1545 data: 0.0742 max mem: 9305 +Train: [71] [4300/6250] eta: 0:04:46 lr: 0.000025 grad: 0.1269 (0.1357) loss: 0.8567 (0.8503) time: 0.1615 data: 0.0748 max mem: 9305 +Train: [71] [4400/6250] eta: 0:04:31 lr: 0.000025 grad: 0.1319 (0.1357) loss: 0.8427 (0.8503) time: 0.1216 data: 0.0375 max mem: 9305 +Train: [71] [4500/6250] eta: 0:04:17 lr: 0.000025 grad: 0.1295 (0.1357) loss: 0.8506 (0.8503) time: 0.1347 data: 0.0550 max mem: 9305 +Train: [71] [4600/6250] eta: 0:04:02 lr: 0.000025 grad: 0.1332 (0.1357) loss: 0.8480 (0.8503) time: 0.1258 data: 0.0428 max mem: 9305 +Train: [71] [4700/6250] eta: 0:03:47 lr: 0.000025 grad: 0.1295 (0.1356) loss: 0.8524 (0.8503) time: 0.1397 data: 0.0618 max mem: 9305 +Train: [71] [4800/6250] eta: 0:03:32 lr: 0.000025 grad: 0.1265 (0.1357) loss: 0.8510 (0.8502) time: 0.1424 data: 0.0614 max mem: 9305 +Train: [71] [4900/6250] eta: 0:03:17 lr: 0.000025 grad: 0.1295 (0.1357) loss: 0.8561 (0.8502) time: 0.1304 data: 0.0431 max mem: 9305 +Train: [71] [5000/6250] eta: 0:03:03 lr: 0.000025 grad: 0.1285 (0.1357) loss: 0.8565 (0.8502) time: 0.0849 data: 0.0003 max mem: 9305 +Train: [71] [5100/6250] eta: 0:02:48 lr: 0.000025 grad: 0.1349 (0.1358) loss: 0.8450 (0.8502) time: 0.1280 data: 0.0484 max mem: 9305 +Train: [71] [5200/6250] eta: 0:02:33 lr: 0.000025 grad: 0.1373 (0.1358) loss: 0.8533 (0.8503) time: 0.1108 data: 0.0290 max mem: 9305 +Train: [71] [5300/6250] eta: 0:02:18 lr: 0.000025 grad: 0.1338 (0.1357) loss: 0.8423 (0.8503) time: 0.1365 data: 0.0560 max mem: 9305 +Train: [71] [5400/6250] eta: 0:02:04 lr: 0.000025 grad: 0.1340 (0.1358) loss: 0.8517 (0.8503) time: 0.1447 data: 0.0595 max mem: 9305 +Train: [71] [5500/6250] eta: 0:01:49 lr: 0.000025 grad: 0.1412 (0.1358) loss: 0.8527 (0.8504) time: 0.1474 data: 0.0531 max mem: 9305 +Train: [71] [5600/6250] eta: 0:01:35 lr: 0.000025 grad: 0.1363 (0.1358) loss: 0.8530 (0.8504) time: 0.1603 data: 0.0786 max mem: 9305 +Train: [71] [5700/6250] eta: 0:01:20 lr: 0.000025 grad: 0.1407 (0.1358) loss: 0.8522 (0.8504) time: 0.1550 data: 0.0730 max mem: 9305 +Train: [71] [5800/6250] eta: 0:01:06 lr: 0.000025 grad: 0.1322 (0.1358) loss: 0.8590 (0.8504) time: 0.1470 data: 0.0590 max mem: 9305 +Train: [71] [5900/6250] eta: 0:00:51 lr: 0.000025 grad: 0.1393 (0.1358) loss: 0.8568 (0.8505) time: 0.1460 data: 0.0611 max mem: 9305 +Train: [71] [6000/6250] eta: 0:00:36 lr: 0.000025 grad: 0.1399 (0.1358) loss: 0.8488 (0.8504) time: 0.1733 data: 0.0927 max mem: 9305 +Train: [71] [6100/6250] eta: 0:00:22 lr: 0.000025 grad: 0.1306 (0.1359) loss: 0.8507 (0.8504) time: 0.1555 data: 0.0722 max mem: 9305 +Train: [71] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.1311 (0.1359) loss: 0.8575 (0.8505) time: 0.1226 data: 0.0425 max mem: 9305 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1426 (0.1360) loss: 0.8479 (0.8505) time: 0.2078 data: 0.1321 max mem: 9305 +Train: [71] Total time: 0:15:25 (0.1481 s / it) +Averaged stats: lr: 0.000025 grad: 0.1426 (0.1360) loss: 0.8479 (0.8505) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:03:57 loss: 0.8880 (0.8880) time: 3.8334 data: 3.7898 max mem: 9305 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.8695 (0.8720) time: 0.1200 data: 0.0904 max mem: 9305 +Eval (hcp-train-subset): [71] Total time: 0:00:12 (0.2002 s / it) +Averaged stats (hcp-train-subset): loss: 0.8695 (0.8720) +Eval (hcp-val): [71] [ 0/62] eta: 0:05:38 loss: 0.8755 (0.8755) time: 5.4661 data: 5.4338 max mem: 9305 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.8740 (0.8768) time: 0.1294 data: 0.1014 max mem: 9305 +Eval (hcp-val): [71] Total time: 0:00:12 (0.2028 s / it) +Averaged stats (hcp-val): loss: 0.8740 (0.8768) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [72] [ 0/6250] eta: 7:48:06 lr: 0.000025 grad: 0.1532 (0.1532) loss: 0.8882 (0.8882) time: 4.4939 data: 4.2344 max mem: 9305 +Train: [72] [ 100/6250] eta: 0:20:24 lr: 0.000025 grad: 0.1391 (0.1488) loss: 0.8642 (0.8669) time: 0.1545 data: 0.0553 max mem: 9305 +Train: [72] [ 200/6250] eta: 0:17:27 lr: 0.000025 grad: 0.1178 (0.1448) loss: 0.8661 (0.8627) time: 0.1242 data: 0.0300 max mem: 9305 +Train: [72] [ 300/6250] eta: 0:16:09 lr: 0.000025 grad: 0.1292 (0.1406) loss: 0.8624 (0.8615) time: 0.1389 data: 0.0482 max mem: 9305 +Train: [72] [ 400/6250] eta: 0:15:26 lr: 0.000025 grad: 0.1283 (0.1377) loss: 0.8670 (0.8612) time: 0.1470 data: 0.0579 max mem: 9305 +Train: [72] [ 500/6250] eta: 0:15:02 lr: 0.000025 grad: 0.1341 (0.1363) loss: 0.8553 (0.8607) time: 0.1673 data: 0.0685 max mem: 9305 +Train: [72] [ 600/6250] eta: 0:14:40 lr: 0.000025 grad: 0.1184 (0.1353) loss: 0.8619 (0.8606) time: 0.1750 data: 0.0768 max mem: 9305 +Train: [72] [ 700/6250] eta: 0:14:18 lr: 0.000025 grad: 0.1270 (0.1340) loss: 0.8490 (0.8601) time: 0.1407 data: 0.0487 max mem: 9305 +Train: [72] [ 800/6250] eta: 0:13:45 lr: 0.000025 grad: 0.1219 (0.1333) loss: 0.8544 (0.8593) time: 0.1252 data: 0.0373 max mem: 9305 +Train: [72] [ 900/6250] eta: 0:13:14 lr: 0.000025 grad: 0.1241 (0.1327) loss: 0.8608 (0.8591) time: 0.1094 data: 0.0177 max mem: 9305 +Train: [72] [1000/6250] eta: 0:12:44 lr: 0.000025 grad: 0.1292 (0.1324) loss: 0.8528 (0.8588) time: 0.1195 data: 0.0339 max mem: 9305 +Train: [72] [1100/6250] eta: 0:12:22 lr: 0.000025 grad: 0.1213 (0.1323) loss: 0.8556 (0.8583) time: 0.1100 data: 0.0299 max mem: 9305 +Train: [72] [1200/6250] eta: 0:11:56 lr: 0.000025 grad: 0.1234 (0.1324) loss: 0.8543 (0.8579) time: 0.1163 data: 0.0270 max mem: 9305 +Train: [72] [1300/6250] eta: 0:11:34 lr: 0.000025 grad: 0.1255 (0.1326) loss: 0.8608 (0.8575) time: 0.1280 data: 0.0430 max mem: 9305 +Train: [72] [1400/6250] eta: 0:11:12 lr: 0.000025 grad: 0.1282 (0.1325) loss: 0.8583 (0.8574) time: 0.1110 data: 0.0266 max mem: 9305 +Train: [72] [1500/6250] eta: 0:10:53 lr: 0.000025 grad: 0.1286 (0.1326) loss: 0.8489 (0.8570) time: 0.1188 data: 0.0283 max mem: 9305 +Train: [72] [1600/6250] eta: 0:10:35 lr: 0.000025 grad: 0.1272 (0.1327) loss: 0.8510 (0.8566) time: 0.1160 data: 0.0355 max mem: 9305 +Train: [72] [1700/6250] eta: 0:10:19 lr: 0.000024 grad: 0.1215 (0.1329) loss: 0.8507 (0.8563) time: 0.0981 data: 0.0107 max mem: 9305 +Train: [72] [1800/6250] eta: 0:10:04 lr: 0.000024 grad: 0.1329 (0.1329) loss: 0.8423 (0.8561) time: 0.1303 data: 0.0447 max mem: 9305 +Train: [72] [1900/6250] eta: 0:09:49 lr: 0.000024 grad: 0.1297 (0.1332) loss: 0.8542 (0.8559) time: 0.1362 data: 0.0517 max mem: 9305 +Train: [72] [2000/6250] eta: 0:09:35 lr: 0.000024 grad: 0.1263 (0.1333) loss: 0.8596 (0.8557) time: 0.1373 data: 0.0477 max mem: 9305 +Train: [72] [2100/6250] eta: 0:09:21 lr: 0.000024 grad: 0.1259 (0.1333) loss: 0.8608 (0.8556) time: 0.1308 data: 0.0407 max mem: 9305 +Train: [72] [2200/6250] eta: 0:09:07 lr: 0.000024 grad: 0.1261 (0.1333) loss: 0.8573 (0.8555) time: 0.1073 data: 0.0227 max mem: 9305 +Train: [72] [2300/6250] eta: 0:08:51 lr: 0.000024 grad: 0.1376 (0.1334) loss: 0.8484 (0.8553) time: 0.1355 data: 0.0498 max mem: 9305 +Train: [72] [2400/6250] eta: 0:08:36 lr: 0.000024 grad: 0.1239 (0.1335) loss: 0.8540 (0.8552) time: 0.1107 data: 0.0203 max mem: 9305 +Train: [72] [2500/6250] eta: 0:08:20 lr: 0.000024 grad: 0.1316 (0.1335) loss: 0.8501 (0.8551) time: 0.1240 data: 0.0396 max mem: 9305 +Train: [72] [2600/6250] eta: 0:08:06 lr: 0.000024 grad: 0.1358 (0.1337) loss: 0.8537 (0.8550) time: 0.1383 data: 0.0587 max mem: 9305 +Train: [72] [2700/6250] eta: 0:07:51 lr: 0.000024 grad: 0.1277 (0.1337) loss: 0.8529 (0.8549) time: 0.1263 data: 0.0442 max mem: 9305 +Train: [72] [2800/6250] eta: 0:07:37 lr: 0.000024 grad: 0.1267 (0.1337) loss: 0.8537 (0.8548) time: 0.1238 data: 0.0321 max mem: 9305 +Train: [72] [2900/6250] eta: 0:07:22 lr: 0.000024 grad: 0.1262 (0.1338) loss: 0.8591 (0.8548) time: 0.1215 data: 0.0361 max mem: 9305 +Train: [72] [3000/6250] eta: 0:07:09 lr: 0.000024 grad: 0.1241 (0.1338) loss: 0.8533 (0.8547) time: 0.1273 data: 0.0420 max mem: 9305 +Train: [72] [3100/6250] eta: 0:06:55 lr: 0.000024 grad: 0.1326 (0.1338) loss: 0.8528 (0.8547) time: 0.1264 data: 0.0417 max mem: 9305 +Train: [72] [3200/6250] eta: 0:06:42 lr: 0.000024 grad: 0.1301 (0.1338) loss: 0.8530 (0.8546) time: 0.1291 data: 0.0426 max mem: 9305 +Train: [72] [3300/6250] eta: 0:06:28 lr: 0.000024 grad: 0.1211 (0.1338) loss: 0.8548 (0.8547) time: 0.1228 data: 0.0338 max mem: 9305 +Train: [72] [3400/6250] eta: 0:06:15 lr: 0.000024 grad: 0.1276 (0.1338) loss: 0.8527 (0.8547) time: 0.1323 data: 0.0497 max mem: 9305 +Train: [72] [3500/6250] eta: 0:06:02 lr: 0.000024 grad: 0.1301 (0.1339) loss: 0.8550 (0.8546) time: 0.1344 data: 0.0483 max mem: 9305 +Train: [72] [3600/6250] eta: 0:05:49 lr: 0.000024 grad: 0.1251 (0.1339) loss: 0.8471 (0.8546) time: 0.1502 data: 0.0671 max mem: 9305 +Train: [72] [3700/6250] eta: 0:05:36 lr: 0.000024 grad: 0.1319 (0.1339) loss: 0.8554 (0.8546) time: 0.1393 data: 0.0558 max mem: 9305 +Train: [72] [3800/6250] eta: 0:05:23 lr: 0.000024 grad: 0.1359 (0.1340) loss: 0.8564 (0.8545) time: 0.1314 data: 0.0504 max mem: 9305 +Train: [72] [3900/6250] eta: 0:05:10 lr: 0.000024 grad: 0.1263 (0.1340) loss: 0.8585 (0.8545) time: 0.1362 data: 0.0531 max mem: 9305 +Train: [72] [4000/6250] eta: 0:04:57 lr: 0.000024 grad: 0.1392 (0.1341) loss: 0.8520 (0.8545) time: 0.1293 data: 0.0501 max mem: 9305 +Train: [72] [4100/6250] eta: 0:04:44 lr: 0.000024 grad: 0.1324 (0.1341) loss: 0.8504 (0.8545) time: 0.1329 data: 0.0321 max mem: 9305 +Train: [72] [4200/6250] eta: 0:04:32 lr: 0.000024 grad: 0.1234 (0.1340) loss: 0.8588 (0.8545) time: 0.0857 data: 0.0002 max mem: 9305 +Train: [72] [4300/6250] eta: 0:04:19 lr: 0.000024 grad: 0.1267 (0.1340) loss: 0.8552 (0.8545) time: 0.1389 data: 0.0594 max mem: 9305 +Train: [72] [4400/6250] eta: 0:04:06 lr: 0.000024 grad: 0.1210 (0.1339) loss: 0.8576 (0.8546) time: 0.1387 data: 0.0518 max mem: 9305 +Train: [72] [4500/6250] eta: 0:03:53 lr: 0.000024 grad: 0.1204 (0.1338) loss: 0.8601 (0.8546) time: 0.1449 data: 0.0629 max mem: 9305 +Train: [72] [4600/6250] eta: 0:03:40 lr: 0.000024 grad: 0.1336 (0.1338) loss: 0.8581 (0.8546) time: 0.1736 data: 0.0842 max mem: 9305 +Train: [72] [4700/6250] eta: 0:03:27 lr: 0.000024 grad: 0.1364 (0.1338) loss: 0.8547 (0.8546) time: 0.1424 data: 0.0589 max mem: 9305 +Train: [72] [4800/6250] eta: 0:03:14 lr: 0.000024 grad: 0.1363 (0.1338) loss: 0.8532 (0.8546) time: 0.1712 data: 0.0778 max mem: 9305 +Train: [72] [4900/6250] eta: 0:03:01 lr: 0.000024 grad: 0.1326 (0.1338) loss: 0.8594 (0.8546) time: 0.1559 data: 0.0662 max mem: 9305 +Train: [72] [5000/6250] eta: 0:02:49 lr: 0.000024 grad: 0.1351 (0.1338) loss: 0.8561 (0.8546) time: 0.1916 data: 0.0990 max mem: 9305 +Train: [72] [5100/6250] eta: 0:02:35 lr: 0.000024 grad: 0.1376 (0.1339) loss: 0.8509 (0.8546) time: 0.1291 data: 0.0456 max mem: 9305 +Train: [72] [5200/6250] eta: 0:02:21 lr: 0.000024 grad: 0.1313 (0.1339) loss: 0.8554 (0.8546) time: 0.1262 data: 0.0426 max mem: 9305 +Train: [72] [5300/6250] eta: 0:02:08 lr: 0.000024 grad: 0.1365 (0.1340) loss: 0.8562 (0.8547) time: 0.1046 data: 0.0200 max mem: 9305 +Train: [72] [5400/6250] eta: 0:01:54 lr: 0.000024 grad: 0.1442 (0.1341) loss: 0.8475 (0.8546) time: 0.1231 data: 0.0361 max mem: 9305 +Train: [72] [5500/6250] eta: 0:01:41 lr: 0.000023 grad: 0.1462 (0.1342) loss: 0.8454 (0.8545) time: 0.1503 data: 0.0735 max mem: 9305 +Train: [72] [5600/6250] eta: 0:01:27 lr: 0.000023 grad: 0.1317 (0.1343) loss: 0.8548 (0.8545) time: 0.1511 data: 0.0704 max mem: 9305 +Train: [72] [5700/6250] eta: 0:01:14 lr: 0.000023 grad: 0.1360 (0.1345) loss: 0.8549 (0.8544) time: 0.1341 data: 0.0526 max mem: 9305 +Train: [72] [5800/6250] eta: 0:01:00 lr: 0.000023 grad: 0.1494 (0.1347) loss: 0.8474 (0.8544) time: 0.1373 data: 0.0432 max mem: 9305 +Train: [72] [5900/6250] eta: 0:00:47 lr: 0.000023 grad: 0.1400 (0.1350) loss: 0.8379 (0.8542) time: 0.1243 data: 0.0427 max mem: 9305 +Train: [72] [6000/6250] eta: 0:00:33 lr: 0.000023 grad: 0.1391 (0.1352) loss: 0.8499 (0.8541) time: 0.1528 data: 0.0665 max mem: 9305 +Train: [72] [6100/6250] eta: 0:00:20 lr: 0.000023 grad: 0.1383 (0.1353) loss: 0.8537 (0.8540) time: 0.1481 data: 0.0673 max mem: 9305 +Train: [72] [6200/6250] eta: 0:00:06 lr: 0.000023 grad: 0.1428 (0.1354) loss: 0.8532 (0.8540) time: 0.1459 data: 0.0623 max mem: 9305 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1329 (0.1355) loss: 0.8525 (0.8540) time: 0.1520 data: 0.0656 max mem: 9305 +Train: [72] Total time: 0:14:12 (0.1365 s / it) +Averaged stats: lr: 0.000023 grad: 0.1329 (0.1355) loss: 0.8525 (0.8540) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:03:57 loss: 0.8846 (0.8846) time: 3.8382 data: 3.7552 max mem: 9305 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.8685 (0.8718) time: 0.1537 data: 0.1254 max mem: 9305 +Eval (hcp-train-subset): [72] Total time: 0:00:15 (0.2458 s / it) +Averaged stats (hcp-train-subset): loss: 0.8685 (0.8718) +Eval (hcp-val): [72] [ 0/62] eta: 0:03:55 loss: 0.8745 (0.8745) time: 3.8030 data: 3.7166 max mem: 9305 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.8759 (0.8768) time: 0.1539 data: 0.1256 max mem: 9305 +Eval (hcp-val): [72] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-val): loss: 0.8759 (0.8768) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 8:11:12 lr: 0.000023 grad: 0.2072 (0.2072) loss: 0.8809 (0.8809) time: 4.7157 data: 4.5432 max mem: 9305 +Train: [73] [ 100/6250] eta: 0:21:34 lr: 0.000023 grad: 0.1337 (0.1481) loss: 0.8579 (0.8695) time: 0.1778 data: 0.0671 max mem: 9305 +Train: [73] [ 200/6250] eta: 0:18:21 lr: 0.000023 grad: 0.1383 (0.1488) loss: 0.8551 (0.8649) time: 0.1604 data: 0.0519 max mem: 9305 +Train: [73] [ 300/6250] eta: 0:16:46 lr: 0.000023 grad: 0.1366 (0.1492) loss: 0.8469 (0.8609) time: 0.1612 data: 0.0691 max mem: 9305 +Train: [73] [ 400/6250] eta: 0:15:33 lr: 0.000023 grad: 0.1374 (0.1481) loss: 0.8578 (0.8585) time: 0.1234 data: 0.0296 max mem: 9305 +Train: [73] [ 500/6250] eta: 0:14:47 lr: 0.000023 grad: 0.1297 (0.1460) loss: 0.8555 (0.8583) time: 0.1057 data: 0.0136 max mem: 9305 +Train: [73] [ 600/6250] eta: 0:14:40 lr: 0.000023 grad: 0.1254 (0.1449) loss: 0.8612 (0.8584) time: 0.1691 data: 0.0678 max mem: 9305 +Train: [73] [ 700/6250] eta: 0:14:40 lr: 0.000023 grad: 0.1237 (0.1433) loss: 0.8615 (0.8584) time: 0.1766 data: 0.0826 max mem: 9305 +Train: [73] [ 800/6250] eta: 0:14:39 lr: 0.000023 grad: 0.1412 (0.1420) loss: 0.8564 (0.8585) time: 0.1304 data: 0.0310 max mem: 9305 +Train: [73] [ 900/6250] eta: 0:14:35 lr: 0.000023 grad: 0.1358 (0.1413) loss: 0.8553 (0.8586) time: 0.1953 data: 0.1129 max mem: 9305 +Train: [73] [1000/6250] eta: 0:14:24 lr: 0.000023 grad: 0.1384 (0.1406) loss: 0.8519 (0.8585) time: 0.2015 data: 0.1177 max mem: 9305 +Train: [73] [1100/6250] eta: 0:13:49 lr: 0.000023 grad: 0.1360 (0.1398) loss: 0.8547 (0.8582) time: 0.1341 data: 0.0481 max mem: 9305 +Train: [73] [1200/6250] eta: 0:13:38 lr: 0.000023 grad: 0.1335 (0.1394) loss: 0.8510 (0.8580) time: 0.1727 data: 0.0801 max mem: 9305 +Train: [73] [1300/6250] eta: 0:13:12 lr: 0.000023 grad: 0.1382 (0.1393) loss: 0.8482 (0.8578) time: 0.1378 data: 0.0426 max mem: 9305 +Train: [73] [1400/6250] eta: 0:12:52 lr: 0.000023 grad: 0.1407 (0.1391) loss: 0.8548 (0.8575) time: 0.1257 data: 0.0333 max mem: 9305 +Train: [73] [1500/6250] eta: 0:12:43 lr: 0.000023 grad: 0.1370 (0.1391) loss: 0.8556 (0.8572) time: 0.1522 data: 0.0627 max mem: 9305 +Train: [73] [1600/6250] eta: 0:12:18 lr: 0.000023 grad: 0.1291 (0.1390) loss: 0.8481 (0.8570) time: 0.1095 data: 0.0288 max mem: 9305 +Train: [73] [1700/6250] eta: 0:12:05 lr: 0.000023 grad: 0.1521 (0.1390) loss: 0.8487 (0.8567) time: 0.2134 data: 0.0975 max mem: 9305 +Train: [73] [1800/6250] eta: 0:11:45 lr: 0.000023 grad: 0.1405 (0.1390) loss: 0.8506 (0.8564) time: 0.1504 data: 0.0680 max mem: 9305 +Train: [73] [1900/6250] eta: 0:11:29 lr: 0.000023 grad: 0.1430 (0.1392) loss: 0.8416 (0.8559) time: 0.1097 data: 0.0136 max mem: 9305 +Train: [73] [2000/6250] eta: 0:11:11 lr: 0.000023 grad: 0.1344 (0.1394) loss: 0.8514 (0.8557) time: 0.1596 data: 0.0563 max mem: 9305 +Train: [73] [2100/6250] eta: 0:10:50 lr: 0.000023 grad: 0.1411 (0.1394) loss: 0.8490 (0.8554) time: 0.1257 data: 0.0437 max mem: 9305 +Train: [73] [2200/6250] eta: 0:10:30 lr: 0.000023 grad: 0.1374 (0.1392) loss: 0.8564 (0.8553) time: 0.1296 data: 0.0458 max mem: 9305 +Train: [73] [2300/6250] eta: 0:10:11 lr: 0.000023 grad: 0.1231 (0.1390) loss: 0.8525 (0.8552) time: 0.1315 data: 0.0444 max mem: 9305 +Train: [73] [2400/6250] eta: 0:09:54 lr: 0.000023 grad: 0.1295 (0.1390) loss: 0.8509 (0.8550) time: 0.1396 data: 0.0631 max mem: 9305 +Train: [73] [2500/6250] eta: 0:09:36 lr: 0.000023 grad: 0.1406 (0.1389) loss: 0.8487 (0.8550) time: 0.1102 data: 0.0286 max mem: 9305 +Train: [73] [2600/6250] eta: 0:09:19 lr: 0.000023 grad: 0.1315 (0.1387) loss: 0.8526 (0.8548) time: 0.1522 data: 0.0741 max mem: 9305 +Train: [73] [2700/6250] eta: 0:09:03 lr: 0.000023 grad: 0.1338 (0.1384) loss: 0.8566 (0.8548) time: 0.1224 data: 0.0385 max mem: 9305 +Train: [73] [2800/6250] eta: 0:08:48 lr: 0.000023 grad: 0.1249 (0.1383) loss: 0.8553 (0.8548) time: 0.2013 data: 0.1190 max mem: 9305 +Train: [73] [2900/6250] eta: 0:08:30 lr: 0.000023 grad: 0.1318 (0.1381) loss: 0.8543 (0.8548) time: 0.1386 data: 0.0589 max mem: 9305 +Train: [73] [3000/6250] eta: 0:08:13 lr: 0.000023 grad: 0.1277 (0.1381) loss: 0.8597 (0.8548) time: 0.1290 data: 0.0445 max mem: 9305 +Train: [73] [3100/6250] eta: 0:07:58 lr: 0.000023 grad: 0.1292 (0.1379) loss: 0.8612 (0.8548) time: 0.1742 data: 0.0469 max mem: 9305 +Train: [73] [3200/6250] eta: 0:07:45 lr: 0.000022 grad: 0.1235 (0.1377) loss: 0.8528 (0.8549) time: 0.1752 data: 0.1015 max mem: 9305 +Train: [73] [3300/6250] eta: 0:07:29 lr: 0.000022 grad: 0.1346 (0.1376) loss: 0.8546 (0.8549) time: 0.1994 data: 0.1158 max mem: 9305 +Train: [73] [3400/6250] eta: 0:07:12 lr: 0.000022 grad: 0.1351 (0.1375) loss: 0.8574 (0.8549) time: 0.1406 data: 0.0541 max mem: 9305 +Train: [73] [3500/6250] eta: 0:06:56 lr: 0.000022 grad: 0.1282 (0.1375) loss: 0.8503 (0.8549) time: 0.1171 data: 0.0293 max mem: 9305 +Train: [73] [3600/6250] eta: 0:06:39 lr: 0.000022 grad: 0.1341 (0.1374) loss: 0.8576 (0.8549) time: 0.1345 data: 0.0434 max mem: 9305 +Train: [73] [3700/6250] eta: 0:06:23 lr: 0.000022 grad: 0.1346 (0.1374) loss: 0.8545 (0.8549) time: 0.1244 data: 0.0350 max mem: 9305 +Train: [73] [3800/6250] eta: 0:06:07 lr: 0.000022 grad: 0.1392 (0.1375) loss: 0.8577 (0.8549) time: 0.1396 data: 0.0583 max mem: 9305 +Train: [73] [3900/6250] eta: 0:05:51 lr: 0.000022 grad: 0.1300 (0.1376) loss: 0.8546 (0.8548) time: 0.1356 data: 0.0505 max mem: 9305 +Train: [73] [4000/6250] eta: 0:05:36 lr: 0.000022 grad: 0.1339 (0.1376) loss: 0.8499 (0.8547) time: 0.1433 data: 0.0594 max mem: 9305 +Train: [73] [4100/6250] eta: 0:05:21 lr: 0.000022 grad: 0.1458 (0.1378) loss: 0.8548 (0.8546) time: 0.1493 data: 0.0669 max mem: 9305 +Train: [73] [4200/6250] eta: 0:05:05 lr: 0.000022 grad: 0.1337 (0.1378) loss: 0.8544 (0.8545) time: 0.1195 data: 0.0400 max mem: 9305 +Train: [73] [4300/6250] eta: 0:04:50 lr: 0.000022 grad: 0.1382 (0.1378) loss: 0.8439 (0.8544) time: 0.1627 data: 0.0847 max mem: 9305 +Train: [73] [4400/6250] eta: 0:04:35 lr: 0.000022 grad: 0.1406 (0.1380) loss: 0.8503 (0.8542) time: 0.1574 data: 0.0777 max mem: 9305 +Train: [73] [4500/6250] eta: 0:04:20 lr: 0.000022 grad: 0.1396 (0.1381) loss: 0.8435 (0.8541) time: 0.1352 data: 0.0522 max mem: 9305 +Train: [73] [4600/6250] eta: 0:04:05 lr: 0.000022 grad: 0.1391 (0.1382) loss: 0.8520 (0.8540) time: 0.1582 data: 0.0762 max mem: 9305 +Train: [73] [4700/6250] eta: 0:03:50 lr: 0.000022 grad: 0.1265 (0.1382) loss: 0.8528 (0.8540) time: 0.1421 data: 0.0624 max mem: 9305 +Train: [73] [4800/6250] eta: 0:03:36 lr: 0.000022 grad: 0.1251 (0.1382) loss: 0.8540 (0.8539) time: 0.1535 data: 0.0702 max mem: 9305 +Train: [73] [4900/6250] eta: 0:03:21 lr: 0.000022 grad: 0.1350 (0.1382) loss: 0.8495 (0.8539) time: 0.1554 data: 0.0642 max mem: 9305 +Train: [73] [5000/6250] eta: 0:03:06 lr: 0.000022 grad: 0.1176 (0.1382) loss: 0.8540 (0.8538) time: 0.1424 data: 0.0633 max mem: 9305 +Train: [73] [5100/6250] eta: 0:02:51 lr: 0.000022 grad: 0.1263 (0.1381) loss: 0.8532 (0.8538) time: 0.1897 data: 0.1088 max mem: 9305 +Train: [73] [5200/6250] eta: 0:02:36 lr: 0.000022 grad: 0.1365 (0.1381) loss: 0.8551 (0.8538) time: 0.1146 data: 0.0288 max mem: 9305 +Train: [73] [5300/6250] eta: 0:02:21 lr: 0.000022 grad: 0.1292 (0.1380) loss: 0.8590 (0.8538) time: 0.1146 data: 0.0171 max mem: 9305 +Train: [73] [5400/6250] eta: 0:02:06 lr: 0.000022 grad: 0.1310 (0.1380) loss: 0.8595 (0.8538) time: 0.1191 data: 0.0382 max mem: 9305 +Train: [73] [5500/6250] eta: 0:01:51 lr: 0.000022 grad: 0.1328 (0.1380) loss: 0.8588 (0.8538) time: 0.1198 data: 0.0346 max mem: 9305 +Train: [73] [5600/6250] eta: 0:01:36 lr: 0.000022 grad: 0.1402 (0.1381) loss: 0.8467 (0.8537) time: 0.1220 data: 0.0396 max mem: 9305 +Train: [73] [5700/6250] eta: 0:01:21 lr: 0.000022 grad: 0.1360 (0.1381) loss: 0.8537 (0.8537) time: 0.1576 data: 0.0781 max mem: 9305 +Train: [73] [5800/6250] eta: 0:01:07 lr: 0.000022 grad: 0.1330 (0.1381) loss: 0.8482 (0.8537) time: 0.1681 data: 0.0738 max mem: 9305 +Train: [73] [5900/6250] eta: 0:00:52 lr: 0.000022 grad: 0.1300 (0.1381) loss: 0.8523 (0.8536) time: 0.1644 data: 0.0790 max mem: 9305 +Train: [73] [6000/6250] eta: 0:00:37 lr: 0.000022 grad: 0.1388 (0.1381) loss: 0.8452 (0.8535) time: 0.1675 data: 0.0899 max mem: 9305 +Train: [73] [6100/6250] eta: 0:00:22 lr: 0.000022 grad: 0.1367 (0.1381) loss: 0.8529 (0.8535) time: 0.1433 data: 0.0562 max mem: 9305 +Train: [73] [6200/6250] eta: 0:00:07 lr: 0.000022 grad: 0.1400 (0.1382) loss: 0.8480 (0.8535) time: 0.1285 data: 0.0437 max mem: 9305 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.1304 (0.1381) loss: 0.8551 (0.8535) time: 0.1351 data: 0.0482 max mem: 9305 +Train: [73] Total time: 0:15:35 (0.1497 s / it) +Averaged stats: lr: 0.000022 grad: 0.1304 (0.1381) loss: 0.8551 (0.8535) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:04:07 loss: 0.8735 (0.8735) time: 3.9865 data: 3.9115 max mem: 9305 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.8681 (0.8698) time: 0.1314 data: 0.1026 max mem: 9305 +Eval (hcp-train-subset): [73] Total time: 0:00:13 (0.2138 s / it) +Averaged stats (hcp-train-subset): loss: 0.8681 (0.8698) +Eval (hcp-val): [73] [ 0/62] eta: 0:03:23 loss: 0.8727 (0.8727) time: 3.2887 data: 3.2072 max mem: 9305 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.8744 (0.8763) time: 0.1288 data: 0.0992 max mem: 9305 +Eval (hcp-val): [73] Total time: 0:00:13 (0.2167 s / it) +Averaged stats (hcp-val): loss: 0.8744 (0.8763) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [74] [ 0/6250] eta: 11:21:49 lr: 0.000022 grad: 0.2022 (0.2022) loss: 0.8929 (0.8929) time: 6.5455 data: 6.4024 max mem: 9305 +Train: [74] [ 100/6250] eta: 0:19:48 lr: 0.000022 grad: 0.1258 (0.1555) loss: 0.8630 (0.8702) time: 0.1310 data: 0.0294 max mem: 9305 +Train: [74] [ 200/6250] eta: 0:16:24 lr: 0.000022 grad: 0.1326 (0.1459) loss: 0.8585 (0.8658) time: 0.1270 data: 0.0177 max mem: 9305 +Train: [74] [ 300/6250] eta: 0:15:30 lr: 0.000022 grad: 0.1295 (0.1433) loss: 0.8629 (0.8635) time: 0.1465 data: 0.0494 max mem: 9305 +Train: [74] [ 400/6250] eta: 0:15:32 lr: 0.000022 grad: 0.1222 (0.1413) loss: 0.8614 (0.8622) time: 0.1632 data: 0.0672 max mem: 9305 +Train: [74] [ 500/6250] eta: 0:15:06 lr: 0.000022 grad: 0.1290 (0.1399) loss: 0.8623 (0.8614) time: 0.1699 data: 0.0742 max mem: 9305 +Train: [74] [ 600/6250] eta: 0:14:31 lr: 0.000022 grad: 0.1299 (0.1390) loss: 0.8530 (0.8603) time: 0.1324 data: 0.0407 max mem: 9305 +Train: [74] [ 700/6250] eta: 0:14:12 lr: 0.000022 grad: 0.1295 (0.1381) loss: 0.8540 (0.8600) time: 0.1712 data: 0.0806 max mem: 9305 +Train: [74] [ 800/6250] eta: 0:14:02 lr: 0.000022 grad: 0.1232 (0.1374) loss: 0.8590 (0.8594) time: 0.1845 data: 0.0927 max mem: 9305 +Train: [74] [ 900/6250] eta: 0:13:53 lr: 0.000021 grad: 0.1335 (0.1364) loss: 0.8626 (0.8595) time: 0.1110 data: 0.0006 max mem: 9305 +Train: [74] [1000/6250] eta: 0:13:33 lr: 0.000021 grad: 0.1242 (0.1359) loss: 0.8541 (0.8595) time: 0.1307 data: 0.0404 max mem: 9305 +Train: [74] [1100/6250] eta: 0:13:15 lr: 0.000021 grad: 0.1293 (0.1357) loss: 0.8553 (0.8591) time: 0.1158 data: 0.0209 max mem: 9305 +Train: [74] [1200/6250] eta: 0:13:06 lr: 0.000021 grad: 0.1309 (0.1352) loss: 0.8557 (0.8589) time: 0.1203 data: 0.0265 max mem: 9305 +Train: [74] [1300/6250] eta: 0:12:40 lr: 0.000021 grad: 0.1273 (0.1349) loss: 0.8560 (0.8588) time: 0.1176 data: 0.0193 max mem: 9305 +Train: [74] [1400/6250] eta: 0:12:30 lr: 0.000021 grad: 0.1335 (0.1352) loss: 0.8465 (0.8583) time: 0.2358 data: 0.1473 max mem: 9305 +Train: [74] [1500/6250] eta: 0:12:10 lr: 0.000021 grad: 0.1303 (0.1353) loss: 0.8507 (0.8578) time: 0.1563 data: 0.0738 max mem: 9305 +Train: [74] [1600/6250] eta: 0:11:50 lr: 0.000021 grad: 0.1397 (0.1354) loss: 0.8458 (0.8575) time: 0.1355 data: 0.0493 max mem: 9305 +Train: [74] [1700/6250] eta: 0:11:30 lr: 0.000021 grad: 0.1379 (0.1359) loss: 0.8484 (0.8569) time: 0.1395 data: 0.0543 max mem: 9305 +Train: [74] [1800/6250] eta: 0:11:16 lr: 0.000021 grad: 0.1355 (0.1359) loss: 0.8533 (0.8566) time: 0.1614 data: 0.0718 max mem: 9305 +Train: [74] [1900/6250] eta: 0:11:00 lr: 0.000021 grad: 0.1322 (0.1361) loss: 0.8521 (0.8563) time: 0.1313 data: 0.0428 max mem: 9305 +Train: [74] [2000/6250] eta: 0:10:47 lr: 0.000021 grad: 0.1295 (0.1363) loss: 0.8484 (0.8559) time: 0.1520 data: 0.0697 max mem: 9305 +Train: [74] [2100/6250] eta: 0:10:31 lr: 0.000021 grad: 0.1384 (0.1363) loss: 0.8404 (0.8556) time: 0.1508 data: 0.0517 max mem: 9305 +Train: [74] [2200/6250] eta: 0:10:16 lr: 0.000021 grad: 0.1310 (0.1365) loss: 0.8535 (0.8552) time: 0.1444 data: 0.0585 max mem: 9305 +Train: [74] [2300/6250] eta: 0:09:59 lr: 0.000021 grad: 0.1364 (0.1368) loss: 0.8501 (0.8549) time: 0.1344 data: 0.0467 max mem: 9305 +Train: [74] [2400/6250] eta: 0:09:48 lr: 0.000021 grad: 0.1374 (0.1369) loss: 0.8492 (0.8547) time: 0.0937 data: 0.0002 max mem: 9305 +Train: [74] [2500/6250] eta: 0:09:31 lr: 0.000021 grad: 0.1457 (0.1372) loss: 0.8462 (0.8546) time: 0.1427 data: 0.0563 max mem: 9305 +Train: [74] [2600/6250] eta: 0:09:15 lr: 0.000021 grad: 0.1355 (0.1373) loss: 0.8467 (0.8544) time: 0.1596 data: 0.0727 max mem: 9305 +Train: [74] [2700/6250] eta: 0:09:05 lr: 0.000021 grad: 0.1333 (0.1374) loss: 0.8527 (0.8542) time: 0.1009 data: 0.0002 max mem: 9305 +Train: [74] [2800/6250] eta: 0:08:48 lr: 0.000021 grad: 0.1375 (0.1375) loss: 0.8526 (0.8541) time: 0.1416 data: 0.0512 max mem: 9305 +Train: [74] [2900/6250] eta: 0:08:31 lr: 0.000021 grad: 0.1398 (0.1376) loss: 0.8527 (0.8540) time: 0.1285 data: 0.0390 max mem: 9305 +Train: [74] [3000/6250] eta: 0:08:14 lr: 0.000021 grad: 0.1364 (0.1377) loss: 0.8461 (0.8538) time: 0.1546 data: 0.0499 max mem: 9305 +Train: [74] [3100/6250] eta: 0:07:59 lr: 0.000021 grad: 0.1329 (0.1377) loss: 0.8578 (0.8538) time: 0.1522 data: 0.0665 max mem: 9305 +Train: [74] [3200/6250] eta: 0:07:43 lr: 0.000021 grad: 0.1336 (0.1378) loss: 0.8526 (0.8537) time: 0.1475 data: 0.0578 max mem: 9305 +Train: [74] [3300/6250] eta: 0:07:28 lr: 0.000021 grad: 0.1313 (0.1379) loss: 0.8541 (0.8536) time: 0.1067 data: 0.0002 max mem: 9305 +Train: [74] [3400/6250] eta: 0:07:13 lr: 0.000021 grad: 0.1407 (0.1379) loss: 0.8456 (0.8535) time: 0.1463 data: 0.0592 max mem: 9305 +Train: [74] [3500/6250] eta: 0:06:58 lr: 0.000021 grad: 0.1363 (0.1379) loss: 0.8560 (0.8534) time: 0.1615 data: 0.0805 max mem: 9305 +Train: [74] [3600/6250] eta: 0:06:42 lr: 0.000021 grad: 0.1402 (0.1381) loss: 0.8460 (0.8534) time: 0.1348 data: 0.0485 max mem: 9305 +Train: [74] [3700/6250] eta: 0:06:26 lr: 0.000021 grad: 0.1311 (0.1380) loss: 0.8462 (0.8534) time: 0.1369 data: 0.0434 max mem: 9305 +Train: [74] [3800/6250] eta: 0:06:10 lr: 0.000021 grad: 0.1389 (0.1381) loss: 0.8579 (0.8534) time: 0.1425 data: 0.0593 max mem: 9305 +Train: [74] [3900/6250] eta: 0:05:55 lr: 0.000021 grad: 0.1432 (0.1382) loss: 0.8522 (0.8533) time: 0.1417 data: 0.0528 max mem: 9305 +Train: [74] [4000/6250] eta: 0:05:40 lr: 0.000021 grad: 0.1401 (0.1384) loss: 0.8538 (0.8534) time: 0.1362 data: 0.0466 max mem: 9305 +Train: [74] [4100/6250] eta: 0:05:24 lr: 0.000021 grad: 0.1481 (0.1384) loss: 0.8474 (0.8534) time: 0.1551 data: 0.0673 max mem: 9305 +Train: [74] [4200/6250] eta: 0:05:09 lr: 0.000021 grad: 0.1332 (0.1386) loss: 0.8542 (0.8533) time: 0.1079 data: 0.0005 max mem: 9305 +Train: [74] [4300/6250] eta: 0:04:54 lr: 0.000021 grad: 0.1384 (0.1388) loss: 0.8577 (0.8533) time: 0.1250 data: 0.0306 max mem: 9305 +Train: [74] [4400/6250] eta: 0:04:39 lr: 0.000021 grad: 0.1458 (0.1388) loss: 0.8541 (0.8533) time: 0.1391 data: 0.0507 max mem: 9305 +Train: [74] [4500/6250] eta: 0:04:24 lr: 0.000021 grad: 0.1383 (0.1389) loss: 0.8546 (0.8533) time: 0.1262 data: 0.0383 max mem: 9305 +Train: [74] [4600/6250] eta: 0:04:09 lr: 0.000021 grad: 0.1315 (0.1390) loss: 0.8550 (0.8532) time: 0.1548 data: 0.0691 max mem: 9305 +Train: [74] [4700/6250] eta: 0:03:54 lr: 0.000021 grad: 0.1429 (0.1392) loss: 0.8579 (0.8531) time: 0.1548 data: 0.0597 max mem: 9305 +Train: [74] [4800/6250] eta: 0:03:38 lr: 0.000021 grad: 0.1442 (0.1394) loss: 0.8396 (0.8529) time: 0.1452 data: 0.0474 max mem: 9305 +Train: [74] [4900/6250] eta: 0:03:23 lr: 0.000020 grad: 0.1457 (0.1395) loss: 0.8385 (0.8528) time: 0.1621 data: 0.0751 max mem: 9305 +Train: [74] [5000/6250] eta: 0:03:09 lr: 0.000020 grad: 0.1376 (0.1396) loss: 0.8470 (0.8527) time: 0.1605 data: 0.0695 max mem: 9305 +Train: [74] [5100/6250] eta: 0:02:54 lr: 0.000020 grad: 0.1392 (0.1397) loss: 0.8436 (0.8526) time: 0.1658 data: 0.0705 max mem: 9305 +Train: [74] [5200/6250] eta: 0:02:40 lr: 0.000020 grad: 0.1420 (0.1398) loss: 0.8493 (0.8525) time: 0.1533 data: 0.0499 max mem: 9305 +Train: [74] [5300/6250] eta: 0:02:25 lr: 0.000020 grad: 0.1460 (0.1399) loss: 0.8428 (0.8523) time: 0.1895 data: 0.0681 max mem: 9305 +Train: [74] [5400/6250] eta: 0:02:10 lr: 0.000020 grad: 0.1421 (0.1401) loss: 0.8521 (0.8522) time: 0.1044 data: 0.0002 max mem: 9305 +Train: [74] [5500/6250] eta: 0:01:55 lr: 0.000020 grad: 0.1493 (0.1402) loss: 0.8387 (0.8520) time: 0.1946 data: 0.0925 max mem: 9305 +Train: [74] [5600/6250] eta: 0:01:40 lr: 0.000020 grad: 0.1348 (0.1402) loss: 0.8463 (0.8520) time: 0.1931 data: 0.1037 max mem: 9305 +Train: [74] [5700/6250] eta: 0:01:25 lr: 0.000020 grad: 0.1279 (0.1402) loss: 0.8492 (0.8519) time: 0.1579 data: 0.0717 max mem: 9305 +Train: [74] [5800/6250] eta: 0:01:09 lr: 0.000020 grad: 0.1304 (0.1402) loss: 0.8543 (0.8519) time: 0.1498 data: 0.0555 max mem: 9305 +Train: [74] [5900/6250] eta: 0:00:54 lr: 0.000020 grad: 0.1425 (0.1402) loss: 0.8525 (0.8519) time: 0.1563 data: 0.0726 max mem: 9305 +Train: [74] [6000/6250] eta: 0:00:38 lr: 0.000020 grad: 0.1307 (0.1402) loss: 0.8529 (0.8519) time: 0.1574 data: 0.0685 max mem: 9305 +Train: [74] [6100/6250] eta: 0:00:23 lr: 0.000020 grad: 0.1382 (0.1403) loss: 0.8562 (0.8519) time: 0.1616 data: 0.0570 max mem: 9305 +Train: [74] [6200/6250] eta: 0:00:07 lr: 0.000020 grad: 0.1346 (0.1403) loss: 0.8520 (0.8519) time: 0.1519 data: 0.0563 max mem: 9305 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1372 (0.1403) loss: 0.8528 (0.8518) time: 0.1419 data: 0.0443 max mem: 9305 +Train: [74] Total time: 0:16:19 (0.1567 s / it) +Averaged stats: lr: 0.000020 grad: 0.1372 (0.1403) loss: 0.8528 (0.8518) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:06:21 loss: 0.8805 (0.8805) time: 6.1602 data: 6.1256 max mem: 9305 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.8669 (0.8697) time: 0.1461 data: 0.1171 max mem: 9305 +Eval (hcp-train-subset): [74] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (hcp-train-subset): loss: 0.8669 (0.8697) +Making plots (hcp-train-subset): example=30 +Eval (hcp-val): [74] [ 0/62] eta: 0:04:44 loss: 0.8755 (0.8755) time: 4.5949 data: 4.5152 max mem: 9305 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.8748 (0.8764) time: 0.1413 data: 0.1114 max mem: 9305 +Eval (hcp-val): [74] Total time: 0:00:14 (0.2310 s / it) +Averaged stats (hcp-val): loss: 0.8748 (0.8764) +Making plots (hcp-val): example=48 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [75] [ 0/6250] eta: 9:14:45 lr: 0.000020 grad: 0.0701 (0.0701) loss: 0.9021 (0.9021) time: 5.3257 data: 5.1045 max mem: 9305 +Train: [75] [ 100/6250] eta: 0:22:31 lr: 0.000020 grad: 0.1287 (0.1621) loss: 0.8612 (0.8710) time: 0.1445 data: 0.0240 max mem: 9305 +Train: [75] [ 200/6250] eta: 0:18:56 lr: 0.000020 grad: 0.1321 (0.1554) loss: 0.8548 (0.8639) time: 0.1607 data: 0.0513 max mem: 9305 +Train: [75] [ 300/6250] eta: 0:17:19 lr: 0.000020 grad: 0.1296 (0.1500) loss: 0.8577 (0.8616) time: 0.1595 data: 0.0539 max mem: 9305 +Train: [75] [ 400/6250] eta: 0:16:25 lr: 0.000020 grad: 0.1301 (0.1466) loss: 0.8585 (0.8600) time: 0.1472 data: 0.0534 max mem: 9305 +Train: [75] [ 500/6250] eta: 0:15:40 lr: 0.000020 grad: 0.1339 (0.1447) loss: 0.8522 (0.8583) time: 0.1551 data: 0.0643 max mem: 9305 +Train: [75] [ 600/6250] eta: 0:15:16 lr: 0.000020 grad: 0.1365 (0.1443) loss: 0.8506 (0.8570) time: 0.1593 data: 0.0614 max mem: 9305 +Train: [75] [ 700/6250] eta: 0:15:02 lr: 0.000020 grad: 0.1422 (0.1439) loss: 0.8538 (0.8561) time: 0.1635 data: 0.0661 max mem: 9305 +Train: [75] [ 800/6250] eta: 0:14:45 lr: 0.000020 grad: 0.1323 (0.1437) loss: 0.8521 (0.8554) time: 0.1508 data: 0.0619 max mem: 9305 +Train: [75] [ 900/6250] eta: 0:14:22 lr: 0.000020 grad: 0.1329 (0.1433) loss: 0.8572 (0.8550) time: 0.1737 data: 0.0813 max mem: 9305 +Train: [75] [1000/6250] eta: 0:14:01 lr: 0.000020 grad: 0.1381 (0.1428) loss: 0.8501 (0.8547) time: 0.1681 data: 0.0757 max mem: 9305 +Train: [75] [1100/6250] eta: 0:13:56 lr: 0.000020 grad: 0.1338 (0.1424) loss: 0.8574 (0.8547) time: 0.1564 data: 0.0309 max mem: 9305 +Train: [75] [1200/6250] eta: 0:13:31 lr: 0.000020 grad: 0.1393 (0.1423) loss: 0.8487 (0.8543) time: 0.1696 data: 0.0851 max mem: 9305 +Train: [75] [1300/6250] eta: 0:13:18 lr: 0.000020 grad: 0.1484 (0.1425) loss: 0.8456 (0.8539) time: 0.1636 data: 0.0760 max mem: 9305 +Train: [75] [1400/6250] eta: 0:12:58 lr: 0.000020 grad: 0.1406 (0.1423) loss: 0.8480 (0.8537) time: 0.1437 data: 0.0529 max mem: 9305 +Train: [75] [1500/6250] eta: 0:12:42 lr: 0.000020 grad: 0.1348 (0.1420) loss: 0.8561 (0.8536) time: 0.1306 data: 0.0389 max mem: 9305 +Train: [75] [1600/6250] eta: 0:12:24 lr: 0.000020 grad: 0.1453 (0.1419) loss: 0.8445 (0.8535) time: 0.1736 data: 0.0751 max mem: 9305 +Train: [75] [1700/6250] eta: 0:12:09 lr: 0.000020 grad: 0.1319 (0.1416) loss: 0.8519 (0.8534) time: 0.1958 data: 0.1096 max mem: 9305 +Train: [75] [1800/6250] eta: 0:11:52 lr: 0.000020 grad: 0.1369 (0.1414) loss: 0.8563 (0.8533) time: 0.1590 data: 0.0756 max mem: 9305 +Train: [75] [1900/6250] eta: 0:11:33 lr: 0.000020 grad: 0.1378 (0.1413) loss: 0.8482 (0.8532) time: 0.1524 data: 0.0572 max mem: 9305 +Train: [75] [2000/6250] eta: 0:11:15 lr: 0.000020 grad: 0.1282 (0.1412) loss: 0.8493 (0.8531) time: 0.1414 data: 0.0480 max mem: 9305 +Train: [75] [2100/6250] eta: 0:10:57 lr: 0.000020 grad: 0.1373 (0.1411) loss: 0.8474 (0.8530) time: 0.1642 data: 0.0843 max mem: 9305 +Train: [75] [2200/6250] eta: 0:10:38 lr: 0.000020 grad: 0.1279 (0.1408) loss: 0.8513 (0.8530) time: 0.1284 data: 0.0416 max mem: 9305 +Train: [75] [2300/6250] eta: 0:10:22 lr: 0.000020 grad: 0.1344 (0.1408) loss: 0.8517 (0.8529) time: 0.0938 data: 0.0002 max mem: 9305 +Train: [75] [2400/6250] eta: 0:10:06 lr: 0.000020 grad: 0.1387 (0.1408) loss: 0.8442 (0.8528) time: 0.1270 data: 0.0395 max mem: 9305 +Train: [75] [2500/6250] eta: 0:09:49 lr: 0.000020 grad: 0.1427 (0.1409) loss: 0.8572 (0.8527) time: 0.1513 data: 0.0615 max mem: 9305 +Train: [75] [2600/6250] eta: 0:09:32 lr: 0.000020 grad: 0.1240 (0.1410) loss: 0.8630 (0.8527) time: 0.1417 data: 0.0640 max mem: 9305 +Train: [75] [2700/6250] eta: 0:09:14 lr: 0.000020 grad: 0.1319 (0.1410) loss: 0.8535 (0.8526) time: 0.1205 data: 0.0323 max mem: 9305 +Train: [75] [2800/6250] eta: 0:09:02 lr: 0.000019 grad: 0.1355 (0.1410) loss: 0.8504 (0.8525) time: 0.3016 data: 0.1997 max mem: 9305 +Train: [75] [2900/6250] eta: 0:08:45 lr: 0.000019 grad: 0.1389 (0.1410) loss: 0.8486 (0.8525) time: 0.1395 data: 0.0469 max mem: 9305 +Train: [75] [3000/6250] eta: 0:08:29 lr: 0.000019 grad: 0.1399 (0.1411) loss: 0.8563 (0.8525) time: 0.1800 data: 0.0965 max mem: 9305 +Train: [75] [3100/6250] eta: 0:08:17 lr: 0.000019 grad: 0.1362 (0.1411) loss: 0.8476 (0.8524) time: 0.2563 data: 0.1577 max mem: 9305 +Train: [75] [3200/6250] eta: 0:07:59 lr: 0.000019 grad: 0.1331 (0.1411) loss: 0.8496 (0.8523) time: 0.1567 data: 0.0531 max mem: 9305 +Train: [75] [3300/6250] eta: 0:07:45 lr: 0.000019 grad: 0.1336 (0.1411) loss: 0.8476 (0.8523) time: 0.1066 data: 0.0003 max mem: 9305 +Train: [75] [3400/6250] eta: 0:07:28 lr: 0.000019 grad: 0.1333 (0.1410) loss: 0.8566 (0.8523) time: 0.1516 data: 0.0726 max mem: 9305 +Train: [75] [3500/6250] eta: 0:07:12 lr: 0.000019 grad: 0.1348 (0.1410) loss: 0.8468 (0.8522) time: 0.1219 data: 0.0355 max mem: 9305 +Train: [75] [3600/6250] eta: 0:06:56 lr: 0.000019 grad: 0.1474 (0.1411) loss: 0.8505 (0.8523) time: 0.1740 data: 0.0906 max mem: 9305 +Train: [75] [3700/6250] eta: 0:06:39 lr: 0.000019 grad: 0.1483 (0.1411) loss: 0.8456 (0.8523) time: 0.1613 data: 0.0729 max mem: 9305 +Train: [75] [3800/6250] eta: 0:06:23 lr: 0.000019 grad: 0.1259 (0.1411) loss: 0.8619 (0.8523) time: 0.1629 data: 0.0793 max mem: 9305 +Train: [75] [3900/6250] eta: 0:06:07 lr: 0.000019 grad: 0.1295 (0.1410) loss: 0.8523 (0.8524) time: 0.1459 data: 0.0599 max mem: 9305 +Train: [75] [4000/6250] eta: 0:05:51 lr: 0.000019 grad: 0.1433 (0.1410) loss: 0.8485 (0.8525) time: 0.1578 data: 0.0684 max mem: 9305 +Train: [75] [4100/6250] eta: 0:05:35 lr: 0.000019 grad: 0.1310 (0.1410) loss: 0.8532 (0.8525) time: 0.1548 data: 0.0637 max mem: 9305 +Train: [75] [4200/6250] eta: 0:05:20 lr: 0.000019 grad: 0.1363 (0.1410) loss: 0.8589 (0.8526) time: 0.1295 data: 0.0393 max mem: 9305 +Train: [75] [4300/6250] eta: 0:05:04 lr: 0.000019 grad: 0.1399 (0.1412) loss: 0.8556 (0.8525) time: 0.1393 data: 0.0587 max mem: 9305 +Train: [75] [4400/6250] eta: 0:04:47 lr: 0.000019 grad: 0.1399 (0.1413) loss: 0.8530 (0.8525) time: 0.1364 data: 0.0502 max mem: 9305 +Train: [75] [4500/6250] eta: 0:04:32 lr: 0.000019 grad: 0.1429 (0.1414) loss: 0.8556 (0.8525) time: 0.1576 data: 0.0649 max mem: 9305 +Train: [75] [4600/6250] eta: 0:04:17 lr: 0.000019 grad: 0.1431 (0.1416) loss: 0.8556 (0.8525) time: 0.1214 data: 0.0309 max mem: 9305 +Train: [75] [4700/6250] eta: 0:04:01 lr: 0.000019 grad: 0.1461 (0.1417) loss: 0.8577 (0.8525) time: 0.1331 data: 0.0426 max mem: 9305 +Train: [75] [4800/6250] eta: 0:03:45 lr: 0.000019 grad: 0.1382 (0.1418) loss: 0.8517 (0.8525) time: 0.1428 data: 0.0588 max mem: 9305 +Train: [75] [4900/6250] eta: 0:03:29 lr: 0.000019 grad: 0.1451 (0.1418) loss: 0.8457 (0.8526) time: 0.1362 data: 0.0513 max mem: 9305 +Train: [75] [5000/6250] eta: 0:03:14 lr: 0.000019 grad: 0.1443 (0.1419) loss: 0.8550 (0.8525) time: 0.1907 data: 0.0926 max mem: 9305 +Train: [75] [5100/6250] eta: 0:02:58 lr: 0.000019 grad: 0.1431 (0.1420) loss: 0.8480 (0.8525) time: 0.1822 data: 0.0963 max mem: 9305 +Train: [75] [5200/6250] eta: 0:02:43 lr: 0.000019 grad: 0.1520 (0.1422) loss: 0.8443 (0.8524) time: 0.1654 data: 0.0781 max mem: 9305 +Train: [75] [5300/6250] eta: 0:02:28 lr: 0.000019 grad: 0.1571 (0.1423) loss: 0.8488 (0.8524) time: 0.1851 data: 0.1035 max mem: 9305 +Train: [75] [5400/6250] eta: 0:02:12 lr: 0.000019 grad: 0.1427 (0.1424) loss: 0.8517 (0.8525) time: 0.1460 data: 0.0645 max mem: 9305 +Train: [75] [5500/6250] eta: 0:01:57 lr: 0.000019 grad: 0.1367 (0.1424) loss: 0.8527 (0.8525) time: 0.1630 data: 0.0778 max mem: 9305 +Train: [75] [5600/6250] eta: 0:01:41 lr: 0.000019 grad: 0.1364 (0.1424) loss: 0.8601 (0.8526) time: 0.1798 data: 0.0930 max mem: 9305 +Train: [75] [5700/6250] eta: 0:01:26 lr: 0.000019 grad: 0.1361 (0.1423) loss: 0.8607 (0.8527) time: 0.1525 data: 0.0722 max mem: 9305 +Train: [75] [5800/6250] eta: 0:01:10 lr: 0.000019 grad: 0.1491 (0.1422) loss: 0.8571 (0.8527) time: 0.1380 data: 0.0406 max mem: 9305 +Train: [75] [5900/6250] eta: 0:00:55 lr: 0.000019 grad: 0.1364 (0.1422) loss: 0.8599 (0.8528) time: 0.1198 data: 0.0288 max mem: 9305 +Train: [75] [6000/6250] eta: 0:00:39 lr: 0.000019 grad: 0.1331 (0.1422) loss: 0.8519 (0.8528) time: 0.1838 data: 0.0967 max mem: 9305 +Train: [75] [6100/6250] eta: 0:00:23 lr: 0.000019 grad: 0.1479 (0.1422) loss: 0.8511 (0.8528) time: 0.1465 data: 0.0438 max mem: 9305 +Train: [75] [6200/6250] eta: 0:00:07 lr: 0.000019 grad: 0.1395 (0.1422) loss: 0.8554 (0.8528) time: 0.1603 data: 0.0734 max mem: 9305 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.1374 (0.1421) loss: 0.8579 (0.8528) time: 0.1409 data: 0.0469 max mem: 9305 +Train: [75] Total time: 0:16:33 (0.1589 s / it) +Averaged stats: lr: 0.000019 grad: 0.1374 (0.1421) loss: 0.8579 (0.8528) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:03:59 loss: 0.8772 (0.8772) time: 3.8652 data: 3.7998 max mem: 9305 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.8689 (0.8689) time: 0.1708 data: 0.1399 max mem: 9305 +Eval (hcp-train-subset): [75] Total time: 0:00:14 (0.2377 s / it) +Averaged stats (hcp-train-subset): loss: 0.8689 (0.8689) +Eval (hcp-val): [75] [ 0/62] eta: 0:04:39 loss: 0.8706 (0.8706) time: 4.5129 data: 4.4115 max mem: 9305 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.8747 (0.8755) time: 0.1445 data: 0.1159 max mem: 9305 +Eval (hcp-val): [75] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (hcp-val): loss: 0.8747 (0.8755) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [76] [ 0/6250] eta: 10:39:35 lr: 0.000019 grad: 0.1001 (0.1001) loss: 0.8855 (0.8855) time: 6.1402 data: 6.0392 max mem: 9305 +Train: [76] [ 100/6250] eta: 0:21:19 lr: 0.000019 grad: 0.1366 (0.1593) loss: 0.8641 (0.8698) time: 0.1867 data: 0.0782 max mem: 9305 +Train: [76] [ 200/6250] eta: 0:18:33 lr: 0.000019 grad: 0.1369 (0.1547) loss: 0.8583 (0.8640) time: 0.1645 data: 0.0568 max mem: 9305 +Train: [76] [ 300/6250] eta: 0:17:26 lr: 0.000019 grad: 0.1386 (0.1490) loss: 0.8580 (0.8620) time: 0.1724 data: 0.0647 max mem: 9305 +Train: [76] [ 400/6250] eta: 0:16:36 lr: 0.000019 grad: 0.1259 (0.1458) loss: 0.8630 (0.8608) time: 0.1422 data: 0.0463 max mem: 9305 +Train: [76] [ 500/6250] eta: 0:16:04 lr: 0.000019 grad: 0.1333 (0.1425) loss: 0.8643 (0.8608) time: 0.1569 data: 0.0570 max mem: 9305 +Train: [76] [ 600/6250] eta: 0:15:31 lr: 0.000019 grad: 0.1298 (0.1415) loss: 0.8609 (0.8604) time: 0.1736 data: 0.0772 max mem: 9305 +Train: [76] [ 700/6250] eta: 0:15:05 lr: 0.000019 grad: 0.1252 (0.1408) loss: 0.8551 (0.8599) time: 0.1440 data: 0.0579 max mem: 9305 +Train: [76] [ 800/6250] eta: 0:14:38 lr: 0.000018 grad: 0.1358 (0.1404) loss: 0.8568 (0.8594) time: 0.1626 data: 0.0686 max mem: 9305 +Train: [76] [ 900/6250] eta: 0:14:14 lr: 0.000018 grad: 0.1386 (0.1398) loss: 0.8627 (0.8592) time: 0.1470 data: 0.0512 max mem: 9305 +Train: [76] [1000/6250] eta: 0:13:54 lr: 0.000018 grad: 0.1313 (0.1395) loss: 0.8610 (0.8590) time: 0.1476 data: 0.0517 max mem: 9305 +Train: [76] [1100/6250] eta: 0:13:49 lr: 0.000018 grad: 0.1377 (0.1391) loss: 0.8496 (0.8587) time: 0.2539 data: 0.0734 max mem: 9305 +Train: [76] [1200/6250] eta: 0:13:34 lr: 0.000018 grad: 0.1312 (0.1387) loss: 0.8506 (0.8587) time: 0.1490 data: 0.0592 max mem: 9305 +Train: [76] [1300/6250] eta: 0:13:16 lr: 0.000018 grad: 0.1387 (0.1385) loss: 0.8468 (0.8584) time: 0.1687 data: 0.0630 max mem: 9305 +Train: [76] [1400/6250] eta: 0:13:01 lr: 0.000018 grad: 0.1350 (0.1383) loss: 0.8474 (0.8582) time: 0.1944 data: 0.1067 max mem: 9305 +Train: [76] [1500/6250] eta: 0:12:42 lr: 0.000018 grad: 0.1324 (0.1382) loss: 0.8513 (0.8580) time: 0.1450 data: 0.0603 max mem: 9305 +Train: [76] [1600/6250] eta: 0:12:23 lr: 0.000018 grad: 0.1412 (0.1382) loss: 0.8522 (0.8577) time: 0.1365 data: 0.0237 max mem: 9305 +Train: [76] [1700/6250] eta: 0:12:13 lr: 0.000018 grad: 0.1274 (0.1382) loss: 0.8549 (0.8575) time: 0.1197 data: 0.0242 max mem: 9305 +Train: [76] [1800/6250] eta: 0:12:00 lr: 0.000018 grad: 0.1409 (0.1382) loss: 0.8540 (0.8573) time: 0.1477 data: 0.0652 max mem: 9305 +Train: [76] [1900/6250] eta: 0:11:48 lr: 0.000018 grad: 0.1280 (0.1380) loss: 0.8593 (0.8573) time: 0.1238 data: 0.0248 max mem: 9305 +Train: [76] [2000/6250] eta: 0:11:33 lr: 0.000018 grad: 0.1312 (0.1379) loss: 0.8497 (0.8571) time: 0.1658 data: 0.0740 max mem: 9305 +Train: [76] [2100/6250] eta: 0:11:15 lr: 0.000018 grad: 0.1504 (0.1381) loss: 0.8487 (0.8569) time: 0.1339 data: 0.0027 max mem: 9305 +Train: [76] [2200/6250] eta: 0:11:00 lr: 0.000018 grad: 0.1341 (0.1386) loss: 0.8602 (0.8568) time: 0.1166 data: 0.0276 max mem: 9305 +Train: [76] [2300/6250] eta: 0:10:43 lr: 0.000018 grad: 0.1344 (0.1387) loss: 0.8570 (0.8567) time: 0.1411 data: 0.0610 max mem: 9305 +Train: [76] [2400/6250] eta: 0:10:28 lr: 0.000018 grad: 0.1326 (0.1387) loss: 0.8569 (0.8566) time: 0.1788 data: 0.0983 max mem: 9305 +Train: [76] [2500/6250] eta: 0:10:12 lr: 0.000018 grad: 0.1461 (0.1387) loss: 0.8490 (0.8566) time: 0.1594 data: 0.0759 max mem: 9305 +Train: [76] [2600/6250] eta: 0:09:54 lr: 0.000018 grad: 0.1519 (0.1387) loss: 0.8495 (0.8566) time: 0.1367 data: 0.0451 max mem: 9305 +Train: [76] [2700/6250] eta: 0:09:37 lr: 0.000018 grad: 0.1367 (0.1388) loss: 0.8513 (0.8565) time: 0.1549 data: 0.0677 max mem: 9305 +Train: [76] [2800/6250] eta: 0:09:22 lr: 0.000018 grad: 0.1426 (0.1389) loss: 0.8530 (0.8565) time: 0.1788 data: 0.0687 max mem: 9305 +Train: [76] [2900/6250] eta: 0:09:05 lr: 0.000018 grad: 0.1391 (0.1390) loss: 0.8508 (0.8564) time: 0.1458 data: 0.0494 max mem: 9305 +Train: [76] [3000/6250] eta: 0:08:51 lr: 0.000018 grad: 0.1300 (0.1391) loss: 0.8628 (0.8563) time: 0.1393 data: 0.0464 max mem: 9305 +Train: [76] [3100/6250] eta: 0:08:33 lr: 0.000018 grad: 0.1333 (0.1390) loss: 0.8555 (0.8563) time: 0.1404 data: 0.0433 max mem: 9305 +Train: [76] [3200/6250] eta: 0:08:21 lr: 0.000018 grad: 0.1384 (0.1390) loss: 0.8545 (0.8563) time: 0.1082 data: 0.0002 max mem: 9305 +Train: [76] [3300/6250] eta: 0:08:04 lr: 0.000018 grad: 0.1422 (0.1391) loss: 0.8542 (0.8561) time: 0.1225 data: 0.0266 max mem: 9305 +Train: [76] [3400/6250] eta: 0:07:50 lr: 0.000018 grad: 0.1412 (0.1393) loss: 0.8561 (0.8560) time: 0.1758 data: 0.0553 max mem: 9305 +Train: [76] [3500/6250] eta: 0:07:33 lr: 0.000018 grad: 0.1393 (0.1394) loss: 0.8593 (0.8559) time: 0.1397 data: 0.0511 max mem: 9305 +Train: [76] [3600/6250] eta: 0:07:18 lr: 0.000018 grad: 0.1281 (0.1394) loss: 0.8569 (0.8557) time: 0.1066 data: 0.0002 max mem: 9305 +Train: [76] [3700/6250] eta: 0:07:01 lr: 0.000018 grad: 0.1410 (0.1395) loss: 0.8538 (0.8557) time: 0.1343 data: 0.0407 max mem: 9305 +Train: [76] [3800/6250] eta: 0:06:44 lr: 0.000018 grad: 0.1511 (0.1397) loss: 0.8483 (0.8555) time: 0.1271 data: 0.0380 max mem: 9305 +Train: [76] [3900/6250] eta: 0:06:27 lr: 0.000018 grad: 0.1506 (0.1400) loss: 0.8475 (0.8553) time: 0.1269 data: 0.0504 max mem: 9305 +Train: [76] [4000/6250] eta: 0:06:10 lr: 0.000018 grad: 0.1358 (0.1401) loss: 0.8480 (0.8553) time: 0.1954 data: 0.1032 max mem: 9305 +Train: [76] [4100/6250] eta: 0:05:54 lr: 0.000018 grad: 0.1384 (0.1401) loss: 0.8514 (0.8552) time: 0.1969 data: 0.1038 max mem: 9305 +Train: [76] [4200/6250] eta: 0:05:37 lr: 0.000018 grad: 0.1390 (0.1402) loss: 0.8601 (0.8551) time: 0.1550 data: 0.0713 max mem: 9305 +Train: [76] [4300/6250] eta: 0:05:20 lr: 0.000018 grad: 0.1412 (0.1403) loss: 0.8572 (0.8550) time: 0.1714 data: 0.0854 max mem: 9305 +Train: [76] [4400/6250] eta: 0:05:04 lr: 0.000018 grad: 0.1444 (0.1405) loss: 0.8476 (0.8549) time: 0.1673 data: 0.0833 max mem: 9305 +Train: [76] [4500/6250] eta: 0:04:47 lr: 0.000018 grad: 0.1446 (0.1407) loss: 0.8537 (0.8548) time: 0.1571 data: 0.0698 max mem: 9305 +Train: [76] [4600/6250] eta: 0:04:30 lr: 0.000018 grad: 0.1486 (0.1408) loss: 0.8542 (0.8547) time: 0.1406 data: 0.0584 max mem: 9305 +Train: [76] [4700/6250] eta: 0:04:14 lr: 0.000018 grad: 0.1413 (0.1409) loss: 0.8467 (0.8546) time: 0.1838 data: 0.0938 max mem: 9305 +Train: [76] [4800/6250] eta: 0:03:57 lr: 0.000018 grad: 0.1384 (0.1410) loss: 0.8465 (0.8546) time: 0.1316 data: 0.0394 max mem: 9305 +Train: [76] [4900/6250] eta: 0:03:41 lr: 0.000018 grad: 0.1418 (0.1411) loss: 0.8497 (0.8545) time: 0.1072 data: 0.0153 max mem: 9305 +Train: [76] [5000/6250] eta: 0:03:25 lr: 0.000018 grad: 0.1369 (0.1412) loss: 0.8580 (0.8545) time: 0.1637 data: 0.0730 max mem: 9305 +Train: [76] [5100/6250] eta: 0:03:09 lr: 0.000017 grad: 0.1440 (0.1412) loss: 0.8535 (0.8545) time: 0.1594 data: 0.0741 max mem: 9305 +Train: [76] [5200/6250] eta: 0:02:53 lr: 0.000017 grad: 0.1427 (0.1414) loss: 0.8538 (0.8545) time: 0.1721 data: 0.0816 max mem: 9305 +Train: [76] [5300/6250] eta: 0:02:36 lr: 0.000017 grad: 0.1321 (0.1414) loss: 0.8551 (0.8545) time: 0.1760 data: 0.0972 max mem: 9305 +Train: [76] [5400/6250] eta: 0:02:19 lr: 0.000017 grad: 0.1418 (0.1415) loss: 0.8533 (0.8544) time: 0.1476 data: 0.0393 max mem: 9305 +Train: [76] [5500/6250] eta: 0:02:03 lr: 0.000017 grad: 0.1359 (0.1416) loss: 0.8555 (0.8544) time: 0.1240 data: 0.0256 max mem: 9305 +Train: [76] [5600/6250] eta: 0:01:46 lr: 0.000017 grad: 0.1416 (0.1417) loss: 0.8515 (0.8543) time: 0.1612 data: 0.0693 max mem: 9305 +Train: [76] [5700/6250] eta: 0:01:30 lr: 0.000017 grad: 0.1463 (0.1419) loss: 0.8587 (0.8542) time: 0.1269 data: 0.0376 max mem: 9305 +Train: [76] [5800/6250] eta: 0:01:13 lr: 0.000017 grad: 0.1426 (0.1420) loss: 0.8548 (0.8542) time: 0.1433 data: 0.0549 max mem: 9305 +Train: [76] [5900/6250] eta: 0:00:57 lr: 0.000017 grad: 0.1393 (0.1422) loss: 0.8489 (0.8541) time: 0.1562 data: 0.0661 max mem: 9305 +Train: [76] [6000/6250] eta: 0:00:41 lr: 0.000017 grad: 0.1458 (0.1423) loss: 0.8518 (0.8540) time: 0.2354 data: 0.1243 max mem: 9305 +Train: [76] [6100/6250] eta: 0:00:24 lr: 0.000017 grad: 0.1480 (0.1424) loss: 0.8491 (0.8539) time: 0.2056 data: 0.1084 max mem: 9305 +Train: [76] [6200/6250] eta: 0:00:08 lr: 0.000017 grad: 0.1462 (0.1425) loss: 0.8423 (0.8539) time: 0.2753 data: 0.1486 max mem: 9305 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1452 (0.1425) loss: 0.8464 (0.8538) time: 0.1215 data: 0.0003 max mem: 9305 +Train: [76] Total time: 0:17:23 (0.1670 s / it) +Averaged stats: lr: 0.000017 grad: 0.1452 (0.1425) loss: 0.8464 (0.8538) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:03:41 loss: 0.8853 (0.8853) time: 3.5688 data: 3.4894 max mem: 9305 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.8679 (0.8677) time: 0.1370 data: 0.1086 max mem: 9305 +Eval (hcp-train-subset): [76] Total time: 0:00:14 (0.2304 s / it) +Averaged stats (hcp-train-subset): loss: 0.8679 (0.8677) +Eval (hcp-val): [76] [ 0/62] eta: 0:05:00 loss: 0.8762 (0.8762) time: 4.8405 data: 4.7980 max mem: 9305 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.8764 (0.8772) time: 0.1567 data: 0.1274 max mem: 9305 +Eval (hcp-val): [76] Total time: 0:00:13 (0.2250 s / it) +Averaged stats (hcp-val): loss: 0.8764 (0.8772) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [77] [ 0/6250] eta: 10:36:06 lr: 0.000017 grad: 0.1445 (0.1445) loss: 0.8619 (0.8619) time: 6.1066 data: 5.9673 max mem: 9305 +Train: [77] [ 100/6250] eta: 0:21:32 lr: 0.000017 grad: 0.1225 (0.1538) loss: 0.8732 (0.8673) time: 0.1430 data: 0.0349 max mem: 9305 +Train: [77] [ 200/6250] eta: 0:19:00 lr: 0.000017 grad: 0.1282 (0.1500) loss: 0.8675 (0.8650) time: 0.1552 data: 0.0430 max mem: 9305 +Train: [77] [ 300/6250] eta: 0:17:53 lr: 0.000017 grad: 0.1436 (0.1497) loss: 0.8615 (0.8633) time: 0.1728 data: 0.0480 max mem: 9305 +Train: [77] [ 400/6250] eta: 0:17:33 lr: 0.000017 grad: 0.1310 (0.1483) loss: 0.8602 (0.8625) time: 0.1851 data: 0.0899 max mem: 9305 +Train: [77] [ 500/6250] eta: 0:16:46 lr: 0.000017 grad: 0.1364 (0.1476) loss: 0.8605 (0.8612) time: 0.1415 data: 0.0520 max mem: 9305 +Train: [77] [ 600/6250] eta: 0:16:36 lr: 0.000017 grad: 0.1401 (0.1475) loss: 0.8562 (0.8603) time: 0.2333 data: 0.1346 max mem: 9305 +Train: [77] [ 700/6250] eta: 0:16:11 lr: 0.000017 grad: 0.1403 (0.1466) loss: 0.8535 (0.8593) time: 0.1161 data: 0.0003 max mem: 9305 +Train: [77] [ 800/6250] eta: 0:15:51 lr: 0.000017 grad: 0.1345 (0.1461) loss: 0.8525 (0.8586) time: 0.1877 data: 0.0714 max mem: 9305 +Train: [77] [ 900/6250] eta: 0:15:40 lr: 0.000017 grad: 0.1449 (0.1458) loss: 0.8446 (0.8578) time: 0.1014 data: 0.0004 max mem: 9305 +Train: [77] [1000/6250] eta: 0:15:18 lr: 0.000017 grad: 0.1433 (0.1454) loss: 0.8579 (0.8574) time: 0.1773 data: 0.0789 max mem: 9305 +Train: [77] [1100/6250] eta: 0:15:08 lr: 0.000017 grad: 0.1443 (0.1451) loss: 0.8533 (0.8571) time: 0.1950 data: 0.0862 max mem: 9305 +Train: [77] [1200/6250] eta: 0:14:45 lr: 0.000017 grad: 0.1493 (0.1448) loss: 0.8529 (0.8567) time: 0.1665 data: 0.0803 max mem: 9305 +Train: [77] [1300/6250] eta: 0:14:23 lr: 0.000017 grad: 0.1370 (0.1446) loss: 0.8491 (0.8562) time: 0.1579 data: 0.0701 max mem: 9305 +Train: [77] [1400/6250] eta: 0:13:59 lr: 0.000017 grad: 0.1460 (0.1449) loss: 0.8448 (0.8556) time: 0.1486 data: 0.0617 max mem: 9305 +Train: [77] [1500/6250] eta: 0:13:36 lr: 0.000017 grad: 0.1412 (0.1451) loss: 0.8507 (0.8551) time: 0.1789 data: 0.0929 max mem: 9305 +Train: [77] [1600/6250] eta: 0:13:15 lr: 0.000017 grad: 0.1369 (0.1452) loss: 0.8522 (0.8546) time: 0.1654 data: 0.0786 max mem: 9305 +Train: [77] [1700/6250] eta: 0:12:56 lr: 0.000017 grad: 0.1367 (0.1450) loss: 0.8440 (0.8543) time: 0.1548 data: 0.0722 max mem: 9305 +Train: [77] [1800/6250] eta: 0:12:34 lr: 0.000017 grad: 0.1419 (0.1450) loss: 0.8532 (0.8542) time: 0.1344 data: 0.0486 max mem: 9305 +Train: [77] [1900/6250] eta: 0:12:14 lr: 0.000017 grad: 0.1365 (0.1450) loss: 0.8527 (0.8538) time: 0.1364 data: 0.0549 max mem: 9305 +Train: [77] [2000/6250] eta: 0:11:57 lr: 0.000017 grad: 0.1395 (0.1451) loss: 0.8463 (0.8536) time: 0.2311 data: 0.1098 max mem: 9305 +Train: [77] [2100/6250] eta: 0:11:44 lr: 0.000017 grad: 0.1442 (0.1451) loss: 0.8435 (0.8534) time: 0.1635 data: 0.0750 max mem: 9305 +Train: [77] [2200/6250] eta: 0:11:25 lr: 0.000017 grad: 0.1388 (0.1451) loss: 0.8490 (0.8531) time: 0.1943 data: 0.0616 max mem: 9305 +Train: [77] [2300/6250] eta: 0:11:08 lr: 0.000017 grad: 0.1439 (0.1450) loss: 0.8544 (0.8530) time: 0.1278 data: 0.0299 max mem: 9305 +Train: [77] [2400/6250] eta: 0:10:48 lr: 0.000017 grad: 0.1423 (0.1449) loss: 0.8549 (0.8530) time: 0.1355 data: 0.0548 max mem: 9305 +Train: [77] [2500/6250] eta: 0:10:26 lr: 0.000017 grad: 0.1390 (0.1450) loss: 0.8478 (0.8528) time: 0.1197 data: 0.0404 max mem: 9305 +Train: [77] [2600/6250] eta: 0:10:07 lr: 0.000017 grad: 0.1474 (0.1449) loss: 0.8469 (0.8528) time: 0.1376 data: 0.0495 max mem: 9305 +Train: [77] [2700/6250] eta: 0:09:49 lr: 0.000017 grad: 0.1398 (0.1447) loss: 0.8582 (0.8528) time: 0.1690 data: 0.0709 max mem: 9305 +Train: [77] [2800/6250] eta: 0:09:32 lr: 0.000017 grad: 0.1416 (0.1447) loss: 0.8508 (0.8528) time: 0.1464 data: 0.0607 max mem: 9305 +Train: [77] [2900/6250] eta: 0:09:14 lr: 0.000017 grad: 0.1447 (0.1446) loss: 0.8545 (0.8527) time: 0.1525 data: 0.0583 max mem: 9305 +Train: [77] [3000/6250] eta: 0:08:55 lr: 0.000017 grad: 0.1482 (0.1446) loss: 0.8463 (0.8527) time: 0.1396 data: 0.0422 max mem: 9305 +Train: [77] [3100/6250] eta: 0:08:36 lr: 0.000017 grad: 0.1293 (0.1446) loss: 0.8569 (0.8527) time: 0.1282 data: 0.0369 max mem: 9305 +Train: [77] [3200/6250] eta: 0:08:19 lr: 0.000017 grad: 0.1358 (0.1444) loss: 0.8548 (0.8527) time: 0.0960 data: 0.0059 max mem: 9305 +Train: [77] [3300/6250] eta: 0:08:03 lr: 0.000016 grad: 0.1419 (0.1444) loss: 0.8566 (0.8527) time: 0.1079 data: 0.0002 max mem: 9305 +Train: [77] [3400/6250] eta: 0:07:46 lr: 0.000016 grad: 0.1421 (0.1444) loss: 0.8527 (0.8527) time: 0.1577 data: 0.0625 max mem: 9305 +Train: [77] [3500/6250] eta: 0:07:34 lr: 0.000016 grad: 0.1399 (0.1444) loss: 0.8535 (0.8526) time: 0.1677 data: 0.0726 max mem: 9305 +Train: [77] [3600/6250] eta: 0:07:17 lr: 0.000016 grad: 0.1397 (0.1444) loss: 0.8516 (0.8526) time: 0.2287 data: 0.0950 max mem: 9305 +Train: [77] [3700/6250] eta: 0:07:03 lr: 0.000016 grad: 0.1357 (0.1443) loss: 0.8583 (0.8526) time: 0.1589 data: 0.0589 max mem: 9305 +Train: [77] [3800/6250] eta: 0:06:46 lr: 0.000016 grad: 0.1292 (0.1442) loss: 0.8606 (0.8527) time: 0.1960 data: 0.0603 max mem: 9305 +Train: [77] [3900/6250] eta: 0:06:30 lr: 0.000016 grad: 0.1413 (0.1442) loss: 0.8491 (0.8526) time: 0.1396 data: 0.0525 max mem: 9305 +Train: [77] [4000/6250] eta: 0:06:13 lr: 0.000016 grad: 0.1413 (0.1442) loss: 0.8557 (0.8526) time: 0.1676 data: 0.0825 max mem: 9305 +Train: [77] [4100/6250] eta: 0:05:56 lr: 0.000016 grad: 0.1385 (0.1445) loss: 0.8526 (0.8525) time: 0.1645 data: 0.0797 max mem: 9305 +Train: [77] [4200/6250] eta: 0:05:39 lr: 0.000016 grad: 0.1380 (0.1445) loss: 0.8531 (0.8526) time: 0.1678 data: 0.0768 max mem: 9305 +Train: [77] [4300/6250] eta: 0:05:23 lr: 0.000016 grad: 0.1370 (0.1445) loss: 0.8581 (0.8526) time: 0.1266 data: 0.0378 max mem: 9305 +Train: [77] [4400/6250] eta: 0:05:06 lr: 0.000016 grad: 0.1403 (0.1445) loss: 0.8570 (0.8527) time: 0.1026 data: 0.0065 max mem: 9305 +Train: [77] [4500/6250] eta: 0:04:49 lr: 0.000016 grad: 0.1341 (0.1445) loss: 0.8585 (0.8528) time: 0.1734 data: 0.0772 max mem: 9305 +Train: [77] [4600/6250] eta: 0:04:33 lr: 0.000016 grad: 0.1435 (0.1445) loss: 0.8500 (0.8529) time: 0.0921 data: 0.0002 max mem: 9305 +Train: [77] [4700/6250] eta: 0:04:16 lr: 0.000016 grad: 0.1444 (0.1444) loss: 0.8537 (0.8529) time: 0.1753 data: 0.0962 max mem: 9305 +Train: [77] [4800/6250] eta: 0:04:00 lr: 0.000016 grad: 0.1389 (0.1444) loss: 0.8563 (0.8529) time: 0.1649 data: 0.0681 max mem: 9305 +Train: [77] [4900/6250] eta: 0:03:43 lr: 0.000016 grad: 0.1403 (0.1445) loss: 0.8511 (0.8529) time: 0.1826 data: 0.0949 max mem: 9305 +Train: [77] [5000/6250] eta: 0:03:26 lr: 0.000016 grad: 0.1409 (0.1446) loss: 0.8480 (0.8529) time: 0.1233 data: 0.0383 max mem: 9305 +Train: [77] [5100/6250] eta: 0:03:09 lr: 0.000016 grad: 0.1511 (0.1447) loss: 0.8517 (0.8529) time: 0.1537 data: 0.0668 max mem: 9305 +Train: [77] [5200/6250] eta: 0:02:53 lr: 0.000016 grad: 0.1441 (0.1448) loss: 0.8544 (0.8530) time: 0.1506 data: 0.0610 max mem: 9305 +Train: [77] [5300/6250] eta: 0:02:36 lr: 0.000016 grad: 0.1491 (0.1450) loss: 0.8490 (0.8529) time: 0.1205 data: 0.0225 max mem: 9305 +Train: [77] [5400/6250] eta: 0:02:19 lr: 0.000016 grad: 0.1529 (0.1453) loss: 0.8462 (0.8528) time: 0.1429 data: 0.0468 max mem: 9305 +Train: [77] [5500/6250] eta: 0:02:03 lr: 0.000016 grad: 0.1502 (0.1455) loss: 0.8391 (0.8526) time: 0.1089 data: 0.0175 max mem: 9305 +Train: [77] [5600/6250] eta: 0:01:46 lr: 0.000016 grad: 0.1524 (0.1456) loss: 0.8459 (0.8526) time: 0.1404 data: 0.0482 max mem: 9305 +Train: [77] [5700/6250] eta: 0:01:29 lr: 0.000016 grad: 0.1521 (0.1459) loss: 0.8404 (0.8525) time: 0.1210 data: 0.0299 max mem: 9305 +Train: [77] [5800/6250] eta: 0:01:13 lr: 0.000016 grad: 0.1475 (0.1461) loss: 0.8478 (0.8524) time: 0.1180 data: 0.0266 max mem: 9305 +Train: [77] [5900/6250] eta: 0:00:56 lr: 0.000016 grad: 0.1499 (0.1462) loss: 0.8538 (0.8524) time: 0.0978 data: 0.0113 max mem: 9305 +Train: [77] [6000/6250] eta: 0:00:40 lr: 0.000016 grad: 0.1445 (0.1463) loss: 0.8473 (0.8523) time: 0.1423 data: 0.0578 max mem: 9305 +Train: [77] [6100/6250] eta: 0:00:24 lr: 0.000016 grad: 0.1477 (0.1464) loss: 0.8492 (0.8522) time: 0.1611 data: 0.0691 max mem: 9305 +Train: [77] [6200/6250] eta: 0:00:08 lr: 0.000016 grad: 0.1516 (0.1465) loss: 0.8514 (0.8522) time: 0.2259 data: 0.0988 max mem: 9305 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.1469 (0.1466) loss: 0.8513 (0.8521) time: 0.1006 data: 0.0004 max mem: 9305 +Train: [77] Total time: 0:16:58 (0.1629 s / it) +Averaged stats: lr: 0.000016 grad: 0.1469 (0.1466) loss: 0.8513 (0.8521) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:05:45 loss: 0.8749 (0.8749) time: 5.5780 data: 5.5437 max mem: 9305 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.8635 (0.8671) time: 0.1201 data: 0.0905 max mem: 9305 +Eval (hcp-train-subset): [77] Total time: 0:00:15 (0.2421 s / it) +Averaged stats (hcp-train-subset): loss: 0.8635 (0.8671) +Eval (hcp-val): [77] [ 0/62] eta: 0:06:25 loss: 0.8730 (0.8730) time: 6.2110 data: 6.1730 max mem: 9305 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.8744 (0.8760) time: 0.1592 data: 0.1284 max mem: 9305 +Eval (hcp-val): [77] Total time: 0:00:16 (0.2669 s / it) +Averaged stats (hcp-val): loss: 0.8744 (0.8760) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 12:27:26 lr: 0.000016 grad: 0.1307 (0.1307) loss: 0.8687 (0.8687) time: 7.1754 data: 7.0644 max mem: 9305 +Train: [78] [ 100/6250] eta: 0:26:14 lr: 0.000016 grad: 0.1331 (0.1803) loss: 0.8579 (0.8616) time: 0.2064 data: 0.0921 max mem: 9305 +Train: [78] [ 200/6250] eta: 0:22:40 lr: 0.000016 grad: 0.1365 (0.1690) loss: 0.8544 (0.8566) time: 0.1895 data: 0.0741 max mem: 9305 +Train: [78] [ 300/6250] eta: 0:21:17 lr: 0.000016 grad: 0.1221 (0.1586) loss: 0.8585 (0.8563) time: 0.2049 data: 0.1042 max mem: 9305 +Train: [78] [ 400/6250] eta: 0:19:28 lr: 0.000016 grad: 0.1463 (0.1551) loss: 0.8586 (0.8571) time: 0.1690 data: 0.0686 max mem: 9305 +Train: [78] [ 500/6250] eta: 0:19:11 lr: 0.000016 grad: 0.1462 (0.1556) loss: 0.8603 (0.8568) time: 0.3164 data: 0.2245 max mem: 9305 +Train: [78] [ 600/6250] eta: 0:17:55 lr: 0.000016 grad: 0.1392 (0.1545) loss: 0.8591 (0.8569) time: 0.1424 data: 0.0244 max mem: 9305 +Train: [78] [ 700/6250] eta: 0:17:19 lr: 0.000016 grad: 0.1380 (0.1534) loss: 0.8652 (0.8572) time: 0.1742 data: 0.0391 max mem: 9305 +Train: [78] [ 800/6250] eta: 0:16:51 lr: 0.000016 grad: 0.1305 (0.1522) loss: 0.8658 (0.8578) time: 0.2188 data: 0.0647 max mem: 9305 +Train: [78] [ 900/6250] eta: 0:16:54 lr: 0.000016 grad: 0.1443 (0.1515) loss: 0.8525 (0.8580) time: 0.1682 data: 0.0532 max mem: 9305 +Train: [78] [1000/6250] eta: 0:16:32 lr: 0.000016 grad: 0.1478 (0.1508) loss: 0.8572 (0.8581) time: 0.1144 data: 0.0003 max mem: 9305 +Train: [78] [1100/6250] eta: 0:16:15 lr: 0.000016 grad: 0.1463 (0.1505) loss: 0.8543 (0.8578) time: 0.1654 data: 0.0581 max mem: 9305 +Train: [78] [1200/6250] eta: 0:15:42 lr: 0.000016 grad: 0.1305 (0.1504) loss: 0.8603 (0.8575) time: 0.1825 data: 0.0891 max mem: 9305 +Train: [78] [1300/6250] eta: 0:15:17 lr: 0.000016 grad: 0.1465 (0.1499) loss: 0.8535 (0.8575) time: 0.1760 data: 0.0935 max mem: 9305 +Train: [78] [1400/6250] eta: 0:14:42 lr: 0.000016 grad: 0.1299 (0.1495) loss: 0.8605 (0.8576) time: 0.1320 data: 0.0439 max mem: 9305 +Train: [78] [1500/6250] eta: 0:14:15 lr: 0.000015 grad: 0.1317 (0.1489) loss: 0.8577 (0.8576) time: 0.1421 data: 0.0481 max mem: 9305 +Train: [78] [1600/6250] eta: 0:13:52 lr: 0.000015 grad: 0.1385 (0.1484) loss: 0.8591 (0.8577) time: 0.1363 data: 0.0287 max mem: 9305 +Train: [78] [1700/6250] eta: 0:13:30 lr: 0.000015 grad: 0.1397 (0.1482) loss: 0.8518 (0.8575) time: 0.1664 data: 0.0711 max mem: 9305 +Train: [78] [1800/6250] eta: 0:13:08 lr: 0.000015 grad: 0.1397 (0.1479) loss: 0.8525 (0.8574) time: 0.1594 data: 0.0585 max mem: 9305 +Train: [78] [1900/6250] eta: 0:12:45 lr: 0.000015 grad: 0.1367 (0.1477) loss: 0.8540 (0.8572) time: 0.1506 data: 0.0537 max mem: 9305 +Train: [78] [2000/6250] eta: 0:12:25 lr: 0.000015 grad: 0.1327 (0.1476) loss: 0.8494 (0.8570) time: 0.1921 data: 0.1083 max mem: 9305 +Train: [78] [2100/6250] eta: 0:12:05 lr: 0.000015 grad: 0.1359 (0.1472) loss: 0.8530 (0.8568) time: 0.1795 data: 0.0975 max mem: 9305 +Train: [78] [2200/6250] eta: 0:11:43 lr: 0.000015 grad: 0.1454 (0.1470) loss: 0.8546 (0.8567) time: 0.1466 data: 0.0625 max mem: 9305 +Train: [78] [2300/6250] eta: 0:11:20 lr: 0.000015 grad: 0.1408 (0.1467) loss: 0.8547 (0.8566) time: 0.1468 data: 0.0618 max mem: 9305 +Train: [78] [2400/6250] eta: 0:11:05 lr: 0.000015 grad: 0.1470 (0.1465) loss: 0.8622 (0.8565) time: 0.2132 data: 0.1163 max mem: 9305 +Train: [78] [2500/6250] eta: 0:10:46 lr: 0.000015 grad: 0.1322 (0.1465) loss: 0.8574 (0.8565) time: 0.1685 data: 0.0796 max mem: 9305 +Train: [78] [2600/6250] eta: 0:10:32 lr: 0.000015 grad: 0.1411 (0.1463) loss: 0.8580 (0.8564) time: 0.1405 data: 0.0474 max mem: 9305 +Train: [78] [2700/6250] eta: 0:10:16 lr: 0.000015 grad: 0.1386 (0.1461) loss: 0.8594 (0.8564) time: 0.2359 data: 0.0922 max mem: 9305 +Train: [78] [2800/6250] eta: 0:09:55 lr: 0.000015 grad: 0.1384 (0.1460) loss: 0.8545 (0.8564) time: 0.1490 data: 0.0680 max mem: 9305 +Train: [78] [2900/6250] eta: 0:09:34 lr: 0.000015 grad: 0.1400 (0.1457) loss: 0.8631 (0.8565) time: 0.1232 data: 0.0350 max mem: 9305 +Train: [78] [3000/6250] eta: 0:09:13 lr: 0.000015 grad: 0.1315 (0.1455) loss: 0.8590 (0.8565) time: 0.1523 data: 0.0715 max mem: 9305 +Train: [78] [3100/6250] eta: 0:08:54 lr: 0.000015 grad: 0.1366 (0.1454) loss: 0.8545 (0.8564) time: 0.1362 data: 0.0435 max mem: 9305 +Train: [78] [3200/6250] eta: 0:08:34 lr: 0.000015 grad: 0.1438 (0.1453) loss: 0.8576 (0.8563) time: 0.1308 data: 0.0458 max mem: 9305 +Train: [78] [3300/6250] eta: 0:08:15 lr: 0.000015 grad: 0.1390 (0.1451) loss: 0.8450 (0.8562) time: 0.1349 data: 0.0491 max mem: 9305 +Train: [78] [3400/6250] eta: 0:07:58 lr: 0.000015 grad: 0.1463 (0.1450) loss: 0.8505 (0.8561) time: 0.1601 data: 0.0784 max mem: 9305 +Train: [78] [3500/6250] eta: 0:07:41 lr: 0.000015 grad: 0.1382 (0.1449) loss: 0.8555 (0.8560) time: 0.1442 data: 0.0585 max mem: 9305 +Train: [78] [3600/6250] eta: 0:07:24 lr: 0.000015 grad: 0.1329 (0.1448) loss: 0.8548 (0.8559) time: 0.1802 data: 0.0923 max mem: 9305 +Train: [78] [3700/6250] eta: 0:07:07 lr: 0.000015 grad: 0.1394 (0.1446) loss: 0.8510 (0.8559) time: 0.1502 data: 0.0697 max mem: 9305 +Train: [78] [3800/6250] eta: 0:06:50 lr: 0.000015 grad: 0.1347 (0.1445) loss: 0.8518 (0.8558) time: 0.1558 data: 0.0691 max mem: 9305 +Train: [78] [3900/6250] eta: 0:06:33 lr: 0.000015 grad: 0.1284 (0.1444) loss: 0.8554 (0.8558) time: 0.1916 data: 0.0612 max mem: 9305 +Train: [78] [4000/6250] eta: 0:06:18 lr: 0.000015 grad: 0.1386 (0.1444) loss: 0.8552 (0.8558) time: 0.1076 data: 0.0002 max mem: 9305 +Train: [78] [4100/6250] eta: 0:06:00 lr: 0.000015 grad: 0.1449 (0.1444) loss: 0.8509 (0.8557) time: 0.1443 data: 0.0484 max mem: 9305 +Train: [78] [4200/6250] eta: 0:05:43 lr: 0.000015 grad: 0.1412 (0.1445) loss: 0.8539 (0.8557) time: 0.1516 data: 0.0660 max mem: 9305 +Train: [78] [4300/6250] eta: 0:05:27 lr: 0.000015 grad: 0.1337 (0.1445) loss: 0.8582 (0.8557) time: 0.1091 data: 0.0004 max mem: 9305 +Train: [78] [4400/6250] eta: 0:05:10 lr: 0.000015 grad: 0.1424 (0.1445) loss: 0.8495 (0.8556) time: 0.1614 data: 0.0761 max mem: 9305 +Train: [78] [4500/6250] eta: 0:04:52 lr: 0.000015 grad: 0.1351 (0.1446) loss: 0.8595 (0.8556) time: 0.1485 data: 0.0682 max mem: 9305 +Train: [78] [4600/6250] eta: 0:04:36 lr: 0.000015 grad: 0.1462 (0.1446) loss: 0.8553 (0.8556) time: 0.1467 data: 0.0648 max mem: 9305 +Train: [78] [4700/6250] eta: 0:04:19 lr: 0.000015 grad: 0.1454 (0.1446) loss: 0.8505 (0.8556) time: 0.1916 data: 0.1060 max mem: 9305 +Train: [78] [4800/6250] eta: 0:04:02 lr: 0.000015 grad: 0.1470 (0.1447) loss: 0.8487 (0.8555) time: 0.1776 data: 0.0912 max mem: 9305 +Train: [78] [4900/6250] eta: 0:03:45 lr: 0.000015 grad: 0.1400 (0.1448) loss: 0.8492 (0.8554) time: 0.1625 data: 0.0663 max mem: 9305 +Train: [78] [5000/6250] eta: 0:03:29 lr: 0.000015 grad: 0.1394 (0.1448) loss: 0.8587 (0.8554) time: 0.1551 data: 0.0581 max mem: 9305 +Train: [78] [5100/6250] eta: 0:03:11 lr: 0.000015 grad: 0.1498 (0.1449) loss: 0.8487 (0.8553) time: 0.1384 data: 0.0489 max mem: 9305 +Train: [78] [5200/6250] eta: 0:02:54 lr: 0.000015 grad: 0.1337 (0.1449) loss: 0.8536 (0.8552) time: 0.1401 data: 0.0550 max mem: 9305 +Train: [78] [5300/6250] eta: 0:02:38 lr: 0.000015 grad: 0.1452 (0.1450) loss: 0.8466 (0.8551) time: 0.2039 data: 0.1197 max mem: 9305 +Train: [78] [5400/6250] eta: 0:02:21 lr: 0.000015 grad: 0.1591 (0.1452) loss: 0.8445 (0.8550) time: 0.1540 data: 0.0596 max mem: 9305 +Train: [78] [5500/6250] eta: 0:02:04 lr: 0.000015 grad: 0.1505 (0.1455) loss: 0.8455 (0.8548) time: 0.1775 data: 0.0942 max mem: 9305 +Train: [78] [5600/6250] eta: 0:01:48 lr: 0.000015 grad: 0.1435 (0.1456) loss: 0.8435 (0.8547) time: 0.2309 data: 0.1550 max mem: 9305 +Train: [78] [5700/6250] eta: 0:01:31 lr: 0.000015 grad: 0.1423 (0.1457) loss: 0.8551 (0.8546) time: 0.1541 data: 0.0733 max mem: 9305 +Train: [78] [5800/6250] eta: 0:01:14 lr: 0.000015 grad: 0.1534 (0.1459) loss: 0.8487 (0.8544) time: 0.1413 data: 0.0577 max mem: 9305 +Train: [78] [5900/6250] eta: 0:00:57 lr: 0.000015 grad: 0.1468 (0.1460) loss: 0.8515 (0.8544) time: 0.1079 data: 0.0239 max mem: 9305 +Train: [78] [6000/6250] eta: 0:00:41 lr: 0.000015 grad: 0.1457 (0.1461) loss: 0.8458 (0.8543) time: 0.1635 data: 0.0766 max mem: 9305 +Train: [78] [6100/6250] eta: 0:00:24 lr: 0.000015 grad: 0.1460 (0.1462) loss: 0.8511 (0.8542) time: 0.1285 data: 0.0450 max mem: 9305 +Train: [78] [6200/6250] eta: 0:00:08 lr: 0.000014 grad: 0.1570 (0.1464) loss: 0.8405 (0.8541) time: 0.1673 data: 0.0844 max mem: 9305 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.1464 (0.1464) loss: 0.8468 (0.8540) time: 0.1598 data: 0.0733 max mem: 9305 +Train: [78] Total time: 0:17:12 (0.1653 s / it) +Averaged stats: lr: 0.000014 grad: 0.1464 (0.1464) loss: 0.8468 (0.8540) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:04:36 loss: 0.8737 (0.8737) time: 4.4596 data: 4.3908 max mem: 9305 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.8652 (0.8660) time: 0.1404 data: 0.1118 max mem: 9305 +Eval (hcp-train-subset): [78] Total time: 0:00:14 (0.2371 s / it) +Averaged stats (hcp-train-subset): loss: 0.8652 (0.8660) +Eval (hcp-val): [78] [ 0/62] eta: 0:05:40 loss: 0.8692 (0.8692) time: 5.4942 data: 5.4591 max mem: 9305 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.8730 (0.8747) time: 0.1585 data: 0.1283 max mem: 9305 +Eval (hcp-val): [78] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (hcp-val): loss: 0.8730 (0.8747) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [79] [ 0/6250] eta: 11:00:11 lr: 0.000014 grad: 0.0769 (0.0769) loss: 0.9028 (0.9028) time: 6.3379 data: 6.2362 max mem: 9305 +Train: [79] [ 100/6250] eta: 0:21:46 lr: 0.000014 grad: 0.1556 (0.1828) loss: 0.8651 (0.8623) time: 0.1675 data: 0.0592 max mem: 9305 +Train: [79] [ 200/6250] eta: 0:19:30 lr: 0.000014 grad: 0.1505 (0.1731) loss: 0.8451 (0.8593) time: 0.1548 data: 0.0525 max mem: 9305 +Train: [79] [ 300/6250] eta: 0:20:05 lr: 0.000014 grad: 0.1459 (0.1698) loss: 0.8519 (0.8571) time: 0.3502 data: 0.1739 max mem: 9305 +Train: [79] [ 400/6250] eta: 0:18:19 lr: 0.000014 grad: 0.1456 (0.1654) loss: 0.8532 (0.8559) time: 0.1507 data: 0.0500 max mem: 9305 +Train: [79] [ 500/6250] eta: 0:18:23 lr: 0.000014 grad: 0.1503 (0.1629) loss: 0.8513 (0.8544) time: 0.1632 data: 0.0085 max mem: 9305 +Train: [79] [ 600/6250] eta: 0:19:04 lr: 0.000014 grad: 0.1466 (0.1607) loss: 0.8404 (0.8532) time: 0.4943 data: 0.3509 max mem: 9305 +Train: [79] [ 700/6250] eta: 0:17:52 lr: 0.000014 grad: 0.1433 (0.1599) loss: 0.8475 (0.8520) time: 0.1592 data: 0.0708 max mem: 9305 +Train: [79] [ 800/6250] eta: 0:17:03 lr: 0.000014 grad: 0.1469 (0.1587) loss: 0.8422 (0.8511) time: 0.1776 data: 0.0744 max mem: 9305 +Train: [79] [ 900/6250] eta: 0:16:36 lr: 0.000014 grad: 0.1330 (0.1573) loss: 0.8471 (0.8508) time: 0.1122 data: 0.0002 max mem: 9305 +Train: [79] [1000/6250] eta: 0:16:00 lr: 0.000014 grad: 0.1297 (0.1556) loss: 0.8547 (0.8508) time: 0.1699 data: 0.0796 max mem: 9305 +Train: [79] [1100/6250] eta: 0:15:47 lr: 0.000014 grad: 0.1343 (0.1545) loss: 0.8572 (0.8507) time: 0.2265 data: 0.1234 max mem: 9305 +Train: [79] [1200/6250] eta: 0:15:27 lr: 0.000014 grad: 0.1375 (0.1533) loss: 0.8558 (0.8507) time: 0.2364 data: 0.1238 max mem: 9305 +Train: [79] [1300/6250] eta: 0:15:08 lr: 0.000014 grad: 0.1388 (0.1521) loss: 0.8512 (0.8508) time: 0.1088 data: 0.0002 max mem: 9305 +Train: [79] [1400/6250] eta: 0:14:42 lr: 0.000014 grad: 0.1485 (0.1515) loss: 0.8459 (0.8509) time: 0.1626 data: 0.0723 max mem: 9305 +Train: [79] [1500/6250] eta: 0:14:14 lr: 0.000014 grad: 0.1417 (0.1510) loss: 0.8504 (0.8509) time: 0.1406 data: 0.0449 max mem: 9305 +Train: [79] [1600/6250] eta: 0:13:49 lr: 0.000014 grad: 0.1372 (0.1507) loss: 0.8599 (0.8508) time: 0.1462 data: 0.0557 max mem: 9305 +Train: [79] [1700/6250] eta: 0:13:25 lr: 0.000014 grad: 0.1454 (0.1506) loss: 0.8499 (0.8507) time: 0.1747 data: 0.0914 max mem: 9305 +Train: [79] [1800/6250] eta: 0:13:00 lr: 0.000014 grad: 0.1475 (0.1502) loss: 0.8499 (0.8508) time: 0.1839 data: 0.0499 max mem: 9305 +Train: [79] [1900/6250] eta: 0:12:39 lr: 0.000014 grad: 0.1318 (0.1500) loss: 0.8550 (0.8508) time: 0.1503 data: 0.0528 max mem: 9305 +Train: [79] [2000/6250] eta: 0:12:31 lr: 0.000014 grad: 0.1428 (0.1497) loss: 0.8539 (0.8509) time: 0.0911 data: 0.0002 max mem: 9305 +Train: [79] [2100/6250] eta: 0:12:07 lr: 0.000014 grad: 0.1391 (0.1495) loss: 0.8543 (0.8509) time: 0.1387 data: 0.0500 max mem: 9305 +Train: [79] [2200/6250] eta: 0:11:45 lr: 0.000014 grad: 0.1391 (0.1493) loss: 0.8515 (0.8510) time: 0.1116 data: 0.0217 max mem: 9305 +Train: [79] [2300/6250] eta: 0:11:29 lr: 0.000014 grad: 0.1390 (0.1491) loss: 0.8523 (0.8510) time: 0.2371 data: 0.1563 max mem: 9305 +Train: [79] [2400/6250] eta: 0:11:06 lr: 0.000014 grad: 0.1416 (0.1491) loss: 0.8536 (0.8510) time: 0.1456 data: 0.0590 max mem: 9305 +Train: [79] [2500/6250] eta: 0:10:46 lr: 0.000014 grad: 0.1461 (0.1489) loss: 0.8468 (0.8510) time: 0.1539 data: 0.0781 max mem: 9305 +Train: [79] [2600/6250] eta: 0:10:27 lr: 0.000014 grad: 0.1466 (0.1490) loss: 0.8507 (0.8509) time: 0.1481 data: 0.0670 max mem: 9305 +Train: [79] [2700/6250] eta: 0:10:06 lr: 0.000014 grad: 0.1362 (0.1489) loss: 0.8585 (0.8509) time: 0.1477 data: 0.0523 max mem: 9305 +Train: [79] [2800/6250] eta: 0:09:46 lr: 0.000014 grad: 0.1419 (0.1488) loss: 0.8530 (0.8510) time: 0.1171 data: 0.0215 max mem: 9305 +Train: [79] [2900/6250] eta: 0:09:29 lr: 0.000014 grad: 0.1387 (0.1486) loss: 0.8532 (0.8510) time: 0.1374 data: 0.0475 max mem: 9305 +Train: [79] [3000/6250] eta: 0:09:10 lr: 0.000014 grad: 0.1460 (0.1484) loss: 0.8513 (0.8511) time: 0.1421 data: 0.0535 max mem: 9305 +Train: [79] [3100/6250] eta: 0:08:51 lr: 0.000014 grad: 0.1472 (0.1483) loss: 0.8528 (0.8512) time: 0.1183 data: 0.0327 max mem: 9305 +Train: [79] [3200/6250] eta: 0:08:37 lr: 0.000014 grad: 0.1478 (0.1484) loss: 0.8489 (0.8512) time: 0.2921 data: 0.2075 max mem: 9305 +Train: [79] [3300/6250] eta: 0:08:17 lr: 0.000014 grad: 0.1407 (0.1485) loss: 0.8551 (0.8513) time: 0.1410 data: 0.0535 max mem: 9305 +Train: [79] [3400/6250] eta: 0:08:00 lr: 0.000014 grad: 0.1380 (0.1484) loss: 0.8510 (0.8513) time: 0.1598 data: 0.0721 max mem: 9305 +Train: [79] [3500/6250] eta: 0:07:42 lr: 0.000014 grad: 0.1452 (0.1485) loss: 0.8587 (0.8513) time: 0.1317 data: 0.0408 max mem: 9305 +Train: [79] [3600/6250] eta: 0:07:24 lr: 0.000014 grad: 0.1359 (0.1483) loss: 0.8497 (0.8514) time: 0.1446 data: 0.0547 max mem: 9305 +Train: [79] [3700/6250] eta: 0:07:06 lr: 0.000014 grad: 0.1459 (0.1482) loss: 0.8584 (0.8515) time: 0.1683 data: 0.0821 max mem: 9305 +Train: [79] [3800/6250] eta: 0:06:49 lr: 0.000014 grad: 0.1358 (0.1480) loss: 0.8552 (0.8516) time: 0.1317 data: 0.0459 max mem: 9305 +Train: [79] [3900/6250] eta: 0:06:31 lr: 0.000014 grad: 0.1442 (0.1479) loss: 0.8628 (0.8517) time: 0.1594 data: 0.0715 max mem: 9305 +Train: [79] [4000/6250] eta: 0:06:13 lr: 0.000014 grad: 0.1392 (0.1478) loss: 0.8537 (0.8518) time: 0.1603 data: 0.0816 max mem: 9305 +Train: [79] [4100/6250] eta: 0:05:55 lr: 0.000014 grad: 0.1405 (0.1479) loss: 0.8551 (0.8518) time: 0.1313 data: 0.0512 max mem: 9305 +Train: [79] [4200/6250] eta: 0:05:40 lr: 0.000014 grad: 0.1434 (0.1479) loss: 0.8489 (0.8518) time: 0.1028 data: 0.0002 max mem: 9305 +Train: [79] [4300/6250] eta: 0:05:24 lr: 0.000014 grad: 0.1580 (0.1479) loss: 0.8529 (0.8518) time: 0.1558 data: 0.0573 max mem: 9305 +Train: [79] [4400/6250] eta: 0:05:07 lr: 0.000014 grad: 0.1471 (0.1480) loss: 0.8477 (0.8517) time: 0.2003 data: 0.1164 max mem: 9305 +Train: [79] [4500/6250] eta: 0:04:51 lr: 0.000014 grad: 0.1452 (0.1480) loss: 0.8477 (0.8517) time: 0.1828 data: 0.1004 max mem: 9305 +Train: [79] [4600/6250] eta: 0:04:35 lr: 0.000014 grad: 0.1452 (0.1481) loss: 0.8557 (0.8517) time: 0.1672 data: 0.0732 max mem: 9305 +Train: [79] [4700/6250] eta: 0:04:18 lr: 0.000013 grad: 0.1520 (0.1480) loss: 0.8536 (0.8518) time: 0.1648 data: 0.0848 max mem: 9305 +Train: [79] [4800/6250] eta: 0:04:01 lr: 0.000013 grad: 0.1432 (0.1480) loss: 0.8516 (0.8519) time: 0.1602 data: 0.0719 max mem: 9305 +Train: [79] [4900/6250] eta: 0:03:44 lr: 0.000013 grad: 0.1432 (0.1480) loss: 0.8491 (0.8519) time: 0.1348 data: 0.0292 max mem: 9305 +Train: [79] [5000/6250] eta: 0:03:28 lr: 0.000013 grad: 0.1421 (0.1479) loss: 0.8536 (0.8520) time: 0.1158 data: 0.0269 max mem: 9305 +Train: [79] [5100/6250] eta: 0:03:11 lr: 0.000013 grad: 0.1370 (0.1479) loss: 0.8560 (0.8520) time: 0.1487 data: 0.0461 max mem: 9305 +Train: [79] [5200/6250] eta: 0:02:55 lr: 0.000013 grad: 0.1426 (0.1478) loss: 0.8515 (0.8520) time: 0.2541 data: 0.1592 max mem: 9305 +Train: [79] [5300/6250] eta: 0:02:38 lr: 0.000013 grad: 0.1316 (0.1477) loss: 0.8615 (0.8521) time: 0.1390 data: 0.0515 max mem: 9305 +Train: [79] [5400/6250] eta: 0:02:22 lr: 0.000013 grad: 0.1419 (0.1476) loss: 0.8569 (0.8521) time: 0.0966 data: 0.0002 max mem: 9305 +Train: [79] [5500/6250] eta: 0:02:05 lr: 0.000013 grad: 0.1417 (0.1476) loss: 0.8518 (0.8522) time: 0.1517 data: 0.0629 max mem: 9305 +Train: [79] [5600/6250] eta: 0:01:48 lr: 0.000013 grad: 0.1437 (0.1476) loss: 0.8530 (0.8522) time: 0.1885 data: 0.1076 max mem: 9305 +Train: [79] [5700/6250] eta: 0:01:32 lr: 0.000013 grad: 0.1439 (0.1475) loss: 0.8510 (0.8521) time: 0.1071 data: 0.0002 max mem: 9305 +Train: [79] [5800/6250] eta: 0:01:15 lr: 0.000013 grad: 0.1425 (0.1475) loss: 0.8525 (0.8522) time: 0.1298 data: 0.0370 max mem: 9305 +Train: [79] [5900/6250] eta: 0:00:58 lr: 0.000013 grad: 0.1426 (0.1475) loss: 0.8549 (0.8522) time: 0.2546 data: 0.1700 max mem: 9305 +Train: [79] [6000/6250] eta: 0:00:41 lr: 0.000013 grad: 0.1378 (0.1474) loss: 0.8522 (0.8522) time: 0.1428 data: 0.0596 max mem: 9305 +Train: [79] [6100/6250] eta: 0:00:25 lr: 0.000013 grad: 0.1503 (0.1474) loss: 0.8536 (0.8522) time: 0.1892 data: 0.0994 max mem: 9305 +Train: [79] [6200/6250] eta: 0:00:08 lr: 0.000013 grad: 0.1429 (0.1474) loss: 0.8493 (0.8522) time: 0.1545 data: 0.0699 max mem: 9305 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.1382 (0.1474) loss: 0.8506 (0.8522) time: 0.1651 data: 0.0739 max mem: 9305 +Train: [79] Total time: 0:17:30 (0.1681 s / it) +Averaged stats: lr: 0.000013 grad: 0.1382 (0.1474) loss: 0.8506 (0.8522) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:06:52 loss: 0.8807 (0.8807) time: 6.6596 data: 6.6244 max mem: 9305 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.8646 (0.8670) time: 0.1274 data: 0.0983 max mem: 9305 +Eval (hcp-train-subset): [79] Total time: 0:00:14 (0.2380 s / it) +Averaged stats (hcp-train-subset): loss: 0.8646 (0.8670) +Making plots (hcp-train-subset): example=26 +Eval (hcp-val): [79] [ 0/62] eta: 0:06:32 loss: 0.8749 (0.8749) time: 6.3302 data: 6.2700 max mem: 9305 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.8745 (0.8756) time: 0.1353 data: 0.1062 max mem: 9305 +Eval (hcp-val): [79] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (hcp-val): loss: 0.8745 (0.8756) +Making plots (hcp-val): example=1 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 11:27:21 lr: 0.000013 grad: nan (nan) loss: 0.8228 (0.8228) time: 6.5987 data: 6.4700 max mem: 9305 +Train: [80] [ 100/6250] eta: 0:20:43 lr: 0.000013 grad: 0.1830 (0.1988) loss: 0.8563 (0.8571) time: 0.1487 data: 0.0424 max mem: 9305 +Train: [80] [ 200/6250] eta: 0:17:44 lr: 0.000013 grad: 0.1853 (0.1957) loss: 0.8333 (0.8483) time: 0.1475 data: 0.0443 max mem: 9305 +Train: [80] [ 300/6250] eta: 0:16:42 lr: 0.000013 grad: 0.1731 (0.1911) loss: 0.8399 (0.8442) time: 0.1646 data: 0.0792 max mem: 9305 +Train: [80] [ 400/6250] eta: 0:16:10 lr: 0.000013 grad: 0.1414 (0.1823) loss: 0.8543 (0.8446) time: 0.1409 data: 0.0358 max mem: 9305 +Train: [80] [ 500/6250] eta: 0:15:47 lr: 0.000013 grad: 0.1492 (0.1753) loss: 0.8535 (0.8462) time: 0.1464 data: 0.0424 max mem: 9305 +Train: [80] [ 600/6250] eta: 0:15:32 lr: 0.000013 grad: 0.1460 (0.1716) loss: 0.8537 (0.8470) time: 0.1812 data: 0.0901 max mem: 9305 +Train: [80] [ 700/6250] eta: 0:15:11 lr: 0.000013 grad: 0.1419 (0.1688) loss: 0.8494 (0.8475) time: 0.1498 data: 0.0612 max mem: 9305 +Train: [80] [ 800/6250] eta: 0:15:07 lr: 0.000013 grad: 0.1572 (0.1663) loss: 0.8411 (0.8476) time: 0.2583 data: 0.1616 max mem: 9305 +Train: [80] [ 900/6250] eta: 0:15:05 lr: 0.000013 grad: 0.1363 (0.1647) loss: 0.8522 (0.8478) time: 0.2973 data: 0.1696 max mem: 9305 +Train: [80] [1000/6250] eta: 0:14:36 lr: 0.000013 grad: 0.1482 (0.1633) loss: 0.8514 (0.8480) time: 0.1668 data: 0.0491 max mem: 9305 +Train: [80] [1100/6250] eta: 0:14:34 lr: 0.000013 grad: 0.1416 (0.1615) loss: 0.8478 (0.8481) time: 0.1749 data: 0.0515 max mem: 9305 +Train: [80] [1200/6250] eta: 0:14:20 lr: 0.000013 grad: 0.1470 (0.1601) loss: 0.8516 (0.8482) time: 0.1302 data: 0.0004 max mem: 9305 +Train: [80] [1300/6250] eta: 0:13:58 lr: 0.000013 grad: 0.1406 (0.1593) loss: 0.8482 (0.8481) time: 0.1450 data: 0.0557 max mem: 9305 +Train: [80] [1400/6250] eta: 0:13:38 lr: 0.000013 grad: 0.1477 (0.1587) loss: 0.8509 (0.8479) time: 0.1505 data: 0.0653 max mem: 9305 +Train: [80] [1500/6250] eta: 0:13:15 lr: 0.000013 grad: 0.1504 (0.1582) loss: 0.8446 (0.8478) time: 0.1259 data: 0.0412 max mem: 9305 +Train: [80] [1600/6250] eta: 0:13:06 lr: 0.000013 grad: 0.1340 (0.1578) loss: 0.8520 (0.8479) time: 0.2981 data: 0.2165 max mem: 9305 +Train: [80] [1700/6250] eta: 0:12:38 lr: 0.000013 grad: 0.1447 (0.1575) loss: 0.8479 (0.8478) time: 0.1506 data: 0.0708 max mem: 9305 +Train: [80] [1800/6250] eta: 0:12:19 lr: 0.000013 grad: 0.1415 (0.1572) loss: 0.8511 (0.8480) time: 0.1384 data: 0.0544 max mem: 9305 +Train: [80] [1900/6250] eta: 0:11:58 lr: 0.000013 grad: 0.1461 (0.1570) loss: 0.8503 (0.8480) time: 0.1533 data: 0.0702 max mem: 9305 +Train: [80] [2000/6250] eta: 0:11:40 lr: 0.000013 grad: 0.1423 (0.1568) loss: 0.8491 (0.8480) time: 0.1595 data: 0.0774 max mem: 9305 +Train: [80] [2100/6250] eta: 0:11:24 lr: 0.000013 grad: 0.1455 (0.1564) loss: 0.8520 (0.8481) time: 0.1210 data: 0.0085 max mem: 9305 +Train: [80] [2200/6250] eta: 0:11:05 lr: 0.000013 grad: 0.1576 (0.1564) loss: 0.8409 (0.8481) time: 0.1336 data: 0.0451 max mem: 9305 +Train: [80] [2300/6250] eta: 0:10:46 lr: 0.000013 grad: 0.1545 (0.1562) loss: 0.8530 (0.8482) time: 0.1366 data: 0.0515 max mem: 9305 +Train: [80] [2400/6250] eta: 0:10:27 lr: 0.000013 grad: 0.1452 (0.1560) loss: 0.8533 (0.8484) time: 0.1653 data: 0.0729 max mem: 9305 +Train: [80] [2500/6250] eta: 0:10:15 lr: 0.000013 grad: 0.1522 (0.1559) loss: 0.8531 (0.8486) time: 0.1010 data: 0.0002 max mem: 9305 +Train: [80] [2600/6250] eta: 0:09:53 lr: 0.000013 grad: 0.1485 (0.1559) loss: 0.8489 (0.8486) time: 0.1488 data: 0.0519 max mem: 9305 +Train: [80] [2700/6250] eta: 0:09:36 lr: 0.000013 grad: 0.1444 (0.1557) loss: 0.8507 (0.8487) time: 0.1563 data: 0.0702 max mem: 9305 +Train: [80] [2800/6250] eta: 0:09:20 lr: 0.000013 grad: 0.1394 (0.1556) loss: 0.8520 (0.8489) time: 0.1543 data: 0.0716 max mem: 9305 +Train: [80] [2900/6250] eta: 0:09:02 lr: 0.000013 grad: 0.1376 (0.1552) loss: 0.8559 (0.8490) time: 0.1546 data: 0.0651 max mem: 9305 +Train: [80] [3000/6250] eta: 0:08:45 lr: 0.000013 grad: 0.1383 (0.1550) loss: 0.8575 (0.8492) time: 0.1491 data: 0.0578 max mem: 9305 +Train: [80] [3100/6250] eta: 0:08:28 lr: 0.000013 grad: 0.1455 (0.1548) loss: 0.8490 (0.8493) time: 0.1622 data: 0.0783 max mem: 9305 +Train: [80] [3200/6250] eta: 0:08:10 lr: 0.000013 grad: 0.1465 (0.1547) loss: 0.8523 (0.8494) time: 0.1242 data: 0.0368 max mem: 9305 +Train: [80] [3300/6250] eta: 0:07:56 lr: 0.000013 grad: 0.1432 (0.1546) loss: 0.8521 (0.8495) time: 0.1959 data: 0.1096 max mem: 9305 +Train: [80] [3400/6250] eta: 0:07:38 lr: 0.000012 grad: 0.1451 (0.1543) loss: 0.8572 (0.8497) time: 0.1464 data: 0.0606 max mem: 9305 +Train: [80] [3500/6250] eta: 0:07:22 lr: 0.000012 grad: 0.1444 (0.1541) loss: 0.8583 (0.8499) time: 0.1382 data: 0.0500 max mem: 9305 +Train: [80] [3600/6250] eta: 0:07:06 lr: 0.000012 grad: 0.1375 (0.1538) loss: 0.8586 (0.8501) time: 0.1856 data: 0.0968 max mem: 9305 +Train: [80] [3700/6250] eta: 0:06:50 lr: 0.000012 grad: 0.1417 (0.1535) loss: 0.8580 (0.8503) time: 0.1266 data: 0.0413 max mem: 9305 +Train: [80] [3800/6250] eta: 0:06:33 lr: 0.000012 grad: 0.1402 (0.1533) loss: 0.8678 (0.8504) time: 0.1318 data: 0.0331 max mem: 9305 +Train: [80] [3900/6250] eta: 0:06:17 lr: 0.000012 grad: 0.1311 (0.1530) loss: 0.8624 (0.8506) time: 0.2222 data: 0.1393 max mem: 9305 +Train: [80] [4000/6250] eta: 0:06:02 lr: 0.000012 grad: 0.1358 (0.1529) loss: 0.8587 (0.8507) time: 0.1848 data: 0.0847 max mem: 9305 +Train: [80] [4100/6250] eta: 0:05:46 lr: 0.000012 grad: 0.1300 (0.1526) loss: 0.8642 (0.8509) time: 0.1515 data: 0.0584 max mem: 9305 +Train: [80] [4200/6250] eta: 0:05:29 lr: 0.000012 grad: 0.1326 (0.1524) loss: 0.8573 (0.8511) time: 0.1585 data: 0.0770 max mem: 9305 +Train: [80] [4300/6250] eta: 0:05:14 lr: 0.000012 grad: 0.1329 (0.1521) loss: 0.8648 (0.8513) time: 0.1645 data: 0.0694 max mem: 9305 +Train: [80] [4400/6250] eta: 0:04:58 lr: 0.000012 grad: 0.1312 (0.1519) loss: 0.8605 (0.8515) time: 0.1949 data: 0.1000 max mem: 9305 +Train: [80] [4500/6250] eta: 0:04:43 lr: 0.000012 grad: 0.1345 (0.1517) loss: 0.8577 (0.8517) time: 0.1845 data: 0.0941 max mem: 9305 +Train: [80] [4600/6250] eta: 0:04:27 lr: 0.000012 grad: 0.1362 (0.1514) loss: 0.8642 (0.8519) time: 0.1795 data: 0.0845 max mem: 9305 +Train: [80] [4700/6250] eta: 0:04:11 lr: 0.000012 grad: 0.1336 (0.1513) loss: 0.8605 (0.8521) time: 0.1255 data: 0.0410 max mem: 9305 +Train: [80] [4800/6250] eta: 0:03:55 lr: 0.000012 grad: 0.1392 (0.1511) loss: 0.8573 (0.8522) time: 0.1008 data: 0.0006 max mem: 9305 +Train: [80] [4900/6250] eta: 0:03:38 lr: 0.000012 grad: 0.1398 (0.1509) loss: 0.8594 (0.8523) time: 0.1286 data: 0.0402 max mem: 9305 +Train: [80] [5000/6250] eta: 0:03:23 lr: 0.000012 grad: 0.1305 (0.1507) loss: 0.8570 (0.8525) time: 0.2629 data: 0.1727 max mem: 9305 +Train: [80] [5100/6250] eta: 0:03:06 lr: 0.000012 grad: 0.1432 (0.1506) loss: 0.8587 (0.8526) time: 0.1617 data: 0.0787 max mem: 9305 +Train: [80] [5200/6250] eta: 0:02:49 lr: 0.000012 grad: 0.1446 (0.1505) loss: 0.8579 (0.8527) time: 0.1714 data: 0.0686 max mem: 9305 +Train: [80] [5300/6250] eta: 0:02:33 lr: 0.000012 grad: 0.1386 (0.1503) loss: 0.8572 (0.8528) time: 0.1507 data: 0.0605 max mem: 9305 +Train: [80] [5400/6250] eta: 0:02:17 lr: 0.000012 grad: 0.1471 (0.1503) loss: 0.8540 (0.8529) time: 0.1558 data: 0.0735 max mem: 9305 +Train: [80] [5500/6250] eta: 0:02:01 lr: 0.000012 grad: 0.1376 (0.1503) loss: 0.8599 (0.8530) time: 0.1462 data: 0.0601 max mem: 9305 +Train: [80] [5600/6250] eta: 0:01:44 lr: 0.000012 grad: 0.1420 (0.1502) loss: 0.8497 (0.8530) time: 0.1622 data: 0.0847 max mem: 9305 +Train: [80] [5700/6250] eta: 0:01:28 lr: 0.000012 grad: 0.1370 (0.1501) loss: 0.8551 (0.8530) time: 0.1437 data: 0.0553 max mem: 9305 +Train: [80] [5800/6250] eta: 0:01:12 lr: 0.000012 grad: 0.1450 (0.1501) loss: 0.8488 (0.8530) time: 0.1135 data: 0.0143 max mem: 9305 +Train: [80] [5900/6250] eta: 0:00:56 lr: 0.000012 grad: 0.1355 (0.1500) loss: 0.8578 (0.8530) time: 0.1738 data: 0.0863 max mem: 9305 +Train: [80] [6000/6250] eta: 0:00:40 lr: 0.000012 grad: 0.1393 (0.1499) loss: 0.8553 (0.8530) time: 0.1468 data: 0.0659 max mem: 9305 +Train: [80] [6100/6250] eta: 0:00:24 lr: 0.000012 grad: 0.1414 (0.1498) loss: 0.8540 (0.8530) time: 0.1427 data: 0.0453 max mem: 9305 +Train: [80] [6200/6250] eta: 0:00:08 lr: 0.000012 grad: 0.1443 (0.1498) loss: 0.8533 (0.8530) time: 0.1354 data: 0.0524 max mem: 9305 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.1401 (0.1497) loss: 0.8532 (0.8530) time: 0.1850 data: 0.1029 max mem: 9305 +Train: [80] Total time: 0:16:54 (0.1623 s / it) +Averaged stats: lr: 0.000012 grad: 0.1401 (0.1497) loss: 0.8532 (0.8530) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:06:23 loss: 0.8686 (0.8686) time: 6.1889 data: 6.1541 max mem: 9305 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.8623 (0.8649) time: 0.1460 data: 0.1160 max mem: 9305 +Eval (hcp-train-subset): [80] Total time: 0:00:14 (0.2376 s / it) +Averaged stats (hcp-train-subset): loss: 0.8623 (0.8649) +Eval (hcp-val): [80] [ 0/62] eta: 0:05:56 loss: 0.8672 (0.8672) time: 5.7435 data: 5.7080 max mem: 9305 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.8742 (0.8753) time: 0.1317 data: 0.0993 max mem: 9305 +Eval (hcp-val): [80] Total time: 0:00:14 (0.2264 s / it) +Averaged stats (hcp-val): loss: 0.8742 (0.8753) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 9:29:04 lr: 0.000012 grad: nan (nan) loss: 0.7981 (0.7981) time: 5.4630 data: 5.2634 max mem: 9305 +Train: [81] [ 100/6250] eta: 0:20:28 lr: 0.000012 grad: 0.1395 (0.1824) loss: 0.8642 (0.8571) time: 0.1470 data: 0.0453 max mem: 9305 +Train: [81] [ 200/6250] eta: 0:17:47 lr: 0.000012 grad: 0.1407 (0.1703) loss: 0.8505 (0.8575) time: 0.1478 data: 0.0456 max mem: 9305 +Train: [81] [ 300/6250] eta: 0:16:31 lr: 0.000012 grad: 0.1670 (0.1665) loss: 0.8554 (0.8550) time: 0.1376 data: 0.0328 max mem: 9305 +Train: [81] [ 400/6250] eta: 0:15:46 lr: 0.000012 grad: 0.1595 (0.1649) loss: 0.8427 (0.8527) time: 0.1631 data: 0.0642 max mem: 9305 +Train: [81] [ 500/6250] eta: 0:15:18 lr: 0.000012 grad: 0.1520 (0.1629) loss: 0.8478 (0.8520) time: 0.1439 data: 0.0560 max mem: 9305 +Train: [81] [ 600/6250] eta: 0:14:49 lr: 0.000012 grad: 0.1417 (0.1610) loss: 0.8553 (0.8519) time: 0.1451 data: 0.0529 max mem: 9305 +Train: [81] [ 700/6250] eta: 0:14:33 lr: 0.000012 grad: 0.1503 (0.1603) loss: 0.8465 (0.8515) time: 0.1472 data: 0.0464 max mem: 9305 +Train: [81] [ 800/6250] eta: 0:14:25 lr: 0.000012 grad: 0.1533 (0.1593) loss: 0.8429 (0.8514) time: 0.1568 data: 0.0579 max mem: 9305 +Train: [81] [ 900/6250] eta: 0:14:11 lr: 0.000012 grad: 0.1438 (0.1591) loss: 0.8501 (0.8511) time: 0.1756 data: 0.0876 max mem: 9305 +Train: [81] [1000/6250] eta: 0:13:52 lr: 0.000012 grad: 0.1502 (0.1584) loss: 0.8466 (0.8510) time: 0.1568 data: 0.0692 max mem: 9305 +Train: [81] [1100/6250] eta: 0:13:33 lr: 0.000012 grad: 0.1452 (0.1583) loss: 0.8512 (0.8508) time: 0.1555 data: 0.0629 max mem: 9305 +Train: [81] [1200/6250] eta: 0:13:12 lr: 0.000012 grad: 0.1560 (0.1585) loss: 0.8433 (0.8505) time: 0.1422 data: 0.0546 max mem: 9305 +Train: [81] [1300/6250] eta: 0:12:56 lr: 0.000012 grad: 0.1511 (0.1584) loss: 0.8502 (0.8504) time: 0.1529 data: 0.0679 max mem: 9305 +Train: [81] [1400/6250] eta: 0:12:53 lr: 0.000012 grad: 0.1388 (0.1581) loss: 0.8511 (0.8503) time: 0.0938 data: 0.0002 max mem: 9305 +Train: [81] [1500/6250] eta: 0:12:29 lr: 0.000012 grad: 0.1564 (0.1582) loss: 0.8360 (0.8498) time: 0.1284 data: 0.0427 max mem: 9305 +Train: [81] [1600/6250] eta: 0:12:09 lr: 0.000012 grad: 0.1472 (0.1580) loss: 0.8510 (0.8498) time: 0.0927 data: 0.0032 max mem: 9305 +Train: [81] [1700/6250] eta: 0:12:03 lr: 0.000012 grad: 0.1435 (0.1580) loss: 0.8548 (0.8498) time: 0.1020 data: 0.0002 max mem: 9305 +Train: [81] [1800/6250] eta: 0:11:39 lr: 0.000012 grad: 0.1493 (0.1578) loss: 0.8456 (0.8497) time: 0.1246 data: 0.0263 max mem: 9305 +Train: [81] [1900/6250] eta: 0:11:25 lr: 0.000012 grad: 0.1592 (0.1576) loss: 0.8466 (0.8496) time: 0.2045 data: 0.1170 max mem: 9305 +Train: [81] [2000/6250] eta: 0:11:09 lr: 0.000012 grad: 0.1548 (0.1575) loss: 0.8435 (0.8496) time: 0.1597 data: 0.0719 max mem: 9305 +Train: [81] [2100/6250] eta: 0:10:55 lr: 0.000012 grad: 0.1488 (0.1573) loss: 0.8445 (0.8496) time: 0.1709 data: 0.0806 max mem: 9305 +Train: [81] [2200/6250] eta: 0:10:39 lr: 0.000012 grad: 0.1597 (0.1573) loss: 0.8443 (0.8495) time: 0.1074 data: 0.0133 max mem: 9305 +Train: [81] [2300/6250] eta: 0:10:25 lr: 0.000011 grad: 0.1445 (0.1572) loss: 0.8456 (0.8495) time: 0.1499 data: 0.0535 max mem: 9305 +Train: [81] [2400/6250] eta: 0:10:10 lr: 0.000011 grad: 0.1423 (0.1571) loss: 0.8416 (0.8493) time: 0.1655 data: 0.0753 max mem: 9305 +Train: [81] [2500/6250] eta: 0:09:55 lr: 0.000011 grad: 0.1492 (0.1569) loss: 0.8518 (0.8493) time: 0.1832 data: 0.1011 max mem: 9305 +Train: [81] [2600/6250] eta: 0:09:36 lr: 0.000011 grad: 0.1520 (0.1567) loss: 0.8529 (0.8493) time: 0.1439 data: 0.0629 max mem: 9305 +Train: [81] [2700/6250] eta: 0:09:24 lr: 0.000011 grad: 0.1526 (0.1566) loss: 0.8484 (0.8493) time: 0.1919 data: 0.1043 max mem: 9305 +Train: [81] [2800/6250] eta: 0:09:06 lr: 0.000011 grad: 0.1492 (0.1565) loss: 0.8496 (0.8492) time: 0.1613 data: 0.0762 max mem: 9305 +Train: [81] [2900/6250] eta: 0:08:53 lr: 0.000011 grad: 0.1433 (0.1564) loss: 0.8504 (0.8492) time: 0.2560 data: 0.1636 max mem: 9305 +Train: [81] [3000/6250] eta: 0:08:37 lr: 0.000011 grad: 0.1462 (0.1563) loss: 0.8495 (0.8491) time: 0.2702 data: 0.1823 max mem: 9305 +Train: [81] [3100/6250] eta: 0:08:18 lr: 0.000011 grad: 0.1493 (0.1561) loss: 0.8481 (0.8490) time: 0.1038 data: 0.0004 max mem: 9305 +Train: [81] [3200/6250] eta: 0:08:01 lr: 0.000011 grad: 0.1408 (0.1560) loss: 0.8564 (0.8490) time: 0.1481 data: 0.0632 max mem: 9305 +Train: [81] [3300/6250] eta: 0:07:45 lr: 0.000011 grad: 0.1388 (0.1558) loss: 0.8502 (0.8490) time: 0.1690 data: 0.0729 max mem: 9305 +Train: [81] [3400/6250] eta: 0:07:28 lr: 0.000011 grad: 0.1479 (0.1557) loss: 0.8519 (0.8490) time: 0.1388 data: 0.0556 max mem: 9305 +Train: [81] [3500/6250] eta: 0:07:11 lr: 0.000011 grad: 0.1511 (0.1556) loss: 0.8439 (0.8489) time: 0.1516 data: 0.0596 max mem: 9305 +Train: [81] [3600/6250] eta: 0:06:55 lr: 0.000011 grad: 0.1474 (0.1555) loss: 0.8483 (0.8489) time: 0.1591 data: 0.0682 max mem: 9305 +Train: [81] [3700/6250] eta: 0:06:38 lr: 0.000011 grad: 0.1463 (0.1554) loss: 0.8528 (0.8489) time: 0.1750 data: 0.0915 max mem: 9305 +Train: [81] [3800/6250] eta: 0:06:25 lr: 0.000011 grad: 0.1548 (0.1553) loss: 0.8525 (0.8489) time: 0.1467 data: 0.0570 max mem: 9305 +Train: [81] [3900/6250] eta: 0:06:11 lr: 0.000011 grad: 0.1457 (0.1551) loss: 0.8428 (0.8489) time: 0.1730 data: 0.0797 max mem: 9305 +Train: [81] [4000/6250] eta: 0:05:54 lr: 0.000011 grad: 0.1461 (0.1550) loss: 0.8544 (0.8490) time: 0.1437 data: 0.0486 max mem: 9305 +Train: [81] [4100/6250] eta: 0:05:38 lr: 0.000011 grad: 0.1508 (0.1550) loss: 0.8513 (0.8490) time: 0.1280 data: 0.0398 max mem: 9305 +Train: [81] [4200/6250] eta: 0:05:22 lr: 0.000011 grad: 0.1517 (0.1550) loss: 0.8503 (0.8490) time: 0.1619 data: 0.0717 max mem: 9305 +Train: [81] [4300/6250] eta: 0:05:07 lr: 0.000011 grad: 0.1534 (0.1550) loss: 0.8435 (0.8491) time: 0.1630 data: 0.0732 max mem: 9305 +Train: [81] [4400/6250] eta: 0:04:51 lr: 0.000011 grad: 0.1493 (0.1549) loss: 0.8571 (0.8491) time: 0.1634 data: 0.0647 max mem: 9305 +Train: [81] [4500/6250] eta: 0:04:35 lr: 0.000011 grad: 0.1310 (0.1548) loss: 0.8551 (0.8492) time: 0.1776 data: 0.0796 max mem: 9305 +Train: [81] [4600/6250] eta: 0:04:20 lr: 0.000011 grad: 0.1505 (0.1547) loss: 0.8512 (0.8492) time: 0.1535 data: 0.0538 max mem: 9305 +Train: [81] [4700/6250] eta: 0:04:04 lr: 0.000011 grad: 0.1533 (0.1547) loss: 0.8444 (0.8492) time: 0.1453 data: 0.0602 max mem: 9305 +Train: [81] [4800/6250] eta: 0:03:47 lr: 0.000011 grad: 0.1376 (0.1546) loss: 0.8559 (0.8493) time: 0.1420 data: 0.0496 max mem: 9305 +Train: [81] [4900/6250] eta: 0:03:31 lr: 0.000011 grad: 0.1428 (0.1545) loss: 0.8575 (0.8493) time: 0.1448 data: 0.0651 max mem: 9305 +Train: [81] [5000/6250] eta: 0:03:16 lr: 0.000011 grad: 0.1417 (0.1544) loss: 0.8492 (0.8494) time: 0.1337 data: 0.0423 max mem: 9305 +Train: [81] [5100/6250] eta: 0:03:02 lr: 0.000011 grad: 0.1438 (0.1543) loss: 0.8497 (0.8495) time: 0.1439 data: 0.0261 max mem: 9305 +Train: [81] [5200/6250] eta: 0:02:47 lr: 0.000011 grad: 0.1515 (0.1542) loss: 0.8413 (0.8495) time: 0.1594 data: 0.0579 max mem: 9305 +Train: [81] [5300/6250] eta: 0:02:32 lr: 0.000011 grad: 0.1442 (0.1542) loss: 0.8571 (0.8495) time: 0.2256 data: 0.1172 max mem: 9305 +Train: [81] [5400/6250] eta: 0:02:16 lr: 0.000011 grad: 0.1529 (0.1542) loss: 0.8475 (0.8495) time: 0.1717 data: 0.0807 max mem: 9305 +Train: [81] [5500/6250] eta: 0:02:00 lr: 0.000011 grad: 0.1413 (0.1541) loss: 0.8491 (0.8495) time: 0.1665 data: 0.0734 max mem: 9305 +Train: [81] [5600/6250] eta: 0:01:44 lr: 0.000011 grad: 0.1505 (0.1542) loss: 0.8464 (0.8494) time: 0.1014 data: 0.0033 max mem: 9305 +Train: [81] [5700/6250] eta: 0:01:28 lr: 0.000011 grad: 0.1545 (0.1542) loss: 0.8479 (0.8494) time: 0.1657 data: 0.0832 max mem: 9305 +Train: [81] [5800/6250] eta: 0:01:12 lr: 0.000011 grad: 0.1513 (0.1542) loss: 0.8426 (0.8494) time: 0.1442 data: 0.0545 max mem: 9305 +Train: [81] [5900/6250] eta: 0:00:56 lr: 0.000011 grad: 0.1664 (0.1542) loss: 0.8485 (0.8494) time: 0.1262 data: 0.0338 max mem: 9305 +Train: [81] [6000/6250] eta: 0:00:40 lr: 0.000011 grad: 0.1420 (0.1542) loss: 0.8494 (0.8494) time: 0.1517 data: 0.0690 max mem: 9305 +Train: [81] [6100/6250] eta: 0:00:24 lr: 0.000011 grad: 0.1532 (0.1542) loss: 0.8520 (0.8494) time: 0.1309 data: 0.0367 max mem: 9305 +Train: [81] [6200/6250] eta: 0:00:08 lr: 0.000011 grad: 0.1390 (0.1541) loss: 0.8552 (0.8494) time: 0.1437 data: 0.0381 max mem: 9305 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.1508 (0.1541) loss: 0.8602 (0.8494) time: 0.1476 data: 0.0513 max mem: 9305 +Train: [81] Total time: 0:16:52 (0.1620 s / it) +Averaged stats: lr: 0.000011 grad: 0.1508 (0.1541) loss: 0.8602 (0.8494) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:05:39 loss: 0.8707 (0.8707) time: 5.4700 data: 5.4341 max mem: 9305 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.8626 (0.8647) time: 0.1242 data: 0.0958 max mem: 9305 +Eval (hcp-train-subset): [81] Total time: 0:00:14 (0.2415 s / it) +Averaged stats (hcp-train-subset): loss: 0.8626 (0.8647) +Eval (hcp-val): [81] [ 0/62] eta: 0:06:27 loss: 0.8693 (0.8693) time: 6.2573 data: 6.2236 max mem: 9305 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.8745 (0.8748) time: 0.1699 data: 0.1405 max mem: 9305 +Eval (hcp-val): [81] Total time: 0:00:16 (0.2607 s / it) +Averaged stats (hcp-val): loss: 0.8745 (0.8748) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [82] [ 0/6250] eta: 11:42:32 lr: 0.000011 grad: 0.1146 (0.1146) loss: 0.9174 (0.9174) time: 6.7444 data: 6.6359 max mem: 9305 +Train: [82] [ 100/6250] eta: 0:21:13 lr: 0.000011 grad: 0.1904 (0.2121) loss: 0.8480 (0.8609) time: 0.1520 data: 0.0464 max mem: 9305 +Train: [82] [ 200/6250] eta: 0:18:34 lr: 0.000011 grad: 0.1499 (0.1973) loss: 0.8593 (0.8551) time: 0.1602 data: 0.0618 max mem: 9305 +Train: [82] [ 300/6250] eta: 0:16:54 lr: 0.000011 grad: 0.1607 (0.1865) loss: 0.8460 (0.8534) time: 0.1467 data: 0.0559 max mem: 9305 +Train: [82] [ 400/6250] eta: 0:16:06 lr: 0.000011 grad: 0.1488 (0.1792) loss: 0.8573 (0.8529) time: 0.1594 data: 0.0677 max mem: 9305 +Train: [82] [ 500/6250] eta: 0:15:41 lr: 0.000011 grad: 0.1394 (0.1732) loss: 0.8551 (0.8534) time: 0.1726 data: 0.0820 max mem: 9305 +Train: [82] [ 600/6250] eta: 0:15:16 lr: 0.000011 grad: 0.1454 (0.1705) loss: 0.8541 (0.8532) time: 0.1717 data: 0.0789 max mem: 9305 +Train: [82] [ 700/6250] eta: 0:14:45 lr: 0.000011 grad: 0.1559 (0.1693) loss: 0.8482 (0.8528) time: 0.1598 data: 0.0678 max mem: 9305 +Train: [82] [ 800/6250] eta: 0:14:32 lr: 0.000011 grad: 0.1618 (0.1675) loss: 0.8462 (0.8524) time: 0.1870 data: 0.0915 max mem: 9305 +Train: [82] [ 900/6250] eta: 0:14:19 lr: 0.000011 grad: 0.1378 (0.1656) loss: 0.8535 (0.8524) time: 0.1652 data: 0.0784 max mem: 9305 +Train: [82] [1000/6250] eta: 0:14:16 lr: 0.000011 grad: 0.1513 (0.1640) loss: 0.8491 (0.8525) time: 0.2369 data: 0.1350 max mem: 9305 +Train: [82] [1100/6250] eta: 0:13:45 lr: 0.000011 grad: 0.1406 (0.1625) loss: 0.8554 (0.8526) time: 0.1371 data: 0.0460 max mem: 9305 +Train: [82] [1200/6250] eta: 0:13:42 lr: 0.000011 grad: 0.1502 (0.1617) loss: 0.8550 (0.8525) time: 0.2733 data: 0.1796 max mem: 9305 +Train: [82] [1300/6250] eta: 0:13:35 lr: 0.000011 grad: 0.1439 (0.1605) loss: 0.8546 (0.8526) time: 0.1369 data: 0.0404 max mem: 9305 +Train: [82] [1400/6250] eta: 0:13:32 lr: 0.000010 grad: 0.1414 (0.1598) loss: 0.8482 (0.8525) time: 0.1108 data: 0.0003 max mem: 9305 +Train: [82] [1500/6250] eta: 0:13:19 lr: 0.000010 grad: 0.1484 (0.1593) loss: 0.8537 (0.8524) time: 0.1630 data: 0.0822 max mem: 9305 +Train: [82] [1600/6250] eta: 0:13:15 lr: 0.000010 grad: 0.1429 (0.1587) loss: 0.8510 (0.8524) time: 0.1791 data: 0.0790 max mem: 9305 +Train: [82] [1700/6250] eta: 0:12:52 lr: 0.000010 grad: 0.1366 (0.1581) loss: 0.8482 (0.8522) time: 0.1692 data: 0.0719 max mem: 9305 +Train: [82] [1800/6250] eta: 0:12:38 lr: 0.000010 grad: 0.1517 (0.1578) loss: 0.8516 (0.8522) time: 0.2383 data: 0.1488 max mem: 9305 +Train: [82] [1900/6250] eta: 0:12:13 lr: 0.000010 grad: 0.1476 (0.1577) loss: 0.8545 (0.8521) time: 0.1349 data: 0.0421 max mem: 9305 +Train: [82] [2000/6250] eta: 0:11:54 lr: 0.000010 grad: 0.1494 (0.1576) loss: 0.8501 (0.8520) time: 0.1644 data: 0.0725 max mem: 9305 +Train: [82] [2100/6250] eta: 0:11:43 lr: 0.000010 grad: 0.1539 (0.1576) loss: 0.8537 (0.8519) time: 0.1015 data: 0.0003 max mem: 9305 +Train: [82] [2200/6250] eta: 0:11:22 lr: 0.000010 grad: 0.1497 (0.1573) loss: 0.8455 (0.8518) time: 0.1572 data: 0.0706 max mem: 9305 +Train: [82] [2300/6250] eta: 0:11:08 lr: 0.000010 grad: 0.1502 (0.1573) loss: 0.8443 (0.8517) time: 0.1661 data: 0.0573 max mem: 9305 +Train: [82] [2400/6250] eta: 0:10:46 lr: 0.000010 grad: 0.1483 (0.1571) loss: 0.8488 (0.8517) time: 0.1197 data: 0.0335 max mem: 9305 +Train: [82] [2500/6250] eta: 0:10:25 lr: 0.000010 grad: 0.1650 (0.1570) loss: 0.8407 (0.8517) time: 0.1507 data: 0.0563 max mem: 9305 +Train: [82] [2600/6250] eta: 0:10:06 lr: 0.000010 grad: 0.1543 (0.1569) loss: 0.8612 (0.8517) time: 0.1419 data: 0.0561 max mem: 9305 +Train: [82] [2700/6250] eta: 0:09:48 lr: 0.000010 grad: 0.1512 (0.1570) loss: 0.8443 (0.8516) time: 0.1495 data: 0.0671 max mem: 9305 +Train: [82] [2800/6250] eta: 0:09:29 lr: 0.000010 grad: 0.1538 (0.1570) loss: 0.8494 (0.8515) time: 0.1536 data: 0.0621 max mem: 9305 +Train: [82] [2900/6250] eta: 0:09:17 lr: 0.000010 grad: 0.1534 (0.1570) loss: 0.8497 (0.8514) time: 0.2536 data: 0.1581 max mem: 9305 +Train: [82] [3000/6250] eta: 0:08:59 lr: 0.000010 grad: 0.1664 (0.1570) loss: 0.8462 (0.8514) time: 0.1670 data: 0.0775 max mem: 9305 +Train: [82] [3100/6250] eta: 0:08:44 lr: 0.000010 grad: 0.1589 (0.1571) loss: 0.8530 (0.8513) time: 0.2009 data: 0.1129 max mem: 9305 +Train: [82] [3200/6250] eta: 0:08:27 lr: 0.000010 grad: 0.1610 (0.1571) loss: 0.8447 (0.8512) time: 0.1598 data: 0.0687 max mem: 9305 +Train: [82] [3300/6250] eta: 0:08:09 lr: 0.000010 grad: 0.1601 (0.1571) loss: 0.8516 (0.8512) time: 0.1527 data: 0.0622 max mem: 9305 +Train: [82] [3400/6250] eta: 0:07:52 lr: 0.000010 grad: 0.1699 (0.1571) loss: 0.8457 (0.8511) time: 0.1761 data: 0.0910 max mem: 9305 +Train: [82] [3500/6250] eta: 0:07:34 lr: 0.000010 grad: 0.1515 (0.1569) loss: 0.8561 (0.8511) time: 0.1387 data: 0.0501 max mem: 9305 +Train: [82] [3600/6250] eta: 0:07:16 lr: 0.000010 grad: 0.1536 (0.1569) loss: 0.8465 (0.8511) time: 0.1489 data: 0.0607 max mem: 9305 +Train: [82] [3700/6250] eta: 0:07:01 lr: 0.000010 grad: 0.1497 (0.1567) loss: 0.8602 (0.8511) time: 0.1717 data: 0.0850 max mem: 9305 +Train: [82] [3800/6250] eta: 0:06:43 lr: 0.000010 grad: 0.1439 (0.1566) loss: 0.8479 (0.8511) time: 0.1417 data: 0.0581 max mem: 9305 +Train: [82] [3900/6250] eta: 0:06:26 lr: 0.000010 grad: 0.1435 (0.1564) loss: 0.8447 (0.8511) time: 0.1498 data: 0.0664 max mem: 9305 +Train: [82] [4000/6250] eta: 0:06:09 lr: 0.000010 grad: 0.1424 (0.1563) loss: 0.8540 (0.8511) time: 0.1691 data: 0.0879 max mem: 9305 +Train: [82] [4100/6250] eta: 0:05:51 lr: 0.000010 grad: 0.1484 (0.1563) loss: 0.8572 (0.8510) time: 0.1542 data: 0.0700 max mem: 9305 +Train: [82] [4200/6250] eta: 0:05:35 lr: 0.000010 grad: 0.1449 (0.1562) loss: 0.8437 (0.8510) time: 0.1464 data: 0.0575 max mem: 9305 +Train: [82] [4300/6250] eta: 0:05:17 lr: 0.000010 grad: 0.1558 (0.1561) loss: 0.8486 (0.8511) time: 0.1548 data: 0.0678 max mem: 9305 +Train: [82] [4400/6250] eta: 0:05:01 lr: 0.000010 grad: 0.1596 (0.1560) loss: 0.8437 (0.8511) time: 0.1531 data: 0.0545 max mem: 9305 +Train: [82] [4500/6250] eta: 0:04:44 lr: 0.000010 grad: 0.1402 (0.1557) loss: 0.8577 (0.8513) time: 0.1359 data: 0.0537 max mem: 9305 +Train: [82] [4600/6250] eta: 0:04:27 lr: 0.000010 grad: 0.1344 (0.1556) loss: 0.8531 (0.8513) time: 0.1372 data: 0.0554 max mem: 9305 +Train: [82] [4700/6250] eta: 0:04:11 lr: 0.000010 grad: 0.1513 (0.1555) loss: 0.8518 (0.8513) time: 0.1831 data: 0.0963 max mem: 9305 +Train: [82] [4800/6250] eta: 0:03:55 lr: 0.000010 grad: 0.1557 (0.1554) loss: 0.8566 (0.8513) time: 0.1838 data: 0.0994 max mem: 9305 +Train: [82] [4900/6250] eta: 0:03:38 lr: 0.000010 grad: 0.1379 (0.1552) loss: 0.8594 (0.8513) time: 0.1627 data: 0.0804 max mem: 9305 +Train: [82] [5000/6250] eta: 0:03:21 lr: 0.000010 grad: 0.1413 (0.1550) loss: 0.8430 (0.8513) time: 0.1513 data: 0.0620 max mem: 9305 +Train: [82] [5100/6250] eta: 0:03:05 lr: 0.000010 grad: 0.1546 (0.1549) loss: 0.8530 (0.8513) time: 0.1456 data: 0.0532 max mem: 9305 +Train: [82] [5200/6250] eta: 0:02:49 lr: 0.000010 grad: 0.1425 (0.1549) loss: 0.8529 (0.8513) time: 0.1209 data: 0.0004 max mem: 9305 +Train: [82] [5300/6250] eta: 0:02:33 lr: 0.000010 grad: 0.1440 (0.1549) loss: 0.8450 (0.8512) time: 0.2572 data: 0.1503 max mem: 9305 +Train: [82] [5400/6250] eta: 0:02:17 lr: 0.000010 grad: 0.1513 (0.1548) loss: 0.8504 (0.8511) time: 0.1006 data: 0.0003 max mem: 9305 +Train: [82] [5500/6250] eta: 0:02:02 lr: 0.000010 grad: 0.1534 (0.1549) loss: 0.8487 (0.8511) time: 0.2145 data: 0.0962 max mem: 9305 +Train: [82] [5600/6250] eta: 0:01:46 lr: 0.000010 grad: 0.1425 (0.1548) loss: 0.8475 (0.8510) time: 0.1480 data: 0.0632 max mem: 9305 +Train: [82] [5700/6250] eta: 0:01:30 lr: 0.000010 grad: 0.1497 (0.1548) loss: 0.8443 (0.8510) time: 0.2011 data: 0.1063 max mem: 9305 +Train: [82] [5800/6250] eta: 0:01:13 lr: 0.000010 grad: 0.1440 (0.1547) loss: 0.8506 (0.8510) time: 0.1917 data: 0.0905 max mem: 9305 +Train: [82] [5900/6250] eta: 0:00:57 lr: 0.000010 grad: 0.1388 (0.1546) loss: 0.8509 (0.8510) time: 0.1825 data: 0.0969 max mem: 9305 +Train: [82] [6000/6250] eta: 0:00:41 lr: 0.000010 grad: 0.1456 (0.1545) loss: 0.8500 (0.8510) time: 0.1501 data: 0.0632 max mem: 9305 +Train: [82] [6100/6250] eta: 0:00:24 lr: 0.000010 grad: 0.1504 (0.1546) loss: 0.8494 (0.8510) time: 0.1359 data: 0.0298 max mem: 9305 +Train: [82] [6200/6250] eta: 0:00:08 lr: 0.000010 grad: 0.1491 (0.1545) loss: 0.8508 (0.8509) time: 0.2893 data: 0.2071 max mem: 9305 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.1425 (0.1544) loss: 0.8538 (0.8510) time: 0.1212 data: 0.0258 max mem: 9305 +Train: [82] Total time: 0:17:14 (0.1656 s / it) +Averaged stats: lr: 0.000010 grad: 0.1425 (0.1544) loss: 0.8538 (0.8510) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:05:53 loss: 0.8685 (0.8685) time: 5.7048 data: 5.6705 max mem: 9305 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.8608 (0.8641) time: 0.1243 data: 0.0945 max mem: 9305 +Eval (hcp-train-subset): [82] Total time: 0:00:14 (0.2269 s / it) +Averaged stats (hcp-train-subset): loss: 0.8608 (0.8641) +Eval (hcp-val): [82] [ 0/62] eta: 0:06:01 loss: 0.8753 (0.8753) time: 5.8366 data: 5.8004 max mem: 9305 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.8740 (0.8748) time: 0.1497 data: 0.1210 max mem: 9305 +Eval (hcp-val): [82] Total time: 0:00:15 (0.2446 s / it) +Averaged stats (hcp-val): loss: 0.8740 (0.8748) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 8:31:55 lr: 0.000010 grad: 0.0832 (0.0832) loss: 0.8968 (0.8968) time: 4.9145 data: 4.6185 max mem: 9305 +Train: [83] [ 100/6250] eta: 0:21:50 lr: 0.000010 grad: 0.1765 (0.1993) loss: 0.8592 (0.8643) time: 0.1533 data: 0.0563 max mem: 9305 +Train: [83] [ 200/6250] eta: 0:19:12 lr: 0.000010 grad: 0.1618 (0.1847) loss: 0.8658 (0.8601) time: 0.1816 data: 0.0809 max mem: 9305 +Train: [83] [ 300/6250] eta: 0:17:59 lr: 0.000010 grad: 0.1596 (0.1778) loss: 0.8539 (0.8590) time: 0.1799 data: 0.0783 max mem: 9305 +Train: [83] [ 400/6250] eta: 0:17:15 lr: 0.000010 grad: 0.1557 (0.1743) loss: 0.8559 (0.8581) time: 0.1320 data: 0.0218 max mem: 9305 +Train: [83] [ 500/6250] eta: 0:16:31 lr: 0.000010 grad: 0.1477 (0.1719) loss: 0.8568 (0.8572) time: 0.1531 data: 0.0624 max mem: 9305 +Train: [83] [ 600/6250] eta: 0:16:03 lr: 0.000010 grad: 0.1477 (0.1690) loss: 0.8588 (0.8571) time: 0.1669 data: 0.0763 max mem: 9305 +Train: [83] [ 700/6250] eta: 0:15:40 lr: 0.000009 grad: 0.1421 (0.1664) loss: 0.8549 (0.8569) time: 0.2007 data: 0.1167 max mem: 9305 +Train: [83] [ 800/6250] eta: 0:15:15 lr: 0.000009 grad: 0.1449 (0.1640) loss: 0.8575 (0.8570) time: 0.1781 data: 0.0866 max mem: 9305 +Train: [83] [ 900/6250] eta: 0:14:52 lr: 0.000009 grad: 0.1378 (0.1622) loss: 0.8553 (0.8568) time: 0.1691 data: 0.0824 max mem: 9305 +Train: [83] [1000/6250] eta: 0:14:28 lr: 0.000009 grad: 0.1434 (0.1607) loss: 0.8607 (0.8568) time: 0.1370 data: 0.0458 max mem: 9305 +Train: [83] [1100/6250] eta: 0:14:00 lr: 0.000009 grad: 0.1441 (0.1598) loss: 0.8549 (0.8566) time: 0.1075 data: 0.0191 max mem: 9305 +Train: [83] [1200/6250] eta: 0:13:43 lr: 0.000009 grad: 0.1484 (0.1590) loss: 0.8521 (0.8566) time: 0.1586 data: 0.0659 max mem: 9305 +Train: [83] [1300/6250] eta: 0:13:22 lr: 0.000009 grad: 0.1432 (0.1581) loss: 0.8570 (0.8567) time: 0.1427 data: 0.0461 max mem: 9305 +Train: [83] [1400/6250] eta: 0:13:03 lr: 0.000009 grad: 0.1378 (0.1574) loss: 0.8583 (0.8565) time: 0.1181 data: 0.0212 max mem: 9305 +Train: [83] [1500/6250] eta: 0:12:51 lr: 0.000009 grad: 0.1448 (0.1567) loss: 0.8568 (0.8565) time: 0.2076 data: 0.0940 max mem: 9305 +Train: [83] [1600/6250] eta: 0:12:43 lr: 0.000009 grad: 0.1547 (0.1566) loss: 0.8546 (0.8563) time: 0.2217 data: 0.1128 max mem: 9305 +Train: [83] [1700/6250] eta: 0:12:48 lr: 0.000009 grad: 0.1470 (0.1564) loss: 0.8532 (0.8561) time: 0.1251 data: 0.0003 max mem: 9305 +Train: [83] [1800/6250] eta: 0:12:36 lr: 0.000009 grad: 0.1502 (0.1564) loss: 0.8574 (0.8559) time: 0.2621 data: 0.1374 max mem: 9305 +Train: [83] [1900/6250] eta: 0:12:22 lr: 0.000009 grad: 0.1470 (0.1563) loss: 0.8523 (0.8556) time: 0.3022 data: 0.1659 max mem: 9305 +Train: [83] [2000/6250] eta: 0:12:05 lr: 0.000009 grad: 0.1493 (0.1561) loss: 0.8514 (0.8555) time: 0.1363 data: 0.0004 max mem: 9305 +Train: [83] [2100/6250] eta: 0:12:00 lr: 0.000009 grad: 0.1465 (0.1557) loss: 0.8646 (0.8555) time: 0.1030 data: 0.0007 max mem: 9305 +Train: [83] [2200/6250] eta: 0:11:38 lr: 0.000009 grad: 0.1489 (0.1554) loss: 0.8538 (0.8554) time: 0.1512 data: 0.0572 max mem: 9305 +Train: [83] [2300/6250] eta: 0:11:21 lr: 0.000009 grad: 0.1538 (0.1554) loss: 0.8539 (0.8552) time: 0.1536 data: 0.0697 max mem: 9305 +Train: [83] [2400/6250] eta: 0:11:02 lr: 0.000009 grad: 0.1465 (0.1553) loss: 0.8573 (0.8551) time: 0.1734 data: 0.0758 max mem: 9305 +Train: [83] [2500/6250] eta: 0:10:44 lr: 0.000009 grad: 0.1472 (0.1552) loss: 0.8552 (0.8550) time: 0.1191 data: 0.0081 max mem: 9305 +Train: [83] [2600/6250] eta: 0:10:28 lr: 0.000009 grad: 0.1623 (0.1552) loss: 0.8484 (0.8548) time: 0.2312 data: 0.1388 max mem: 9305 +Train: [83] [2700/6250] eta: 0:10:08 lr: 0.000009 grad: 0.1529 (0.1552) loss: 0.8556 (0.8546) time: 0.1564 data: 0.0668 max mem: 9305 +Train: [83] [2800/6250] eta: 0:09:52 lr: 0.000009 grad: 0.1514 (0.1552) loss: 0.8473 (0.8545) time: 0.1958 data: 0.1095 max mem: 9305 +Train: [83] [2900/6250] eta: 0:09:33 lr: 0.000009 grad: 0.1528 (0.1552) loss: 0.8510 (0.8543) time: 0.1797 data: 0.0868 max mem: 9305 +Train: [83] [3000/6250] eta: 0:09:16 lr: 0.000009 grad: 0.1602 (0.1551) loss: 0.8535 (0.8542) time: 0.0933 data: 0.0002 max mem: 9305 +Train: [83] [3100/6250] eta: 0:08:56 lr: 0.000009 grad: 0.1599 (0.1552) loss: 0.8522 (0.8540) time: 0.1792 data: 0.0970 max mem: 9305 +Train: [83] [3200/6250] eta: 0:08:48 lr: 0.000009 grad: 0.1462 (0.1552) loss: 0.8530 (0.8539) time: 0.5044 data: 0.4184 max mem: 9305 +Train: [83] [3300/6250] eta: 0:08:28 lr: 0.000009 grad: 0.1483 (0.1551) loss: 0.8544 (0.8538) time: 0.1659 data: 0.0755 max mem: 9305 +Train: [83] [3400/6250] eta: 0:08:11 lr: 0.000009 grad: 0.1415 (0.1550) loss: 0.8544 (0.8537) time: 0.1201 data: 0.0243 max mem: 9305 +Train: [83] [3500/6250] eta: 0:07:53 lr: 0.000009 grad: 0.1487 (0.1550) loss: 0.8543 (0.8535) time: 0.1476 data: 0.0626 max mem: 9305 +Train: [83] [3600/6250] eta: 0:07:36 lr: 0.000009 grad: 0.1497 (0.1551) loss: 0.8501 (0.8534) time: 0.1661 data: 0.0816 max mem: 9305 +Train: [83] [3700/6250] eta: 0:07:17 lr: 0.000009 grad: 0.1468 (0.1551) loss: 0.8537 (0.8533) time: 0.1607 data: 0.0703 max mem: 9305 +Train: [83] [3800/6250] eta: 0:06:58 lr: 0.000009 grad: 0.1539 (0.1553) loss: 0.8562 (0.8531) time: 0.1250 data: 0.0193 max mem: 9305 +Train: [83] [3900/6250] eta: 0:06:40 lr: 0.000009 grad: 0.1475 (0.1554) loss: 0.8484 (0.8530) time: 0.1750 data: 0.0802 max mem: 9305 +Train: [83] [4000/6250] eta: 0:06:22 lr: 0.000009 grad: 0.1571 (0.1556) loss: 0.8481 (0.8528) time: 0.1511 data: 0.0617 max mem: 9305 +Train: [83] [4100/6250] eta: 0:06:03 lr: 0.000009 grad: 0.1575 (0.1557) loss: 0.8501 (0.8526) time: 0.1330 data: 0.0455 max mem: 9305 +Train: [83] [4200/6250] eta: 0:05:45 lr: 0.000009 grad: 0.1654 (0.1558) loss: 0.8430 (0.8524) time: 0.1662 data: 0.0786 max mem: 9305 +Train: [83] [4300/6250] eta: 0:05:28 lr: 0.000009 grad: 0.1568 (0.1559) loss: 0.8554 (0.8524) time: 0.1309 data: 0.0432 max mem: 9305 +Train: [83] [4400/6250] eta: 0:05:10 lr: 0.000009 grad: 0.1483 (0.1559) loss: 0.8468 (0.8523) time: 0.1435 data: 0.0500 max mem: 9305 +Train: [83] [4500/6250] eta: 0:04:53 lr: 0.000009 grad: 0.1567 (0.1559) loss: 0.8465 (0.8522) time: 0.1508 data: 0.0626 max mem: 9305 +Train: [83] [4600/6250] eta: 0:04:36 lr: 0.000009 grad: 0.1513 (0.1559) loss: 0.8599 (0.8522) time: 0.1485 data: 0.0608 max mem: 9305 +Train: [83] [4700/6250] eta: 0:04:19 lr: 0.000009 grad: 0.1479 (0.1559) loss: 0.8523 (0.8522) time: 0.1479 data: 0.0451 max mem: 9305 +Train: [83] [4800/6250] eta: 0:04:02 lr: 0.000009 grad: 0.1510 (0.1559) loss: 0.8435 (0.8522) time: 0.1824 data: 0.0886 max mem: 9305 +Train: [83] [4900/6250] eta: 0:03:45 lr: 0.000009 grad: 0.1488 (0.1559) loss: 0.8506 (0.8521) time: 0.1367 data: 0.0327 max mem: 9305 +Train: [83] [5000/6250] eta: 0:03:28 lr: 0.000009 grad: 0.1496 (0.1558) loss: 0.8492 (0.8520) time: 0.3029 data: 0.2083 max mem: 9305 +Train: [83] [5100/6250] eta: 0:03:11 lr: 0.000009 grad: 0.1493 (0.1559) loss: 0.8466 (0.8520) time: 0.1986 data: 0.1055 max mem: 9305 +Train: [83] [5200/6250] eta: 0:02:54 lr: 0.000009 grad: 0.1556 (0.1558) loss: 0.8476 (0.8519) time: 0.0962 data: 0.0002 max mem: 9305 +Train: [83] [5300/6250] eta: 0:02:38 lr: 0.000009 grad: 0.1488 (0.1558) loss: 0.8535 (0.8519) time: 0.1995 data: 0.1089 max mem: 9305 +Train: [83] [5400/6250] eta: 0:02:21 lr: 0.000009 grad: 0.1535 (0.1558) loss: 0.8419 (0.8519) time: 0.1922 data: 0.0867 max mem: 9305 +Train: [83] [5500/6250] eta: 0:02:04 lr: 0.000009 grad: 0.1492 (0.1558) loss: 0.8554 (0.8519) time: 0.1422 data: 0.0562 max mem: 9305 +Train: [83] [5600/6250] eta: 0:01:48 lr: 0.000009 grad: 0.1498 (0.1558) loss: 0.8530 (0.8519) time: 0.1721 data: 0.0450 max mem: 9305 +Train: [83] [5700/6250] eta: 0:01:31 lr: 0.000009 grad: 0.1424 (0.1558) loss: 0.8505 (0.8519) time: 0.1422 data: 0.0493 max mem: 9305 +Train: [83] [5800/6250] eta: 0:01:15 lr: 0.000009 grad: 0.1444 (0.1557) loss: 0.8438 (0.8518) time: 0.1073 data: 0.0003 max mem: 9305 +Train: [83] [5900/6250] eta: 0:00:59 lr: 0.000009 grad: 0.1512 (0.1558) loss: 0.8504 (0.8518) time: 0.2283 data: 0.1209 max mem: 9305 +Train: [83] [6000/6250] eta: 0:00:42 lr: 0.000009 grad: 0.1432 (0.1559) loss: 0.8521 (0.8517) time: 0.1680 data: 0.0761 max mem: 9305 +Train: [83] [6100/6250] eta: 0:00:25 lr: 0.000009 grad: 0.1511 (0.1558) loss: 0.8459 (0.8517) time: 0.1585 data: 0.0609 max mem: 9305 +Train: [83] [6200/6250] eta: 0:00:08 lr: 0.000009 grad: 0.1465 (0.1558) loss: 0.8457 (0.8516) time: 0.1377 data: 0.0466 max mem: 9305 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.1366 (0.1558) loss: 0.8504 (0.8516) time: 0.1924 data: 0.0891 max mem: 9305 +Train: [83] Total time: 0:17:37 (0.1692 s / it) +Averaged stats: lr: 0.000009 grad: 0.1366 (0.1558) loss: 0.8504 (0.8516) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:03:49 loss: 0.8690 (0.8690) time: 3.7036 data: 3.5923 max mem: 9305 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.8598 (0.8623) time: 0.1149 data: 0.0863 max mem: 9305 +Eval (hcp-train-subset): [83] Total time: 0:00:14 (0.2291 s / it) +Averaged stats (hcp-train-subset): loss: 0.8598 (0.8623) +Eval (hcp-val): [83] [ 0/62] eta: 0:04:00 loss: 0.8683 (0.8683) time: 3.8856 data: 3.8400 max mem: 9305 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.8729 (0.8750) time: 0.1741 data: 0.1432 max mem: 9305 +Eval (hcp-val): [83] Total time: 0:00:14 (0.2393 s / it) +Averaged stats (hcp-val): loss: 0.8729 (0.8750) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [84] [ 0/6250] eta: 10:57:58 lr: 0.000009 grad: 0.2805 (0.2805) loss: 0.8901 (0.8901) time: 6.3165 data: 6.2002 max mem: 9305 +Train: [84] [ 100/6250] eta: 0:22:30 lr: 0.000009 grad: 0.1618 (0.2032) loss: 0.8534 (0.8551) time: 0.1809 data: 0.0800 max mem: 9305 +Train: [84] [ 200/6250] eta: 0:19:12 lr: 0.000009 grad: 0.1397 (0.1803) loss: 0.8582 (0.8557) time: 0.1681 data: 0.0615 max mem: 9305 +Train: [84] [ 300/6250] eta: 0:17:45 lr: 0.000008 grad: 0.1550 (0.1726) loss: 0.8568 (0.8552) time: 0.1672 data: 0.0613 max mem: 9305 +Train: [84] [ 400/6250] eta: 0:17:04 lr: 0.000008 grad: 0.1470 (0.1676) loss: 0.8530 (0.8552) time: 0.1499 data: 0.0552 max mem: 9305 +Train: [84] [ 500/6250] eta: 0:16:43 lr: 0.000008 grad: 0.1406 (0.1655) loss: 0.8531 (0.8551) time: 0.1772 data: 0.0623 max mem: 9305 +Train: [84] [ 600/6250] eta: 0:16:28 lr: 0.000008 grad: 0.1541 (0.1640) loss: 0.8572 (0.8552) time: 0.1676 data: 0.0678 max mem: 9305 +Train: [84] [ 700/6250] eta: 0:15:56 lr: 0.000008 grad: 0.1405 (0.1621) loss: 0.8607 (0.8558) time: 0.1207 data: 0.0176 max mem: 9305 +Train: [84] [ 800/6250] eta: 0:15:39 lr: 0.000008 grad: 0.1605 (0.1616) loss: 0.8536 (0.8553) time: 0.1384 data: 0.0479 max mem: 9305 +Train: [84] [ 900/6250] eta: 0:15:07 lr: 0.000008 grad: 0.1501 (0.1603) loss: 0.8526 (0.8554) time: 0.1042 data: 0.0002 max mem: 9305 +Train: [84] [1000/6250] eta: 0:14:50 lr: 0.000008 grad: 0.1414 (0.1595) loss: 0.8511 (0.8554) time: 0.1651 data: 0.0768 max mem: 9305 +Train: [84] [1100/6250] eta: 0:14:25 lr: 0.000008 grad: 0.1465 (0.1585) loss: 0.8482 (0.8553) time: 0.1672 data: 0.0830 max mem: 9305 +Train: [84] [1200/6250] eta: 0:14:03 lr: 0.000008 grad: 0.1429 (0.1578) loss: 0.8489 (0.8552) time: 0.1575 data: 0.0715 max mem: 9305 +Train: [84] [1300/6250] eta: 0:13:43 lr: 0.000008 grad: 0.1538 (0.1572) loss: 0.8504 (0.8552) time: 0.1556 data: 0.0627 max mem: 9305 +Train: [84] [1400/6250] eta: 0:13:26 lr: 0.000008 grad: 0.1432 (0.1568) loss: 0.8575 (0.8550) time: 0.1607 data: 0.0729 max mem: 9305 +Train: [84] [1500/6250] eta: 0:13:15 lr: 0.000008 grad: 0.1529 (0.1566) loss: 0.8557 (0.8550) time: 0.2166 data: 0.1203 max mem: 9305 +Train: [84] [1600/6250] eta: 0:12:46 lr: 0.000008 grad: 0.1552 (0.1562) loss: 0.8451 (0.8548) time: 0.0950 data: 0.0002 max mem: 9305 +Train: [84] [1700/6250] eta: 0:12:26 lr: 0.000008 grad: 0.1357 (0.1557) loss: 0.8566 (0.8548) time: 0.1736 data: 0.0848 max mem: 9305 +Train: [84] [1800/6250] eta: 0:12:14 lr: 0.000008 grad: 0.1521 (0.1555) loss: 0.8487 (0.8547) time: 0.2197 data: 0.1247 max mem: 9305 +Train: [84] [1900/6250] eta: 0:11:57 lr: 0.000008 grad: 0.1550 (0.1558) loss: 0.8497 (0.8544) time: 0.1710 data: 0.0877 max mem: 9305 +Train: [84] [2000/6250] eta: 0:11:50 lr: 0.000008 grad: 0.1548 (0.1560) loss: 0.8545 (0.8542) time: 0.2091 data: 0.0530 max mem: 9305 +Train: [84] [2100/6250] eta: 0:11:35 lr: 0.000008 grad: 0.1527 (0.1559) loss: 0.8509 (0.8541) time: 0.1624 data: 0.0650 max mem: 9305 +Train: [84] [2200/6250] eta: 0:11:27 lr: 0.000008 grad: 0.1560 (0.1561) loss: 0.8541 (0.8540) time: 0.1100 data: 0.0157 max mem: 9305 +Train: [84] [2300/6250] eta: 0:11:09 lr: 0.000008 grad: 0.1544 (0.1561) loss: 0.8507 (0.8538) time: 0.1638 data: 0.0683 max mem: 9305 +Train: [84] [2400/6250] eta: 0:10:58 lr: 0.000008 grad: 0.1472 (0.1559) loss: 0.8519 (0.8538) time: 0.3615 data: 0.2317 max mem: 9305 +Train: [84] [2500/6250] eta: 0:10:53 lr: 0.000008 grad: 0.1417 (0.1560) loss: 0.8525 (0.8537) time: 0.4719 data: 0.3476 max mem: 9305 +Train: [84] [2600/6250] eta: 0:10:32 lr: 0.000008 grad: 0.1537 (0.1560) loss: 0.8469 (0.8536) time: 0.1447 data: 0.0591 max mem: 9305 +Train: [84] [2700/6250] eta: 0:10:21 lr: 0.000008 grad: 0.1517 (0.1560) loss: 0.8525 (0.8535) time: 0.1036 data: 0.0003 max mem: 9305 +Train: [84] [2800/6250] eta: 0:10:00 lr: 0.000008 grad: 0.1478 (0.1559) loss: 0.8534 (0.8535) time: 0.1159 data: 0.0242 max mem: 9305 +Train: [84] [2900/6250] eta: 0:09:41 lr: 0.000008 grad: 0.1515 (0.1557) loss: 0.8502 (0.8535) time: 0.1586 data: 0.0773 max mem: 9305 +Train: [84] [3000/6250] eta: 0:09:22 lr: 0.000008 grad: 0.1455 (0.1556) loss: 0.8543 (0.8534) time: 0.1660 data: 0.0758 max mem: 9305 +Train: [84] [3100/6250] eta: 0:09:03 lr: 0.000008 grad: 0.1576 (0.1557) loss: 0.8568 (0.8534) time: 0.1346 data: 0.0509 max mem: 9305 +Train: [84] [3200/6250] eta: 0:08:49 lr: 0.000008 grad: 0.1524 (0.1556) loss: 0.8505 (0.8534) time: 0.0895 data: 0.0004 max mem: 9305 +Train: [84] [3300/6250] eta: 0:08:30 lr: 0.000008 grad: 0.1456 (0.1554) loss: 0.8553 (0.8534) time: 0.1087 data: 0.0178 max mem: 9305 +Train: [84] [3400/6250] eta: 0:08:11 lr: 0.000008 grad: 0.1441 (0.1552) loss: 0.8601 (0.8535) time: 0.1364 data: 0.0462 max mem: 9305 +Train: [84] [3500/6250] eta: 0:07:53 lr: 0.000008 grad: 0.1347 (0.1553) loss: 0.8568 (0.8534) time: 0.1795 data: 0.0886 max mem: 9305 +Train: [84] [3600/6250] eta: 0:07:35 lr: 0.000008 grad: 0.1534 (0.1553) loss: 0.8526 (0.8534) time: 0.1603 data: 0.0744 max mem: 9305 +Train: [84] [3700/6250] eta: 0:07:17 lr: 0.000008 grad: 0.1576 (0.1553) loss: 0.8566 (0.8534) time: 0.1573 data: 0.0658 max mem: 9305 +Train: [84] [3800/6250] eta: 0:07:00 lr: 0.000008 grad: 0.1534 (0.1553) loss: 0.8545 (0.8535) time: 0.1846 data: 0.0968 max mem: 9305 +Train: [84] [3900/6250] eta: 0:06:42 lr: 0.000008 grad: 0.1483 (0.1552) loss: 0.8570 (0.8535) time: 0.1498 data: 0.0574 max mem: 9305 +Train: [84] [4000/6250] eta: 0:06:24 lr: 0.000008 grad: 0.1490 (0.1551) loss: 0.8574 (0.8536) time: 0.1551 data: 0.0724 max mem: 9305 +Train: [84] [4100/6250] eta: 0:06:05 lr: 0.000008 grad: 0.1616 (0.1553) loss: 0.8565 (0.8536) time: 0.1374 data: 0.0524 max mem: 9305 +Train: [84] [4200/6250] eta: 0:05:47 lr: 0.000008 grad: 0.1556 (0.1553) loss: 0.8563 (0.8536) time: 0.1311 data: 0.0397 max mem: 9305 +Train: [84] [4300/6250] eta: 0:05:29 lr: 0.000008 grad: 0.1622 (0.1553) loss: 0.8500 (0.8536) time: 0.1583 data: 0.0614 max mem: 9305 +Train: [84] [4400/6250] eta: 0:05:12 lr: 0.000008 grad: 0.1527 (0.1554) loss: 0.8513 (0.8535) time: 0.1713 data: 0.0870 max mem: 9305 +Train: [84] [4500/6250] eta: 0:04:54 lr: 0.000008 grad: 0.1502 (0.1552) loss: 0.8585 (0.8536) time: 0.1192 data: 0.0286 max mem: 9305 +Train: [84] [4600/6250] eta: 0:04:36 lr: 0.000008 grad: 0.1427 (0.1551) loss: 0.8521 (0.8537) time: 0.1363 data: 0.0479 max mem: 9305 +Train: [84] [4700/6250] eta: 0:04:19 lr: 0.000008 grad: 0.1398 (0.1549) loss: 0.8541 (0.8537) time: 0.1305 data: 0.0371 max mem: 9305 +Train: [84] [4800/6250] eta: 0:04:02 lr: 0.000008 grad: 0.1503 (0.1548) loss: 0.8542 (0.8538) time: 0.1580 data: 0.0647 max mem: 9305 +Train: [84] [4900/6250] eta: 0:03:44 lr: 0.000008 grad: 0.1467 (0.1548) loss: 0.8530 (0.8538) time: 0.1604 data: 0.0674 max mem: 9305 +Train: [84] [5000/6250] eta: 0:03:27 lr: 0.000008 grad: 0.1428 (0.1548) loss: 0.8544 (0.8538) time: 0.1188 data: 0.0347 max mem: 9305 +Train: [84] [5100/6250] eta: 0:03:10 lr: 0.000008 grad: 0.1465 (0.1547) loss: 0.8497 (0.8538) time: 0.1426 data: 0.0597 max mem: 9305 +Train: [84] [5200/6250] eta: 0:02:53 lr: 0.000008 grad: 0.1499 (0.1547) loss: 0.8516 (0.8539) time: 0.1712 data: 0.0695 max mem: 9305 +Train: [84] [5300/6250] eta: 0:02:37 lr: 0.000008 grad: 0.1526 (0.1548) loss: 0.8501 (0.8538) time: 0.1528 data: 0.0569 max mem: 9305 +Train: [84] [5400/6250] eta: 0:02:20 lr: 0.000008 grad: 0.1493 (0.1548) loss: 0.8549 (0.8538) time: 0.0998 data: 0.0108 max mem: 9305 +Train: [84] [5500/6250] eta: 0:02:03 lr: 0.000008 grad: 0.1528 (0.1549) loss: 0.8510 (0.8538) time: 0.1313 data: 0.0336 max mem: 9305 +Train: [84] [5600/6250] eta: 0:01:47 lr: 0.000008 grad: 0.1489 (0.1549) loss: 0.8528 (0.8538) time: 0.1959 data: 0.0809 max mem: 9305 +Train: [84] [5700/6250] eta: 0:01:31 lr: 0.000008 grad: 0.1553 (0.1549) loss: 0.8511 (0.8538) time: 0.2105 data: 0.1103 max mem: 9305 +Train: [84] [5800/6250] eta: 0:01:14 lr: 0.000008 grad: 0.1560 (0.1550) loss: 0.8535 (0.8537) time: 0.1687 data: 0.0511 max mem: 9305 +Train: [84] [5900/6250] eta: 0:00:58 lr: 0.000008 grad: 0.1494 (0.1549) loss: 0.8456 (0.8537) time: 0.1766 data: 0.0822 max mem: 9305 +Train: [84] [6000/6250] eta: 0:00:41 lr: 0.000008 grad: 0.1399 (0.1549) loss: 0.8557 (0.8537) time: 0.1463 data: 0.0558 max mem: 9305 +Train: [84] [6100/6250] eta: 0:00:25 lr: 0.000008 grad: 0.1417 (0.1549) loss: 0.8600 (0.8537) time: 0.1385 data: 0.0419 max mem: 9305 +Train: [84] [6200/6250] eta: 0:00:08 lr: 0.000008 grad: 0.1686 (0.1549) loss: 0.8571 (0.8537) time: 0.1770 data: 0.0951 max mem: 9305 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.1494 (0.1548) loss: 0.8508 (0.8537) time: 0.1753 data: 0.0910 max mem: 9305 +Train: [84] Total time: 0:17:32 (0.1684 s / it) +Averaged stats: lr: 0.000008 grad: 0.1494 (0.1548) loss: 0.8508 (0.8537) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:04:25 loss: 0.8656 (0.8656) time: 4.2838 data: 4.2061 max mem: 9305 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.8605 (0.8617) time: 0.1547 data: 0.1251 max mem: 9305 +Eval (hcp-train-subset): [84] Total time: 0:00:15 (0.2427 s / it) +Averaged stats (hcp-train-subset): loss: 0.8605 (0.8617) +Making plots (hcp-train-subset): example=9 +Eval (hcp-val): [84] [ 0/62] eta: 0:05:44 loss: 0.8714 (0.8714) time: 5.5645 data: 5.5085 max mem: 9305 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.8731 (0.8745) time: 0.1647 data: 0.1358 max mem: 9305 +Eval (hcp-val): [84] Total time: 0:00:15 (0.2460 s / it) +Averaged stats (hcp-val): loss: 0.8731 (0.8745) +Making plots (hcp-val): example=39 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [85] [ 0/6250] eta: 9:54:20 lr: 0.000008 grad: 0.1034 (0.1034) loss: 0.9097 (0.9097) time: 5.7057 data: 5.4252 max mem: 9305 +Train: [85] [ 100/6250] eta: 0:23:05 lr: 0.000008 grad: 0.1359 (0.1542) loss: 0.8669 (0.8760) time: 0.1819 data: 0.0785 max mem: 9305 +Train: [85] [ 200/6250] eta: 0:19:45 lr: 0.000008 grad: 0.1435 (0.1625) loss: 0.8524 (0.8693) time: 0.1578 data: 0.0538 max mem: 9305 +Train: [85] [ 300/6250] eta: 0:18:20 lr: 0.000007 grad: 0.1596 (0.1614) loss: 0.8555 (0.8655) time: 0.1617 data: 0.0569 max mem: 9305 +Train: [85] [ 400/6250] eta: 0:17:14 lr: 0.000007 grad: 0.1402 (0.1614) loss: 0.8624 (0.8634) time: 0.1477 data: 0.0609 max mem: 9305 +Train: [85] [ 500/6250] eta: 0:16:37 lr: 0.000007 grad: 0.1556 (0.1606) loss: 0.8596 (0.8620) time: 0.1513 data: 0.0550 max mem: 9305 +Train: [85] [ 600/6250] eta: 0:16:01 lr: 0.000007 grad: 0.1375 (0.1607) loss: 0.8553 (0.8610) time: 0.1896 data: 0.0873 max mem: 9305 +Train: [85] [ 700/6250] eta: 0:15:47 lr: 0.000007 grad: 0.1391 (0.1591) loss: 0.8524 (0.8604) time: 0.2236 data: 0.1069 max mem: 9305 +Train: [85] [ 800/6250] eta: 0:15:30 lr: 0.000007 grad: 0.1390 (0.1575) loss: 0.8569 (0.8602) time: 0.1644 data: 0.0690 max mem: 9305 +Train: [85] [ 900/6250] eta: 0:15:21 lr: 0.000007 grad: 0.1436 (0.1566) loss: 0.8584 (0.8598) time: 0.2901 data: 0.1369 max mem: 9305 +Train: [85] [1000/6250] eta: 0:14:59 lr: 0.000007 grad: 0.1415 (0.1560) loss: 0.8586 (0.8595) time: 0.2208 data: 0.1112 max mem: 9305 +Train: [85] [1100/6250] eta: 0:14:24 lr: 0.000007 grad: 0.1382 (0.1557) loss: 0.8550 (0.8590) time: 0.1488 data: 0.0502 max mem: 9305 +Train: [85] [1200/6250] eta: 0:13:59 lr: 0.000007 grad: 0.1432 (0.1553) loss: 0.8537 (0.8584) time: 0.1787 data: 0.0825 max mem: 9305 +Train: [85] [1300/6250] eta: 0:13:37 lr: 0.000007 grad: 0.1605 (0.1551) loss: 0.8520 (0.8580) time: 0.1929 data: 0.0986 max mem: 9305 +Train: [85] [1400/6250] eta: 0:13:14 lr: 0.000007 grad: 0.1487 (0.1548) loss: 0.8577 (0.8577) time: 0.1433 data: 0.0482 max mem: 9305 +Train: [85] [1500/6250] eta: 0:12:52 lr: 0.000007 grad: 0.1529 (0.1545) loss: 0.8440 (0.8573) time: 0.1495 data: 0.0646 max mem: 9305 +Train: [85] [1600/6250] eta: 0:12:26 lr: 0.000007 grad: 0.1594 (0.1545) loss: 0.8521 (0.8569) time: 0.1313 data: 0.0412 max mem: 9305 +Train: [85] [1700/6250] eta: 0:12:04 lr: 0.000007 grad: 0.1495 (0.1546) loss: 0.8518 (0.8565) time: 0.1719 data: 0.0915 max mem: 9305 +Train: [85] [1800/6250] eta: 0:11:42 lr: 0.000007 grad: 0.1474 (0.1547) loss: 0.8465 (0.8562) time: 0.1055 data: 0.0194 max mem: 9305 +Train: [85] [1900/6250] eta: 0:11:25 lr: 0.000007 grad: 0.1506 (0.1549) loss: 0.8514 (0.8558) time: 0.1574 data: 0.0628 max mem: 9305 +Train: [85] [2000/6250] eta: 0:11:04 lr: 0.000007 grad: 0.1568 (0.1552) loss: 0.8462 (0.8554) time: 0.1419 data: 0.0600 max mem: 9305 +Train: [85] [2100/6250] eta: 0:10:44 lr: 0.000007 grad: 0.1473 (0.1552) loss: 0.8519 (0.8553) time: 0.1508 data: 0.0743 max mem: 9305 +Train: [85] [2200/6250] eta: 0:10:24 lr: 0.000007 grad: 0.1594 (0.1555) loss: 0.8463 (0.8550) time: 0.1313 data: 0.0513 max mem: 9305 +Train: [85] [2300/6250] eta: 0:10:05 lr: 0.000007 grad: 0.1474 (0.1556) loss: 0.8459 (0.8549) time: 0.1366 data: 0.0480 max mem: 9305 +Train: [85] [2400/6250] eta: 0:09:47 lr: 0.000007 grad: 0.1546 (0.1557) loss: 0.8484 (0.8546) time: 0.1392 data: 0.0570 max mem: 9305 +Train: [85] [2500/6250] eta: 0:09:31 lr: 0.000007 grad: 0.1489 (0.1557) loss: 0.8511 (0.8545) time: 0.1517 data: 0.0713 max mem: 9305 +Train: [85] [2600/6250] eta: 0:09:15 lr: 0.000007 grad: 0.1534 (0.1559) loss: 0.8475 (0.8543) time: 0.1529 data: 0.0665 max mem: 9305 +Train: [85] [2700/6250] eta: 0:09:03 lr: 0.000007 grad: 0.1627 (0.1561) loss: 0.8561 (0.8541) time: 0.1549 data: 0.0522 max mem: 9305 +Train: [85] [2800/6250] eta: 0:08:47 lr: 0.000007 grad: 0.1582 (0.1561) loss: 0.8522 (0.8540) time: 0.1362 data: 0.0483 max mem: 9305 +Train: [85] [2900/6250] eta: 0:08:31 lr: 0.000007 grad: 0.1539 (0.1563) loss: 0.8528 (0.8539) time: 0.1932 data: 0.0791 max mem: 9305 +Train: [85] [3000/6250] eta: 0:08:16 lr: 0.000007 grad: 0.1547 (0.1565) loss: 0.8515 (0.8537) time: 0.0922 data: 0.0004 max mem: 9305 +Train: [85] [3100/6250] eta: 0:08:00 lr: 0.000007 grad: 0.1532 (0.1567) loss: 0.8468 (0.8535) time: 0.1266 data: 0.0475 max mem: 9305 +Train: [85] [3200/6250] eta: 0:07:45 lr: 0.000007 grad: 0.1621 (0.1569) loss: 0.8459 (0.8534) time: 0.1488 data: 0.0608 max mem: 9305 +Train: [85] [3300/6250] eta: 0:07:30 lr: 0.000007 grad: 0.1544 (0.1571) loss: 0.8515 (0.8532) time: 0.1784 data: 0.0967 max mem: 9305 +Train: [85] [3400/6250] eta: 0:07:13 lr: 0.000007 grad: 0.1618 (0.1574) loss: 0.8460 (0.8531) time: 0.1490 data: 0.0615 max mem: 9305 +Train: [85] [3500/6250] eta: 0:06:58 lr: 0.000007 grad: 0.1523 (0.1574) loss: 0.8520 (0.8530) time: 0.1516 data: 0.0648 max mem: 9305 +Train: [85] [3600/6250] eta: 0:06:42 lr: 0.000007 grad: 0.1494 (0.1574) loss: 0.8520 (0.8530) time: 0.1540 data: 0.0707 max mem: 9305 +Train: [85] [3700/6250] eta: 0:06:26 lr: 0.000007 grad: 0.1393 (0.1573) loss: 0.8596 (0.8531) time: 0.1289 data: 0.0372 max mem: 9305 +Train: [85] [3800/6250] eta: 0:06:10 lr: 0.000007 grad: 0.1459 (0.1572) loss: 0.8555 (0.8531) time: 0.1399 data: 0.0525 max mem: 9305 +Train: [85] [3900/6250] eta: 0:05:53 lr: 0.000007 grad: 0.1544 (0.1573) loss: 0.8491 (0.8532) time: 0.1255 data: 0.0395 max mem: 9305 +Train: [85] [4000/6250] eta: 0:05:40 lr: 0.000007 grad: 0.1647 (0.1573) loss: 0.8467 (0.8532) time: 0.1050 data: 0.0110 max mem: 9305 +Train: [85] [4100/6250] eta: 0:05:24 lr: 0.000007 grad: 0.1545 (0.1574) loss: 0.8503 (0.8532) time: 0.1716 data: 0.0895 max mem: 9305 +Train: [85] [4200/6250] eta: 0:05:09 lr: 0.000007 grad: 0.1636 (0.1575) loss: 0.8506 (0.8532) time: 0.1069 data: 0.0230 max mem: 9305 +Train: [85] [4300/6250] eta: 0:04:54 lr: 0.000007 grad: 0.1451 (0.1576) loss: 0.8578 (0.8532) time: 0.1116 data: 0.0229 max mem: 9305 +Train: [85] [4400/6250] eta: 0:04:38 lr: 0.000007 grad: 0.1635 (0.1578) loss: 0.8437 (0.8531) time: 0.0921 data: 0.0081 max mem: 9305 +Train: [85] [4500/6250] eta: 0:04:23 lr: 0.000007 grad: 0.1568 (0.1578) loss: 0.8534 (0.8531) time: 0.1510 data: 0.0660 max mem: 9305 +Train: [85] [4600/6250] eta: 0:04:07 lr: 0.000007 grad: 0.1471 (0.1579) loss: 0.8532 (0.8530) time: 0.1217 data: 0.0310 max mem: 9305 +Train: [85] [4700/6250] eta: 0:03:52 lr: 0.000007 grad: 0.1585 (0.1580) loss: 0.8532 (0.8529) time: 0.1481 data: 0.0734 max mem: 9305 +Train: [85] [4800/6250] eta: 0:03:36 lr: 0.000007 grad: 0.1656 (0.1580) loss: 0.8495 (0.8529) time: 0.1244 data: 0.0445 max mem: 9305 +Train: [85] [4900/6250] eta: 0:03:21 lr: 0.000007 grad: 0.1469 (0.1579) loss: 0.8497 (0.8529) time: 0.1315 data: 0.0482 max mem: 9305 +Train: [85] [5000/6250] eta: 0:03:06 lr: 0.000007 grad: 0.1585 (0.1579) loss: 0.8473 (0.8529) time: 0.1419 data: 0.0616 max mem: 9305 +Train: [85] [5100/6250] eta: 0:02:51 lr: 0.000007 grad: 0.1504 (0.1578) loss: 0.8520 (0.8530) time: 0.1475 data: 0.0597 max mem: 9305 +Train: [85] [5200/6250] eta: 0:02:36 lr: 0.000007 grad: 0.1583 (0.1579) loss: 0.8480 (0.8529) time: 0.1349 data: 0.0503 max mem: 9305 +Train: [85] [5300/6250] eta: 0:02:21 lr: 0.000007 grad: 0.1401 (0.1578) loss: 0.8577 (0.8530) time: 0.1787 data: 0.0972 max mem: 9305 +Train: [85] [5400/6250] eta: 0:02:06 lr: 0.000007 grad: 0.1537 (0.1577) loss: 0.8555 (0.8530) time: 0.1353 data: 0.0586 max mem: 9305 +Train: [85] [5500/6250] eta: 0:01:51 lr: 0.000007 grad: 0.1447 (0.1576) loss: 0.8593 (0.8530) time: 0.1413 data: 0.0622 max mem: 9305 +Train: [85] [5600/6250] eta: 0:01:36 lr: 0.000007 grad: 0.1396 (0.1575) loss: 0.8548 (0.8531) time: 0.1833 data: 0.0990 max mem: 9305 +Train: [85] [5700/6250] eta: 0:01:21 lr: 0.000007 grad: 0.1499 (0.1573) loss: 0.8518 (0.8532) time: 0.1607 data: 0.0810 max mem: 9305 +Train: [85] [5800/6250] eta: 0:01:06 lr: 0.000007 grad: 0.1486 (0.1572) loss: 0.8591 (0.8532) time: 0.1238 data: 0.0356 max mem: 9305 +Train: [85] [5900/6250] eta: 0:00:52 lr: 0.000007 grad: 0.1526 (0.1571) loss: 0.8549 (0.8532) time: 0.1616 data: 0.0848 max mem: 9305 +Train: [85] [6000/6250] eta: 0:00:37 lr: 0.000007 grad: 0.1535 (0.1571) loss: 0.8553 (0.8532) time: 0.1134 data: 0.0281 max mem: 9305 +Train: [85] [6100/6250] eta: 0:00:22 lr: 0.000007 grad: 0.1442 (0.1572) loss: 0.8544 (0.8532) time: 0.1415 data: 0.0627 max mem: 9305 +Train: [85] [6200/6250] eta: 0:00:07 lr: 0.000007 grad: 0.1500 (0.1572) loss: 0.8561 (0.8531) time: 0.1503 data: 0.0610 max mem: 9305 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.1587 (0.1573) loss: 0.8509 (0.8531) time: 0.1724 data: 0.0945 max mem: 9305 +Train: [85] Total time: 0:15:30 (0.1489 s / it) +Averaged stats: lr: 0.000007 grad: 0.1587 (0.1573) loss: 0.8509 (0.8531) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:05:38 loss: 0.8676 (0.8676) time: 5.4598 data: 5.4235 max mem: 9305 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.8576 (0.8612) time: 0.1582 data: 0.1302 max mem: 9305 +Eval (hcp-train-subset): [85] Total time: 0:00:15 (0.2459 s / it) +Averaged stats (hcp-train-subset): loss: 0.8576 (0.8612) +Eval (hcp-val): [85] [ 0/62] eta: 0:05:41 loss: 0.8743 (0.8743) time: 5.5076 data: 5.4741 max mem: 9305 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.8723 (0.8746) time: 0.1385 data: 0.1089 max mem: 9305 +Eval (hcp-val): [85] Total time: 0:00:15 (0.2547 s / it) +Averaged stats (hcp-val): loss: 0.8723 (0.8746) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 8:47:29 lr: 0.000007 grad: 1.0103 (1.0103) loss: 0.8127 (0.8127) time: 5.0639 data: 4.7176 max mem: 9305 +Train: [86] [ 100/6250] eta: 0:22:54 lr: 0.000007 grad: 0.1771 (0.2342) loss: 0.8643 (0.8592) time: 0.1526 data: 0.0352 max mem: 9305 +Train: [86] [ 200/6250] eta: 0:19:08 lr: 0.000007 grad: 0.1704 (0.2123) loss: 0.8477 (0.8547) time: 0.1502 data: 0.0457 max mem: 9305 +Train: [86] [ 300/6250] eta: 0:17:24 lr: 0.000007 grad: 0.1591 (0.2043) loss: 0.8516 (0.8521) time: 0.1336 data: 0.0358 max mem: 9305 +Train: [86] [ 400/6250] eta: 0:16:29 lr: 0.000007 grad: 0.1613 (0.1952) loss: 0.8516 (0.8522) time: 0.1311 data: 0.0388 max mem: 9305 +Train: [86] [ 500/6250] eta: 0:15:50 lr: 0.000007 grad: 0.1467 (0.1881) loss: 0.8423 (0.8520) time: 0.1539 data: 0.0506 max mem: 9305 +Train: [86] [ 600/6250] eta: 0:16:18 lr: 0.000006 grad: 0.1503 (0.1830) loss: 0.8487 (0.8519) time: 0.1802 data: 0.0881 max mem: 9305 +Train: [86] [ 700/6250] eta: 0:15:42 lr: 0.000006 grad: 0.1551 (0.1794) loss: 0.8543 (0.8519) time: 0.0990 data: 0.0002 max mem: 9305 +Train: [86] [ 800/6250] eta: 0:15:21 lr: 0.000006 grad: 0.1572 (0.1768) loss: 0.8513 (0.8516) time: 0.1197 data: 0.0317 max mem: 9305 +Train: [86] [ 900/6250] eta: 0:15:10 lr: 0.000006 grad: 0.1467 (0.1747) loss: 0.8506 (0.8514) time: 0.1399 data: 0.0453 max mem: 9305 +Train: [86] [1000/6250] eta: 0:14:45 lr: 0.000006 grad: 0.1462 (0.1729) loss: 0.8538 (0.8515) time: 0.1501 data: 0.0623 max mem: 9305 +Train: [86] [1100/6250] eta: 0:15:03 lr: 0.000006 grad: 0.1425 (0.1712) loss: 0.8512 (0.8516) time: 0.0958 data: 0.0002 max mem: 9305 +Train: [86] [1200/6250] eta: 0:14:30 lr: 0.000006 grad: 0.1458 (0.1700) loss: 0.8507 (0.8515) time: 0.1470 data: 0.0644 max mem: 9305 +Train: [86] [1300/6250] eta: 0:14:04 lr: 0.000006 grad: 0.1697 (0.1690) loss: 0.8455 (0.8514) time: 0.1372 data: 0.0587 max mem: 9305 +Train: [86] [1400/6250] eta: 0:13:39 lr: 0.000006 grad: 0.1573 (0.1684) loss: 0.8443 (0.8512) time: 0.1677 data: 0.0812 max mem: 9305 +Train: [86] [1500/6250] eta: 0:13:17 lr: 0.000006 grad: 0.1544 (0.1677) loss: 0.8513 (0.8512) time: 0.1465 data: 0.0647 max mem: 9305 +Train: [86] [1600/6250] eta: 0:12:56 lr: 0.000006 grad: 0.1528 (0.1669) loss: 0.8558 (0.8512) time: 0.1286 data: 0.0434 max mem: 9305 +Train: [86] [1700/6250] eta: 0:12:33 lr: 0.000006 grad: 0.1508 (0.1662) loss: 0.8493 (0.8512) time: 0.1458 data: 0.0622 max mem: 9305 +Train: [86] [1800/6250] eta: 0:12:12 lr: 0.000006 grad: 0.1545 (0.1658) loss: 0.8513 (0.8510) time: 0.1485 data: 0.0644 max mem: 9305 +Train: [86] [1900/6250] eta: 0:11:53 lr: 0.000006 grad: 0.1424 (0.1653) loss: 0.8494 (0.8510) time: 0.1681 data: 0.0903 max mem: 9305 +Train: [86] [2000/6250] eta: 0:11:36 lr: 0.000006 grad: 0.1505 (0.1647) loss: 0.8580 (0.8510) time: 0.1515 data: 0.0683 max mem: 9305 +Train: [86] [2100/6250] eta: 0:11:15 lr: 0.000006 grad: 0.1558 (0.1642) loss: 0.8529 (0.8509) time: 0.1375 data: 0.0557 max mem: 9305 +Train: [86] [2200/6250] eta: 0:10:54 lr: 0.000006 grad: 0.1523 (0.1638) loss: 0.8534 (0.8510) time: 0.1249 data: 0.0430 max mem: 9305 +Train: [86] [2300/6250] eta: 0:10:37 lr: 0.000006 grad: 0.1530 (0.1635) loss: 0.8596 (0.8510) time: 0.1790 data: 0.0896 max mem: 9305 +Train: [86] [2400/6250] eta: 0:10:19 lr: 0.000006 grad: 0.1504 (0.1632) loss: 0.8504 (0.8510) time: 0.1023 data: 0.0136 max mem: 9305 +Train: [86] [2500/6250] eta: 0:10:02 lr: 0.000006 grad: 0.1443 (0.1629) loss: 0.8525 (0.8510) time: 0.1781 data: 0.1019 max mem: 9305 +Train: [86] [2600/6250] eta: 0:09:43 lr: 0.000006 grad: 0.1544 (0.1625) loss: 0.8535 (0.8511) time: 0.1413 data: 0.0571 max mem: 9305 +Train: [86] [2700/6250] eta: 0:09:28 lr: 0.000006 grad: 0.1406 (0.1621) loss: 0.8571 (0.8511) time: 0.1449 data: 0.0582 max mem: 9305 +Train: [86] [2800/6250] eta: 0:09:10 lr: 0.000006 grad: 0.1625 (0.1619) loss: 0.8469 (0.8511) time: 0.1551 data: 0.0758 max mem: 9305 +Train: [86] [2900/6250] eta: 0:08:52 lr: 0.000006 grad: 0.1510 (0.1617) loss: 0.8534 (0.8511) time: 0.1473 data: 0.0669 max mem: 9305 +Train: [86] [3000/6250] eta: 0:08:35 lr: 0.000006 grad: 0.1423 (0.1615) loss: 0.8609 (0.8512) time: 0.1548 data: 0.0731 max mem: 9305 +Train: [86] [3100/6250] eta: 0:08:18 lr: 0.000006 grad: 0.1427 (0.1612) loss: 0.8582 (0.8513) time: 0.1330 data: 0.0494 max mem: 9305 +Train: [86] [3200/6250] eta: 0:08:00 lr: 0.000006 grad: 0.1600 (0.1611) loss: 0.8507 (0.8514) time: 0.1338 data: 0.0473 max mem: 9305 +Train: [86] [3300/6250] eta: 0:07:43 lr: 0.000006 grad: 0.1488 (0.1609) loss: 0.8560 (0.8515) time: 0.1352 data: 0.0513 max mem: 9305 +Train: [86] [3400/6250] eta: 0:07:25 lr: 0.000006 grad: 0.1445 (0.1608) loss: 0.8605 (0.8516) time: 0.1241 data: 0.0386 max mem: 9305 +Train: [86] [3500/6250] eta: 0:07:08 lr: 0.000006 grad: 0.1624 (0.1607) loss: 0.8491 (0.8516) time: 0.1014 data: 0.0158 max mem: 9305 +Train: [86] [3600/6250] eta: 0:06:51 lr: 0.000006 grad: 0.1495 (0.1606) loss: 0.8503 (0.8516) time: 0.1454 data: 0.0626 max mem: 9305 +Train: [86] [3700/6250] eta: 0:06:35 lr: 0.000006 grad: 0.1513 (0.1605) loss: 0.8485 (0.8516) time: 0.1447 data: 0.0637 max mem: 9305 +Train: [86] [3800/6250] eta: 0:06:20 lr: 0.000006 grad: 0.1458 (0.1603) loss: 0.8545 (0.8517) time: 0.1886 data: 0.0953 max mem: 9305 +Train: [86] [3900/6250] eta: 0:06:08 lr: 0.000006 grad: 0.1538 (0.1602) loss: 0.8519 (0.8517) time: 0.3632 data: 0.2397 max mem: 9305 +Train: [86] [4000/6250] eta: 0:05:53 lr: 0.000006 grad: 0.1489 (0.1599) loss: 0.8520 (0.8518) time: 0.1348 data: 0.0425 max mem: 9305 +Train: [86] [4100/6250] eta: 0:05:36 lr: 0.000006 grad: 0.1461 (0.1597) loss: 0.8620 (0.8519) time: 0.1379 data: 0.0514 max mem: 9305 +Train: [86] [4200/6250] eta: 0:05:20 lr: 0.000006 grad: 0.1518 (0.1595) loss: 0.8502 (0.8519) time: 0.1414 data: 0.0560 max mem: 9305 +Train: [86] [4300/6250] eta: 0:05:04 lr: 0.000006 grad: 0.1482 (0.1593) loss: 0.8531 (0.8520) time: 0.1502 data: 0.0696 max mem: 9305 +Train: [86] [4400/6250] eta: 0:04:48 lr: 0.000006 grad: 0.1475 (0.1591) loss: 0.8573 (0.8521) time: 0.1618 data: 0.0811 max mem: 9305 +Train: [86] [4500/6250] eta: 0:04:34 lr: 0.000006 grad: 0.1454 (0.1590) loss: 0.8615 (0.8522) time: 0.2306 data: 0.1474 max mem: 9305 +Train: [86] [4600/6250] eta: 0:04:16 lr: 0.000006 grad: 0.1554 (0.1589) loss: 0.8574 (0.8523) time: 0.0959 data: 0.0164 max mem: 9305 +Train: [86] [4700/6250] eta: 0:04:01 lr: 0.000006 grad: 0.1626 (0.1589) loss: 0.8543 (0.8523) time: 0.1857 data: 0.0814 max mem: 9305 +Train: [86] [4800/6250] eta: 0:03:46 lr: 0.000006 grad: 0.1503 (0.1589) loss: 0.8521 (0.8523) time: 0.1776 data: 0.0903 max mem: 9305 +Train: [86] [4900/6250] eta: 0:03:29 lr: 0.000006 grad: 0.1491 (0.1588) loss: 0.8506 (0.8523) time: 0.1273 data: 0.0322 max mem: 9305 +Train: [86] [5000/6250] eta: 0:03:14 lr: 0.000006 grad: 0.1542 (0.1588) loss: 0.8469 (0.8523) time: 0.1479 data: 0.0615 max mem: 9305 +Train: [86] [5100/6250] eta: 0:02:58 lr: 0.000006 grad: 0.1607 (0.1588) loss: 0.8518 (0.8523) time: 0.2731 data: 0.1768 max mem: 9305 +Train: [86] [5200/6250] eta: 0:02:42 lr: 0.000006 grad: 0.1595 (0.1588) loss: 0.8489 (0.8523) time: 0.1234 data: 0.0306 max mem: 9305 +Train: [86] [5300/6250] eta: 0:02:26 lr: 0.000006 grad: 0.1611 (0.1588) loss: 0.8537 (0.8523) time: 0.1175 data: 0.0371 max mem: 9305 +Train: [86] [5400/6250] eta: 0:02:11 lr: 0.000006 grad: 0.1543 (0.1588) loss: 0.8538 (0.8522) time: 0.1585 data: 0.0784 max mem: 9305 +Train: [86] [5500/6250] eta: 0:01:55 lr: 0.000006 grad: 0.1568 (0.1588) loss: 0.8509 (0.8522) time: 0.1428 data: 0.0610 max mem: 9305 +Train: [86] [5600/6250] eta: 0:01:39 lr: 0.000006 grad: 0.1575 (0.1588) loss: 0.8506 (0.8522) time: 0.1505 data: 0.0714 max mem: 9305 +Train: [86] [5700/6250] eta: 0:01:24 lr: 0.000006 grad: 0.1650 (0.1589) loss: 0.8486 (0.8521) time: 0.1481 data: 0.0680 max mem: 9305 +Train: [86] [5800/6250] eta: 0:01:08 lr: 0.000006 grad: 0.1572 (0.1590) loss: 0.8456 (0.8520) time: 0.1350 data: 0.0469 max mem: 9305 +Train: [86] [5900/6250] eta: 0:00:53 lr: 0.000006 grad: 0.1496 (0.1590) loss: 0.8524 (0.8520) time: 0.1450 data: 0.0650 max mem: 9305 +Train: [86] [6000/6250] eta: 0:00:38 lr: 0.000006 grad: 0.1603 (0.1590) loss: 0.8510 (0.8519) time: 0.1437 data: 0.0639 max mem: 9305 +Train: [86] [6100/6250] eta: 0:00:23 lr: 0.000006 grad: 0.1653 (0.1590) loss: 0.8474 (0.8519) time: 0.1693 data: 0.0882 max mem: 9305 +Train: [86] [6200/6250] eta: 0:00:07 lr: 0.000006 grad: 0.1468 (0.1589) loss: 0.8580 (0.8519) time: 0.1901 data: 0.1133 max mem: 9305 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.1588 (0.1589) loss: 0.8515 (0.8518) time: 0.1795 data: 0.0883 max mem: 9305 +Train: [86] Total time: 0:16:07 (0.1547 s / it) +Averaged stats: lr: 0.000006 grad: 0.1588 (0.1589) loss: 0.8515 (0.8518) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:05:24 loss: 0.8692 (0.8692) time: 5.2294 data: 5.1959 max mem: 9305 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.8560 (0.8608) time: 0.1160 data: 0.0876 max mem: 9305 +Eval (hcp-train-subset): [86] Total time: 0:00:12 (0.2054 s / it) +Averaged stats (hcp-train-subset): loss: 0.8560 (0.8608) +Eval (hcp-val): [86] [ 0/62] eta: 0:05:41 loss: 0.8743 (0.8743) time: 5.5007 data: 5.4672 max mem: 9305 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.8718 (0.8743) time: 0.1187 data: 0.0880 max mem: 9305 +Eval (hcp-val): [86] Total time: 0:00:12 (0.2078 s / it) +Averaged stats (hcp-val): loss: 0.8718 (0.8743) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [87] [ 0/6250] eta: 7:36:09 lr: 0.000006 grad: 0.1097 (0.1097) loss: 0.9013 (0.9013) time: 4.3791 data: 4.0982 max mem: 9305 +Train: [87] [ 100/6250] eta: 0:20:21 lr: 0.000006 grad: 0.1681 (0.2099) loss: 0.8589 (0.8601) time: 0.1457 data: 0.0409 max mem: 9305 +Train: [87] [ 200/6250] eta: 0:17:31 lr: 0.000006 grad: 0.1550 (0.1961) loss: 0.8664 (0.8578) time: 0.1647 data: 0.0756 max mem: 9305 +Train: [87] [ 300/6250] eta: 0:16:06 lr: 0.000006 grad: 0.1436 (0.1831) loss: 0.8650 (0.8580) time: 0.1507 data: 0.0597 max mem: 9305 +Train: [87] [ 400/6250] eta: 0:15:33 lr: 0.000006 grad: 0.1461 (0.1775) loss: 0.8570 (0.8580) time: 0.1422 data: 0.0445 max mem: 9305 +Train: [87] [ 500/6250] eta: 0:14:52 lr: 0.000006 grad: 0.1405 (0.1733) loss: 0.8588 (0.8581) time: 0.1141 data: 0.0249 max mem: 9305 +Train: [87] [ 600/6250] eta: 0:14:20 lr: 0.000006 grad: 0.1594 (0.1704) loss: 0.8601 (0.8584) time: 0.1261 data: 0.0363 max mem: 9305 +Train: [87] [ 700/6250] eta: 0:14:14 lr: 0.000006 grad: 0.1710 (0.1698) loss: 0.8451 (0.8579) time: 0.1411 data: 0.0188 max mem: 9305 +Train: [87] [ 800/6250] eta: 0:13:52 lr: 0.000006 grad: 0.1724 (0.1691) loss: 0.8505 (0.8576) time: 0.1515 data: 0.0500 max mem: 9305 +Train: [87] [ 900/6250] eta: 0:14:13 lr: 0.000006 grad: 0.1756 (0.1686) loss: 0.8459 (0.8572) time: 0.2691 data: 0.1623 max mem: 9305 +Train: [87] [1000/6250] eta: 0:13:49 lr: 0.000006 grad: 0.1561 (0.1677) loss: 0.8570 (0.8572) time: 0.0878 data: 0.0002 max mem: 9305 +Train: [87] [1100/6250] eta: 0:13:35 lr: 0.000006 grad: 0.1483 (0.1662) loss: 0.8610 (0.8571) time: 0.0973 data: 0.0002 max mem: 9305 +Train: [87] [1200/6250] eta: 0:13:10 lr: 0.000006 grad: 0.1507 (0.1653) loss: 0.8553 (0.8571) time: 0.1324 data: 0.0315 max mem: 9305 +Train: [87] [1300/6250] eta: 0:12:53 lr: 0.000006 grad: 0.1507 (0.1645) loss: 0.8597 (0.8572) time: 0.1156 data: 0.0183 max mem: 9305 +Train: [87] [1400/6250] eta: 0:12:37 lr: 0.000005 grad: 0.1504 (0.1640) loss: 0.8572 (0.8572) time: 0.1272 data: 0.0413 max mem: 9305 +Train: [87] [1500/6250] eta: 0:12:33 lr: 0.000005 grad: 0.1582 (0.1636) loss: 0.8455 (0.8571) time: 0.3549 data: 0.2408 max mem: 9305 +Train: [87] [1600/6250] eta: 0:12:05 lr: 0.000005 grad: 0.1543 (0.1633) loss: 0.8583 (0.8570) time: 0.1398 data: 0.0653 max mem: 9305 +Train: [87] [1700/6250] eta: 0:11:43 lr: 0.000005 grad: 0.1589 (0.1631) loss: 0.8473 (0.8569) time: 0.1265 data: 0.0391 max mem: 9305 +Train: [87] [1800/6250] eta: 0:11:25 lr: 0.000005 grad: 0.1517 (0.1627) loss: 0.8531 (0.8569) time: 0.1529 data: 0.0719 max mem: 9305 +Train: [87] [1900/6250] eta: 0:11:06 lr: 0.000005 grad: 0.1522 (0.1623) loss: 0.8566 (0.8568) time: 0.1319 data: 0.0543 max mem: 9305 +Train: [87] [2000/6250] eta: 0:10:46 lr: 0.000005 grad: 0.1429 (0.1620) loss: 0.8565 (0.8569) time: 0.1381 data: 0.0597 max mem: 9305 +Train: [87] [2100/6250] eta: 0:10:31 lr: 0.000005 grad: 0.1508 (0.1617) loss: 0.8626 (0.8569) time: 0.1709 data: 0.0848 max mem: 9305 +Train: [87] [2200/6250] eta: 0:10:11 lr: 0.000005 grad: 0.1638 (0.1616) loss: 0.8526 (0.8567) time: 0.1361 data: 0.0554 max mem: 9305 +Train: [87] [2300/6250] eta: 0:09:55 lr: 0.000005 grad: 0.1481 (0.1618) loss: 0.8500 (0.8566) time: 0.1418 data: 0.0588 max mem: 9305 +Train: [87] [2400/6250] eta: 0:09:38 lr: 0.000005 grad: 0.1630 (0.1618) loss: 0.8498 (0.8564) time: 0.1432 data: 0.0563 max mem: 9305 +Train: [87] [2500/6250] eta: 0:09:25 lr: 0.000005 grad: 0.1559 (0.1617) loss: 0.8572 (0.8563) time: 0.1448 data: 0.0582 max mem: 9305 +Train: [87] [2600/6250] eta: 0:09:10 lr: 0.000005 grad: 0.1569 (0.1618) loss: 0.8500 (0.8561) time: 0.1482 data: 0.0688 max mem: 9305 +Train: [87] [2700/6250] eta: 0:08:54 lr: 0.000005 grad: 0.1660 (0.1621) loss: 0.8493 (0.8559) time: 0.1528 data: 0.0709 max mem: 9305 +Train: [87] [2800/6250] eta: 0:08:39 lr: 0.000005 grad: 0.1610 (0.1621) loss: 0.8518 (0.8557) time: 0.1450 data: 0.0609 max mem: 9305 +Train: [87] [2900/6250] eta: 0:08:23 lr: 0.000005 grad: 0.1671 (0.1621) loss: 0.8496 (0.8555) time: 0.1428 data: 0.0603 max mem: 9305 +Train: [87] [3000/6250] eta: 0:08:06 lr: 0.000005 grad: 0.1578 (0.1621) loss: 0.8511 (0.8554) time: 0.1115 data: 0.0255 max mem: 9305 +Train: [87] [3100/6250] eta: 0:07:49 lr: 0.000005 grad: 0.1541 (0.1621) loss: 0.8550 (0.8553) time: 0.1324 data: 0.0463 max mem: 9305 +Train: [87] [3200/6250] eta: 0:07:32 lr: 0.000005 grad: 0.1630 (0.1623) loss: 0.8461 (0.8553) time: 0.1350 data: 0.0448 max mem: 9305 +Train: [87] [3300/6250] eta: 0:07:14 lr: 0.000005 grad: 0.1558 (0.1622) loss: 0.8482 (0.8552) time: 0.1374 data: 0.0539 max mem: 9305 +Train: [87] [3400/6250] eta: 0:06:57 lr: 0.000005 grad: 0.1554 (0.1621) loss: 0.8444 (0.8551) time: 0.1047 data: 0.0191 max mem: 9305 +Train: [87] [3500/6250] eta: 0:06:41 lr: 0.000005 grad: 0.1447 (0.1619) loss: 0.8601 (0.8550) time: 0.1343 data: 0.0526 max mem: 9305 +Train: [87] [3600/6250] eta: 0:06:28 lr: 0.000005 grad: 0.1452 (0.1617) loss: 0.8517 (0.8549) time: 0.1318 data: 0.0556 max mem: 9305 +Train: [87] [3700/6250] eta: 0:06:17 lr: 0.000005 grad: 0.1600 (0.1614) loss: 0.8516 (0.8549) time: 0.3041 data: 0.2146 max mem: 9305 +Train: [87] [3800/6250] eta: 0:06:02 lr: 0.000005 grad: 0.1418 (0.1613) loss: 0.8504 (0.8548) time: 0.1793 data: 0.1027 max mem: 9305 +Train: [87] [3900/6250] eta: 0:05:48 lr: 0.000005 grad: 0.1479 (0.1611) loss: 0.8534 (0.8548) time: 0.1637 data: 0.0820 max mem: 9305 +Train: [87] [4000/6250] eta: 0:05:34 lr: 0.000005 grad: 0.1558 (0.1611) loss: 0.8510 (0.8547) time: 0.1677 data: 0.0792 max mem: 9305 +Train: [87] [4100/6250] eta: 0:05:19 lr: 0.000005 grad: 0.1483 (0.1609) loss: 0.8521 (0.8546) time: 0.1495 data: 0.0685 max mem: 9305 +Train: [87] [4200/6250] eta: 0:05:04 lr: 0.000005 grad: 0.1499 (0.1608) loss: 0.8450 (0.8545) time: 0.1302 data: 0.0376 max mem: 9305 +Train: [87] [4300/6250] eta: 0:04:49 lr: 0.000005 grad: 0.1547 (0.1606) loss: 0.8438 (0.8544) time: 0.1236 data: 0.0363 max mem: 9305 +Train: [87] [4400/6250] eta: 0:04:36 lr: 0.000005 grad: 0.1458 (0.1604) loss: 0.8542 (0.8544) time: 0.3282 data: 0.2368 max mem: 9305 +Train: [87] [4500/6250] eta: 0:04:21 lr: 0.000005 grad: 0.1464 (0.1603) loss: 0.8572 (0.8544) time: 0.1473 data: 0.0363 max mem: 9305 +Train: [87] [4600/6250] eta: 0:04:05 lr: 0.000005 grad: 0.1545 (0.1603) loss: 0.8554 (0.8543) time: 0.1329 data: 0.0508 max mem: 9305 +Train: [87] [4700/6250] eta: 0:03:50 lr: 0.000005 grad: 0.1537 (0.1602) loss: 0.8462 (0.8543) time: 0.1613 data: 0.0781 max mem: 9305 +Train: [87] [4800/6250] eta: 0:03:36 lr: 0.000005 grad: 0.1520 (0.1601) loss: 0.8500 (0.8542) time: 0.1095 data: 0.0223 max mem: 9305 +Train: [87] [4900/6250] eta: 0:03:21 lr: 0.000005 grad: 0.1561 (0.1600) loss: 0.8506 (0.8541) time: 0.1419 data: 0.0606 max mem: 9305 +Train: [87] [5000/6250] eta: 0:03:06 lr: 0.000005 grad: 0.1567 (0.1600) loss: 0.8460 (0.8541) time: 0.1442 data: 0.0594 max mem: 9305 +Train: [87] [5100/6250] eta: 0:02:51 lr: 0.000005 grad: 0.1674 (0.1600) loss: 0.8489 (0.8540) time: 0.1654 data: 0.0843 max mem: 9305 +Train: [87] [5200/6250] eta: 0:02:37 lr: 0.000005 grad: 0.1495 (0.1600) loss: 0.8588 (0.8539) time: 0.3199 data: 0.2141 max mem: 9305 +Train: [87] [5300/6250] eta: 0:02:22 lr: 0.000005 grad: 0.1621 (0.1600) loss: 0.8472 (0.8539) time: 0.1741 data: 0.0964 max mem: 9305 +Train: [87] [5400/6250] eta: 0:02:06 lr: 0.000005 grad: 0.1550 (0.1601) loss: 0.8425 (0.8538) time: 0.1464 data: 0.0626 max mem: 9305 +Train: [87] [5500/6250] eta: 0:01:52 lr: 0.000005 grad: 0.1641 (0.1601) loss: 0.8500 (0.8537) time: 0.1038 data: 0.0011 max mem: 9305 +Train: [87] [5600/6250] eta: 0:01:37 lr: 0.000005 grad: 0.1563 (0.1601) loss: 0.8499 (0.8536) time: 0.1254 data: 0.0463 max mem: 9305 +Train: [87] [5700/6250] eta: 0:01:21 lr: 0.000005 grad: 0.1571 (0.1600) loss: 0.8486 (0.8536) time: 0.1602 data: 0.0643 max mem: 9305 +Train: [87] [5800/6250] eta: 0:01:07 lr: 0.000005 grad: 0.1515 (0.1599) loss: 0.8545 (0.8535) time: 0.0886 data: 0.0004 max mem: 9305 +Train: [87] [5900/6250] eta: 0:00:52 lr: 0.000005 grad: 0.1566 (0.1598) loss: 0.8523 (0.8535) time: 0.1345 data: 0.0550 max mem: 9305 +Train: [87] [6000/6250] eta: 0:00:37 lr: 0.000005 grad: 0.1496 (0.1598) loss: 0.8537 (0.8535) time: 0.1432 data: 0.0538 max mem: 9305 +Train: [87] [6100/6250] eta: 0:00:22 lr: 0.000005 grad: 0.1522 (0.1598) loss: 0.8489 (0.8534) time: 0.1590 data: 0.0826 max mem: 9305 +Train: [87] [6200/6250] eta: 0:00:07 lr: 0.000005 grad: 0.1493 (0.1597) loss: 0.8544 (0.8534) time: 0.1388 data: 0.0584 max mem: 9305 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.1612 (0.1597) loss: 0.8514 (0.8534) time: 0.1509 data: 0.0588 max mem: 9305 +Train: [87] Total time: 0:15:34 (0.1495 s / it) +Averaged stats: lr: 0.000005 grad: 0.1612 (0.1597) loss: 0.8514 (0.8534) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:03:57 loss: 0.8646 (0.8646) time: 3.8350 data: 3.6988 max mem: 9305 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.8563 (0.8597) time: 0.1218 data: 0.0931 max mem: 9305 +Eval (hcp-train-subset): [87] Total time: 0:00:13 (0.2187 s / it) +Averaged stats (hcp-train-subset): loss: 0.8563 (0.8597) +Eval (hcp-val): [87] [ 0/62] eta: 0:04:53 loss: 0.8668 (0.8668) time: 4.7418 data: 4.7076 max mem: 9305 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.8748 (0.8745) time: 0.1290 data: 0.1000 max mem: 9305 +Eval (hcp-val): [87] Total time: 0:00:13 (0.2155 s / it) +Averaged stats (hcp-val): loss: 0.8748 (0.8745) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [88] [ 0/6250] eta: 9:09:36 lr: 0.000005 grad: 0.1962 (0.1962) loss: 0.8929 (0.8929) time: 5.2762 data: 5.1568 max mem: 9305 +Train: [88] [ 100/6250] eta: 0:20:15 lr: 0.000005 grad: 0.1759 (0.2003) loss: 0.8613 (0.8614) time: 0.1363 data: 0.0375 max mem: 9305 +Train: [88] [ 200/6250] eta: 0:16:58 lr: 0.000005 grad: 0.1648 (0.1888) loss: 0.8597 (0.8583) time: 0.1491 data: 0.0633 max mem: 9305 +Train: [88] [ 300/6250] eta: 0:15:38 lr: 0.000005 grad: 0.1598 (0.1826) loss: 0.8470 (0.8568) time: 0.1296 data: 0.0324 max mem: 9305 +Train: [88] [ 400/6250] eta: 0:14:43 lr: 0.000005 grad: 0.1477 (0.1786) loss: 0.8596 (0.8559) time: 0.1343 data: 0.0483 max mem: 9305 +Train: [88] [ 500/6250] eta: 0:14:01 lr: 0.000005 grad: 0.1590 (0.1751) loss: 0.8548 (0.8562) time: 0.1142 data: 0.0285 max mem: 9305 +Train: [88] [ 600/6250] eta: 0:13:20 lr: 0.000005 grad: 0.1746 (0.1745) loss: 0.8499 (0.8557) time: 0.1192 data: 0.0287 max mem: 9305 +Train: [88] [ 700/6250] eta: 0:12:53 lr: 0.000005 grad: 0.1592 (0.1736) loss: 0.8450 (0.8549) time: 0.1362 data: 0.0528 max mem: 9305 +Train: [88] [ 800/6250] eta: 0:12:44 lr: 0.000005 grad: 0.1746 (0.1730) loss: 0.8462 (0.8545) time: 0.1701 data: 0.0819 max mem: 9305 +Train: [88] [ 900/6250] eta: 0:12:46 lr: 0.000005 grad: 0.1647 (0.1719) loss: 0.8512 (0.8543) time: 0.1903 data: 0.1025 max mem: 9305 +Train: [88] [1000/6250] eta: 0:12:47 lr: 0.000005 grad: 0.1641 (0.1713) loss: 0.8486 (0.8540) time: 0.1717 data: 0.0761 max mem: 9305 +Train: [88] [1100/6250] eta: 0:12:46 lr: 0.000005 grad: 0.1673 (0.1711) loss: 0.8404 (0.8534) time: 0.2133 data: 0.1258 max mem: 9305 +Train: [88] [1200/6250] eta: 0:12:34 lr: 0.000005 grad: 0.1686 (0.1708) loss: 0.8476 (0.8531) time: 0.1695 data: 0.0815 max mem: 9305 +Train: [88] [1300/6250] eta: 0:12:14 lr: 0.000005 grad: 0.1641 (0.1708) loss: 0.8504 (0.8528) time: 0.1438 data: 0.0586 max mem: 9305 +Train: [88] [1400/6250] eta: 0:11:57 lr: 0.000005 grad: 0.1606 (0.1706) loss: 0.8493 (0.8525) time: 0.1357 data: 0.0480 max mem: 9305 +Train: [88] [1500/6250] eta: 0:11:43 lr: 0.000005 grad: 0.1611 (0.1700) loss: 0.8451 (0.8524) time: 0.1404 data: 0.0570 max mem: 9305 +Train: [88] [1600/6250] eta: 0:11:24 lr: 0.000005 grad: 0.1451 (0.1694) loss: 0.8625 (0.8523) time: 0.1366 data: 0.0466 max mem: 9305 +Train: [88] [1700/6250] eta: 0:11:16 lr: 0.000005 grad: 0.1486 (0.1688) loss: 0.8509 (0.8522) time: 0.1619 data: 0.0689 max mem: 9305 +Train: [88] [1800/6250] eta: 0:10:59 lr: 0.000005 grad: 0.1547 (0.1681) loss: 0.8523 (0.8521) time: 0.1601 data: 0.0783 max mem: 9305 +Train: [88] [1900/6250] eta: 0:10:44 lr: 0.000005 grad: 0.1589 (0.1678) loss: 0.8437 (0.8520) time: 0.1532 data: 0.0738 max mem: 9305 +Train: [88] [2000/6250] eta: 0:10:32 lr: 0.000005 grad: 0.1689 (0.1677) loss: 0.8496 (0.8518) time: 0.1384 data: 0.0586 max mem: 9305 +Train: [88] [2100/6250] eta: 0:10:26 lr: 0.000005 grad: 0.1429 (0.1673) loss: 0.8585 (0.8520) time: 0.3483 data: 0.2305 max mem: 9305 +Train: [88] [2200/6250] eta: 0:10:11 lr: 0.000005 grad: 0.1682 (0.1674) loss: 0.8491 (0.8519) time: 0.1711 data: 0.0768 max mem: 9305 +Train: [88] [2300/6250] eta: 0:09:54 lr: 0.000005 grad: 0.1705 (0.1674) loss: 0.8444 (0.8518) time: 0.1584 data: 0.0776 max mem: 9305 +Train: [88] [2400/6250] eta: 0:09:38 lr: 0.000005 grad: 0.1683 (0.1676) loss: 0.8537 (0.8517) time: 0.0996 data: 0.0128 max mem: 9305 +Train: [88] [2500/6250] eta: 0:09:22 lr: 0.000005 grad: 0.1633 (0.1681) loss: 0.8501 (0.8516) time: 0.1456 data: 0.0639 max mem: 9305 +Train: [88] [2600/6250] eta: 0:09:08 lr: 0.000005 grad: 0.1773 (0.1686) loss: 0.8445 (0.8513) time: 0.1516 data: 0.0649 max mem: 9305 +Train: [88] [2700/6250] eta: 0:08:51 lr: 0.000005 grad: 0.1711 (0.1688) loss: 0.8413 (0.8511) time: 0.1223 data: 0.0312 max mem: 9305 +Train: [88] [2800/6250] eta: 0:08:35 lr: 0.000005 grad: 0.1769 (0.1690) loss: 0.8434 (0.8510) time: 0.1417 data: 0.0521 max mem: 9305 +Train: [88] [2900/6250] eta: 0:08:18 lr: 0.000004 grad: 0.1664 (0.1690) loss: 0.8512 (0.8508) time: 0.1261 data: 0.0364 max mem: 9305 +Train: [88] [3000/6250] eta: 0:08:01 lr: 0.000004 grad: 0.1776 (0.1691) loss: 0.8499 (0.8508) time: 0.1433 data: 0.0604 max mem: 9305 +Train: [88] [3100/6250] eta: 0:07:45 lr: 0.000004 grad: 0.1660 (0.1691) loss: 0.8526 (0.8507) time: 0.1459 data: 0.0597 max mem: 9305 +Train: [88] [3200/6250] eta: 0:07:31 lr: 0.000004 grad: 0.1684 (0.1692) loss: 0.8444 (0.8505) time: 0.0872 data: 0.0002 max mem: 9305 +Train: [88] [3300/6250] eta: 0:07:20 lr: 0.000004 grad: 0.1633 (0.1692) loss: 0.8544 (0.8505) time: 0.2669 data: 0.1735 max mem: 9305 +Train: [88] [3400/6250] eta: 0:07:08 lr: 0.000004 grad: 0.1812 (0.1693) loss: 0.8543 (0.8505) time: 0.3257 data: 0.2185 max mem: 9305 +Train: [88] [3500/6250] eta: 0:06:54 lr: 0.000004 grad: 0.1718 (0.1694) loss: 0.8421 (0.8504) time: 0.1111 data: 0.0199 max mem: 9305 +Train: [88] [3600/6250] eta: 0:06:38 lr: 0.000004 grad: 0.1760 (0.1694) loss: 0.8444 (0.8503) time: 0.1477 data: 0.0563 max mem: 9305 +Train: [88] [3700/6250] eta: 0:06:22 lr: 0.000004 grad: 0.1609 (0.1692) loss: 0.8485 (0.8502) time: 0.1383 data: 0.0573 max mem: 9305 +Train: [88] [3800/6250] eta: 0:06:10 lr: 0.000004 grad: 0.1523 (0.1692) loss: 0.8439 (0.8501) time: 0.1056 data: 0.0038 max mem: 9305 +Train: [88] [3900/6250] eta: 0:05:54 lr: 0.000004 grad: 0.1659 (0.1690) loss: 0.8453 (0.8500) time: 0.1498 data: 0.0677 max mem: 9305 +Train: [88] [4000/6250] eta: 0:05:38 lr: 0.000004 grad: 0.1620 (0.1690) loss: 0.8494 (0.8500) time: 0.1239 data: 0.0414 max mem: 9305 +Train: [88] [4100/6250] eta: 0:05:22 lr: 0.000004 grad: 0.1595 (0.1688) loss: 0.8486 (0.8500) time: 0.1181 data: 0.0364 max mem: 9305 +Train: [88] [4200/6250] eta: 0:05:06 lr: 0.000004 grad: 0.1629 (0.1688) loss: 0.8469 (0.8500) time: 0.1229 data: 0.0374 max mem: 9305 +Train: [88] [4300/6250] eta: 0:04:51 lr: 0.000004 grad: 0.1683 (0.1688) loss: 0.8534 (0.8499) time: 0.1404 data: 0.0519 max mem: 9305 +Train: [88] [4400/6250] eta: 0:04:36 lr: 0.000004 grad: 0.1569 (0.1688) loss: 0.8508 (0.8499) time: 0.1527 data: 0.0706 max mem: 9305 +Train: [88] [4500/6250] eta: 0:04:21 lr: 0.000004 grad: 0.1506 (0.1687) loss: 0.8530 (0.8499) time: 0.1933 data: 0.1153 max mem: 9305 +Train: [88] [4600/6250] eta: 0:04:06 lr: 0.000004 grad: 0.1557 (0.1686) loss: 0.8497 (0.8499) time: 0.2068 data: 0.1229 max mem: 9305 +Train: [88] [4700/6250] eta: 0:03:51 lr: 0.000004 grad: 0.1459 (0.1685) loss: 0.8552 (0.8499) time: 0.1520 data: 0.0745 max mem: 9305 +Train: [88] [4800/6250] eta: 0:03:36 lr: 0.000004 grad: 0.1612 (0.1684) loss: 0.8457 (0.8499) time: 0.1518 data: 0.0687 max mem: 9305 +Train: [88] [4900/6250] eta: 0:03:22 lr: 0.000004 grad: 0.1628 (0.1683) loss: 0.8462 (0.8499) time: 0.1695 data: 0.0882 max mem: 9305 +Train: [88] [5000/6250] eta: 0:03:07 lr: 0.000004 grad: 0.1487 (0.1682) loss: 0.8494 (0.8499) time: 0.1630 data: 0.0843 max mem: 9305 +Train: [88] [5100/6250] eta: 0:02:52 lr: 0.000004 grad: 0.1615 (0.1683) loss: 0.8479 (0.8499) time: 0.1336 data: 0.0433 max mem: 9305 +Train: [88] [5200/6250] eta: 0:02:37 lr: 0.000004 grad: 0.1760 (0.1684) loss: 0.8431 (0.8499) time: 0.1420 data: 0.0622 max mem: 9305 +Train: [88] [5300/6250] eta: 0:02:22 lr: 0.000004 grad: 0.1656 (0.1685) loss: 0.8451 (0.8498) time: 0.1202 data: 0.0329 max mem: 9305 +Train: [88] [5400/6250] eta: 0:02:06 lr: 0.000004 grad: 0.1699 (0.1685) loss: 0.8461 (0.8498) time: 0.1175 data: 0.0360 max mem: 9305 +Train: [88] [5500/6250] eta: 0:01:52 lr: 0.000004 grad: 0.1619 (0.1684) loss: 0.8514 (0.8498) time: 0.1830 data: 0.1036 max mem: 9305 +Train: [88] [5600/6250] eta: 0:01:36 lr: 0.000004 grad: 0.1521 (0.1684) loss: 0.8532 (0.8498) time: 0.1337 data: 0.0520 max mem: 9305 +Train: [88] [5700/6250] eta: 0:01:21 lr: 0.000004 grad: 0.1626 (0.1684) loss: 0.8530 (0.8498) time: 0.1486 data: 0.0650 max mem: 9305 +Train: [88] [5800/6250] eta: 0:01:06 lr: 0.000004 grad: 0.1617 (0.1684) loss: 0.8578 (0.8498) time: 0.1583 data: 0.0763 max mem: 9305 +Train: [88] [5900/6250] eta: 0:00:52 lr: 0.000004 grad: 0.1535 (0.1683) loss: 0.8548 (0.8499) time: 0.1452 data: 0.0636 max mem: 9305 +Train: [88] [6000/6250] eta: 0:00:37 lr: 0.000004 grad: 0.1617 (0.1682) loss: 0.8453 (0.8499) time: 0.1063 data: 0.0198 max mem: 9305 +Train: [88] [6100/6250] eta: 0:00:22 lr: 0.000004 grad: 0.1573 (0.1682) loss: 0.8504 (0.8499) time: 0.1472 data: 0.0604 max mem: 9305 +Train: [88] [6200/6250] eta: 0:00:07 lr: 0.000004 grad: 0.1528 (0.1680) loss: 0.8564 (0.8499) time: 0.1015 data: 0.0062 max mem: 9305 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.1590 (0.1679) loss: 0.8548 (0.8499) time: 0.1338 data: 0.0564 max mem: 9305 +Train: [88] Total time: 0:15:36 (0.1499 s / it) +Averaged stats: lr: 0.000004 grad: 0.1590 (0.1679) loss: 0.8548 (0.8499) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:04:12 loss: 0.8606 (0.8606) time: 4.0704 data: 4.0051 max mem: 9305 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.8553 (0.8590) time: 0.1244 data: 0.0947 max mem: 9305 +Eval (hcp-train-subset): [88] Total time: 0:00:14 (0.2268 s / it) +Averaged stats (hcp-train-subset): loss: 0.8553 (0.8590) +Eval (hcp-val): [88] [ 0/62] eta: 0:04:47 loss: 0.8694 (0.8694) time: 4.6400 data: 4.5774 max mem: 9305 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.8724 (0.8741) time: 0.1444 data: 0.1155 max mem: 9305 +Eval (hcp-val): [88] Total time: 0:00:13 (0.2197 s / it) +Averaged stats (hcp-val): loss: 0.8724 (0.8741) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 8:49:42 lr: 0.000004 grad: 0.2600 (0.2600) loss: 0.8894 (0.8894) time: 5.0852 data: 4.7568 max mem: 9305 +Train: [89] [ 100/6250] eta: 0:21:06 lr: 0.000004 grad: 0.1579 (0.2084) loss: 0.8731 (0.8650) time: 0.1599 data: 0.0460 max mem: 9305 +Train: [89] [ 200/6250] eta: 0:17:43 lr: 0.000004 grad: 0.1524 (0.1868) loss: 0.8623 (0.8620) time: 0.1388 data: 0.0345 max mem: 9305 +Train: [89] [ 300/6250] eta: 0:16:30 lr: 0.000004 grad: 0.1376 (0.1793) loss: 0.8672 (0.8594) time: 0.1257 data: 0.0182 max mem: 9305 +Train: [89] [ 400/6250] eta: 0:15:19 lr: 0.000004 grad: 0.1793 (0.1794) loss: 0.8521 (0.8571) time: 0.1193 data: 0.0314 max mem: 9305 +Train: [89] [ 500/6250] eta: 0:14:41 lr: 0.000004 grad: 0.1707 (0.1792) loss: 0.8498 (0.8558) time: 0.1188 data: 0.0287 max mem: 9305 +Train: [89] [ 600/6250] eta: 0:14:02 lr: 0.000004 grad: 0.1479 (0.1782) loss: 0.8490 (0.8547) time: 0.1274 data: 0.0386 max mem: 9305 +Train: [89] [ 700/6250] eta: 0:13:34 lr: 0.000004 grad: 0.1609 (0.1772) loss: 0.8513 (0.8542) time: 0.1206 data: 0.0364 max mem: 9305 +Train: [89] [ 800/6250] eta: 0:13:16 lr: 0.000004 grad: 0.1492 (0.1751) loss: 0.8572 (0.8540) time: 0.1447 data: 0.0523 max mem: 9305 +Train: [89] [ 900/6250] eta: 0:13:17 lr: 0.000004 grad: 0.1579 (0.1735) loss: 0.8576 (0.8542) time: 0.1495 data: 0.0474 max mem: 9305 +Train: [89] [1000/6250] eta: 0:13:18 lr: 0.000004 grad: 0.1572 (0.1722) loss: 0.8556 (0.8542) time: 0.2056 data: 0.1164 max mem: 9305 +Train: [89] [1100/6250] eta: 0:13:11 lr: 0.000004 grad: 0.1449 (0.1705) loss: 0.8526 (0.8543) time: 0.1153 data: 0.0356 max mem: 9305 +Train: [89] [1200/6250] eta: 0:13:01 lr: 0.000004 grad: 0.1553 (0.1694) loss: 0.8530 (0.8543) time: 0.1956 data: 0.1099 max mem: 9305 +Train: [89] [1300/6250] eta: 0:12:51 lr: 0.000004 grad: 0.1654 (0.1689) loss: 0.8528 (0.8541) time: 0.1961 data: 0.1107 max mem: 9305 +Train: [89] [1400/6250] eta: 0:12:43 lr: 0.000004 grad: 0.1562 (0.1683) loss: 0.8501 (0.8539) time: 0.2519 data: 0.1703 max mem: 9305 +Train: [89] [1500/6250] eta: 0:12:28 lr: 0.000004 grad: 0.1495 (0.1681) loss: 0.8571 (0.8538) time: 0.1553 data: 0.0657 max mem: 9305 +Train: [89] [1600/6250] eta: 0:12:13 lr: 0.000004 grad: 0.1582 (0.1677) loss: 0.8515 (0.8538) time: 0.1180 data: 0.0285 max mem: 9305 +Train: [89] [1700/6250] eta: 0:11:58 lr: 0.000004 grad: 0.1624 (0.1678) loss: 0.8515 (0.8536) time: 0.1881 data: 0.1082 max mem: 9305 +Train: [89] [1800/6250] eta: 0:11:43 lr: 0.000004 grad: 0.1528 (0.1676) loss: 0.8479 (0.8534) time: 0.1813 data: 0.0978 max mem: 9305 +Train: [89] [1900/6250] eta: 0:11:31 lr: 0.000004 grad: 0.1453 (0.1673) loss: 0.8537 (0.8535) time: 0.2447 data: 0.1635 max mem: 9305 +Train: [89] [2000/6250] eta: 0:11:14 lr: 0.000004 grad: 0.1618 (0.1669) loss: 0.8495 (0.8534) time: 0.1452 data: 0.0501 max mem: 9305 +Train: [89] [2100/6250] eta: 0:10:58 lr: 0.000004 grad: 0.1492 (0.1666) loss: 0.8550 (0.8533) time: 0.1388 data: 0.0571 max mem: 9305 +Train: [89] [2200/6250] eta: 0:10:39 lr: 0.000004 grad: 0.1588 (0.1663) loss: 0.8450 (0.8531) time: 0.1403 data: 0.0537 max mem: 9305 +Train: [89] [2300/6250] eta: 0:10:23 lr: 0.000004 grad: 0.1502 (0.1660) loss: 0.8506 (0.8531) time: 0.1564 data: 0.0693 max mem: 9305 +Train: [89] [2400/6250] eta: 0:10:08 lr: 0.000004 grad: 0.1595 (0.1657) loss: 0.8468 (0.8530) time: 0.1524 data: 0.0676 max mem: 9305 +Train: [89] [2500/6250] eta: 0:09:51 lr: 0.000004 grad: 0.1577 (0.1658) loss: 0.8496 (0.8528) time: 0.1397 data: 0.0566 max mem: 9305 +Train: [89] [2600/6250] eta: 0:09:33 lr: 0.000004 grad: 0.1605 (0.1657) loss: 0.8487 (0.8528) time: 0.1375 data: 0.0597 max mem: 9305 +Train: [89] [2700/6250] eta: 0:09:14 lr: 0.000004 grad: 0.1457 (0.1655) loss: 0.8553 (0.8527) time: 0.1268 data: 0.0331 max mem: 9305 +Train: [89] [2800/6250] eta: 0:08:56 lr: 0.000004 grad: 0.1609 (0.1653) loss: 0.8517 (0.8527) time: 0.1585 data: 0.0757 max mem: 9305 +Train: [89] [2900/6250] eta: 0:08:40 lr: 0.000004 grad: 0.1558 (0.1652) loss: 0.8520 (0.8526) time: 0.1409 data: 0.0532 max mem: 9305 +Train: [89] [3000/6250] eta: 0:08:24 lr: 0.000004 grad: 0.1560 (0.1651) loss: 0.8496 (0.8525) time: 0.1419 data: 0.0589 max mem: 9305 +Train: [89] [3100/6250] eta: 0:08:08 lr: 0.000004 grad: 0.1578 (0.1650) loss: 0.8542 (0.8525) time: 0.1474 data: 0.0621 max mem: 9305 +Train: [89] [3200/6250] eta: 0:07:52 lr: 0.000004 grad: 0.1524 (0.1650) loss: 0.8537 (0.8525) time: 0.1455 data: 0.0635 max mem: 9305 +Train: [89] [3300/6250] eta: 0:07:36 lr: 0.000004 grad: 0.1653 (0.1650) loss: 0.8533 (0.8524) time: 0.1244 data: 0.0486 max mem: 9305 +Train: [89] [3400/6250] eta: 0:07:19 lr: 0.000004 grad: 0.1635 (0.1650) loss: 0.8491 (0.8524) time: 0.1414 data: 0.0607 max mem: 9305 +Train: [89] [3500/6250] eta: 0:07:03 lr: 0.000004 grad: 0.1467 (0.1650) loss: 0.8513 (0.8524) time: 0.1479 data: 0.0691 max mem: 9305 +Train: [89] [3600/6250] eta: 0:06:46 lr: 0.000004 grad: 0.1653 (0.1650) loss: 0.8464 (0.8524) time: 0.1290 data: 0.0428 max mem: 9305 +Train: [89] [3700/6250] eta: 0:06:30 lr: 0.000004 grad: 0.1621 (0.1650) loss: 0.8505 (0.8523) time: 0.1189 data: 0.0349 max mem: 9305 +Train: [89] [3800/6250] eta: 0:06:14 lr: 0.000004 grad: 0.1599 (0.1649) loss: 0.8573 (0.8523) time: 0.1515 data: 0.0645 max mem: 9305 +Train: [89] [3900/6250] eta: 0:05:58 lr: 0.000004 grad: 0.1580 (0.1649) loss: 0.8522 (0.8524) time: 0.1476 data: 0.0646 max mem: 9305 +Train: [89] [4000/6250] eta: 0:05:42 lr: 0.000004 grad: 0.1637 (0.1648) loss: 0.8482 (0.8523) time: 0.0946 data: 0.0096 max mem: 9305 +Train: [89] [4100/6250] eta: 0:05:27 lr: 0.000004 grad: 0.1540 (0.1648) loss: 0.8446 (0.8522) time: 0.1613 data: 0.0724 max mem: 9305 +Train: [89] [4200/6250] eta: 0:05:11 lr: 0.000004 grad: 0.1513 (0.1648) loss: 0.8509 (0.8521) time: 0.1377 data: 0.0377 max mem: 9305 +Train: [89] [4300/6250] eta: 0:04:55 lr: 0.000004 grad: 0.1665 (0.1648) loss: 0.8468 (0.8521) time: 0.1425 data: 0.0631 max mem: 9305 +Train: [89] [4400/6250] eta: 0:04:39 lr: 0.000004 grad: 0.1647 (0.1649) loss: 0.8443 (0.8520) time: 0.1150 data: 0.0323 max mem: 9305 +Train: [89] [4500/6250] eta: 0:04:23 lr: 0.000004 grad: 0.1621 (0.1651) loss: 0.8426 (0.8519) time: 0.1415 data: 0.0463 max mem: 9305 +Train: [89] [4600/6250] eta: 0:04:09 lr: 0.000004 grad: 0.1562 (0.1651) loss: 0.8502 (0.8518) time: 0.0924 data: 0.0002 max mem: 9305 +Train: [89] [4700/6250] eta: 0:03:53 lr: 0.000004 grad: 0.1589 (0.1650) loss: 0.8514 (0.8517) time: 0.1498 data: 0.0719 max mem: 9305 +Train: [89] [4800/6250] eta: 0:03:38 lr: 0.000004 grad: 0.1578 (0.1649) loss: 0.8502 (0.8516) time: 0.1251 data: 0.0389 max mem: 9305 +Train: [89] [4900/6250] eta: 0:03:23 lr: 0.000004 grad: 0.1587 (0.1648) loss: 0.8497 (0.8516) time: 0.1341 data: 0.0499 max mem: 9305 +Train: [89] [5000/6250] eta: 0:03:08 lr: 0.000004 grad: 0.1627 (0.1648) loss: 0.8436 (0.8515) time: 0.1317 data: 0.0423 max mem: 9305 +Train: [89] [5100/6250] eta: 0:02:52 lr: 0.000004 grad: 0.1595 (0.1648) loss: 0.8528 (0.8515) time: 0.1461 data: 0.0657 max mem: 9305 +Train: [89] [5200/6250] eta: 0:02:37 lr: 0.000003 grad: 0.1559 (0.1647) loss: 0.8526 (0.8515) time: 0.1485 data: 0.0690 max mem: 9305 +Train: [89] [5300/6250] eta: 0:02:22 lr: 0.000003 grad: 0.1484 (0.1647) loss: 0.8589 (0.8515) time: 0.1315 data: 0.0476 max mem: 9305 +Train: [89] [5400/6250] eta: 0:02:07 lr: 0.000003 grad: 0.1544 (0.1646) loss: 0.8511 (0.8515) time: 0.1408 data: 0.0547 max mem: 9305 +Train: [89] [5500/6250] eta: 0:01:53 lr: 0.000003 grad: 0.1454 (0.1646) loss: 0.8568 (0.8516) time: 0.1458 data: 0.0602 max mem: 9305 +Train: [89] [5600/6250] eta: 0:01:37 lr: 0.000003 grad: 0.1564 (0.1645) loss: 0.8510 (0.8516) time: 0.1925 data: 0.1161 max mem: 9305 +Train: [89] [5700/6250] eta: 0:01:22 lr: 0.000003 grad: 0.1582 (0.1643) loss: 0.8493 (0.8516) time: 0.1372 data: 0.0560 max mem: 9305 +Train: [89] [5800/6250] eta: 0:01:07 lr: 0.000003 grad: 0.1508 (0.1641) loss: 0.8542 (0.8517) time: 0.1050 data: 0.0148 max mem: 9305 +Train: [89] [5900/6250] eta: 0:00:52 lr: 0.000003 grad: 0.1642 (0.1641) loss: 0.8476 (0.8517) time: 0.1372 data: 0.0556 max mem: 9305 +Train: [89] [6000/6250] eta: 0:00:37 lr: 0.000003 grad: 0.1516 (0.1641) loss: 0.8528 (0.8516) time: 0.1545 data: 0.0754 max mem: 9305 +Train: [89] [6100/6250] eta: 0:00:22 lr: 0.000003 grad: 0.1604 (0.1641) loss: 0.8539 (0.8516) time: 0.1597 data: 0.0800 max mem: 9305 +Train: [89] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1522 (0.1640) loss: 0.8581 (0.8517) time: 0.1374 data: 0.0536 max mem: 9305 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1604 (0.1640) loss: 0.8508 (0.8517) time: 0.1401 data: 0.0613 max mem: 9305 +Train: [89] Total time: 0:15:43 (0.1509 s / it) +Averaged stats: lr: 0.000003 grad: 0.1604 (0.1640) loss: 0.8508 (0.8517) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:03:56 loss: 0.8637 (0.8637) time: 3.8219 data: 3.7451 max mem: 9305 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.8569 (0.8588) time: 0.1336 data: 0.1038 max mem: 9305 +Eval (hcp-train-subset): [89] Total time: 0:00:13 (0.2167 s / it) +Averaged stats (hcp-train-subset): loss: 0.8569 (0.8588) +Making plots (hcp-train-subset): example=55 +Eval (hcp-val): [89] [ 0/62] eta: 0:05:21 loss: 0.8680 (0.8680) time: 5.1915 data: 5.1090 max mem: 9305 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.8744 (0.8737) time: 0.1481 data: 0.1196 max mem: 9305 +Eval (hcp-val): [89] Total time: 0:00:15 (0.2502 s / it) +Averaged stats (hcp-val): loss: 0.8744 (0.8737) +Making plots (hcp-val): example=15 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [90] [ 0/6250] eta: 10:04:08 lr: 0.000003 grad: 0.5059 (0.5059) loss: 0.8073 (0.8073) time: 5.7997 data: 5.5937 max mem: 9305 +Train: [90] [ 100/6250] eta: 0:21:58 lr: 0.000003 grad: 0.1549 (0.1906) loss: 0.8675 (0.8702) time: 0.1364 data: 0.0398 max mem: 9305 +Train: [90] [ 200/6250] eta: 0:18:53 lr: 0.000003 grad: 0.1570 (0.1761) loss: 0.8513 (0.8672) time: 0.1372 data: 0.0271 max mem: 9305 +Train: [90] [ 300/6250] eta: 0:17:25 lr: 0.000003 grad: 0.1505 (0.1742) loss: 0.8572 (0.8653) time: 0.1469 data: 0.0374 max mem: 9305 +Train: [90] [ 400/6250] eta: 0:16:53 lr: 0.000003 grad: 0.1752 (0.1742) loss: 0.8520 (0.8626) time: 0.1109 data: 0.0003 max mem: 9305 +Train: [90] [ 500/6250] eta: 0:15:50 lr: 0.000003 grad: 0.1679 (0.1757) loss: 0.8581 (0.8603) time: 0.1424 data: 0.0532 max mem: 9305 +Train: [90] [ 600/6250] eta: 0:15:35 lr: 0.000003 grad: 0.1410 (0.1750) loss: 0.8587 (0.8591) time: 0.1361 data: 0.0371 max mem: 9305 +Train: [90] [ 700/6250] eta: 0:15:25 lr: 0.000003 grad: 0.1496 (0.1740) loss: 0.8558 (0.8581) time: 0.1802 data: 0.0763 max mem: 9305 +Train: [90] [ 800/6250] eta: 0:15:16 lr: 0.000003 grad: 0.1660 (0.1726) loss: 0.8537 (0.8576) time: 0.1296 data: 0.0216 max mem: 9305 +Train: [90] [ 900/6250] eta: 0:14:47 lr: 0.000003 grad: 0.1532 (0.1714) loss: 0.8556 (0.8571) time: 0.1763 data: 0.0867 max mem: 9305 +Train: [90] [1000/6250] eta: 0:14:27 lr: 0.000003 grad: 0.1541 (0.1705) loss: 0.8550 (0.8566) time: 0.1020 data: 0.0002 max mem: 9305 +Train: [90] [1100/6250] eta: 0:14:03 lr: 0.000003 grad: 0.1553 (0.1699) loss: 0.8560 (0.8564) time: 0.1562 data: 0.0748 max mem: 9305 +Train: [90] [1200/6250] eta: 0:13:39 lr: 0.000003 grad: 0.1546 (0.1688) loss: 0.8533 (0.8561) time: 0.0980 data: 0.0096 max mem: 9305 +Train: [90] [1300/6250] eta: 0:13:14 lr: 0.000003 grad: 0.1576 (0.1678) loss: 0.8530 (0.8556) time: 0.1561 data: 0.0753 max mem: 9305 +Train: [90] [1400/6250] eta: 0:12:55 lr: 0.000003 grad: 0.1510 (0.1672) loss: 0.8519 (0.8553) time: 0.1315 data: 0.0283 max mem: 9305 +Train: [90] [1500/6250] eta: 0:12:36 lr: 0.000003 grad: 0.1472 (0.1666) loss: 0.8561 (0.8552) time: 0.1274 data: 0.0462 max mem: 9305 +Train: [90] [1600/6250] eta: 0:12:14 lr: 0.000003 grad: 0.1444 (0.1659) loss: 0.8562 (0.8551) time: 0.1408 data: 0.0616 max mem: 9305 +Train: [90] [1700/6250] eta: 0:11:58 lr: 0.000003 grad: 0.1515 (0.1655) loss: 0.8504 (0.8549) time: 0.1862 data: 0.0855 max mem: 9305 +Train: [90] [1800/6250] eta: 0:11:40 lr: 0.000003 grad: 0.1581 (0.1653) loss: 0.8505 (0.8548) time: 0.1429 data: 0.0556 max mem: 9305 +Train: [90] [1900/6250] eta: 0:11:21 lr: 0.000003 grad: 0.1502 (0.1649) loss: 0.8535 (0.8548) time: 0.1588 data: 0.0725 max mem: 9305 +Train: [90] [2000/6250] eta: 0:11:03 lr: 0.000003 grad: 0.1572 (0.1649) loss: 0.8497 (0.8548) time: 0.1438 data: 0.0603 max mem: 9305 +Train: [90] [2100/6250] eta: 0:10:45 lr: 0.000003 grad: 0.1504 (0.1646) loss: 0.8487 (0.8547) time: 0.1502 data: 0.0591 max mem: 9305 +Train: [90] [2200/6250] eta: 0:10:26 lr: 0.000003 grad: 0.1505 (0.1644) loss: 0.8597 (0.8547) time: 0.1210 data: 0.0326 max mem: 9305 +Train: [90] [2300/6250] eta: 0:10:08 lr: 0.000003 grad: 0.1528 (0.1642) loss: 0.8599 (0.8547) time: 0.1297 data: 0.0416 max mem: 9305 +Train: [90] [2400/6250] eta: 0:09:47 lr: 0.000003 grad: 0.1599 (0.1641) loss: 0.8450 (0.8546) time: 0.1109 data: 0.0124 max mem: 9305 +Train: [90] [2500/6250] eta: 0:09:28 lr: 0.000003 grad: 0.1606 (0.1640) loss: 0.8554 (0.8546) time: 0.1355 data: 0.0575 max mem: 9305 +Train: [90] [2600/6250] eta: 0:09:08 lr: 0.000003 grad: 0.1544 (0.1639) loss: 0.8509 (0.8546) time: 0.1001 data: 0.0138 max mem: 9305 +Train: [90] [2700/6250] eta: 0:08:50 lr: 0.000003 grad: 0.1458 (0.1638) loss: 0.8563 (0.8545) time: 0.1552 data: 0.0713 max mem: 9305 +Train: [90] [2800/6250] eta: 0:08:35 lr: 0.000003 grad: 0.1413 (0.1636) loss: 0.8548 (0.8544) time: 0.1570 data: 0.0718 max mem: 9305 +Train: [90] [2900/6250] eta: 0:08:19 lr: 0.000003 grad: 0.1493 (0.1634) loss: 0.8534 (0.8543) time: 0.1447 data: 0.0618 max mem: 9305 +Train: [90] [3000/6250] eta: 0:08:03 lr: 0.000003 grad: 0.1645 (0.1632) loss: 0.8513 (0.8543) time: 0.1443 data: 0.0637 max mem: 9305 +Train: [90] [3100/6250] eta: 0:07:47 lr: 0.000003 grad: 0.1534 (0.1631) loss: 0.8528 (0.8542) time: 0.1416 data: 0.0588 max mem: 9305 +Train: [90] [3200/6250] eta: 0:07:33 lr: 0.000003 grad: 0.1611 (0.1630) loss: 0.8519 (0.8541) time: 0.1502 data: 0.0730 max mem: 9305 +Train: [90] [3300/6250] eta: 0:07:17 lr: 0.000003 grad: 0.1472 (0.1628) loss: 0.8585 (0.8541) time: 0.1676 data: 0.0738 max mem: 9305 +Train: [90] [3400/6250] eta: 0:07:03 lr: 0.000003 grad: 0.1436 (0.1626) loss: 0.8532 (0.8542) time: 0.0953 data: 0.0153 max mem: 9305 +Train: [90] [3500/6250] eta: 0:06:47 lr: 0.000003 grad: 0.1587 (0.1626) loss: 0.8521 (0.8541) time: 0.1376 data: 0.0595 max mem: 9305 +Train: [90] [3600/6250] eta: 0:06:33 lr: 0.000003 grad: 0.1533 (0.1626) loss: 0.8554 (0.8541) time: 0.1123 data: 0.0203 max mem: 9305 +Train: [90] [3700/6250] eta: 0:06:19 lr: 0.000003 grad: 0.1602 (0.1625) loss: 0.8538 (0.8541) time: 0.1372 data: 0.0505 max mem: 9305 +Train: [90] [3800/6250] eta: 0:06:03 lr: 0.000003 grad: 0.1506 (0.1626) loss: 0.8555 (0.8541) time: 0.1195 data: 0.0346 max mem: 9305 +Train: [90] [3900/6250] eta: 0:05:47 lr: 0.000003 grad: 0.1536 (0.1624) loss: 0.8592 (0.8542) time: 0.1247 data: 0.0370 max mem: 9305 +Train: [90] [4000/6250] eta: 0:05:31 lr: 0.000003 grad: 0.1635 (0.1624) loss: 0.8499 (0.8541) time: 0.1260 data: 0.0392 max mem: 9305 +Train: [90] [4100/6250] eta: 0:05:16 lr: 0.000003 grad: 0.1519 (0.1624) loss: 0.8519 (0.8541) time: 0.1271 data: 0.0455 max mem: 9305 +Train: [90] [4200/6250] eta: 0:05:00 lr: 0.000003 grad: 0.1694 (0.1624) loss: 0.8540 (0.8541) time: 0.1075 data: 0.0240 max mem: 9305 +Train: [90] [4300/6250] eta: 0:04:45 lr: 0.000003 grad: 0.1677 (0.1625) loss: 0.8604 (0.8541) time: 0.1341 data: 0.0518 max mem: 9305 +Train: [90] [4400/6250] eta: 0:04:30 lr: 0.000003 grad: 0.1557 (0.1626) loss: 0.8530 (0.8540) time: 0.1405 data: 0.0588 max mem: 9305 +Train: [90] [4500/6250] eta: 0:04:16 lr: 0.000003 grad: 0.1564 (0.1627) loss: 0.8527 (0.8540) time: 0.1004 data: 0.0174 max mem: 9305 +Train: [90] [4600/6250] eta: 0:04:01 lr: 0.000003 grad: 0.1587 (0.1627) loss: 0.8546 (0.8539) time: 0.1257 data: 0.0418 max mem: 9305 +Train: [90] [4700/6250] eta: 0:03:46 lr: 0.000003 grad: 0.1518 (0.1628) loss: 0.8503 (0.8538) time: 0.1325 data: 0.0507 max mem: 9305 +Train: [90] [4800/6250] eta: 0:03:31 lr: 0.000003 grad: 0.1626 (0.1629) loss: 0.8465 (0.8537) time: 0.1441 data: 0.0642 max mem: 9305 +Train: [90] [4900/6250] eta: 0:03:16 lr: 0.000003 grad: 0.1634 (0.1629) loss: 0.8544 (0.8536) time: 0.1322 data: 0.0352 max mem: 9305 +Train: [90] [5000/6250] eta: 0:03:01 lr: 0.000003 grad: 0.1497 (0.1629) loss: 0.8618 (0.8536) time: 0.1411 data: 0.0493 max mem: 9305 +Train: [90] [5100/6250] eta: 0:02:46 lr: 0.000003 grad: 0.1552 (0.1629) loss: 0.8547 (0.8535) time: 0.1338 data: 0.0469 max mem: 9305 +Train: [90] [5200/6250] eta: 0:02:32 lr: 0.000003 grad: 0.1663 (0.1629) loss: 0.8521 (0.8534) time: 0.1032 data: 0.0175 max mem: 9305 +Train: [90] [5300/6250] eta: 0:02:17 lr: 0.000003 grad: 0.1671 (0.1629) loss: 0.8490 (0.8534) time: 0.1421 data: 0.0660 max mem: 9305 +Train: [90] [5400/6250] eta: 0:02:03 lr: 0.000003 grad: 0.1596 (0.1629) loss: 0.8508 (0.8533) time: 0.1519 data: 0.0653 max mem: 9305 +Train: [90] [5500/6250] eta: 0:01:48 lr: 0.000003 grad: 0.1586 (0.1629) loss: 0.8543 (0.8533) time: 0.1391 data: 0.0570 max mem: 9305 +Train: [90] [5600/6250] eta: 0:01:34 lr: 0.000003 grad: 0.1463 (0.1628) loss: 0.8593 (0.8533) time: 0.1062 data: 0.0252 max mem: 9305 +Train: [90] [5700/6250] eta: 0:01:19 lr: 0.000003 grad: 0.1497 (0.1627) loss: 0.8562 (0.8533) time: 0.1474 data: 0.0666 max mem: 9305 +Train: [90] [5800/6250] eta: 0:01:05 lr: 0.000003 grad: 0.1517 (0.1627) loss: 0.8556 (0.8533) time: 0.0939 data: 0.0027 max mem: 9305 +Train: [90] [5900/6250] eta: 0:00:50 lr: 0.000003 grad: 0.1562 (0.1627) loss: 0.8475 (0.8533) time: 0.1397 data: 0.0576 max mem: 9305 +Train: [90] [6000/6250] eta: 0:00:36 lr: 0.000003 grad: 0.1605 (0.1627) loss: 0.8537 (0.8533) time: 0.1678 data: 0.0841 max mem: 9305 +Train: [90] [6100/6250] eta: 0:00:21 lr: 0.000003 grad: 0.1614 (0.1627) loss: 0.8531 (0.8533) time: 0.1517 data: 0.0721 max mem: 9305 +Train: [90] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1588 (0.1628) loss: 0.8455 (0.8533) time: 0.1493 data: 0.0659 max mem: 9305 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1622 (0.1627) loss: 0.8495 (0.8532) time: 0.1279 data: 0.0487 max mem: 9305 +Train: [90] Total time: 0:15:09 (0.1456 s / it) +Averaged stats: lr: 0.000003 grad: 0.1622 (0.1627) loss: 0.8495 (0.8532) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:04:56 loss: 0.8584 (0.8584) time: 4.7872 data: 4.7537 max mem: 9305 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.8561 (0.8582) time: 0.1295 data: 0.1010 max mem: 9305 +Eval (hcp-train-subset): [90] Total time: 0:00:13 (0.2110 s / it) +Averaged stats (hcp-train-subset): loss: 0.8561 (0.8582) +Eval (hcp-val): [90] [ 0/62] eta: 0:03:56 loss: 0.8707 (0.8707) time: 3.8147 data: 3.7443 max mem: 9305 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.8720 (0.8730) time: 0.1104 data: 0.0819 max mem: 9305 +Eval (hcp-val): [90] Total time: 0:00:13 (0.2105 s / it) +Averaged stats (hcp-val): loss: 0.8720 (0.8730) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 10:43:50 lr: 0.000003 grad: 0.0949 (0.0949) loss: 0.9120 (0.9120) time: 6.1809 data: 6.0514 max mem: 9305 +Train: [91] [ 100/6250] eta: 0:21:06 lr: 0.000003 grad: 0.2055 (0.2376) loss: 0.8521 (0.8565) time: 0.1525 data: 0.0632 max mem: 9305 +Train: [91] [ 200/6250] eta: 0:18:00 lr: 0.000003 grad: 0.1682 (0.2169) loss: 0.8503 (0.8528) time: 0.1354 data: 0.0304 max mem: 9305 +Train: [91] [ 300/6250] eta: 0:16:28 lr: 0.000003 grad: 0.1716 (0.2086) loss: 0.8425 (0.8512) time: 0.1298 data: 0.0357 max mem: 9305 +Train: [91] [ 400/6250] eta: 0:15:31 lr: 0.000003 grad: 0.1606 (0.2021) loss: 0.8599 (0.8523) time: 0.1370 data: 0.0520 max mem: 9305 +Train: [91] [ 500/6250] eta: 0:14:44 lr: 0.000003 grad: 0.1481 (0.1972) loss: 0.8648 (0.8529) time: 0.1236 data: 0.0380 max mem: 9305 +Train: [91] [ 600/6250] eta: 0:14:09 lr: 0.000003 grad: 0.1614 (0.1926) loss: 0.8507 (0.8533) time: 0.1403 data: 0.0492 max mem: 9305 +Train: [91] [ 700/6250] eta: 0:13:42 lr: 0.000003 grad: 0.1696 (0.1891) loss: 0.8565 (0.8537) time: 0.1235 data: 0.0312 max mem: 9305 +Train: [91] [ 800/6250] eta: 0:13:24 lr: 0.000003 grad: 0.1680 (0.1873) loss: 0.8539 (0.8536) time: 0.1572 data: 0.0728 max mem: 9305 +Train: [91] [ 900/6250] eta: 0:13:07 lr: 0.000003 grad: 0.1760 (0.1862) loss: 0.8529 (0.8534) time: 0.1462 data: 0.0544 max mem: 9305 +Train: [91] [1000/6250] eta: 0:13:00 lr: 0.000003 grad: 0.1653 (0.1843) loss: 0.8531 (0.8534) time: 0.1372 data: 0.0562 max mem: 9305 +Train: [91] [1100/6250] eta: 0:12:39 lr: 0.000003 grad: 0.1755 (0.1827) loss: 0.8552 (0.8536) time: 0.1501 data: 0.0508 max mem: 9305 +Train: [91] [1200/6250] eta: 0:12:40 lr: 0.000003 grad: 0.1484 (0.1809) loss: 0.8531 (0.8538) time: 0.2052 data: 0.1198 max mem: 9305 +Train: [91] [1300/6250] eta: 0:12:19 lr: 0.000003 grad: 0.1618 (0.1799) loss: 0.8584 (0.8541) time: 0.1756 data: 0.0781 max mem: 9305 +Train: [91] [1400/6250] eta: 0:12:10 lr: 0.000003 grad: 0.1624 (0.1788) loss: 0.8521 (0.8541) time: 0.1967 data: 0.0875 max mem: 9305 +Train: [91] [1500/6250] eta: 0:11:58 lr: 0.000003 grad: 0.1574 (0.1778) loss: 0.8581 (0.8543) time: 0.1557 data: 0.0766 max mem: 9305 +Train: [91] [1600/6250] eta: 0:11:42 lr: 0.000003 grad: 0.1528 (0.1769) loss: 0.8565 (0.8544) time: 0.1628 data: 0.0674 max mem: 9305 +Train: [91] [1700/6250] eta: 0:11:26 lr: 0.000003 grad: 0.1564 (0.1762) loss: 0.8559 (0.8545) time: 0.1373 data: 0.0515 max mem: 9305 +Train: [91] [1800/6250] eta: 0:11:12 lr: 0.000003 grad: 0.1569 (0.1756) loss: 0.8550 (0.8546) time: 0.1638 data: 0.0791 max mem: 9305 +Train: [91] [1900/6250] eta: 0:10:56 lr: 0.000003 grad: 0.1720 (0.1753) loss: 0.8517 (0.8545) time: 0.1481 data: 0.0635 max mem: 9305 +Train: [91] [2000/6250] eta: 0:10:41 lr: 0.000003 grad: 0.1663 (0.1750) loss: 0.8536 (0.8546) time: 0.1599 data: 0.0660 max mem: 9305 +Train: [91] [2100/6250] eta: 0:10:23 lr: 0.000003 grad: 0.1714 (0.1746) loss: 0.8529 (0.8546) time: 0.1307 data: 0.0468 max mem: 9305 +Train: [91] [2200/6250] eta: 0:10:05 lr: 0.000003 grad: 0.1568 (0.1743) loss: 0.8540 (0.8546) time: 0.1468 data: 0.0690 max mem: 9305 +Train: [91] [2300/6250] eta: 0:09:47 lr: 0.000003 grad: 0.1632 (0.1745) loss: 0.8475 (0.8545) time: 0.1355 data: 0.0514 max mem: 9305 +Train: [91] [2400/6250] eta: 0:09:30 lr: 0.000003 grad: 0.1651 (0.1742) loss: 0.8535 (0.8546) time: 0.1350 data: 0.0550 max mem: 9305 +Train: [91] [2500/6250] eta: 0:09:16 lr: 0.000003 grad: 0.1586 (0.1741) loss: 0.8604 (0.8546) time: 0.1155 data: 0.0284 max mem: 9305 +Train: [91] [2600/6250] eta: 0:09:05 lr: 0.000003 grad: 0.1637 (0.1741) loss: 0.8510 (0.8545) time: 0.2825 data: 0.1778 max mem: 9305 +Train: [91] [2700/6250] eta: 0:08:53 lr: 0.000002 grad: 0.1801 (0.1741) loss: 0.8421 (0.8544) time: 0.0930 data: 0.0002 max mem: 9305 +Train: [91] [2800/6250] eta: 0:08:38 lr: 0.000002 grad: 0.1688 (0.1739) loss: 0.8506 (0.8543) time: 0.1497 data: 0.0718 max mem: 9305 +Train: [91] [2900/6250] eta: 0:08:25 lr: 0.000002 grad: 0.1568 (0.1738) loss: 0.8574 (0.8543) time: 0.2043 data: 0.1246 max mem: 9305 +Train: [91] [3000/6250] eta: 0:08:10 lr: 0.000002 grad: 0.1668 (0.1737) loss: 0.8535 (0.8544) time: 0.1555 data: 0.0729 max mem: 9305 +Train: [91] [3100/6250] eta: 0:07:54 lr: 0.000002 grad: 0.1671 (0.1736) loss: 0.8522 (0.8544) time: 0.1506 data: 0.0688 max mem: 9305 +Train: [91] [3200/6250] eta: 0:07:40 lr: 0.000002 grad: 0.1651 (0.1733) loss: 0.8546 (0.8545) time: 0.1710 data: 0.0887 max mem: 9305 +Train: [91] [3300/6250] eta: 0:07:27 lr: 0.000002 grad: 0.1658 (0.1733) loss: 0.8569 (0.8546) time: 0.1402 data: 0.0578 max mem: 9305 +Train: [91] [3400/6250] eta: 0:07:13 lr: 0.000002 grad: 0.1725 (0.1731) loss: 0.8503 (0.8546) time: 0.1813 data: 0.0988 max mem: 9305 +Train: [91] [3500/6250] eta: 0:06:58 lr: 0.000002 grad: 0.1609 (0.1729) loss: 0.8575 (0.8547) time: 0.1793 data: 0.1008 max mem: 9305 +Train: [91] [3600/6250] eta: 0:06:43 lr: 0.000002 grad: 0.1604 (0.1728) loss: 0.8592 (0.8547) time: 0.1515 data: 0.0692 max mem: 9305 +Train: [91] [3700/6250] eta: 0:06:28 lr: 0.000002 grad: 0.1615 (0.1726) loss: 0.8553 (0.8547) time: 0.1145 data: 0.0325 max mem: 9305 +Train: [91] [3800/6250] eta: 0:06:14 lr: 0.000002 grad: 0.1637 (0.1724) loss: 0.8532 (0.8546) time: 0.1727 data: 0.0894 max mem: 9305 +Train: [91] [3900/6250] eta: 0:05:58 lr: 0.000002 grad: 0.1746 (0.1724) loss: 0.8524 (0.8546) time: 0.1452 data: 0.0627 max mem: 9305 +Train: [91] [4000/6250] eta: 0:05:43 lr: 0.000002 grad: 0.1590 (0.1723) loss: 0.8486 (0.8546) time: 0.1500 data: 0.0630 max mem: 9305 +Train: [91] [4100/6250] eta: 0:05:27 lr: 0.000002 grad: 0.1633 (0.1720) loss: 0.8592 (0.8546) time: 0.1616 data: 0.0730 max mem: 9305 +Train: [91] [4200/6250] eta: 0:05:12 lr: 0.000002 grad: 0.1634 (0.1719) loss: 0.8553 (0.8547) time: 0.1613 data: 0.0592 max mem: 9305 +Train: [91] [4300/6250] eta: 0:04:56 lr: 0.000002 grad: 0.1455 (0.1716) loss: 0.8629 (0.8547) time: 0.1326 data: 0.0499 max mem: 9305 +Train: [91] [4400/6250] eta: 0:04:41 lr: 0.000002 grad: 0.1610 (0.1714) loss: 0.8576 (0.8547) time: 0.2166 data: 0.1102 max mem: 9305 +Train: [91] [4500/6250] eta: 0:04:27 lr: 0.000002 grad: 0.1561 (0.1712) loss: 0.8571 (0.8547) time: 0.0944 data: 0.0002 max mem: 9305 +Train: [91] [4600/6250] eta: 0:04:12 lr: 0.000002 grad: 0.1513 (0.1711) loss: 0.8599 (0.8547) time: 0.1555 data: 0.0745 max mem: 9305 +Train: [91] [4700/6250] eta: 0:03:56 lr: 0.000002 grad: 0.1643 (0.1710) loss: 0.8597 (0.8547) time: 0.1543 data: 0.0588 max mem: 9305 +Train: [91] [4800/6250] eta: 0:03:41 lr: 0.000002 grad: 0.1650 (0.1708) loss: 0.8515 (0.8548) time: 0.1085 data: 0.0233 max mem: 9305 +Train: [91] [4900/6250] eta: 0:03:26 lr: 0.000002 grad: 0.1583 (0.1706) loss: 0.8542 (0.8548) time: 0.0972 data: 0.0128 max mem: 9305 +Train: [91] [5000/6250] eta: 0:03:11 lr: 0.000002 grad: 0.1582 (0.1706) loss: 0.8589 (0.8548) time: 0.1439 data: 0.0574 max mem: 9305 +Train: [91] [5100/6250] eta: 0:02:55 lr: 0.000002 grad: 0.1591 (0.1704) loss: 0.8565 (0.8548) time: 0.1179 data: 0.0376 max mem: 9305 +Train: [91] [5200/6250] eta: 0:02:39 lr: 0.000002 grad: 0.1530 (0.1703) loss: 0.8580 (0.8548) time: 0.1325 data: 0.0499 max mem: 9305 +Train: [91] [5300/6250] eta: 0:02:24 lr: 0.000002 grad: 0.1564 (0.1702) loss: 0.8615 (0.8548) time: 0.1080 data: 0.0286 max mem: 9305 +Train: [91] [5400/6250] eta: 0:02:09 lr: 0.000002 grad: 0.1586 (0.1701) loss: 0.8533 (0.8548) time: 0.1324 data: 0.0542 max mem: 9305 +Train: [91] [5500/6250] eta: 0:01:54 lr: 0.000002 grad: 0.1778 (0.1700) loss: 0.8538 (0.8549) time: 0.1412 data: 0.0534 max mem: 9305 +Train: [91] [5600/6250] eta: 0:01:39 lr: 0.000002 grad: 0.1537 (0.1699) loss: 0.8493 (0.8548) time: 0.1512 data: 0.0754 max mem: 9305 +Train: [91] [5700/6250] eta: 0:01:23 lr: 0.000002 grad: 0.1649 (0.1697) loss: 0.8551 (0.8548) time: 0.1176 data: 0.0341 max mem: 9305 +Train: [91] [5800/6250] eta: 0:01:08 lr: 0.000002 grad: 0.1570 (0.1696) loss: 0.8516 (0.8548) time: 0.1248 data: 0.0421 max mem: 9305 +Train: [91] [5900/6250] eta: 0:00:53 lr: 0.000002 grad: 0.1599 (0.1696) loss: 0.8517 (0.8547) time: 0.1390 data: 0.0540 max mem: 9305 +Train: [91] [6000/6250] eta: 0:00:37 lr: 0.000002 grad: 0.1475 (0.1694) loss: 0.8551 (0.8547) time: 0.1388 data: 0.0564 max mem: 9305 +Train: [91] [6100/6250] eta: 0:00:22 lr: 0.000002 grad: 0.1581 (0.1693) loss: 0.8511 (0.8547) time: 0.1416 data: 0.0586 max mem: 9305 +Train: [91] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1467 (0.1691) loss: 0.8589 (0.8547) time: 0.1431 data: 0.0582 max mem: 9305 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1519 (0.1690) loss: 0.8525 (0.8547) time: 0.1256 data: 0.0397 max mem: 9305 +Train: [91] Total time: 0:15:46 (0.1515 s / it) +Averaged stats: lr: 0.000002 grad: 0.1519 (0.1690) loss: 0.8525 (0.8547) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:06:37 loss: 0.8617 (0.8617) time: 6.4135 data: 6.3772 max mem: 9305 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.8547 (0.8576) time: 0.1300 data: 0.1015 max mem: 9305 +Eval (hcp-train-subset): [91] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (hcp-train-subset): loss: 0.8547 (0.8576) +Eval (hcp-val): [91] [ 0/62] eta: 0:06:10 loss: 0.8668 (0.8668) time: 5.9833 data: 5.9491 max mem: 9305 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.8726 (0.8732) time: 0.0979 data: 0.0684 max mem: 9305 +Eval (hcp-val): [91] Total time: 0:00:13 (0.2144 s / it) +Averaged stats (hcp-val): loss: 0.8726 (0.8732) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [92] [ 0/6250] eta: 7:47:16 lr: 0.000002 grad: 0.1271 (0.1271) loss: 0.8960 (0.8960) time: 4.4859 data: 4.1386 max mem: 9305 +Train: [92] [ 100/6250] eta: 0:20:06 lr: 0.000002 grad: 0.2173 (0.2488) loss: 0.8367 (0.8524) time: 0.1284 data: 0.0252 max mem: 9305 +Train: [92] [ 200/6250] eta: 0:16:57 lr: 0.000002 grad: 0.1851 (0.2268) loss: 0.8496 (0.8487) time: 0.1305 data: 0.0308 max mem: 9305 +Train: [92] [ 300/6250] eta: 0:15:26 lr: 0.000002 grad: 0.1701 (0.2111) loss: 0.8582 (0.8492) time: 0.1226 data: 0.0313 max mem: 9305 +Train: [92] [ 400/6250] eta: 0:14:31 lr: 0.000002 grad: 0.1840 (0.2027) loss: 0.8374 (0.8495) time: 0.1288 data: 0.0417 max mem: 9305 +Train: [92] [ 500/6250] eta: 0:13:48 lr: 0.000002 grad: 0.1948 (0.1988) loss: 0.8545 (0.8495) time: 0.1157 data: 0.0254 max mem: 9305 +Train: [92] [ 600/6250] eta: 0:13:16 lr: 0.000002 grad: 0.1590 (0.1954) loss: 0.8570 (0.8499) time: 0.1170 data: 0.0202 max mem: 9305 +Train: [92] [ 700/6250] eta: 0:12:50 lr: 0.000002 grad: 0.1561 (0.1923) loss: 0.8510 (0.8506) time: 0.1311 data: 0.0394 max mem: 9305 +Train: [92] [ 800/6250] eta: 0:12:36 lr: 0.000002 grad: 0.1524 (0.1890) loss: 0.8549 (0.8514) time: 0.1179 data: 0.0191 max mem: 9305 +Train: [92] [ 900/6250] eta: 0:12:25 lr: 0.000002 grad: 0.1577 (0.1862) loss: 0.8561 (0.8520) time: 0.1479 data: 0.0522 max mem: 9305 +Train: [92] [1000/6250] eta: 0:12:18 lr: 0.000002 grad: 0.1568 (0.1848) loss: 0.8550 (0.8525) time: 0.1691 data: 0.0853 max mem: 9305 +Train: [92] [1100/6250] eta: 0:12:13 lr: 0.000002 grad: 0.1563 (0.1833) loss: 0.8528 (0.8527) time: 0.1449 data: 0.0650 max mem: 9305 +Train: [92] [1200/6250] eta: 0:12:04 lr: 0.000002 grad: 0.1531 (0.1818) loss: 0.8599 (0.8530) time: 0.1467 data: 0.0670 max mem: 9305 +Train: [92] [1300/6250] eta: 0:11:47 lr: 0.000002 grad: 0.1566 (0.1806) loss: 0.8498 (0.8532) time: 0.1437 data: 0.0522 max mem: 9305 +Train: [92] [1400/6250] eta: 0:11:31 lr: 0.000002 grad: 0.1493 (0.1794) loss: 0.8562 (0.8533) time: 0.1229 data: 0.0413 max mem: 9305 +Train: [92] [1500/6250] eta: 0:11:22 lr: 0.000002 grad: 0.1579 (0.1781) loss: 0.8564 (0.8535) time: 0.1848 data: 0.1036 max mem: 9305 +Train: [92] [1600/6250] eta: 0:11:11 lr: 0.000002 grad: 0.1550 (0.1768) loss: 0.8623 (0.8537) time: 0.1481 data: 0.0552 max mem: 9305 +Train: [92] [1700/6250] eta: 0:11:00 lr: 0.000002 grad: 0.1519 (0.1756) loss: 0.8480 (0.8538) time: 0.1598 data: 0.0738 max mem: 9305 +Train: [92] [1800/6250] eta: 0:10:45 lr: 0.000002 grad: 0.1519 (0.1744) loss: 0.8520 (0.8540) time: 0.1305 data: 0.0454 max mem: 9305 +Train: [92] [1900/6250] eta: 0:10:33 lr: 0.000002 grad: 0.1568 (0.1733) loss: 0.8557 (0.8542) time: 0.2254 data: 0.1356 max mem: 9305 +Train: [92] [2000/6250] eta: 0:10:17 lr: 0.000002 grad: 0.1522 (0.1724) loss: 0.8540 (0.8544) time: 0.1423 data: 0.0624 max mem: 9305 +Train: [92] [2100/6250] eta: 0:10:05 lr: 0.000002 grad: 0.1520 (0.1715) loss: 0.8570 (0.8546) time: 0.1779 data: 0.0645 max mem: 9305 +Train: [92] [2200/6250] eta: 0:09:56 lr: 0.000002 grad: 0.1640 (0.1709) loss: 0.8489 (0.8545) time: 0.2265 data: 0.1460 max mem: 9305 +Train: [92] [2300/6250] eta: 0:09:38 lr: 0.000002 grad: 0.1648 (0.1705) loss: 0.8558 (0.8545) time: 0.1372 data: 0.0515 max mem: 9305 +Train: [92] [2400/6250] eta: 0:09:23 lr: 0.000002 grad: 0.1523 (0.1698) loss: 0.8510 (0.8546) time: 0.1388 data: 0.0583 max mem: 9305 +Train: [92] [2500/6250] eta: 0:09:09 lr: 0.000002 grad: 0.1545 (0.1692) loss: 0.8495 (0.8547) time: 0.0886 data: 0.0002 max mem: 9305 +Train: [92] [2600/6250] eta: 0:08:52 lr: 0.000002 grad: 0.1587 (0.1691) loss: 0.8566 (0.8547) time: 0.1291 data: 0.0509 max mem: 9305 +Train: [92] [2700/6250] eta: 0:08:37 lr: 0.000002 grad: 0.1494 (0.1689) loss: 0.8511 (0.8546) time: 0.1416 data: 0.0574 max mem: 9305 +Train: [92] [2800/6250] eta: 0:08:22 lr: 0.000002 grad: 0.1537 (0.1686) loss: 0.8496 (0.8546) time: 0.1334 data: 0.0420 max mem: 9305 +Train: [92] [2900/6250] eta: 0:08:07 lr: 0.000002 grad: 0.1573 (0.1684) loss: 0.8523 (0.8546) time: 0.1134 data: 0.0297 max mem: 9305 +Train: [92] [3000/6250] eta: 0:07:51 lr: 0.000002 grad: 0.1539 (0.1682) loss: 0.8561 (0.8546) time: 0.1357 data: 0.0497 max mem: 9305 +Train: [92] [3100/6250] eta: 0:07:40 lr: 0.000002 grad: 0.1571 (0.1680) loss: 0.8569 (0.8546) time: 0.1484 data: 0.0615 max mem: 9305 +Train: [92] [3200/6250] eta: 0:07:24 lr: 0.000002 grad: 0.1669 (0.1677) loss: 0.8581 (0.8545) time: 0.1455 data: 0.0670 max mem: 9305 +Train: [92] [3300/6250] eta: 0:07:09 lr: 0.000002 grad: 0.1653 (0.1676) loss: 0.8499 (0.8545) time: 0.1448 data: 0.0602 max mem: 9305 +Train: [92] [3400/6250] eta: 0:06:53 lr: 0.000002 grad: 0.1538 (0.1675) loss: 0.8538 (0.8544) time: 0.1434 data: 0.0669 max mem: 9305 +Train: [92] [3500/6250] eta: 0:06:39 lr: 0.000002 grad: 0.1524 (0.1675) loss: 0.8538 (0.8544) time: 0.1736 data: 0.0879 max mem: 9305 +Train: [92] [3600/6250] eta: 0:06:23 lr: 0.000002 grad: 0.1499 (0.1676) loss: 0.8524 (0.8542) time: 0.1404 data: 0.0552 max mem: 9305 +Train: [92] [3700/6250] eta: 0:06:09 lr: 0.000002 grad: 0.1580 (0.1677) loss: 0.8557 (0.8541) time: 0.1387 data: 0.0567 max mem: 9305 +Train: [92] [3800/6250] eta: 0:05:54 lr: 0.000002 grad: 0.1661 (0.1678) loss: 0.8512 (0.8540) time: 0.1363 data: 0.0489 max mem: 9305 +Train: [92] [3900/6250] eta: 0:05:39 lr: 0.000002 grad: 0.1641 (0.1677) loss: 0.8469 (0.8539) time: 0.1591 data: 0.0825 max mem: 9305 +Train: [92] [4000/6250] eta: 0:05:25 lr: 0.000002 grad: 0.1601 (0.1677) loss: 0.8514 (0.8538) time: 0.1601 data: 0.0807 max mem: 9305 +Train: [92] [4100/6250] eta: 0:05:10 lr: 0.000002 grad: 0.1492 (0.1677) loss: 0.8531 (0.8537) time: 0.1473 data: 0.0640 max mem: 9305 +Train: [92] [4200/6250] eta: 0:04:56 lr: 0.000002 grad: 0.1561 (0.1676) loss: 0.8442 (0.8536) time: 0.1441 data: 0.0613 max mem: 9305 +Train: [92] [4300/6250] eta: 0:04:42 lr: 0.000002 grad: 0.1735 (0.1677) loss: 0.8500 (0.8535) time: 0.1508 data: 0.0729 max mem: 9305 +Train: [92] [4400/6250] eta: 0:04:27 lr: 0.000002 grad: 0.1462 (0.1676) loss: 0.8550 (0.8535) time: 0.1275 data: 0.0450 max mem: 9305 +Train: [92] [4500/6250] eta: 0:04:13 lr: 0.000002 grad: 0.1702 (0.1676) loss: 0.8492 (0.8534) time: 0.1504 data: 0.0693 max mem: 9305 +Train: [92] [4600/6250] eta: 0:04:00 lr: 0.000002 grad: 0.1603 (0.1676) loss: 0.8575 (0.8534) time: 0.2009 data: 0.1216 max mem: 9305 +Train: [92] [4700/6250] eta: 0:03:45 lr: 0.000002 grad: 0.1670 (0.1679) loss: 0.8487 (0.8532) time: 0.1688 data: 0.0847 max mem: 9305 +Train: [92] [4800/6250] eta: 0:03:31 lr: 0.000002 grad: 0.1806 (0.1680) loss: 0.8476 (0.8532) time: 0.1316 data: 0.0497 max mem: 9305 +Train: [92] [4900/6250] eta: 0:03:16 lr: 0.000002 grad: 0.1767 (0.1682) loss: 0.8428 (0.8530) time: 0.1322 data: 0.0420 max mem: 9305 +Train: [92] [5000/6250] eta: 0:03:01 lr: 0.000002 grad: 0.1773 (0.1683) loss: 0.8467 (0.8530) time: 0.1433 data: 0.0642 max mem: 9305 +Train: [92] [5100/6250] eta: 0:02:47 lr: 0.000002 grad: 0.1681 (0.1684) loss: 0.8473 (0.8529) time: 0.2014 data: 0.1060 max mem: 9305 +Train: [92] [5200/6250] eta: 0:02:33 lr: 0.000002 grad: 0.1589 (0.1685) loss: 0.8531 (0.8528) time: 0.1392 data: 0.0558 max mem: 9305 +Train: [92] [5300/6250] eta: 0:02:18 lr: 0.000002 grad: 0.1645 (0.1686) loss: 0.8468 (0.8527) time: 0.1334 data: 0.0522 max mem: 9305 +Train: [92] [5400/6250] eta: 0:02:03 lr: 0.000002 grad: 0.1650 (0.1687) loss: 0.8495 (0.8526) time: 0.1471 data: 0.0697 max mem: 9305 +Train: [92] [5500/6250] eta: 0:01:49 lr: 0.000002 grad: 0.1586 (0.1687) loss: 0.8518 (0.8526) time: 0.1449 data: 0.0603 max mem: 9305 +Train: [92] [5600/6250] eta: 0:01:34 lr: 0.000002 grad: 0.1500 (0.1688) loss: 0.8525 (0.8526) time: 0.1939 data: 0.1050 max mem: 9305 +Train: [92] [5700/6250] eta: 0:01:20 lr: 0.000002 grad: 0.1695 (0.1688) loss: 0.8517 (0.8525) time: 0.1137 data: 0.0299 max mem: 9305 +Train: [92] [5800/6250] eta: 0:01:05 lr: 0.000002 grad: 0.1638 (0.1688) loss: 0.8545 (0.8525) time: 0.1938 data: 0.0935 max mem: 9305 +Train: [92] [5900/6250] eta: 0:00:51 lr: 0.000002 grad: 0.1605 (0.1688) loss: 0.8500 (0.8525) time: 0.1366 data: 0.0544 max mem: 9305 +Train: [92] [6000/6250] eta: 0:00:36 lr: 0.000002 grad: 0.1664 (0.1688) loss: 0.8530 (0.8525) time: 0.1519 data: 0.0644 max mem: 9305 +Train: [92] [6100/6250] eta: 0:00:21 lr: 0.000002 grad: 0.1689 (0.1688) loss: 0.8538 (0.8525) time: 0.1412 data: 0.0587 max mem: 9305 +Train: [92] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1572 (0.1687) loss: 0.8522 (0.8525) time: 0.1635 data: 0.0641 max mem: 9305 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1639 (0.1686) loss: 0.8615 (0.8526) time: 0.2516 data: 0.1134 max mem: 9305 +Train: [92] Total time: 0:15:22 (0.1476 s / it) +Averaged stats: lr: 0.000002 grad: 0.1639 (0.1686) loss: 0.8615 (0.8526) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:03:57 loss: 0.8583 (0.8583) time: 3.8306 data: 3.7594 max mem: 9305 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.8554 (0.8575) time: 0.1454 data: 0.1167 max mem: 9305 +Eval (hcp-train-subset): [92] Total time: 0:00:13 (0.2165 s / it) +Averaged stats (hcp-train-subset): loss: 0.8554 (0.8575) +Eval (hcp-val): [92] [ 0/62] eta: 0:03:30 loss: 0.8716 (0.8716) time: 3.3981 data: 3.3210 max mem: 9305 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.8719 (0.8735) time: 0.1570 data: 0.1281 max mem: 9305 +Eval (hcp-val): [92] Total time: 0:00:14 (0.2388 s / it) +Averaged stats (hcp-val): loss: 0.8719 (0.8735) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 8:27:12 lr: 0.000002 grad: 0.7303 (0.7303) loss: 0.8021 (0.8021) time: 4.8692 data: 4.5952 max mem: 9305 +Train: [93] [ 100/6250] eta: 0:21:14 lr: 0.000002 grad: 0.1918 (0.2335) loss: 0.8555 (0.8645) time: 0.1471 data: 0.0397 max mem: 9305 +Train: [93] [ 200/6250] eta: 0:18:03 lr: 0.000002 grad: 0.1705 (0.2091) loss: 0.8609 (0.8625) time: 0.1461 data: 0.0393 max mem: 9305 +Train: [93] [ 300/6250] eta: 0:16:43 lr: 0.000002 grad: 0.1683 (0.2005) loss: 0.8515 (0.8590) time: 0.1504 data: 0.0573 max mem: 9305 +Train: [93] [ 400/6250] eta: 0:15:46 lr: 0.000002 grad: 0.1700 (0.1955) loss: 0.8621 (0.8574) time: 0.1580 data: 0.0680 max mem: 9305 +Train: [93] [ 500/6250] eta: 0:15:00 lr: 0.000002 grad: 0.1511 (0.1902) loss: 0.8551 (0.8569) time: 0.1358 data: 0.0439 max mem: 9305 +Train: [93] [ 600/6250] eta: 0:14:32 lr: 0.000002 grad: 0.1684 (0.1871) loss: 0.8489 (0.8559) time: 0.1590 data: 0.0718 max mem: 9305 +Train: [93] [ 700/6250] eta: 0:14:08 lr: 0.000002 grad: 0.1630 (0.1853) loss: 0.8535 (0.8550) time: 0.1706 data: 0.0717 max mem: 9305 +Train: [93] [ 800/6250] eta: 0:14:17 lr: 0.000002 grad: 0.1626 (0.1837) loss: 0.8503 (0.8543) time: 0.3047 data: 0.2097 max mem: 9305 +Train: [93] [ 900/6250] eta: 0:14:13 lr: 0.000002 grad: 0.1688 (0.1819) loss: 0.8432 (0.8535) time: 0.2231 data: 0.1311 max mem: 9305 +Train: [93] [1000/6250] eta: 0:13:52 lr: 0.000002 grad: 0.1556 (0.1807) loss: 0.8541 (0.8530) time: 0.1316 data: 0.0446 max mem: 9305 +Train: [93] [1100/6250] eta: 0:13:27 lr: 0.000002 grad: 0.1493 (0.1792) loss: 0.8544 (0.8528) time: 0.1395 data: 0.0491 max mem: 9305 +Train: [93] [1200/6250] eta: 0:13:03 lr: 0.000002 grad: 0.1605 (0.1784) loss: 0.8535 (0.8526) time: 0.1513 data: 0.0595 max mem: 9305 +Train: [93] [1300/6250] eta: 0:12:41 lr: 0.000002 grad: 0.1539 (0.1773) loss: 0.8456 (0.8525) time: 0.1066 data: 0.0171 max mem: 9305 +Train: [93] [1400/6250] eta: 0:12:19 lr: 0.000002 grad: 0.1525 (0.1767) loss: 0.8589 (0.8525) time: 0.1476 data: 0.0641 max mem: 9305 +Train: [93] [1500/6250] eta: 0:11:53 lr: 0.000002 grad: 0.1642 (0.1761) loss: 0.8596 (0.8526) time: 0.1305 data: 0.0440 max mem: 9305 +Train: [93] [1600/6250] eta: 0:11:29 lr: 0.000002 grad: 0.1580 (0.1757) loss: 0.8520 (0.8526) time: 0.1200 data: 0.0376 max mem: 9305 +Train: [93] [1700/6250] eta: 0:11:06 lr: 0.000002 grad: 0.1570 (0.1750) loss: 0.8526 (0.8527) time: 0.1293 data: 0.0449 max mem: 9305 +Train: [93] [1800/6250] eta: 0:10:46 lr: 0.000002 grad: 0.1599 (0.1745) loss: 0.8540 (0.8529) time: 0.1324 data: 0.0510 max mem: 9305 +Train: [93] [1900/6250] eta: 0:10:29 lr: 0.000002 grad: 0.1688 (0.1741) loss: 0.8574 (0.8530) time: 0.1328 data: 0.0481 max mem: 9305 +Train: [93] [2000/6250] eta: 0:10:15 lr: 0.000002 grad: 0.1655 (0.1738) loss: 0.8530 (0.8530) time: 0.0943 data: 0.0050 max mem: 9305 +Train: [93] [2100/6250] eta: 0:10:04 lr: 0.000002 grad: 0.1569 (0.1733) loss: 0.8524 (0.8531) time: 0.1848 data: 0.1064 max mem: 9305 +Train: [93] [2200/6250] eta: 0:09:52 lr: 0.000002 grad: 0.1684 (0.1729) loss: 0.8528 (0.8529) time: 0.2034 data: 0.1242 max mem: 9305 +Train: [93] [2300/6250] eta: 0:09:42 lr: 0.000001 grad: 0.1696 (0.1726) loss: 0.8515 (0.8528) time: 0.1117 data: 0.0135 max mem: 9305 +Train: [93] [2400/6250] eta: 0:09:29 lr: 0.000001 grad: 0.1563 (0.1722) loss: 0.8558 (0.8527) time: 0.1351 data: 0.0508 max mem: 9305 +Train: [93] [2500/6250] eta: 0:09:13 lr: 0.000001 grad: 0.1626 (0.1720) loss: 0.8445 (0.8526) time: 0.1230 data: 0.0384 max mem: 9305 +Train: [93] [2600/6250] eta: 0:08:58 lr: 0.000001 grad: 0.1543 (0.1717) loss: 0.8517 (0.8525) time: 0.1462 data: 0.0580 max mem: 9305 +Train: [93] [2700/6250] eta: 0:08:44 lr: 0.000001 grad: 0.1664 (0.1714) loss: 0.8575 (0.8525) time: 0.1318 data: 0.0473 max mem: 9305 +Train: [93] [2800/6250] eta: 0:08:32 lr: 0.000001 grad: 0.1554 (0.1710) loss: 0.8549 (0.8525) time: 0.2200 data: 0.1331 max mem: 9305 +Train: [93] [2900/6250] eta: 0:08:16 lr: 0.000001 grad: 0.1604 (0.1704) loss: 0.8508 (0.8526) time: 0.1298 data: 0.0420 max mem: 9305 +Train: [93] [3000/6250] eta: 0:08:01 lr: 0.000001 grad: 0.1542 (0.1700) loss: 0.8552 (0.8527) time: 0.1985 data: 0.0893 max mem: 9305 +Train: [93] [3100/6250] eta: 0:07:46 lr: 0.000001 grad: 0.1481 (0.1695) loss: 0.8609 (0.8528) time: 0.1539 data: 0.0692 max mem: 9305 +Train: [93] [3200/6250] eta: 0:07:31 lr: 0.000001 grad: 0.1564 (0.1690) loss: 0.8576 (0.8529) time: 0.1377 data: 0.0532 max mem: 9305 +Train: [93] [3300/6250] eta: 0:07:16 lr: 0.000001 grad: 0.1467 (0.1686) loss: 0.8602 (0.8531) time: 0.1485 data: 0.0608 max mem: 9305 +Train: [93] [3400/6250] eta: 0:07:02 lr: 0.000001 grad: 0.1526 (0.1682) loss: 0.8520 (0.8532) time: 0.1231 data: 0.0313 max mem: 9305 +Train: [93] [3500/6250] eta: 0:06:46 lr: 0.000001 grad: 0.1456 (0.1677) loss: 0.8597 (0.8534) time: 0.1351 data: 0.0489 max mem: 9305 +Train: [93] [3600/6250] eta: 0:06:31 lr: 0.000001 grad: 0.1501 (0.1673) loss: 0.8666 (0.8536) time: 0.1523 data: 0.0464 max mem: 9305 +Train: [93] [3700/6250] eta: 0:06:17 lr: 0.000001 grad: 0.1528 (0.1668) loss: 0.8590 (0.8538) time: 0.1290 data: 0.0390 max mem: 9305 +Train: [93] [3800/6250] eta: 0:06:03 lr: 0.000001 grad: 0.1488 (0.1664) loss: 0.8571 (0.8539) time: 0.2345 data: 0.1359 max mem: 9305 +Train: [93] [3900/6250] eta: 0:05:48 lr: 0.000001 grad: 0.1439 (0.1660) loss: 0.8616 (0.8542) time: 0.1856 data: 0.1040 max mem: 9305 +Train: [93] [4000/6250] eta: 0:05:32 lr: 0.000001 grad: 0.1448 (0.1657) loss: 0.8583 (0.8543) time: 0.1398 data: 0.0564 max mem: 9305 +Train: [93] [4100/6250] eta: 0:05:16 lr: 0.000001 grad: 0.1530 (0.1654) loss: 0.8578 (0.8544) time: 0.1072 data: 0.0181 max mem: 9305 +Train: [93] [4200/6250] eta: 0:05:01 lr: 0.000001 grad: 0.1545 (0.1651) loss: 0.8636 (0.8545) time: 0.1681 data: 0.0694 max mem: 9305 +Train: [93] [4300/6250] eta: 0:04:46 lr: 0.000001 grad: 0.1467 (0.1649) loss: 0.8621 (0.8546) time: 0.1390 data: 0.0564 max mem: 9305 +Train: [93] [4400/6250] eta: 0:04:31 lr: 0.000001 grad: 0.1411 (0.1647) loss: 0.8670 (0.8547) time: 0.1370 data: 0.0518 max mem: 9305 +Train: [93] [4500/6250] eta: 0:04:16 lr: 0.000001 grad: 0.1523 (0.1644) loss: 0.8616 (0.8549) time: 0.1358 data: 0.0513 max mem: 9305 +Train: [93] [4600/6250] eta: 0:04:01 lr: 0.000001 grad: 0.1454 (0.1641) loss: 0.8561 (0.8551) time: 0.1592 data: 0.0763 max mem: 9305 +Train: [93] [4700/6250] eta: 0:03:46 lr: 0.000001 grad: 0.1609 (0.1640) loss: 0.8565 (0.8551) time: 0.1376 data: 0.0550 max mem: 9305 +Train: [93] [4800/6250] eta: 0:03:31 lr: 0.000001 grad: 0.1556 (0.1639) loss: 0.8583 (0.8552) time: 0.1506 data: 0.0719 max mem: 9305 +Train: [93] [4900/6250] eta: 0:03:17 lr: 0.000001 grad: 0.1565 (0.1637) loss: 0.8542 (0.8553) time: 0.1734 data: 0.0892 max mem: 9305 +Train: [93] [5000/6250] eta: 0:03:02 lr: 0.000001 grad: 0.1516 (0.1636) loss: 0.8564 (0.8553) time: 0.1731 data: 0.0925 max mem: 9305 +Train: [93] [5100/6250] eta: 0:02:48 lr: 0.000001 grad: 0.1607 (0.1635) loss: 0.8513 (0.8554) time: 0.1266 data: 0.0399 max mem: 9305 +Train: [93] [5200/6250] eta: 0:02:33 lr: 0.000001 grad: 0.1560 (0.1635) loss: 0.8594 (0.8554) time: 0.1709 data: 0.0944 max mem: 9305 +Train: [93] [5300/6250] eta: 0:02:19 lr: 0.000001 grad: 0.1526 (0.1635) loss: 0.8588 (0.8554) time: 0.3166 data: 0.2335 max mem: 9305 +Train: [93] [5400/6250] eta: 0:02:04 lr: 0.000001 grad: 0.1521 (0.1635) loss: 0.8599 (0.8554) time: 0.0845 data: 0.0002 max mem: 9305 +Train: [93] [5500/6250] eta: 0:01:50 lr: 0.000001 grad: 0.1514 (0.1635) loss: 0.8589 (0.8554) time: 0.1829 data: 0.1014 max mem: 9305 +Train: [93] [5600/6250] eta: 0:01:35 lr: 0.000001 grad: 0.1551 (0.1633) loss: 0.8563 (0.8555) time: 0.1622 data: 0.0826 max mem: 9305 +Train: [93] [5700/6250] eta: 0:01:20 lr: 0.000001 grad: 0.1644 (0.1633) loss: 0.8604 (0.8555) time: 0.1474 data: 0.0650 max mem: 9305 +Train: [93] [5800/6250] eta: 0:01:05 lr: 0.000001 grad: 0.1525 (0.1633) loss: 0.8623 (0.8556) time: 0.1270 data: 0.0445 max mem: 9305 +Train: [93] [5900/6250] eta: 0:00:51 lr: 0.000001 grad: 0.1662 (0.1632) loss: 0.8544 (0.8556) time: 0.1368 data: 0.0574 max mem: 9305 +Train: [93] [6000/6250] eta: 0:00:36 lr: 0.000001 grad: 0.1641 (0.1633) loss: 0.8558 (0.8556) time: 0.1467 data: 0.0687 max mem: 9305 +Train: [93] [6100/6250] eta: 0:00:21 lr: 0.000001 grad: 0.1647 (0.1632) loss: 0.8568 (0.8556) time: 0.1403 data: 0.0578 max mem: 9305 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1501 (0.1632) loss: 0.8617 (0.8557) time: 0.1330 data: 0.0498 max mem: 9305 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1456 (0.1632) loss: 0.8625 (0.8557) time: 0.1586 data: 0.0654 max mem: 9305 +Train: [93] Total time: 0:15:17 (0.1468 s / it) +Averaged stats: lr: 0.000001 grad: 0.1456 (0.1632) loss: 0.8625 (0.8557) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:06:51 loss: 0.8613 (0.8613) time: 6.6317 data: 6.5955 max mem: 9305 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.8544 (0.8572) time: 0.1551 data: 0.1261 max mem: 9305 +Eval (hcp-train-subset): [93] Total time: 0:00:15 (0.2457 s / it) +Averaged stats (hcp-train-subset): loss: 0.8544 (0.8572) +Eval (hcp-val): [93] [ 0/62] eta: 0:05:55 loss: 0.8687 (0.8687) time: 5.7264 data: 5.6922 max mem: 9305 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.8701 (0.8725) time: 0.1336 data: 0.1051 max mem: 9305 +Eval (hcp-val): [93] Total time: 0:00:14 (0.2303 s / it) +Averaged stats (hcp-val): loss: 0.8701 (0.8725) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [94] [ 0/6250] eta: 8:14:22 lr: 0.000001 grad: 0.2347 (0.2347) loss: 0.8900 (0.8900) time: 4.7461 data: 4.5865 max mem: 9305 +Train: [94] [ 100/6250] eta: 0:21:34 lr: 0.000001 grad: 0.1711 (0.1914) loss: 0.8636 (0.8598) time: 0.1604 data: 0.0562 max mem: 9305 +Train: [94] [ 200/6250] eta: 0:19:07 lr: 0.000001 grad: 0.1764 (0.1901) loss: 0.8560 (0.8570) time: 0.1836 data: 0.0777 max mem: 9305 +Train: [94] [ 300/6250] eta: 0:17:35 lr: 0.000001 grad: 0.1806 (0.1857) loss: 0.8520 (0.8559) time: 0.1476 data: 0.0482 max mem: 9305 +Train: [94] [ 400/6250] eta: 0:16:34 lr: 0.000001 grad: 0.1680 (0.1839) loss: 0.8628 (0.8554) time: 0.1387 data: 0.0582 max mem: 9305 +Train: [94] [ 500/6250] eta: 0:15:46 lr: 0.000001 grad: 0.1583 (0.1804) loss: 0.8558 (0.8560) time: 0.1322 data: 0.0417 max mem: 9305 +Train: [94] [ 600/6250] eta: 0:15:00 lr: 0.000001 grad: 0.1472 (0.1775) loss: 0.8578 (0.8563) time: 0.1225 data: 0.0345 max mem: 9305 +Train: [94] [ 700/6250] eta: 0:14:37 lr: 0.000001 grad: 0.1657 (0.1761) loss: 0.8557 (0.8562) time: 0.1337 data: 0.0461 max mem: 9305 +Train: [94] [ 800/6250] eta: 0:14:26 lr: 0.000001 grad: 0.1611 (0.1751) loss: 0.8611 (0.8561) time: 0.1506 data: 0.0689 max mem: 9305 +Train: [94] [ 900/6250] eta: 0:14:04 lr: 0.000001 grad: 0.1705 (0.1746) loss: 0.8581 (0.8563) time: 0.1468 data: 0.0633 max mem: 9305 +Train: [94] [1000/6250] eta: 0:13:48 lr: 0.000001 grad: 0.1702 (0.1746) loss: 0.8549 (0.8558) time: 0.1382 data: 0.0464 max mem: 9305 +Train: [94] [1100/6250] eta: 0:13:24 lr: 0.000001 grad: 0.1685 (0.1744) loss: 0.8565 (0.8555) time: 0.1275 data: 0.0423 max mem: 9305 +Train: [94] [1200/6250] eta: 0:13:03 lr: 0.000001 grad: 0.1620 (0.1743) loss: 0.8532 (0.8552) time: 0.1414 data: 0.0486 max mem: 9305 +Train: [94] [1300/6250] eta: 0:12:39 lr: 0.000001 grad: 0.1635 (0.1741) loss: 0.8526 (0.8550) time: 0.1404 data: 0.0490 max mem: 9305 +Train: [94] [1400/6250] eta: 0:12:17 lr: 0.000001 grad: 0.1703 (0.1736) loss: 0.8559 (0.8550) time: 0.1397 data: 0.0587 max mem: 9305 +Train: [94] [1500/6250] eta: 0:11:57 lr: 0.000001 grad: 0.1705 (0.1732) loss: 0.8500 (0.8549) time: 0.1334 data: 0.0503 max mem: 9305 +Train: [94] [1600/6250] eta: 0:11:46 lr: 0.000001 grad: 0.1544 (0.1725) loss: 0.8541 (0.8549) time: 0.1315 data: 0.0434 max mem: 9305 +Train: [94] [1700/6250] eta: 0:11:28 lr: 0.000001 grad: 0.1411 (0.1719) loss: 0.8604 (0.8550) time: 0.1388 data: 0.0587 max mem: 9305 +Train: [94] [1800/6250] eta: 0:11:13 lr: 0.000001 grad: 0.1522 (0.1712) loss: 0.8611 (0.8551) time: 0.2019 data: 0.1224 max mem: 9305 +Train: [94] [1900/6250] eta: 0:11:03 lr: 0.000001 grad: 0.1481 (0.1705) loss: 0.8571 (0.8552) time: 0.2108 data: 0.1291 max mem: 9305 +Train: [94] [2000/6250] eta: 0:10:53 lr: 0.000001 grad: 0.1640 (0.1700) loss: 0.8562 (0.8553) time: 0.2385 data: 0.1615 max mem: 9305 +Train: [94] [2100/6250] eta: 0:10:38 lr: 0.000001 grad: 0.1574 (0.1696) loss: 0.8533 (0.8552) time: 0.1652 data: 0.0840 max mem: 9305 +Train: [94] [2200/6250] eta: 0:10:25 lr: 0.000001 grad: 0.1488 (0.1692) loss: 0.8562 (0.8552) time: 0.1573 data: 0.0789 max mem: 9305 +Train: [94] [2300/6250] eta: 0:10:10 lr: 0.000001 grad: 0.1549 (0.1689) loss: 0.8549 (0.8551) time: 0.1581 data: 0.0652 max mem: 9305 +Train: [94] [2400/6250] eta: 0:09:56 lr: 0.000001 grad: 0.1563 (0.1685) loss: 0.8515 (0.8551) time: 0.1212 data: 0.0395 max mem: 9305 +Train: [94] [2500/6250] eta: 0:09:38 lr: 0.000001 grad: 0.1488 (0.1677) loss: 0.8587 (0.8553) time: 0.1576 data: 0.0777 max mem: 9305 +Train: [94] [2600/6250] eta: 0:09:20 lr: 0.000001 grad: 0.1502 (0.1673) loss: 0.8599 (0.8553) time: 0.1446 data: 0.0577 max mem: 9305 +Train: [94] [2700/6250] eta: 0:09:03 lr: 0.000001 grad: 0.1414 (0.1668) loss: 0.8562 (0.8555) time: 0.1436 data: 0.0641 max mem: 9305 +Train: [94] [2800/6250] eta: 0:08:45 lr: 0.000001 grad: 0.1527 (0.1664) loss: 0.8587 (0.8556) time: 0.1263 data: 0.0419 max mem: 9305 +Train: [94] [2900/6250] eta: 0:08:27 lr: 0.000001 grad: 0.1523 (0.1661) loss: 0.8483 (0.8555) time: 0.1436 data: 0.0577 max mem: 9305 +Train: [94] [3000/6250] eta: 0:08:10 lr: 0.000001 grad: 0.1666 (0.1658) loss: 0.8515 (0.8555) time: 0.0963 data: 0.0077 max mem: 9305 +Train: [94] [3100/6250] eta: 0:07:55 lr: 0.000001 grad: 0.1546 (0.1656) loss: 0.8519 (0.8556) time: 0.1405 data: 0.0567 max mem: 9305 +Train: [94] [3200/6250] eta: 0:07:38 lr: 0.000001 grad: 0.1562 (0.1655) loss: 0.8565 (0.8555) time: 0.1465 data: 0.0695 max mem: 9305 +Train: [94] [3300/6250] eta: 0:07:21 lr: 0.000001 grad: 0.1626 (0.1656) loss: 0.8515 (0.8554) time: 0.1260 data: 0.0440 max mem: 9305 +Train: [94] [3400/6250] eta: 0:07:07 lr: 0.000001 grad: 0.1640 (0.1657) loss: 0.8530 (0.8554) time: 0.2056 data: 0.1236 max mem: 9305 +Train: [94] [3500/6250] eta: 0:06:51 lr: 0.000001 grad: 0.1658 (0.1658) loss: 0.8520 (0.8553) time: 0.1685 data: 0.0859 max mem: 9305 +Train: [94] [3600/6250] eta: 0:06:36 lr: 0.000001 grad: 0.1641 (0.1657) loss: 0.8428 (0.8552) time: 0.1504 data: 0.0653 max mem: 9305 +Train: [94] [3700/6250] eta: 0:06:20 lr: 0.000001 grad: 0.1626 (0.1657) loss: 0.8516 (0.8553) time: 0.1156 data: 0.0276 max mem: 9305 +Train: [94] [3800/6250] eta: 0:06:05 lr: 0.000001 grad: 0.1720 (0.1660) loss: 0.8401 (0.8551) time: 0.1481 data: 0.0630 max mem: 9305 +Train: [94] [3900/6250] eta: 0:05:51 lr: 0.000001 grad: 0.1525 (0.1660) loss: 0.8568 (0.8551) time: 0.2096 data: 0.1155 max mem: 9305 +Train: [94] [4000/6250] eta: 0:05:35 lr: 0.000001 grad: 0.1551 (0.1661) loss: 0.8595 (0.8551) time: 0.1378 data: 0.0570 max mem: 9305 +Train: [94] [4100/6250] eta: 0:05:20 lr: 0.000001 grad: 0.1460 (0.1661) loss: 0.8568 (0.8550) time: 0.1476 data: 0.0639 max mem: 9305 +Train: [94] [4200/6250] eta: 0:05:06 lr: 0.000001 grad: 0.1605 (0.1661) loss: 0.8557 (0.8549) time: 0.1480 data: 0.0628 max mem: 9305 +Train: [94] [4300/6250] eta: 0:04:51 lr: 0.000001 grad: 0.1608 (0.1661) loss: 0.8511 (0.8549) time: 0.1528 data: 0.0581 max mem: 9305 +Train: [94] [4400/6250] eta: 0:04:36 lr: 0.000001 grad: 0.1698 (0.1661) loss: 0.8473 (0.8549) time: 0.1288 data: 0.0435 max mem: 9305 +Train: [94] [4500/6250] eta: 0:04:21 lr: 0.000001 grad: 0.1633 (0.1662) loss: 0.8496 (0.8548) time: 0.1806 data: 0.0774 max mem: 9305 +Train: [94] [4600/6250] eta: 0:04:07 lr: 0.000001 grad: 0.1821 (0.1663) loss: 0.8571 (0.8548) time: 0.2208 data: 0.1338 max mem: 9305 +Train: [94] [4700/6250] eta: 0:03:51 lr: 0.000001 grad: 0.1564 (0.1663) loss: 0.8523 (0.8548) time: 0.1296 data: 0.0424 max mem: 9305 +Train: [94] [4800/6250] eta: 0:03:36 lr: 0.000001 grad: 0.1557 (0.1663) loss: 0.8541 (0.8548) time: 0.0852 data: 0.0002 max mem: 9305 +Train: [94] [4900/6250] eta: 0:03:21 lr: 0.000001 grad: 0.1549 (0.1664) loss: 0.8558 (0.8548) time: 0.1368 data: 0.0551 max mem: 9305 +Train: [94] [5000/6250] eta: 0:03:06 lr: 0.000001 grad: 0.1495 (0.1665) loss: 0.8556 (0.8548) time: 0.1845 data: 0.1037 max mem: 9305 +Train: [94] [5100/6250] eta: 0:02:52 lr: 0.000001 grad: 0.1621 (0.1666) loss: 0.8547 (0.8548) time: 0.0940 data: 0.0032 max mem: 9305 +Train: [94] [5200/6250] eta: 0:02:36 lr: 0.000001 grad: 0.1677 (0.1667) loss: 0.8443 (0.8548) time: 0.1206 data: 0.0330 max mem: 9305 +Train: [94] [5300/6250] eta: 0:02:21 lr: 0.000001 grad: 0.1618 (0.1669) loss: 0.8574 (0.8547) time: 0.1341 data: 0.0363 max mem: 9305 +Train: [94] [5400/6250] eta: 0:02:06 lr: 0.000001 grad: 0.1698 (0.1669) loss: 0.8543 (0.8547) time: 0.1655 data: 0.0861 max mem: 9305 +Train: [94] [5500/6250] eta: 0:01:51 lr: 0.000001 grad: 0.1595 (0.1670) loss: 0.8548 (0.8547) time: 0.1306 data: 0.0485 max mem: 9305 +Train: [94] [5600/6250] eta: 0:01:36 lr: 0.000001 grad: 0.1617 (0.1669) loss: 0.8552 (0.8547) time: 0.1364 data: 0.0546 max mem: 9305 +Train: [94] [5700/6250] eta: 0:01:21 lr: 0.000001 grad: 0.1586 (0.1668) loss: 0.8484 (0.8547) time: 0.1682 data: 0.0805 max mem: 9305 +Train: [94] [5800/6250] eta: 0:01:06 lr: 0.000001 grad: 0.1605 (0.1669) loss: 0.8546 (0.8546) time: 0.1997 data: 0.1199 max mem: 9305 +Train: [94] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.1608 (0.1668) loss: 0.8537 (0.8546) time: 0.1642 data: 0.0853 max mem: 9305 +Train: [94] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1641 (0.1669) loss: 0.8517 (0.8546) time: 0.1363 data: 0.0482 max mem: 9305 +Train: [94] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1682 (0.1669) loss: 0.8509 (0.8545) time: 0.1514 data: 0.0677 max mem: 9305 +Train: [94] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1615 (0.1669) loss: 0.8539 (0.8545) time: 0.1414 data: 0.0593 max mem: 9305 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1632 (0.1669) loss: 0.8411 (0.8545) time: 0.1015 data: 0.0002 max mem: 9305 +Train: [94] Total time: 0:15:39 (0.1503 s / it) +Averaged stats: lr: 0.000001 grad: 0.1632 (0.1669) loss: 0.8411 (0.8545) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:05:53 loss: 0.8595 (0.8595) time: 5.7003 data: 5.6651 max mem: 9305 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.8540 (0.8561) time: 0.1595 data: 0.1301 max mem: 9305 +Eval (hcp-train-subset): [94] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (hcp-train-subset): loss: 0.8540 (0.8561) +Making plots (hcp-train-subset): example=2 +Eval (hcp-val): [94] [ 0/62] eta: 0:06:05 loss: 0.8703 (0.8703) time: 5.8872 data: 5.8531 max mem: 9305 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.8723 (0.8725) time: 0.1064 data: 0.0757 max mem: 9305 +Eval (hcp-val): [94] Total time: 0:00:13 (0.2171 s / it) +Averaged stats (hcp-val): loss: 0.8723 (0.8725) +Making plots (hcp-val): example=53 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [95] [ 0/6250] eta: 10:31:21 lr: 0.000001 grad: 0.1913 (0.1913) loss: 0.8640 (0.8640) time: 6.0610 data: 5.9539 max mem: 9305 +Train: [95] [ 100/6250] eta: 0:20:54 lr: 0.000001 grad: 0.1969 (0.2285) loss: 0.8455 (0.8531) time: 0.1823 data: 0.0827 max mem: 9305 +Train: [95] [ 200/6250] eta: 0:17:33 lr: 0.000001 grad: 0.1705 (0.2074) loss: 0.8589 (0.8541) time: 0.1526 data: 0.0474 max mem: 9305 +Train: [95] [ 300/6250] eta: 0:16:25 lr: 0.000001 grad: 0.1484 (0.1965) loss: 0.8523 (0.8548) time: 0.1773 data: 0.0834 max mem: 9305 +Train: [95] [ 400/6250] eta: 0:15:53 lr: 0.000001 grad: 0.1677 (0.1900) loss: 0.8438 (0.8545) time: 0.2165 data: 0.0854 max mem: 9305 +Train: [95] [ 500/6250] eta: 0:15:41 lr: 0.000001 grad: 0.1781 (0.1866) loss: 0.8496 (0.8540) time: 0.1570 data: 0.0673 max mem: 9305 +Train: [95] [ 600/6250] eta: 0:15:18 lr: 0.000001 grad: 0.1894 (0.1854) loss: 0.8392 (0.8532) time: 0.1511 data: 0.0618 max mem: 9305 +Train: [95] [ 700/6250] eta: 0:14:49 lr: 0.000001 grad: 0.1585 (0.1848) loss: 0.8567 (0.8526) time: 0.1368 data: 0.0486 max mem: 9305 +Train: [95] [ 800/6250] eta: 0:14:30 lr: 0.000001 grad: 0.1713 (0.1837) loss: 0.8492 (0.8528) time: 0.1770 data: 0.0864 max mem: 9305 +Train: [95] [ 900/6250] eta: 0:14:02 lr: 0.000001 grad: 0.1711 (0.1831) loss: 0.8528 (0.8527) time: 0.1452 data: 0.0547 max mem: 9305 +Train: [95] [1000/6250] eta: 0:13:33 lr: 0.000001 grad: 0.1735 (0.1826) loss: 0.8584 (0.8526) time: 0.1293 data: 0.0389 max mem: 9305 +Train: [95] [1100/6250] eta: 0:13:12 lr: 0.000001 grad: 0.1688 (0.1818) loss: 0.8518 (0.8527) time: 0.1396 data: 0.0586 max mem: 9305 +Train: [95] [1200/6250] eta: 0:12:53 lr: 0.000001 grad: 0.1543 (0.1808) loss: 0.8575 (0.8529) time: 0.1415 data: 0.0575 max mem: 9305 +Train: [95] [1300/6250] eta: 0:12:30 lr: 0.000001 grad: 0.1575 (0.1795) loss: 0.8633 (0.8531) time: 0.1352 data: 0.0483 max mem: 9305 +Train: [95] [1400/6250] eta: 0:12:16 lr: 0.000001 grad: 0.1469 (0.1783) loss: 0.8601 (0.8534) time: 0.1464 data: 0.0583 max mem: 9305 +Train: [95] [1500/6250] eta: 0:12:06 lr: 0.000001 grad: 0.1604 (0.1775) loss: 0.8569 (0.8536) time: 0.1326 data: 0.0425 max mem: 9305 +Train: [95] [1600/6250] eta: 0:11:46 lr: 0.000001 grad: 0.1549 (0.1767) loss: 0.8490 (0.8537) time: 0.1443 data: 0.0560 max mem: 9305 +Train: [95] [1700/6250] eta: 0:11:32 lr: 0.000001 grad: 0.1480 (0.1757) loss: 0.8610 (0.8540) time: 0.1274 data: 0.0360 max mem: 9305 +Train: [95] [1800/6250] eta: 0:11:14 lr: 0.000001 grad: 0.1578 (0.1749) loss: 0.8608 (0.8543) time: 0.1454 data: 0.0534 max mem: 9305 +Train: [95] [1900/6250] eta: 0:10:58 lr: 0.000001 grad: 0.1516 (0.1743) loss: 0.8665 (0.8546) time: 0.1386 data: 0.0442 max mem: 9305 +Train: [95] [2000/6250] eta: 0:10:40 lr: 0.000001 grad: 0.1551 (0.1734) loss: 0.8625 (0.8549) time: 0.1116 data: 0.0229 max mem: 9305 +Train: [95] [2100/6250] eta: 0:10:25 lr: 0.000001 grad: 0.1478 (0.1726) loss: 0.8600 (0.8552) time: 0.1606 data: 0.0800 max mem: 9305 +Train: [95] [2200/6250] eta: 0:10:10 lr: 0.000001 grad: 0.1515 (0.1719) loss: 0.8596 (0.8553) time: 0.2088 data: 0.1220 max mem: 9305 +Train: [95] [2300/6250] eta: 0:09:53 lr: 0.000001 grad: 0.1571 (0.1714) loss: 0.8516 (0.8555) time: 0.1258 data: 0.0367 max mem: 9305 +Train: [95] [2400/6250] eta: 0:09:35 lr: 0.000001 grad: 0.1622 (0.1712) loss: 0.8587 (0.8556) time: 0.1151 data: 0.0329 max mem: 9305 +Train: [95] [2500/6250] eta: 0:09:20 lr: 0.000001 grad: 0.1605 (0.1708) loss: 0.8641 (0.8558) time: 0.1415 data: 0.0558 max mem: 9305 +Train: [95] [2600/6250] eta: 0:09:02 lr: 0.000001 grad: 0.1632 (0.1704) loss: 0.8602 (0.8560) time: 0.1476 data: 0.0588 max mem: 9305 +Train: [95] [2700/6250] eta: 0:08:46 lr: 0.000001 grad: 0.1546 (0.1699) loss: 0.8621 (0.8562) time: 0.1384 data: 0.0548 max mem: 9305 +Train: [95] [2800/6250] eta: 0:08:29 lr: 0.000001 grad: 0.1553 (0.1697) loss: 0.8517 (0.8562) time: 0.1359 data: 0.0518 max mem: 9305 +Train: [95] [2900/6250] eta: 0:08:14 lr: 0.000001 grad: 0.1436 (0.1694) loss: 0.8613 (0.8564) time: 0.1495 data: 0.0612 max mem: 9305 +Train: [95] [3000/6250] eta: 0:07:59 lr: 0.000001 grad: 0.1700 (0.1694) loss: 0.8554 (0.8564) time: 0.1640 data: 0.0857 max mem: 9305 +Train: [95] [3100/6250] eta: 0:07:43 lr: 0.000001 grad: 0.1519 (0.1692) loss: 0.8649 (0.8566) time: 0.1418 data: 0.0587 max mem: 9305 +Train: [95] [3200/6250] eta: 0:07:28 lr: 0.000001 grad: 0.1512 (0.1689) loss: 0.8630 (0.8567) time: 0.1495 data: 0.0680 max mem: 9305 +Train: [95] [3300/6250] eta: 0:07:12 lr: 0.000001 grad: 0.1723 (0.1690) loss: 0.8560 (0.8568) time: 0.1508 data: 0.0733 max mem: 9305 +Train: [95] [3400/6250] eta: 0:06:58 lr: 0.000001 grad: 0.1620 (0.1690) loss: 0.8517 (0.8567) time: 0.1493 data: 0.0712 max mem: 9305 +Train: [95] [3500/6250] eta: 0:06:42 lr: 0.000001 grad: 0.1593 (0.1690) loss: 0.8624 (0.8568) time: 0.1249 data: 0.0339 max mem: 9305 +Train: [95] [3600/6250] eta: 0:06:27 lr: 0.000001 grad: 0.1613 (0.1690) loss: 0.8563 (0.8568) time: 0.1243 data: 0.0379 max mem: 9305 +Train: [95] [3700/6250] eta: 0:06:12 lr: 0.000001 grad: 0.1702 (0.1691) loss: 0.8604 (0.8568) time: 0.1300 data: 0.0419 max mem: 9305 +Train: [95] [3800/6250] eta: 0:05:58 lr: 0.000001 grad: 0.1661 (0.1692) loss: 0.8537 (0.8568) time: 0.1440 data: 0.0587 max mem: 9305 +Train: [95] [3900/6250] eta: 0:05:42 lr: 0.000001 grad: 0.1571 (0.1692) loss: 0.8557 (0.8568) time: 0.1280 data: 0.0423 max mem: 9305 +Train: [95] [4000/6250] eta: 0:05:27 lr: 0.000001 grad: 0.1465 (0.1693) loss: 0.8637 (0.8568) time: 0.1479 data: 0.0676 max mem: 9305 +Train: [95] [4100/6250] eta: 0:05:13 lr: 0.000001 grad: 0.1678 (0.1694) loss: 0.8540 (0.8568) time: 0.1188 data: 0.0326 max mem: 9305 +Train: [95] [4200/6250] eta: 0:04:57 lr: 0.000001 grad: 0.1738 (0.1694) loss: 0.8530 (0.8567) time: 0.1464 data: 0.0650 max mem: 9305 +Train: [95] [4300/6250] eta: 0:04:43 lr: 0.000001 grad: 0.1767 (0.1696) loss: 0.8599 (0.8567) time: 0.1463 data: 0.0638 max mem: 9305 +Train: [95] [4400/6250] eta: 0:04:28 lr: 0.000001 grad: 0.1599 (0.1697) loss: 0.8554 (0.8566) time: 0.1468 data: 0.0665 max mem: 9305 +Train: [95] [4500/6250] eta: 0:04:13 lr: 0.000001 grad: 0.1771 (0.1699) loss: 0.8471 (0.8566) time: 0.1056 data: 0.0183 max mem: 9305 +Train: [95] [4600/6250] eta: 0:03:59 lr: 0.000001 grad: 0.1633 (0.1700) loss: 0.8540 (0.8565) time: 0.1518 data: 0.0625 max mem: 9305 +Train: [95] [4700/6250] eta: 0:03:44 lr: 0.000001 grad: 0.1566 (0.1699) loss: 0.8652 (0.8566) time: 0.1236 data: 0.0360 max mem: 9305 +Train: [95] [4800/6250] eta: 0:03:29 lr: 0.000001 grad: 0.1578 (0.1698) loss: 0.8619 (0.8566) time: 0.1379 data: 0.0535 max mem: 9305 +Train: [95] [4900/6250] eta: 0:03:15 lr: 0.000001 grad: 0.1657 (0.1698) loss: 0.8569 (0.8566) time: 0.2280 data: 0.1317 max mem: 9305 +Train: [95] [5000/6250] eta: 0:03:00 lr: 0.000001 grad: 0.1738 (0.1699) loss: 0.8524 (0.8566) time: 0.1349 data: 0.0559 max mem: 9305 +Train: [95] [5100/6250] eta: 0:02:46 lr: 0.000001 grad: 0.1734 (0.1700) loss: 0.8526 (0.8566) time: 0.1682 data: 0.0832 max mem: 9305 +Train: [95] [5200/6250] eta: 0:02:31 lr: 0.000001 grad: 0.1763 (0.1699) loss: 0.8565 (0.8566) time: 0.1413 data: 0.0526 max mem: 9305 +Train: [95] [5300/6250] eta: 0:02:17 lr: 0.000001 grad: 0.1553 (0.1698) loss: 0.8546 (0.8567) time: 0.1526 data: 0.0542 max mem: 9305 +Train: [95] [5400/6250] eta: 0:02:02 lr: 0.000001 grad: 0.1555 (0.1697) loss: 0.8531 (0.8567) time: 0.1165 data: 0.0310 max mem: 9305 +Train: [95] [5500/6250] eta: 0:01:48 lr: 0.000001 grad: 0.1600 (0.1697) loss: 0.8565 (0.8567) time: 0.1516 data: 0.0705 max mem: 9305 +Train: [95] [5600/6250] eta: 0:01:33 lr: 0.000001 grad: 0.1623 (0.1697) loss: 0.8565 (0.8567) time: 0.1349 data: 0.0536 max mem: 9305 +Train: [95] [5700/6250] eta: 0:01:19 lr: 0.000001 grad: 0.1467 (0.1696) loss: 0.8589 (0.8567) time: 0.1257 data: 0.0461 max mem: 9305 +Train: [95] [5800/6250] eta: 0:01:04 lr: 0.000001 grad: 0.1615 (0.1697) loss: 0.8644 (0.8567) time: 0.1453 data: 0.0636 max mem: 9305 +Train: [95] [5900/6250] eta: 0:00:50 lr: 0.000001 grad: 0.1613 (0.1696) loss: 0.8527 (0.8567) time: 0.1144 data: 0.0306 max mem: 9305 +Train: [95] [6000/6250] eta: 0:00:36 lr: 0.000001 grad: 0.1525 (0.1695) loss: 0.8570 (0.8568) time: 0.1473 data: 0.0671 max mem: 9305 +Train: [95] [6100/6250] eta: 0:00:21 lr: 0.000001 grad: 0.1622 (0.1694) loss: 0.8560 (0.8568) time: 0.0914 data: 0.0002 max mem: 9305 +Train: [95] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1565 (0.1692) loss: 0.8578 (0.8568) time: 0.1482 data: 0.0644 max mem: 9305 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1502 (0.1692) loss: 0.8609 (0.8568) time: 0.1288 data: 0.0487 max mem: 9305 +Train: [95] Total time: 0:15:05 (0.1449 s / it) +Averaged stats: lr: 0.000001 grad: 0.1502 (0.1692) loss: 0.8609 (0.8568) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:04:12 loss: 0.8588 (0.8588) time: 4.0764 data: 3.9932 max mem: 9305 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.8554 (0.8563) time: 0.1242 data: 0.0958 max mem: 9305 +Eval (hcp-train-subset): [95] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (hcp-train-subset): loss: 0.8554 (0.8563) +Eval (hcp-val): [95] [ 0/62] eta: 0:04:15 loss: 0.8697 (0.8697) time: 4.1203 data: 4.0573 max mem: 9305 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.8708 (0.8723) time: 0.1191 data: 0.0909 max mem: 9305 +Eval (hcp-val): [95] Total time: 0:00:13 (0.2121 s / it) +Averaged stats (hcp-val): loss: 0.8708 (0.8723) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [96] [ 0/6250] eta: 10:07:49 lr: 0.000001 grad: 0.1929 (0.1929) loss: 0.8679 (0.8679) time: 5.8350 data: 5.7013 max mem: 9305 +Train: [96] [ 100/6250] eta: 0:20:12 lr: 0.000001 grad: 0.1783 (0.2351) loss: 0.8617 (0.8620) time: 0.1186 data: 0.0096 max mem: 9305 +Train: [96] [ 200/6250] eta: 0:17:18 lr: 0.000001 grad: 0.1717 (0.2213) loss: 0.8579 (0.8588) time: 0.1758 data: 0.0803 max mem: 9305 +Train: [96] [ 300/6250] eta: 0:16:19 lr: 0.000001 grad: 0.1588 (0.2088) loss: 0.8656 (0.8596) time: 0.1558 data: 0.0648 max mem: 9305 +Train: [96] [ 400/6250] eta: 0:15:49 lr: 0.000001 grad: 0.1751 (0.2023) loss: 0.8626 (0.8600) time: 0.1756 data: 0.0811 max mem: 9305 +Train: [96] [ 500/6250] eta: 0:15:34 lr: 0.000001 grad: 0.1926 (0.1990) loss: 0.8563 (0.8592) time: 0.1789 data: 0.0860 max mem: 9305 +Train: [96] [ 600/6250] eta: 0:15:07 lr: 0.000001 grad: 0.1841 (0.1968) loss: 0.8545 (0.8585) time: 0.1610 data: 0.0713 max mem: 9305 +Train: [96] [ 700/6250] eta: 0:14:27 lr: 0.000001 grad: 0.1656 (0.1938) loss: 0.8574 (0.8579) time: 0.1135 data: 0.0223 max mem: 9305 +Train: [96] [ 800/6250] eta: 0:13:55 lr: 0.000001 grad: 0.1736 (0.1914) loss: 0.8598 (0.8577) time: 0.1402 data: 0.0482 max mem: 9305 +Train: [96] [ 900/6250] eta: 0:13:26 lr: 0.000001 grad: 0.1510 (0.1890) loss: 0.8650 (0.8579) time: 0.1632 data: 0.0786 max mem: 9305 +Train: [96] [1000/6250] eta: 0:12:51 lr: 0.000001 grad: 0.1644 (0.1868) loss: 0.8611 (0.8581) time: 0.1180 data: 0.0305 max mem: 9305 +Train: [96] [1100/6250] eta: 0:12:27 lr: 0.000000 grad: 0.1614 (0.1851) loss: 0.8594 (0.8581) time: 0.1419 data: 0.0580 max mem: 9305 +Train: [96] [1200/6250] eta: 0:12:07 lr: 0.000000 grad: 0.1583 (0.1836) loss: 0.8591 (0.8584) time: 0.1396 data: 0.0506 max mem: 9305 +Train: [96] [1300/6250] eta: 0:11:48 lr: 0.000000 grad: 0.1615 (0.1821) loss: 0.8630 (0.8585) time: 0.1421 data: 0.0336 max mem: 9305 +Train: [96] [1400/6250] eta: 0:11:33 lr: 0.000000 grad: 0.1548 (0.1807) loss: 0.8591 (0.8586) time: 0.1356 data: 0.0484 max mem: 9305 +Train: [96] [1500/6250] eta: 0:11:20 lr: 0.000000 grad: 0.1604 (0.1792) loss: 0.8610 (0.8588) time: 0.1617 data: 0.0665 max mem: 9305 +Train: [96] [1600/6250] eta: 0:11:14 lr: 0.000000 grad: 0.1563 (0.1780) loss: 0.8617 (0.8590) time: 0.2125 data: 0.1257 max mem: 9305 +Train: [96] [1700/6250] eta: 0:10:55 lr: 0.000000 grad: 0.1504 (0.1767) loss: 0.8646 (0.8592) time: 0.1116 data: 0.0272 max mem: 9305 +Train: [96] [1800/6250] eta: 0:10:43 lr: 0.000000 grad: 0.1557 (0.1759) loss: 0.8634 (0.8593) time: 0.1619 data: 0.0742 max mem: 9305 +Train: [96] [1900/6250] eta: 0:10:34 lr: 0.000000 grad: 0.1539 (0.1752) loss: 0.8619 (0.8592) time: 0.1157 data: 0.0168 max mem: 9305 +Train: [96] [2000/6250] eta: 0:10:18 lr: 0.000000 grad: 0.1564 (0.1746) loss: 0.8592 (0.8591) time: 0.1371 data: 0.0559 max mem: 9305 +Train: [96] [2100/6250] eta: 0:10:03 lr: 0.000000 grad: 0.1565 (0.1738) loss: 0.8505 (0.8590) time: 0.1418 data: 0.0541 max mem: 9305 +Train: [96] [2200/6250] eta: 0:09:49 lr: 0.000000 grad: 0.1499 (0.1730) loss: 0.8598 (0.8590) time: 0.1443 data: 0.0603 max mem: 9305 +Train: [96] [2300/6250] eta: 0:09:35 lr: 0.000000 grad: 0.1663 (0.1723) loss: 0.8583 (0.8588) time: 0.1009 data: 0.0110 max mem: 9305 +Train: [96] [2400/6250] eta: 0:09:18 lr: 0.000000 grad: 0.1435 (0.1720) loss: 0.8585 (0.8587) time: 0.1265 data: 0.0485 max mem: 9305 +Train: [96] [2500/6250] eta: 0:09:02 lr: 0.000000 grad: 0.1737 (0.1719) loss: 0.8519 (0.8584) time: 0.1376 data: 0.0603 max mem: 9305 +Train: [96] [2600/6250] eta: 0:08:49 lr: 0.000000 grad: 0.1587 (0.1718) loss: 0.8580 (0.8582) time: 0.1388 data: 0.0533 max mem: 9305 +Train: [96] [2700/6250] eta: 0:08:31 lr: 0.000000 grad: 0.1559 (0.1717) loss: 0.8570 (0.8581) time: 0.1350 data: 0.0523 max mem: 9305 +Train: [96] [2800/6250] eta: 0:08:15 lr: 0.000000 grad: 0.1696 (0.1716) loss: 0.8541 (0.8579) time: 0.1489 data: 0.0680 max mem: 9305 +Train: [96] [2900/6250] eta: 0:07:59 lr: 0.000000 grad: 0.1650 (0.1716) loss: 0.8539 (0.8577) time: 0.1313 data: 0.0488 max mem: 9305 +Train: [96] [3000/6250] eta: 0:07:44 lr: 0.000000 grad: 0.1641 (0.1714) loss: 0.8546 (0.8575) time: 0.1212 data: 0.0394 max mem: 9305 +Train: [96] [3100/6250] eta: 0:07:29 lr: 0.000000 grad: 0.1598 (0.1713) loss: 0.8562 (0.8573) time: 0.1433 data: 0.0622 max mem: 9305 +Train: [96] [3200/6250] eta: 0:07:14 lr: 0.000000 grad: 0.1668 (0.1713) loss: 0.8535 (0.8573) time: 0.1389 data: 0.0601 max mem: 9305 +Train: [96] [3300/6250] eta: 0:06:59 lr: 0.000000 grad: 0.1617 (0.1713) loss: 0.8558 (0.8572) time: 0.1411 data: 0.0590 max mem: 9305 +Train: [96] [3400/6250] eta: 0:06:44 lr: 0.000000 grad: 0.1584 (0.1713) loss: 0.8570 (0.8571) time: 0.1174 data: 0.0359 max mem: 9305 +Train: [96] [3500/6250] eta: 0:06:28 lr: 0.000000 grad: 0.1577 (0.1714) loss: 0.8562 (0.8570) time: 0.1337 data: 0.0501 max mem: 9305 +Train: [96] [3600/6250] eta: 0:06:13 lr: 0.000000 grad: 0.1647 (0.1714) loss: 0.8592 (0.8570) time: 0.1209 data: 0.0336 max mem: 9305 +Train: [96] [3700/6250] eta: 0:05:59 lr: 0.000000 grad: 0.1689 (0.1714) loss: 0.8591 (0.8570) time: 0.1444 data: 0.0653 max mem: 9305 +Train: [96] [3800/6250] eta: 0:05:44 lr: 0.000000 grad: 0.1745 (0.1714) loss: 0.8579 (0.8569) time: 0.1271 data: 0.0424 max mem: 9305 +Train: [96] [3900/6250] eta: 0:05:30 lr: 0.000000 grad: 0.1556 (0.1713) loss: 0.8666 (0.8570) time: 0.1356 data: 0.0545 max mem: 9305 +Train: [96] [4000/6250] eta: 0:05:16 lr: 0.000000 grad: 0.1650 (0.1711) loss: 0.8609 (0.8571) time: 0.1462 data: 0.0635 max mem: 9305 +Train: [96] [4100/6250] eta: 0:05:04 lr: 0.000000 grad: 0.1487 (0.1709) loss: 0.8655 (0.8572) time: 0.0928 data: 0.0002 max mem: 9305 +Train: [96] [4200/6250] eta: 0:04:49 lr: 0.000000 grad: 0.1638 (0.1709) loss: 0.8601 (0.8572) time: 0.1477 data: 0.0683 max mem: 9305 +Train: [96] [4300/6250] eta: 0:04:36 lr: 0.000000 grad: 0.1447 (0.1706) loss: 0.8547 (0.8572) time: 0.1692 data: 0.0791 max mem: 9305 +Train: [96] [4400/6250] eta: 0:04:22 lr: 0.000000 grad: 0.1465 (0.1705) loss: 0.8591 (0.8573) time: 0.1551 data: 0.0761 max mem: 9305 +Train: [96] [4500/6250] eta: 0:04:07 lr: 0.000000 grad: 0.1602 (0.1703) loss: 0.8604 (0.8573) time: 0.1390 data: 0.0600 max mem: 9305 +Train: [96] [4600/6250] eta: 0:03:53 lr: 0.000000 grad: 0.1572 (0.1702) loss: 0.8565 (0.8574) time: 0.1274 data: 0.0416 max mem: 9305 +Train: [96] [4700/6250] eta: 0:03:39 lr: 0.000000 grad: 0.1657 (0.1702) loss: 0.8586 (0.8574) time: 0.1418 data: 0.0598 max mem: 9305 +Train: [96] [4800/6250] eta: 0:03:25 lr: 0.000000 grad: 0.1628 (0.1702) loss: 0.8559 (0.8574) time: 0.1629 data: 0.0806 max mem: 9305 +Train: [96] [4900/6250] eta: 0:03:11 lr: 0.000000 grad: 0.1733 (0.1703) loss: 0.8605 (0.8574) time: 0.1601 data: 0.0611 max mem: 9305 +Train: [96] [5000/6250] eta: 0:02:57 lr: 0.000000 grad: 0.1722 (0.1703) loss: 0.8586 (0.8574) time: 0.1185 data: 0.0337 max mem: 9305 +Train: [96] [5100/6250] eta: 0:02:42 lr: 0.000000 grad: 0.1562 (0.1703) loss: 0.8628 (0.8574) time: 0.1023 data: 0.0216 max mem: 9305 +Train: [96] [5200/6250] eta: 0:02:28 lr: 0.000000 grad: 0.1713 (0.1703) loss: 0.8606 (0.8574) time: 0.1131 data: 0.0324 max mem: 9305 +Train: [96] [5300/6250] eta: 0:02:14 lr: 0.000000 grad: 0.1592 (0.1702) loss: 0.8568 (0.8574) time: 0.1685 data: 0.0893 max mem: 9305 +Train: [96] [5400/6250] eta: 0:01:59 lr: 0.000000 grad: 0.1692 (0.1702) loss: 0.8549 (0.8574) time: 0.0909 data: 0.0002 max mem: 9305 +Train: [96] [5500/6250] eta: 0:01:45 lr: 0.000000 grad: 0.1587 (0.1701) loss: 0.8620 (0.8574) time: 0.1411 data: 0.0564 max mem: 9305 +Train: [96] [5600/6250] eta: 0:01:31 lr: 0.000000 grad: 0.1647 (0.1702) loss: 0.8507 (0.8573) time: 0.1242 data: 0.0426 max mem: 9305 +Train: [96] [5700/6250] eta: 0:01:17 lr: 0.000000 grad: 0.1665 (0.1702) loss: 0.8511 (0.8572) time: 0.1307 data: 0.0432 max mem: 9305 +Train: [96] [5800/6250] eta: 0:01:03 lr: 0.000000 grad: 0.1644 (0.1702) loss: 0.8542 (0.8572) time: 0.1215 data: 0.0396 max mem: 9305 +Train: [96] [5900/6250] eta: 0:00:49 lr: 0.000000 grad: 0.1716 (0.1703) loss: 0.8531 (0.8571) time: 0.1392 data: 0.0584 max mem: 9305 +Train: [96] [6000/6250] eta: 0:00:35 lr: 0.000000 grad: 0.1743 (0.1704) loss: 0.8461 (0.8570) time: 0.1408 data: 0.0516 max mem: 9305 +Train: [96] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.1653 (0.1704) loss: 0.8544 (0.8570) time: 0.1193 data: 0.0368 max mem: 9305 +Train: [96] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1585 (0.1703) loss: 0.8616 (0.8569) time: 0.1503 data: 0.0590 max mem: 9305 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1599 (0.1703) loss: 0.8573 (0.8569) time: 0.2517 data: 0.1465 max mem: 9305 +Train: [96] Total time: 0:14:49 (0.1423 s / it) +Averaged stats: lr: 0.000000 grad: 0.1599 (0.1703) loss: 0.8573 (0.8569) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:05:52 loss: 0.8591 (0.8591) time: 5.6780 data: 5.6420 max mem: 9305 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.8523 (0.8556) time: 0.1624 data: 0.1316 max mem: 9305 +Eval (hcp-train-subset): [96] Total time: 0:00:14 (0.2389 s / it) +Averaged stats (hcp-train-subset): loss: 0.8523 (0.8556) +Eval (hcp-val): [96] [ 0/62] eta: 0:04:42 loss: 0.8698 (0.8698) time: 4.5541 data: 4.4977 max mem: 9305 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.8727 (0.8729) time: 0.1296 data: 0.1011 max mem: 9305 +Eval (hcp-val): [96] Total time: 0:00:14 (0.2371 s / it) +Averaged stats (hcp-val): loss: 0.8727 (0.8729) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [97] [ 0/6250] eta: 12:35:17 lr: 0.000000 grad: 0.3150 (0.3150) loss: 0.8573 (0.8573) time: 7.2508 data: 7.0814 max mem: 9305 +Train: [97] [ 100/6250] eta: 0:22:12 lr: 0.000000 grad: 0.2250 (0.2524) loss: 0.8376 (0.8525) time: 0.1767 data: 0.0689 max mem: 9305 +Train: [97] [ 200/6250] eta: 0:19:27 lr: 0.000000 grad: 0.1807 (0.2302) loss: 0.8554 (0.8517) time: 0.1848 data: 0.0933 max mem: 9305 +Train: [97] [ 300/6250] eta: 0:18:24 lr: 0.000000 grad: 0.1596 (0.2168) loss: 0.8624 (0.8511) time: 0.1902 data: 0.0902 max mem: 9305 +Train: [97] [ 400/6250] eta: 0:17:04 lr: 0.000000 grad: 0.1848 (0.2095) loss: 0.8478 (0.8498) time: 0.1431 data: 0.0542 max mem: 9305 +Train: [97] [ 500/6250] eta: 0:16:08 lr: 0.000000 grad: 0.1655 (0.2046) loss: 0.8432 (0.8494) time: 0.1339 data: 0.0550 max mem: 9305 +Train: [97] [ 600/6250] eta: 0:15:21 lr: 0.000000 grad: 0.1840 (0.1998) loss: 0.8544 (0.8501) time: 0.1331 data: 0.0493 max mem: 9305 +Train: [97] [ 700/6250] eta: 0:14:42 lr: 0.000000 grad: 0.1789 (0.1952) loss: 0.8613 (0.8512) time: 0.1304 data: 0.0430 max mem: 9305 +Train: [97] [ 800/6250] eta: 0:14:03 lr: 0.000000 grad: 0.1748 (0.1921) loss: 0.8570 (0.8520) time: 0.1038 data: 0.0191 max mem: 9305 +Train: [97] [ 900/6250] eta: 0:13:28 lr: 0.000000 grad: 0.1528 (0.1902) loss: 0.8711 (0.8527) time: 0.1183 data: 0.0380 max mem: 9305 +Train: [97] [1000/6250] eta: 0:12:52 lr: 0.000000 grad: 0.1552 (0.1882) loss: 0.8584 (0.8534) time: 0.1083 data: 0.0242 max mem: 9305 +Train: [97] [1100/6250] eta: 0:12:24 lr: 0.000000 grad: 0.1788 (0.1867) loss: 0.8564 (0.8538) time: 0.1142 data: 0.0314 max mem: 9305 +Train: [97] [1200/6250] eta: 0:12:01 lr: 0.000000 grad: 0.1622 (0.1849) loss: 0.8590 (0.8542) time: 0.1317 data: 0.0523 max mem: 9305 +Train: [97] [1300/6250] eta: 0:11:39 lr: 0.000000 grad: 0.1642 (0.1835) loss: 0.8616 (0.8546) time: 0.1378 data: 0.0587 max mem: 9305 +Train: [97] [1400/6250] eta: 0:11:16 lr: 0.000000 grad: 0.1591 (0.1827) loss: 0.8651 (0.8548) time: 0.1273 data: 0.0487 max mem: 9305 +Train: [97] [1500/6250] eta: 0:10:57 lr: 0.000000 grad: 0.1650 (0.1822) loss: 0.8531 (0.8548) time: 0.1305 data: 0.0495 max mem: 9305 +Train: [97] [1600/6250] eta: 0:10:37 lr: 0.000000 grad: 0.1720 (0.1814) loss: 0.8504 (0.8550) time: 0.1218 data: 0.0424 max mem: 9305 +Train: [97] [1700/6250] eta: 0:10:18 lr: 0.000000 grad: 0.1651 (0.1810) loss: 0.8561 (0.8550) time: 0.1250 data: 0.0437 max mem: 9305 +Train: [97] [1800/6250] eta: 0:10:00 lr: 0.000000 grad: 0.1730 (0.1806) loss: 0.8464 (0.8549) time: 0.1111 data: 0.0306 max mem: 9305 +Train: [97] [1900/6250] eta: 0:09:41 lr: 0.000000 grad: 0.1557 (0.1803) loss: 0.8518 (0.8549) time: 0.1245 data: 0.0494 max mem: 9305 +Train: [97] [2000/6250] eta: 0:09:24 lr: 0.000000 grad: 0.1726 (0.1798) loss: 0.8525 (0.8549) time: 0.1250 data: 0.0462 max mem: 9305 +Train: [97] [2100/6250] eta: 0:09:08 lr: 0.000000 grad: 0.1650 (0.1794) loss: 0.8533 (0.8550) time: 0.1190 data: 0.0408 max mem: 9305 +Train: [97] [2200/6250] eta: 0:08:53 lr: 0.000000 grad: 0.1659 (0.1791) loss: 0.8585 (0.8551) time: 0.1243 data: 0.0449 max mem: 9305 +Train: [97] [2300/6250] eta: 0:08:37 lr: 0.000000 grad: 0.1576 (0.1788) loss: 0.8581 (0.8551) time: 0.1225 data: 0.0423 max mem: 9305 +Train: [97] [2400/6250] eta: 0:08:22 lr: 0.000000 grad: 0.1772 (0.1788) loss: 0.8546 (0.8550) time: 0.1157 data: 0.0348 max mem: 9305 +Train: [97] [2500/6250] eta: 0:08:07 lr: 0.000000 grad: 0.1749 (0.1788) loss: 0.8546 (0.8549) time: 0.1212 data: 0.0390 max mem: 9305 +Train: [97] [2600/6250] eta: 0:07:53 lr: 0.000000 grad: 0.1617 (0.1785) loss: 0.8577 (0.8550) time: 0.1282 data: 0.0499 max mem: 9305 +Train: [97] [2700/6250] eta: 0:07:38 lr: 0.000000 grad: 0.1686 (0.1782) loss: 0.8555 (0.8550) time: 0.1264 data: 0.0469 max mem: 9305 +Train: [97] [2800/6250] eta: 0:07:23 lr: 0.000000 grad: 0.1675 (0.1778) loss: 0.8564 (0.8550) time: 0.1133 data: 0.0274 max mem: 9305 +Train: [97] [2900/6250] eta: 0:07:09 lr: 0.000000 grad: 0.1726 (0.1774) loss: 0.8542 (0.8551) time: 0.1294 data: 0.0477 max mem: 9305 +Train: [97] [3000/6250] eta: 0:06:55 lr: 0.000000 grad: 0.1677 (0.1772) loss: 0.8534 (0.8551) time: 0.1207 data: 0.0397 max mem: 9305 +Train: [97] [3100/6250] eta: 0:06:41 lr: 0.000000 grad: 0.1591 (0.1769) loss: 0.8539 (0.8552) time: 0.1211 data: 0.0357 max mem: 9305 +Train: [97] [3200/6250] eta: 0:06:27 lr: 0.000000 grad: 0.1566 (0.1768) loss: 0.8558 (0.8552) time: 0.1028 data: 0.0207 max mem: 9305 +Train: [97] [3300/6250] eta: 0:06:13 lr: 0.000000 grad: 0.1594 (0.1765) loss: 0.8591 (0.8552) time: 0.1277 data: 0.0400 max mem: 9305 +Train: [97] [3400/6250] eta: 0:06:00 lr: 0.000000 grad: 0.1626 (0.1761) loss: 0.8514 (0.8552) time: 0.1073 data: 0.0212 max mem: 9305 +Train: [97] [3500/6250] eta: 0:05:48 lr: 0.000000 grad: 0.1727 (0.1760) loss: 0.8506 (0.8552) time: 0.1526 data: 0.0534 max mem: 9305 +Train: [97] [3600/6250] eta: 0:05:35 lr: 0.000000 grad: 0.1631 (0.1760) loss: 0.8569 (0.8551) time: 0.0891 data: 0.0002 max mem: 9305 +Train: [97] [3700/6250] eta: 0:05:22 lr: 0.000000 grad: 0.1577 (0.1757) loss: 0.8639 (0.8552) time: 0.1134 data: 0.0305 max mem: 9305 +Train: [97] [3800/6250] eta: 0:05:09 lr: 0.000000 grad: 0.1693 (0.1756) loss: 0.8520 (0.8552) time: 0.1196 data: 0.0410 max mem: 9305 +Train: [97] [3900/6250] eta: 0:04:55 lr: 0.000000 grad: 0.1634 (0.1756) loss: 0.8591 (0.8551) time: 0.1171 data: 0.0412 max mem: 9305 +Train: [97] [4000/6250] eta: 0:04:42 lr: 0.000000 grad: 0.1529 (0.1754) loss: 0.8556 (0.8552) time: 0.1202 data: 0.0401 max mem: 9305 +Train: [97] [4100/6250] eta: 0:04:30 lr: 0.000000 grad: 0.1634 (0.1751) loss: 0.8547 (0.8552) time: 0.1169 data: 0.0399 max mem: 9305 +Train: [97] [4200/6250] eta: 0:04:17 lr: 0.000000 grad: 0.1586 (0.1750) loss: 0.8558 (0.8551) time: 0.1018 data: 0.0203 max mem: 9305 +Train: [97] [4300/6250] eta: 0:04:04 lr: 0.000000 grad: 0.1479 (0.1748) loss: 0.8534 (0.8552) time: 0.1315 data: 0.0479 max mem: 9305 +Train: [97] [4400/6250] eta: 0:03:52 lr: 0.000000 grad: 0.1585 (0.1746) loss: 0.8505 (0.8552) time: 0.0859 data: 0.0002 max mem: 9305 +Train: [97] [4500/6250] eta: 0:03:40 lr: 0.000000 grad: 0.1532 (0.1744) loss: 0.8576 (0.8552) time: 0.1450 data: 0.0520 max mem: 9305 +Train: [97] [4600/6250] eta: 0:03:27 lr: 0.000000 grad: 0.1601 (0.1744) loss: 0.8556 (0.8552) time: 0.1090 data: 0.0311 max mem: 9305 +Train: [97] [4700/6250] eta: 0:03:14 lr: 0.000000 grad: 0.1754 (0.1744) loss: 0.8542 (0.8552) time: 0.1138 data: 0.0373 max mem: 9305 +Train: [97] [4800/6250] eta: 0:03:02 lr: 0.000000 grad: 0.1617 (0.1742) loss: 0.8566 (0.8552) time: 0.1267 data: 0.0496 max mem: 9305 +Train: [97] [4900/6250] eta: 0:02:49 lr: 0.000000 grad: 0.1603 (0.1740) loss: 0.8568 (0.8553) time: 0.1278 data: 0.0434 max mem: 9305 +Train: [97] [5000/6250] eta: 0:02:37 lr: 0.000000 grad: 0.1665 (0.1738) loss: 0.8529 (0.8553) time: 0.1326 data: 0.0550 max mem: 9305 +Train: [97] [5100/6250] eta: 0:02:24 lr: 0.000000 grad: 0.1650 (0.1737) loss: 0.8627 (0.8553) time: 0.1207 data: 0.0420 max mem: 9305 +Train: [97] [5200/6250] eta: 0:02:11 lr: 0.000000 grad: 0.1617 (0.1736) loss: 0.8607 (0.8554) time: 0.1229 data: 0.0434 max mem: 9305 +Train: [97] [5300/6250] eta: 0:01:59 lr: 0.000000 grad: 0.1651 (0.1734) loss: 0.8580 (0.8555) time: 0.1361 data: 0.0477 max mem: 9305 +Train: [97] [5400/6250] eta: 0:01:46 lr: 0.000000 grad: 0.1514 (0.1732) loss: 0.8631 (0.8555) time: 0.1164 data: 0.0388 max mem: 9305 +Train: [97] [5500/6250] eta: 0:01:34 lr: 0.000000 grad: 0.1636 (0.1730) loss: 0.8501 (0.8555) time: 0.1290 data: 0.0511 max mem: 9305 +Train: [97] [5600/6250] eta: 0:01:21 lr: 0.000000 grad: 0.1625 (0.1729) loss: 0.8597 (0.8556) time: 0.1460 data: 0.0496 max mem: 9305 +Train: [97] [5700/6250] eta: 0:01:09 lr: 0.000000 grad: 0.1562 (0.1728) loss: 0.8590 (0.8556) time: 0.1247 data: 0.0430 max mem: 9305 +Train: [97] [5800/6250] eta: 0:00:56 lr: 0.000000 grad: 0.1533 (0.1728) loss: 0.8590 (0.8557) time: 0.1247 data: 0.0450 max mem: 9305 +Train: [97] [5900/6250] eta: 0:00:43 lr: 0.000000 grad: 0.1582 (0.1727) loss: 0.8547 (0.8557) time: 0.1210 data: 0.0410 max mem: 9305 +Train: [97] [6000/6250] eta: 0:00:31 lr: 0.000000 grad: 0.1648 (0.1727) loss: 0.8542 (0.8557) time: 0.1157 data: 0.0368 max mem: 9305 +Train: [97] [6100/6250] eta: 0:00:18 lr: 0.000000 grad: 0.1652 (0.1727) loss: 0.8505 (0.8557) time: 0.1095 data: 0.0306 max mem: 9305 +Train: [97] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.1559 (0.1728) loss: 0.8540 (0.8556) time: 0.1215 data: 0.0421 max mem: 9305 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1520 (0.1728) loss: 0.8513 (0.8556) time: 0.1371 data: 0.0509 max mem: 9305 +Train: [97] Total time: 0:13:08 (0.1262 s / it) +Averaged stats: lr: 0.000000 grad: 0.1520 (0.1728) loss: 0.8513 (0.8556) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:03:51 loss: 0.8570 (0.8570) time: 3.7372 data: 3.6626 max mem: 9305 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.8514 (0.8559) time: 0.1069 data: 0.0791 max mem: 9305 +Eval (hcp-train-subset): [97] Total time: 0:00:12 (0.1981 s / it) +Averaged stats (hcp-train-subset): loss: 0.8514 (0.8559) +Eval (hcp-val): [97] [ 0/62] eta: 0:04:32 loss: 0.8700 (0.8700) time: 4.3884 data: 4.3005 max mem: 9305 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.8711 (0.8726) time: 0.1428 data: 0.1147 max mem: 9305 +Eval (hcp-val): [97] Total time: 0:00:13 (0.2242 s / it) +Averaged stats (hcp-val): loss: 0.8711 (0.8726) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [98] [ 0/6250] eta: 8:39:01 lr: 0.000000 grad: 0.2738 (0.2738) loss: 0.8607 (0.8607) time: 4.9826 data: 4.7325 max mem: 9305 +Train: [98] [ 100/6250] eta: 0:19:55 lr: 0.000000 grad: 0.1689 (0.2112) loss: 0.8588 (0.8573) time: 0.1393 data: 0.0372 max mem: 9305 +Train: [98] [ 200/6250] eta: 0:17:01 lr: 0.000000 grad: 0.1613 (0.2006) loss: 0.8534 (0.8563) time: 0.1620 data: 0.0756 max mem: 9305 +Train: [98] [ 300/6250] eta: 0:15:13 lr: 0.000000 grad: 0.1719 (0.1901) loss: 0.8686 (0.8585) time: 0.1168 data: 0.0342 max mem: 9305 +Train: [98] [ 400/6250] eta: 0:14:03 lr: 0.000000 grad: 0.1772 (0.1880) loss: 0.8487 (0.8587) time: 0.1082 data: 0.0225 max mem: 9305 +Train: [98] [ 500/6250] eta: 0:13:18 lr: 0.000000 grad: 0.1588 (0.1864) loss: 0.8568 (0.8578) time: 0.1355 data: 0.0515 max mem: 9305 +Train: [98] [ 600/6250] eta: 0:12:38 lr: 0.000000 grad: 0.1690 (0.1846) loss: 0.8465 (0.8567) time: 0.1219 data: 0.0406 max mem: 9305 +Train: [98] [ 700/6250] eta: 0:12:07 lr: 0.000000 grad: 0.1571 (0.1829) loss: 0.8587 (0.8563) time: 0.1151 data: 0.0340 max mem: 9305 +Train: [98] [ 800/6250] eta: 0:11:44 lr: 0.000000 grad: 0.1621 (0.1813) loss: 0.8602 (0.8561) time: 0.1217 data: 0.0390 max mem: 9305 +Train: [98] [ 900/6250] eta: 0:11:31 lr: 0.000000 grad: 0.1587 (0.1795) loss: 0.8689 (0.8562) time: 0.1737 data: 0.0912 max mem: 9305 +Train: [98] [1000/6250] eta: 0:11:12 lr: 0.000000 grad: 0.1538 (0.1785) loss: 0.8601 (0.8564) time: 0.1193 data: 0.0335 max mem: 9305 +Train: [98] [1100/6250] eta: 0:11:01 lr: 0.000000 grad: 0.1662 (0.1780) loss: 0.8593 (0.8564) time: 0.0912 data: 0.0082 max mem: 9305 +Train: [98] [1200/6250] eta: 0:10:57 lr: 0.000000 grad: 0.1692 (0.1773) loss: 0.8504 (0.8566) time: 0.1822 data: 0.0945 max mem: 9305 +Train: [98] [1300/6250] eta: 0:10:45 lr: 0.000000 grad: 0.1597 (0.1766) loss: 0.8617 (0.8567) time: 0.1106 data: 0.0245 max mem: 9305 +Train: [98] [1400/6250] eta: 0:10:31 lr: 0.000000 grad: 0.1599 (0.1759) loss: 0.8584 (0.8566) time: 0.1228 data: 0.0408 max mem: 9305 +Train: [98] [1500/6250] eta: 0:10:22 lr: 0.000000 grad: 0.1574 (0.1754) loss: 0.8616 (0.8566) time: 0.0962 data: 0.0159 max mem: 9305 +Train: [98] [1600/6250] eta: 0:10:09 lr: 0.000000 grad: 0.1613 (0.1749) loss: 0.8570 (0.8565) time: 0.1352 data: 0.0549 max mem: 9305 +Train: [98] [1700/6250] eta: 0:09:54 lr: 0.000000 grad: 0.1654 (0.1744) loss: 0.8541 (0.8564) time: 0.1212 data: 0.0392 max mem: 9305 +Train: [98] [1800/6250] eta: 0:09:39 lr: 0.000000 grad: 0.1673 (0.1739) loss: 0.8527 (0.8563) time: 0.1076 data: 0.0264 max mem: 9305 +Train: [98] [1900/6250] eta: 0:09:25 lr: 0.000000 grad: 0.1613 (0.1733) loss: 0.8576 (0.8564) time: 0.1332 data: 0.0518 max mem: 9305 +Train: [98] [2000/6250] eta: 0:09:12 lr: 0.000000 grad: 0.1653 (0.1728) loss: 0.8525 (0.8563) time: 0.1454 data: 0.0622 max mem: 9305 +Train: [98] [2100/6250] eta: 0:08:57 lr: 0.000000 grad: 0.1684 (0.1725) loss: 0.8524 (0.8562) time: 0.1115 data: 0.0272 max mem: 9305 +Train: [98] [2200/6250] eta: 0:08:44 lr: 0.000000 grad: 0.1772 (0.1723) loss: 0.8528 (0.8562) time: 0.1339 data: 0.0544 max mem: 9305 +Train: [98] [2300/6250] eta: 0:08:31 lr: 0.000000 grad: 0.1587 (0.1721) loss: 0.8502 (0.8561) time: 0.1150 data: 0.0373 max mem: 9305 +Train: [98] [2400/6250] eta: 0:08:18 lr: 0.000000 grad: 0.1639 (0.1718) loss: 0.8584 (0.8560) time: 0.1231 data: 0.0424 max mem: 9305 +Train: [98] [2500/6250] eta: 0:08:04 lr: 0.000000 grad: 0.1607 (0.1717) loss: 0.8468 (0.8557) time: 0.1036 data: 0.0256 max mem: 9305 +Train: [98] [2600/6250] eta: 0:07:53 lr: 0.000000 grad: 0.1613 (0.1715) loss: 0.8498 (0.8556) time: 0.1502 data: 0.0673 max mem: 9305 +Train: [98] [2700/6250] eta: 0:07:42 lr: 0.000000 grad: 0.1662 (0.1714) loss: 0.8549 (0.8555) time: 0.1434 data: 0.0582 max mem: 9305 +Train: [98] [2800/6250] eta: 0:07:29 lr: 0.000000 grad: 0.1576 (0.1711) loss: 0.8584 (0.8555) time: 0.1244 data: 0.0445 max mem: 9305 +Train: [98] [2900/6250] eta: 0:07:15 lr: 0.000000 grad: 0.1665 (0.1709) loss: 0.8522 (0.8555) time: 0.1283 data: 0.0501 max mem: 9305 +Train: [98] [3000/6250] eta: 0:07:02 lr: 0.000000 grad: 0.1640 (0.1708) loss: 0.8544 (0.8555) time: 0.1273 data: 0.0418 max mem: 9305 +Train: [98] [3100/6250] eta: 0:06:50 lr: 0.000000 grad: 0.1660 (0.1707) loss: 0.8585 (0.8556) time: 0.1224 data: 0.0369 max mem: 9305 +Train: [98] [3200/6250] eta: 0:06:37 lr: 0.000000 grad: 0.1634 (0.1708) loss: 0.8603 (0.8556) time: 0.1206 data: 0.0442 max mem: 9305 +Train: [98] [3300/6250] eta: 0:06:24 lr: 0.000000 grad: 0.1489 (0.1708) loss: 0.8558 (0.8557) time: 0.1424 data: 0.0627 max mem: 9305 +Train: [98] [3400/6250] eta: 0:06:12 lr: 0.000000 grad: 0.1801 (0.1710) loss: 0.8515 (0.8556) time: 0.1464 data: 0.0697 max mem: 9305 +Train: [98] [3500/6250] eta: 0:05:58 lr: 0.000000 grad: 0.1619 (0.1711) loss: 0.8584 (0.8555) time: 0.1064 data: 0.0278 max mem: 9305 +Train: [98] [3600/6250] eta: 0:05:45 lr: 0.000000 grad: 0.1868 (0.1716) loss: 0.8555 (0.8554) time: 0.1876 data: 0.0860 max mem: 9305 +Train: [98] [3700/6250] eta: 0:05:32 lr: 0.000000 grad: 0.1752 (0.1720) loss: 0.8558 (0.8553) time: 0.1176 data: 0.0393 max mem: 9305 +Train: [98] [3800/6250] eta: 0:05:19 lr: 0.000000 grad: 0.1695 (0.1723) loss: 0.8542 (0.8552) time: 0.1190 data: 0.0409 max mem: 9305 +Train: [98] [3900/6250] eta: 0:05:06 lr: 0.000000 grad: 0.1707 (0.1726) loss: 0.8540 (0.8552) time: 0.1113 data: 0.0287 max mem: 9305 +Train: [98] [4000/6250] eta: 0:04:53 lr: 0.000000 grad: 0.1702 (0.1728) loss: 0.8552 (0.8551) time: 0.1555 data: 0.0642 max mem: 9305 +Train: [98] [4100/6250] eta: 0:04:39 lr: 0.000000 grad: 0.1883 (0.1730) loss: 0.8488 (0.8551) time: 0.1011 data: 0.0173 max mem: 9305 +Train: [98] [4200/6250] eta: 0:04:26 lr: 0.000000 grad: 0.1673 (0.1731) loss: 0.8537 (0.8551) time: 0.1219 data: 0.0409 max mem: 9305 +Train: [98] [4300/6250] eta: 0:04:13 lr: 0.000000 grad: 0.1813 (0.1733) loss: 0.8526 (0.8550) time: 0.1167 data: 0.0332 max mem: 9305 +Train: [98] [4400/6250] eta: 0:04:01 lr: 0.000000 grad: 0.1621 (0.1734) loss: 0.8588 (0.8550) time: 0.2021 data: 0.1229 max mem: 9305 +Train: [98] [4500/6250] eta: 0:03:47 lr: 0.000000 grad: 0.1756 (0.1734) loss: 0.8537 (0.8550) time: 0.1235 data: 0.0498 max mem: 9305 +Train: [98] [4600/6250] eta: 0:03:34 lr: 0.000000 grad: 0.1595 (0.1733) loss: 0.8610 (0.8551) time: 0.1367 data: 0.0573 max mem: 9305 +Train: [98] [4700/6250] eta: 0:03:21 lr: 0.000000 grad: 0.1648 (0.1733) loss: 0.8658 (0.8551) time: 0.1430 data: 0.0526 max mem: 9305 +Train: [98] [4800/6250] eta: 0:03:09 lr: 0.000000 grad: 0.1632 (0.1732) loss: 0.8589 (0.8552) time: 0.1777 data: 0.1003 max mem: 9305 +Train: [98] [4900/6250] eta: 0:02:56 lr: 0.000000 grad: 0.1669 (0.1731) loss: 0.8605 (0.8553) time: 0.1531 data: 0.0755 max mem: 9305 +Train: [98] [5000/6250] eta: 0:02:43 lr: 0.000000 grad: 0.1608 (0.1729) loss: 0.8537 (0.8553) time: 0.1502 data: 0.0673 max mem: 9305 +Train: [98] [5100/6250] eta: 0:02:29 lr: 0.000000 grad: 0.1600 (0.1726) loss: 0.8566 (0.8554) time: 0.1274 data: 0.0497 max mem: 9305 +Train: [98] [5200/6250] eta: 0:02:16 lr: 0.000000 grad: 0.1634 (0.1725) loss: 0.8540 (0.8554) time: 0.1374 data: 0.0546 max mem: 9305 +Train: [98] [5300/6250] eta: 0:02:03 lr: 0.000000 grad: 0.1579 (0.1723) loss: 0.8594 (0.8554) time: 0.1484 data: 0.0505 max mem: 9305 +Train: [98] [5400/6250] eta: 0:01:51 lr: 0.000000 grad: 0.1477 (0.1722) loss: 0.8550 (0.8555) time: 0.0784 data: 0.0002 max mem: 9305 +Train: [98] [5500/6250] eta: 0:01:38 lr: 0.000000 grad: 0.1649 (0.1720) loss: 0.8565 (0.8555) time: 0.1340 data: 0.0550 max mem: 9305 +Train: [98] [5600/6250] eta: 0:01:25 lr: 0.000000 grad: 0.1569 (0.1719) loss: 0.8605 (0.8555) time: 0.1392 data: 0.0595 max mem: 9305 +Train: [98] [5700/6250] eta: 0:01:12 lr: 0.000000 grad: 0.1662 (0.1717) loss: 0.8596 (0.8556) time: 0.1232 data: 0.0473 max mem: 9305 +Train: [98] [5800/6250] eta: 0:00:59 lr: 0.000000 grad: 0.1622 (0.1716) loss: 0.8571 (0.8556) time: 0.1437 data: 0.0658 max mem: 9305 +Train: [98] [5900/6250] eta: 0:00:45 lr: 0.000000 grad: 0.1647 (0.1715) loss: 0.8539 (0.8557) time: 0.1467 data: 0.0453 max mem: 9305 +Train: [98] [6000/6250] eta: 0:00:32 lr: 0.000000 grad: 0.1691 (0.1714) loss: 0.8586 (0.8557) time: 0.1059 data: 0.0248 max mem: 9305 +Train: [98] [6100/6250] eta: 0:00:19 lr: 0.000000 grad: 0.1576 (0.1713) loss: 0.8595 (0.8558) time: 0.1501 data: 0.0552 max mem: 9305 +Train: [98] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.1647 (0.1713) loss: 0.8559 (0.8558) time: 0.1282 data: 0.0484 max mem: 9305 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1710 (0.1713) loss: 0.8533 (0.8558) time: 0.1294 data: 0.0467 max mem: 9305 +Train: [98] Total time: 0:13:44 (0.1319 s / it) +Averaged stats: lr: 0.000000 grad: 0.1710 (0.1713) loss: 0.8533 (0.8558) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:03:09 loss: 0.8635 (0.8635) time: 3.0534 data: 2.9591 max mem: 9305 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.8537 (0.8557) time: 0.1050 data: 0.0772 max mem: 9305 +Eval (hcp-train-subset): [98] Total time: 0:00:12 (0.1982 s / it) +Averaged stats (hcp-train-subset): loss: 0.8537 (0.8557) +Eval (hcp-val): [98] [ 0/62] eta: 0:04:31 loss: 0.8699 (0.8699) time: 4.3739 data: 4.3421 max mem: 9305 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.8709 (0.8720) time: 0.1226 data: 0.0949 max mem: 9305 +Eval (hcp-val): [98] Total time: 0:00:11 (0.1914 s / it) +Averaged stats (hcp-val): loss: 0.8709 (0.8720) +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +Train: [99] [ 0/6250] eta: 8:23:45 lr: 0.000000 grad: 0.4244 (0.4244) loss: 0.8353 (0.8353) time: 4.8360 data: 4.7344 max mem: 9305 +Train: [99] [ 100/6250] eta: 0:19:34 lr: 0.000000 grad: 0.1598 (0.1882) loss: 0.8839 (0.8705) time: 0.1325 data: 0.0282 max mem: 9305 +Train: [99] [ 200/6250] eta: 0:16:29 lr: 0.000000 grad: 0.1956 (0.1914) loss: 0.8654 (0.8691) time: 0.1265 data: 0.0331 max mem: 9305 +Train: [99] [ 300/6250] eta: 0:14:52 lr: 0.000000 grad: 0.1806 (0.1962) loss: 0.8604 (0.8663) time: 0.1270 data: 0.0427 max mem: 9305 +Train: [99] [ 400/6250] eta: 0:13:43 lr: 0.000000 grad: 0.1601 (0.1939) loss: 0.8662 (0.8643) time: 0.1086 data: 0.0210 max mem: 9305 +Train: [99] [ 500/6250] eta: 0:12:55 lr: 0.000000 grad: 0.1717 (0.1904) loss: 0.8533 (0.8630) time: 0.1102 data: 0.0228 max mem: 9305 +Train: [99] [ 600/6250] eta: 0:12:16 lr: 0.000000 grad: 0.1889 (0.1889) loss: 0.8611 (0.8625) time: 0.1080 data: 0.0170 max mem: 9305 +Train: [99] [ 700/6250] eta: 0:11:46 lr: 0.000000 grad: 0.1699 (0.1867) loss: 0.8637 (0.8624) time: 0.1130 data: 0.0342 max mem: 9305 +Train: [99] [ 800/6250] eta: 0:11:14 lr: 0.000000 grad: 0.1542 (0.1839) loss: 0.8601 (0.8627) time: 0.1023 data: 0.0161 max mem: 9305 +Train: [99] [ 900/6250] eta: 0:10:50 lr: 0.000000 grad: 0.1593 (0.1820) loss: 0.8637 (0.8628) time: 0.1137 data: 0.0262 max mem: 9305 +Train: [99] [1000/6250] eta: 0:10:29 lr: 0.000000 grad: 0.1447 (0.1802) loss: 0.8605 (0.8626) time: 0.0944 data: 0.0145 max mem: 9305 +Train: [99] [1100/6250] eta: 0:10:12 lr: 0.000000 grad: 0.1642 (0.1788) loss: 0.8616 (0.8625) time: 0.0845 data: 0.0003 max mem: 9305 +Train: [99] [1200/6250] eta: 0:10:00 lr: 0.000000 grad: 0.1609 (0.1776) loss: 0.8617 (0.8624) time: 0.1297 data: 0.0495 max mem: 9305 +Train: [99] [1300/6250] eta: 0:09:45 lr: 0.000000 grad: 0.1635 (0.1767) loss: 0.8570 (0.8622) time: 0.1250 data: 0.0485 max mem: 9305 +Train: [99] [1400/6250] eta: 0:09:36 lr: 0.000000 grad: 0.1627 (0.1757) loss: 0.8632 (0.8621) time: 0.1384 data: 0.0577 max mem: 9305 +Train: [99] [1500/6250] eta: 0:09:24 lr: 0.000000 grad: 0.1564 (0.1748) loss: 0.8586 (0.8620) time: 0.1277 data: 0.0479 max mem: 9305 +Train: [99] [1600/6250] eta: 0:09:15 lr: 0.000000 grad: 0.1517 (0.1742) loss: 0.8585 (0.8619) time: 0.1161 data: 0.0320 max mem: 9305 +Train: [99] [1700/6250] eta: 0:09:05 lr: 0.000000 grad: 0.1476 (0.1732) loss: 0.8596 (0.8617) time: 0.1353 data: 0.0471 max mem: 9305 +Train: [99] [1800/6250] eta: 0:08:54 lr: 0.000000 grad: 0.1560 (0.1725) loss: 0.8560 (0.8618) time: 0.1123 data: 0.0277 max mem: 9305 +Train: [99] [1900/6250] eta: 0:08:44 lr: 0.000000 grad: 0.1632 (0.1721) loss: 0.8555 (0.8616) time: 0.1161 data: 0.0345 max mem: 9305 +Train: [99] [2000/6250] eta: 0:08:33 lr: 0.000000 grad: 0.1536 (0.1718) loss: 0.8524 (0.8615) time: 0.1338 data: 0.0556 max mem: 9305 +Train: [99] [2100/6250] eta: 0:08:21 lr: 0.000000 grad: 0.1494 (0.1714) loss: 0.8657 (0.8613) time: 0.1391 data: 0.0614 max mem: 9305 +Train: [99] [2200/6250] eta: 0:08:09 lr: 0.000000 grad: 0.1533 (0.1713) loss: 0.8539 (0.8610) time: 0.1318 data: 0.0509 max mem: 9305 +Train: [99] [2300/6250] eta: 0:07:57 lr: 0.000000 grad: 0.1674 (0.1710) loss: 0.8562 (0.8608) time: 0.1168 data: 0.0302 max mem: 9305 +Train: [99] [2400/6250] eta: 0:07:45 lr: 0.000000 grad: 0.1670 (0.1709) loss: 0.8527 (0.8604) time: 0.1271 data: 0.0531 max mem: 9305 +Train: [99] [2500/6250] eta: 0:07:33 lr: 0.000000 grad: 0.1668 (0.1710) loss: 0.8521 (0.8601) time: 0.1149 data: 0.0354 max mem: 9305 +Train: [99] [2600/6250] eta: 0:07:21 lr: 0.000000 grad: 0.1809 (0.1711) loss: 0.8485 (0.8599) time: 0.1342 data: 0.0570 max mem: 9305 +Train: [99] [2700/6250] eta: 0:07:10 lr: 0.000000 grad: 0.1596 (0.1708) loss: 0.8527 (0.8596) time: 0.1138 data: 0.0334 max mem: 9305 +Train: [99] [2800/6250] eta: 0:06:58 lr: 0.000000 grad: 0.1677 (0.1707) loss: 0.8552 (0.8594) time: 0.1321 data: 0.0521 max mem: 9305 +Train: [99] [2900/6250] eta: 0:06:47 lr: 0.000000 grad: 0.1537 (0.1708) loss: 0.8583 (0.8592) time: 0.1221 data: 0.0413 max mem: 9305 +Train: [99] [3000/6250] eta: 0:06:38 lr: 0.000000 grad: 0.1592 (0.1707) loss: 0.8572 (0.8591) time: 0.1840 data: 0.1015 max mem: 9305 +Train: [99] [3100/6250] eta: 0:06:25 lr: 0.000000 grad: 0.1563 (0.1706) loss: 0.8573 (0.8589) time: 0.1113 data: 0.0280 max mem: 9305 +Train: [99] [3200/6250] eta: 0:06:13 lr: 0.000000 grad: 0.1609 (0.1706) loss: 0.8464 (0.8587) time: 0.1302 data: 0.0472 max mem: 9305 +Train: [99] [3300/6250] eta: 0:06:04 lr: 0.000000 grad: 0.1661 (0.1705) loss: 0.8439 (0.8585) time: 0.1329 data: 0.0500 max mem: 9305 +Train: [99] [3400/6250] eta: 0:05:52 lr: 0.000000 grad: 0.1748 (0.1706) loss: 0.8517 (0.8583) time: 0.1779 data: 0.0988 max mem: 9305 +Train: [99] [3500/6250] eta: 0:05:38 lr: 0.000000 grad: 0.1772 (0.1706) loss: 0.8457 (0.8581) time: 0.1172 data: 0.0367 max mem: 9305 +Train: [99] [3600/6250] eta: 0:05:27 lr: 0.000000 grad: 0.1452 (0.1705) loss: 0.8541 (0.8580) time: 0.1541 data: 0.0736 max mem: 9305 +Train: [99] [3700/6250] eta: 0:05:14 lr: 0.000000 grad: 0.1787 (0.1705) loss: 0.8559 (0.8578) time: 0.1300 data: 0.0546 max mem: 9305 +Train: [99] [3800/6250] eta: 0:05:01 lr: 0.000000 grad: 0.2000 (0.1708) loss: 0.8455 (0.8576) time: 0.1151 data: 0.0329 max mem: 9305 +Train: [99] [3900/6250] eta: 0:04:49 lr: 0.000000 grad: 0.1739 (0.1709) loss: 0.8558 (0.8575) time: 0.1007 data: 0.0168 max mem: 9305 +Train: [99] [4000/6250] eta: 0:04:37 lr: 0.000000 grad: 0.1715 (0.1711) loss: 0.8553 (0.8574) time: 0.1244 data: 0.0443 max mem: 9305 +Train: [99] [4100/6250] eta: 0:04:25 lr: 0.000000 grad: 0.1748 (0.1714) loss: 0.8528 (0.8572) time: 0.1136 data: 0.0341 max mem: 9305 +Train: [99] [4200/6250] eta: 0:04:13 lr: 0.000000 grad: 0.1819 (0.1716) loss: 0.8451 (0.8569) time: 0.1101 data: 0.0146 max mem: 9305 +Train: [99] [4300/6250] eta: 0:04:01 lr: 0.000000 grad: 0.1800 (0.1719) loss: 0.8557 (0.8567) time: 0.1312 data: 0.0550 max mem: 9305 +Train: [99] [4400/6250] eta: 0:03:48 lr: 0.000000 grad: 0.1679 (0.1721) loss: 0.8468 (0.8565) time: 0.1338 data: 0.0565 max mem: 9305 +Train: [99] [4500/6250] eta: 0:03:37 lr: 0.000000 grad: 0.1944 (0.1723) loss: 0.8447 (0.8564) time: 0.1770 data: 0.0928 max mem: 9305 +Train: [99] [4600/6250] eta: 0:03:24 lr: 0.000000 grad: 0.1903 (0.1725) loss: 0.8463 (0.8562) time: 0.1172 data: 0.0368 max mem: 9305 +Train: [99] [4700/6250] eta: 0:03:12 lr: 0.000000 grad: 0.1702 (0.1726) loss: 0.8578 (0.8562) time: 0.1326 data: 0.0573 max mem: 9305 +Train: [99] [4800/6250] eta: 0:03:00 lr: 0.000000 grad: 0.1794 (0.1727) loss: 0.8541 (0.8561) time: 0.1357 data: 0.0466 max mem: 9305 +Train: [99] [4900/6250] eta: 0:02:48 lr: 0.000000 grad: 0.1692 (0.1729) loss: 0.8569 (0.8560) time: 0.1095 data: 0.0299 max mem: 9305 +Train: [99] [5000/6250] eta: 0:02:35 lr: 0.000000 grad: 0.1796 (0.1729) loss: 0.8581 (0.8559) time: 0.1323 data: 0.0460 max mem: 9305 +Train: [99] [5100/6250] eta: 0:02:23 lr: 0.000000 grad: 0.1668 (0.1729) loss: 0.8544 (0.8558) time: 0.0911 data: 0.0002 max mem: 9305 +Train: [99] [5200/6250] eta: 0:02:11 lr: 0.000000 grad: 0.1827 (0.1731) loss: 0.8494 (0.8558) time: 0.1171 data: 0.0340 max mem: 9305 +Train: [99] [5300/6250] eta: 0:01:58 lr: 0.000000 grad: 0.1763 (0.1731) loss: 0.8532 (0.8557) time: 0.1082 data: 0.0304 max mem: 9305 +Train: [99] [5400/6250] eta: 0:01:46 lr: 0.000000 grad: 0.1699 (0.1731) loss: 0.8567 (0.8556) time: 0.1233 data: 0.0270 max mem: 9305 +Train: [99] [5500/6250] eta: 0:01:33 lr: 0.000000 grad: 0.1627 (0.1731) loss: 0.8559 (0.8555) time: 0.1366 data: 0.0556 max mem: 9305 +Train: [99] [5600/6250] eta: 0:01:21 lr: 0.000000 grad: 0.1617 (0.1731) loss: 0.8518 (0.8555) time: 0.1557 data: 0.0766 max mem: 9305 +Train: [99] [5700/6250] eta: 0:01:09 lr: 0.000000 grad: 0.1704 (0.1730) loss: 0.8544 (0.8554) time: 0.0900 data: 0.0002 max mem: 9305 +Train: [99] [5800/6250] eta: 0:00:56 lr: 0.000000 grad: 0.1554 (0.1729) loss: 0.8642 (0.8554) time: 0.2450 data: 0.1162 max mem: 9305 +Train: [99] [5900/6250] eta: 0:00:44 lr: 0.000000 grad: 0.1743 (0.1729) loss: 0.8563 (0.8554) time: 0.1328 data: 0.0518 max mem: 9305 +Train: [99] [6000/6250] eta: 0:00:31 lr: 0.000000 grad: 0.1537 (0.1728) loss: 0.8551 (0.8554) time: 0.1311 data: 0.0528 max mem: 9305 +Train: [99] [6100/6250] eta: 0:00:18 lr: 0.000000 grad: 0.1554 (0.1726) loss: 0.8536 (0.8554) time: 0.1313 data: 0.0418 max mem: 9305 +Train: [99] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.1610 (0.1725) loss: 0.8542 (0.8554) time: 0.1787 data: 0.0826 max mem: 9305 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1704 (0.1725) loss: 0.8536 (0.8554) time: 0.1469 data: 0.0507 max mem: 9305 +Train: [99] Total time: 0:13:16 (0.1274 s / it) +Averaged stats: lr: 0.000000 grad: 0.1704 (0.1725) loss: 0.8536 (0.8554) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:05:20 loss: 0.8582 (0.8582) time: 5.1732 data: 5.1393 max mem: 9305 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.8534 (0.8553) time: 0.1254 data: 0.0961 max mem: 9305 +Eval (hcp-train-subset): [99] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (hcp-train-subset): loss: 0.8534 (0.8553) +Making plots (hcp-train-subset): example=56 +Eval (hcp-val): [99] [ 0/62] eta: 0:03:56 loss: 0.8669 (0.8669) time: 3.8092 data: 3.7035 max mem: 9305 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.8703 (0.8723) time: 0.1104 data: 0.0813 max mem: 9305 +Eval (hcp-val): [99] Total time: 0:00:13 (0.2113 s / it) +Averaged stats (hcp-val): loss: 0.8703 (0.8723) +Making plots (hcp-val): example=50 +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-last.pth +saving checkpoint experiments/decoders/output/decoders/attn_reg1_pep4/pretrain/checkpoint-00099.pth +done! training time: 1 day, 4:27:35 diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ec63169077dc8e927bf9d5d811da2785dc17774 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..67627c2826152e930334ab9b5e3adc624438dfbc --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,0.005994842503189409,train,0.6948818897637795,0.02029157347264888,0.6934772390324413,0.02040645293779715,0.6955491930761941,0.020259563066035643 +flat_mae,patch,logistic,aabc_age,,0.005994842503189409,test,0.4423076923076923,0.06393842961056431,0.42296165778139205,0.06643375426593373,0.4317765567765568,0.06337285620838658 +flat_mae,patch,logistic,aabc_age,1,0.005994842503189409,train,0.702755905511811,0.01962068377866745,0.7008498140751963,0.02009337847731286,0.7034584542443357,0.01967809729005389 +flat_mae,patch,logistic,aabc_age,1,0.005994842503189409,test,0.4807692307692308,0.05994247735717949,0.464953007518797,0.060557186069661775,0.4757326007326007,0.059528098075645165 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,train,0.5511811023622047,0.02026385984327841,0.5419169097033821,0.02099247337994033,0.5512358574883628,0.020297969573546563 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,test,0.5769230769230769,0.0636049196117452,0.5574419920769665,0.06517530956860267,0.5693681318681318,0.06320569782530443 +flat_mae,patch,logistic,aabc_age,3,0.046415888336127774,train,0.8484251968503937,0.015827621711022113,0.8497011698528963,0.015692401372041757,0.8492975014816193,0.01583099163202779 +flat_mae,patch,logistic,aabc_age,3,0.046415888336127774,test,0.5961538461538461,0.06435942894925774,0.5796957671957672,0.07020365674745624,0.5959249084249085,0.06456269266455254 +flat_mae,patch,logistic,aabc_age,4,0.046415888336127774,train,0.8543307086614174,0.015664175855524945,0.8549108840949845,0.01571130681141082,0.8543432782433078,0.015698624542891285 +flat_mae,patch,logistic,aabc_age,4,0.046415888336127774,test,0.5,0.06096457163043827,0.4807328088578089,0.06403438695596674,0.5009157509157509,0.061359960287694665 +flat_mae,patch,logistic,aabc_age,5,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,5,21.54434690031882,test,0.40384615384615385,0.06508806051267602,0.40527057899621616,0.06458941012503842,0.40934065934065933,0.06547878621996794 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,train,0.6751968503937008,0.020456344368533923,0.6713112040246472,0.02090433526389376,0.6750003356419104,0.020467226041074676 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,test,0.5576923076923077,0.06709415577861436,0.5531968031968032,0.0687719221402243,0.55746336996337,0.06720194117586843 +flat_mae,patch,logistic,aabc_age,7,0.3593813663804626,train,0.9901574803149606,0.004673817036334574,0.9901601676498498,0.0046782995253146316,0.9900869360496674,0.004683842305070292 +flat_mae,patch,logistic,aabc_age,7,0.3593813663804626,test,0.4230769230769231,0.06238811286790886,0.434037178602396,0.058839784737511176,0.4223901098901099,0.06226682248784308 +flat_mae,patch,logistic,aabc_age,8,0.000774263682681127,train,0.562992125984252,0.021109873501599156,0.5521823020548502,0.021585179838535414,0.5612745474108584,0.020961582468895883 +flat_mae,patch,logistic,aabc_age,8,0.000774263682681127,test,0.4230769230769231,0.06416168555681127,0.42407274389447563,0.06451640433936351,0.4283424908424909,0.06507791466090054 +flat_mae,patch,logistic,aabc_age,9,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,9,166.81005372000556,test,0.4807692307692308,0.06106064499220349,0.4553203253861149,0.06235464590265778,0.47710622710622713,0.06062660005139559 +flat_mae,patch,logistic,aabc_age,10,0.000774263682681127,train,0.5728346456692913,0.01960135914743058,0.5627355997942705,0.020247654107632018,0.5717227204639914,0.01949335103173381 +flat_mae,patch,logistic,aabc_age,10,0.000774263682681127,test,0.4423076923076923,0.06677015020636004,0.44020376175548587,0.06731298849566593,0.43887362637362637,0.06641786960552945 +flat_mae,patch,logistic,aabc_age,11,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,11,2.782559402207126,test,0.5192307692307693,0.07207487067104956,0.5389784946236559,0.06840364489491058,0.521978021978022,0.07268554101867086 +flat_mae,patch,logistic,aabc_age,12,0.000774263682681127,train,0.5610236220472441,0.019915741042539247,0.5530105965062341,0.02058261765637357,0.56054620446538,0.01992984744075937 +flat_mae,patch,logistic,aabc_age,12,0.000774263682681127,test,0.4423076923076923,0.06702621363026162,0.43847376456072107,0.06735207054334835,0.4375,0.06670477249299274 +flat_mae,patch,logistic,aabc_age,13,0.000774263682681127,train,0.5570866141732284,0.019968744564982512,0.5459583688547883,0.020653791561540895,0.5557788906171783,0.019946867330309877 +flat_mae,patch,logistic,aabc_age,13,0.000774263682681127,test,0.4807692307692308,0.06265000932672758,0.4592857142857143,0.06309580416833055,0.47435897435897434,0.06218787199962487 +flat_mae,patch,logistic,aabc_age,14,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,14,21.54434690031882,test,0.5192307692307693,0.06297246275229162,0.5158730158730158,0.06448936563933773,0.5192307692307692,0.06328423306201163 +flat_mae,patch,logistic,aabc_age,15,0.046415888336127774,train,0.8464566929133859,0.016162423841982652,0.8479326334389254,0.016049297641869322,0.8465463166656757,0.016220703704913596 +flat_mae,patch,logistic,aabc_age,15,0.046415888336127774,test,0.5192307692307693,0.06232994616834762,0.4948506501986977,0.06358516579286405,0.5114468864468865,0.06200237848699235 +flat_mae,patch,logistic,aabc_age,16,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,16,166.81005372000556,test,0.5769230769230769,0.06636834634409466,0.5767771883289124,0.06666873533245012,0.5755494505494505,0.06650086521492393 +flat_mae,patch,logistic,aabc_age,17,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,17,166.81005372000556,test,0.5192307692307693,0.06224701461437622,0.5114331723027375,0.0625585612002248,0.5187728937728937,0.061994804026245076 +flat_mae,patch,logistic,aabc_age,18,0.005994842503189409,train,0.6909448818897638,0.01890600659510544,0.6891253272885682,0.019106578628206618,0.6903414485154079,0.018937116573212072 +flat_mae,patch,logistic,aabc_age,18,0.005994842503189409,test,0.5192307692307693,0.06308236374284508,0.4930555555555556,0.06562625879423027,0.5114468864468865,0.06275669269638118 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,train,0.8503937007874016,0.016061580820353808,0.8509357748146236,0.01612558419506533,0.8515988062655715,0.016019651265115554 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,test,0.5,0.06699036184319097,0.4963516687654619,0.06853019074851216,0.5041208791208791,0.06721143195604791 +flat_mae,patch,logistic,aabc_age,20,0.046415888336127774,train,0.8503937007874016,0.015594835071194635,0.8511654625167603,0.01560466484314452,0.8506785480706345,0.015571007259490415 +flat_mae,patch,logistic,aabc_age,20,0.046415888336127774,test,0.5384615384615384,0.059665685974773106,0.5125,0.06661148770520964,0.5336538461538463,0.059541968306531856 +flat_mae,patch,logistic,aabc_age,21,0.005994842503189409,train,0.6889763779527559,0.018852112157640724,0.6857457217326933,0.019476766578071594,0.6888280631160228,0.018893127683910174 +flat_mae,patch,logistic,aabc_age,21,0.005994842503189409,test,0.4230769230769231,0.06286225196326879,0.39984787018255574,0.0650045067643268,0.4207875457875458,0.062489857044370926 +flat_mae,patch,logistic,aabc_age,22,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,22,2.782559402207126,test,0.4423076923076923,0.06273165353336053,0.4417838196286472,0.062401875013353536,0.4450549450549451,0.06269776008576518 +flat_mae,patch,logistic,aabc_age,23,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,23,2.782559402207126,test,0.4230769230769231,0.062474551031817906,0.41347372835075674,0.061146879775434336,0.41941391941391937,0.06192393605317474 +flat_mae,patch,logistic,aabc_age,24,0.046415888336127774,train,0.8661417322834646,0.01523396949739801,0.867260450263285,0.01511625584764321,0.8665723912472262,0.015194974803333398 +flat_mae,patch,logistic,aabc_age,24,0.046415888336127774,test,0.38461538461538464,0.06257325884028649,0.37998575498575493,0.06005579828319338,0.3823260073260073,0.06225366847949788 +flat_mae,patch,logistic,aabc_age,25,0.3593813663804626,train,0.9862204724409449,0.005070823347987855,0.9862144171284287,0.005083549881805384,0.9862722458663302,0.00505139005020693 +flat_mae,patch,logistic,aabc_age,25,0.3593813663804626,test,0.40384615384615385,0.06296673067261499,0.4019191919191919,0.06214382858615366,0.40453296703296704,0.0634074375939702 +flat_mae,patch,logistic,aabc_age,26,0.3593813663804626,train,0.9862204724409449,0.005274568005262155,0.9861092082735721,0.0053001980328355154,0.9862722458663302,0.0052326022816578934 +flat_mae,patch,logistic,aabc_age,26,0.3593813663804626,test,0.4423076923076923,0.06643654934414789,0.4371693121693122,0.06787323491375338,0.4432234432234432,0.06661394164122923 +flat_mae,patch,logistic,aabc_age,27,0.046415888336127774,train,0.8641732283464567,0.015625953997784468,0.8654122444234059,0.015546363676857059,0.8649414113071047,0.015605782191447527 +flat_mae,patch,logistic,aabc_age,27,0.046415888336127774,test,0.4423076923076923,0.06614105421940625,0.4334368530020704,0.06589256181501454,0.4375,0.06570980255481873 +flat_mae,patch,logistic,aabc_age,28,0.046415888336127774,train,0.8562992125984252,0.014858192084898627,0.8569382922950128,0.014827863508269598,0.8561242181940931,0.014844735060216935 +flat_mae,patch,logistic,aabc_age,28,0.046415888336127774,test,0.5192307692307693,0.0671206081282266,0.5140046734874322,0.0688331560213138,0.516025641025641,0.0672747751904488 +flat_mae,patch,logistic,aabc_age,29,0.046415888336127774,train,0.8523622047244095,0.015708069374906878,0.8528361205532191,0.01566203887291765,0.852794650443335,0.015682256641716685 +flat_mae,patch,logistic,aabc_age,29,0.046415888336127774,test,0.4807692307692308,0.06659303009178012,0.48738095238095236,0.06692663235951324,0.4819139194139194,0.06680843345472412 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,train,0.8562992125984252,0.015844133443427232,0.8574103979911872,0.015791720933913594,0.8566093406266722,0.015794644475868883 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,test,0.5192307692307693,0.06262147957483927,0.5215322580645162,0.060851573203911276,0.5217490842490843,0.06287193127526407 +flat_mae,patch,logistic,aabc_age,31,9.999999999999999e-05,train,0.484251968503937,0.019676005407357904,0.4593375703699054,0.020338699579195617,0.48142018243576296,0.01961037640531053 +flat_mae,patch,logistic,aabc_age,31,9.999999999999999e-05,test,0.5961538461538461,0.056380719248326196,0.5601917690152984,0.060942522473708245,0.586996336996337,0.055787088953064126 +flat_mae,patch,logistic,aabc_age,32,0.046415888336127774,train,0.84251968503937,0.016452018229925716,0.8429955172376551,0.01643566626404677,0.8421965173795382,0.01648731780782347 +flat_mae,patch,logistic,aabc_age,32,0.046415888336127774,test,0.4807692307692308,0.058215372418278115,0.46155810983397194,0.05898235239972977,0.47847985347985345,0.05809706902510418 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,train,0.8484251968503937,0.01553811670911704,0.8487104550716041,0.015601281908888742,0.8483948644869761,0.015534012479065202 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,test,0.4807692307692308,0.06806604650183917,0.48659151193633954,0.06679600511984381,0.4835164835164836,0.06852870238367119 +flat_mae,patch,logistic,aabc_age,34,0.046415888336127774,train,0.8562992125984252,0.015719365642318794,0.8572701963594396,0.015648903246458033,0.8569268818482937,0.015703606502228278 +flat_mae,patch,logistic,aabc_age,34,0.046415888336127774,test,0.34615384615384615,0.06556648913157864,0.3541666666666667,0.0665732497114596,0.3500457875457875,0.06610200557606605 +flat_mae,patch,logistic,aabc_age,35,0.005994842503189409,train,0.7047244094488189,0.019630978917279357,0.7024235946430769,0.020016554225952086,0.7049894608440147,0.0196234256815161 +flat_mae,patch,logistic,aabc_age,35,0.005994842503189409,test,0.4423076923076923,0.06935095337288313,0.44768740031897925,0.07093270422019816,0.44184981684981683,0.06945171800119297 +flat_mae,patch,logistic,aabc_age,36,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,36,21.54434690031882,test,0.4423076923076923,0.06316947946670666,0.4285471871678768,0.061464000094419276,0.44597069597069594,0.06395072574979835 +flat_mae,patch,logistic,aabc_age,37,0.3593813663804626,train,0.9881889763779528,0.004880302002406735,0.9881239132388313,0.004909689027957837,0.988238388228367,0.0048634293866834395 +flat_mae,patch,logistic,aabc_age,37,0.3593813663804626,test,0.46153846153846156,0.0654278024793597,0.45609890109890106,0.06445990609926705,0.4581043956043956,0.0651441183140845 +flat_mae,patch,logistic,aabc_age,38,0.005994842503189409,train,0.6791338582677166,0.019482330703034973,0.6768146643677928,0.019857532592407683,0.6790149725061326,0.01957699692243614 +flat_mae,patch,logistic,aabc_age,38,0.005994842503189409,test,0.5,0.06084372260045942,0.4699675324675324,0.0659010784346244,0.49633699633699635,0.060270392448125626 +flat_mae,patch,logistic,aabc_age,39,0.3593813663804626,train,0.9881889763779528,0.005322601681525421,0.9883937804795657,0.005232266723304954,0.9885559294499884,0.005161607150893129 +flat_mae,patch,logistic,aabc_age,39,0.3593813663804626,test,0.5,0.06301088240752661,0.48386150798992367,0.06663701895795351,0.49381868131868134,0.06282373100851386 +flat_mae,patch,logistic,aabc_age,40,0.000774263682681127,train,0.5433070866141733,0.021180801961983017,0.5308578683453636,0.02180133278112217,0.5416160007211506,0.02107424334443318 +flat_mae,patch,logistic,aabc_age,40,0.000774263682681127,test,0.46153846153846156,0.06222306693800618,0.44783717979813054,0.062273337908722325,0.4578754578754579,0.06181463605681359 +flat_mae,patch,logistic,aabc_age,41,0.046415888336127774,train,0.84251968503937,0.015965962539850838,0.843391102480065,0.015954713255748314,0.8428992076932962,0.016016389558256787 +flat_mae,patch,logistic,aabc_age,41,0.046415888336127774,test,0.46153846153846156,0.06567586715550255,0.4486048371917937,0.06896122621120893,0.46543040293040294,0.06647645643221954 +flat_mae,patch,logistic,aabc_age,42,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,42,2.782559402207126,test,0.4230769230769231,0.06020687953378141,0.39624958374958374,0.06131672087248395,0.4251373626373627,0.060640143022958864 +flat_mae,patch,logistic,aabc_age,43,0.000774263682681127,train,0.5531496062992126,0.020747179456275747,0.5424430641821947,0.02140784411441871,0.5517966192228835,0.020719569060376183 +flat_mae,patch,logistic,aabc_age,43,0.000774263682681127,test,0.40384615384615385,0.06346339158133543,0.3998678390333369,0.061609222861055496,0.39880952380952384,0.06305657494177065 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,train,0.84251968503937,0.01603711585286364,0.8427382733062088,0.016033425685362465,0.8428992076932962,0.015968182406209175 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,test,0.6153846153846154,0.06640528023134569,0.6199867724867725,0.06631419257899246,0.6156135531135531,0.06692486557072908 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,train,0.6771653543307087,0.018968345410084006,0.6719158433273266,0.019690981788300733,0.6765313422415894,0.019007826314826688 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,test,0.5,0.061190071515491835,0.4865993265993266,0.06299380040236957,0.5038919413919414,0.06166497624910466 +flat_mae,patch,logistic,aabc_age,46,0.000774263682681127,train,0.5531496062992126,0.020434599718265372,0.5447400232668687,0.021336545687903725,0.552816850758263,0.020452698383175216 +flat_mae,patch,logistic,aabc_age,46,0.000774263682681127,test,0.5,0.059179235823578535,0.4840102707749767,0.06559131071858905,0.5009157509157509,0.059286538752851836 +flat_mae,patch,logistic,aabc_age,47,0.046415888336127774,train,0.8562992125984252,0.015754947300347565,0.8565141595286136,0.01586217469950746,0.85704447638903,0.01575496483516015 +flat_mae,patch,logistic,aabc_age,47,0.046415888336127774,test,0.4423076923076923,0.06812244240896911,0.44351984569375874,0.06877988749065733,0.44482600732600736,0.06840017592892657 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,train,0.844488188976378,0.016107193218663125,0.8446120143345812,0.01614403309733392,0.8449653233957755,0.01609040010512827 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,test,0.4230769230769231,0.06727066014226657,0.4251127644733783,0.06689491114444936,0.41964285714285715,0.0669679523056175 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,train,0.8523622047244095,0.015822236099436473,0.8522393963724478,0.015874475091364353,0.852091960129577,0.015864122574479507 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,test,0.46153846153846156,0.06619206185982152,0.46409090909090905,0.06800996864913168,0.45947802197802196,0.06634829369844394 +flat_mae,patch,logistic,aabc_age,50,0.3593813663804626,train,0.9862204724409449,0.00512211230568687,0.9861171052736696,0.0051545608174014485,0.9862722458663302,0.00511448397577025 +flat_mae,patch,logistic,aabc_age,50,0.3593813663804626,test,0.40384615384615385,0.06367795267441263,0.399074074074074,0.06438639704321936,0.4049908424908425,0.0638068588372265 +flat_mae,patch,logistic,aabc_age,51,0.005994842503189409,train,0.6850393700787402,0.020104082740783905,0.6828019108454042,0.020314368189950922,0.6856484553759286,0.020017664600509678 +flat_mae,patch,logistic,aabc_age,51,0.005994842503189409,test,0.40384615384615385,0.06373386789737277,0.38681318681318677,0.062365729676168866,0.4015567765567766,0.06326994500021534 +flat_mae,patch,logistic,aabc_age,52,0.3593813663804626,train,0.9881889763779528,0.00472088287012822,0.9883682528581559,0.004650959347623039,0.9882883748985882,0.0046846559440514345 +flat_mae,patch,logistic,aabc_age,52,0.3593813663804626,test,0.40384615384615385,0.06834103482842588,0.4241558441558442,0.06675189779355754,0.4033882783882784,0.06854687229072734 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,train,0.4862204724409449,0.020981287923501877,0.47274892249251227,0.02133322417390471,0.48443892180310666,0.02082130525672688 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,test,0.5384615384615384,0.05784148661613476,0.49271206690561525,0.05734096933876425,0.530448717948718,0.05679462513072888 +flat_mae,patch,logistic,aabc_age,54,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,54,21.54434690031882,test,0.5192307692307693,0.06320075789748543,0.517071464267866,0.0638378342314552,0.5238095238095237,0.06292528323211005 +flat_mae,patch,logistic,aabc_age,55,0.046415888336127774,train,0.8484251968503937,0.015230916796491713,0.8488951295614249,0.015241219314116107,0.8487800135791127,0.015247785186719433 +flat_mae,patch,logistic,aabc_age,55,0.046415888336127774,test,0.5384615384615384,0.06393158093947089,0.5387362637362637,0.06439750233446805,0.5409798534798534,0.06427131570401529 +flat_mae,patch,logistic,aabc_age,56,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,56,166.81005372000556,test,0.5961538461538461,0.06550888642912069,0.5970268556475453,0.06660447511758578,0.5993589743589742,0.06538737408376714 +flat_mae,patch,logistic,aabc_age,57,0.3593813663804626,train,0.9901574803149606,0.004460725478679844,0.9904258660268801,0.004345031035131095,0.9903544906010675,0.004367487480793711 +flat_mae,patch,logistic,aabc_age,57,0.3593813663804626,test,0.4807692307692308,0.0630685266456609,0.4706730769230769,0.06074946500874511,0.47573260073260076,0.06230021985401361 +flat_mae,patch,logistic,aabc_age,58,0.046415888336127774,train,0.8681102362204725,0.01465828926685812,0.868939388314866,0.014658002671550478,0.8683209657280839,0.014688535612010162 +flat_mae,patch,logistic,aabc_age,58,0.046415888336127774,test,0.4423076923076923,0.06095243804920368,0.4250018454270318,0.06279932603930409,0.440018315018315,0.06074748896479465 +flat_mae,patch,logistic,aabc_age,59,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,59,21.54434690031882,test,0.46153846153846156,0.06336561515074206,0.45786749482401656,0.06260081913383504,0.45810439560439564,0.06339517293507886 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,train,0.6633858267716536,0.02097491002111096,0.6587660961131351,0.021622803600679773,0.663891427513814,0.020931418445492317 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,test,0.46153846153846156,0.06773189401940287,0.4589550264550265,0.0686351192044581,0.4640567765567766,0.06818925437535868 +flat_mae,patch,logistic,aabc_age,61,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,61,21.54434690031882,test,0.38461538461538464,0.06484732866898188,0.38476031215161643,0.06230401734592787,0.3823260073260073,0.06465853736851081 +flat_mae,patch,logistic,aabc_age,62,0.046415888336127774,train,0.8385826771653543,0.01630628388964785,0.8395681711403447,0.0162439768914969,0.8389169362990013,0.01622897580818835 +flat_mae,patch,logistic,aabc_age,62,0.046415888336127774,test,0.5192307692307693,0.06046559978530312,0.5100334448160535,0.06185539805150892,0.5187728937728937,0.06055519085800923 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,train,0.7125984251968503,0.018757311592375828,0.7117772410666011,0.01899600293803141,0.7134215048648898,0.018766757712882245 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,test,0.46153846153846156,0.059440425647730964,0.43628958760537706,0.06235217384842515,0.45650183150183155,0.058966959093397865 +flat_mae,patch,logistic,aabc_age,64,0.000774263682681127,train,0.562992125984252,0.019430580166193975,0.5540031948677914,0.020375197844816607,0.5629474825897747,0.01945972918961665 +flat_mae,patch,logistic,aabc_age,64,0.000774263682681127,test,0.40384615384615385,0.06585238782495681,0.39703648806909675,0.066467231803181,0.40041208791208793,0.06574089652425731 +flat_mae,patch,logistic,aabc_age,65,0.046415888336127774,train,0.8582677165354331,0.014403862778523195,0.8586208545943513,0.014454734781074053,0.8588930242103304,0.014398801553411823 +flat_mae,patch,logistic,aabc_age,65,0.046415888336127774,test,0.4807692307692308,0.07099854153014895,0.4843304843304843,0.0692508436610434,0.4803113553113553,0.07077002084290585 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,train,0.6811023622047244,0.02001995834404917,0.6793652273008632,0.020284071605939355,0.6821013197439916,0.019902201130180628 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,test,0.5192307692307693,0.06654814291506403,0.516969696969697,0.06832755756256806,0.5176282051282051,0.06675380276247052 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,train,0.8622047244094488,0.014892057414835196,0.8620635828272121,0.01505093743695137,0.8615875095178456,0.014988177235836854 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,test,0.5576923076923077,0.06699818950128208,0.5439995421245422,0.06991994307630034,0.5602106227106227,0.06730189822685297 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,train,0.84251968503937,0.015757404199028935,0.8423330727450543,0.015873889615589803,0.8421465307093168,0.01577167151559254 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,test,0.46153846153846156,0.05972098587470031,0.4593854282536151,0.05871237650698422,0.4622252747252747,0.060039574466443334 +flat_mae,patch,logistic,aabc_age,69,0.000774263682681127,train,0.5610236220472441,0.019654494017864458,0.5520287264370113,0.019925398030650184,0.5606637990061163,0.019601362978923058 +flat_mae,patch,logistic,aabc_age,69,0.000774263682681127,test,0.4230769230769231,0.06566793819291783,0.4139957264957266,0.06572999719196794,0.41666666666666663,0.0651189470357534 +flat_mae,patch,logistic,aabc_age,70,0.000774263682681127,train,0.5511811023622047,0.020247894469659328,0.5450490702143961,0.020711712606777365,0.5502479914229107,0.02017371842079691 +flat_mae,patch,logistic,aabc_age,70,0.000774263682681127,test,0.4423076923076923,0.06353259425322076,0.4281384982997887,0.06450008342698195,0.44024725274725274,0.06326474767896934 +flat_mae,patch,logistic,aabc_age,71,0.046415888336127774,train,0.8582677165354331,0.015224262332091072,0.8584998716279582,0.015279384895882948,0.8585254963184877,0.01522021950533958 +flat_mae,patch,logistic,aabc_age,71,0.046415888336127774,test,0.36538461538461536,0.0679189714762417,0.37191320812010464,0.06660227030117828,0.3676739926739927,0.06832106948496962 +flat_mae,patch,logistic,aabc_age,72,0.046415888336127774,train,0.8523622047244095,0.01564794722053039,0.8537644167803349,0.015581712164673774,0.8531621783351777,0.015676205782457653 +flat_mae,patch,logistic,aabc_age,72,0.046415888336127774,test,0.5961538461538461,0.06222097479260521,0.5871419449005655,0.06526671488916654,0.594551282051282,0.0621252979434804 +flat_mae,patch,logistic,aabc_age,73,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,73,166.81005372000556,test,0.4423076923076923,0.0640139015042734,0.44897662279971123,0.06318427605914387,0.44184981684981683,0.06422028806725977 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,train,0.6968503937007874,0.02009250570281294,0.6939561204598799,0.020313387261469997,0.6965574168231398,0.020064979117851427 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,test,0.5,0.06370099346201231,0.48348190455855666,0.06977783177867006,0.49404761904761907,0.06390882061111194 +flat_mae,patch,logistic,aabc_age,75,0.046415888336127774,train,0.8661417322834646,0.015409955773248461,0.8667446237881344,0.015336662969745543,0.8667399724581838,0.015343435890982202 +flat_mae,patch,logistic,aabc_age,75,0.046415888336127774,test,0.5,0.0678356327966584,0.4990993788819876,0.06873657758591971,0.4967948717948718,0.06824918429714535 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,train,0.8523622047244095,0.016281741082887474,0.8528898546602418,0.016322184132503546,0.8531797995354716,0.016261163906355815 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,test,0.4423076923076923,0.06479347024957259,0.4379788890658456,0.0641290829508009,0.44024725274725274,0.06455533615814962 +flat_mae,patch,logistic,aabc_age,77,0.3593813663804626,train,0.9921259842519685,0.0036988162785523255,0.992198673957564,0.003669340303613247,0.9923706196333256,0.003603681175900887 +flat_mae,patch,logistic,aabc_age,77,0.3593813663804626,test,0.40384615384615385,0.067244354814645,0.4081709956709957,0.06724420721319062,0.40476190476190477,0.06763609881707043 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,train,0.8484251968503937,0.01565084975003582,0.8489104961211621,0.015655906614704947,0.8490475681305129,0.015609009827038545 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,test,0.5192307692307693,0.06726239134866399,0.5221688034188035,0.06851844077423545,0.5173992673992673,0.0673046744280576 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,train,0.8543307086614174,0.015351067692853122,0.8544046871199859,0.015422850474293271,0.8546608194649293,0.015390570554017175 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,test,0.40384615384615385,0.06796274649572812,0.40312061711079944,0.06822277951445188,0.40041208791208793,0.067696319800022 +flat_mae,patch,logistic,aabc_age,80,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,80,21.54434690031882,test,0.4423076923076923,0.05725758005166461,0.41077712609970674,0.058784523132394226,0.44436813186813184,0.05783779695740996 +flat_mae,patch,logistic,aabc_age,81,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,81,21.54434690031882,test,0.38461538461538464,0.06759716871981335,0.38166666666666665,0.06693677717390185,0.38690476190476186,0.0681891688910621 +flat_mae,patch,logistic,aabc_age,82,0.005994842503189409,train,0.6909448818897638,0.02066492792338003,0.6882029053420806,0.021311163871001747,0.6905766375968807,0.020745101562254332 +flat_mae,patch,logistic,aabc_age,82,0.005994842503189409,test,0.5384615384615384,0.05734587009164693,0.48958333333333326,0.05883363297261905,0.530448717948718,0.05629635450409941 +flat_mae,patch,logistic,aabc_age,83,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,83,21.54434690031882,test,0.4807692307692308,0.06355326704017103,0.4836270051787293,0.06083105010674023,0.48466117216117216,0.06413390045649929 +flat_mae,patch,logistic,aabc_age,84,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,84,2.782559402207126,test,0.6538461538461539,0.05933969754002806,0.6415933528836755,0.06400134656238968,0.6481227106227107,0.05970823467108537 +flat_mae,patch,logistic,aabc_age,85,0.046415888336127774,train,0.8622047244094488,0.015481061892730564,0.8629738108160657,0.015471055945838428,0.8625577543830039,0.015465752569122828 +flat_mae,patch,logistic,aabc_age,85,0.046415888336127774,test,0.5,0.059905942054950805,0.4777816736792894,0.0632158573549944,0.4951923076923077,0.0597095249412422 +flat_mae,patch,logistic,aabc_age,86,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,86,21.54434690031882,test,0.4807692307692308,0.06289103707507691,0.4461805555555556,0.0598596601193752,0.4700091575091575,0.06200829060523622 +flat_mae,patch,logistic,aabc_age,87,0.000774263682681127,train,0.5295275590551181,0.020424706236253438,0.5177729530761697,0.020941368674573548,0.5278382599172595,0.020323503926097913 +flat_mae,patch,logistic,aabc_age,87,0.000774263682681127,test,0.4807692307692308,0.0642481666903961,0.46303224905765744,0.06626531355792156,0.47870879120879123,0.06421900381558428 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,train,0.4862204724409449,0.018442801923077887,0.4567630274571186,0.019139335910791387,0.484641745414652,0.01831845027672499 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,test,0.46153846153846156,0.0648905655928393,0.46083144796380093,0.06717710139792502,0.4581043956043956,0.06488926927675946 +flat_mae,patch,logistic,aabc_age,89,9.999999999999999e-05,train,0.5059055118110236,0.020929132006777852,0.4859289923215848,0.022042300596813395,0.5047178066664237,0.02089211191869146 +flat_mae,patch,logistic,aabc_age,89,9.999999999999999e-05,test,0.36538461538461536,0.061735489874618896,0.33249521072796934,0.06060961940111162,0.3628663003663004,0.061280522532324425 +flat_mae,patch,logistic,aabc_age,90,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,90,166.81005372000556,test,0.5384615384615384,0.06308860117036307,0.5301683501683502,0.06347044501283591,0.538003663003663,0.06316428648325977 +flat_mae,patch,logistic,aabc_age,91,0.005994842503189409,train,0.6909448818897638,0.021200872940849075,0.6889411792604991,0.02134431866354841,0.6910941254993872,0.02114695881854549 +flat_mae,patch,logistic,aabc_age,91,0.005994842503189409,test,0.5384615384615384,0.06847764503706655,0.5282738095238095,0.07198063134921727,0.5396062271062272,0.0685009025722203 +flat_mae,patch,logistic,aabc_age,92,0.046415888336127774,train,0.8405511811023622,0.017217024738667312,0.8412646648495838,0.01717026169057907,0.8411506332124383,0.01722343059343951 +flat_mae,patch,logistic,aabc_age,92,0.046415888336127774,test,0.5576923076923077,0.061329259790974776,0.5488196988196988,0.06349232904282107,0.5588369963369964,0.061657982500512916 +flat_mae,patch,logistic,aabc_age,93,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,93,2.782559402207126,test,0.4423076923076923,0.06885035440558855,0.4562047101449276,0.0672593329364651,0.443452380952381,0.06903325577834273 +flat_mae,patch,logistic,aabc_age,94,0.046415888336127774,train,0.8641732283464567,0.014558207114297712,0.8647868991131641,0.014482916670157232,0.8640711397823889,0.01454327542780627 +flat_mae,patch,logistic,aabc_age,94,0.046415888336127774,test,0.4807692307692308,0.06238712885005886,0.46647465437788016,0.06360011196054274,0.47596153846153844,0.06202734414827909 +flat_mae,patch,logistic,aabc_age,95,0.3593813663804626,train,0.9921259842519685,0.004017636965034734,0.9922149636634484,0.003968960150238622,0.9923706196333255,0.0038840073285618575 +flat_mae,patch,logistic,aabc_age,95,0.3593813663804626,test,0.4807692307692308,0.06319261201293827,0.46188556067588327,0.06364665416116431,0.4757326007326007,0.06270752705378344 +flat_mae,patch,logistic,aabc_age,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,96,166.81005372000556,test,0.4807692307692308,0.06493510643201625,0.4771524771524771,0.06459712580260878,0.4816849816849817,0.06519454693432952 +flat_mae,patch,logistic,aabc_age,97,0.3593813663804626,train,0.9901574803149606,0.004441698713034782,0.9903852200301615,0.004349067665239857,0.9907896263634254,0.0041680124161043155 +flat_mae,patch,logistic,aabc_age,97,0.3593813663804626,test,0.4807692307692308,0.06355000826353886,0.4817424926120578,0.06227748550497058,0.48168498168498164,0.06348612024342677 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,train,0.7007874015748031,0.019151643899961386,0.6983029154081786,0.01966237035984872,0.7015599197528141,0.019195008462620015 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,test,0.4807692307692308,0.06096007035359123,0.4541306528785306,0.06777101981372591,0.47733516483516486,0.060838345212362255 +flat_mae,patch,logistic,aabc_age,99,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,99,21.54434690031882,test,0.5192307692307693,0.06438829141238271,0.5244588744588744,0.06348600335919154,0.5233516483516484,0.06469746483214656 +flat_mae,patch,logistic,aabc_age,100,0.3593813663804626,train,0.9940944881889764,0.003545275044048096,0.9940037281091885,0.0035897406614795926,0.9941691807844047,0.0035044605788328645 +flat_mae,patch,logistic,aabc_age,100,0.3593813663804626,test,0.4423076923076923,0.06730329655975739,0.4377898550724638,0.06804266690469894,0.4432234432234432,0.0673564505250262 diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..179eeb9a600334fd8bf3ec36f28ee6cf42b2ab05 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:57:48 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:20:08 time: 5.2998 data: 4.2606 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:46 time: 0.2726 data: 0.0998 max mem: 3581 +extract (train) [ 40/228] eta: 0:01:07 time: 0.2010 data: 0.0624 max mem: 3581 +extract (train) [ 60/228] eta: 0:00:52 time: 0.2108 data: 0.0702 max mem: 3581 +extract (train) [ 80/228] eta: 0:00:42 time: 0.2104 data: 0.0702 max mem: 3581 +extract (train) [100/228] eta: 0:00:35 time: 0.2348 data: 0.0799 max mem: 3581 +extract (train) [120/228] eta: 0:00:28 time: 0.2193 data: 0.0724 max mem: 3581 +extract (train) [140/228] eta: 0:00:22 time: 0.2058 data: 0.0659 max mem: 3581 +extract (train) [160/228] eta: 0:00:17 time: 0.2205 data: 0.0762 max mem: 3581 +extract (train) [180/228] eta: 0:00:11 time: 0.2140 data: 0.0733 max mem: 3581 +extract (train) [200/228] eta: 0:00:06 time: 0.2035 data: 0.0648 max mem: 3581 +extract (train) [220/228] eta: 0:00:01 time: 0.1739 data: 0.0494 max mem: 3581 +extract (train) [227/228] eta: 0:00:00 time: 0.1787 data: 0.0524 max mem: 3581 +extract (train) Total time: 0:00:54 (0.2388 s / it) +extract (validation) [ 0/27] eta: 0:02:02 time: 4.5402 data: 4.4019 max mem: 3581 +extract (validation) [20/27] eta: 0:00:02 time: 0.1797 data: 0.0472 max mem: 3581 +extract (validation) [26/27] eta: 0:00:00 time: 0.1642 data: 0.0425 max mem: 3581 +extract (validation) Total time: 0:00:09 (0.3507 s / it) +extract (test) [ 0/26] eta: 0:01:57 time: 4.5291 data: 4.3778 max mem: 3581 +extract (test) [20/26] eta: 0:00:02 time: 0.2063 data: 0.0635 max mem: 3581 +extract (test) [25/26] eta: 0:00:00 time: 0.1926 data: 0.0577 max mem: 3581 +extract (test) Total time: 0:00:09 (0.3831 s / it) +feature extraction time: 0:01:13 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.0059948 | train | 0.69488 | 0.020292 | 0.69348 | 0.020406 | 0.69555 | 0.02026 | +| flat_mae | patch | logistic | aabc_age | | 0.0059948 | test | 0.44231 | 0.063938 | 0.42296 | 0.066434 | 0.43178 | 0.063373 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05994247735717949, "f1": 0.464953007518797, "f1_std": 0.060557186069661775, "bacc": 0.4757326007326007, "bacc_std": 0.059528098075645165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 0.000774263682681127, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.0636049196117452, "f1": 0.5574419920769665, "f1_std": 0.06517530956860267, "bacc": 0.5693681318681318, "bacc_std": 0.06320569782530443} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06435942894925774, "f1": 0.5796957671957672, "f1_std": 0.07020365674745624, "bacc": 0.5959249084249085, "bacc_std": 0.06456269266455254} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06096457163043827, "f1": 0.4807328088578089, "f1_std": 0.06403438695596674, "bacc": 0.5009157509157509, "bacc_std": 0.061359960287694665} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 21.54434690031882, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06508806051267602, "f1": 0.40527057899621616, "f1_std": 0.06458941012503842, "bacc": 0.40934065934065933, "bacc_std": 0.06547878621996794} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06709415577861436, "f1": 0.5531968031968032, "f1_std": 0.0687719221402243, "bacc": 0.55746336996337, "bacc_std": 0.06720194117586843} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06238811286790886, "f1": 0.434037178602396, "f1_std": 0.058839784737511176, "bacc": 0.4223901098901099, "bacc_std": 0.06226682248784308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06416168555681127, "f1": 0.42407274389447563, "f1_std": 0.06451640433936351, "bacc": 0.4283424908424909, "bacc_std": 0.06507791466090054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 166.81005372000556, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06106064499220349, "f1": 0.4553203253861149, "f1_std": 0.06235464590265778, "bacc": 0.47710622710622713, "bacc_std": 0.06062660005139559} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06677015020636004, "f1": 0.44020376175548587, "f1_std": 0.06731298849566593, "bacc": 0.43887362637362637, "bacc_std": 0.06641786960552945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 2.782559402207126, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.07207487067104956, "f1": 0.5389784946236559, "f1_std": 0.06840364489491058, "bacc": 0.521978021978022, "bacc_std": 0.07268554101867086} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06702621363026162, "f1": 0.43847376456072107, "f1_std": 0.06735207054334835, "bacc": 0.4375, "bacc_std": 0.06670477249299274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06265000932672758, "f1": 0.4592857142857143, "f1_std": 0.06309580416833055, "bacc": 0.47435897435897434, "bacc_std": 0.06218787199962487} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 21.54434690031882, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06297246275229162, "f1": 0.5158730158730158, "f1_std": 0.06448936563933773, "bacc": 0.5192307692307692, "bacc_std": 0.06328423306201163} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06232994616834762, "f1": 0.4948506501986977, "f1_std": 0.06358516579286405, "bacc": 0.5114468864468865, "bacc_std": 0.06200237848699235} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 166.81005372000556, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06636834634409466, "f1": 0.5767771883289124, "f1_std": 0.06666873533245012, "bacc": 0.5755494505494505, "bacc_std": 0.06650086521492393} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 166.81005372000556, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06224701461437622, "f1": 0.5114331723027375, "f1_std": 0.0625585612002248, "bacc": 0.5187728937728937, "bacc_std": 0.061994804026245076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06308236374284508, "f1": 0.4930555555555556, "f1_std": 0.06562625879423027, "bacc": 0.5114468864468865, "bacc_std": 0.06275669269638118} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06699036184319097, "f1": 0.4963516687654619, "f1_std": 0.06853019074851216, "bacc": 0.5041208791208791, "bacc_std": 0.06721143195604791} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.059665685974773106, "f1": 0.5125, "f1_std": 0.06661148770520964, "bacc": 0.5336538461538463, "bacc_std": 0.059541968306531856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06286225196326879, "f1": 0.39984787018255574, "f1_std": 0.0650045067643268, "bacc": 0.4207875457875458, "bacc_std": 0.062489857044370926} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 2.782559402207126, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06273165353336053, "f1": 0.4417838196286472, "f1_std": 0.062401875013353536, "bacc": 0.4450549450549451, "bacc_std": 0.06269776008576518} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 2.782559402207126, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.062474551031817906, "f1": 0.41347372835075674, "f1_std": 0.061146879775434336, "bacc": 0.41941391941391937, "bacc_std": 0.06192393605317474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06257325884028649, "f1": 0.37998575498575493, "f1_std": 0.06005579828319338, "bacc": 0.3823260073260073, "bacc_std": 0.06225366847949788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06296673067261499, "f1": 0.4019191919191919, "f1_std": 0.06214382858615366, "bacc": 0.40453296703296704, "bacc_std": 0.0634074375939702} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06643654934414789, "f1": 0.4371693121693122, "f1_std": 0.06787323491375338, "bacc": 0.4432234432234432, "bacc_std": 0.06661394164122923} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06614105421940625, "f1": 0.4334368530020704, "f1_std": 0.06589256181501454, "bacc": 0.4375, "bacc_std": 0.06570980255481873} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.0671206081282266, "f1": 0.5140046734874322, "f1_std": 0.0688331560213138, "bacc": 0.516025641025641, "bacc_std": 0.0672747751904488} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06659303009178012, "f1": 0.48738095238095236, "f1_std": 0.06692663235951324, "bacc": 0.4819139194139194, "bacc_std": 0.06680843345472412} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06262147957483927, "f1": 0.5215322580645162, "f1_std": 0.060851573203911276, "bacc": 0.5217490842490843, "bacc_std": 0.06287193127526407} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.056380719248326196, "f1": 0.5601917690152984, "f1_std": 0.060942522473708245, "bacc": 0.586996336996337, "bacc_std": 0.055787088953064126} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.058215372418278115, "f1": 0.46155810983397194, "f1_std": 0.05898235239972977, "bacc": 0.47847985347985345, "bacc_std": 0.05809706902510418} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06806604650183917, "f1": 0.48659151193633954, "f1_std": 0.06679600511984381, "bacc": 0.4835164835164836, "bacc_std": 0.06852870238367119} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06556648913157864, "f1": 0.3541666666666667, "f1_std": 0.0665732497114596, "bacc": 0.3500457875457875, "bacc_std": 0.06610200557606605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06935095337288313, "f1": 0.44768740031897925, "f1_std": 0.07093270422019816, "bacc": 0.44184981684981683, "bacc_std": 0.06945171800119297} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 21.54434690031882, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06316947946670666, "f1": 0.4285471871678768, "f1_std": 0.061464000094419276, "bacc": 0.44597069597069594, "bacc_std": 0.06395072574979835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0654278024793597, "f1": 0.45609890109890106, "f1_std": 0.06445990609926705, "bacc": 0.4581043956043956, "bacc_std": 0.0651441183140845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06084372260045942, "f1": 0.4699675324675324, "f1_std": 0.0659010784346244, "bacc": 0.49633699633699635, "bacc_std": 0.060270392448125626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.06301088240752661, "f1": 0.48386150798992367, "f1_std": 0.06663701895795351, "bacc": 0.49381868131868134, "bacc_std": 0.06282373100851386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06222306693800618, "f1": 0.44783717979813054, "f1_std": 0.062273337908722325, "bacc": 0.4578754578754579, "bacc_std": 0.06181463605681359} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06567586715550255, "f1": 0.4486048371917937, "f1_std": 0.06896122621120893, "bacc": 0.46543040293040294, "bacc_std": 0.06647645643221954} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 2.782559402207126, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06020687953378141, "f1": 0.39624958374958374, "f1_std": 0.06131672087248395, "bacc": 0.4251373626373627, "bacc_std": 0.060640143022958864} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06346339158133543, "f1": 0.3998678390333369, "f1_std": 0.061609222861055496, "bacc": 0.39880952380952384, "bacc_std": 0.06305657494177065} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06640528023134569, "f1": 0.6199867724867725, "f1_std": 0.06631419257899246, "bacc": 0.6156135531135531, "bacc_std": 0.06692486557072908} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.061190071515491835, "f1": 0.4865993265993266, "f1_std": 0.06299380040236957, "bacc": 0.5038919413919414, "bacc_std": 0.06166497624910466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.059179235823578535, "f1": 0.4840102707749767, "f1_std": 0.06559131071858905, "bacc": 0.5009157509157509, "bacc_std": 0.059286538752851836} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06812244240896911, "f1": 0.44351984569375874, "f1_std": 0.06877988749065733, "bacc": 0.44482600732600736, "bacc_std": 0.06840017592892657} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06727066014226657, "f1": 0.4251127644733783, "f1_std": 0.06689491114444936, "bacc": 0.41964285714285715, "bacc_std": 0.0669679523056175} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06619206185982152, "f1": 0.46409090909090905, "f1_std": 0.06800996864913168, "bacc": 0.45947802197802196, "bacc_std": 0.06634829369844394} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06367795267441263, "f1": 0.399074074074074, "f1_std": 0.06438639704321936, "bacc": 0.4049908424908425, "bacc_std": 0.0638068588372265} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06373386789737277, "f1": 0.38681318681318677, "f1_std": 0.062365729676168866, "bacc": 0.4015567765567766, "bacc_std": 0.06326994500021534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06834103482842588, "f1": 0.4241558441558442, "f1_std": 0.06675189779355754, "bacc": 0.4033882783882784, "bacc_std": 0.06854687229072734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05784148661613476, "f1": 0.49271206690561525, "f1_std": 0.05734096933876425, "bacc": 0.530448717948718, "bacc_std": 0.05679462513072888} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 21.54434690031882, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06320075789748543, "f1": 0.517071464267866, "f1_std": 0.0638378342314552, "bacc": 0.5238095238095237, "bacc_std": 0.06292528323211005} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06393158093947089, "f1": 0.5387362637362637, "f1_std": 0.06439750233446805, "bacc": 0.5409798534798534, "bacc_std": 0.06427131570401529} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 166.81005372000556, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06550888642912069, "f1": 0.5970268556475453, "f1_std": 0.06660447511758578, "bacc": 0.5993589743589742, "bacc_std": 0.06538737408376714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0630685266456609, "f1": 0.4706730769230769, "f1_std": 0.06074946500874511, "bacc": 0.47573260073260076, "bacc_std": 0.06230021985401361} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06095243804920368, "f1": 0.4250018454270318, "f1_std": 0.06279932603930409, "bacc": 0.440018315018315, "bacc_std": 0.06074748896479465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 21.54434690031882, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06336561515074206, "f1": 0.45786749482401656, "f1_std": 0.06260081913383504, "bacc": 0.45810439560439564, "bacc_std": 0.06339517293507886} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06773189401940287, "f1": 0.4589550264550265, "f1_std": 0.0686351192044581, "bacc": 0.4640567765567766, "bacc_std": 0.06818925437535868} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 21.54434690031882, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06484732866898188, "f1": 0.38476031215161643, "f1_std": 0.06230401734592787, "bacc": 0.3823260073260073, "bacc_std": 0.06465853736851081} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06046559978530312, "f1": 0.5100334448160535, "f1_std": 0.06185539805150892, "bacc": 0.5187728937728937, "bacc_std": 0.06055519085800923} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.059440425647730964, "f1": 0.43628958760537706, "f1_std": 0.06235217384842515, "bacc": 0.45650183150183155, "bacc_std": 0.058966959093397865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06585238782495681, "f1": 0.39703648806909675, "f1_std": 0.066467231803181, "bacc": 0.40041208791208793, "bacc_std": 0.06574089652425731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.07099854153014895, "f1": 0.4843304843304843, "f1_std": 0.0692508436610434, "bacc": 0.4803113553113553, "bacc_std": 0.07077002084290585} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06654814291506403, "f1": 0.516969696969697, "f1_std": 0.06832755756256806, "bacc": 0.5176282051282051, "bacc_std": 0.06675380276247052} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06699818950128208, "f1": 0.5439995421245422, "f1_std": 0.06991994307630034, "bacc": 0.5602106227106227, "bacc_std": 0.06730189822685297} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05972098587470031, "f1": 0.4593854282536151, "f1_std": 0.05871237650698422, "bacc": 0.4622252747252747, "bacc_std": 0.060039574466443334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06566793819291783, "f1": 0.4139957264957266, "f1_std": 0.06572999719196794, "bacc": 0.41666666666666663, "bacc_std": 0.0651189470357534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06353259425322076, "f1": 0.4281384982997887, "f1_std": 0.06450008342698195, "bacc": 0.44024725274725274, "bacc_std": 0.06326474767896934} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.0679189714762417, "f1": 0.37191320812010464, "f1_std": 0.06660227030117828, "bacc": 0.3676739926739927, "bacc_std": 0.06832106948496962} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06222097479260521, "f1": 0.5871419449005655, "f1_std": 0.06526671488916654, "bacc": 0.594551282051282, "bacc_std": 0.0621252979434804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 166.81005372000556, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.0640139015042734, "f1": 0.44897662279971123, "f1_std": 0.06318427605914387, "bacc": 0.44184981684981683, "bacc_std": 0.06422028806725977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06370099346201231, "f1": 0.48348190455855666, "f1_std": 0.06977783177867006, "bacc": 0.49404761904761907, "bacc_std": 0.06390882061111194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.0678356327966584, "f1": 0.4990993788819876, "f1_std": 0.06873657758591971, "bacc": 0.4967948717948718, "bacc_std": 0.06824918429714535} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06479347024957259, "f1": 0.4379788890658456, "f1_std": 0.0641290829508009, "bacc": 0.44024725274725274, "bacc_std": 0.06455533615814962} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.067244354814645, "f1": 0.4081709956709957, "f1_std": 0.06724420721319062, "bacc": 0.40476190476190477, "bacc_std": 0.06763609881707043} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06726239134866399, "f1": 0.5221688034188035, "f1_std": 0.06851844077423545, "bacc": 0.5173992673992673, "bacc_std": 0.0673046744280576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06796274649572812, "f1": 0.40312061711079944, "f1_std": 0.06822277951445188, "bacc": 0.40041208791208793, "bacc_std": 0.067696319800022} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 21.54434690031882, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.05725758005166461, "f1": 0.41077712609970674, "f1_std": 0.058784523132394226, "bacc": 0.44436813186813184, "bacc_std": 0.05783779695740996} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 21.54434690031882, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06759716871981335, "f1": 0.38166666666666665, "f1_std": 0.06693677717390185, "bacc": 0.38690476190476186, "bacc_std": 0.0681891688910621} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05734587009164693, "f1": 0.48958333333333326, "f1_std": 0.05883363297261905, "bacc": 0.530448717948718, "bacc_std": 0.05629635450409941} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 21.54434690031882, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06355326704017103, "f1": 0.4836270051787293, "f1_std": 0.06083105010674023, "bacc": 0.48466117216117216, "bacc_std": 0.06413390045649929} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 2.782559402207126, "split": "test", "acc": 0.6538461538461539, "acc_std": 0.05933969754002806, "f1": 0.6415933528836755, "f1_std": 0.06400134656238968, "bacc": 0.6481227106227107, "bacc_std": 0.05970823467108537} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.059905942054950805, "f1": 0.4777816736792894, "f1_std": 0.0632158573549944, "bacc": 0.4951923076923077, "bacc_std": 0.0597095249412422} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 21.54434690031882, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06289103707507691, "f1": 0.4461805555555556, "f1_std": 0.0598596601193752, "bacc": 0.4700091575091575, "bacc_std": 0.06200829060523622} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0642481666903961, "f1": 0.46303224905765744, "f1_std": 0.06626531355792156, "bacc": 0.47870879120879123, "bacc_std": 0.06421900381558428} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0648905655928393, "f1": 0.46083144796380093, "f1_std": 0.06717710139792502, "bacc": 0.4581043956043956, "bacc_std": 0.06488926927675946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 9.999999999999999e-05, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.061735489874618896, "f1": 0.33249521072796934, "f1_std": 0.06060961940111162, "bacc": 0.3628663003663004, "bacc_std": 0.061280522532324425} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 166.81005372000556, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06308860117036307, "f1": 0.5301683501683502, "f1_std": 0.06347044501283591, "bacc": 0.538003663003663, "bacc_std": 0.06316428648325977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06847764503706655, "f1": 0.5282738095238095, "f1_std": 0.07198063134921727, "bacc": 0.5396062271062272, "bacc_std": 0.0685009025722203} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.061329259790974776, "f1": 0.5488196988196988, "f1_std": 0.06349232904282107, "bacc": 0.5588369963369964, "bacc_std": 0.061657982500512916} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 2.782559402207126, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06885035440558855, "f1": 0.4562047101449276, "f1_std": 0.0672593329364651, "bacc": 0.443452380952381, "bacc_std": 0.06903325577834273} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06238712885005886, "f1": 0.46647465437788016, "f1_std": 0.06360011196054274, "bacc": 0.47596153846153844, "bacc_std": 0.06202734414827909} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06319261201293827, "f1": 0.46188556067588327, "f1_std": 0.06364665416116431, "bacc": 0.4757326007326007, "bacc_std": 0.06270752705378344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06493510643201625, "f1": 0.4771524771524771, "f1_std": 0.06459712580260878, "bacc": 0.4816849816849817, "bacc_std": 0.06519454693432952} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06355000826353886, "f1": 0.4817424926120578, "f1_std": 0.06227748550497058, "bacc": 0.48168498168498164, "bacc_std": 0.06348612024342677} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06096007035359123, "f1": 0.4541306528785306, "f1_std": 0.06777101981372591, "bacc": 0.47733516483516486, "bacc_std": 0.060838345212362255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 21.54434690031882, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06438829141238271, "f1": 0.5244588744588744, "f1_std": 0.06348600335919154, "bacc": 0.5233516483516484, "bacc_std": 0.06469746483214656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06730329655975739, "f1": 0.4377898550724638, "f1_std": 0.06804266690469894, "bacc": 0.4432234432234432, "bacc_std": 0.0673564505250262} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 14.273 | 42.588 | 0.82982 | 0.16474 | 0.82758 | 0.16897 | 0.82979 | 0.1651 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 14.273 | 42.588 | 0.4775 | 0.059638 | 0.46887 | 0.058126 | 0.47617 | 0.059191 | + + +done! total time: 0:05:47 diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e81b9c177ff1569dbc2e9a7f39353d9f2fcac86a --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (aabc_age reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic +model: flat_mae +representation: reg +dataset: aabc_age +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..8849bb680407b4198357733e61b905cea77dd14f --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,aabc_age,,0.005994842503189409,train,0.6850393700787402,0.02057044078978543,0.6819498393535776,0.021021045084985316,0.6845930593498721,0.020694234510457994 +flat_mae,reg,logistic,aabc_age,,0.005994842503189409,test,0.40384615384615385,0.06478686038161217,0.3849972191323693,0.06584515047914363,0.39491758241758246,0.06442890667979337 +flat_mae,reg,logistic,aabc_age,1,0.005994842503189409,train,0.687007874015748,0.02056054482429896,0.6835711318623137,0.021021975826643287,0.6872470698461226,0.020535967125602246 +flat_mae,reg,logistic,aabc_age,1,0.005994842503189409,test,0.5,0.06142417271793606,0.4847355457609541,0.06449093341030591,0.49793956043956045,0.061310621818550945 +flat_mae,reg,logistic,aabc_age,2,0.005994842503189409,train,0.6692913385826772,0.01969820780017261,0.6670830583087468,0.020148648276226092,0.6695370443181579,0.01969217017875493 +flat_mae,reg,logistic,aabc_age,2,0.005994842503189409,test,0.5384615384615384,0.06573007156311567,0.5271164021164021,0.06797923239740378,0.538003663003663,0.06599912464853959 +flat_mae,reg,logistic,aabc_age,3,0.005994842503189409,train,0.6771653543307087,0.020364310138334046,0.6765750932835655,0.020682817123916456,0.6780690616794754,0.020407247101009197 +flat_mae,reg,logistic,aabc_age,3,0.005994842503189409,test,0.5,0.05684343440981516,0.452855644546148,0.055771287955331005,0.4961080586080586,0.05592524823351923 +flat_mae,reg,logistic,aabc_age,4,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,4,2.782559402207126,test,0.5,0.059113470265785485,0.4857065217391304,0.06145568835069574,0.5119047619047619,0.05913300729279735 +flat_mae,reg,logistic,aabc_age,5,9.999999999999999e-05,train,0.4862204724409449,0.020177882069083698,0.45865154284230936,0.02058566131061348,0.48203093084050486,0.020019785588401888 +flat_mae,reg,logistic,aabc_age,5,9.999999999999999e-05,test,0.36538461538461536,0.05940756581227383,0.32481527093596063,0.050379433587192844,0.3598901098901099,0.05800778806618436 +flat_mae,reg,logistic,aabc_age,6,0.005994842503189409,train,0.6771653543307087,0.021164804709656943,0.6733053465105728,0.021691076785625332,0.6761461931494527,0.021171366311635 +flat_mae,reg,logistic,aabc_age,6,0.005994842503189409,test,0.5576923076923077,0.06421100571115414,0.5482387528320207,0.0667387632484132,0.5590659340659341,0.06431179467385122 +flat_mae,reg,logistic,aabc_age,7,0.000774263682681127,train,0.5531496062992126,0.021555352172173255,0.5460691545808277,0.02223432841533533,0.5532343653203271,0.021591466817456068 +flat_mae,reg,logistic,aabc_age,7,0.000774263682681127,test,0.5,0.06421989774417419,0.48419540229885055,0.06641901330757874,0.4965659340659341,0.06404645293413758 +flat_mae,reg,logistic,aabc_age,8,0.000774263682681127,train,0.5551181102362205,0.020185325793624302,0.5426952477324221,0.02083788232241086,0.5535451937037413,0.02009874309851788 +flat_mae,reg,logistic,aabc_age,8,0.000774263682681127,test,0.4423076923076923,0.06744108759397337,0.4377777777777777,0.06717082996375917,0.45100732600732596,0.06805279605738754 +flat_mae,reg,logistic,aabc_age,9,0.005994842503189409,train,0.6811023622047244,0.019564981921203886,0.6785665909892414,0.020048173490725237,0.6804959924355903,0.01958548905890785 +flat_mae,reg,logistic,aabc_age,9,0.005994842503189409,test,0.5,0.06169096488083967,0.47915047915047915,0.06578694478562355,0.49793956043956045,0.06145461815994957 +flat_mae,reg,logistic,aabc_age,10,0.005994842503189409,train,0.6929133858267716,0.01901926859700855,0.6912141079059919,0.019360460788902904,0.6932778357426029,0.018994506833083725 +flat_mae,reg,logistic,aabc_age,10,0.005994842503189409,test,0.40384615384615385,0.06908099182809019,0.4065336837075968,0.07025491008050007,0.40041208791208793,0.06887769273069774 +flat_mae,reg,logistic,aabc_age,11,0.005994842503189409,train,0.6771653543307087,0.02027604578649558,0.673757561007337,0.020635060304820676,0.6766813022522532,0.02030701746159942 +flat_mae,reg,logistic,aabc_age,11,0.005994842503189409,test,0.5769230769230769,0.06861714166429199,0.5740900856389987,0.0707835856620737,0.5741758241758241,0.06902073516431761 +flat_mae,reg,logistic,aabc_age,12,0.000774263682681127,train,0.5649606299212598,0.020009890836570357,0.5566279688059376,0.020575976090207004,0.563875772216138,0.01998132255694978 +flat_mae,reg,logistic,aabc_age,12,0.000774263682681127,test,0.38461538461538464,0.06379914859108031,0.37445887445887444,0.0632020766021588,0.37957875457875456,0.06331598528146307 +flat_mae,reg,logistic,aabc_age,13,0.000774263682681127,train,0.5590551181102362,0.020376779454228812,0.550094289093192,0.02081032842565713,0.5579949663303215,0.020294690936351133 +flat_mae,reg,logistic,aabc_age,13,0.000774263682681127,test,0.5384615384615384,0.06593259064283565,0.5327898550724638,0.06562435427768222,0.5338827838827839,0.06575966386802258 +flat_mae,reg,logistic,aabc_age,14,0.005994842503189409,train,0.6732283464566929,0.019338387108195756,0.6700853606487409,0.019808754742329533,0.6728166253986947,0.019255388576238937 +flat_mae,reg,logistic,aabc_age,14,0.005994842503189409,test,0.46153846153846156,0.06189190567566715,0.45470457079152726,0.06277008412907494,0.46543040293040294,0.06236891656132928 +flat_mae,reg,logistic,aabc_age,15,0.3593813663804626,train,0.9881889763779528,0.004901970069127415,0.9883592023936598,0.004835336899988941,0.9884559561095458,0.004803178800245606 +flat_mae,reg,logistic,aabc_age,15,0.3593813663804626,test,0.4423076923076923,0.0669922719202851,0.4451058201058201,0.06778545191198934,0.44505494505494503,0.0669779999591169 +flat_mae,reg,logistic,aabc_age,16,0.046415888336127774,train,0.8562992125984252,0.015435750209327996,0.8561326153283482,0.01558087594735146,0.8572796654705029,0.01537598288180686 +flat_mae,reg,logistic,aabc_age,16,0.046415888336127774,test,0.5,0.06847165007435796,0.5012820512820513,0.0688541383306548,0.5,0.06846726788761762 +flat_mae,reg,logistic,aabc_age,17,0.005994842503189409,train,0.6850393700787402,0.020135650011579024,0.6836956317806828,0.020443239331011227,0.6845606159700339,0.020196916433725523 +flat_mae,reg,logistic,aabc_age,17,0.005994842503189409,test,0.5,0.06626844947974643,0.48912101041133293,0.0690547922501384,0.49954212454212454,0.06623763664573815 +flat_mae,reg,logistic,aabc_age,18,0.000774263682681127,train,0.5531496062992126,0.02158942403600569,0.5466397502153316,0.021838652424544157,0.5534019465312847,0.021529862991963734 +flat_mae,reg,logistic,aabc_age,18,0.000774263682681127,test,0.5384615384615384,0.061061747291152095,0.5139382676147382,0.06309303341300278,0.5306776556776557,0.060440095762975043 +flat_mae,reg,logistic,aabc_age,19,0.3593813663804626,train,0.9901574803149606,0.004184687506041399,0.9901642565452844,0.004192031581063371,0.990254517260625,0.0041517975201843015 +flat_mae,reg,logistic,aabc_age,19,0.3593813663804626,test,0.4423076923076923,0.07018861950268225,0.44775580959520245,0.07090464440140459,0.4407051282051282,0.07036345332175588 +flat_mae,reg,logistic,aabc_age,20,0.3593813663804626,train,0.9921259842519685,0.0038809073839611518,0.9922034018483433,0.0038514493485712936,0.9925881875145045,0.0036667975991463817 +flat_mae,reg,logistic,aabc_age,20,0.3593813663804626,test,0.4230769230769231,0.06119252526190593,0.40805860805860805,0.059872092005645576,0.4194139194139194,0.06081871547284131 +flat_mae,reg,logistic,aabc_age,21,0.005994842503189409,train,0.6850393700787402,0.019622489599543574,0.6825291005291005,0.020141498823550614,0.685330914154307,0.019663022112302905 +flat_mae,reg,logistic,aabc_age,21,0.005994842503189409,test,0.4230769230769231,0.06288360384978023,0.4044968189705032,0.06604256846840295,0.4237637362637363,0.06296442546513618 +flat_mae,reg,logistic,aabc_age,22,0.3593813663804626,train,0.9881889763779528,0.004710443596579591,0.9883349881194649,0.004660720376095805,0.9884559561095458,0.004608094909463133 +flat_mae,reg,logistic,aabc_age,22,0.3593813663804626,test,0.34615384615384615,0.06307620778206059,0.3545454545454545,0.06131739173491658,0.34867216117216115,0.06365897667345995 +flat_mae,reg,logistic,aabc_age,23,0.046415888336127774,train,0.8661417322834646,0.015555039888040518,0.8672929007150443,0.015447991632440816,0.8669075536691415,0.015514775261891815 +flat_mae,reg,logistic,aabc_age,23,0.046415888336127774,test,0.46153846153846156,0.06355376748035277,0.4687645687645688,0.06323455105805896,0.4640567765567766,0.06397645660730855 +flat_mae,reg,logistic,aabc_age,24,0.005994842503189409,train,0.6830708661417323,0.019884892577479652,0.6785286104528431,0.020456719746529843,0.6826297160085847,0.019884543532419745 +flat_mae,reg,logistic,aabc_age,24,0.005994842503189409,test,0.4423076923076923,0.06560032291737523,0.44897342995169076,0.06536589933874433,0.44345238095238093,0.06578232501622489 +flat_mae,reg,logistic,aabc_age,25,0.005994842503189409,train,0.6791338582677166,0.02068029665767248,0.6775695071765533,0.020887325589161915,0.679064959176354,0.02064021451640301 +flat_mae,reg,logistic,aabc_age,25,0.005994842503189409,test,0.25,0.04917092215729552,0.2216019847598795,0.04390156410175785,0.2516025641025641,0.049688884553710594 +flat_mae,reg,logistic,aabc_age,26,0.005994842503189409,train,0.6889763779527559,0.020069818536679194,0.6853582061902904,0.020520523013105377,0.6885605085646226,0.020149903843059556 +flat_mae,reg,logistic,aabc_age,26,0.005994842503189409,test,0.38461538461538464,0.06383915624303384,0.3683835369896751,0.06755205328042414,0.3839285714285714,0.0637914267039946 +flat_mae,reg,logistic,aabc_age,27,0.046415888336127774,train,0.8700787401574803,0.0146585430464889,0.8705429581125483,0.014705557203904828,0.8714249341662368,0.01461026840847938 +flat_mae,reg,logistic,aabc_age,27,0.046415888336127774,test,0.40384615384615385,0.0641007320251013,0.3832175925925926,0.06491093164509434,0.4001831501831502,0.06359963841586123 +flat_mae,reg,logistic,aabc_age,28,9.999999999999999e-05,train,0.4940944881889764,0.02006555997856512,0.46440516552965905,0.020527618586061155,0.4907481506130739,0.019870481238725632 +flat_mae,reg,logistic,aabc_age,28,9.999999999999999e-05,test,0.46153846153846156,0.061048821281673056,0.3945663531870428,0.05199550131470788,0.4519230769230769,0.05940785293188987 +flat_mae,reg,logistic,aabc_age,29,0.046415888336127774,train,0.8582677165354331,0.016304215824592957,0.8576427755233265,0.016534623663305374,0.859078226621582,0.016316192991845984 +flat_mae,reg,logistic,aabc_age,29,0.046415888336127774,test,0.38461538461538464,0.0639157521490678,0.40235127191648934,0.06218634148848337,0.38553113553113555,0.06427175082897377 +flat_mae,reg,logistic,aabc_age,30,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,30,166.81005372000556,test,0.3269230769230769,0.06784374450632168,0.32926245210727967,0.07013052516871161,0.33241758241758246,0.06842712410303668 +flat_mae,reg,logistic,aabc_age,31,9.999999999999999e-05,train,0.4822834645669291,0.019707550514335728,0.4542374973883645,0.02006374032771566,0.4790865121818834,0.019554623763462828 +flat_mae,reg,logistic,aabc_age,31,9.999999999999999e-05,test,0.5192307692307693,0.06186911169758719,0.49215686274509807,0.06203652020557381,0.5128205128205128,0.061156859108231244 +flat_mae,reg,logistic,aabc_age,32,0.005994842503189409,train,0.687007874015748,0.020770505466352868,0.681411187128376,0.021460675140722852,0.685741715878164,0.020778807954407872 +flat_mae,reg,logistic,aabc_age,32,0.005994842503189409,test,0.5192307692307693,0.06219719546941288,0.5129310344827587,0.06400205017073944,0.5203754578754579,0.06254609648693672 +flat_mae,reg,logistic,aabc_age,33,0.3593813663804626,train,0.9921259842519685,0.00408667296242209,0.9920898402950271,0.004107395076942396,0.9923206329631042,0.004009252921207789 +flat_mae,reg,logistic,aabc_age,33,0.3593813663804626,test,0.46153846153846156,0.06700918421552951,0.4648432601880878,0.06725136130837574,0.4658882783882784,0.06721777341358681 +flat_mae,reg,logistic,aabc_age,34,0.000774263682681127,train,0.5570866141732284,0.019728878350207382,0.5484481457814205,0.02024130454956674,0.5566315409416003,0.019710794930324283 +flat_mae,reg,logistic,aabc_age,34,0.000774263682681127,test,0.4230769230769231,0.06797580495420115,0.43106008323399625,0.06923165820526607,0.42261904761904756,0.06817241793934554 +flat_mae,reg,logistic,aabc_age,35,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,35,2.782559402207126,test,0.3076923076923077,0.061773242038545,0.30890888499584157,0.06125974120921621,0.30975274725274726,0.06242253019692582 +flat_mae,reg,logistic,aabc_age,36,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,36,2.782559402207126,test,0.5,0.06577804709239961,0.49160996424864495,0.06574633040882259,0.5054945054945055,0.06638266144567449 +flat_mae,reg,logistic,aabc_age,37,0.3593813663804626,train,0.9881889763779528,0.004710155663153041,0.9883349881194649,0.004657098151016665,0.9884559561095458,0.004621873064472217 +flat_mae,reg,logistic,aabc_age,37,0.3593813663804626,test,0.46153846153846156,0.06865784302091077,0.46339597902097907,0.06973150996538761,0.4658882783882784,0.06887129522239946 +flat_mae,reg,logistic,aabc_age,38,0.005994842503189409,train,0.6732283464566929,0.020712035292959792,0.6712541365207717,0.021004195550862352,0.6736192890528953,0.020713478331510392 +flat_mae,reg,logistic,aabc_age,38,0.005994842503189409,test,0.5576923076923077,0.06792872831267721,0.5469530469530469,0.06900500443767416,0.5544871794871795,0.06773842679928224 +flat_mae,reg,logistic,aabc_age,39,0.005994842503189409,train,0.6673228346456693,0.020025965459337665,0.6650478660537764,0.020375324130281786,0.6679560510482576,0.02007123818889819 +flat_mae,reg,logistic,aabc_age,39,0.005994842503189409,test,0.4423076923076923,0.06937245117659113,0.44043637521898393,0.0697466543310598,0.4432234432234432,0.06975672412616275 +flat_mae,reg,logistic,aabc_age,40,0.000774263682681127,train,0.5393700787401575,0.020796517328395787,0.5280053026464739,0.021160770795305435,0.5382864329703925,0.020664680240961023 +flat_mae,reg,logistic,aabc_age,40,0.000774263682681127,test,0.5192307692307693,0.06470775220378368,0.5159774436090225,0.06684436641690726,0.5187728937728937,0.06478298240971794 +flat_mae,reg,logistic,aabc_age,41,0.046415888336127774,train,0.8720472440944882,0.014047316920117693,0.87223242127331,0.014137502404867611,0.8731235219768733,0.014005962417491169 +flat_mae,reg,logistic,aabc_age,41,0.046415888336127774,test,0.40384615384615385,0.06207525495804676,0.3809790209790209,0.06225583294262831,0.40293040293040294,0.06207664567488739 +flat_mae,reg,logistic,aabc_age,42,0.000774263682681127,train,0.5433070866141733,0.020986266910691807,0.5327407046113126,0.021735220995432392,0.542151109823951,0.02091273455098735 +flat_mae,reg,logistic,aabc_age,42,0.000774263682681127,test,0.4807692307692308,0.057724029740372905,0.45483954451345754,0.06524871152378642,0.48305860805860806,0.058170527403685245 +flat_mae,reg,logistic,aabc_age,43,0.000774263682681127,train,0.5570866141732284,0.02155177804633004,0.5488116534628162,0.02227825698224065,0.5553761203247479,0.0215489203068475 +flat_mae,reg,logistic,aabc_age,43,0.000774263682681127,test,0.4807692307692308,0.0655451761637971,0.4739057239057239,0.06590849766958944,0.47458791208791207,0.06540824472701073 +flat_mae,reg,logistic,aabc_age,44,0.005994842503189409,train,0.6850393700787402,0.01920512432070149,0.6818121575632567,0.01939694462565269,0.6850133729326856,0.019167197921961154 +flat_mae,reg,logistic,aabc_age,44,0.005994842503189409,test,0.4807692307692308,0.06821670785642789,0.47963875205254514,0.06923374625600366,0.4789377289377289,0.06822053982562544 +flat_mae,reg,logistic,aabc_age,45,0.000774263682681127,train,0.5669291338582677,0.019795505404613143,0.5544353280761855,0.020487305628156472,0.5650392509239742,0.019782311380618593 +flat_mae,reg,logistic,aabc_age,45,0.000774263682681127,test,0.46153846153846156,0.06514839682960323,0.44568185697217955,0.06706248079388707,0.4608516483516484,0.06494580527508842 +flat_mae,reg,logistic,aabc_age,46,0.000774263682681127,train,0.5551181102362205,0.020960628072552077,0.5465210973179101,0.02164884744224794,0.5537951270548478,0.02098829048596334 +flat_mae,reg,logistic,aabc_age,46,0.000774263682681127,test,0.5769230769230769,0.061208961617140026,0.5619539279161081,0.06662156156671377,0.575091575091575,0.06107875829953317 +flat_mae,reg,logistic,aabc_age,47,0.005994842503189409,train,0.6830708661417323,0.0192112204230231,0.6789731672719683,0.019677647079150234,0.6824621347976271,0.019170188742703112 +flat_mae,reg,logistic,aabc_age,47,0.005994842503189409,test,0.4423076923076923,0.06196887938178582,0.4257440476190476,0.06322630939411145,0.43864468864468864,0.061456413876477066 +flat_mae,reg,logistic,aabc_age,48,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,48,21.54434690031882,test,0.40384615384615385,0.06794646343623136,0.413711317418214,0.0662866328170163,0.40773809523809523,0.06847619168860274 +flat_mae,reg,logistic,aabc_age,49,0.3593813663804626,train,0.9960629921259843,0.0027038847279396393,0.9960256812008625,0.0027381782256236894,0.9959177552652626,0.0028007131204733707 +flat_mae,reg,logistic,aabc_age,49,0.3593813663804626,test,0.28846153846153844,0.06202810014098926,0.2859663865546218,0.059416904500076424,0.28456959706959706,0.06135681805459609 +flat_mae,reg,logistic,aabc_age,50,0.000774263682681127,train,0.5492125984251969,0.020981291247899037,0.5375288610663934,0.021420807093938475,0.5472292520555669,0.020959421752351253 +flat_mae,reg,logistic,aabc_age,50,0.000774263682681127,test,0.4423076923076923,0.06801644423512988,0.4447396536212326,0.07036323041764078,0.44047619047619047,0.06838090940146124 +flat_mae,reg,logistic,aabc_age,51,0.005994842503189409,train,0.6830708661417323,0.020515915378202534,0.6794807503451645,0.020919526549130416,0.6838675154251432,0.020477912273453316 +flat_mae,reg,logistic,aabc_age,51,0.005994842503189409,test,0.36538461538461536,0.06283485445921906,0.3591269841269841,0.0628621703738408,0.3660714285714286,0.06304377114830217 +flat_mae,reg,logistic,aabc_age,52,0.3593813663804626,train,0.9921259842519685,0.0038574320161946425,0.9922142548773127,0.0038159419577367256,0.9923206329631042,0.0037884857582902586 +flat_mae,reg,logistic,aabc_age,52,0.3593813663804626,test,0.40384615384615385,0.06876337954801165,0.4131444657760447,0.07001378586947185,0.40064102564102566,0.06878385461318579 +flat_mae,reg,logistic,aabc_age,53,9.999999999999999e-05,train,0.49803149606299213,0.02040904934235277,0.4840811688311688,0.020656952846033222,0.49576539781238194,0.02034385744535375 +flat_mae,reg,logistic,aabc_age,53,9.999999999999999e-05,test,0.5192307692307693,0.05359385949106417,0.46190476190476193,0.04461178108629741,0.5096153846153846,0.05203345519747334 +flat_mae,reg,logistic,aabc_age,54,0.005994842503189409,train,0.6811023622047244,0.019810050299688426,0.6800964953429806,0.020051458565750902,0.6828539967279709,0.019775530632146425 +flat_mae,reg,logistic,aabc_age,54,0.005994842503189409,test,0.40384615384615385,0.0647747462478958,0.40123655913978495,0.06527856641917759,0.4033882783882784,0.06493950279832605 +flat_mae,reg,logistic,aabc_age,55,9.999999999999999e-05,train,0.5059055118110236,0.02050847638648974,0.4769871607813594,0.02086080257495741,0.5020246399521279,0.020354140810374264 +flat_mae,reg,logistic,aabc_age,55,9.999999999999999e-05,test,0.4230769230769231,0.06346656157276698,0.4157102977667494,0.06642868533296961,0.4210164835164835,0.06319457382366415 +flat_mae,reg,logistic,aabc_age,56,0.005994842503189409,train,0.6968503937007874,0.019395992444550123,0.6927977996360037,0.019979615987800705,0.6968925792450551,0.019385746261058035 +flat_mae,reg,logistic,aabc_age,56,0.005994842503189409,test,0.40384615384615385,0.06511595256238296,0.39428571428571424,0.06577679802447842,0.40613553113553114,0.06564383941161023 +flat_mae,reg,logistic,aabc_age,57,0.000774263682681127,train,0.562992125984252,0.02166850556794969,0.5559356532408082,0.022210991538891438,0.5620096031945438,0.021611183117562913 +flat_mae,reg,logistic,aabc_age,57,0.000774263682681127,test,0.4230769230769231,0.06184438382465208,0.39095291064145343,0.05832989693307423,0.4191849816849817,0.060942008915609344 +flat_mae,reg,logistic,aabc_age,58,0.046415888336127774,train,0.8681102362204725,0.014637121267197234,0.8683953419429642,0.014676253548308501,0.8680710323769776,0.014635197019922292 +flat_mae,reg,logistic,aabc_age,58,0.046415888336127774,test,0.38461538461538464,0.06359751167399759,0.3740495012234143,0.06136501402847278,0.37934981684981683,0.06303850407474332 +flat_mae,reg,logistic,aabc_age,59,0.005994842503189409,train,0.6948818897637795,0.020291708294445068,0.6909962249942072,0.02068545531112302,0.6934034617147657,0.020290638075566542 +flat_mae,reg,logistic,aabc_age,59,0.005994842503189409,test,0.36538461538461536,0.06814967863625515,0.3590993935821522,0.06866135574565348,0.36469780219780223,0.06818729765465267 +flat_mae,reg,logistic,aabc_age,60,0.000774263682681127,train,0.5669291338582677,0.0217767205164394,0.5605303519421134,0.022143796084141167,0.5671796873351759,0.02167399583116165 +flat_mae,reg,logistic,aabc_age,60,0.000774263682681127,test,0.36538461538461536,0.06230592352665095,0.3551724137931035,0.06170813402900485,0.36446886446886445,0.06214966474749692 +flat_mae,reg,logistic,aabc_age,61,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,61,21.54434690031882,test,0.36538461538461536,0.06293017639091533,0.3621636983705949,0.06325570233282847,0.3660714285714286,0.06314164988090143 +flat_mae,reg,logistic,aabc_age,62,0.005994842503189409,train,0.6811023622047244,0.020471606585057072,0.6777640829082372,0.021058809480890544,0.6812486694195696,0.02042251902309143 +flat_mae,reg,logistic,aabc_age,62,0.005994842503189409,test,0.4423076923076923,0.06541287850688282,0.4193376068376068,0.06233107036551939,0.43292124542124544,0.06431139103192927 +flat_mae,reg,logistic,aabc_age,63,0.005994842503189409,train,0.6909448818897638,0.019009708696822503,0.6870057827883277,0.019502256116850136,0.6903590697157017,0.019070815099172918 +flat_mae,reg,logistic,aabc_age,63,0.005994842503189409,test,0.40384615384615385,0.05974891992289621,0.3802966002454493,0.06271063745681489,0.4015567765567766,0.05964430620476494 +flat_mae,reg,logistic,aabc_age,64,0.005994842503189409,train,0.6811023622047244,0.01958159275640134,0.6788520515971348,0.020011922051157482,0.6810311015383907,0.019590908880614447 +flat_mae,reg,logistic,aabc_age,64,0.005994842503189409,test,0.4807692307692308,0.06592128172764379,0.48408488063660476,0.06557500417367033,0.48054029304029305,0.06620330250955045 +flat_mae,reg,logistic,aabc_age,65,0.046415888336127774,train,0.8681102362204725,0.014595532674731255,0.8681977569982837,0.014711317773263299,0.8687737226907357,0.014550708942336277 +flat_mae,reg,logistic,aabc_age,65,0.046415888336127774,test,0.4807692307692308,0.06844256435424241,0.4871861230556883,0.0676254777597571,0.480540293040293,0.06815431984510657 +flat_mae,reg,logistic,aabc_age,66,0.005994842503189409,train,0.6929133858267716,0.019764728072729745,0.6902443571209163,0.02003510349307112,0.6920900229962658,0.01966603813084886 +flat_mae,reg,logistic,aabc_age,66,0.005994842503189409,test,0.4230769230769231,0.06542843554231442,0.4172092275540551,0.06548927313373361,0.4210164835164835,0.06537943236728051 +flat_mae,reg,logistic,aabc_age,67,0.3593813663804626,train,0.9940944881889764,0.003391544339256966,0.9941028733468465,0.003388763798198532,0.9941191941141835,0.0033893364069867054 +flat_mae,reg,logistic,aabc_age,67,0.3593813663804626,test,0.4423076923076923,0.060323033272501034,0.43418409547441805,0.05619428332410025,0.4489468864468864,0.06142920259961692 +flat_mae,reg,logistic,aabc_age,68,0.046415888336127774,train,0.8543307086614174,0.015426372481757858,0.8541742231159389,0.015640007566572682,0.8542433049028653,0.015429632861708953 +flat_mae,reg,logistic,aabc_age,68,0.046415888336127774,test,0.5,0.06722578532838237,0.5012479871175524,0.06707425893230956,0.5041208791208791,0.0677534322615662 +flat_mae,reg,logistic,aabc_age,69,9.999999999999999e-05,train,0.4940944881889764,0.02007536292425103,0.4710728019703346,0.020098570415022266,0.49094809729395905,0.019952140790379845 +flat_mae,reg,logistic,aabc_age,69,9.999999999999999e-05,test,0.4230769230769231,0.06678837017424923,0.4227880184331797,0.06693543250161488,0.4210164835164835,0.06660401216438756 +flat_mae,reg,logistic,aabc_age,70,0.005994842503189409,train,0.6673228346456693,0.020533032684210643,0.6645489679104171,0.0209402311482192,0.6681236322592152,0.020459216399886633 +flat_mae,reg,logistic,aabc_age,70,0.005994842503189409,test,0.4230769230769231,0.06526101439603893,0.42115384615384616,0.06584533473743305,0.42719780219780223,0.06579111184695192 +flat_mae,reg,logistic,aabc_age,71,0.000774263682681127,train,0.5551181102362205,0.020519885590114997,0.547173921858838,0.020982054131709078,0.5540126949360267,0.020415996290190593 +flat_mae,reg,logistic,aabc_age,71,0.000774263682681127,test,0.46153846153846156,0.06047274313872237,0.4448093413610655,0.05954840442563885,0.4578754578754579,0.06004892370302983 +flat_mae,reg,logistic,aabc_age,72,0.005994842503189409,train,0.6850393700787402,0.020411320026984297,0.6816159633414647,0.021070230429501496,0.6843606692891488,0.020428484701714888 +flat_mae,reg,logistic,aabc_age,72,0.005994842503189409,test,0.4807692307692308,0.05634439482683585,0.4411112720950449,0.05477922941648492,0.475503663003663,0.05556740275800569 +flat_mae,reg,logistic,aabc_age,73,0.000774263682681127,train,0.5551181102362205,0.022214949898840597,0.5488320858488135,0.02247266350381283,0.5545977907090484,0.022145285310827195 +flat_mae,reg,logistic,aabc_age,73,0.000774263682681127,test,0.5384615384615384,0.061686408695526114,0.5383333333333333,0.06031806730424589,0.5352564102564102,0.061621475533268286 +flat_mae,reg,logistic,aabc_age,74,9.999999999999999e-05,train,0.484251968503937,0.02104486636386177,0.461705320684044,0.021344310752042413,0.481202614554584,0.02091875676602893 +flat_mae,reg,logistic,aabc_age,74,9.999999999999999e-05,test,0.5,0.06578981908971925,0.47238095238095235,0.06947095039422635,0.49221611721611724,0.06513195489117536 +flat_mae,reg,logistic,aabc_age,75,0.000774263682681127,train,0.5590551181102362,0.020929010918985056,0.5543838497520924,0.0213309870765508,0.5588976033249646,0.02095628592131229 +flat_mae,reg,logistic,aabc_age,75,0.000774263682681127,test,0.46153846153846156,0.06151085919426397,0.4301587301587302,0.05886509405013649,0.45352564102564097,0.060590708112524524 +flat_mae,reg,logistic,aabc_age,76,0.000774263682681127,train,0.5610236220472441,0.02084835782041471,0.5523554990831233,0.021479188763079228,0.5604962177951587,0.020844803602240565 +flat_mae,reg,logistic,aabc_age,76,0.000774263682681127,test,0.38461538461538464,0.06276747499429751,0.3774801587301587,0.0625171226009185,0.37957875457875456,0.062385821661453435 +flat_mae,reg,logistic,aabc_age,77,0.005994842503189409,train,0.687007874015748,0.020178884121148235,0.6845081404759416,0.020517016997484254,0.6864767716618494,0.02016028626898012 +flat_mae,reg,logistic,aabc_age,77,0.005994842503189409,test,0.4423076923076923,0.07069447347481737,0.43916449894710763,0.07067938661862344,0.44184981684981683,0.07079127142743492 +flat_mae,reg,logistic,aabc_age,78,0.005994842503189409,train,0.6968503937007874,0.020245630337707324,0.6946221332222837,0.020755767238127235,0.6966573901635823,0.02035991676857503 +flat_mae,reg,logistic,aabc_age,78,0.005994842503189409,test,0.4807692307692308,0.06654014004107889,0.47919467787114844,0.0678641185958291,0.48466117216117216,0.06704438462851176 +flat_mae,reg,logistic,aabc_age,79,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,79,10000.0,test,0.38461538461538464,0.07023075869913141,0.38446969696969696,0.069882987401856,0.38255494505494503,0.07015011344882505 +flat_mae,reg,logistic,aabc_age,80,0.3593813663804626,train,0.9901574803149606,0.004394208759053348,0.9902702467446014,0.0043459306592370725,0.9905220718120251,0.004199796805642993 +flat_mae,reg,logistic,aabc_age,80,0.3593813663804626,test,0.4230769230769231,0.05880888714387131,0.4124137931034483,0.0578605988823677,0.4251373626373627,0.05947586095518086 +flat_mae,reg,logistic,aabc_age,81,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,81,2.782559402207126,test,0.36538461538461536,0.06287504043783329,0.36620879120879124,0.06245891614855424,0.3676739926739927,0.06334429448170893 +flat_mae,reg,logistic,aabc_age,82,0.000774263682681127,train,0.547244094488189,0.02101222227721214,0.5398042674834378,0.021571564746041917,0.5465508957803099,0.021072633143129658 +flat_mae,reg,logistic,aabc_age,82,0.000774263682681127,test,0.5384615384615384,0.06095175849723554,0.5078976034858388,0.06479013218466448,0.532051282051282,0.060544617753621674 +flat_mae,reg,logistic,aabc_age,83,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_age,83,166.81005372000556,test,0.38461538461538464,0.0628946239976888,0.3819143819143819,0.06263887207843015,0.3869047619047619,0.06337128318057263 +flat_mae,reg,logistic,aabc_age,84,0.046415888336127774,train,0.8582677165354331,0.01511108555629983,0.8591049054019192,0.015105439454502184,0.8590282399513607,0.015108185949355589 +flat_mae,reg,logistic,aabc_age,84,0.046415888336127774,test,0.5,0.0655075202331808,0.49338161838161837,0.0671966398746151,0.5057234432234432,0.06566979077280131 +flat_mae,reg,logistic,aabc_age,85,0.046415888336127774,train,0.860236220472441,0.015196186901594511,0.8606497146107287,0.015273317861490098,0.8614118968754616,0.015113407365331524 +flat_mae,reg,logistic,aabc_age,85,0.046415888336127774,test,0.4423076923076923,0.061465148637593156,0.4178571428571428,0.06352615483160763,0.43452380952380953,0.0610157098305593 +flat_mae,reg,logistic,aabc_age,86,0.046415888336127774,train,0.8720472440944882,0.013711604811845057,0.8721546310832025,0.013830240250565175,0.8735086710690099,0.013654660812306355 +flat_mae,reg,logistic,aabc_age,86,0.046415888336127774,test,0.4807692307692308,0.06243494839483003,0.4696632996632996,0.06279036473736897,0.47733516483516486,0.062167649338168804 +flat_mae,reg,logistic,aabc_age,87,0.000774263682681127,train,0.5551181102362205,0.02103773561181524,0.5471580855895973,0.021921691298997813,0.5535775591736688,0.021036077603084354 +flat_mae,reg,logistic,aabc_age,87,0.000774263682681127,test,0.4423076923076923,0.06773014676689464,0.4363663711489798,0.06710250138546157,0.4432234432234432,0.06822597890136231 +flat_mae,reg,logistic,aabc_age,88,9.999999999999999e-05,train,0.49015748031496065,0.019046442923390597,0.46580343339896113,0.01933873452640442,0.4875185562027584,0.01886879901847313 +flat_mae,reg,logistic,aabc_age,88,9.999999999999999e-05,test,0.4423076923076923,0.06499722342726987,0.4447829131652661,0.06647248899619328,0.43887362637362637,0.06490268367920983 +flat_mae,reg,logistic,aabc_age,89,0.005994842503189409,train,0.6692913385826772,0.019705036693113315,0.6653369685568108,0.02004284728882739,0.6687343806639572,0.01967564070436884 +flat_mae,reg,logistic,aabc_age,89,0.005994842503189409,test,0.40384615384615385,0.06288902595682984,0.39068035426731074,0.06316379308822381,0.40453296703296704,0.06300585230744246 +flat_mae,reg,logistic,aabc_age,90,0.046415888336127774,train,0.8562992125984252,0.015701259282588968,0.856464639522116,0.015782245048364795,0.8566269618269661,0.015722446909875436 +flat_mae,reg,logistic,aabc_age,90,0.046415888336127774,test,0.5,0.0663784648189153,0.5105933721450963,0.06336991634014841,0.5070970695970696,0.06702949681482474 +flat_mae,reg,logistic,aabc_age,91,9.999999999999999e-05,train,0.4822834645669291,0.021278126797661487,0.45708960736515636,0.02172262193410328,0.47913649885210463,0.02117187089575059 +flat_mae,reg,logistic,aabc_age,91,9.999999999999999e-05,test,0.5,0.06305931977630973,0.48052362707535123,0.0677152556307167,0.49793956043956045,0.06277796609434343 +flat_mae,reg,logistic,aabc_age,92,0.046415888336127774,train,0.8562992125984252,0.015367962351871307,0.8562961452498332,0.015474981690143406,0.8570121109191027,0.01535935368804946 +flat_mae,reg,logistic,aabc_age,92,0.046415888336127774,test,0.5192307692307693,0.06741256664756085,0.5164835164835165,0.06691936741700327,0.5233516483516483,0.06782695451655976 +flat_mae,reg,logistic,aabc_age,93,0.005994842503189409,train,0.6988188976377953,0.01989738812789529,0.6972770063552991,0.020128192286211488,0.6989910604174617,0.019876041829807366 +flat_mae,reg,logistic,aabc_age,93,0.005994842503189409,test,0.40384615384615385,0.060776533154240725,0.3847052280311457,0.05667312297319862,0.39720695970695974,0.05976015082443982 +flat_mae,reg,logistic,aabc_age,94,0.3593813663804626,train,0.9940944881889764,0.0033042732690326377,0.9942222870058268,0.003234558807580974,0.9943367619953625,0.0031747007123586893 +flat_mae,reg,logistic,aabc_age,94,0.3593813663804626,test,0.4230769230769231,0.06805057073614293,0.42133152173913047,0.06892136736373881,0.4198717948717948,0.06795356906321504 +flat_mae,reg,logistic,aabc_age,95,0.046415888336127774,train,0.8641732283464567,0.015454229830905166,0.8639988171210294,0.0155564134027984,0.8649590325073985,0.015433150691557653 +flat_mae,reg,logistic,aabc_age,95,0.046415888336127774,test,0.46153846153846156,0.06736234045253033,0.4503968253968254,0.0678090643815176,0.4581043956043956,0.06706549437469816 +flat_mae,reg,logistic,aabc_age,96,0.046415888336127774,train,0.8740157480314961,0.01464894976383829,0.8745297536773079,0.014687988943353741,0.8753572188903103,0.014545116850593582 +flat_mae,reg,logistic,aabc_age,96,0.046415888336127774,test,0.38461538461538464,0.058550034234624834,0.37938948306595366,0.058279640383632536,0.37797619047619047,0.057820193771665446 +flat_mae,reg,logistic,aabc_age,97,0.000774263682681127,train,0.5570866141732284,0.021176762061526366,0.5535118688886276,0.02165116276645247,0.5570314343033704,0.021228702260628434 +flat_mae,reg,logistic,aabc_age,97,0.000774263682681127,test,0.36538461538461536,0.0662119601630933,0.36547619047619045,0.06679293948458302,0.36034798534798534,0.06569022478264115 +flat_mae,reg,logistic,aabc_age,98,0.000774263682681127,train,0.562992125984252,0.021224826806985862,0.5563284202569917,0.021932370762259318,0.5622271710757227,0.021291384427535056 +flat_mae,reg,logistic,aabc_age,98,0.000774263682681127,test,0.4807692307692308,0.058324979677989344,0.436902966314731,0.05927452749465568,0.4741300366300366,0.05716100693112418 +flat_mae,reg,logistic,aabc_age,99,0.046415888336127774,train,0.8484251968503937,0.014792138642357695,0.8478523447503188,0.015050364667285635,0.8491827838715431,0.014752955231192742 +flat_mae,reg,logistic,aabc_age,99,0.046415888336127774,test,0.4807692307692308,0.07001335460099885,0.4826427598166728,0.07052680425092261,0.48511904761904756,0.07006149305029821 +flat_mae,reg,logistic,aabc_age,100,0.000774263682681127,train,0.5551181102362205,0.02082080053723847,0.5453091728476915,0.02148665096253465,0.5545654252391209,0.020736922119753497 +flat_mae,reg,logistic,aabc_age,100,0.000774263682681127,test,0.46153846153846156,0.06498479567282683,0.44142866250159374,0.06689547898580975,0.4608516483516484,0.06488574198052018 diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fea68e47cc83edef721aa5b24d7a8688c478a6c5 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:30:43 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (aabc_age reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic +model: flat_mae +representation: reg +dataset: aabc_age +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/aabc_age__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:20:04 time: 5.2848 data: 4.2886 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:35 time: 0.2191 data: 0.0738 max mem: 3581 +extract (train) [ 40/228] eta: 0:01:04 time: 0.2173 data: 0.0714 max mem: 3581 +extract (train) [ 60/228] eta: 0:00:50 time: 0.2095 data: 0.0711 max mem: 3581 +extract (train) [ 80/228] eta: 0:00:41 time: 0.2154 data: 0.0720 max mem: 3581 +extract (train) [100/228] eta: 0:00:33 time: 0.2039 data: 0.0650 max mem: 3581 +extract (train) [120/228] eta: 0:00:27 time: 0.2144 data: 0.0710 max mem: 3581 +extract (train) [140/228] eta: 0:00:21 time: 0.2082 data: 0.0678 max mem: 3581 +extract (train) [160/228] eta: 0:00:16 time: 0.2015 data: 0.0649 max mem: 3581 +extract (train) [180/228] eta: 0:00:11 time: 0.2171 data: 0.0725 max mem: 3581 +extract (train) [200/228] eta: 0:00:06 time: 0.1957 data: 0.0619 max mem: 3581 +extract (train) [220/228] eta: 0:00:01 time: 0.1806 data: 0.0553 max mem: 3581 +extract (train) [227/228] eta: 0:00:00 time: 0.1742 data: 0.0529 max mem: 3581 +extract (train) Total time: 0:00:52 (0.2307 s / it) +extract (validation) [ 0/27] eta: 0:01:57 time: 4.3588 data: 4.2195 max mem: 3581 +extract (validation) [20/27] eta: 0:00:02 time: 0.1760 data: 0.0525 max mem: 3581 +extract (validation) [26/27] eta: 0:00:00 time: 0.1730 data: 0.0533 max mem: 3581 +extract (validation) Total time: 0:00:09 (0.3411 s / it) +extract (test) [ 0/26] eta: 0:01:53 time: 4.3743 data: 4.2348 max mem: 3581 +extract (test) [20/26] eta: 0:00:02 time: 0.1888 data: 0.0591 max mem: 3581 +extract (test) [25/26] eta: 0:00:00 time: 0.1737 data: 0.0516 max mem: 3581 +extract (test) Total time: 0:00:09 (0.3593 s / it) +feature extraction time: 0:01:11 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | aabc_age | | 0.0059948 | train | 0.68504 | 0.02057 | 0.68195 | 0.021021 | 0.68459 | 0.020694 | +| flat_mae | reg | logistic | aabc_age | | 0.0059948 | test | 0.40385 | 0.064787 | 0.385 | 0.065845 | 0.39492 | 0.064429 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06142417271793606, "f1": 0.4847355457609541, "f1_std": 0.06449093341030591, "bacc": 0.49793956043956045, "bacc_std": 0.061310621818550945} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06573007156311567, "f1": 0.5271164021164021, "f1_std": 0.06797923239740378, "bacc": 0.538003663003663, "bacc_std": 0.06599912464853959} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.05684343440981516, "f1": 0.452855644546148, "f1_std": 0.055771287955331005, "bacc": 0.4961080586080586, "bacc_std": 0.05592524823351923} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 2.782559402207126, "split": "test", "acc": 0.5, "acc_std": 0.059113470265785485, "f1": 0.4857065217391304, "f1_std": 0.06145568835069574, "bacc": 0.5119047619047619, "bacc_std": 0.05913300729279735} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 9.999999999999999e-05, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.05940756581227383, "f1": 0.32481527093596063, "f1_std": 0.050379433587192844, "bacc": 0.3598901098901099, "bacc_std": 0.05800778806618436} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06421100571115414, "f1": 0.5482387528320207, "f1_std": 0.0667387632484132, "bacc": 0.5590659340659341, "bacc_std": 0.06431179467385122} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.06421989774417419, "f1": 0.48419540229885055, "f1_std": 0.06641901330757874, "bacc": 0.4965659340659341, "bacc_std": 0.06404645293413758} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06744108759397337, "f1": 0.4377777777777777, "f1_std": 0.06717082996375917, "bacc": 0.45100732600732596, "bacc_std": 0.06805279605738754} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06169096488083967, "f1": 0.47915047915047915, "f1_std": 0.06578694478562355, "bacc": 0.49793956043956045, "bacc_std": 0.06145461815994957} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06908099182809019, "f1": 0.4065336837075968, "f1_std": 0.07025491008050007, "bacc": 0.40041208791208793, "bacc_std": 0.06887769273069774} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06861714166429199, "f1": 0.5740900856389987, "f1_std": 0.0707835856620737, "bacc": 0.5741758241758241, "bacc_std": 0.06902073516431761} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06379914859108031, "f1": 0.37445887445887444, "f1_std": 0.0632020766021588, "bacc": 0.37957875457875456, "bacc_std": 0.06331598528146307} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06593259064283565, "f1": 0.5327898550724638, "f1_std": 0.06562435427768222, "bacc": 0.5338827838827839, "bacc_std": 0.06575966386802258} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06189190567566715, "f1": 0.45470457079152726, "f1_std": 0.06277008412907494, "bacc": 0.46543040293040294, "bacc_std": 0.06236891656132928} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.0669922719202851, "f1": 0.4451058201058201, "f1_std": 0.06778545191198934, "bacc": 0.44505494505494503, "bacc_std": 0.0669779999591169} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06847165007435796, "f1": 0.5012820512820513, "f1_std": 0.0688541383306548, "bacc": 0.5, "bacc_std": 0.06846726788761762} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06626844947974643, "f1": 0.48912101041133293, "f1_std": 0.0690547922501384, "bacc": 0.49954212454212454, "bacc_std": 0.06623763664573815} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.061061747291152095, "f1": 0.5139382676147382, "f1_std": 0.06309303341300278, "bacc": 0.5306776556776557, "bacc_std": 0.060440095762975043} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.07018861950268225, "f1": 0.44775580959520245, "f1_std": 0.07090464440140459, "bacc": 0.4407051282051282, "bacc_std": 0.07036345332175588} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06119252526190593, "f1": 0.40805860805860805, "f1_std": 0.059872092005645576, "bacc": 0.4194139194139194, "bacc_std": 0.06081871547284131} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06288360384978023, "f1": 0.4044968189705032, "f1_std": 0.06604256846840295, "bacc": 0.4237637362637363, "bacc_std": 0.06296442546513618} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06307620778206059, "f1": 0.3545454545454545, "f1_std": 0.06131739173491658, "bacc": 0.34867216117216115, "bacc_std": 0.06365897667345995} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06355376748035277, "f1": 0.4687645687645688, "f1_std": 0.06323455105805896, "bacc": 0.4640567765567766, "bacc_std": 0.06397645660730855} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06560032291737523, "f1": 0.44897342995169076, "f1_std": 0.06536589933874433, "bacc": 0.44345238095238093, "bacc_std": 0.06578232501622489} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.25, "acc_std": 0.04917092215729552, "f1": 0.2216019847598795, "f1_std": 0.04390156410175785, "bacc": 0.2516025641025641, "bacc_std": 0.049688884553710594} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06383915624303384, "f1": 0.3683835369896751, "f1_std": 0.06755205328042414, "bacc": 0.3839285714285714, "bacc_std": 0.0637914267039946} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.0641007320251013, "f1": 0.3832175925925926, "f1_std": 0.06491093164509434, "bacc": 0.4001831501831502, "bacc_std": 0.06359963841586123} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.061048821281673056, "f1": 0.3945663531870428, "f1_std": 0.05199550131470788, "bacc": 0.4519230769230769, "bacc_std": 0.05940785293188987} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.0639157521490678, "f1": 0.40235127191648934, "f1_std": 0.06218634148848337, "bacc": 0.38553113553113555, "bacc_std": 0.06427175082897377} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 166.81005372000556, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.06784374450632168, "f1": 0.32926245210727967, "f1_std": 0.07013052516871161, "bacc": 0.33241758241758246, "bacc_std": 0.06842712410303668} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06186911169758719, "f1": 0.49215686274509807, "f1_std": 0.06203652020557381, "bacc": 0.5128205128205128, "bacc_std": 0.061156859108231244} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06219719546941288, "f1": 0.5129310344827587, "f1_std": 0.06400205017073944, "bacc": 0.5203754578754579, "bacc_std": 0.06254609648693672} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06700918421552951, "f1": 0.4648432601880878, "f1_std": 0.06725136130837574, "bacc": 0.4658882783882784, "bacc_std": 0.06721777341358681} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06797580495420115, "f1": 0.43106008323399625, "f1_std": 0.06923165820526607, "bacc": 0.42261904761904756, "bacc_std": 0.06817241793934554} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 2.782559402207126, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.061773242038545, "f1": 0.30890888499584157, "f1_std": 0.06125974120921621, "bacc": 0.30975274725274726, "bacc_std": 0.06242253019692582} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 2.782559402207126, "split": "test", "acc": 0.5, "acc_std": 0.06577804709239961, "f1": 0.49160996424864495, "f1_std": 0.06574633040882259, "bacc": 0.5054945054945055, "bacc_std": 0.06638266144567449} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06865784302091077, "f1": 0.46339597902097907, "f1_std": 0.06973150996538761, "bacc": 0.4658882783882784, "bacc_std": 0.06887129522239946} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06792872831267721, "f1": 0.5469530469530469, "f1_std": 0.06900500443767416, "bacc": 0.5544871794871795, "bacc_std": 0.06773842679928224} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06937245117659113, "f1": 0.44043637521898393, "f1_std": 0.0697466543310598, "bacc": 0.4432234432234432, "bacc_std": 0.06975672412616275} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 0.000774263682681127, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06470775220378368, "f1": 0.5159774436090225, "f1_std": 0.06684436641690726, "bacc": 0.5187728937728937, "bacc_std": 0.06478298240971794} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06207525495804676, "f1": 0.3809790209790209, "f1_std": 0.06225583294262831, "bacc": 0.40293040293040294, "bacc_std": 0.06207664567488739} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.057724029740372905, "f1": 0.45483954451345754, "f1_std": 0.06524871152378642, "bacc": 0.48305860805860806, "bacc_std": 0.058170527403685245} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0655451761637971, "f1": 0.4739057239057239, "f1_std": 0.06590849766958944, "bacc": 0.47458791208791207, "bacc_std": 0.06540824472701073} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06821670785642789, "f1": 0.47963875205254514, "f1_std": 0.06923374625600366, "bacc": 0.4789377289377289, "bacc_std": 0.06822053982562544} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06514839682960323, "f1": 0.44568185697217955, "f1_std": 0.06706248079388707, "bacc": 0.4608516483516484, "bacc_std": 0.06494580527508842} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.000774263682681127, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.061208961617140026, "f1": 0.5619539279161081, "f1_std": 0.06662156156671377, "bacc": 0.575091575091575, "bacc_std": 0.06107875829953317} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06196887938178582, "f1": 0.4257440476190476, "f1_std": 0.06322630939411145, "bacc": 0.43864468864468864, "bacc_std": 0.061456413876477066} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 21.54434690031882, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06794646343623136, "f1": 0.413711317418214, "f1_std": 0.0662866328170163, "bacc": 0.40773809523809523, "bacc_std": 0.06847619168860274} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.28846153846153844, "acc_std": 0.06202810014098926, "f1": 0.2859663865546218, "f1_std": 0.059416904500076424, "bacc": 0.28456959706959706, "bacc_std": 0.06135681805459609} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06801644423512988, "f1": 0.4447396536212326, "f1_std": 0.07036323041764078, "bacc": 0.44047619047619047, "bacc_std": 0.06838090940146124} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06283485445921906, "f1": 0.3591269841269841, "f1_std": 0.0628621703738408, "bacc": 0.3660714285714286, "bacc_std": 0.06304377114830217} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06876337954801165, "f1": 0.4131444657760447, "f1_std": 0.07001378586947185, "bacc": 0.40064102564102566, "bacc_std": 0.06878385461318579} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.05359385949106417, "f1": 0.46190476190476193, "f1_std": 0.04461178108629741, "bacc": 0.5096153846153846, "bacc_std": 0.05203345519747334} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.0647747462478958, "f1": 0.40123655913978495, "f1_std": 0.06527856641917759, "bacc": 0.4033882783882784, "bacc_std": 0.06493950279832605} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06346656157276698, "f1": 0.4157102977667494, "f1_std": 0.06642868533296961, "bacc": 0.4210164835164835, "bacc_std": 0.06319457382366415} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06511595256238296, "f1": 0.39428571428571424, "f1_std": 0.06577679802447842, "bacc": 0.40613553113553114, "bacc_std": 0.06564383941161023} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06184438382465208, "f1": 0.39095291064145343, "f1_std": 0.05832989693307423, "bacc": 0.4191849816849817, "bacc_std": 0.060942008915609344} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06359751167399759, "f1": 0.3740495012234143, "f1_std": 0.06136501402847278, "bacc": 0.37934981684981683, "bacc_std": 0.06303850407474332} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06814967863625515, "f1": 0.3590993935821522, "f1_std": 0.06866135574565348, "bacc": 0.36469780219780223, "bacc_std": 0.06818729765465267} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.000774263682681127, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06230592352665095, "f1": 0.3551724137931035, "f1_std": 0.06170813402900485, "bacc": 0.36446886446886445, "bacc_std": 0.06214966474749692} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 21.54434690031882, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06293017639091533, "f1": 0.3621636983705949, "f1_std": 0.06325570233282847, "bacc": 0.3660714285714286, "bacc_std": 0.06314164988090143} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06541287850688282, "f1": 0.4193376068376068, "f1_std": 0.06233107036551939, "bacc": 0.43292124542124544, "bacc_std": 0.06431139103192927} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05974891992289621, "f1": 0.3802966002454493, "f1_std": 0.06271063745681489, "bacc": 0.4015567765567766, "bacc_std": 0.05964430620476494} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06592128172764379, "f1": 0.48408488063660476, "f1_std": 0.06557500417367033, "bacc": 0.48054029304029305, "bacc_std": 0.06620330250955045} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06844256435424241, "f1": 0.4871861230556883, "f1_std": 0.0676254777597571, "bacc": 0.480540293040293, "bacc_std": 0.06815431984510657} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06542843554231442, "f1": 0.4172092275540551, "f1_std": 0.06548927313373361, "bacc": 0.4210164835164835, "bacc_std": 0.06537943236728051} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.060323033272501034, "f1": 0.43418409547441805, "f1_std": 0.05619428332410025, "bacc": 0.4489468864468864, "bacc_std": 0.06142920259961692} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06722578532838237, "f1": 0.5012479871175524, "f1_std": 0.06707425893230956, "bacc": 0.5041208791208791, "bacc_std": 0.0677534322615662} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06678837017424923, "f1": 0.4227880184331797, "f1_std": 0.06693543250161488, "bacc": 0.4210164835164835, "bacc_std": 0.06660401216438756} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06526101439603893, "f1": 0.42115384615384616, "f1_std": 0.06584533473743305, "bacc": 0.42719780219780223, "bacc_std": 0.06579111184695192} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06047274313872237, "f1": 0.4448093413610655, "f1_std": 0.05954840442563885, "bacc": 0.4578754578754579, "bacc_std": 0.06004892370302983} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05634439482683585, "f1": 0.4411112720950449, "f1_std": 0.05477922941648492, "bacc": 0.475503663003663, "bacc_std": 0.05556740275800569} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.061686408695526114, "f1": 0.5383333333333333, "f1_std": 0.06031806730424589, "bacc": 0.5352564102564102, "bacc_std": 0.061621475533268286} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.06578981908971925, "f1": 0.47238095238095235, "f1_std": 0.06947095039422635, "bacc": 0.49221611721611724, "bacc_std": 0.06513195489117536} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06151085919426397, "f1": 0.4301587301587302, "f1_std": 0.05886509405013649, "bacc": 0.45352564102564097, "bacc_std": 0.060590708112524524} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06276747499429751, "f1": 0.3774801587301587, "f1_std": 0.0625171226009185, "bacc": 0.37957875457875456, "bacc_std": 0.062385821661453435} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.07069447347481737, "f1": 0.43916449894710763, "f1_std": 0.07067938661862344, "bacc": 0.44184981684981683, "bacc_std": 0.07079127142743492} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06654014004107889, "f1": 0.47919467787114844, "f1_std": 0.0678641185958291, "bacc": 0.48466117216117216, "bacc_std": 0.06704438462851176} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 10000.0, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.07023075869913141, "f1": 0.38446969696969696, "f1_std": 0.069882987401856, "bacc": 0.38255494505494503, "bacc_std": 0.07015011344882505} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05880888714387131, "f1": 0.4124137931034483, "f1_std": 0.0578605988823677, "bacc": 0.4251373626373627, "bacc_std": 0.05947586095518086} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 2.782559402207126, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06287504043783329, "f1": 0.36620879120879124, "f1_std": 0.06245891614855424, "bacc": 0.3676739926739927, "bacc_std": 0.06334429448170893} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06095175849723554, "f1": 0.5078976034858388, "f1_std": 0.06479013218466448, "bacc": 0.532051282051282, "bacc_std": 0.060544617753621674} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 166.81005372000556, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.0628946239976888, "f1": 0.3819143819143819, "f1_std": 0.06263887207843015, "bacc": 0.3869047619047619, "bacc_std": 0.06337128318057263} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.0655075202331808, "f1": 0.49338161838161837, "f1_std": 0.0671966398746151, "bacc": 0.5057234432234432, "bacc_std": 0.06566979077280131} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.061465148637593156, "f1": 0.4178571428571428, "f1_std": 0.06352615483160763, "bacc": 0.43452380952380953, "bacc_std": 0.0610157098305593} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06243494839483003, "f1": 0.4696632996632996, "f1_std": 0.06279036473736897, "bacc": 0.47733516483516486, "bacc_std": 0.062167649338168804} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06773014676689464, "f1": 0.4363663711489798, "f1_std": 0.06710250138546157, "bacc": 0.4432234432234432, "bacc_std": 0.06822597890136231} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06499722342726987, "f1": 0.4447829131652661, "f1_std": 0.06647248899619328, "bacc": 0.43887362637362637, "bacc_std": 0.06490268367920983} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06288902595682984, "f1": 0.39068035426731074, "f1_std": 0.06316379308822381, "bacc": 0.40453296703296704, "bacc_std": 0.06300585230744246} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.0663784648189153, "f1": 0.5105933721450963, "f1_std": 0.06336991634014841, "bacc": 0.5070970695970696, "bacc_std": 0.06702949681482474} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.06305931977630973, "f1": 0.48052362707535123, "f1_std": 0.0677152556307167, "bacc": 0.49793956043956045, "bacc_std": 0.06277796609434343} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06741256664756085, "f1": 0.5164835164835165, "f1_std": 0.06691936741700327, "bacc": 0.5233516483516483, "bacc_std": 0.06782695451655976} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.060776533154240725, "f1": 0.3847052280311457, "f1_std": 0.05667312297319862, "bacc": 0.39720695970695974, "bacc_std": 0.05976015082443982} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06805057073614293, "f1": 0.42133152173913047, "f1_std": 0.06892136736373881, "bacc": 0.4198717948717948, "bacc_std": 0.06795356906321504} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06736234045253033, "f1": 0.4503968253968254, "f1_std": 0.0678090643815176, "bacc": 0.4581043956043956, "bacc_std": 0.06706549437469816} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.058550034234624834, "f1": 0.37938948306595366, "f1_std": 0.058279640383632536, "bacc": 0.37797619047619047, "bacc_std": 0.057820193771665446} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.000774263682681127, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.0662119601630933, "f1": 0.36547619047619045, "f1_std": 0.06679293948458302, "bacc": 0.36034798534798534, "bacc_std": 0.06569022478264115} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.058324979677989344, "f1": 0.436902966314731, "f1_std": 0.05927452749465568, "bacc": 0.4741300366300366, "bacc_std": 0.05716100693112418} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.07001335460099885, "f1": 0.4826427598166728, "f1_std": 0.07052680425092261, "bacc": 0.48511904761904756, "bacc_std": 0.07006149305029821} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06498479567282683, "f1": 0.44142866250159374, "f1_std": 0.06689547898580975, "bacc": 0.4608516483516484, "bacc_std": 0.06488574198052018} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | aabc_age | train | 100 | 103.93 | 999.88 | 0.72776 | 0.17333 | 0.72271 | 0.17868 | 0.72738 | 0.17407 | +| flat_mae | reg | logistic | aabc_age | test | 100 | 103.93 | 999.88 | 0.44654 | 0.062484 | 0.43637 | 0.061663 | 0.44549 | 0.062359 | + + +done! total time: 0:05:29 diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4a630c739be093047ed93507244f33586b88b05 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..94b04e6805cffb8050ac6d2abf0d14d0969854bc --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,0.005994842503189409,train,0.8903591682419659,0.01317854237586415,0.8867053645279452,0.01372115215163955,0.8835772833723654,0.013982740893911163 +flat_mae,patch,logistic,aabc_sex,,0.005994842503189409,test,0.9090909090909091,0.04117282261776632,0.9071259709557582,0.04153779797849791,0.9166666666666667,0.038766835556008836 +flat_mae,patch,logistic,aabc_sex,1,0.3593813663804626,train,0.996219281663516,0.0027242587669117978,0.9961285128805621,0.002784505657655915,0.9967320261437909,0.0023547923001574146 +flat_mae,patch,logistic,aabc_sex,1,0.3593813663804626,test,0.8363636363636363,0.04912697664940049,0.8343927735028438,0.04930838944801519,0.8410326086956521,0.048324626073035 +flat_mae,patch,logistic,aabc_sex,2,0.005994842503189409,train,0.8979206049149339,0.013746576972907142,0.8945187876639489,0.014316300379346133,0.8916952431196694,0.01463327879322643 +flat_mae,patch,logistic,aabc_sex,2,0.005994842503189409,test,0.9090909090909091,0.03937360780695029,0.9071259709557582,0.040046181101594816,0.9096467391304348,0.03939160416233892 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,train,0.9395085066162571,0.010375086771695265,0.9379053320421398,0.010664301653059025,0.9373736041501802,0.010850812913942774 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,test,0.7818181818181819,0.05605771524078133,0.7727272727272727,0.059254543600323666,0.7697010869565217,0.05908271661619735 +flat_mae,patch,logistic,aabc_sex,4,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,4,2.782559402207126,test,0.8545454545454545,0.04765601260271447,0.8505434782608696,0.0491858824146365,0.8505434782608696,0.04930694346253553 +flat_mae,patch,logistic,aabc_sex,5,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,5,21.54434690031882,test,0.8363636363636363,0.047605472287705904,0.8307692307692308,0.04984310655364956,0.8288043478260869,0.05021844553229477 +flat_mae,patch,logistic,aabc_sex,6,0.3593813663804626,train,0.994328922495274,0.0031184188995689406,0.9941893034853195,0.0031926430945579034,0.9944898736774231,0.0030704181102385293 +flat_mae,patch,logistic,aabc_sex,6,0.3593813663804626,test,0.9272727272727272,0.0342988133182669,0.9252717391304348,0.03540397038453564,0.9252717391304348,0.035978014861688494 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,train,0.9395085066162571,0.010278673227438192,0.9375792796247677,0.010684881925201216,0.9349409419971277,0.011158916909545574 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,test,0.9090909090909091,0.037733804472912615,0.9071259709557582,0.03853127153593407,0.9096467391304348,0.03815539932405344 +flat_mae,patch,logistic,aabc_sex,8,0.3593813663804626,train,0.994328922495274,0.0032379794389759513,0.9941893034853195,0.0033159985389485157,0.9944898736774231,0.003224703012324804 +flat_mae,patch,logistic,aabc_sex,8,0.3593813663804626,test,0.8545454545454545,0.045615076527520684,0.8521505376344086,0.046133212139747255,0.8566576086956521,0.04576822489103413 +flat_mae,patch,logistic,aabc_sex,9,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,9,21.54434690031882,test,0.8909090909090909,0.04262679093932739,0.8891129032258065,0.04301775641175362,0.8940217391304348,0.04201854118884831 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,train,0.9338374291115312,0.010506922356129626,0.9320413294426397,0.010826936381487854,0.9312553122893402,0.011088671626981149 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,test,0.8909090909090909,0.04280225528930969,0.8891129032258065,0.04322234726985258,0.8940217391304348,0.04203106979181472 +flat_mae,patch,logistic,aabc_sex,11,0.3593813663804626,train,0.994328922495274,0.003358171852571198,0.9941893034853195,0.0034392650272465847,0.9944898736774231,0.003328215168721222 +flat_mae,patch,logistic,aabc_sex,11,0.3593813663804626,test,0.8909090909090909,0.042335144535124915,0.8863636363636364,0.044806265254742794,0.8817934782608696,0.045588573770937585 +flat_mae,patch,logistic,aabc_sex,12,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,12,10000.0,test,0.7818181818181819,0.0537926720868924,0.76890756302521,0.059109969617885505,0.7635869565217391,0.05784551870599951 +flat_mae,patch,logistic,aabc_sex,13,0.3593813663804626,train,0.994328922495274,0.0030929210762900986,0.9941893034853195,0.003167136104266519,0.9944898736774231,0.0030480816294180166 +flat_mae,patch,logistic,aabc_sex,13,0.3593813663804626,test,0.8909090909090909,0.04003718932349797,0.8863636363636364,0.04222863744507983,0.8817934782608696,0.04292366521154935 +flat_mae,patch,logistic,aabc_sex,14,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,14,2.782559402207126,test,0.9272727272727272,0.034168256497312186,0.9260752688172043,0.03447556859460794,0.9313858695652174,0.03278549005346483 +flat_mae,patch,logistic,aabc_sex,15,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,15,2.782559402207126,test,0.8909090909090909,0.04156470212584131,0.8879076086956521,0.042833834942494896,0.8879076086956521,0.04304454110114572 +flat_mae,patch,logistic,aabc_sex,16,0.046415888336127774,train,0.9319470699432892,0.010970846619989521,0.9300552388787683,0.011299376294972118,0.9290131598229725,0.011515648266899502 +flat_mae,patch,logistic,aabc_sex,16,0.046415888336127774,test,0.8727272727272727,0.04801205082886366,0.8683760683760684,0.050560430637310624,0.8661684782608696,0.05144456453128287 +flat_mae,patch,logistic,aabc_sex,17,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,17,21.54434690031882,test,0.8363636363636363,0.048836295945258686,0.8307692307692308,0.05083679266640807,0.8288043478260869,0.050732492151574986 +flat_mae,patch,logistic,aabc_sex,18,0.005994842503189409,train,0.9017013232514177,0.012882258350739346,0.8985663293902475,0.013391758073471717,0.8961795480524049,0.013727442112769422 +flat_mae,patch,logistic,aabc_sex,18,0.005994842503189409,test,0.8181818181818182,0.051958025016703255,0.8176392572944298,0.0518280668079557,0.8315217391304348,0.048842469825766226 +flat_mae,patch,logistic,aabc_sex,19,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,19,2.782559402207126,test,0.8181818181818182,0.0541499723373173,0.8166666666666667,0.05409582459211839,0.8254076086956521,0.052616614651699915 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,train,0.9376181474480151,0.011102364409823231,0.9357586819802679,0.011492314788403683,0.9339151206072862,0.01187306292332052 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,test,0.8727272727272727,0.04473435272863111,0.8683760683760684,0.0469029358183802,0.8661684782608696,0.04736433942244797 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,train,0.943289224952741,0.0101616228516304,0.9416372462488967,0.010491655498178107,0.9400334124681262,0.01074524734167047 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,test,0.7636363636363637,0.05334192217343714,0.7472605160834218,0.05952418153078315,0.7418478260869565,0.057535078064946676 +flat_mae,patch,logistic,aabc_sex,22,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,22,21.54434690031882,test,0.8545454545454545,0.04738586461476939,0.8505434782608696,0.04913543807149063,0.8505434782608696,0.04942144171764175 +flat_mae,patch,logistic,aabc_sex,23,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,23,21.54434690031882,test,0.9272727272727272,0.034680234683259446,0.9260752688172043,0.03495602602332031,0.9313858695652174,0.0331053287114331 +flat_mae,patch,logistic,aabc_sex,24,0.005994842503189409,train,0.9035916824196597,0.013026897765557136,0.9008478594030804,0.013444792506810035,0.8996380315952988,0.013679276134829731 +flat_mae,patch,logistic,aabc_sex,24,0.005994842503189409,test,0.8545454545454545,0.049177028837550305,0.8484848484848485,0.051981231917138854,0.8444293478260869,0.05228457557187376 +flat_mae,patch,logistic,aabc_sex,25,0.005994842503189409,train,0.8922495274102079,0.013505036544076674,0.8888888888888888,0.01399415696030752,0.8867932823353557,0.014187198984560353 +flat_mae,patch,logistic,aabc_sex,25,0.005994842503189409,test,0.9818181818181818,0.01828349750435446,0.9814251941911516,0.018512548584299526,0.984375,0.015712380667804608 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,train,0.9319470699432892,0.010593690841287308,0.9300552388787683,0.010889709453066903,0.9290131598229725,0.010994645012447917 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,test,0.8363636363636363,0.048307609379547437,0.8307692307692308,0.05071198843845818,0.8288043478260869,0.05097988034242785 +flat_mae,patch,logistic,aabc_sex,27,0.046415888336127774,train,0.9357277882797732,0.010531777766177745,0.9339410589410589,0.01086998788589555,0.9328892992174448,0.011195951955900679 +flat_mae,patch,logistic,aabc_sex,27,0.046415888336127774,test,0.8727272727272727,0.043575275421954274,0.8683760683760684,0.04522884203277424,0.8661684782608696,0.04550288283623707 +flat_mae,patch,logistic,aabc_sex,28,0.046415888336127774,train,0.9338374291115312,0.010653243161925065,0.9319544133158395,0.010991707500177484,0.9306471467510771,0.011256581647394806 +flat_mae,patch,logistic,aabc_sex,28,0.046415888336127774,test,0.8727272727272727,0.04335362949145831,0.8663658451926415,0.04653021527025345,0.8600543478260869,0.04719478731568314 +flat_mae,patch,logistic,aabc_sex,29,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,29,1291.5496650148827,test,0.8181818181818182,0.04991670417176897,0.8131793478260869,0.05140770678777188,0.8131793478260869,0.05152521348686567 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,train,0.9338374291115312,0.010855155402818032,0.9318652687669509,0.011231502322526106,0.930038981212814,0.011554572702446742 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,test,0.8363636363636363,0.05250707626454968,0.8328267477203647,0.05355653704835512,0.8349184782608696,0.05315867774959293 +flat_mae,patch,logistic,aabc_sex,31,0.005994842503189409,train,0.9073724007561437,0.01297709691313929,0.9044834307992202,0.013437920558939904,0.9022978399132449,0.013653106286330308 +flat_mae,patch,logistic,aabc_sex,31,0.005994842503189409,test,0.8727272727272727,0.042817699283133205,0.8683760683760684,0.044469537195166976,0.8661684782608696,0.04449648157261069 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,train,0.9376181474480151,0.010508631585291727,0.9358427325549344,0.010848562302830577,0.9345232861455495,0.011119210456481118 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,test,0.8909090909090909,0.041034216723958065,0.884453781512605,0.04532303427299883,0.8756793478260869,0.046552994820911804 +flat_mae,patch,logistic,aabc_sex,33,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,33,21.54434690031882,test,0.8909090909090909,0.038868134774213775,0.884453781512605,0.04296197820572849,0.8756793478260869,0.044194630696822446 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,train,0.994328922495274,0.003316004666777391,0.9941961885745005,0.0033855540258479963,0.9950980392156863,0.002866285079616405 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,test,0.8727272727272727,0.0456399853687839,0.8699763593380614,0.04655171928823117,0.8722826086956521,0.04636882654416331 +flat_mae,patch,logistic,aabc_sex,35,0.000774263682681127,train,0.8695652173913043,0.014494555708349358,0.8651233256351039,0.01514417528007857,0.8623201148919957,0.015450777010279783 +flat_mae,patch,logistic,aabc_sex,35,0.000774263682681127,test,0.8727272727272727,0.04231177473223895,0.8639095086603039,0.04796536929538607,0.8539402173913043,0.04864011634752779 +flat_mae,patch,logistic,aabc_sex,36,0.005994842503189409,train,0.9017013232514177,0.012433722146126109,0.8984254992319508,0.012990747227447653,0.8955713825141417,0.013410372370417584 +flat_mae,patch,logistic,aabc_sex,36,0.005994842503189409,test,0.8909090909090909,0.043397467268139686,0.8891129032258065,0.04383004467776189,0.8940217391304348,0.04269020009409331 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,train,0.9395085066162571,0.010120437119362276,0.9376638680217999,0.010483698968405338,0.9355491075353908,0.010829673524227638 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,test,0.7636363636363637,0.05718948062550381,0.7518222839291913,0.0617684556787956,0.7479619565217391,0.060634551088185364 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,train,0.9376181474480151,0.01092530198888672,0.9359246820459175,0.011255421384778176,0.9351314516838125,0.011519273900891303 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,test,0.8545454545454545,0.04255103304531088,0.8428571428571429,0.04914971930877,0.8322010869565217,0.04872800363200585 +flat_mae,patch,logistic,aabc_sex,39,0.3593813663804626,train,0.9924385633270322,0.0035766554617719343,0.9922570257611241,0.003657125058753498,0.9928558867493186,0.0033783188079882973 +flat_mae,patch,logistic,aabc_sex,39,0.3593813663804626,test,0.8909090909090909,0.04155133835399606,0.8891129032258065,0.04197417244591232,0.8940217391304348,0.04090394951243674 +flat_mae,patch,logistic,aabc_sex,40,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,40,2.782559402207126,test,0.8545454545454545,0.044684443648010545,0.8484848484848485,0.04728004267003601,0.8444293478260869,0.047714912529550166 +flat_mae,patch,logistic,aabc_sex,41,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,41,2.782559402207126,test,0.8909090909090909,0.04297597252972914,0.8879076086956521,0.0444819331434469,0.8879076086956521,0.04477000376223868 +flat_mae,patch,logistic,aabc_sex,42,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,42,166.81005372000556,test,0.8363636363636363,0.05208257905789911,0.8328267477203647,0.05327864410115794,0.8349184782608696,0.053185613616201936 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,train,0.9281663516068053,0.011625580401029765,0.9257724802079641,0.012106209654690496,0.9227043582754477,0.01257730402148703 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,test,0.9090909090909091,0.03697100047379106,0.9071259709557582,0.03784912981263753,0.9096467391304348,0.03772085824166873 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,train,0.9395085066162571,0.010675861602788944,0.9379053320421398,0.010974862668262962,0.9373736041501802,0.011176433331777308 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,test,0.8181818181818182,0.049907603885779084,0.8131793478260869,0.05127742657949745,0.8131793478260869,0.05113553750752834 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,train,0.9338374291115312,0.0111456217792155,0.9319544133158395,0.01149568215320704,0.9306471467510771,0.011693692136080588 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,test,0.9636363636363636,0.02440745042735182,0.9630376344086022,0.024547986237309635,0.96875,0.02097515271100547 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,train,0.9376181474480151,0.010887494705429702,0.9358427325549344,0.01121986709513401,0.9345232861455495,0.01142424225706409 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,test,0.8909090909090909,0.04321921825353023,0.8879076086956521,0.044628228260540144,0.8879076086956521,0.04490788970782748 +flat_mae,patch,logistic,aabc_sex,47,0.3593813663804626,train,0.994328922495274,0.0032278124973143907,0.9941822314276811,0.0033157604448735855,0.99388170813916,0.003559332546937603 +flat_mae,patch,logistic,aabc_sex,47,0.3593813663804626,test,0.8545454545454545,0.04497586496947072,0.84593837535014,0.04929704231373217,0.8383152173913043,0.04951068572709034 +flat_mae,patch,logistic,aabc_sex,48,0.005994842503189409,train,0.9017013232514177,0.013097217588952543,0.8985663293902475,0.013598319083583789,0.8961795480524049,0.013916692464808032 +flat_mae,patch,logistic,aabc_sex,48,0.005994842503189409,test,0.8909090909090909,0.042143101354023965,0.8863636363636364,0.04481043781409082,0.8817934782608696,0.04567415846166708 +flat_mae,patch,logistic,aabc_sex,49,0.3593813663804626,train,0.996219281663516,0.002668887241702534,0.9961238606055277,0.002735896277913794,0.9961238606055277,0.0027530573563953097 +flat_mae,patch,logistic,aabc_sex,49,0.3593813663804626,test,0.8545454545454545,0.04494479258117994,0.8521505376344086,0.0457321003264925,0.8566576086956521,0.045438270714608264 +flat_mae,patch,logistic,aabc_sex,50,0.005994842503189409,train,0.9017013232514177,0.012644779637389667,0.8985663293902475,0.01313519686504807,0.8961795480524049,0.013440160806337951 +flat_mae,patch,logistic,aabc_sex,50,0.005994842503189409,test,0.8909090909090909,0.04261562201434377,0.8863636363636364,0.04498333568900711,0.8817934782608696,0.045880089454211424 +flat_mae,patch,logistic,aabc_sex,51,0.3593813663804626,train,0.994328922495274,0.003125901944588931,0.9941893034853195,0.0032002368947670044,0.9944898736774231,0.003055090480862435 +flat_mae,patch,logistic,aabc_sex,51,0.3593813663804626,test,0.8909090909090909,0.04090034654018763,0.8863636363636364,0.04344170628074031,0.8817934782608696,0.044513893161724216 +flat_mae,patch,logistic,aabc_sex,52,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,52,2.782559402207126,test,0.8,0.05383954121597904,0.790003471017008,0.05784213427936538,0.7853260869565217,0.05716234591206114 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,train,0.9338374291115312,0.011660887651802933,0.9319544133158395,0.012028020956783897,0.9306471467510771,0.01228380216063251 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,test,0.8909090909090909,0.043503069142227055,0.8879076086956521,0.04497216206387446,0.8879076086956521,0.04530751422932359 +flat_mae,patch,logistic,aabc_sex,54,0.046415888336127774,train,0.941398865784499,0.01004882218285427,0.9396520951935851,0.010402090118619811,0.9377912600017586,0.010760498079795323 +flat_mae,patch,logistic,aabc_sex,54,0.046415888336127774,test,0.8545454545454545,0.049221859015671846,0.8521505376344086,0.04974025236559959,0.8566576086956521,0.04887304758197895 +flat_mae,patch,logistic,aabc_sex,55,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,55,2.782559402207126,test,0.8363636363636363,0.048726675708689916,0.8307692307692308,0.05102350758462779,0.8288043478260869,0.05138925309817285 +flat_mae,patch,logistic,aabc_sex,56,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,56,166.81005372000556,test,0.8363636363636363,0.04988401754682122,0.8307692307692308,0.05210936158792554,0.8288043478260869,0.05207642003582463 +flat_mae,patch,logistic,aabc_sex,57,0.3593813663804626,train,0.9924385633270322,0.0038497943014019365,0.9922570257611241,0.003938232282987985,0.9928558867493186,0.0037179225871772693 +flat_mae,patch,logistic,aabc_sex,57,0.3593813663804626,test,0.8909090909090909,0.04243186078782493,0.8891129032258065,0.04293253086990514,0.8940217391304348,0.041781842456207294 +flat_mae,patch,logistic,aabc_sex,58,0.046415888336127774,train,0.945179584120983,0.009202621798671427,0.9434697855750487,0.00954283633978279,0.9410592338579677,0.009945261018429982 +flat_mae,patch,logistic,aabc_sex,58,0.046415888336127774,test,0.7636363636363637,0.056049753576916446,0.7585275244849713,0.05735285232045231,0.7601902173913043,0.05715415525064073 +flat_mae,patch,logistic,aabc_sex,59,0.3593813663804626,train,0.994328922495274,0.003000325762547843,0.9941893034853195,0.003072800620712348,0.9944898736774231,0.0029761225269131306 +flat_mae,patch,logistic,aabc_sex,59,0.3593813663804626,test,0.8181818181818182,0.050450453560526166,0.8151881720430108,0.05129084821973903,0.8192934782608696,0.051175930773522726 +flat_mae,patch,logistic,aabc_sex,60,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,60,2.782559402207126,test,0.8181818181818182,0.05007234435715472,0.8035714285714286,0.05770134431335501,0.7948369565217391,0.05589502960491657 +flat_mae,patch,logistic,aabc_sex,61,0.005994842503189409,train,0.8941398865784499,0.013771956627704862,0.8907637393433434,0.0143276970041136,0.8884272692634603,0.014685290479316798 +flat_mae,patch,logistic,aabc_sex,61,0.005994842503189409,test,0.9272727272727272,0.03388285116848619,0.9266666666666667,0.0337774673825512,0.9375,0.029118075222917813 +flat_mae,patch,logistic,aabc_sex,62,0.046415888336127774,train,0.9319470699432892,0.011281989763016895,0.9300552388787683,0.011662181448669358,0.9290131598229725,0.012049185787367902 +flat_mae,patch,logistic,aabc_sex,62,0.046415888336127774,test,0.8909090909090909,0.04134428718268859,0.8879076086956521,0.04269598498200412,0.8879076086956521,0.04294544523969931 +flat_mae,patch,logistic,aabc_sex,63,0.3593813663804626,train,0.9924385633270322,0.003913794070637637,0.9922570257611241,0.004001730524227295,0.9928558867493186,0.0037260673472837404 +flat_mae,patch,logistic,aabc_sex,63,0.3593813663804626,test,0.9454545454545454,0.029973233238070968,0.9427282193682749,0.03261339614853133,0.9347826086956521,0.03583756148030224 +flat_mae,patch,logistic,aabc_sex,64,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,64,21.54434690031882,test,0.9272727272727272,0.0350988026920182,0.9252717391304348,0.03617917990240815,0.9252717391304348,0.03646325869151765 +flat_mae,patch,logistic,aabc_sex,65,0.3593813663804626,train,0.9924385633270322,0.0037137259351006716,0.9922570257611241,0.003798091270989395,0.9928558867493186,0.003552332972216515 +flat_mae,patch,logistic,aabc_sex,65,0.3593813663804626,test,0.8909090909090909,0.040256023624005995,0.884453781512605,0.04449853694027689,0.8756793478260869,0.04556133437299857 +flat_mae,patch,logistic,aabc_sex,66,0.005994842503189409,train,0.9035916824196597,0.011933303068037315,0.9007179630604141,0.012313530308583447,0.8990298660570357,0.012460408531900738 +flat_mae,patch,logistic,aabc_sex,66,0.005994842503189409,test,0.7818181818181819,0.05497089838652345,0.7727272727272727,0.05804979070500848,0.7697010869565217,0.05763370951254375 +flat_mae,patch,logistic,aabc_sex,67,0.3593813663804626,train,0.994328922495274,0.0031885960726365874,0.9941961885745005,0.003254756640317508,0.9950980392156863,0.002756155755596006 +flat_mae,patch,logistic,aabc_sex,67,0.3593813663804626,test,0.9454545454545454,0.030408241597053815,0.9442755825734549,0.030896735953411896,0.9470108695652174,0.029901162251695916 +flat_mae,patch,logistic,aabc_sex,68,0.005994842503189409,train,0.8979206049149339,0.013194683161698974,0.8946650343667955,0.013755052571751655,0.8923034086579326,0.014158208703972509 +flat_mae,patch,logistic,aabc_sex,68,0.005994842503189409,test,0.9090909090909091,0.03885738280362057,0.9045470322804582,0.042070079932440566,0.8974184782608696,0.043712359140952936 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,train,0.9319470699432892,0.01077242298822536,0.9299646954986761,0.01110885709327437,0.9284049942847094,0.011314072400217123 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,test,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,train,0.9338374291115312,0.011117201464843499,0.9316802273020792,0.011586622699246069,0.9288226501362877,0.012092125093261756 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,test,0.9090909090909091,0.0378967540422943,0.905982905982906,0.039539253985870666,0.9035326086956521,0.04053994012493329 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,train,0.9395085066162571,0.009636382085930765,0.9376638680217999,0.0099842705125286,0.9355491075353908,0.010364219371078892 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,test,0.8545454545454545,0.04317262677298356,0.8484848484848485,0.04564800341842223,0.8444293478260869,0.0459018546482156 +flat_mae,patch,logistic,aabc_sex,72,0.000774263682681127,train,0.8695652173913043,0.014237550269093334,0.8647314529667471,0.014960858086545127,0.8611037838154694,0.015266531240531546 +flat_mae,patch,logistic,aabc_sex,72,0.000774263682681127,test,0.8727272727272727,0.044236120763074246,0.8683760683760684,0.04608209209658461,0.8661684782608696,0.04647672730710621 +flat_mae,patch,logistic,aabc_sex,73,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,73,21.54434690031882,test,0.8909090909090909,0.043731653532208876,0.8879076086956521,0.04511498546999362,0.8879076086956521,0.04544896176175565 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,train,0.941398865784499,0.00992016905654182,0.9396520951935851,0.010264625770580611,0.9377912600017586,0.010634325734776809 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,test,0.8545454545454545,0.04579976541857712,0.84593837535014,0.05011912478432602,0.8383152173913043,0.05070311061386225 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,train,0.9376181474480151,0.010537116354743132,0.9357586819802679,0.010908238130248377,0.9339151206072862,0.011283176349969597 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,test,0.8545454545454545,0.043384119423837755,0.8505434782608696,0.04484254186318451,0.8505434782608696,0.04501957589196547 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,train,0.9357277882797732,0.010421905838062972,0.9337678597731625,0.010814242496142571,0.9316729681409186,0.011287440622514638 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,test,0.8181818181818182,0.05193932887800456,0.8131793478260869,0.05338679019627279,0.8131793478260869,0.05317165281341706 +flat_mae,patch,logistic,aabc_sex,77,0.005994842503189409,train,0.8979206049149339,0.013117663067734965,0.8949470432480142,0.01353677454086502,0.8935197397344588,0.013749267003730456 +flat_mae,patch,logistic,aabc_sex,77,0.005994842503189409,test,0.8181818181818182,0.05142723335528083,0.8035714285714286,0.058756901716593084,0.7948369565217391,0.056965334606812766 +flat_mae,patch,logistic,aabc_sex,78,0.005994842503189409,train,0.8960302457466919,0.013249755982839723,0.89293113663378,0.013758970080483265,0.8912775872680911,0.01415060876965011 +flat_mae,patch,logistic,aabc_sex,78,0.005994842503189409,test,0.9454545454545454,0.03086574500762996,0.9442755825734549,0.031335090229914805,0.9470108695652174,0.030057897792347018 +flat_mae,patch,logistic,aabc_sex,79,0.005994842503189409,train,0.9073724007561437,0.012455443869339566,0.9046113762737312,0.01289538477154066,0.9029060054515079,0.01315452309457557 +flat_mae,patch,logistic,aabc_sex,79,0.005994842503189409,test,0.8181818181818182,0.05304170821077836,0.8106060606060606,0.0562151492464375,0.8070652173913043,0.056095126269245274 +flat_mae,patch,logistic,aabc_sex,80,0.3593813663804626,train,0.994328922495274,0.003136882303944245,0.9941961885745005,0.0032030230998253296,0.9950980392156863,0.002711455455533508 +flat_mae,patch,logistic,aabc_sex,80,0.3593813663804626,test,0.8545454545454545,0.047579424680134166,0.84593837535014,0.052523498566100425,0.8383152173913043,0.05296057861981076 +flat_mae,patch,logistic,aabc_sex,81,0.3593813663804626,train,0.994328922495274,0.003217786678329657,0.9941893034853195,0.003294901020232589,0.9944898736774231,0.0031773078129976313 +flat_mae,patch,logistic,aabc_sex,81,0.3593813663804626,test,0.8545454545454545,0.05002466829491752,0.8505434782608696,0.051612501009473614,0.8505434782608696,0.052184173017458736 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,train,0.9905482041587902,0.004096673063989462,0.9903269809575008,0.004184733719960871,0.991221899821214,0.003810944890385806 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,test,0.9090909090909091,0.0378155768333589,0.905982905982906,0.03926695181194285,0.9035326086956521,0.03974940470702493 +flat_mae,patch,logistic,aabc_sex,83,0.005994842503189409,train,0.8922495274102079,0.013610520373712763,0.8890377234204629,0.014092613991757126,0.8874014478736187,0.014346069860750795 +flat_mae,patch,logistic,aabc_sex,83,0.005994842503189409,test,0.8363636363636363,0.051161387816455704,0.8281846581048247,0.054657195980105495,0.8226902173913043,0.054534270479420464 +flat_mae,patch,logistic,aabc_sex,84,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,84,21.54434690031882,test,0.7636363636363637,0.05875457850982992,0.7633234028467395,0.05872485924443031,0.7785326086956521,0.05703047161340528 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,train,0.9262759924385633,0.011659777030713105,0.9240784423403167,0.012080205695287172,0.9222867024238695,0.012483632469377429 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,test,0.8909090909090909,0.044111294256593006,0.8891129032258065,0.044618949304083395,0.8940217391304348,0.04380374399465143 +flat_mae,patch,logistic,aabc_sex,86,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,86,2.782559402207126,test,0.8909090909090909,0.042644236528409186,0.8879076086956521,0.04392190742123186,0.8879076086956521,0.044151988166547564 +flat_mae,patch,logistic,aabc_sex,87,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,87,2.782559402207126,test,0.8181818181818182,0.04497498294506723,0.7989766081871346,0.055373599668240674,0.7887228260869565,0.052285456582587306 +flat_mae,patch,logistic,aabc_sex,88,0.3593813663804626,train,0.994328922495274,0.0032870492336992945,0.9941961885745005,0.003356270326759491,0.9950980392156863,0.0028412566088675288 +flat_mae,patch,logistic,aabc_sex,88,0.3593813663804626,test,0.9090909090909091,0.03782692206693338,0.9045470322804582,0.04090331052123845,0.8974184782608696,0.04288909546362678 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,train,0.9338374291115312,0.011521090745358842,0.9320413294426397,0.0118591769727415,0.9312553122893402,0.012066032164729599 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,test,0.8545454545454545,0.04311359586905653,0.8428571428571429,0.04994541649517294,0.8322010869565217,0.04951343616202821 +flat_mae,patch,logistic,aabc_sex,90,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,90,2.782559402207126,test,0.8181818181818182,0.05003477798756041,0.8106060606060606,0.05344004624696483,0.8070652173913043,0.0538811347516391 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,train,0.9376181474480151,0.011255758533163138,0.9357586819802679,0.011660867330308293,0.9339151206072862,0.012076523330992392 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,test,0.8727272727272727,0.044760239144406515,0.8699763593380614,0.04559287473620254,0.8722826086956521,0.04539177692808507 +flat_mae,patch,logistic,aabc_sex,92,0.3593813663804626,train,0.994328922495274,0.0031833648393194574,0.9941893034853195,0.003259221850523137,0.9944898736774231,0.0031330381736912123 +flat_mae,patch,logistic,aabc_sex,92,0.3593813663804626,test,0.9272727272727272,0.03323954629679126,0.9242424242424243,0.03520353241417387,0.9191576086956521,0.0367852744721431 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,train,0.994328922495274,0.0035067543372097566,0.9941961885745005,0.0035797329570775908,0.9950980392156863,0.00303116510520255 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,test,0.8545454545454545,0.047146460740235596,0.84593837535014,0.05163677142825968,0.8383152173913043,0.051663413291282226 +flat_mae,patch,logistic,aabc_sex,94,0.005994842503189409,train,0.8998109640831758,0.012978273776205949,0.8966861598440545,0.013499290118187965,0.8945455611243003,0.013864825447930693 +flat_mae,patch,logistic,aabc_sex,94,0.005994842503189409,test,0.8909090909090909,0.04125149634150527,0.8879076086956521,0.04250676555669201,0.8879076086956521,0.04263557676053613 +flat_mae,patch,logistic,aabc_sex,95,0.005994842503189409,train,0.8960302457466919,0.012895256012080998,0.8927875243664718,0.013357689903799382,0.890669421729828,0.013568329251993657 +flat_mae,patch,logistic,aabc_sex,95,0.005994842503189409,test,0.8727272727272727,0.04530121046309344,0.8699763593380614,0.04613891519280841,0.8722826086956521,0.045791037429532805 +flat_mae,patch,logistic,aabc_sex,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,96,166.81005372000556,test,0.9272727272727272,0.03618960172103685,0.9252717391304348,0.03716104162218202,0.9252717391304348,0.03707382406270872 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,train,0.9886578449905482,0.004642640529322586,0.9883855386416862,0.004747997208526523,0.9889797473548463,0.0045716532791548 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,test,0.8909090909090909,0.0400429521455265,0.8863636363636364,0.04257519386334678,0.8817934782608696,0.04358411655200887 +flat_mae,patch,logistic,aabc_sex,98,0.3593813663804626,train,0.994328922495274,0.0031856988691024795,0.9941961885745005,0.0032539894921399834,0.9950980392156863,0.0027536514734562297 +flat_mae,patch,logistic,aabc_sex,98,0.3593813663804626,test,0.7636363636363637,0.05498339343582758,0.7472605160834218,0.061317252022155315,0.7418478260869565,0.059314618508377055 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,train,0.8998109640831758,0.013645385045181282,0.896824549847097,0.01409797444742376,0.8951537266625633,0.014283561592689589 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,test,0.8909090909090909,0.043094376538664506,0.884453781512605,0.04786993040209631,0.8756793478260869,0.04912332884657082 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,train,0.9376181474480151,0.010560581798126613,0.935672514619883,0.01096269631156762,0.9333069550690232,0.011406215987906584 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,test,0.9090909090909091,0.03863392933507759,0.9079959852793577,0.03873888965433061,0.9157608695652174,0.03624767820097727 diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..cb227ac578246da23bd6954936271c31b1bb4208 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:57:48 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:23:23 time: 5.9482 data: 4.7862 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:51 time: 0.2455 data: 0.0897 max mem: 3581 +extract (train) [ 40/236] eta: 0:01:12 time: 0.2117 data: 0.0714 max mem: 3581 +extract (train) [ 60/236] eta: 0:00:56 time: 0.2212 data: 0.0794 max mem: 3581 +extract (train) [ 80/236] eta: 0:00:45 time: 0.2054 data: 0.0686 max mem: 3581 +extract (train) [100/236] eta: 0:00:37 time: 0.2169 data: 0.0747 max mem: 3581 +extract (train) [120/236] eta: 0:00:31 time: 0.2345 data: 0.0844 max mem: 3581 +extract (train) [140/236] eta: 0:00:25 time: 0.2486 data: 0.0913 max mem: 3581 +extract (train) [160/236] eta: 0:00:19 time: 0.2198 data: 0.0762 max mem: 3581 +extract (train) [180/236] eta: 0:00:14 time: 0.2078 data: 0.0690 max mem: 3581 +extract (train) [200/236] eta: 0:00:09 time: 0.2435 data: 0.0890 max mem: 3581 +extract (train) [220/236] eta: 0:00:04 time: 0.2163 data: 0.0723 max mem: 3581 +extract (train) [235/236] eta: 0:00:00 time: 0.1931 data: 0.0620 max mem: 3581 +extract (train) Total time: 0:00:58 (0.2493 s / it) +extract (validation) [ 0/29] eta: 0:02:33 time: 5.2799 data: 5.1242 max mem: 3581 +extract (validation) [20/29] eta: 0:00:04 time: 0.2216 data: 0.0741 max mem: 3581 +extract (validation) [28/29] eta: 0:00:00 time: 0.1914 data: 0.0588 max mem: 3581 +extract (validation) Total time: 0:00:11 (0.3984 s / it) +extract (test) [ 0/28] eta: 0:02:21 time: 5.0486 data: 4.8682 max mem: 3581 +extract (test) [20/28] eta: 0:00:03 time: 0.2135 data: 0.0733 max mem: 3581 +extract (test) [27/28] eta: 0:00:00 time: 0.1757 data: 0.0524 max mem: 3581 +extract (test) Total time: 0:00:10 (0.3848 s / it) +feature extraction time: 0:01:21 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 0.0059948 | train | 0.89036 | 0.013179 | 0.88671 | 0.013721 | 0.88358 | 0.013983 | +| flat_mae | patch | logistic | aabc_sex | | 0.0059948 | test | 0.90909 | 0.041173 | 0.90713 | 0.041538 | 0.91667 | 0.038767 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04912697664940049, "f1": 0.8343927735028438, "f1_std": 0.04930838944801519, "bacc": 0.8410326086956521, "bacc_std": 0.048324626073035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03937360780695029, "f1": 0.9071259709557582, "f1_std": 0.040046181101594816, "bacc": 0.9096467391304348, "bacc_std": 0.03939160416233892} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05605771524078133, "f1": 0.7727272727272727, "f1_std": 0.059254543600323666, "bacc": 0.7697010869565217, "bacc_std": 0.05908271661619735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04765601260271447, "f1": 0.8505434782608696, "f1_std": 0.0491858824146365, "bacc": 0.8505434782608696, "bacc_std": 0.04930694346253553} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.047605472287705904, "f1": 0.8307692307692308, "f1_std": 0.04984310655364956, "bacc": 0.8288043478260869, "bacc_std": 0.05021844553229477} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.0342988133182669, "f1": 0.9252717391304348, "f1_std": 0.03540397038453564, "bacc": 0.9252717391304348, "bacc_std": 0.035978014861688494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.037733804472912615, "f1": 0.9071259709557582, "f1_std": 0.03853127153593407, "bacc": 0.9096467391304348, "bacc_std": 0.03815539932405344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.045615076527520684, "f1": 0.8521505376344086, "f1_std": 0.046133212139747255, "bacc": 0.8566576086956521, "bacc_std": 0.04576822489103413} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 21.54434690031882, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04262679093932739, "f1": 0.8891129032258065, "f1_std": 0.04301775641175362, "bacc": 0.8940217391304348, "bacc_std": 0.04201854118884831} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04280225528930969, "f1": 0.8891129032258065, "f1_std": 0.04322234726985258, "bacc": 0.8940217391304348, "bacc_std": 0.04203106979181472} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042335144535124915, "f1": 0.8863636363636364, "f1_std": 0.044806265254742794, "bacc": 0.8817934782608696, "bacc_std": 0.045588573770937585} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 10000.0, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.0537926720868924, "f1": 0.76890756302521, "f1_std": 0.059109969617885505, "bacc": 0.7635869565217391, "bacc_std": 0.05784551870599951} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04003718932349797, "f1": 0.8863636363636364, "f1_std": 0.04222863744507983, "bacc": 0.8817934782608696, "bacc_std": 0.04292366521154935} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 2.782559402207126, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.034168256497312186, "f1": 0.9260752688172043, "f1_std": 0.03447556859460794, "bacc": 0.9313858695652174, "bacc_std": 0.03278549005346483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04156470212584131, "f1": 0.8879076086956521, "f1_std": 0.042833834942494896, "bacc": 0.8879076086956521, "bacc_std": 0.04304454110114572} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04801205082886366, "f1": 0.8683760683760684, "f1_std": 0.050560430637310624, "bacc": 0.8661684782608696, "bacc_std": 0.05144456453128287} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048836295945258686, "f1": 0.8307692307692308, "f1_std": 0.05083679266640807, "bacc": 0.8288043478260869, "bacc_std": 0.050732492151574986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.051958025016703255, "f1": 0.8176392572944298, "f1_std": 0.0518280668079557, "bacc": 0.8315217391304348, "bacc_std": 0.048842469825766226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.0541499723373173, "f1": 0.8166666666666667, "f1_std": 0.05409582459211839, "bacc": 0.8254076086956521, "bacc_std": 0.052616614651699915} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04473435272863111, "f1": 0.8683760683760684, "f1_std": 0.0469029358183802, "bacc": 0.8661684782608696, "bacc_std": 0.04736433942244797} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05334192217343714, "f1": 0.7472605160834218, "f1_std": 0.05952418153078315, "bacc": 0.7418478260869565, "bacc_std": 0.057535078064946676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 21.54434690031882, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04738586461476939, "f1": 0.8505434782608696, "f1_std": 0.04913543807149063, "bacc": 0.8505434782608696, "bacc_std": 0.04942144171764175} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 21.54434690031882, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.034680234683259446, "f1": 0.9260752688172043, "f1_std": 0.03495602602332031, "bacc": 0.9313858695652174, "bacc_std": 0.0331053287114331} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049177028837550305, "f1": 0.8484848484848485, "f1_std": 0.051981231917138854, "bacc": 0.8444293478260869, "bacc_std": 0.05228457557187376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.9818181818181818, "acc_std": 0.01828349750435446, "f1": 0.9814251941911516, "f1_std": 0.018512548584299526, "bacc": 0.984375, "bacc_std": 0.015712380667804608} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048307609379547437, "f1": 0.8307692307692308, "f1_std": 0.05071198843845818, "bacc": 0.8288043478260869, "bacc_std": 0.05097988034242785} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043575275421954274, "f1": 0.8683760683760684, "f1_std": 0.04522884203277424, "bacc": 0.8661684782608696, "bacc_std": 0.04550288283623707} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04335362949145831, "f1": 0.8663658451926415, "f1_std": 0.04653021527025345, "bacc": 0.8600543478260869, "bacc_std": 0.04719478731568314} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 1291.5496650148827, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04991670417176897, "f1": 0.8131793478260869, "f1_std": 0.05140770678777188, "bacc": 0.8131793478260869, "bacc_std": 0.05152521348686567} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05250707626454968, "f1": 0.8328267477203647, "f1_std": 0.05355653704835512, "bacc": 0.8349184782608696, "bacc_std": 0.05315867774959293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042817699283133205, "f1": 0.8683760683760684, "f1_std": 0.044469537195166976, "bacc": 0.8661684782608696, "bacc_std": 0.04449648157261069} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041034216723958065, "f1": 0.884453781512605, "f1_std": 0.04532303427299883, "bacc": 0.8756793478260869, "bacc_std": 0.046552994820911804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 21.54434690031882, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.038868134774213775, "f1": 0.884453781512605, "f1_std": 0.04296197820572849, "bacc": 0.8756793478260869, "bacc_std": 0.044194630696822446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0456399853687839, "f1": 0.8699763593380614, "f1_std": 0.04655171928823117, "bacc": 0.8722826086956521, "bacc_std": 0.04636882654416331} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 0.000774263682681127, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04231177473223895, "f1": 0.8639095086603039, "f1_std": 0.04796536929538607, "bacc": 0.8539402173913043, "bacc_std": 0.04864011634752779} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.043397467268139686, "f1": 0.8891129032258065, "f1_std": 0.04383004467776189, "bacc": 0.8940217391304348, "bacc_std": 0.04269020009409331} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05718948062550381, "f1": 0.7518222839291913, "f1_std": 0.0617684556787956, "bacc": 0.7479619565217391, "bacc_std": 0.060634551088185364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04255103304531088, "f1": 0.8428571428571429, "f1_std": 0.04914971930877, "bacc": 0.8322010869565217, "bacc_std": 0.04872800363200585} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04155133835399606, "f1": 0.8891129032258065, "f1_std": 0.04197417244591232, "bacc": 0.8940217391304348, "bacc_std": 0.04090394951243674} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.044684443648010545, "f1": 0.8484848484848485, "f1_std": 0.04728004267003601, "bacc": 0.8444293478260869, "bacc_std": 0.047714912529550166} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04297597252972914, "f1": 0.8879076086956521, "f1_std": 0.0444819331434469, "bacc": 0.8879076086956521, "bacc_std": 0.04477000376223868} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 166.81005372000556, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05208257905789911, "f1": 0.8328267477203647, "f1_std": 0.05327864410115794, "bacc": 0.8349184782608696, "bacc_std": 0.053185613616201936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03697100047379106, "f1": 0.9071259709557582, "f1_std": 0.03784912981263753, "bacc": 0.9096467391304348, "bacc_std": 0.03772085824166873} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.049907603885779084, "f1": 0.8131793478260869, "f1_std": 0.05127742657949745, "bacc": 0.8131793478260869, "bacc_std": 0.05113553750752834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.02440745042735182, "f1": 0.9630376344086022, "f1_std": 0.024547986237309635, "bacc": 0.96875, "bacc_std": 0.02097515271100547} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04321921825353023, "f1": 0.8879076086956521, "f1_std": 0.044628228260540144, "bacc": 0.8879076086956521, "bacc_std": 0.04490788970782748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04497586496947072, "f1": 0.84593837535014, "f1_std": 0.04929704231373217, "bacc": 0.8383152173913043, "bacc_std": 0.04951068572709034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042143101354023965, "f1": 0.8863636363636364, "f1_std": 0.04481043781409082, "bacc": 0.8817934782608696, "bacc_std": 0.04567415846166708} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04494479258117994, "f1": 0.8521505376344086, "f1_std": 0.0457321003264925, "bacc": 0.8566576086956521, "bacc_std": 0.045438270714608264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04261562201434377, "f1": 0.8863636363636364, "f1_std": 0.04498333568900711, "bacc": 0.8817934782608696, "bacc_std": 0.045880089454211424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04090034654018763, "f1": 0.8863636363636364, "f1_std": 0.04344170628074031, "bacc": 0.8817934782608696, "bacc_std": 0.044513893161724216} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 2.782559402207126, "split": "test", "acc": 0.8, "acc_std": 0.05383954121597904, "f1": 0.790003471017008, "f1_std": 0.05784213427936538, "bacc": 0.7853260869565217, "bacc_std": 0.05716234591206114} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.043503069142227055, "f1": 0.8879076086956521, "f1_std": 0.04497216206387446, "bacc": 0.8879076086956521, "bacc_std": 0.04530751422932359} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049221859015671846, "f1": 0.8521505376344086, "f1_std": 0.04974025236559959, "bacc": 0.8566576086956521, "bacc_std": 0.04887304758197895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048726675708689916, "f1": 0.8307692307692308, "f1_std": 0.05102350758462779, "bacc": 0.8288043478260869, "bacc_std": 0.05138925309817285} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 166.81005372000556, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04988401754682122, "f1": 0.8307692307692308, "f1_std": 0.05210936158792554, "bacc": 0.8288043478260869, "bacc_std": 0.05207642003582463} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04243186078782493, "f1": 0.8891129032258065, "f1_std": 0.04293253086990514, "bacc": 0.8940217391304348, "bacc_std": 0.041781842456207294} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.056049753576916446, "f1": 0.7585275244849713, "f1_std": 0.05735285232045231, "bacc": 0.7601902173913043, "bacc_std": 0.05715415525064073} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.050450453560526166, "f1": 0.8151881720430108, "f1_std": 0.05129084821973903, "bacc": 0.8192934782608696, "bacc_std": 0.051175930773522726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05007234435715472, "f1": 0.8035714285714286, "f1_std": 0.05770134431335501, "bacc": 0.7948369565217391, "bacc_std": 0.05589502960491657} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03388285116848619, "f1": 0.9266666666666667, "f1_std": 0.0337774673825512, "bacc": 0.9375, "bacc_std": 0.029118075222917813} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04134428718268859, "f1": 0.8879076086956521, "f1_std": 0.04269598498200412, "bacc": 0.8879076086956521, "bacc_std": 0.04294544523969931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029973233238070968, "f1": 0.9427282193682749, "f1_std": 0.03261339614853133, "bacc": 0.9347826086956521, "bacc_std": 0.03583756148030224} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 21.54434690031882, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.0350988026920182, "f1": 0.9252717391304348, "f1_std": 0.03617917990240815, "bacc": 0.9252717391304348, "bacc_std": 0.03646325869151765} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.040256023624005995, "f1": 0.884453781512605, "f1_std": 0.04449853694027689, "bacc": 0.8756793478260869, "bacc_std": 0.04556133437299857} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05497089838652345, "f1": 0.7727272727272727, "f1_std": 0.05804979070500848, "bacc": 0.7697010869565217, "bacc_std": 0.05763370951254375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030408241597053815, "f1": 0.9442755825734549, "f1_std": 0.030896735953411896, "bacc": 0.9470108695652174, "bacc_std": 0.029901162251695916} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03885738280362057, "f1": 0.9045470322804582, "f1_std": 0.042070079932440566, "bacc": 0.8974184782608696, "bacc_std": 0.043712359140952936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 1.0, "acc_std": 0.0, "f1": 1.0, "f1_std": 0.0, "bacc": 1.0, "bacc_std": 0.0} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.0378967540422943, "f1": 0.905982905982906, "f1_std": 0.039539253985870666, "bacc": 0.9035326086956521, "bacc_std": 0.04053994012493329} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04317262677298356, "f1": 0.8484848484848485, "f1_std": 0.04564800341842223, "bacc": 0.8444293478260869, "bacc_std": 0.0459018546482156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.000774263682681127, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044236120763074246, "f1": 0.8683760683760684, "f1_std": 0.04608209209658461, "bacc": 0.8661684782608696, "bacc_std": 0.04647672730710621} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 21.54434690031882, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.043731653532208876, "f1": 0.8879076086956521, "f1_std": 0.04511498546999362, "bacc": 0.8879076086956521, "bacc_std": 0.04544896176175565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04579976541857712, "f1": 0.84593837535014, "f1_std": 0.05011912478432602, "bacc": 0.8383152173913043, "bacc_std": 0.05070311061386225} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.043384119423837755, "f1": 0.8505434782608696, "f1_std": 0.04484254186318451, "bacc": 0.8505434782608696, "bacc_std": 0.04501957589196547} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05193932887800456, "f1": 0.8131793478260869, "f1_std": 0.05338679019627279, "bacc": 0.8131793478260869, "bacc_std": 0.05317165281341706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05142723335528083, "f1": 0.8035714285714286, "f1_std": 0.058756901716593084, "bacc": 0.7948369565217391, "bacc_std": 0.056965334606812766} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.03086574500762996, "f1": 0.9442755825734549, "f1_std": 0.031335090229914805, "bacc": 0.9470108695652174, "bacc_std": 0.030057897792347018} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05304170821077836, "f1": 0.8106060606060606, "f1_std": 0.0562151492464375, "bacc": 0.8070652173913043, "bacc_std": 0.056095126269245274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047579424680134166, "f1": 0.84593837535014, "f1_std": 0.052523498566100425, "bacc": 0.8383152173913043, "bacc_std": 0.05296057861981076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.05002466829491752, "f1": 0.8505434782608696, "f1_std": 0.051612501009473614, "bacc": 0.8505434782608696, "bacc_std": 0.052184173017458736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.0378155768333589, "f1": 0.905982905982906, "f1_std": 0.03926695181194285, "bacc": 0.9035326086956521, "bacc_std": 0.03974940470702493} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.051161387816455704, "f1": 0.8281846581048247, "f1_std": 0.054657195980105495, "bacc": 0.8226902173913043, "bacc_std": 0.054534270479420464} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 21.54434690031882, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05875457850982992, "f1": 0.7633234028467395, "f1_std": 0.05872485924443031, "bacc": 0.7785326086956521, "bacc_std": 0.05703047161340528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.044111294256593006, "f1": 0.8891129032258065, "f1_std": 0.044618949304083395, "bacc": 0.8940217391304348, "bacc_std": 0.04380374399465143} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042644236528409186, "f1": 0.8879076086956521, "f1_std": 0.04392190742123186, "bacc": 0.8879076086956521, "bacc_std": 0.044151988166547564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04497498294506723, "f1": 0.7989766081871346, "f1_std": 0.055373599668240674, "bacc": 0.7887228260869565, "bacc_std": 0.052285456582587306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03782692206693338, "f1": 0.9045470322804582, "f1_std": 0.04090331052123845, "bacc": 0.8974184782608696, "bacc_std": 0.04288909546362678} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04311359586905653, "f1": 0.8428571428571429, "f1_std": 0.04994541649517294, "bacc": 0.8322010869565217, "bacc_std": 0.04951343616202821} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05003477798756041, "f1": 0.8106060606060606, "f1_std": 0.05344004624696483, "bacc": 0.8070652173913043, "bacc_std": 0.0538811347516391} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044760239144406515, "f1": 0.8699763593380614, "f1_std": 0.04559287473620254, "bacc": 0.8722826086956521, "bacc_std": 0.04539177692808507} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03323954629679126, "f1": 0.9242424242424243, "f1_std": 0.03520353241417387, "bacc": 0.9191576086956521, "bacc_std": 0.0367852744721431} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047146460740235596, "f1": 0.84593837535014, "f1_std": 0.05163677142825968, "bacc": 0.8383152173913043, "bacc_std": 0.051663413291282226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04125149634150527, "f1": 0.8879076086956521, "f1_std": 0.04250676555669201, "bacc": 0.8879076086956521, "bacc_std": 0.04263557676053613} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04530121046309344, "f1": 0.8699763593380614, "f1_std": 0.04613891519280841, "bacc": 0.8722826086956521, "bacc_std": 0.045791037429532805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03618960172103685, "f1": 0.9252717391304348, "f1_std": 0.03716104162218202, "bacc": 0.9252717391304348, "bacc_std": 0.03707382406270872} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.0400429521455265, "f1": 0.8863636363636364, "f1_std": 0.04257519386334678, "bacc": 0.8817934782608696, "bacc_std": 0.04358411655200887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05498339343582758, "f1": 0.7472605160834218, "f1_std": 0.061317252022155315, "bacc": 0.7418478260869565, "bacc_std": 0.059314618508377055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.043094376538664506, "f1": 0.884453781512605, "f1_std": 0.04786993040209631, "bacc": 0.8756793478260869, "bacc_std": 0.04912332884657082} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03863392933507759, "f1": 0.9079959852793577, "f1_std": 0.03873888965433061, "bacc": 0.9157608695652174, "bacc_std": 0.03624767820097727} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 120.29 | 1006.6 | 0.95811 | 0.041186 | 0.95684 | 0.042478 | 0.95603 | 0.043525 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 120.29 | 1006.6 | 0.86818 | 0.048096 | 0.86326 | 0.050547 | 0.86214 | 0.051691 | + + +done! total time: 0:05:21 diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6f5c76c85fa32c280c89f0a58455710b7c598bf --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (aabc_sex reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic +model: flat_mae +representation: reg +dataset: aabc_sex +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..a5a1a75b58d7e85bc0f9c420dfa6c5d82ba4a1fc --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,aabc_sex,,0.005994842503189409,train,0.8827977315689981,0.014136487061395853,0.8790598542729874,0.014668374161495796,0.8764271077283372,0.014872087338345774 +flat_mae,reg,logistic,aabc_sex,,0.005994842503189409,test,0.9090909090909091,0.041547248817430986,0.9071259709557582,0.041914009513307364,0.9166666666666667,0.03913123067267065 +flat_mae,reg,logistic,aabc_sex,1,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,1,2.782559402207126,test,0.8181818181818182,0.04963457872248615,0.8131793478260869,0.05114581480552598,0.8131793478260869,0.05122066949963678 +flat_mae,reg,logistic,aabc_sex,2,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,2,166.81005372000556,test,0.8363636363636363,0.05188512422363925,0.8343927735028438,0.05225702289866257,0.8410326086956521,0.05141273963797065 +flat_mae,reg,logistic,aabc_sex,3,0.3593813663804626,train,0.9886578449905482,0.0045463967231452,0.9883855386416862,0.004649210679147612,0.9889797473548463,0.004463994788417824 +flat_mae,reg,logistic,aabc_sex,3,0.3593813663804626,test,0.8363636363636363,0.04903397371733596,0.8250265111346766,0.054673373568046524,0.8165760869565217,0.05421412444926987 +flat_mae,reg,logistic,aabc_sex,4,0.046415888336127774,train,0.9376181474480151,0.01055393736774507,0.9358427325549344,0.010912968276863003,0.9345232861455495,0.011299183037960008 +flat_mae,reg,logistic,aabc_sex,4,0.046415888336127774,test,0.8363636363636363,0.05110554303522874,0.8328267477203647,0.05195841064492983,0.8349184782608696,0.051621821989718285 +flat_mae,reg,logistic,aabc_sex,5,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,5,2.782559402207126,test,0.8363636363636363,0.04865443674400136,0.8307692307692308,0.050894657371479475,0.8288043478260869,0.051147502136806196 +flat_mae,reg,logistic,aabc_sex,6,0.3593813663804626,train,0.9905482041587902,0.0041494498996771986,0.9903155058088658,0.004248802599657847,0.9906137342829509,0.004199079815685069 +flat_mae,reg,logistic,aabc_sex,6,0.3593813663804626,test,0.8545454545454545,0.04845929023332217,0.8484848484848485,0.051309649482925775,0.8444293478260869,0.05154459717771612 +flat_mae,reg,logistic,aabc_sex,7,0.3593813663804626,train,0.9867674858223062,0.00512931326051876,0.9864577733405013,0.005240958816660393,0.9873457604267417,0.005002261058936897 +flat_mae,reg,logistic,aabc_sex,7,0.3593813663804626,test,0.9090909090909091,0.039096895947469706,0.9071259709557582,0.039851289716056816,0.9096467391304348,0.03949124032094313 +flat_mae,reg,logistic,aabc_sex,8,0.3593813663804626,train,0.994328922495274,0.0032478196999326104,0.9941961885745005,0.003316149142067638,0.9950980392156863,0.002807347420366587 +flat_mae,reg,logistic,aabc_sex,8,0.3593813663804626,test,0.8363636363636363,0.04792534138748972,0.8343927735028438,0.048189416985361305,0.8410326086956521,0.04727554170075885 +flat_mae,reg,logistic,aabc_sex,9,0.046415888336127774,train,0.9338374291115312,0.010878221916520435,0.9321260333229466,0.01117489732835956,0.9318634778276034,0.011352911607858063 +flat_mae,reg,logistic,aabc_sex,9,0.046415888336127774,test,0.8,0.053244063029918014,0.790003471017008,0.056985589296998825,0.7853260869565217,0.05647921769790101 +flat_mae,reg,logistic,aabc_sex,10,0.3593813663804626,train,0.994328922495274,0.003098215056926024,0.9941961885745005,0.003164518667587455,0.9950980392156863,0.002678032295937691 +flat_mae,reg,logistic,aabc_sex,10,0.3593813663804626,test,0.8727272727272727,0.04517148226389471,0.8711943793911007,0.04534258973080901,0.8783967391304348,0.04378780932734153 +flat_mae,reg,logistic,aabc_sex,11,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,11,166.81005372000556,test,0.8545454545454545,0.049584375038250535,0.8484848484848485,0.05218258416989441,0.8444293478260869,0.05246564644426059 +flat_mae,reg,logistic,aabc_sex,12,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,12,21.54434690031882,test,0.7636363636363637,0.054871558515175216,0.7518222839291913,0.058687219457857,0.7479619565217391,0.057904129587482 +flat_mae,reg,logistic,aabc_sex,13,0.3593813663804626,train,0.994328922495274,0.0032355064142038773,0.9941822314276811,0.003322819796177192,0.99388170813916,0.003544059237470832 +flat_mae,reg,logistic,aabc_sex,13,0.3593813663804626,test,0.8727272727272727,0.0453570445201548,0.8711943793911007,0.04554111878442664,0.8783967391304348,0.044090095776604374 +flat_mae,reg,logistic,aabc_sex,14,0.046415888336127774,train,0.9319470699432892,0.011025712326654148,0.9301434985474073,0.011347204536024658,0.9296213253612357,0.011586405261473517 +flat_mae,reg,logistic,aabc_sex,14,0.046415888336127774,test,0.8909090909090909,0.04160437039979858,0.8879076086956521,0.04310602254016953,0.8879076086956521,0.043792733652901275 +flat_mae,reg,logistic,aabc_sex,15,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,15,2.782559402207126,test,0.8545454545454545,0.04510373717817642,0.84593837535014,0.04954368172403884,0.8383152173913043,0.04970529678473679 +flat_mae,reg,logistic,aabc_sex,16,0.005994842503189409,train,0.8865784499054821,0.013961190526868639,0.8831197525408749,0.0144650275493355,0.8812831560127787,0.014693432730908873 +flat_mae,reg,logistic,aabc_sex,16,0.005994842503189409,test,0.8181818181818182,0.05391945017744537,0.8151881720430108,0.054833697507646544,0.8192934782608696,0.05480247372642879 +flat_mae,reg,logistic,aabc_sex,17,0.3593813663804626,train,0.994328922495274,0.003154983967521973,0.9941961885745005,0.0032216758881285336,0.9950980392156863,0.0027271021549332103 +flat_mae,reg,logistic,aabc_sex,17,0.3593813663804626,test,0.8545454545454545,0.044752395014168014,0.84593837535014,0.04931824190169963,0.8383152173913043,0.04956049769180805 +flat_mae,reg,logistic,aabc_sex,18,0.005994842503189409,train,0.8960302457466919,0.012968611198272638,0.8927875243664718,0.013457487670065488,0.890669421729828,0.013767289346121453 +flat_mae,reg,logistic,aabc_sex,18,0.005994842503189409,test,0.8,0.05364177476556865,0.7989365237620472,0.053592871293765575,0.8097826086956521,0.05204354485833863 +flat_mae,reg,logistic,aabc_sex,19,0.3593813663804626,train,0.9924385633270322,0.003823762528312626,0.9922477212110554,0.0039197535883728615,0.9922477212110554,0.003954097226783159 +flat_mae,reg,logistic,aabc_sex,19,0.3593813663804626,test,0.8363636363636363,0.049347732244612294,0.8328267477203647,0.05020600659950374,0.8349184782608696,0.04977145687166564 +flat_mae,reg,logistic,aabc_sex,20,0.3593813663804626,train,0.9924385633270322,0.004009520867775961,0.9922570257611241,0.004099504116099101,0.9928558867493186,0.003817904424233618 +flat_mae,reg,logistic,aabc_sex,20,0.3593813663804626,test,0.8727272727272727,0.04432910727801897,0.8683760683760684,0.046685481133414454,0.8661684782608696,0.047274144279217056 +flat_mae,reg,logistic,aabc_sex,21,0.3593813663804626,train,0.9924385633270322,0.0036336145431969634,0.9922381665052675,0.0037365275295373444,0.9916395556727923,0.004064741362635509 +flat_mae,reg,logistic,aabc_sex,21,0.3593813663804626,test,0.7636363636363637,0.05373807305689113,0.7555555555555555,0.05641368691541304,0.7540760869565217,0.05634798301556164 +flat_mae,reg,logistic,aabc_sex,22,0.005994842503189409,train,0.8846880907372401,0.01311602847149999,0.8812508969938286,0.013556818511561007,0.8796491690846742,0.013733753403981077 +flat_mae,reg,logistic,aabc_sex,22,0.005994842503189409,test,0.9272727272727272,0.03380050368835911,0.9252717391304348,0.034869660102593646,0.9252717391304348,0.03511004002002761 +flat_mae,reg,logistic,aabc_sex,23,0.046415888336127774,train,0.9357277882797732,0.01034440464117014,0.9340244152947736,0.010646449004114567,0.9334974647557079,0.010892822827379857 +flat_mae,reg,logistic,aabc_sex,23,0.046415888336127774,test,0.8545454545454545,0.0482386765713506,0.8533333333333333,0.048264050697296314,0.8627717391304348,0.046209459473510434 +flat_mae,reg,logistic,aabc_sex,24,0.046415888336127774,train,0.9395085066162571,0.010483001382776205,0.9379817696884434,0.010748344446243731,0.9379817696884434,0.010854864048711605 +flat_mae,reg,logistic,aabc_sex,24,0.046415888336127774,test,0.8181818181818182,0.0559917349272666,0.8151881720430108,0.05675357600934304,0.8192934782608696,0.05618637607666179 +flat_mae,reg,logistic,aabc_sex,25,0.046415888336127774,train,0.9319470699432892,0.010410559951593523,0.9300552388787683,0.010745858176526936,0.9290131598229725,0.011043107261051519 +flat_mae,reg,logistic,aabc_sex,25,0.046415888336127774,test,0.9636363636363636,0.023204901163688015,0.9630376344086022,0.02334271983025949,0.96875,0.01994171193754438 +flat_mae,reg,logistic,aabc_sex,26,0.3593813663804626,train,0.994328922495274,0.003226552390385193,0.9941893034853195,0.0033031111552819068,0.9944898736774231,0.0031591037269086348 +flat_mae,reg,logistic,aabc_sex,26,0.3593813663804626,test,0.8363636363636363,0.04919744668507275,0.8307692307692308,0.051588624204507535,0.8288043478260869,0.05181706962135183 +flat_mae,reg,logistic,aabc_sex,27,0.3593813663804626,train,0.9867674858223062,0.0048508413759392145,0.9864417081324122,0.004965102826746719,0.9867375948884786,0.004881473247665552 +flat_mae,reg,logistic,aabc_sex,27,0.3593813663804626,test,0.9090909090909091,0.039749363527312565,0.9071259709557582,0.04031653999398343,0.9096467391304348,0.039346803556667836 +flat_mae,reg,logistic,aabc_sex,28,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,28,166.81005372000556,test,0.8545454545454545,0.0475432817163859,0.8521505376344086,0.04820042148296156,0.8566576086956521,0.04759193630902456 +flat_mae,reg,logistic,aabc_sex,29,0.3593813663804626,train,0.9924385633270322,0.003929765280963707,0.9922477212110554,0.004030086222851296,0.9922477212110554,0.00411077851581568 +flat_mae,reg,logistic,aabc_sex,29,0.3593813663804626,test,0.7818181818181819,0.055806229102238834,0.7758152173913043,0.05763422944091793,0.7758152173913043,0.05760300902191158 +flat_mae,reg,logistic,aabc_sex,30,0.046415888336127774,train,0.9357277882797732,0.010743369346718382,0.9340244152947736,0.01104597420163019,0.9334974647557079,0.011214558404668007 +flat_mae,reg,logistic,aabc_sex,30,0.046415888336127774,test,0.8,0.05532695904072636,0.7931623931623932,0.05781394568470329,0.7914402173913043,0.05777918201523962 +flat_mae,reg,logistic,aabc_sex,31,0.005994842503189409,train,0.888468809073724,0.013959761192476905,0.8849902534113061,0.014472927995417854,0.8829171429408833,0.014738970493339887 +flat_mae,reg,logistic,aabc_sex,31,0.005994842503189409,test,0.8181818181818182,0.05063838416076504,0.8151881720430108,0.05106182939170531,0.8192934782608696,0.050183454721416554 +flat_mae,reg,logistic,aabc_sex,32,0.005994842503189409,train,0.8941398865784499,0.013478978815749217,0.8911970382558618,0.013862338099736552,0.8902517658782496,0.013956091602091299 +flat_mae,reg,logistic,aabc_sex,32,0.005994842503189409,test,0.8727272727272727,0.04456437446070997,0.8683760683760684,0.046747055806663884,0.8661684782608696,0.0473827223443044 +flat_mae,reg,logistic,aabc_sex,33,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,33,2.782559402207126,test,0.8727272727272727,0.044809370330562405,0.8683760683760684,0.04657877980078554,0.8661684782608696,0.04686970817246869 +flat_mae,reg,logistic,aabc_sex,34,0.005994842503189409,train,0.8922495274102079,0.013959208258828765,0.8890377234204629,0.01440910992341852,0.8874014478736187,0.01454501221761604 +flat_mae,reg,logistic,aabc_sex,34,0.005994842503189409,test,0.8909090909090909,0.042739634575372346,0.8863636363636364,0.04521444630226576,0.8817934782608696,0.04610032235431791 +flat_mae,reg,logistic,aabc_sex,35,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,35,2.782559402207126,test,0.7818181818181819,0.05533817895076396,0.78,0.05546431213640254,0.7880434782608696,0.054517657117702505 +flat_mae,reg,logistic,aabc_sex,36,0.046415888336127774,train,0.9357277882797732,0.010468706670234635,0.9339410589410589,0.010788897632126381,0.9328892992174448,0.011035256036470324 +flat_mae,reg,logistic,aabc_sex,36,0.046415888336127774,test,0.8545454545454545,0.04644733977205609,0.8505434782608696,0.04800324616721444,0.8505434782608696,0.04820519480980968 +flat_mae,reg,logistic,aabc_sex,37,0.046415888336127774,train,0.9300567107750473,0.01097682331313143,0.9282475209414007,0.011252239929384086,0.9279873384331311,0.011303907867645822 +flat_mae,reg,logistic,aabc_sex,37,0.046415888336127774,test,0.8,0.054989453534291445,0.7931623931623932,0.05762257720419604,0.7914402173913043,0.05770085831524557 +flat_mae,reg,logistic,aabc_sex,38,0.046415888336127774,train,0.9300567107750473,0.011134979962908577,0.9282475209414007,0.011439285076135711,0.9279873384331311,0.011592581265629443 +flat_mae,reg,logistic,aabc_sex,38,0.046415888336127774,test,0.8363636363636363,0.04581321762429382,0.8281846581048247,0.049186712725874604,0.8226902173913043,0.0492319462198244 +flat_mae,reg,logistic,aabc_sex,39,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,39,2.782559402207126,test,0.7818181818181819,0.05559012933371213,0.7758152173913043,0.05749668367635992,0.7758152173913043,0.05756003764419491 +flat_mae,reg,logistic,aabc_sex,40,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,40,21.54434690031882,test,0.8545454545454545,0.044288625109045604,0.8484848484848485,0.046984170912240414,0.8444293478260869,0.047367192487359544 +flat_mae,reg,logistic,aabc_sex,41,0.000774263682681127,train,0.8449905482041588,0.01572315258132871,0.839364538586876,0.016417618619742308,0.8362129605205311,0.016536086908137484 +flat_mae,reg,logistic,aabc_sex,41,0.000774263682681127,test,0.8909090909090909,0.04012878441770913,0.884453781512605,0.044360877187844254,0.8756793478260869,0.04548866801733046 +flat_mae,reg,logistic,aabc_sex,42,0.3593813663804626,train,0.996219281663516,0.0026786702915615273,0.9961285128805621,0.0027386094902615036,0.9967320261437909,0.0023153865755490915 +flat_mae,reg,logistic,aabc_sex,42,0.3593813663804626,test,0.7818181818181819,0.05595067485320373,0.7782258064516129,0.05668690741802004,0.7819293478260869,0.0562098689810325 +flat_mae,reg,logistic,aabc_sex,43,0.3593813663804626,train,0.9886578449905482,0.004711720132040736,0.9883991228070176,0.004804440963387363,0.9895879128931093,0.0043313360726201814 +flat_mae,reg,logistic,aabc_sex,43,0.3593813663804626,test,0.8909090909090909,0.04042633950309915,0.89,0.04041177629277131,0.9001358695652174,0.03744149922050894 +flat_mae,reg,logistic,aabc_sex,44,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,44,166.81005372000556,test,0.7818181818181819,0.054965221160203484,0.7782258064516129,0.05595547972546642,0.7819293478260869,0.05618779208233672 +flat_mae,reg,logistic,aabc_sex,45,0.046415888336127774,train,0.9338374291115312,0.01087605888053909,0.9320413294426397,0.01120754936870135,0.9312553122893402,0.011450650306389563 +flat_mae,reg,logistic,aabc_sex,45,0.046415888336127774,test,0.9454545454545454,0.02804520553889753,0.9447975911676145,0.028078716100433287,0.953125,0.02410134850999006 +flat_mae,reg,logistic,aabc_sex,46,0.3593813663804626,train,0.9924385633270322,0.004082062440101553,0.9922477212110554,0.004185816784972519,0.9922477212110554,0.004258395770993863 +flat_mae,reg,logistic,aabc_sex,46,0.3593813663804626,test,0.8909090909090909,0.042093386928833555,0.89,0.04206652355762537,0.9001358695652174,0.0391341451667822 +flat_mae,reg,logistic,aabc_sex,47,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,47,2.782559402207126,test,0.8363636363636363,0.046064905700436205,0.8307692307692308,0.04809902735285537,0.8288043478260869,0.04855133895350049 +flat_mae,reg,logistic,aabc_sex,48,0.005994842503189409,train,0.888468809073724,0.014007242546543742,0.8851443102071458,0.01444020141346111,0.8835253084791466,0.014556564390030662 +flat_mae,reg,logistic,aabc_sex,48,0.005994842503189409,test,0.8909090909090909,0.042143101354023965,0.8863636363636364,0.04481043781409082,0.8817934782608696,0.04567415846166708 +flat_mae,reg,logistic,aabc_sex,49,0.046415888336127774,train,0.9395085066162571,0.010301243403258385,0.9378268790033496,0.010616348556950997,0.9367654386119171,0.010846950214732132 +flat_mae,reg,logistic,aabc_sex,49,0.046415888336127774,test,0.8,0.0512920659854704,0.7975911676145868,0.05194149876681347,0.8036684782608696,0.05166028341999323 +flat_mae,reg,logistic,aabc_sex,50,0.005994842503189409,train,0.888468809073724,0.012928338138478783,0.8851443102071458,0.013325753476181378,0.8835253084791466,0.013467382349393934 +flat_mae,reg,logistic,aabc_sex,50,0.005994842503189409,test,0.9090909090909091,0.03931815554386931,0.905982905982906,0.040837203264558045,0.9035326086956521,0.041432378512871 +flat_mae,reg,logistic,aabc_sex,51,0.000774263682681127,train,0.8544423440453687,0.014871069546126162,0.8490481431657901,0.015585508905510569,0.8455992262375802,0.015762551700862396 +flat_mae,reg,logistic,aabc_sex,51,0.000774263682681127,test,0.8,0.051634007628709684,0.790003471017008,0.05592045627482848,0.7853260869565217,0.05560883327106212 +flat_mae,reg,logistic,aabc_sex,52,0.005994842503189409,train,0.8979206049149339,0.013249429641949022,0.8948077772867875,0.013745298800411336,0.8929115741961957,0.014075534067913132 +flat_mae,reg,logistic,aabc_sex,52,0.005994842503189409,test,0.8545454545454545,0.04952983573664636,0.8521505376344086,0.0501609205021172,0.8566576086956521,0.04965181763710768 +flat_mae,reg,logistic,aabc_sex,53,0.3593813663804626,train,0.994328922495274,0.0033293405808292363,0.9941893034853195,0.003407023326519628,0.9944898736774231,0.003209083277892103 +flat_mae,reg,logistic,aabc_sex,53,0.3593813663804626,test,0.8727272727272727,0.04511108053213598,0.8711943793911007,0.04536163174254404,0.8783967391304348,0.04402845516653931 +flat_mae,reg,logistic,aabc_sex,54,0.000774263682681127,train,0.8506616257088847,0.01568102142432427,0.844657559706347,0.01655039601546719,0.8405067557665817,0.016754404754534526 +flat_mae,reg,logistic,aabc_sex,54,0.000774263682681127,test,0.8363636363636363,0.049685277264593194,0.8328267477203647,0.0506353082570226,0.8349184782608696,0.05042154403869542 +flat_mae,reg,logistic,aabc_sex,55,0.3593813663804626,train,0.996219281663516,0.002719795271000758,0.9961190832526338,0.002797287067040424,0.9955156950672646,0.003225945512016594 +flat_mae,reg,logistic,aabc_sex,55,0.3593813663804626,test,0.8545454545454545,0.049093050458346776,0.8521505376344086,0.049638901637455934,0.8566576086956521,0.04889513219384588 +flat_mae,reg,logistic,aabc_sex,56,0.005994842503189409,train,0.8922495274102079,0.01429654081307732,0.8890377234204629,0.014793019859325074,0.8874014478736187,0.015043084446629502 +flat_mae,reg,logistic,aabc_sex,56,0.005994842503189409,test,0.9090909090909091,0.03834086355550457,0.9071259709557582,0.03914050508419046,0.9096467391304348,0.03884152124556326 +flat_mae,reg,logistic,aabc_sex,57,0.005994842503189409,train,0.888468809073724,0.013457026518099156,0.8851443102071458,0.013925249962032548,0.8835253084791466,0.014146144917965052 +flat_mae,reg,logistic,aabc_sex,57,0.005994842503189409,test,0.8363636363636363,0.04961230186020163,0.8307692307692308,0.051962137554074084,0.8288043478260869,0.052191243777938996 +flat_mae,reg,logistic,aabc_sex,58,0.005994842503189409,train,0.9017013232514177,0.012781522853169299,0.8985663293902475,0.013223583641372343,0.8961795480524049,0.013384238899556863 +flat_mae,reg,logistic,aabc_sex,58,0.005994842503189409,test,0.8,0.053263728684616395,0.795677136102668,0.05451382208194657,0.7975543478260869,0.05431029444554952 +flat_mae,reg,logistic,aabc_sex,59,0.005994842503189409,train,0.8960302457466919,0.013295435603546454,0.8927875243664718,0.013794218340138085,0.890669421729828,0.014070022517174307 +flat_mae,reg,logistic,aabc_sex,59,0.005994842503189409,test,0.7636363636363637,0.05632785961311721,0.7623795280824195,0.05642476249978062,0.7724184782608696,0.05532867322262988 +flat_mae,reg,logistic,aabc_sex,60,0.046415888336127774,train,0.9319470699432892,0.011102070865451624,0.930313231850117,0.01136180425988593,0.930837656437762,0.01142076950643541 +flat_mae,reg,logistic,aabc_sex,60,0.046415888336127774,test,0.7818181818181819,0.052031509842872366,0.7642857142857142,0.05930546583437045,0.7574728260869565,0.056951921872704904 +flat_mae,reg,logistic,aabc_sex,61,0.3593813663804626,train,0.994328922495274,0.0033775592715295038,0.9941961885745005,0.0034491179463337603,0.9950980392156863,0.002919491592547562 +flat_mae,reg,logistic,aabc_sex,61,0.3593813663804626,test,0.8909090909090909,0.04050099478423211,0.89,0.04043020044364961,0.9001358695652174,0.037504256906303914 +flat_mae,reg,logistic,aabc_sex,62,0.046415888336127774,train,0.941398865784499,0.01004227114080717,0.9398830580860384,0.010337837118356537,0.939615756616548,0.01063560498989062 +flat_mae,reg,logistic,aabc_sex,62,0.046415888336127774,test,0.8545454545454545,0.04629397975961312,0.8505434782608696,0.047862400316285976,0.8505434782608696,0.04828409606519704 +flat_mae,reg,logistic,aabc_sex,63,0.046415888336127774,train,0.9262759924385633,0.01139033497205813,0.9242746242360844,0.011732421872987753,0.9235030335003956,0.011978251814399819 +flat_mae,reg,logistic,aabc_sex,63,0.046415888336127774,test,0.9636363636363636,0.024455052377307634,0.9626358695652174,0.025180684198686076,0.9626358695652174,0.025500294642975097 +flat_mae,reg,logistic,aabc_sex,64,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,64,2.782559402207126,test,0.8363636363636363,0.047424915863163074,0.8354935194416749,0.04733294796013217,0.8471467391304348,0.045386034602266416 +flat_mae,reg,logistic,aabc_sex,65,0.3593813663804626,train,0.9886578449905482,0.004777188587218002,0.9883855386416862,0.004886475957869993,0.9889797473548463,0.0047207470629441126 +flat_mae,reg,logistic,aabc_sex,65,0.3593813663804626,test,0.8181818181818182,0.05205894942155508,0.8106060606060606,0.05507410254315218,0.8070652173913043,0.05488739082852829 +flat_mae,reg,logistic,aabc_sex,66,0.3593813663804626,train,0.9905482041587902,0.004416388801918006,0.9903155058088658,0.004522671575574556,0.9906137342829509,0.00445628332185043 +flat_mae,reg,logistic,aabc_sex,66,0.3593813663804626,test,0.7818181818181819,0.057697988277232576,0.7758152173913043,0.05939055814143559,0.7758152173913043,0.059377869732161866 +flat_mae,reg,logistic,aabc_sex,67,0.046415888336127774,train,0.9300567107750473,0.010811809406583493,0.9283347429856068,0.01108030304818069,0.9285955039713942,0.011187111979230568 +flat_mae,reg,logistic,aabc_sex,67,0.046415888336127774,test,0.8545454545454545,0.047424985568773184,0.8521505376344086,0.048049540094908375,0.8566576086956521,0.047601271802435674 +flat_mae,reg,logistic,aabc_sex,68,0.3593813663804626,train,0.9924385633270322,0.003872347117926604,0.9922570257611241,0.003960402757503899,0.9928558867493186,0.003723325034432611 +flat_mae,reg,logistic,aabc_sex,68,0.3593813663804626,test,0.8727272727272727,0.04381925309893767,0.8663658451926415,0.047347091405729506,0.8600543478260869,0.04817775788454687 +flat_mae,reg,logistic,aabc_sex,69,0.3593813663804626,train,0.9924385633270322,0.003687501103456333,0.9922570257611241,0.0037716424856010723,0.9928558867493186,0.003548052617726196 +flat_mae,reg,logistic,aabc_sex,69,0.3593813663804626,test,0.8909090909090909,0.04210909090909091,0.8863636363636364,0.04457342609187009,0.8817934782608696,0.04543790561202978 +flat_mae,reg,logistic,aabc_sex,70,0.046415888336127774,train,0.9281663516068053,0.011599046685512879,0.9261694188164776,0.011969857310731462,0.9251370204285002,0.012277447740842974 +flat_mae,reg,logistic,aabc_sex,70,0.046415888336127774,test,0.9454545454545454,0.031217096396654466,0.9442755825734549,0.0317285226650785,0.9470108695652174,0.030670484456375192 +flat_mae,reg,logistic,aabc_sex,71,0.005994842503189409,train,0.8903591682419659,0.013404458885460983,0.887165342747867,0.013845923612240057,0.8857674609455142,0.01407052847102084 +flat_mae,reg,logistic,aabc_sex,71,0.005994842503189409,test,0.8181818181818182,0.046276438500932375,0.8106060606060606,0.048629342420939595,0.8070652173913043,0.04835284151535146 +flat_mae,reg,logistic,aabc_sex,72,0.3593813663804626,train,0.9867674858223062,0.004950014401194405,0.9864417081324122,0.005071911122335713,0.9867375948884786,0.00508184252107423 +flat_mae,reg,logistic,aabc_sex,72,0.3593813663804626,test,0.8909090909090909,0.039511797589584945,0.8863636363636364,0.041818291573856456,0.8817934782608696,0.042773780988049064 +flat_mae,reg,logistic,aabc_sex,73,0.005994842503189409,train,0.8903591682419659,0.013556482995595337,0.8873112181935712,0.013979456112653077,0.8863756264837774,0.014203838121769967 +flat_mae,reg,logistic,aabc_sex,73,0.005994842503189409,test,0.8909090909090909,0.0416412224239236,0.8879076086956521,0.04296266615608477,0.8879076086956521,0.04317413453219381 +flat_mae,reg,logistic,aabc_sex,74,0.005994842503189409,train,0.8846880907372401,0.013526653092093497,0.8810916179337231,0.014046763572033591,0.8790410035464111,0.014340201463571681 +flat_mae,reg,logistic,aabc_sex,74,0.005994842503189409,test,0.8545454545454545,0.04471602223619736,0.84593837535014,0.049063633382526534,0.8383152173913043,0.04951932124326968 +flat_mae,reg,logistic,aabc_sex,75,0.005994842503189409,train,0.8903591682419659,0.013856391502801041,0.8873112181935712,0.014230317367328956,0.8863756264837774,0.01424566961639594 +flat_mae,reg,logistic,aabc_sex,75,0.005994842503189409,test,0.8181818181818182,0.049479392154046134,0.8106060606060606,0.05228727205315103,0.8070652173913043,0.05233413836928761 +flat_mae,reg,logistic,aabc_sex,76,0.3593813663804626,train,0.994328922495274,0.003318555536096841,0.9941893034853195,0.0033971349501014303,0.9944898736774231,0.003248813350258968 +flat_mae,reg,logistic,aabc_sex,76,0.3593813663804626,test,0.8,0.053125505539646144,0.7931623931623932,0.0551915449551664,0.7914402173913043,0.054695090885542814 +flat_mae,reg,logistic,aabc_sex,77,0.046415888336127774,train,0.9338374291115312,0.011103049316509512,0.9322085406620606,0.011373129332985699,0.9324716433658665,0.011446567361853455 +flat_mae,reg,logistic,aabc_sex,77,0.046415888336127774,test,0.8181818181818182,0.04958884172009358,0.8035714285714286,0.0573032058906753,0.7948369565217391,0.055674807316404874 +flat_mae,reg,logistic,aabc_sex,78,0.046415888336127774,train,0.9338374291115312,0.010984776768975982,0.9321260333229466,0.011281538070720498,0.9318634778276034,0.0114531269173555 +flat_mae,reg,logistic,aabc_sex,78,0.046415888336127774,test,0.9272727272727272,0.03502018308262389,0.9260752688172043,0.03531169736734036,0.9313858695652174,0.03346139909217857 +flat_mae,reg,logistic,aabc_sex,79,0.005994842503189409,train,0.8941398865784499,0.013338114417779712,0.8913343310737447,0.013739289404571335,0.8908599314165128,0.013979802641627044 +flat_mae,reg,logistic,aabc_sex,79,0.005994842503189409,test,0.8181818181818182,0.05417039537918463,0.8106060606060606,0.05720888195773276,0.8070652173913043,0.05688133642373455 +flat_mae,reg,logistic,aabc_sex,80,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,80,2.782559402207126,test,0.7818181818181819,0.05539011458183155,0.7727272727272727,0.058410354850204696,0.7697010869565217,0.05810216337663149 +flat_mae,reg,logistic,aabc_sex,81,0.046415888336127774,train,0.9300567107750473,0.01089168391411197,0.9283347429856068,0.011167623028367652,0.9285955039713942,0.011279737716121018 +flat_mae,reg,logistic,aabc_sex,81,0.046415888336127774,test,0.8909090909090909,0.03941724252487055,0.8863636363636364,0.04177832021992357,0.8817934782608696,0.042891458152001447 +flat_mae,reg,logistic,aabc_sex,82,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,82,166.81005372000556,test,0.8181818181818182,0.0504568353288725,0.8131793478260869,0.05168427006548031,0.8131793478260869,0.05159759487276005 +flat_mae,reg,logistic,aabc_sex,83,0.3593813663804626,train,0.9886578449905482,0.004673793675998426,0.9883855386416862,0.00478092026463877,0.9889797473548463,0.004622178794431896 +flat_mae,reg,logistic,aabc_sex,83,0.3593813663804626,test,0.8363636363636363,0.04895761026955648,0.8250265111346766,0.054809234622594766,0.8165760869565217,0.054214354379246335 +flat_mae,reg,logistic,aabc_sex,84,0.3593813663804626,train,0.9924385633270322,0.0035886645365211765,0.9922570257611241,0.0036713146825437084,0.9928558867493186,0.0034656407450580563 +flat_mae,reg,logistic,aabc_sex,84,0.3593813663804626,test,0.7090909090909091,0.06206303380668123,0.7010869565217391,0.06354053865234086,0.7010869565217391,0.06367914072235896 +flat_mae,reg,logistic,aabc_sex,85,0.3593813663804626,train,0.994328922495274,0.003396305791377026,0.9941961885745005,0.0034681824684204884,0.9950980392156863,0.002935695692219684 +flat_mae,reg,logistic,aabc_sex,85,0.3593813663804626,test,0.8545454545454545,0.04689747565850953,0.8521505376344086,0.04756500518080481,0.8566576086956521,0.04708200989776993 +flat_mae,reg,logistic,aabc_sex,86,0.3593813663804626,train,0.9886578449905482,0.0045973581608771494,0.9883855386416862,0.004699615027699227,0.9889797473548463,0.004449261852262299 +flat_mae,reg,logistic,aabc_sex,86,0.3593813663804626,test,0.8727272727272727,0.04392177870519474,0.8711943793911007,0.04403775248405702,0.8783967391304348,0.042271319776496896 +flat_mae,reg,logistic,aabc_sex,87,0.046415888336127774,train,0.9376181474480151,0.010770071480713886,0.9358427325549344,0.011115201045061654,0.9345232861455495,0.01139472709546068 +flat_mae,reg,logistic,aabc_sex,87,0.046415888336127774,test,0.8,0.05320605197665155,0.790003471017008,0.05717435648394312,0.7853260869565217,0.05666330807208821 +flat_mae,reg,logistic,aabc_sex,88,0.3593813663804626,train,0.9905482041587902,0.004077590172392012,0.9903269809575008,0.004164479089394953,0.991221899821214,0.0038418780627149374 +flat_mae,reg,logistic,aabc_sex,88,0.3593813663804626,test,0.8727272727272727,0.047766304386154285,0.8683760683760684,0.049739173114040504,0.8661684782608696,0.050151244015797986 +flat_mae,reg,logistic,aabc_sex,89,0.005994842503189409,train,0.8960302457466919,0.013797728331637867,0.8930712209248908,0.014235888742879519,0.8918857528063542,0.01442304136163948 +flat_mae,reg,logistic,aabc_sex,89,0.005994842503189409,test,0.8909090909090909,0.03621224850998292,0.8821428571428571,0.04194787636921305,0.8695652173913043,0.04329725365324045 +flat_mae,reg,logistic,aabc_sex,90,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,90,2.782559402207126,test,0.7818181818181819,0.056554644093964906,0.7727272727272727,0.06004410905822466,0.7697010869565217,0.05978429177163898 +flat_mae,reg,logistic,aabc_sex,91,0.046415888336127774,train,0.9319470699432892,0.011046557673804684,0.9301434985474073,0.011364853160882149,0.9296213253612357,0.011613492728524685 +flat_mae,reg,logistic,aabc_sex,91,0.046415888336127774,test,0.8363636363636363,0.05058769980836077,0.8328267477203647,0.051873923019978174,0.8349184782608696,0.051999738906974804 +flat_mae,reg,logistic,aabc_sex,92,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,92,21.54434690031882,test,0.8727272727272727,0.04185427691249447,0.8683760683760684,0.04370431313452757,0.8661684782608696,0.04393026701198401 +flat_mae,reg,logistic,aabc_sex,93,0.046415888336127774,train,0.9281663516068053,0.011693865713752553,0.9261694188164776,0.012043995378834788,0.9251370204285002,0.012195374058543207 +flat_mae,reg,logistic,aabc_sex,93,0.046415888336127774,test,0.8,0.04880726134839545,0.7861435136090491,0.05531308635299152,0.7792119565217391,0.053934203886674865 +flat_mae,reg,logistic,aabc_sex,94,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,94,2.782559402207126,test,0.9272727272727272,0.03424402463409487,0.9252717391304348,0.03535015203081109,0.9252717391304348,0.03567900827990857 +flat_mae,reg,logistic,aabc_sex,95,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,95,2.782559402207126,test,0.8545454545454545,0.04994954148152787,0.8521505376344086,0.05062762858743472,0.8566576086956521,0.050283723307163526 +flat_mae,reg,logistic,aabc_sex,96,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,aabc_sex,96,2.782559402207126,test,0.9090909090909091,0.03805122254527551,0.905982905982906,0.03974101194087587,0.9035326086956521,0.04064363884622748 +flat_mae,reg,logistic,aabc_sex,97,0.046415888336127774,train,0.9319470699432892,0.011170120008528458,0.9301434985474073,0.01146767948241682,0.9296213253612357,0.011592550443449113 +flat_mae,reg,logistic,aabc_sex,97,0.046415888336127774,test,0.8545454545454545,0.0489776471194296,0.8505434782608696,0.050624120505501115,0.8505434782608696,0.05060459357324798 +flat_mae,reg,logistic,aabc_sex,98,0.046415888336127774,train,0.9338374291115312,0.0104985068265935,0.9320413294426397,0.010811739338010036,0.9312553122893402,0.011019512991212614 +flat_mae,reg,logistic,aabc_sex,98,0.046415888336127774,test,0.7818181818181819,0.051007696713015194,0.76890756302521,0.0557025552269082,0.7635869565217391,0.05478986124770611 +flat_mae,reg,logistic,aabc_sex,99,0.005994842503189409,train,0.888468809073724,0.014250140798243058,0.885294582446701,0.014696498031964206,0.8841334740174096,0.014883557035258423 +flat_mae,reg,logistic,aabc_sex,99,0.005994842503189409,test,0.8363636363636363,0.05079325783332185,0.8281846581048247,0.054949525480745914,0.8226902173913043,0.05508293690603394 +flat_mae,reg,logistic,aabc_sex,100,0.046415888336127774,train,0.9338374291115312,0.010896473638112343,0.9321260333229466,0.011190275715334217,0.9318634778276034,0.01135930733100482 +flat_mae,reg,logistic,aabc_sex,100,0.046415888336127774,test,0.8727272727272727,0.044214712782582676,0.8720505151213027,0.04406559251837321,0.8845108695652174,0.04077423863635771 diff --git a/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..11cf3afa48082293e527281c906e0bc2097d117e --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:30:55 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (aabc_sex reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic +model: flat_mae +representation: reg +dataset: aabc_sex +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/aabc_sex__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:21:56 time: 5.5767 data: 4.7610 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:58 time: 0.2954 data: 0.1156 max mem: 3581 +extract (train) [ 40/236] eta: 0:01:17 time: 0.2338 data: 0.0809 max mem: 3581 +extract (train) [ 60/236] eta: 0:00:59 time: 0.2224 data: 0.0769 max mem: 3581 +extract (train) [ 80/236] eta: 0:00:48 time: 0.2386 data: 0.0867 max mem: 3581 +extract (train) [100/236] eta: 0:00:40 time: 0.2242 data: 0.0746 max mem: 3581 +extract (train) [120/236] eta: 0:00:32 time: 0.2072 data: 0.0691 max mem: 3581 +extract (train) [140/236] eta: 0:00:26 time: 0.2281 data: 0.0781 max mem: 3581 +extract (train) [160/236] eta: 0:00:20 time: 0.2424 data: 0.0851 max mem: 3581 +extract (train) [180/236] eta: 0:00:14 time: 0.2347 data: 0.0813 max mem: 3581 +extract (train) [200/236] eta: 0:00:09 time: 0.2471 data: 0.0879 max mem: 3581 +extract (train) [220/236] eta: 0:00:04 time: 0.2203 data: 0.0723 max mem: 3581 +extract (train) [235/236] eta: 0:00:00 time: 0.2052 data: 0.0691 max mem: 3581 +extract (train) Total time: 0:01:01 (0.2587 s / it) +extract (validation) [ 0/29] eta: 0:02:26 time: 5.0574 data: 4.9030 max mem: 3581 +extract (validation) [20/29] eta: 0:00:04 time: 0.2342 data: 0.0804 max mem: 3581 +extract (validation) [28/29] eta: 0:00:00 time: 0.2101 data: 0.0683 max mem: 3581 +extract (validation) Total time: 0:00:12 (0.4147 s / it) +extract (test) [ 0/28] eta: 0:02:32 time: 5.4534 data: 5.2690 max mem: 3581 +extract (test) [20/28] eta: 0:00:04 time: 0.2562 data: 0.0911 max mem: 3581 +extract (test) [27/28] eta: 0:00:00 time: 0.2106 data: 0.0696 max mem: 3581 +extract (test) Total time: 0:00:12 (0.4382 s / it) +feature extraction time: 0:01:25 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | aabc_sex | | 0.0059948 | train | 0.8828 | 0.014136 | 0.87906 | 0.014668 | 0.87643 | 0.014872 | +| flat_mae | reg | logistic | aabc_sex | | 0.0059948 | test | 0.90909 | 0.041547 | 0.90713 | 0.041914 | 0.91667 | 0.039131 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04963457872248615, "f1": 0.8131793478260869, "f1_std": 0.05114581480552598, "bacc": 0.8131793478260869, "bacc_std": 0.05122066949963678} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 166.81005372000556, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05188512422363925, "f1": 0.8343927735028438, "f1_std": 0.05225702289866257, "bacc": 0.8410326086956521, "bacc_std": 0.05141273963797065} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04903397371733596, "f1": 0.8250265111346766, "f1_std": 0.054673373568046524, "bacc": 0.8165760869565217, "bacc_std": 0.05421412444926987} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05110554303522874, "f1": 0.8328267477203647, "f1_std": 0.05195841064492983, "bacc": 0.8349184782608696, "bacc_std": 0.051621821989718285} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04865443674400136, "f1": 0.8307692307692308, "f1_std": 0.050894657371479475, "bacc": 0.8288043478260869, "bacc_std": 0.051147502136806196} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04845929023332217, "f1": 0.8484848484848485, "f1_std": 0.051309649482925775, "bacc": 0.8444293478260869, "bacc_std": 0.05154459717771612} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.039096895947469706, "f1": 0.9071259709557582, "f1_std": 0.039851289716056816, "bacc": 0.9096467391304348, "bacc_std": 0.03949124032094313} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04792534138748972, "f1": 0.8343927735028438, "f1_std": 0.048189416985361305, "bacc": 0.8410326086956521, "bacc_std": 0.04727554170075885} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.053244063029918014, "f1": 0.790003471017008, "f1_std": 0.056985589296998825, "bacc": 0.7853260869565217, "bacc_std": 0.05647921769790101} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04517148226389471, "f1": 0.8711943793911007, "f1_std": 0.04534258973080901, "bacc": 0.8783967391304348, "bacc_std": 0.04378780932734153} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 166.81005372000556, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049584375038250535, "f1": 0.8484848484848485, "f1_std": 0.05218258416989441, "bacc": 0.8444293478260869, "bacc_std": 0.05246564644426059} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 21.54434690031882, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.054871558515175216, "f1": 0.7518222839291913, "f1_std": 0.058687219457857, "bacc": 0.7479619565217391, "bacc_std": 0.057904129587482} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0453570445201548, "f1": 0.8711943793911007, "f1_std": 0.04554111878442664, "bacc": 0.8783967391304348, "bacc_std": 0.044090095776604374} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04160437039979858, "f1": 0.8879076086956521, "f1_std": 0.04310602254016953, "bacc": 0.8879076086956521, "bacc_std": 0.043792733652901275} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04510373717817642, "f1": 0.84593837535014, "f1_std": 0.04954368172403884, "bacc": 0.8383152173913043, "bacc_std": 0.04970529678473679} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05391945017744537, "f1": 0.8151881720430108, "f1_std": 0.054833697507646544, "bacc": 0.8192934782608696, "bacc_std": 0.05480247372642879} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.044752395014168014, "f1": 0.84593837535014, "f1_std": 0.04931824190169963, "bacc": 0.8383152173913043, "bacc_std": 0.04956049769180805} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.05364177476556865, "f1": 0.7989365237620472, "f1_std": 0.053592871293765575, "bacc": 0.8097826086956521, "bacc_std": 0.05204354485833863} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049347732244612294, "f1": 0.8328267477203647, "f1_std": 0.05020600659950374, "bacc": 0.8349184782608696, "bacc_std": 0.04977145687166564} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04432910727801897, "f1": 0.8683760683760684, "f1_std": 0.046685481133414454, "bacc": 0.8661684782608696, "bacc_std": 0.047274144279217056} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05373807305689113, "f1": 0.7555555555555555, "f1_std": 0.05641368691541304, "bacc": 0.7540760869565217, "bacc_std": 0.05634798301556164} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03380050368835911, "f1": 0.9252717391304348, "f1_std": 0.034869660102593646, "bacc": 0.9252717391304348, "bacc_std": 0.03511004002002761} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0482386765713506, "f1": 0.8533333333333333, "f1_std": 0.048264050697296314, "bacc": 0.8627717391304348, "bacc_std": 0.046209459473510434} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.0559917349272666, "f1": 0.8151881720430108, "f1_std": 0.05675357600934304, "bacc": 0.8192934782608696, "bacc_std": 0.05618637607666179} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.023204901163688015, "f1": 0.9630376344086022, "f1_std": 0.02334271983025949, "bacc": 0.96875, "bacc_std": 0.01994171193754438} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04919744668507275, "f1": 0.8307692307692308, "f1_std": 0.051588624204507535, "bacc": 0.8288043478260869, "bacc_std": 0.05181706962135183} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.039749363527312565, "f1": 0.9071259709557582, "f1_std": 0.04031653999398343, "bacc": 0.9096467391304348, "bacc_std": 0.039346803556667836} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 166.81005372000556, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0475432817163859, "f1": 0.8521505376344086, "f1_std": 0.04820042148296156, "bacc": 0.8566576086956521, "bacc_std": 0.04759193630902456} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.055806229102238834, "f1": 0.7758152173913043, "f1_std": 0.05763422944091793, "bacc": 0.7758152173913043, "bacc_std": 0.05760300902191158} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05532695904072636, "f1": 0.7931623931623932, "f1_std": 0.05781394568470329, "bacc": 0.7914402173913043, "bacc_std": 0.05777918201523962} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05063838416076504, "f1": 0.8151881720430108, "f1_std": 0.05106182939170531, "bacc": 0.8192934782608696, "bacc_std": 0.050183454721416554} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04456437446070997, "f1": 0.8683760683760684, "f1_std": 0.046747055806663884, "bacc": 0.8661684782608696, "bacc_std": 0.0473827223443044} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044809370330562405, "f1": 0.8683760683760684, "f1_std": 0.04657877980078554, "bacc": 0.8661684782608696, "bacc_std": 0.04686970817246869} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042739634575372346, "f1": 0.8863636363636364, "f1_std": 0.04521444630226576, "bacc": 0.8817934782608696, "bacc_std": 0.04610032235431791} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 2.782559402207126, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05533817895076396, "f1": 0.78, "f1_std": 0.05546431213640254, "bacc": 0.7880434782608696, "bacc_std": 0.054517657117702505} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04644733977205609, "f1": 0.8505434782608696, "f1_std": 0.04800324616721444, "bacc": 0.8505434782608696, "bacc_std": 0.04820519480980968} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.054989453534291445, "f1": 0.7931623931623932, "f1_std": 0.05762257720419604, "bacc": 0.7914402173913043, "bacc_std": 0.05770085831524557} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04581321762429382, "f1": 0.8281846581048247, "f1_std": 0.049186712725874604, "bacc": 0.8226902173913043, "bacc_std": 0.0492319462198244} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 2.782559402207126, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05559012933371213, "f1": 0.7758152173913043, "f1_std": 0.05749668367635992, "bacc": 0.7758152173913043, "bacc_std": 0.05756003764419491} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 21.54434690031882, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.044288625109045604, "f1": 0.8484848484848485, "f1_std": 0.046984170912240414, "bacc": 0.8444293478260869, "bacc_std": 0.047367192487359544} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04012878441770913, "f1": 0.884453781512605, "f1_std": 0.044360877187844254, "bacc": 0.8756793478260869, "bacc_std": 0.04548866801733046} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05595067485320373, "f1": 0.7782258064516129, "f1_std": 0.05668690741802004, "bacc": 0.7819293478260869, "bacc_std": 0.0562098689810325} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04042633950309915, "f1": 0.89, "f1_std": 0.04041177629277131, "bacc": 0.9001358695652174, "bacc_std": 0.03744149922050894} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 166.81005372000556, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.054965221160203484, "f1": 0.7782258064516129, "f1_std": 0.05595547972546642, "bacc": 0.7819293478260869, "bacc_std": 0.05618779208233672} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.02804520553889753, "f1": 0.9447975911676145, "f1_std": 0.028078716100433287, "bacc": 0.953125, "bacc_std": 0.02410134850999006} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042093386928833555, "f1": 0.89, "f1_std": 0.04206652355762537, "bacc": 0.9001358695652174, "bacc_std": 0.0391341451667822} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.046064905700436205, "f1": 0.8307692307692308, "f1_std": 0.04809902735285537, "bacc": 0.8288043478260869, "bacc_std": 0.04855133895350049} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042143101354023965, "f1": 0.8863636363636364, "f1_std": 0.04481043781409082, "bacc": 0.8817934782608696, "bacc_std": 0.04567415846166708} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.0512920659854704, "f1": 0.7975911676145868, "f1_std": 0.05194149876681347, "bacc": 0.8036684782608696, "bacc_std": 0.05166028341999323} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03931815554386931, "f1": 0.905982905982906, "f1_std": 0.040837203264558045, "bacc": 0.9035326086956521, "bacc_std": 0.041432378512871} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.000774263682681127, "split": "test", "acc": 0.8, "acc_std": 0.051634007628709684, "f1": 0.790003471017008, "f1_std": 0.05592045627482848, "bacc": 0.7853260869565217, "bacc_std": 0.05560883327106212} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04952983573664636, "f1": 0.8521505376344086, "f1_std": 0.0501609205021172, "bacc": 0.8566576086956521, "bacc_std": 0.04965181763710768} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04511108053213598, "f1": 0.8711943793911007, "f1_std": 0.04536163174254404, "bacc": 0.8783967391304348, "bacc_std": 0.04402845516653931} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.000774263682681127, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049685277264593194, "f1": 0.8328267477203647, "f1_std": 0.0506353082570226, "bacc": 0.8349184782608696, "bacc_std": 0.05042154403869542} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049093050458346776, "f1": 0.8521505376344086, "f1_std": 0.049638901637455934, "bacc": 0.8566576086956521, "bacc_std": 0.04889513219384588} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03834086355550457, "f1": 0.9071259709557582, "f1_std": 0.03914050508419046, "bacc": 0.9096467391304348, "bacc_std": 0.03884152124556326} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04961230186020163, "f1": 0.8307692307692308, "f1_std": 0.051962137554074084, "bacc": 0.8288043478260869, "bacc_std": 0.052191243777938996} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.053263728684616395, "f1": 0.795677136102668, "f1_std": 0.05451382208194657, "bacc": 0.7975543478260869, "bacc_std": 0.05431029444554952} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05632785961311721, "f1": 0.7623795280824195, "f1_std": 0.05642476249978062, "bacc": 0.7724184782608696, "bacc_std": 0.05532867322262988} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.052031509842872366, "f1": 0.7642857142857142, "f1_std": 0.05930546583437045, "bacc": 0.7574728260869565, "bacc_std": 0.056951921872704904} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04050099478423211, "f1": 0.89, "f1_std": 0.04043020044364961, "bacc": 0.9001358695652174, "bacc_std": 0.037504256906303914} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04629397975961312, "f1": 0.8505434782608696, "f1_std": 0.047862400316285976, "bacc": 0.8505434782608696, "bacc_std": 0.04828409606519704} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.024455052377307634, "f1": 0.9626358695652174, "f1_std": 0.025180684198686076, "bacc": 0.9626358695652174, "bacc_std": 0.025500294642975097} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.047424915863163074, "f1": 0.8354935194416749, "f1_std": 0.04733294796013217, "bacc": 0.8471467391304348, "bacc_std": 0.045386034602266416} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05205894942155508, "f1": 0.8106060606060606, "f1_std": 0.05507410254315218, "bacc": 0.8070652173913043, "bacc_std": 0.05488739082852829} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.057697988277232576, "f1": 0.7758152173913043, "f1_std": 0.05939055814143559, "bacc": 0.7758152173913043, "bacc_std": 0.059377869732161866} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047424985568773184, "f1": 0.8521505376344086, "f1_std": 0.048049540094908375, "bacc": 0.8566576086956521, "bacc_std": 0.047601271802435674} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04381925309893767, "f1": 0.8663658451926415, "f1_std": 0.047347091405729506, "bacc": 0.8600543478260869, "bacc_std": 0.04817775788454687} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04210909090909091, "f1": 0.8863636363636364, "f1_std": 0.04457342609187009, "bacc": 0.8817934782608696, "bacc_std": 0.04543790561202978} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.031217096396654466, "f1": 0.9442755825734549, "f1_std": 0.0317285226650785, "bacc": 0.9470108695652174, "bacc_std": 0.030670484456375192} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.046276438500932375, "f1": 0.8106060606060606, "f1_std": 0.048629342420939595, "bacc": 0.8070652173913043, "bacc_std": 0.04835284151535146} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.039511797589584945, "f1": 0.8863636363636364, "f1_std": 0.041818291573856456, "bacc": 0.8817934782608696, "bacc_std": 0.042773780988049064} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.0416412224239236, "f1": 0.8879076086956521, "f1_std": 0.04296266615608477, "bacc": 0.8879076086956521, "bacc_std": 0.04317413453219381} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04471602223619736, "f1": 0.84593837535014, "f1_std": 0.049063633382526534, "bacc": 0.8383152173913043, "bacc_std": 0.04951932124326968} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.049479392154046134, "f1": 0.8106060606060606, "f1_std": 0.05228727205315103, "bacc": 0.8070652173913043, "bacc_std": 0.05233413836928761} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.053125505539646144, "f1": 0.7931623931623932, "f1_std": 0.0551915449551664, "bacc": 0.7914402173913043, "bacc_std": 0.054695090885542814} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04958884172009358, "f1": 0.8035714285714286, "f1_std": 0.0573032058906753, "bacc": 0.7948369565217391, "bacc_std": 0.055674807316404874} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03502018308262389, "f1": 0.9260752688172043, "f1_std": 0.03531169736734036, "bacc": 0.9313858695652174, "bacc_std": 0.03346139909217857} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05417039537918463, "f1": 0.8106060606060606, "f1_std": 0.05720888195773276, "bacc": 0.8070652173913043, "bacc_std": 0.05688133642373455} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05539011458183155, "f1": 0.7727272727272727, "f1_std": 0.058410354850204696, "bacc": 0.7697010869565217, "bacc_std": 0.05810216337663149} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03941724252487055, "f1": 0.8863636363636364, "f1_std": 0.04177832021992357, "bacc": 0.8817934782608696, "bacc_std": 0.042891458152001447} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 166.81005372000556, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.0504568353288725, "f1": 0.8131793478260869, "f1_std": 0.05168427006548031, "bacc": 0.8131793478260869, "bacc_std": 0.05159759487276005} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04895761026955648, "f1": 0.8250265111346766, "f1_std": 0.054809234622594766, "bacc": 0.8165760869565217, "bacc_std": 0.054214354379246335} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.7090909090909091, "acc_std": 0.06206303380668123, "f1": 0.7010869565217391, "f1_std": 0.06354053865234086, "bacc": 0.7010869565217391, "bacc_std": 0.06367914072235896} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04689747565850953, "f1": 0.8521505376344086, "f1_std": 0.04756500518080481, "bacc": 0.8566576086956521, "bacc_std": 0.04708200989776993} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04392177870519474, "f1": 0.8711943793911007, "f1_std": 0.04403775248405702, "bacc": 0.8783967391304348, "bacc_std": 0.042271319776496896} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05320605197665155, "f1": 0.790003471017008, "f1_std": 0.05717435648394312, "bacc": 0.7853260869565217, "bacc_std": 0.05666330807208821} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.047766304386154285, "f1": 0.8683760683760684, "f1_std": 0.049739173114040504, "bacc": 0.8661684782608696, "bacc_std": 0.050151244015797986} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03621224850998292, "f1": 0.8821428571428571, "f1_std": 0.04194787636921305, "bacc": 0.8695652173913043, "bacc_std": 0.04329725365324045} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 2.782559402207126, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.056554644093964906, "f1": 0.7727272727272727, "f1_std": 0.06004410905822466, "bacc": 0.7697010869565217, "bacc_std": 0.05978429177163898} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05058769980836077, "f1": 0.8328267477203647, "f1_std": 0.051873923019978174, "bacc": 0.8349184782608696, "bacc_std": 0.051999738906974804} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 21.54434690031882, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04185427691249447, "f1": 0.8683760683760684, "f1_std": 0.04370431313452757, "bacc": 0.8661684782608696, "bacc_std": 0.04393026701198401} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.04880726134839545, "f1": 0.7861435136090491, "f1_std": 0.05531308635299152, "bacc": 0.7792119565217391, "bacc_std": 0.053934203886674865} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 2.782559402207126, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03424402463409487, "f1": 0.9252717391304348, "f1_std": 0.03535015203081109, "bacc": 0.9252717391304348, "bacc_std": 0.03567900827990857} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04994954148152787, "f1": 0.8521505376344086, "f1_std": 0.05062762858743472, "bacc": 0.8566576086956521, "bacc_std": 0.050283723307163526} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 2.782559402207126, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03805122254527551, "f1": 0.905982905982906, "f1_std": 0.03974101194087587, "bacc": 0.9035326086956521, "bacc_std": 0.04064363884622748} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0489776471194296, "f1": 0.8505434782608696, "f1_std": 0.050624120505501115, "bacc": 0.8505434782608696, "bacc_std": 0.05060459357324798} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.051007696713015194, "f1": 0.76890756302521, "f1_std": 0.0557025552269082, "bacc": 0.7635869565217391, "bacc_std": 0.05478986124770611} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05079325783332185, "f1": 0.8281846581048247, "f1_std": 0.054949525480745914, "bacc": 0.8226902173913043, "bacc_std": 0.05508293690603394} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044214712782582676, "f1": 0.8720505151213027, "f1_std": 0.04406559251837321, "bacc": 0.8845108695652174, "bacc_std": 0.04077423863635771} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | aabc_sex | train | 100 | 9.4697 | 36.467 | 0.95405 | 0.046066 | 0.95272 | 0.0475 | 0.9523 | 0.048345 | +| flat_mae | reg | logistic | aabc_sex | test | 100 | 9.4697 | 36.467 | 0.84655 | 0.0483 | 0.84159 | 0.050194 | 0.84177 | 0.051342 | + + +done! total time: 0:05:38 diff --git a/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bca8b5e1a752d7eeb7cc179646b7829d16805a5e --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..e57aa330812beb6a1a7bd7120a19caf4e29fb48c --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,train,0.7977207977207977,0.015574996982413302,0.7941859024652104,0.015968625981728622,0.792714557751658,0.015992012664660285 +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,test,0.5806451612903226,0.040400090020608334,0.5643243243243243,0.0424587205243781,0.5687352710133542,0.04076138314036456 +flat_mae,patch,logistic,abide_dx,1,0.3593813663804626,train,0.9031339031339032,0.010905429167233527,0.9018593597092128,0.011069515392973623,0.900922849760059,0.011160739341319453 +flat_mae,patch,logistic,abide_dx,1,0.3593813663804626,test,0.7338709677419355,0.039898531551664757,0.732461588754495,0.04015636773697988,0.733718487394958,0.04022913827888883 +flat_mae,patch,logistic,abide_dx,2,0.046415888336127774,train,0.7962962962962963,0.015676965594968496,0.7923313232449654,0.01610566995540936,0.7904392764857882,0.016066927357321847 +flat_mae,patch,logistic,abide_dx,2,0.046415888336127774,test,0.6451612903225806,0.04302479743809341,0.6428384393820372,0.04325713733120763,0.6433823529411764,0.043239258225716316 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,train,0.9017094017094017,0.011377267705232327,0.9002469217161368,0.011589061647985224,0.8987449243263197,0.011732048593279192 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,test,0.6048387096774194,0.045481042579640946,0.6017043592264831,0.0459588736314719,0.601890756302521,0.045951187707386966 +flat_mae,patch,logistic,abide_dx,4,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,4,166.81005372000556,test,0.6209677419354839,0.04362448548247671,0.6167554415729598,0.044398483762407785,0.6165966386554622,0.044195145317579565 +flat_mae,patch,logistic,abide_dx,5,0.3593813663804626,train,0.9088319088319088,0.01070576082429409,0.9074428109961105,0.01091932771841244,0.9057954964931709,0.01106361605271696 +flat_mae,patch,logistic,abide_dx,5,0.3593813663804626,test,0.5725806451612904,0.04314900495390325,0.5723303182143554,0.04311625100167552,0.5756302521008403,0.04311184069609689 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,train,0.8091168091168092,0.014696579470665832,0.8059269162210339,0.015019056432290756,0.8044296788482834,0.015024109476112926 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,test,0.5564516129032258,0.043407954573918404,0.5550336008351275,0.04336146142899907,0.5561974789915967,0.04339655050556934 +flat_mae,patch,logistic,abide_dx,7,0.005994842503189409,train,0.7051282051282052,0.017308782632464013,0.6975668233839698,0.01799980297028692,0.6962347729789591,0.017765624993519323 +flat_mae,patch,logistic,abide_dx,7,0.005994842503189409,test,0.5887096774193549,0.04559811778432817,0.5765651155005022,0.047826286957323653,0.5777310924369747,0.0462758011114918 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,train,0.9116809116809117,0.011110886110659262,0.9102708943223519,0.011357101912043702,0.9083794758213363,0.011567054495860214 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,test,0.6048387096774194,0.04181051789762769,0.5931704050887178,0.04377290473504135,0.5940126050420168,0.042582323096551 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,train,0.9017094017094017,0.011241674273021371,0.9001037363652675,0.01146585236810973,0.8981543004798819,0.011547953777699793 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,test,0.717741935483871,0.03961406888586514,0.710955710955711,0.041083806048054436,0.7095588235294117,0.04029809683430368 +flat_mae,patch,logistic,abide_dx,10,0.046415888336127774,train,0.7948717948717948,0.014518151766266765,0.7906231099990886,0.01502502333469632,0.7885566629752676,0.015033693797930913 +flat_mae,patch,logistic,abide_dx,10,0.046415888336127774,test,0.6693548387096774,0.04552334994503944,0.6682977751680041,0.04567346535030356,0.6701680672268908,0.04579211762227403 +flat_mae,patch,logistic,abide_dx,11,0.046415888336127774,train,0.801994301994302,0.015330614046660694,0.7983004636414561,0.01574131823777327,0.7964931709117755,0.015733168254137363 +flat_mae,patch,logistic,abide_dx,11,0.046415888336127774,test,0.6048387096774194,0.046209435342494894,0.5972691721349506,0.046941434265443706,0.5971638655462186,0.04643584114537313 +flat_mae,patch,logistic,abide_dx,12,0.046415888336127774,train,0.7863247863247863,0.015102160474997376,0.7827540106951871,0.015416667515814981,0.7813953488372093,0.01540122205194114 +flat_mae,patch,logistic,abide_dx,12,0.046415888336127774,test,0.5806451612903226,0.04331269553113425,0.5766806722689075,0.04374347207581752,0.5766806722689075,0.04368488590913262 +flat_mae,patch,logistic,abide_dx,13,2.782559402207126,train,0.9971509971509972,0.0020370091445104884,0.9971207087486158,0.0020587963568621985,0.9971207087486158,0.0020728407782225305 +flat_mae,patch,logistic,abide_dx,13,2.782559402207126,test,0.6451612903225806,0.044345802598628706,0.6405797101449275,0.04516140214041959,0.6402310924369747,0.044987559677534275 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,train,0.9074074074074074,0.010943249136631344,0.9059630900491514,0.011165817653359464,0.9042081949058693,0.011296127713334872 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,test,0.5725806451612904,0.042787483339045244,0.5643931861867832,0.04407580691360447,0.5646008403361344,0.04336659147369708 +flat_mae,patch,logistic,abide_dx,15,0.3593813663804626,train,0.9002849002849003,0.011135773637299791,0.898905529953917,0.011350571427418014,0.8977482465854558,0.011541150432860948 +flat_mae,patch,logistic,abide_dx,15,0.3593813663804626,test,0.6048387096774194,0.04211552851307322,0.5972691721349506,0.043302339709568954,0.5971638655462186,0.04275631845640377 +flat_mae,patch,logistic,abide_dx,16,0.005994842503189409,train,0.707977207977208,0.01674512397549605,0.7010314632651578,0.017375355091107953,0.6997046880767811,0.01717196963893119 +flat_mae,patch,logistic,abide_dx,16,0.005994842503189409,test,0.5887096774193549,0.043811306087618726,0.5682392298764252,0.04659106946718435,0.5730042016806722,0.04432977339076404 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,train,0.9116809116809117,0.010978095990192182,0.9106334748720812,0.011127388565901947,0.9101513473606497,0.011216502352363955 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,test,0.6209677419354839,0.04068296692876455,0.6118548118548119,0.042326546207158235,0.6118697478991597,0.041515189813807474 +flat_mae,patch,logistic,abide_dx,18,0.005994842503189409,train,0.7207977207977208,0.01612759691678238,0.7121024439236692,0.01680092081733238,0.7104466592838686,0.01645747264611013 +flat_mae,patch,logistic,abide_dx,18,0.005994842503189409,test,0.6129032258064516,0.042471447837030094,0.6025641025641025,0.044010330107102195,0.6029411764705883,0.042999529391945894 +flat_mae,patch,logistic,abide_dx,19,0.046415888336127774,train,0.7834757834757835,0.014906105697935561,0.7798573975044563,0.015151665056512055,0.778516057585825,0.015074866782352345 +flat_mae,patch,logistic,abide_dx,19,0.046415888336127774,test,0.6532258064516129,0.040400518227479545,0.6429862738533645,0.04244880271821085,0.6428571428571428,0.04137621180961866 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,train,0.7934472934472935,0.014817252703205,0.7897574123989218,0.015188641267834672,0.7881506090808417,0.015193406004701536 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,test,0.6612903225806451,0.042276836510162064,0.6555555555555556,0.043072963178546926,0.654936974789916,0.042595529939020146 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,train,0.801994301994302,0.014721257292465609,0.799048146645551,0.01502789654233024,0.7979697305278701,0.015091790398800247 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,test,0.6129032258064516,0.04472172020466694,0.6063492063492064,0.04558197485673346,0.60609243697479,0.04510230232821176 +flat_mae,patch,logistic,abide_dx,22,2.782559402207126,train,0.9957264957264957,0.002248025732647508,0.995679778450177,0.002273482423153284,0.9955334071613142,0.002374108751791092 +flat_mae,patch,logistic,abide_dx,22,2.782559402207126,test,0.6370967741935484,0.04416437405289325,0.626380984265149,0.04586084430064009,0.6265756302521008,0.04469554582318528 +flat_mae,patch,logistic,abide_dx,23,0.3593813663804626,train,0.905982905982906,0.011250955376462136,0.9046172089231453,0.011468849879224217,0.9032115171650055,0.01165632070750981 +flat_mae,patch,logistic,abide_dx,23,0.3593813663804626,test,0.6370967741935484,0.03972230283293903,0.6217205613178767,0.042685202489317,0.6234243697478992,0.04072999432458781 +flat_mae,patch,logistic,abide_dx,24,0.046415888336127774,train,0.8062678062678063,0.014465418556794781,0.8027307590584501,0.014900532947827182,0.8009597637504614,0.01497942784054943 +flat_mae,patch,logistic,abide_dx,24,0.046415888336127774,test,0.5645161290322581,0.0428263287345812,0.555142173797502,0.043999052522899665,0.555672268907563,0.04333304290368861 +flat_mae,patch,logistic,abide_dx,25,0.046415888336127774,train,0.7891737891737892,0.015948648479311763,0.7858079412856142,0.016277292465227525,0.7845699520118126,0.016295043340450757 +flat_mae,patch,logistic,abide_dx,25,0.046415888336127774,test,0.5725806451612904,0.04543001558812356,0.5712141971683957,0.045520223365331326,0.5724789915966386,0.04557748695806142 +flat_mae,patch,logistic,abide_dx,26,0.046415888336127774,train,0.7977207977207977,0.014074736999508876,0.7944914031253866,0.014321925259882646,0.7932078257659654,0.014311850383967803 +flat_mae,patch,logistic,abide_dx,26,0.046415888336127774,test,0.5967741935483871,0.04515920214066308,0.58994708994709,0.046033520818150334,0.5898109243697479,0.04560616510285363 +flat_mae,patch,logistic,abide_dx,27,0.046415888336127774,train,0.8034188034188035,0.014897281705051347,0.7998297408093097,0.015224878387476009,0.7980804724990771,0.015196934957496857 +flat_mae,patch,logistic,abide_dx,27,0.046415888336127774,test,0.6129032258064516,0.045004092772603685,0.607905138339921,0.04602485018618479,0.6076680672268908,0.04574039648904304 +flat_mae,patch,logistic,abide_dx,28,0.3593813663804626,train,0.9088319088319088,0.010844268565892993,0.9075075965315349,0.011037705579138931,0.9060908084163898,0.011159004045967598 +flat_mae,patch,logistic,abide_dx,28,0.3593813663804626,test,0.5967741935483871,0.04191655900517774,0.5950888192267503,0.0423881854050567,0.5961134453781513,0.042674115519398516 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,train,0.7891737891737892,0.016129648940840242,0.7853246495636075,0.016525178596861808,0.7836840162421558,0.01650086264072781 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,test,0.6370967741935484,0.04147550459958024,0.6283716283716283,0.042942865262446187,0.6281512605042017,0.04210447387832534 +flat_mae,patch,logistic,abide_dx,30,0.005994842503189409,train,0.6923076923076923,0.01715682573155303,0.6836703188497709,0.017772542811858257,0.6825396825396826,0.01746701831356405 +flat_mae,patch,logistic,abide_dx,30,0.005994842503189409,test,0.5806451612903226,0.042344211624075805,0.5694444444444444,0.04355632475767991,0.5703781512605042,0.042508557375794884 +flat_mae,patch,logistic,abide_dx,31,0.046415888336127774,train,0.7948717948717948,0.015056559304322937,0.7911266860618884,0.015478233285093136,0.7894425987449243,0.01549896017551981 +flat_mae,patch,logistic,abide_dx,31,0.046415888336127774,test,0.5967741935483871,0.044854426927388016,0.5880946053680574,0.046408095846481714,0.5882352941176471,0.045502573451785315 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,train,0.8917378917378918,0.011686962160315721,0.8905213736949242,0.011830972931479502,0.8902916205241787,0.011894334836282324 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,test,0.6774193548387096,0.04088929497176788,0.6704756842944459,0.04183985616368223,0.6696428571428572,0.04115381776723856 +flat_mae,patch,logistic,abide_dx,33,0.3593813663804626,train,0.9088319088319088,0.010663529182202605,0.9076323385498475,0.010820878600466929,0.9066814322628276,0.010889097200785918 +flat_mae,patch,logistic,abide_dx,33,0.3593813663804626,test,0.6370967741935484,0.043086813123512376,0.6330637206549615,0.04380827171333247,0.6328781512605042,0.0435930757647841 +flat_mae,patch,logistic,abide_dx,34,0.3593813663804626,train,0.9145299145299145,0.010662978457501896,0.9134615384615385,0.010812615373729864,0.9127353266888151,0.010884881383928994 +flat_mae,patch,logistic,abide_dx,34,0.3593813663804626,test,0.5967741935483871,0.044659245750565076,0.5950888192267503,0.044778884040934476,0.5961134453781513,0.04496267669435562 +flat_mae,patch,logistic,abide_dx,35,0.046415888336127774,train,0.8048433048433048,0.014653647508843355,0.8020764948602122,0.014908391036033773,0.8011443337024733,0.014929935493665983 +flat_mae,patch,logistic,abide_dx,35,0.046415888336127774,test,0.6370967741935484,0.04488401053454261,0.6351748937561295,0.04507663643681416,0.6360294117647058,0.04501204296125422 +flat_mae,patch,logistic,abide_dx,36,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,36,21.54434690031882,test,0.6290322580645161,0.03972427079137321,0.6169755573462261,0.04193865901056573,0.6176470588235294,0.040609827863831244 +flat_mae,patch,logistic,abide_dx,37,0.046415888336127774,train,0.7877492877492878,0.014783895798164382,0.7839576168788921,0.015168282402782611,0.782392026578073,0.015171565309054945 +flat_mae,patch,logistic,abide_dx,37,0.046415888336127774,test,0.6129032258064516,0.04183166740045872,0.607905138339921,0.04238009285513294,0.6076680672268908,0.04210498604187981 +flat_mae,patch,logistic,abide_dx,38,0.3593813663804626,train,0.9017094017094017,0.010929248390707953,0.9006349043540695,0.011068322414825048,0.900516795865633,0.011176281469136198 +flat_mae,patch,logistic,abide_dx,38,0.3593813663804626,test,0.5967741935483871,0.04249334587071535,0.5836690840719849,0.04454817162913641,0.5850840336134454,0.04317230953145771 +flat_mae,patch,logistic,abide_dx,39,0.3593813663804626,train,0.9074074074074074,0.010734224220402846,0.9058249040809208,0.010994053889291627,0.9036175710594315,0.011180929960176578 +flat_mae,patch,logistic,abide_dx,39,0.3593813663804626,test,0.5967741935483871,0.04588641362380527,0.5950888192267503,0.046039857352814524,0.5961134453781513,0.046256106002138905 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,train,0.7962962962962963,0.014966270250705934,0.7928147889780258,0.01527830078080549,0.7913252122554448,0.015248444420239222 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,test,0.6370967741935484,0.0420930662067365,0.6190346145968457,0.04584907230968273,0.6218487394957983,0.04326396705230285 +flat_mae,patch,logistic,abide_dx,41,0.046415888336127774,train,0.7834757834757835,0.015420284439603497,0.7795226130653266,0.015853519143488416,0.7779254337393873,0.01584968861132571 +flat_mae,patch,logistic,abide_dx,41,0.046415888336127774,test,0.6451612903225806,0.04100903113651263,0.6428384393820372,0.041432676486505685,0.6433823529411764,0.041448266599580244 +flat_mae,patch,logistic,abide_dx,42,0.3593813663804626,train,0.905982905982906,0.010863907977998119,0.9046823568136932,0.01105667806227186,0.9035068290882244,0.011183452784740592 +flat_mae,patch,logistic,abide_dx,42,0.3593813663804626,test,0.5806451612903226,0.04451401058540449,0.5766806722689075,0.045033613960366134,0.5766806722689075,0.04485464621953559 +flat_mae,patch,logistic,abide_dx,43,0.005994842503189409,train,0.7136752136752137,0.01676481171559889,0.7066016805185029,0.017253727557968907,0.7051679586563308,0.017039687832455654 +flat_mae,patch,logistic,abide_dx,43,0.005994842503189409,test,0.6290322580645161,0.0402157649727232,0.6119727891156463,0.04276951447317943,0.6144957983193278,0.04086769650732438 +flat_mae,patch,logistic,abide_dx,44,0.046415888336127774,train,0.7806267806267806,0.01570580904390856,0.7767931618284676,0.01603321238053933,0.7753414544112218,0.01598738448707762 +flat_mae,patch,logistic,abide_dx,44,0.046415888336127774,test,0.6290322580645161,0.04037375784566807,0.6091008771929824,0.04471320690113011,0.6129201680672269,0.041638970414800684 +flat_mae,patch,logistic,abide_dx,45,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,45,10000.0,test,0.5241935483870968,0.04464200005123429,0.5239148825405089,0.04457550060441844,0.5267857142857143,0.04477871197193377 +flat_mae,patch,logistic,abide_dx,46,0.005994842503189409,train,0.7193732193732194,0.015757673789124276,0.7119085066713885,0.016341098547964287,0.7103359173126615,0.016103994848874342 +flat_mae,patch,logistic,abide_dx,46,0.005994842503189409,test,0.5403225806451613,0.04323021161953913,0.5292707292707293,0.044384582898955374,0.5304621848739496,0.04361377345612586 +flat_mae,patch,logistic,abide_dx,47,0.3593813663804626,train,0.8945868945868946,0.011903422686779321,0.8933367280731292,0.012044553618334338,0.8928755998523441,0.012062220079259042 +flat_mae,patch,logistic,abide_dx,47,0.3593813663804626,test,0.6209677419354839,0.04393022563390241,0.6203504657677024,0.04402052655954683,0.6228991596638656,0.044239438471120156 +flat_mae,patch,logistic,abide_dx,48,0.3593813663804626,train,0.9102564102564102,0.01073768575202736,0.9089840815780538,0.010944038231040964,0.9076781100036914,0.01112585524569739 +flat_mae,patch,logistic,abide_dx,48,0.3593813663804626,test,0.5967741935483871,0.045914978206738656,0.5915678524374176,0.04669320335404661,0.5913865546218487,0.04636034494286604 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,train,0.7193732193732194,0.016728759344113647,0.712440453045498,0.017382887990582026,0.7109265411590993,0.017176909035981923 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,test,0.5483870967741935,0.042778313794648616,0.5337093741606231,0.044664396052974044,0.5362394957983193,0.04322821788943428 +flat_mae,patch,logistic,abide_dx,50,0.3593813663804626,train,0.9188034188034188,0.010233568661483118,0.9178146341463416,0.01037485174665293,0.917201919527501,0.01047306022529338 +flat_mae,patch,logistic,abide_dx,50,0.3593813663804626,test,0.5483870967741935,0.047120243820798824,0.5425559947299078,0.047848660522099275,0.5425420168067226,0.04745547337134893 +flat_mae,patch,logistic,abide_dx,51,0.3593813663804626,train,0.9102564102564102,0.010586690646620864,0.9091635430038512,0.010724504613349686,0.9085640457733482,0.010794528726151708 +flat_mae,patch,logistic,abide_dx,51,0.3593813663804626,test,0.6290322580645161,0.042790903173231254,0.6242424242424243,0.043519919296724247,0.6239495798319328,0.0432329479497315 +flat_mae,patch,logistic,abide_dx,52,0.046415888336127774,train,0.8133903133903134,0.014995268231473983,0.8102009605323173,0.015316113772188654,0.8086009597637505,0.015318193977614802 +flat_mae,patch,logistic,abide_dx,52,0.046415888336127774,test,0.6290322580645161,0.04489927158618303,0.6242424242424243,0.0454097734149624,0.6239495798319328,0.045281433314186034 +flat_mae,patch,logistic,abide_dx,53,0.3593813663804626,train,0.9230769230769231,0.010193426426962466,0.9218488434420484,0.010433558661424345,0.9198966408268734,0.010685519563903198 +flat_mae,patch,logistic,abide_dx,53,0.3593813663804626,test,0.5806451612903226,0.044389356070595604,0.5752305665349143,0.044824359479374805,0.5751050420168067,0.044587687116333055 +flat_mae,patch,logistic,abide_dx,54,0.046415888336127774,train,0.792022792022792,0.015402843216733075,0.7891458196181699,0.015649248460117594,0.7883351790328534,0.015686040365202106 +flat_mae,patch,logistic,abide_dx,54,0.046415888336127774,test,0.6774193548387096,0.04299762534884873,0.6704756842944459,0.04419223682214694,0.6696428571428572,0.043589467297855136 +flat_mae,patch,logistic,abide_dx,55,0.046415888336127774,train,0.8105413105413105,0.015871715604779204,0.80685360833273,0.016293405153652407,0.8048357327427095,0.016286885703606428 +flat_mae,patch,logistic,abide_dx,55,0.046415888336127774,test,0.5887096774193549,0.0442670068789713,0.5854473942969518,0.044592479342382134,0.585609243697479,0.044518080493830436 +flat_mae,patch,logistic,abide_dx,56,0.046415888336127774,train,0.782051282051282,0.0157927942603297,0.7774444004699514,0.01620034646007039,0.775452196382429,0.016095807523215704 +flat_mae,patch,logistic,abide_dx,56,0.046415888336127774,test,0.6935483870967742,0.03925508245360245,0.6869519000797236,0.04070066894636884,0.6859243697478992,0.04003274187544346 +flat_mae,patch,logistic,abide_dx,57,0.046415888336127774,train,0.801994301994302,0.015946773708938534,0.7987597007937998,0.016313236335748157,0.7973791066814322,0.016337225890988606 +flat_mae,patch,logistic,abide_dx,57,0.046415888336127774,test,0.6532258064516129,0.04268721695256142,0.6493719997369632,0.043475487552675995,0.6491596638655461,0.04325954477464104 +flat_mae,patch,logistic,abide_dx,58,0.3593813663804626,train,0.9131054131054132,0.010262019279817843,0.911620294599018,0.010512291344009545,0.9093761535622,0.010738836427410286 +flat_mae,patch,logistic,abide_dx,58,0.3593813663804626,test,0.6451612903225806,0.042149343026740616,0.6443285528031291,0.04217520536259005,0.6465336134453781,0.04209916134701612 +flat_mae,patch,logistic,abide_dx,59,0.046415888336127774,train,0.7877492877492878,0.015134401915715655,0.78412170320088,0.015488236416089226,0.782687338501292,0.015497059037456342 +flat_mae,patch,logistic,abide_dx,59,0.046415888336127774,test,0.6290322580645161,0.04684795074572506,0.6255252100840336,0.04717263490085371,0.6255252100840336,0.0471976047066089 +flat_mae,patch,logistic,abide_dx,60,0.046415888336127774,train,0.782051282051282,0.015785487442128685,0.7784908936795062,0.016067773294651522,0.7772240679217424,0.01602324232278448 +flat_mae,patch,logistic,abide_dx,60,0.046415888336127774,test,0.6290322580645161,0.04290766409252793,0.6227513227513227,0.04372948730197435,0.6223739495798319,0.043242652799939524 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,train,0.8005698005698005,0.014339650034230112,0.7970846925713342,0.0146265259765294,0.7954964931709118,0.014590198123464123 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,test,0.6532258064516129,0.04402450303917119,0.650475254015077,0.04437488190875017,0.6507352941176471,0.04434877157813647 +flat_mae,patch,logistic,abide_dx,62,0.3593813663804626,train,0.9088319088319088,0.01103935335540534,0.9076323385498475,0.011223392079945528,0.9066814322628276,0.011350606967958239 +flat_mae,patch,logistic,abide_dx,62,0.3593813663804626,test,0.5887096774193549,0.04424455203768718,0.5880398671096345,0.04433918970543312,0.5903361344537814,0.04447009011659135 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,train,0.7977207977207977,0.014776725910680551,0.7941859024652104,0.015033101978144855,0.7926172019195274,0.014960837789884648 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,test,0.5967741935483871,0.04321079120528677,0.5963541666666667,0.043323704879572066,0.5992647058823529,0.04339618774456982 +flat_mae,patch,logistic,abide_dx,64,0.046415888336127774,train,0.8034188034188035,0.015241225574784098,0.8004235612103632,0.015540065883651791,0.7992617201919527,0.015559000768122855 +flat_mae,patch,logistic,abide_dx,64,0.046415888336127774,test,0.7338709677419355,0.03970155311141992,0.7260127217944425,0.04168513554954457,0.7242647058823529,0.040785025979836104 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,train,0.9074074074074074,0.011064033512157165,0.9063951997538335,0.011205396240212015,0.9062753783684017,0.01132057742211847 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,test,0.5645161290322581,0.04671463393770097,0.5588932806324111,0.04758847117687451,0.5588235294117647,0.04726580237232992 +flat_mae,patch,logistic,abide_dx,66,0.005994842503189409,train,0.7122507122507122,0.016364363604051812,0.7038840628132308,0.01711916966985851,0.7023994093761535,0.016785858220273054 +flat_mae,patch,logistic,abide_dx,66,0.005994842503189409,test,0.5967741935483871,0.042428893199655385,0.58994708994709,0.043080823591280334,0.5898109243697479,0.042662183278704614 +flat_mae,patch,logistic,abide_dx,67,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,67,21.54434690031882,test,0.6048387096774194,0.04114198404545738,0.5972691721349506,0.042150517365606294,0.5971638655462186,0.041603199338319595 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,train,0.9031339031339032,0.011082632434683175,0.9014411363917909,0.011345292405081933,0.8991509782207456,0.011500534750824415 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,test,0.6451612903225806,0.042282986371878384,0.6428384393820372,0.04264398349677213,0.6433823529411764,0.04273815631606301 +flat_mae,patch,logistic,abide_dx,69,0.3593813663804626,train,0.8974358974358975,0.011431259945745625,0.8960171165240289,0.011637892191349346,0.8948689553340716,0.011769725991797573 +flat_mae,patch,logistic,abide_dx,69,0.3593813663804626,test,0.6693548387096774,0.04031663181924675,0.6553454003118433,0.04322253876901355,0.6559873949579832,0.041379500689227015 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,train,0.7136752136752137,0.01759405486615828,0.7063330024163186,0.01822934524486971,0.7048726467331119,0.01795638004722893 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,test,0.6129032258064516,0.04404518007247859,0.6003223207091055,0.04556627411860187,0.6013655462184874,0.044438872515284424 +flat_mae,patch,logistic,abide_dx,71,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,71,21.54434690031882,test,0.6451612903225806,0.04335002313405616,0.6405797101449275,0.04400179115886212,0.6402310924369747,0.043831326942211965 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,train,0.8034188034188035,0.014913183176066687,0.8005632550211221,0.015148518040153826,0.7995570321151717,0.015126715097908777 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,test,0.5564516129032258,0.042294600679260545,0.5498646953996436,0.04301643675994882,0.5498949579831933,0.042674900100003764 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,train,0.7962962962962963,0.015460600577262756,0.7931187981081331,0.01585904508412557,0.7919158361018825,0.01595765224750318 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,test,0.6048387096774194,0.040210408491988876,0.5931704050887178,0.04229606970744609,0.5940126050420168,0.04112256273351882 +flat_mae,patch,logistic,abide_dx,74,0.005994842503189409,train,0.7051282051282052,0.017498017007176795,0.6972845729998854,0.0181579010553541,0.6959394610557401,0.01788548987908455 +flat_mae,patch,logistic,abide_dx,74,0.005994842503189409,test,0.5887096774193549,0.041807531223614816,0.5765651155005022,0.04333352393441114,0.5777310924369747,0.042206871848643246 +flat_mae,patch,logistic,abide_dx,75,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,75,21.54434690031882,test,0.5403225806451613,0.04182833706325925,0.5366764995083579,0.04196894305805667,0.5367647058823529,0.04200544461657642 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,train,0.9002849002849003,0.011309346747398405,0.8988364337063663,0.011552097364336888,0.8974529346622371,0.011774556152667608 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,test,0.5645161290322581,0.04615524858457885,0.5571428571428572,0.04692414578330258,0.5572478991596639,0.04637380476257328 +flat_mae,patch,logistic,abide_dx,77,0.046415888336127774,train,0.8048433048433048,0.014697820696656408,0.8012026152437373,0.015027686303670981,0.7993724621631598,0.01499972763359697 +flat_mae,patch,logistic,abide_dx,77,0.046415888336127774,test,0.5564516129032258,0.044252006897587154,0.5550336008351275,0.04423135064101195,0.5561974789915967,0.04425432274186737 +flat_mae,patch,logistic,abide_dx,78,0.046415888336127774,train,0.801994301994302,0.014203888211941653,0.7989056848743392,0.014502376813833418,0.7976744186046512,0.01452882834970445 +flat_mae,patch,logistic,abide_dx,78,0.046415888336127774,test,0.6048387096774194,0.04168639109965783,0.5953379953379954,0.04317084942154853,0.5955882352941176,0.04235065972069363 +flat_mae,patch,logistic,abide_dx,79,0.3593813663804626,train,0.9074074074074074,0.010805415250033647,0.9060297088630275,0.011048624444172504,0.9045035068290882,0.011292062463450181 +flat_mae,patch,logistic,abide_dx,79,0.3593813663804626,test,0.5645161290322581,0.043013836918987323,0.5571428571428572,0.04377062500032068,0.5572478991596639,0.04331856359716958 +flat_mae,patch,logistic,abide_dx,80,0.005994842503189409,train,0.7250712250712251,0.016922998983004686,0.7185320605374412,0.01761759403893693,0.7169804355850867,0.017434150500966875 +flat_mae,patch,logistic,abide_dx,80,0.005994842503189409,test,0.6209677419354839,0.044632488761772164,0.6167554415729598,0.045038395004038584,0.6165966386554622,0.04485148545991481 +flat_mae,patch,logistic,abide_dx,81,0.3593813663804626,train,0.905982905982906,0.010905645009605955,0.9046823568136932,0.011090885815234442,0.9035068290882244,0.011206785603465939 +flat_mae,patch,logistic,abide_dx,81,0.3593813663804626,test,0.6370967741935484,0.042526905849401136,0.6283716283716283,0.044109736824147865,0.6281512605042017,0.04326390303847365 +flat_mae,patch,logistic,abide_dx,82,0.046415888336127774,train,0.792022792022792,0.014951956458601643,0.7887024285655383,0.015268060273177973,0.7874492432631968,0.015289633918976612 +flat_mae,patch,logistic,abide_dx,82,0.046415888336127774,test,0.6532258064516129,0.042632348863450915,0.6448884448884449,0.04397392661090456,0.6444327731092437,0.043191169509171845 +flat_mae,patch,logistic,abide_dx,83,0.046415888336127774,train,0.7948717948717948,0.015162824628688669,0.7920342330480579,0.015454351481559981,0.7912144702842376,0.015535880429150103 +flat_mae,patch,logistic,abide_dx,83,0.046415888336127774,test,0.6693548387096774,0.04347818714647869,0.6630211440312852,0.0446643432947874,0.6622899159663866,0.04403980140615029 +flat_mae,patch,logistic,abide_dx,84,0.046415888336127774,train,0.7792022792022792,0.015304328863441666,0.7760719467396562,0.015522616169966303,0.7752307124400148,0.015503668573557025 +flat_mae,patch,logistic,abide_dx,84,0.046415888336127774,test,0.6774193548387096,0.04255520684419886,0.6743697478991597,0.04328031361147974,0.6743697478991597,0.043397513107407006 +flat_mae,patch,logistic,abide_dx,85,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,85,10000.0,test,0.6129032258064516,0.04176870994025121,0.610369206598586,0.04186181911243734,0.6108193277310925,0.0418708480588448 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,train,0.7934472934472935,0.014497819141089066,0.7900730691733882,0.014830689656305372,0.7887412329272794,0.014875608966976886 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,test,0.6290322580645161,0.0447588636952772,0.6274817136886102,0.04511020947128442,0.6286764705882353,0.045313470025704565 +flat_mae,patch,logistic,abide_dx,87,0.046415888336127774,train,0.782051282051282,0.015157986792424695,0.7789613409752735,0.015421423637833928,0.778110003691399,0.015444449780336617 +flat_mae,patch,logistic,abide_dx,87,0.046415888336127774,test,0.5967741935483871,0.04525803002780743,0.5950888192267503,0.04525695974543873,0.5961134453781513,0.0451775676446022 +flat_mae,patch,logistic,abide_dx,88,0.3593813663804626,train,0.9017094017094017,0.011358833944497304,0.9001762032829455,0.011602094277491686,0.8984496124031007,0.011781965578338008 +flat_mae,patch,logistic,abide_dx,88,0.3593813663804626,test,0.5725806451612904,0.04472972075515952,0.5678306043269548,0.04542499826731016,0.5677521008403361,0.04535859730572318 +flat_mae,patch,logistic,abide_dx,89,0.046415888336127774,train,0.7977207977207977,0.014519868042312544,0.7938658527506885,0.014879394453045204,0.7920265780730897,0.014845879924412808 +flat_mae,patch,logistic,abide_dx,89,0.046415888336127774,test,0.6129032258064516,0.04330523215679608,0.6119947848761408,0.0434481461161103,0.6139705882352942,0.0435515463782355 +flat_mae,patch,logistic,abide_dx,90,0.3593813663804626,train,0.9031339031339032,0.011049702096855362,0.9017939433838051,0.011239053552212407,0.9006275378368402,0.011374476120610212 +flat_mae,patch,logistic,abide_dx,90,0.3593813663804626,test,0.6612903225806451,0.040781568340822166,0.6539994685091681,0.041992994666290716,0.6533613445378151,0.04137635910494337 +flat_mae,patch,logistic,abide_dx,91,0.3593813663804626,train,0.8988603988603988,0.010935718467177053,0.8974265046355845,0.011134634753985635,0.8961609449981542,0.01129948664146678 +flat_mae,patch,logistic,abide_dx,91,0.3593813663804626,test,0.5645161290322581,0.04051918521450612,0.5616653574234092,0.040663885189991006,0.5619747899159664,0.04073524557834684 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,train,0.9943019943019943,0.002905450764081003,0.9942447694628451,0.0029321763635876833,0.9945367294204503,0.002784118693125294 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,test,0.5967741935483871,0.04355952963782403,0.5929621848739496,0.04412463016314972,0.5929621848739496,0.044036842817705694 +flat_mae,patch,logistic,abide_dx,93,0.046415888336127774,train,0.792022792022792,0.014853316996663138,0.7880592570535249,0.015239319732470603,0.7862679955703211,0.015213931803874951 +flat_mae,patch,logistic,abide_dx,93,0.046415888336127774,test,0.5645161290322581,0.04432113382152816,0.5528846153846154,0.04550197227746857,0.5540966386554622,0.04457714694430579 +flat_mae,patch,logistic,abide_dx,94,0.3593813663804626,train,0.905982905982906,0.011067265378204296,0.9047458491295302,0.011243310086247113,0.9038021410114434,0.011369410659567702 +flat_mae,patch,logistic,abide_dx,94,0.3593813663804626,test,0.6370967741935484,0.041550703702196934,0.6330637206549615,0.04217335265295382,0.6328781512605042,0.04206328946688988 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,train,0.7905982905982906,0.015556777552369184,0.7866918572323314,0.015937050657913992,0.7849760059062385,0.015900598062012005 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,test,0.5967741935483871,0.04605248272843337,0.5929621848739496,0.046371179239883646,0.5929621848739496,0.04636956280697081 +flat_mae,patch,logistic,abide_dx,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,96,166.81005372000556,test,0.5403225806451613,0.04520452711390854,0.5400533610984577,0.045296208454763294,0.5430672268907563,0.045321801339292575 +flat_mae,patch,logistic,abide_dx,97,0.046415888336127774,train,0.7849002849002849,0.015187040932559506,0.7801677744480967,0.015714831219500992,0.7780361757105942,0.015658558794406946 +flat_mae,patch,logistic,abide_dx,97,0.046415888336127774,test,0.6935483870967742,0.04065031649334807,0.6906512605042017,0.041162650773883515,0.6906512605042017,0.0411287984520238 +flat_mae,patch,logistic,abide_dx,98,0.3593813663804626,train,0.9074074074074074,0.01083235093537503,0.9060946873424365,0.011026428849969287,0.904798818752307,0.011153440706386287 +flat_mae,patch,logistic,abide_dx,98,0.3593813663804626,test,0.6048387096774194,0.04677924333141494,0.6035753898349319,0.04682057444806304,0.6050420168067226,0.046903946103486194 +flat_mae,patch,logistic,abide_dx,99,2.782559402207126,train,0.9928774928774928,0.003221132565181949,0.9928080107695428,0.0032492837087416807,0.9932447397563677,0.003064976617646381 +flat_mae,patch,logistic,abide_dx,99,2.782559402207126,test,0.5967741935483871,0.041754806187435,0.5941345902068604,0.042076361807650495,0.5945378151260504,0.04204364720441456 +flat_mae,patch,logistic,abide_dx,100,0.005994842503189409,train,0.717948717948718,0.015887655396975214,0.7103091236494598,0.016513297372211667,0.70874861572536,0.016277685345876203 +flat_mae,patch,logistic,abide_dx,100,0.005994842503189409,test,0.6129032258064516,0.04331325710928689,0.6003223207091055,0.045304419215719025,0.6013655462184874,0.04404603996420269 diff --git a/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..572edc5dc06c2d25b72a001dfbbd1c9fee295af0 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:55:30 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:22:13 time: 4.6142 data: 3.7981 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:55 time: 0.2210 data: 0.0823 max mem: 3005 +extract (train) [ 40/289] eta: 0:01:16 time: 0.1821 data: 0.0594 max mem: 3005 +extract (train) [ 60/289] eta: 0:01:02 time: 0.1952 data: 0.0661 max mem: 3005 +extract (train) [ 80/289] eta: 0:00:52 time: 0.1924 data: 0.0655 max mem: 3005 +extract (train) [100/289] eta: 0:00:45 time: 0.1843 data: 0.0615 max mem: 3005 +extract (train) [120/289] eta: 0:00:39 time: 0.1932 data: 0.0681 max mem: 3005 +extract (train) [140/289] eta: 0:00:33 time: 0.1960 data: 0.0666 max mem: 3005 +extract (train) [160/289] eta: 0:00:28 time: 0.1858 data: 0.0631 max mem: 3005 +extract (train) [180/289] eta: 0:00:23 time: 0.1973 data: 0.0686 max mem: 3005 +extract (train) [200/289] eta: 0:00:19 time: 0.1898 data: 0.0668 max mem: 3005 +extract (train) [220/289] eta: 0:00:14 time: 0.1815 data: 0.0625 max mem: 3005 +extract (train) [240/289] eta: 0:00:10 time: 0.1933 data: 0.0693 max mem: 3005 +extract (train) [260/289] eta: 0:00:06 time: 0.1729 data: 0.0599 max mem: 3005 +extract (train) [280/289] eta: 0:00:01 time: 0.1560 data: 0.0495 max mem: 3005 +extract (train) [288/289] eta: 0:00:00 time: 0.1579 data: 0.0515 max mem: 3005 +extract (train) Total time: 0:00:59 (0.2047 s / it) +extract (validation) [ 0/62] eta: 0:04:17 time: 4.1493 data: 3.9182 max mem: 3005 +extract (validation) [20/62] eta: 0:00:17 time: 0.2262 data: 0.0826 max mem: 3005 +extract (validation) [40/62] eta: 0:00:06 time: 0.1795 data: 0.0638 max mem: 3005 +extract (validation) [60/62] eta: 0:00:00 time: 0.1743 data: 0.0591 max mem: 3005 +extract (validation) [61/62] eta: 0:00:00 time: 0.1750 data: 0.0595 max mem: 3005 +extract (validation) Total time: 0:00:16 (0.2627 s / it) +extract (test) [ 0/62] eta: 0:03:57 time: 3.8321 data: 3.6963 max mem: 3005 +extract (test) [20/62] eta: 0:00:16 time: 0.2273 data: 0.0837 max mem: 3005 +extract (test) [40/62] eta: 0:00:06 time: 0.1781 data: 0.0609 max mem: 3005 +extract (test) [60/62] eta: 0:00:00 time: 0.1642 data: 0.0538 max mem: 3005 +extract (test) [61/62] eta: 0:00:00 time: 0.1641 data: 0.0540 max mem: 3005 +extract (test) Total time: 0:00:15 (0.2538 s / it) +feature extraction time: 0:01:31 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.046416 | train | 0.79772 | 0.015575 | 0.79419 | 0.015969 | 0.79271 | 0.015992 | +| flat_mae | patch | logistic | abide_dx | | 0.046416 | test | 0.58065 | 0.0404 | 0.56432 | 0.042459 | 0.56874 | 0.040761 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.7338709677419355, "acc_std": 0.039898531551664757, "f1": 0.732461588754495, "f1_std": 0.04015636773697988, "bacc": 0.733718487394958, "bacc_std": 0.04022913827888883} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04302479743809341, "f1": 0.6428384393820372, "f1_std": 0.04325713733120763, "bacc": 0.6433823529411764, "bacc_std": 0.043239258225716316} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.045481042579640946, "f1": 0.6017043592264831, "f1_std": 0.0459588736314719, "bacc": 0.601890756302521, "bacc_std": 0.045951187707386966} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 166.81005372000556, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04362448548247671, "f1": 0.6167554415729598, "f1_std": 0.044398483762407785, "bacc": 0.6165966386554622, "bacc_std": 0.044195145317579565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04314900495390325, "f1": 0.5723303182143554, "f1_std": 0.04311625100167552, "bacc": 0.5756302521008403, "bacc_std": 0.04311184069609689} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.043407954573918404, "f1": 0.5550336008351275, "f1_std": 0.04336146142899907, "bacc": 0.5561974789915967, "bacc_std": 0.04339655050556934} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04559811778432817, "f1": 0.5765651155005022, "f1_std": 0.047826286957323653, "bacc": 0.5777310924369747, "bacc_std": 0.0462758011114918} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04181051789762769, "f1": 0.5931704050887178, "f1_std": 0.04377290473504135, "bacc": 0.5940126050420168, "bacc_std": 0.042582323096551} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.717741935483871, "acc_std": 0.03961406888586514, "f1": 0.710955710955711, "f1_std": 0.041083806048054436, "bacc": 0.7095588235294117, "bacc_std": 0.04029809683430368} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04552334994503944, "f1": 0.6682977751680041, "f1_std": 0.04567346535030356, "bacc": 0.6701680672268908, "bacc_std": 0.04579211762227403} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.046209435342494894, "f1": 0.5972691721349506, "f1_std": 0.046941434265443706, "bacc": 0.5971638655462186, "bacc_std": 0.04643584114537313} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04331269553113425, "f1": 0.5766806722689075, "f1_std": 0.04374347207581752, "bacc": 0.5766806722689075, "bacc_std": 0.04368488590913262} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 2.782559402207126, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.044345802598628706, "f1": 0.6405797101449275, "f1_std": 0.04516140214041959, "bacc": 0.6402310924369747, "bacc_std": 0.044987559677534275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.042787483339045244, "f1": 0.5643931861867832, "f1_std": 0.04407580691360447, "bacc": 0.5646008403361344, "bacc_std": 0.04336659147369708} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04211552851307322, "f1": 0.5972691721349506, "f1_std": 0.043302339709568954, "bacc": 0.5971638655462186, "bacc_std": 0.04275631845640377} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.043811306087618726, "f1": 0.5682392298764252, "f1_std": 0.04659106946718435, "bacc": 0.5730042016806722, "bacc_std": 0.04432977339076404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04068296692876455, "f1": 0.6118548118548119, "f1_std": 0.042326546207158235, "bacc": 0.6118697478991597, "bacc_std": 0.041515189813807474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.042471447837030094, "f1": 0.6025641025641025, "f1_std": 0.044010330107102195, "bacc": 0.6029411764705883, "bacc_std": 0.042999529391945894} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.040400518227479545, "f1": 0.6429862738533645, "f1_std": 0.04244880271821085, "bacc": 0.6428571428571428, "bacc_std": 0.04137621180961866} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.042276836510162064, "f1": 0.6555555555555556, "f1_std": 0.043072963178546926, "bacc": 0.654936974789916, "bacc_std": 0.042595529939020146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04472172020466694, "f1": 0.6063492063492064, "f1_std": 0.04558197485673346, "bacc": 0.60609243697479, "bacc_std": 0.04510230232821176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 2.782559402207126, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04416437405289325, "f1": 0.626380984265149, "f1_std": 0.04586084430064009, "bacc": 0.6265756302521008, "bacc_std": 0.04469554582318528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.03972230283293903, "f1": 0.6217205613178767, "f1_std": 0.042685202489317, "bacc": 0.6234243697478992, "bacc_std": 0.04072999432458781} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.0428263287345812, "f1": 0.555142173797502, "f1_std": 0.043999052522899665, "bacc": 0.555672268907563, "bacc_std": 0.04333304290368861} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04543001558812356, "f1": 0.5712141971683957, "f1_std": 0.045520223365331326, "bacc": 0.5724789915966386, "bacc_std": 0.04557748695806142} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04515920214066308, "f1": 0.58994708994709, "f1_std": 0.046033520818150334, "bacc": 0.5898109243697479, "bacc_std": 0.04560616510285363} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.045004092772603685, "f1": 0.607905138339921, "f1_std": 0.04602485018618479, "bacc": 0.6076680672268908, "bacc_std": 0.04574039648904304} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04191655900517774, "f1": 0.5950888192267503, "f1_std": 0.0423881854050567, "bacc": 0.5961134453781513, "bacc_std": 0.042674115519398516} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04147550459958024, "f1": 0.6283716283716283, "f1_std": 0.042942865262446187, "bacc": 0.6281512605042017, "bacc_std": 0.04210447387832534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.042344211624075805, "f1": 0.5694444444444444, "f1_std": 0.04355632475767991, "bacc": 0.5703781512605042, "bacc_std": 0.042508557375794884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044854426927388016, "f1": 0.5880946053680574, "f1_std": 0.046408095846481714, "bacc": 0.5882352941176471, "bacc_std": 0.045502573451785315} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04088929497176788, "f1": 0.6704756842944459, "f1_std": 0.04183985616368223, "bacc": 0.6696428571428572, "bacc_std": 0.04115381776723856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.043086813123512376, "f1": 0.6330637206549615, "f1_std": 0.04380827171333247, "bacc": 0.6328781512605042, "bacc_std": 0.0435930757647841} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044659245750565076, "f1": 0.5950888192267503, "f1_std": 0.044778884040934476, "bacc": 0.5961134453781513, "bacc_std": 0.04496267669435562} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04488401053454261, "f1": 0.6351748937561295, "f1_std": 0.04507663643681416, "bacc": 0.6360294117647058, "bacc_std": 0.04501204296125422} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 21.54434690031882, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.03972427079137321, "f1": 0.6169755573462261, "f1_std": 0.04193865901056573, "bacc": 0.6176470588235294, "bacc_std": 0.040609827863831244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04183166740045872, "f1": 0.607905138339921, "f1_std": 0.04238009285513294, "bacc": 0.6076680672268908, "bacc_std": 0.04210498604187981} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04249334587071535, "f1": 0.5836690840719849, "f1_std": 0.04454817162913641, "bacc": 0.5850840336134454, "bacc_std": 0.04317230953145771} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04588641362380527, "f1": 0.5950888192267503, "f1_std": 0.046039857352814524, "bacc": 0.5961134453781513, "bacc_std": 0.046256106002138905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.0420930662067365, "f1": 0.6190346145968457, "f1_std": 0.04584907230968273, "bacc": 0.6218487394957983, "bacc_std": 0.04326396705230285} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04100903113651263, "f1": 0.6428384393820372, "f1_std": 0.041432676486505685, "bacc": 0.6433823529411764, "bacc_std": 0.041448266599580244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04451401058540449, "f1": 0.5766806722689075, "f1_std": 0.045033613960366134, "bacc": 0.5766806722689075, "bacc_std": 0.04485464621953559} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.0402157649727232, "f1": 0.6119727891156463, "f1_std": 0.04276951447317943, "bacc": 0.6144957983193278, "bacc_std": 0.04086769650732438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04037375784566807, "f1": 0.6091008771929824, "f1_std": 0.04471320690113011, "bacc": 0.6129201680672269, "bacc_std": 0.041638970414800684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 10000.0, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04464200005123429, "f1": 0.5239148825405089, "f1_std": 0.04457550060441844, "bacc": 0.5267857142857143, "bacc_std": 0.04477871197193377} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04323021161953913, "f1": 0.5292707292707293, "f1_std": 0.044384582898955374, "bacc": 0.5304621848739496, "bacc_std": 0.04361377345612586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04393022563390241, "f1": 0.6203504657677024, "f1_std": 0.04402052655954683, "bacc": 0.6228991596638656, "bacc_std": 0.044239438471120156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.045914978206738656, "f1": 0.5915678524374176, "f1_std": 0.04669320335404661, "bacc": 0.5913865546218487, "bacc_std": 0.04636034494286604} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.042778313794648616, "f1": 0.5337093741606231, "f1_std": 0.044664396052974044, "bacc": 0.5362394957983193, "bacc_std": 0.04322821788943428} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.047120243820798824, "f1": 0.5425559947299078, "f1_std": 0.047848660522099275, "bacc": 0.5425420168067226, "bacc_std": 0.04745547337134893} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.042790903173231254, "f1": 0.6242424242424243, "f1_std": 0.043519919296724247, "bacc": 0.6239495798319328, "bacc_std": 0.0432329479497315} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04489927158618303, "f1": 0.6242424242424243, "f1_std": 0.0454097734149624, "bacc": 0.6239495798319328, "bacc_std": 0.045281433314186034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.044389356070595604, "f1": 0.5752305665349143, "f1_std": 0.044824359479374805, "bacc": 0.5751050420168067, "bacc_std": 0.044587687116333055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04299762534884873, "f1": 0.6704756842944459, "f1_std": 0.04419223682214694, "bacc": 0.6696428571428572, "bacc_std": 0.043589467297855136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.0442670068789713, "f1": 0.5854473942969518, "f1_std": 0.044592479342382134, "bacc": 0.585609243697479, "bacc_std": 0.044518080493830436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.03925508245360245, "f1": 0.6869519000797236, "f1_std": 0.04070066894636884, "bacc": 0.6859243697478992, "bacc_std": 0.04003274187544346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04268721695256142, "f1": 0.6493719997369632, "f1_std": 0.043475487552675995, "bacc": 0.6491596638655461, "bacc_std": 0.04325954477464104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.042149343026740616, "f1": 0.6443285528031291, "f1_std": 0.04217520536259005, "bacc": 0.6465336134453781, "bacc_std": 0.04209916134701612} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04684795074572506, "f1": 0.6255252100840336, "f1_std": 0.04717263490085371, "bacc": 0.6255252100840336, "bacc_std": 0.0471976047066089} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04290766409252793, "f1": 0.6227513227513227, "f1_std": 0.04372948730197435, "bacc": 0.6223739495798319, "bacc_std": 0.043242652799939524} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04402450303917119, "f1": 0.650475254015077, "f1_std": 0.04437488190875017, "bacc": 0.6507352941176471, "bacc_std": 0.04434877157813647} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04424455203768718, "f1": 0.5880398671096345, "f1_std": 0.04433918970543312, "bacc": 0.5903361344537814, "bacc_std": 0.04447009011659135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04321079120528677, "f1": 0.5963541666666667, "f1_std": 0.043323704879572066, "bacc": 0.5992647058823529, "bacc_std": 0.04339618774456982} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.7338709677419355, "acc_std": 0.03970155311141992, "f1": 0.7260127217944425, "f1_std": 0.04168513554954457, "bacc": 0.7242647058823529, "bacc_std": 0.040785025979836104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04671463393770097, "f1": 0.5588932806324111, "f1_std": 0.04758847117687451, "bacc": 0.5588235294117647, "bacc_std": 0.04726580237232992} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.042428893199655385, "f1": 0.58994708994709, "f1_std": 0.043080823591280334, "bacc": 0.5898109243697479, "bacc_std": 0.042662183278704614} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 21.54434690031882, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04114198404545738, "f1": 0.5972691721349506, "f1_std": 0.042150517365606294, "bacc": 0.5971638655462186, "bacc_std": 0.041603199338319595} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.042282986371878384, "f1": 0.6428384393820372, "f1_std": 0.04264398349677213, "bacc": 0.6433823529411764, "bacc_std": 0.04273815631606301} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04031663181924675, "f1": 0.6553454003118433, "f1_std": 0.04322253876901355, "bacc": 0.6559873949579832, "bacc_std": 0.041379500689227015} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04404518007247859, "f1": 0.6003223207091055, "f1_std": 0.04556627411860187, "bacc": 0.6013655462184874, "bacc_std": 0.044438872515284424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 21.54434690031882, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04335002313405616, "f1": 0.6405797101449275, "f1_std": 0.04400179115886212, "bacc": 0.6402310924369747, "bacc_std": 0.043831326942211965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.042294600679260545, "f1": 0.5498646953996436, "f1_std": 0.04301643675994882, "bacc": 0.5498949579831933, "bacc_std": 0.042674900100003764} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.040210408491988876, "f1": 0.5931704050887178, "f1_std": 0.04229606970744609, "bacc": 0.5940126050420168, "bacc_std": 0.04112256273351882} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.041807531223614816, "f1": 0.5765651155005022, "f1_std": 0.04333352393441114, "bacc": 0.5777310924369747, "bacc_std": 0.042206871848643246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 21.54434690031882, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04182833706325925, "f1": 0.5366764995083579, "f1_std": 0.04196894305805667, "bacc": 0.5367647058823529, "bacc_std": 0.04200544461657642} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04615524858457885, "f1": 0.5571428571428572, "f1_std": 0.04692414578330258, "bacc": 0.5572478991596639, "bacc_std": 0.04637380476257328} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.044252006897587154, "f1": 0.5550336008351275, "f1_std": 0.04423135064101195, "bacc": 0.5561974789915967, "bacc_std": 0.04425432274186737} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04168639109965783, "f1": 0.5953379953379954, "f1_std": 0.04317084942154853, "bacc": 0.5955882352941176, "bacc_std": 0.04235065972069363} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.043013836918987323, "f1": 0.5571428571428572, "f1_std": 0.04377062500032068, "bacc": 0.5572478991596639, "bacc_std": 0.04331856359716958} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.044632488761772164, "f1": 0.6167554415729598, "f1_std": 0.045038395004038584, "bacc": 0.6165966386554622, "bacc_std": 0.04485148545991481} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.042526905849401136, "f1": 0.6283716283716283, "f1_std": 0.044109736824147865, "bacc": 0.6281512605042017, "bacc_std": 0.04326390303847365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.042632348863450915, "f1": 0.6448884448884449, "f1_std": 0.04397392661090456, "bacc": 0.6444327731092437, "bacc_std": 0.043191169509171845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04347818714647869, "f1": 0.6630211440312852, "f1_std": 0.0446643432947874, "bacc": 0.6622899159663866, "bacc_std": 0.04403980140615029} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04255520684419886, "f1": 0.6743697478991597, "f1_std": 0.04328031361147974, "bacc": 0.6743697478991597, "bacc_std": 0.043397513107407006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 10000.0, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04176870994025121, "f1": 0.610369206598586, "f1_std": 0.04186181911243734, "bacc": 0.6108193277310925, "bacc_std": 0.0418708480588448} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.0447588636952772, "f1": 0.6274817136886102, "f1_std": 0.04511020947128442, "bacc": 0.6286764705882353, "bacc_std": 0.045313470025704565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04525803002780743, "f1": 0.5950888192267503, "f1_std": 0.04525695974543873, "bacc": 0.5961134453781513, "bacc_std": 0.0451775676446022} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04472972075515952, "f1": 0.5678306043269548, "f1_std": 0.04542499826731016, "bacc": 0.5677521008403361, "bacc_std": 0.04535859730572318} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04330523215679608, "f1": 0.6119947848761408, "f1_std": 0.0434481461161103, "bacc": 0.6139705882352942, "bacc_std": 0.0435515463782355} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.040781568340822166, "f1": 0.6539994685091681, "f1_std": 0.041992994666290716, "bacc": 0.6533613445378151, "bacc_std": 0.04137635910494337} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04051918521450612, "f1": 0.5616653574234092, "f1_std": 0.040663885189991006, "bacc": 0.5619747899159664, "bacc_std": 0.04073524557834684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04355952963782403, "f1": 0.5929621848739496, "f1_std": 0.04412463016314972, "bacc": 0.5929621848739496, "bacc_std": 0.044036842817705694} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04432113382152816, "f1": 0.5528846153846154, "f1_std": 0.04550197227746857, "bacc": 0.5540966386554622, "bacc_std": 0.04457714694430579} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.041550703702196934, "f1": 0.6330637206549615, "f1_std": 0.04217335265295382, "bacc": 0.6328781512605042, "bacc_std": 0.04206328946688988} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04605248272843337, "f1": 0.5929621848739496, "f1_std": 0.046371179239883646, "bacc": 0.5929621848739496, "bacc_std": 0.04636956280697081} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04520452711390854, "f1": 0.5400533610984577, "f1_std": 0.045296208454763294, "bacc": 0.5430672268907563, "bacc_std": 0.045321801339292575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.04065031649334807, "f1": 0.6906512605042017, "f1_std": 0.041162650773883515, "bacc": 0.6906512605042017, "bacc_std": 0.0411287984520238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04677924333141494, "f1": 0.6035753898349319, "f1_std": 0.04682057444806304, "bacc": 0.6050420168067226, "bacc_std": 0.046903946103486194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 2.782559402207126, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.041754806187435, "f1": 0.5941345902068604, "f1_std": 0.042076361807650495, "bacc": 0.5945378151260504, "bacc_std": 0.04204364720441456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04331325710928689, "f1": 0.6003223207091055, "f1_std": 0.045304419215719025, "bacc": 0.6013655462184874, "bacc_std": 0.04404603996420269} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 204.45 | 1406.6 | 0.84728 | 0.085848 | 0.84444 | 0.087903 | 0.84324 | 0.088274 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 204.45 | 1406.6 | 0.61387 | 0.041775 | 0.60758 | 0.04193 | 0.60816 | 0.041523 | + + +done! total time: 0:06:09 diff --git a/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00527638b9e18530bf7900b037eefc4e1b66b6c7 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (abide_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic +model: flat_mae +representation: reg +dataset: abide_dx +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..8102605b57d3c12eb17b701979f92b9d29cbd520 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,abide_dx,,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,,1291.5496650148827,test,0.5,0.04021131746199242,0.49153439153439155,0.040685315909197245,0.4927991620843153,0.04023697018301884 +flat_mae,reg,logistic,abide_dx,1,0.000774263682681127,train,0.6524216524216524,0.015964149452537598,0.6255672627114939,0.018166506330893745,0.6315983757844222,0.01650898784367695 +flat_mae,reg,logistic,abide_dx,1,0.000774263682681127,test,0.5725806451612904,0.04122939998977878,0.5400013998740113,0.04632135079095777,0.5519957983193278,0.042010376017866814 +flat_mae,reg,logistic,abide_dx,2,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,2,21.54434690031882,test,0.5725806451612904,0.044364808263745226,0.5703170970905524,0.04472242902836469,0.5709033613445378,0.04478154355191937 +flat_mae,reg,logistic,abide_dx,3,0.005994842503189409,train,0.7222222222222222,0.016431832326154538,0.71220011647391,0.017312232145437884,0.7105574012550757,0.016864061527036015 +flat_mae,reg,logistic,abide_dx,3,0.005994842503189409,test,0.6209677419354839,0.04252370031464255,0.6049081418208935,0.04556299713378325,0.6071428571428572,0.04350268693249158 +flat_mae,reg,logistic,abide_dx,4,0.046415888336127774,train,0.8361823361823362,0.013950454242716848,0.8329937214907064,0.014379547081636163,0.830749354005168,0.014474104072405566 +flat_mae,reg,logistic,abide_dx,4,0.046415888336127774,test,0.6693548387096774,0.041223089793876525,0.665680278818965,0.04179873331028653,0.6654411764705883,0.04168660030177146 +flat_mae,reg,logistic,abide_dx,5,0.005994842503189409,train,0.7165242165242165,0.015979382175243136,0.7072419364640943,0.016824623974952015,0.7056847545219638,0.016448987844499212 +flat_mae,reg,logistic,abide_dx,5,0.005994842503189409,test,0.5483870967741935,0.04374066274673275,0.5441176470588236,0.04404566899605417,0.5441176470588236,0.04408493498877857 +flat_mae,reg,logistic,abide_dx,6,0.046415888336127774,train,0.8361823361823362,0.013662476261769991,0.8339719029374202,0.01390939666852822,0.8331118493909192,0.013990369598417287 +flat_mae,reg,logistic,abide_dx,6,0.046415888336127774,test,0.5483870967741935,0.0438439314224973,0.5425559947299078,0.04396745201304886,0.5425420168067226,0.043835111344378286 +flat_mae,reg,logistic,abide_dx,7,2.782559402207126,train,0.9985754985754985,0.0013109871655805597,0.998559926150059,0.001325987261487221,0.9984126984126984,0.0014608142702183446 +flat_mae,reg,logistic,abide_dx,7,2.782559402207126,test,0.5080645161290323,0.04545642045134813,0.5025974879989479,0.04607796544532155,0.5026260504201681,0.04571968486164626 +flat_mae,reg,logistic,abide_dx,8,0.000774263682681127,train,0.6595441595441596,0.0159865597542192,0.634135012724089,0.018245797269966953,0.6392395717977113,0.01660280236320749 +flat_mae,reg,logistic,abide_dx,8,0.000774263682681127,test,0.5080645161290323,0.03706917276118731,0.4540599061710574,0.042048637851081164,0.48214285714285715,0.037208988688184515 +flat_mae,reg,logistic,abide_dx,9,0.000774263682681127,train,0.6509971509971509,0.016476915303420367,0.6273037542662117,0.01843966082323576,0.6317829457364341,0.017013580648082366 +flat_mae,reg,logistic,abide_dx,9,0.000774263682681127,test,0.6209677419354839,0.040473588207023556,0.5881563140414104,0.045570344698050796,0.5992647058823529,0.041291573749357255 +flat_mae,reg,logistic,abide_dx,10,0.005994842503189409,train,0.7150997150997151,0.016992451116303858,0.7053020444145921,0.017901808736875472,0.7038021410114433,0.01747002929962887 +flat_mae,reg,logistic,abide_dx,10,0.005994842503189409,test,0.6048387096774194,0.04380717610688843,0.6017043592264831,0.04398720774354094,0.601890756302521,0.044044412943653716 +flat_mae,reg,logistic,abide_dx,11,0.046415888336127774,train,0.8276353276353277,0.014212903694871076,0.8245561855191004,0.01458383074736351,0.8227021040974529,0.014650143114162812 +flat_mae,reg,logistic,abide_dx,11,0.046415888336127774,test,0.49193548387096775,0.04549488256089874,0.48628920891694616,0.04545792314570265,0.486344537815126,0.045386771424864046 +flat_mae,reg,logistic,abide_dx,12,0.005994842503189409,train,0.7136752136752137,0.015937484911267293,0.7039869932343841,0.01665623587226142,0.7025101513473606,0.016279654346652225 +flat_mae,reg,logistic,abide_dx,12,0.005994842503189409,test,0.5161290322580645,0.04462491091641088,0.5115546218487395,0.044820238114670626,0.5115546218487395,0.044686943333102344 +flat_mae,reg,logistic,abide_dx,13,0.046415888336127774,train,0.8233618233618234,0.01472780139269096,0.8199955333879252,0.015140035318720971,0.8179401993355482,0.015188753215617443 +flat_mae,reg,logistic,abide_dx,13,0.046415888336127774,test,0.5887096774193549,0.0443149380648556,0.5808311791608669,0.04547162839844848,0.5808823529411764,0.044832341322672026 +flat_mae,reg,logistic,abide_dx,14,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,14,1291.5496650148827,test,0.5161290322580645,0.0446761499761444,0.5129615082482325,0.04477072980412274,0.5131302521008403,0.04469577746960429 +flat_mae,reg,logistic,abide_dx,15,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,15,1291.5496650148827,test,0.5080645161290323,0.046375710837368044,0.5072633704644649,0.04653614649275502,0.5089285714285714,0.04697293984110661 +flat_mae,reg,logistic,abide_dx,16,0.3593813663804626,train,0.9515669515669516,0.00821168373369865,0.9509925507363026,0.008323647729544413,0.9504614248800296,0.008458045110720997 +flat_mae,reg,logistic,abide_dx,16,0.3593813663804626,test,0.5725806451612904,0.04404546357574748,0.5723303182143554,0.04414957699571731,0.5756302521008403,0.04410346790952812 +flat_mae,reg,logistic,abide_dx,17,0.005994842503189409,train,0.7236467236467237,0.015938560242884245,0.7128701181510748,0.016946111616784977,0.7112587670727206,0.01645170720022196 +flat_mae,reg,logistic,abide_dx,17,0.005994842503189409,test,0.5967741935483871,0.042578763399279955,0.5860042735042735,0.044516315536871215,0.5866596638655462,0.043355201116715425 +flat_mae,reg,logistic,abide_dx,18,0.005994842503189409,train,0.7207977207977208,0.015478530484728591,0.7111960035263003,0.01630803247989568,0.7095607235142118,0.015926954564930134 +flat_mae,reg,logistic,abide_dx,18,0.005994842503189409,test,0.5967741935483871,0.04112838709171402,0.5880946053680574,0.04282581232333815,0.5882352941176471,0.04190199454107612 +flat_mae,reg,logistic,abide_dx,19,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,19,21.54434690031882,test,0.5564516129032258,0.042435591152498627,0.5498646953996436,0.04350803880072657,0.5498949579831933,0.04295319853984079 +flat_mae,reg,logistic,abide_dx,20,0.046415888336127774,train,0.8404558404558404,0.013526396762919193,0.8377896613190731,0.01384686589588748,0.8361018826135105,0.013934268839325492 +flat_mae,reg,logistic,abide_dx,20,0.046415888336127774,test,0.5806451612903226,0.0444713747682407,0.5694444444444444,0.04604342783875328,0.5703781512605042,0.04500703652939608 +flat_mae,reg,logistic,abide_dx,21,0.046415888336127774,train,0.8404558404558404,0.013609503811855841,0.838138293930186,0.013840535379856725,0.8369878183831672,0.013880407079182332 +flat_mae,reg,logistic,abide_dx,21,0.046415888336127774,test,0.5483870967741935,0.0448491517198441,0.5386659580122243,0.04599141128170967,0.539390756302521,0.04523935302016926 +flat_mae,reg,logistic,abide_dx,22,0.3593813663804626,train,0.9615384615384616,0.006552876871315579,0.9610451267359006,0.0066542673365286315,0.9600959763750461,0.006839218470271811 +flat_mae,reg,logistic,abide_dx,22,0.3593813663804626,test,0.5806451612903226,0.042522256521589086,0.5735449735449736,0.04390127158809868,0.5735294117647058,0.04317119746569617 +flat_mae,reg,logistic,abide_dx,23,0.005994842503189409,train,0.7435897435897436,0.01635275518591399,0.7350543478260869,0.017134033896224442,0.732890365448505,0.016792549044236993 +flat_mae,reg,logistic,abide_dx,23,0.005994842503189409,test,0.5645161290322581,0.04420662627312351,0.5503626107977437,0.0464588837568087,0.5525210084033614,0.04485030372020436 +flat_mae,reg,logistic,abide_dx,24,0.005994842503189409,train,0.7421652421652422,0.015577403439795514,0.7345370140167098,0.016300771699422834,0.7324843115540789,0.01604499595126906 +flat_mae,reg,logistic,abide_dx,24,0.005994842503189409,test,0.5645161290322581,0.04220255835043109,0.5528846153846154,0.043813483425559414,0.5540966386554622,0.04270612318981612 +flat_mae,reg,logistic,abide_dx,25,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,25,21.54434690031882,test,0.47580645161290325,0.04355463216869907,0.47495277180639695,0.04365847869116898,0.47636554621848737,0.043951965757416424 +flat_mae,reg,logistic,abide_dx,26,0.005994842503189409,train,0.717948717948718,0.016410989418576875,0.7105817682088869,0.016957089915978555,0.7090439276485788,0.0167188859591715 +flat_mae,reg,logistic,abide_dx,26,0.005994842503189409,test,0.5,0.04762579596402643,0.49153439153439155,0.048375794686562446,0.49212184873949577,0.04778625037982866 +flat_mae,reg,logistic,abide_dx,27,0.3593813663804626,train,0.9515669515669516,0.007563230643862336,0.9510227198108872,0.007652721486413645,0.9507567368032483,0.007734621907240021 +flat_mae,reg,logistic,abide_dx,27,0.3593813663804626,test,0.5161290322580645,0.04529782069629296,0.5098814229249011,0.04592855938491241,0.5099789915966386,0.04564173267515083 +flat_mae,reg,logistic,abide_dx,28,0.005994842503189409,train,0.7108262108262108,0.016344870875546244,0.6993766283080363,0.017341760412334713,0.6981543004798818,0.016799682727553965 +flat_mae,reg,logistic,abide_dx,28,0.005994842503189409,test,0.5967741935483871,0.0402474493604354,0.575109649122807,0.04270293403220464,0.5803571428571428,0.04063752178105717 +flat_mae,reg,logistic,abide_dx,29,0.000774263682681127,train,0.6452991452991453,0.01602795019767226,0.6162966052397625,0.01823631035521495,0.6236618678479143,0.016501916121083676 +flat_mae,reg,logistic,abide_dx,29,0.000774263682681127,test,0.6048387096774194,0.038337783451294835,0.566216891554223,0.044700411318270794,0.58140756302521,0.03926274836517694 +flat_mae,reg,logistic,abide_dx,30,0.3593813663804626,train,0.9558404558404558,0.007868054602519008,0.9552445999222611,0.008002392521825332,0.9540420819490587,0.008244790838825596 +flat_mae,reg,logistic,abide_dx,30,0.3593813663804626,test,0.5967741935483871,0.04457271707199395,0.5880946053680574,0.04552347760689952,0.5882352941176471,0.04483136429234844 +flat_mae,reg,logistic,abide_dx,31,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,31,1291.5496650148827,test,0.5403225806451613,0.04262606021397292,0.5366764995083579,0.04306877686576638,0.5367647058823529,0.04300682394362748 +flat_mae,reg,logistic,abide_dx,32,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,32,1291.5496650148827,test,0.5564516129032258,0.042852416554712114,0.5457875457875458,0.04421930207049997,0.5467436974789917,0.043234549872219266 +flat_mae,reg,logistic,abide_dx,33,0.005994842503189409,train,0.717948717948718,0.01619201864138293,0.7088650016337542,0.01685788043643842,0.7072720561092654,0.016496390959919675 +flat_mae,reg,logistic,abide_dx,33,0.005994842503189409,test,0.6048387096774194,0.043001497324074454,0.5931704050887178,0.044942929223739075,0.5940126050420168,0.043670488935123104 +flat_mae,reg,logistic,abide_dx,34,2.782559402207126,train,0.9985754985754985,0.0013419831681920694,0.9985607764426576,0.0013551311022198273,0.9987080103359174,0.0012171475246393363 +flat_mae,reg,logistic,abide_dx,34,2.782559402207126,test,0.6209677419354839,0.04282832413946654,0.6137071651090342,0.044027514371261976,0.6134453781512605,0.04355595319491475 +flat_mae,reg,logistic,abide_dx,35,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,35,2.782559402207126,test,0.5645161290322581,0.04720595773172501,0.5603991596638656,0.04765481303472666,0.5603991596638656,0.04756812043771446 +flat_mae,reg,logistic,abide_dx,36,0.005994842503189409,train,0.7264957264957265,0.015242027850380596,0.7170899626380085,0.01614737829750824,0.7153193060169805,0.015784880174827903 +flat_mae,reg,logistic,abide_dx,36,0.005994842503189409,test,0.5725806451612904,0.040862434503954985,0.5544708833299437,0.04295957001206188,0.5582983193277311,0.0411748630254496 +flat_mae,reg,logistic,abide_dx,37,0.046415888336127774,train,0.8219373219373219,0.013955143018684639,0.8178625469354206,0.014439243425267806,0.815171650055371,0.014417784986824516 +flat_mae,reg,logistic,abide_dx,37,0.046415888336127774,test,0.6451612903225806,0.04358999207328456,0.6405797101449275,0.044059918419273786,0.6402310924369747,0.04382240394713269 +flat_mae,reg,logistic,abide_dx,38,0.005994842503189409,train,0.7236467236467237,0.01618464732101914,0.714447463768116,0.01701394846049055,0.712735326688815,0.016636751321319396 +flat_mae,reg,logistic,abide_dx,38,0.005994842503189409,test,0.5403225806451613,0.045060749259520146,0.5239442311578096,0.0467317205488849,0.5273109243697479,0.04541022753651133 +flat_mae,reg,logistic,abide_dx,39,0.3593813663804626,train,0.9501424501424501,0.008388588332813631,0.9495029420650563,0.00851691141007329,0.9485788113695091,0.00868958922311065 +flat_mae,reg,logistic,abide_dx,39,0.3593813663804626,test,0.5564516129032258,0.04326496481011873,0.551522325244953,0.043879963656833026,0.5514705882352942,0.04364687270168266 +flat_mae,reg,logistic,abide_dx,40,0.3593813663804626,train,0.9430199430199431,0.008785376915182514,0.9422702115936546,0.0089284133504859,0.9412329272794389,0.009122087329681705 +flat_mae,reg,logistic,abide_dx,40,0.3593813663804626,test,0.5241935483870968,0.045961943607137436,0.5101439571476397,0.04758328895488483,0.5126050420168067,0.046311369273730606 +flat_mae,reg,logistic,abide_dx,41,0.000774263682681127,train,0.6296296296296297,0.016379081657871653,0.602307886732266,0.018352182392334406,0.6091546696197858,0.016816667675147143 +flat_mae,reg,logistic,abide_dx,41,0.000774263682681127,test,0.5725806451612904,0.03700013710355115,0.5256586070010827,0.042685340286017504,0.5472689075630253,0.03745462221344418 +flat_mae,reg,logistic,abide_dx,42,0.046415888336127774,train,0.8404558404558404,0.013838787566053825,0.8372847682119205,0.01424456016881669,0.8349206349206348,0.014285760428127767 +flat_mae,reg,logistic,abide_dx,42,0.046415888336127774,test,0.5403225806451613,0.04259225159511678,0.5352140461629513,0.04318399142092255,0.5351890756302521,0.04301345839091506 +flat_mae,reg,logistic,abide_dx,43,0.005994842503189409,train,0.7122507122507122,0.017071459712954633,0.7023550648587381,0.018031152412243616,0.700922849760059,0.017594763866606203 +flat_mae,reg,logistic,abide_dx,43,0.005994842503189409,test,0.5806451612903226,0.04278840748118905,0.5581140350877193,0.045529150077842605,0.5640756302521008,0.04320783077815039 +flat_mae,reg,logistic,abide_dx,44,0.046415888336127774,train,0.8376068376068376,0.014809910430815391,0.8348930481283423,0.015125045999634844,0.8332225913621263,0.015144733480409454 +flat_mae,reg,logistic,abide_dx,44,0.046415888336127774,test,0.5403225806451613,0.04473619052820733,0.5267492467358554,0.046636764868278106,0.5288865546218487,0.045206298604058205 +flat_mae,reg,logistic,abide_dx,45,0.3593813663804626,train,0.9472934472934473,0.007813499260369599,0.9466516046213094,0.007914777103562988,0.9459948320413436,0.008000862205214735 +flat_mae,reg,logistic,abide_dx,45,0.3593813663804626,test,0.6290322580645161,0.04139244867717051,0.6255252100840336,0.04174629353053041,0.6255252100840336,0.04160694872387399 +flat_mae,reg,logistic,abide_dx,46,0.005994842503189409,train,0.7407407407407407,0.016489841443168993,0.7334629538456403,0.0171190043913925,0.7314876338132152,0.01686349940052099 +flat_mae,reg,logistic,abide_dx,46,0.005994842503189409,test,0.5080645161290323,0.044493577738113674,0.4962370962370962,0.045711473872924194,0.49789915966386555,0.04492066958331657 +flat_mae,reg,logistic,abide_dx,47,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,47,2.782559402207126,test,0.5564516129032258,0.04370201099295437,0.5457875457875458,0.045169133451433655,0.5467436974789917,0.04427000476543586 +flat_mae,reg,logistic,abide_dx,48,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,48,21.54434690031882,test,0.532258064516129,0.04666147709115973,0.5291961246399581,0.04692566022323797,0.5294117647058824,0.04695294975810968 +flat_mae,reg,logistic,abide_dx,49,0.046415888336127774,train,0.8347578347578347,0.014020011046161802,0.8311940298507463,0.014416504348526366,0.8285714285714285,0.014387545527920792 +flat_mae,reg,logistic,abide_dx,49,0.046415888336127774,test,0.5161290322580645,0.04404584453039153,0.5079365079365079,0.04442280850009461,0.5084033613445378,0.04404538504731026 +flat_mae,reg,logistic,abide_dx,50,0.005994842503189409,train,0.7293447293447294,0.015441253767731991,0.7211979832944541,0.016115968792753753,0.7193798449612403,0.0158389298915895 +flat_mae,reg,logistic,abide_dx,50,0.005994842503189409,test,0.5,0.04525224423982633,0.4805405405405405,0.04663453729966798,0.48581932773109243,0.04527218462079072 +flat_mae,reg,logistic,abide_dx,51,0.005994842503189409,train,0.7407407407407407,0.015128151206303692,0.7332024724356833,0.015837964229807145,0.7311923218899963,0.015589092098686913 +flat_mae,reg,logistic,abide_dx,51,0.005994842503189409,test,0.5403225806451613,0.043478064487408825,0.5208460443359773,0.045688208634310855,0.5257352941176471,0.043813006964349914 +flat_mae,reg,logistic,abide_dx,52,2.782559402207126,train,0.9985754985754985,0.0014187462087574272,0.9985607764426576,0.0014325877799534858,0.9987080103359174,0.001286769817245127 +flat_mae,reg,logistic,abide_dx,52,2.782559402207126,test,0.5725806451612904,0.044256712572222454,0.5725528455284552,0.04450193499189018,0.578781512605042,0.044181707992031534 +flat_mae,reg,logistic,abide_dx,53,0.005994842503189409,train,0.7250712250712251,0.016545309261086032,0.7163636363636363,0.017452120337524506,0.7146179401993356,0.01708891220170477 +flat_mae,reg,logistic,abide_dx,53,0.005994842503189409,test,0.5725806451612904,0.04543881032631651,0.5643931861867832,0.04623100948039727,0.5646008403361344,0.04570739387609973 +flat_mae,reg,logistic,abide_dx,54,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,54,10000.0,test,0.5725806451612904,0.04362721658615355,0.5678306043269548,0.04394140246967209,0.5677521008403361,0.04368922706101794 +flat_mae,reg,logistic,abide_dx,55,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,55,21.54434690031882,test,0.5564516129032258,0.04250015302659604,0.5406479423452549,0.043961674982146745,0.54359243697479,0.0426528008201386 +flat_mae,reg,logistic,abide_dx,56,0.046415888336127774,train,0.8276353276353277,0.014416714223864925,0.8245561855191004,0.014812452637410689,0.8227021040974529,0.014865851575626959 +flat_mae,reg,logistic,abide_dx,56,0.046415888336127774,test,0.5645161290322581,0.04411974057998249,0.5588932806324111,0.045057473603852684,0.5588235294117647,0.04471356944231574 +flat_mae,reg,logistic,abide_dx,57,0.000774263682681127,train,0.6467236467236467,0.015204211873715193,0.6181602842479273,0.017355362495424007,0.6252491694352159,0.015684980373145207 +flat_mae,reg,logistic,abide_dx,57,0.000774263682681127,test,0.5645161290322581,0.04322148509935249,0.5374412821221332,0.046713743957894696,0.546218487394958,0.04374465268147566 +flat_mae,reg,logistic,abide_dx,58,0.3593813663804626,train,0.9387464387464387,0.008764875656133958,0.9378350381709258,0.008942896784737132,0.9361757105943153,0.009191674196970815 +flat_mae,reg,logistic,abide_dx,58,0.3593813663804626,test,0.5161290322580645,0.04582490541501996,0.5141065830721003,0.04582154332409476,0.5147058823529411,0.045908906891529704 +flat_mae,reg,logistic,abide_dx,59,0.005994842503189409,train,0.7207977207977208,0.016783784088115858,0.7115036231884058,0.01769529467766111,0.7098560354374308,0.017298304346380983 +flat_mae,reg,logistic,abide_dx,59,0.005994842503189409,test,0.6048387096774194,0.04398326472014381,0.6017043592264831,0.04437002128633136,0.601890756302521,0.04438130025861846 +flat_mae,reg,logistic,abide_dx,60,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,60,1291.5496650148827,test,0.532258064516129,0.043371621535637714,0.5262187088274045,0.043519949403136846,0.5262605042016807,0.0432510687116338 +flat_mae,reg,logistic,abide_dx,61,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,61,1291.5496650148827,test,0.5725806451612904,0.04489129835562913,0.5678306043269548,0.04563384533435661,0.5677521008403361,0.04528436615496163 +flat_mae,reg,logistic,abide_dx,62,2.782559402207126,train,0.9985754985754985,0.0014402435019664507,0.9985607764426576,0.0014543023265676022,0.9987080103359174,0.0013062673622486656 +flat_mae,reg,logistic,abide_dx,62,2.782559402207126,test,0.5241935483870968,0.044315454655593306,0.5189057670809496,0.04444781018804762,0.51890756302521,0.0444513747516699 +flat_mae,reg,logistic,abide_dx,63,0.046415888336127774,train,0.8390313390313391,0.013137850047511963,0.8361557765591598,0.013462287812867305,0.83421926910299,0.01352517578651141 +flat_mae,reg,logistic,abide_dx,63,0.046415888336127774,test,0.5725806451612904,0.04606035944018007,0.5712141971683957,0.04607826874939562,0.5724789915966386,0.04604473216535596 +flat_mae,reg,logistic,abide_dx,64,0.046415888336127774,train,0.8361823361823362,0.013935810870556835,0.833382522604706,0.014281952780724345,0.8316352897748247,0.014375446196344656 +flat_mae,reg,logistic,abide_dx,64,0.046415888336127774,test,0.6048387096774194,0.04393536544913224,0.5931704050887178,0.045878101717530664,0.5940126050420168,0.044549466069127554 +flat_mae,reg,logistic,abide_dx,65,0.3593813663804626,train,0.9558404558404558,0.0075120868884786885,0.9553026957637998,0.007620398425184847,0.9546327057954964,0.007786331434943611 +flat_mae,reg,logistic,abide_dx,65,0.3593813663804626,test,0.5241935483870968,0.04372884950323037,0.5040336248389939,0.045725258616803405,0.509453781512605,0.04396272097349049 +flat_mae,reg,logistic,abide_dx,66,0.005994842503189409,train,0.7150997150997151,0.015484081751541218,0.7068159037754761,0.016088489025426086,0.7052787006275378,0.015787408117895356 +flat_mae,reg,logistic,abide_dx,66,0.005994842503189409,test,0.5806451612903226,0.040252878465593854,0.5670158474348643,0.041397233019437076,0.5688025210084033,0.040329913229860764 +flat_mae,reg,logistic,abide_dx,67,0.005994842503189409,train,0.7193732193732194,0.016304218533380772,0.7089252075813224,0.01730757649871516,0.7073827980804726,0.01682137584786081 +flat_mae,reg,logistic,abide_dx,67,0.005994842503189409,test,0.5967741935483871,0.042583394339403474,0.5836690840719849,0.04453469887474047,0.5850840336134454,0.0432035729987184 +flat_mae,reg,logistic,abide_dx,68,0.005994842503189409,train,0.7321937321937322,0.015252652355196296,0.7226835664335665,0.01604993521760534,0.7207825765965301,0.015659499604534662 +flat_mae,reg,logistic,abide_dx,68,0.005994842503189409,test,0.5483870967741935,0.04110751809224775,0.5441176470588236,0.04192912448369498,0.5441176470588236,0.04186767393995724 +flat_mae,reg,logistic,abide_dx,69,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,69,21.54434690031882,test,0.5080645161290323,0.045309305253555074,0.5041625696492953,0.0454497409870625,0.5042016806722689,0.045313691914442664 +flat_mae,reg,logistic,abide_dx,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,70,2.782559402207126,test,0.5483870967741935,0.044979527659021946,0.5473272490221643,0.04500918156484203,0.5488445378151261,0.045125245127530515 +flat_mae,reg,logistic,abide_dx,71,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,71,166.81005372000556,test,0.5645161290322581,0.04151331857544904,0.5475675675675675,0.0435652446528134,0.5509453781512605,0.0418444896660064 +flat_mae,reg,logistic,abide_dx,72,0.005994842503189409,train,0.7250712250712251,0.015368805583500397,0.7174897149374764,0.01595645824322748,0.7157991878922112,0.015693467934397007 +flat_mae,reg,logistic,abide_dx,72,0.005994842503189409,test,0.5161290322580645,0.04163583701617468,0.4972972972972973,0.043536487093062674,0.5021008403361344,0.041915279964627454 +flat_mae,reg,logistic,abide_dx,73,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,73,2.782559402207126,test,0.532258064516129,0.04389023533252272,0.5197649572649572,0.04557488235891501,0.5215336134453781,0.04448019178421936 +flat_mae,reg,logistic,abide_dx,74,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,74,1291.5496650148827,test,0.5645161290322581,0.043522620812314707,0.5626959247648903,0.04373684121343223,0.5635504201680672,0.043855624599864106 +flat_mae,reg,logistic,abide_dx,75,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,75,1291.5496650148827,test,0.5483870967741935,0.04421572318643069,0.5441176470588236,0.04452513421200618,0.5441176470588236,0.04431630267818382 +flat_mae,reg,logistic,abide_dx,76,0.005994842503189409,train,0.7236467236467237,0.016909467942155993,0.7156114266622118,0.017759920563324164,0.7139165743816906,0.017441528427448064 +flat_mae,reg,logistic,abide_dx,76,0.005994842503189409,test,0.5483870967741935,0.04318111555871272,0.5337093741606231,0.044751098760600974,0.5362394957983193,0.043485948906254554 +flat_mae,reg,logistic,abide_dx,77,0.046415888336127774,train,0.8361823361823362,0.01352529978610843,0.833382522604706,0.013835052496685126,0.8316352897748247,0.01387790381724097 +flat_mae,reg,logistic,abide_dx,77,0.046415888336127774,test,0.5403225806451613,0.04641479275481498,0.5292707292707293,0.047221258004933486,0.5304621848739496,0.04647878301009199 +flat_mae,reg,logistic,abide_dx,78,0.005994842503189409,train,0.7279202279202279,0.016446832013891157,0.7184115523465704,0.0172967140448199,0.7166112956810631,0.016892668934028507 +flat_mae,reg,logistic,abide_dx,78,0.005994842503189409,test,0.5806451612903226,0.04192573087579157,0.5694444444444444,0.04345186564870351,0.5703781512605042,0.04233421209744486 +flat_mae,reg,logistic,abide_dx,79,0.005994842503189409,train,0.7236467236467237,0.016085920187309703,0.7161614645858343,0.016634141028759493,0.7145071982281285,0.016387969255164832 +flat_mae,reg,logistic,abide_dx,79,0.005994842503189409,test,0.5725806451612904,0.04479807859712018,0.5544708833299437,0.04778610336684719,0.5582983193277311,0.045636862691834604 +flat_mae,reg,logistic,abide_dx,80,0.005994842503189409,train,0.7193732193732194,0.01765990306774658,0.7113553113553113,0.0185553816490829,0.7097452934662237,0.01823522462759864 +flat_mae,reg,logistic,abide_dx,80,0.005994842503189409,test,0.5887096774193549,0.04619394260058858,0.5873947935016637,0.046243546784963126,0.5887605042016807,0.0460513093147577 +flat_mae,reg,logistic,abide_dx,81,0.005994842503189409,train,0.7222222222222222,0.01700761640426848,0.713716870606885,0.0177670911874849,0.7120339608711702,0.017418496127432296 +flat_mae,reg,logistic,abide_dx,81,0.005994842503189409,test,0.5887096774193549,0.044350073057763145,0.5765651155005022,0.04662290971957915,0.5777310924369747,0.04516150895323189 +flat_mae,reg,logistic,abide_dx,82,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,82,166.81005372000556,test,0.5403225806451613,0.043331872157120095,0.5366764995083579,0.04357148788371103,0.5367647058823529,0.04351076813726926 +flat_mae,reg,logistic,abide_dx,83,0.046415888336127774,train,0.8418803418803419,0.014446748188085706,0.8389305860733931,0.014819928126356298,0.8368032484311554,0.014885148445168635 +flat_mae,reg,logistic,abide_dx,83,0.046415888336127774,test,0.5967741935483871,0.04546692374734435,0.5915678524374176,0.04635690247737306,0.5913865546218487,0.046032149427751544 +flat_mae,reg,logistic,abide_dx,84,0.005994842503189409,train,0.7250712250712251,0.015071048421241897,0.7177580801399899,0.015613893260542248,0.71609449981543,0.015402205987233557 +flat_mae,reg,logistic,abide_dx,84,0.005994842503189409,test,0.532258064516129,0.045297846539815126,0.5107482993197279,0.048082712211417925,0.5168067226890757,0.04571351628469196 +flat_mae,reg,logistic,abide_dx,85,0.005994842503189409,train,0.7250712250712251,0.01593514174588686,0.7185320605374412,0.01654034328674336,0.7169804355850867,0.01638835776493162 +flat_mae,reg,logistic,abide_dx,85,0.005994842503189409,test,0.6048387096774194,0.040363218232150305,0.5931704050887178,0.04223721759892342,0.5940126050420168,0.040949968416033405 +flat_mae,reg,logistic,abide_dx,86,0.005994842503189409,train,0.7321937321937322,0.016238919123449564,0.724676018258134,0.016845535570833493,0.7228497600590624,0.016570757026948178 +flat_mae,reg,logistic,abide_dx,86,0.005994842503189409,test,0.5806451612903226,0.0427083708746652,0.5752305665349143,0.043600070890850266,0.5751050420168067,0.04332722686337498 +flat_mae,reg,logistic,abide_dx,87,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,87,21.54434690031882,test,0.5564516129032258,0.04625052802240984,0.5498646953996436,0.04700164936463469,0.5498949579831933,0.046574576787682884 +flat_mae,reg,logistic,abide_dx,88,0.3593813663804626,train,0.9572649572649573,0.007710803973518998,0.9566737985516788,0.007849757445986526,0.9553340716131413,0.008127643877422408 +flat_mae,reg,logistic,abide_dx,88,0.3593813663804626,test,0.6129032258064516,0.044380215614407165,0.6025641025641025,0.04597612069377747,0.6029411764705883,0.04491192886751429 +flat_mae,reg,logistic,abide_dx,89,0.005994842503189409,train,0.7150997150997151,0.015889289900008548,0.7071021470831212,0.016472013771978475,0.7055740125507568,0.016199023382043848 +flat_mae,reg,logistic,abide_dx,89,0.005994842503189409,test,0.5564516129032258,0.044003819245840514,0.5529334644378892,0.04441113376689632,0.553046218487395,0.04434268957605878 +flat_mae,reg,logistic,abide_dx,90,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,90,166.81005372000556,test,0.6048387096774194,0.043417317691749516,0.5880957223239103,0.0464279268058896,0.5908613445378151,0.04433107881938251 +flat_mae,reg,logistic,abide_dx,91,0.005994842503189409,train,0.7250712250712251,0.016793802478590764,0.7172161172161171,0.017436594111536047,0.7155038759689922,0.01713804297537732 +flat_mae,reg,logistic,abide_dx,91,0.005994842503189409,test,0.5645161290322581,0.04355679727631296,0.5588932806324111,0.04411240046637215,0.5588235294117647,0.04389249520316596 +flat_mae,reg,logistic,abide_dx,92,0.3593813663804626,train,0.9615384615384616,0.00709726352792573,0.9610451267359006,0.007202260432723842,0.9600959763750461,0.007354721606153766 +flat_mae,reg,logistic,abide_dx,92,0.3593813663804626,test,0.5483870967741935,0.04480494203081861,0.5441176470588236,0.04564049266497684,0.5441176470588236,0.04547978739892274 +flat_mae,reg,logistic,abide_dx,93,0.005994842503189409,train,0.7222222222222222,0.016247590234863413,0.7134243994347622,0.01692834227735243,0.7117386489479514,0.01657694066427005 +flat_mae,reg,logistic,abide_dx,93,0.005994842503189409,test,0.5403225806451613,0.04618177674843228,0.5239442311578096,0.04742992350814339,0.5273109243697479,0.046203307416906966 +flat_mae,reg,logistic,abide_dx,94,0.005994842503189409,train,0.7236467236467237,0.017001833824325793,0.7147465167522642,0.017923002811282424,0.713030638612034,0.01755263374355143 +flat_mae,reg,logistic,abide_dx,94,0.005994842503189409,test,0.6048387096774194,0.040992004499443714,0.5880957223239103,0.043919093087948745,0.5908613445378151,0.04182648833617171 +flat_mae,reg,logistic,abide_dx,95,0.046415888336127774,train,0.8547008547008547,0.01301957395351162,0.8520480692938376,0.01336743131032262,0.8499077150239941,0.013466386282071125 +flat_mae,reg,logistic,abide_dx,95,0.046415888336127774,test,0.6048387096774194,0.04289370195044404,0.6017043592264831,0.043255655210732276,0.601890756302521,0.04319377191498198 +flat_mae,reg,logistic,abide_dx,96,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,abide_dx,96,21.54434690031882,test,0.5887096774193549,0.042119506283589805,0.5826018084614877,0.04310218598977751,0.5824579831932774,0.042652699931173 +flat_mae,reg,logistic,abide_dx,97,0.000774263682681127,train,0.6339031339031339,0.017115554096425335,0.6012095410507887,0.01964132263849574,0.6109634551495017,0.01761630141233804 +flat_mae,reg,logistic,abide_dx,97,0.000774263682681127,test,0.6129032258064516,0.03626321209585334,0.5814345991561182,0.041316923060690874,0.5919117647058824,0.037228474592613955 +flat_mae,reg,logistic,abide_dx,98,0.3593813663804626,train,0.9515669515669516,0.008367965148331782,0.9509296798546065,0.008499252445918665,0.9498708010335917,0.008674525841651671 +flat_mae,reg,logistic,abide_dx,98,0.3593813663804626,test,0.5645161290322581,0.04592810685146317,0.5634941329856584,0.046220876806691326,0.5651260504201681,0.04627832685663943 +flat_mae,reg,logistic,abide_dx,99,0.3593813663804626,train,0.9401709401709402,0.008984652442535171,0.939461386203668,0.009100248614580982,0.9389442598744924,0.009178255156768934 +flat_mae,reg,logistic,abide_dx,99,0.3593813663804626,test,0.5887096774193549,0.041614466087180835,0.5854473942969518,0.04195262204733698,0.585609243697479,0.041796710550513094 +flat_mae,reg,logistic,abide_dx,100,0.005994842503189409,train,0.7122507122507122,0.016141514862037565,0.7029834865152442,0.017013269909166514,0.7015134736064969,0.016645077799540067 +flat_mae,reg,logistic,abide_dx,100,0.005994842503189409,test,0.5967741935483871,0.04358515771389211,0.5782312925170068,0.04682076854123017,0.5819327731092437,0.04453093328937262 diff --git a/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f6d94689d57a845afcacdfa6c24559682d36598a --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:29:14 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (abide_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic +model: flat_mae +representation: reg +dataset: abide_dx +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/abide_dx__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:19:54 time: 4.1346 data: 3.3155 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:39 time: 0.1817 data: 0.0621 max mem: 3005 +extract (train) [ 40/289] eta: 0:01:07 time: 0.1687 data: 0.0546 max mem: 3005 +extract (train) [ 60/289] eta: 0:00:54 time: 0.1698 data: 0.0556 max mem: 3005 +extract (train) [ 80/289] eta: 0:00:45 time: 0.1645 data: 0.0534 max mem: 3005 +extract (train) [100/289] eta: 0:00:39 time: 0.1644 data: 0.0519 max mem: 3005 +extract (train) [120/289] eta: 0:00:34 time: 0.1695 data: 0.0558 max mem: 3005 +extract (train) [140/289] eta: 0:00:29 time: 0.1710 data: 0.0560 max mem: 3005 +extract (train) [160/289] eta: 0:00:24 time: 0.1620 data: 0.0520 max mem: 3005 +extract (train) [180/289] eta: 0:00:20 time: 0.1685 data: 0.0548 max mem: 3005 +extract (train) [200/289] eta: 0:00:16 time: 0.1727 data: 0.0563 max mem: 3005 +extract (train) [220/289] eta: 0:00:12 time: 0.1721 data: 0.0579 max mem: 3005 +extract (train) [240/289] eta: 0:00:09 time: 0.1886 data: 0.0653 max mem: 3005 +extract (train) [260/289] eta: 0:00:05 time: 0.1808 data: 0.0653 max mem: 3005 +extract (train) [280/289] eta: 0:00:01 time: 0.1545 data: 0.0487 max mem: 3005 +extract (train) [288/289] eta: 0:00:00 time: 0.1501 data: 0.0463 max mem: 3005 +extract (train) Total time: 0:00:53 (0.1852 s / it) +extract (validation) [ 0/62] eta: 0:03:40 time: 3.5484 data: 3.3582 max mem: 3005 +extract (validation) [20/62] eta: 0:00:15 time: 0.2145 data: 0.0745 max mem: 3005 +extract (validation) [40/62] eta: 0:00:05 time: 0.1567 data: 0.0490 max mem: 3005 +extract (validation) [60/62] eta: 0:00:00 time: 0.1528 data: 0.0475 max mem: 3005 +extract (validation) [61/62] eta: 0:00:00 time: 0.1523 data: 0.0471 max mem: 3005 +extract (validation) Total time: 0:00:14 (0.2340 s / it) +extract (test) [ 0/62] eta: 0:04:09 time: 4.0261 data: 3.8294 max mem: 3005 +extract (test) [20/62] eta: 0:00:16 time: 0.2120 data: 0.0745 max mem: 3005 +extract (test) [40/62] eta: 0:00:06 time: 0.1625 data: 0.0509 max mem: 3005 +extract (test) [60/62] eta: 0:00:00 time: 0.1492 data: 0.0457 max mem: 3005 +extract (test) [61/62] eta: 0:00:00 time: 0.1501 data: 0.0460 max mem: 3005 +extract (test) Total time: 0:00:14 (0.2410 s / it) +feature extraction time: 0:01:23 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-------:|:--------|------:|----------:|--------:|---------:|-------:|-----------:| +| flat_mae | reg | logistic | abide_dx | | 1291.5 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | reg | logistic | abide_dx | | 1291.5 | test | 0.5 | 0.040211 | 0.49153 | 0.040685 | 0.4928 | 0.040237 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.000774263682681127, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04122939998977878, "f1": 0.5400013998740113, "f1_std": 0.04632135079095777, "bacc": 0.5519957983193278, "bacc_std": 0.042010376017866814} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 21.54434690031882, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.044364808263745226, "f1": 0.5703170970905524, "f1_std": 0.04472242902836469, "bacc": 0.5709033613445378, "bacc_std": 0.04478154355191937} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04252370031464255, "f1": 0.6049081418208935, "f1_std": 0.04556299713378325, "bacc": 0.6071428571428572, "bacc_std": 0.04350268693249158} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.041223089793876525, "f1": 0.665680278818965, "f1_std": 0.04179873331028653, "bacc": 0.6654411764705883, "bacc_std": 0.04168660030177146} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04374066274673275, "f1": 0.5441176470588236, "f1_std": 0.04404566899605417, "bacc": 0.5441176470588236, "bacc_std": 0.04408493498877857} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.0438439314224973, "f1": 0.5425559947299078, "f1_std": 0.04396745201304886, "bacc": 0.5425420168067226, "bacc_std": 0.043835111344378286} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 2.782559402207126, "split": "test", "acc": 0.5080645161290323, "acc_std": 0.04545642045134813, "f1": 0.5025974879989479, "f1_std": 0.04607796544532155, "bacc": 0.5026260504201681, "bacc_std": 0.04571968486164626} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.000774263682681127, "split": "test", "acc": 0.5080645161290323, "acc_std": 0.03706917276118731, "f1": 0.4540599061710574, "f1_std": 0.042048637851081164, "bacc": 0.48214285714285715, "bacc_std": 0.037208988688184515} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 0.000774263682681127, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.040473588207023556, "f1": 0.5881563140414104, "f1_std": 0.045570344698050796, "bacc": 0.5992647058823529, "bacc_std": 0.041291573749357255} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04380717610688843, "f1": 0.6017043592264831, "f1_std": 0.04398720774354094, "bacc": 0.601890756302521, "bacc_std": 0.044044412943653716} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.49193548387096775, "acc_std": 0.04549488256089874, "f1": 0.48628920891694616, "f1_std": 0.04545792314570265, "bacc": 0.486344537815126, "bacc_std": 0.045386771424864046} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.04462491091641088, "f1": 0.5115546218487395, "f1_std": 0.044820238114670626, "bacc": 0.5115546218487395, "bacc_std": 0.044686943333102344} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.0443149380648556, "f1": 0.5808311791608669, "f1_std": 0.04547162839844848, "bacc": 0.5808823529411764, "bacc_std": 0.044832341322672026} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 1291.5496650148827, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.0446761499761444, "f1": 0.5129615082482325, "f1_std": 0.04477072980412274, "bacc": 0.5131302521008403, "bacc_std": 0.04469577746960429} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 1291.5496650148827, "split": "test", "acc": 0.5080645161290323, "acc_std": 0.046375710837368044, "f1": 0.5072633704644649, "f1_std": 0.04653614649275502, "bacc": 0.5089285714285714, "bacc_std": 0.04697293984110661} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04404546357574748, "f1": 0.5723303182143554, "f1_std": 0.04414957699571731, "bacc": 0.5756302521008403, "bacc_std": 0.04410346790952812} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.042578763399279955, "f1": 0.5860042735042735, "f1_std": 0.044516315536871215, "bacc": 0.5866596638655462, "bacc_std": 0.043355201116715425} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04112838709171402, "f1": 0.5880946053680574, "f1_std": 0.04282581232333815, "bacc": 0.5882352941176471, "bacc_std": 0.04190199454107612} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 21.54434690031882, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.042435591152498627, "f1": 0.5498646953996436, "f1_std": 0.04350803880072657, "bacc": 0.5498949579831933, "bacc_std": 0.04295319853984079} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.0444713747682407, "f1": 0.5694444444444444, "f1_std": 0.04604342783875328, "bacc": 0.5703781512605042, "bacc_std": 0.04500703652939608} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.0448491517198441, "f1": 0.5386659580122243, "f1_std": 0.04599141128170967, "bacc": 0.539390756302521, "bacc_std": 0.04523935302016926} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.042522256521589086, "f1": 0.5735449735449736, "f1_std": 0.04390127158809868, "bacc": 0.5735294117647058, "bacc_std": 0.04317119746569617} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04420662627312351, "f1": 0.5503626107977437, "f1_std": 0.0464588837568087, "bacc": 0.5525210084033614, "bacc_std": 0.04485030372020436} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04220255835043109, "f1": 0.5528846153846154, "f1_std": 0.043813483425559414, "bacc": 0.5540966386554622, "bacc_std": 0.04270612318981612} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 21.54434690031882, "split": "test", "acc": 0.47580645161290325, "acc_std": 0.04355463216869907, "f1": 0.47495277180639695, "f1_std": 0.04365847869116898, "bacc": 0.47636554621848737, "bacc_std": 0.043951965757416424} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.04762579596402643, "f1": 0.49153439153439155, "f1_std": 0.048375794686562446, "bacc": 0.49212184873949577, "bacc_std": 0.04778625037982866} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.04529782069629296, "f1": 0.5098814229249011, "f1_std": 0.04592855938491241, "bacc": 0.5099789915966386, "bacc_std": 0.04564173267515083} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.0402474493604354, "f1": 0.575109649122807, "f1_std": 0.04270293403220464, "bacc": 0.5803571428571428, "bacc_std": 0.04063752178105717} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.000774263682681127, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.038337783451294835, "f1": 0.566216891554223, "f1_std": 0.044700411318270794, "bacc": 0.58140756302521, "bacc_std": 0.03926274836517694} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04457271707199395, "f1": 0.5880946053680574, "f1_std": 0.04552347760689952, "bacc": 0.5882352941176471, "bacc_std": 0.04483136429234844} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 1291.5496650148827, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04262606021397292, "f1": 0.5366764995083579, "f1_std": 0.04306877686576638, "bacc": 0.5367647058823529, "bacc_std": 0.04300682394362748} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 1291.5496650148827, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.042852416554712114, "f1": 0.5457875457875458, "f1_std": 0.04421930207049997, "bacc": 0.5467436974789917, "bacc_std": 0.043234549872219266} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.043001497324074454, "f1": 0.5931704050887178, "f1_std": 0.044942929223739075, "bacc": 0.5940126050420168, "bacc_std": 0.043670488935123104} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 2.782559402207126, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04282832413946654, "f1": 0.6137071651090342, "f1_std": 0.044027514371261976, "bacc": 0.6134453781512605, "bacc_std": 0.04355595319491475} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 2.782559402207126, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04720595773172501, "f1": 0.5603991596638656, "f1_std": 0.04765481303472666, "bacc": 0.5603991596638656, "bacc_std": 0.04756812043771446} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.040862434503954985, "f1": 0.5544708833299437, "f1_std": 0.04295957001206188, "bacc": 0.5582983193277311, "bacc_std": 0.0411748630254496} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04358999207328456, "f1": 0.6405797101449275, "f1_std": 0.044059918419273786, "bacc": 0.6402310924369747, "bacc_std": 0.04382240394713269} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.045060749259520146, "f1": 0.5239442311578096, "f1_std": 0.0467317205488849, "bacc": 0.5273109243697479, "bacc_std": 0.04541022753651133} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04326496481011873, "f1": 0.551522325244953, "f1_std": 0.043879963656833026, "bacc": 0.5514705882352942, "bacc_std": 0.04364687270168266} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.045961943607137436, "f1": 0.5101439571476397, "f1_std": 0.04758328895488483, "bacc": 0.5126050420168067, "bacc_std": 0.046311369273730606} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.03700013710355115, "f1": 0.5256586070010827, "f1_std": 0.042685340286017504, "bacc": 0.5472689075630253, "bacc_std": 0.03745462221344418} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04259225159511678, "f1": 0.5352140461629513, "f1_std": 0.04318399142092255, "bacc": 0.5351890756302521, "bacc_std": 0.04301345839091506} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04278840748118905, "f1": 0.5581140350877193, "f1_std": 0.045529150077842605, "bacc": 0.5640756302521008, "bacc_std": 0.04320783077815039} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04473619052820733, "f1": 0.5267492467358554, "f1_std": 0.046636764868278106, "bacc": 0.5288865546218487, "bacc_std": 0.045206298604058205} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04139244867717051, "f1": 0.6255252100840336, "f1_std": 0.04174629353053041, "bacc": 0.6255252100840336, "bacc_std": 0.04160694872387399} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.5080645161290323, "acc_std": 0.044493577738113674, "f1": 0.4962370962370962, "f1_std": 0.045711473872924194, "bacc": 0.49789915966386555, "bacc_std": 0.04492066958331657} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 2.782559402207126, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04370201099295437, "f1": 0.5457875457875458, "f1_std": 0.045169133451433655, "bacc": 0.5467436974789917, "bacc_std": 0.04427000476543586} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 21.54434690031882, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04666147709115973, "f1": 0.5291961246399581, "f1_std": 0.04692566022323797, "bacc": 0.5294117647058824, "bacc_std": 0.04695294975810968} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.04404584453039153, "f1": 0.5079365079365079, "f1_std": 0.04442280850009461, "bacc": 0.5084033613445378, "bacc_std": 0.04404538504731026} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.04525224423982633, "f1": 0.4805405405405405, "f1_std": 0.04663453729966798, "bacc": 0.48581932773109243, "bacc_std": 0.04527218462079072} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.043478064487408825, "f1": 0.5208460443359773, "f1_std": 0.045688208634310855, "bacc": 0.5257352941176471, "bacc_std": 0.043813006964349914} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 2.782559402207126, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.044256712572222454, "f1": 0.5725528455284552, "f1_std": 0.04450193499189018, "bacc": 0.578781512605042, "bacc_std": 0.044181707992031534} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04543881032631651, "f1": 0.5643931861867832, "f1_std": 0.04623100948039727, "bacc": 0.5646008403361344, "bacc_std": 0.04570739387609973} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 10000.0, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04362721658615355, "f1": 0.5678306043269548, "f1_std": 0.04394140246967209, "bacc": 0.5677521008403361, "bacc_std": 0.04368922706101794} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 21.54434690031882, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04250015302659604, "f1": 0.5406479423452549, "f1_std": 0.043961674982146745, "bacc": 0.54359243697479, "bacc_std": 0.0426528008201386} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04411974057998249, "f1": 0.5588932806324111, "f1_std": 0.045057473603852684, "bacc": 0.5588235294117647, "bacc_std": 0.04471356944231574} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.000774263682681127, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04322148509935249, "f1": 0.5374412821221332, "f1_std": 0.046713743957894696, "bacc": 0.546218487394958, "bacc_std": 0.04374465268147566} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.04582490541501996, "f1": 0.5141065830721003, "f1_std": 0.04582154332409476, "bacc": 0.5147058823529411, "bacc_std": 0.045908906891529704} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04398326472014381, "f1": 0.6017043592264831, "f1_std": 0.04437002128633136, "bacc": 0.601890756302521, "bacc_std": 0.04438130025861846} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 1291.5496650148827, "split": "test", "acc": 0.532258064516129, "acc_std": 0.043371621535637714, "f1": 0.5262187088274045, "f1_std": 0.043519949403136846, "bacc": 0.5262605042016807, "bacc_std": 0.0432510687116338} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 1291.5496650148827, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04489129835562913, "f1": 0.5678306043269548, "f1_std": 0.04563384533435661, "bacc": 0.5677521008403361, "bacc_std": 0.04528436615496163} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 2.782559402207126, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.044315454655593306, "f1": 0.5189057670809496, "f1_std": 0.04444781018804762, "bacc": 0.51890756302521, "bacc_std": 0.0444513747516699} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04606035944018007, "f1": 0.5712141971683957, "f1_std": 0.04607826874939562, "bacc": 0.5724789915966386, "bacc_std": 0.04604473216535596} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04393536544913224, "f1": 0.5931704050887178, "f1_std": 0.045878101717530664, "bacc": 0.5940126050420168, "bacc_std": 0.044549466069127554} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04372884950323037, "f1": 0.5040336248389939, "f1_std": 0.045725258616803405, "bacc": 0.509453781512605, "bacc_std": 0.04396272097349049} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.040252878465593854, "f1": 0.5670158474348643, "f1_std": 0.041397233019437076, "bacc": 0.5688025210084033, "bacc_std": 0.040329913229860764} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.042583394339403474, "f1": 0.5836690840719849, "f1_std": 0.04453469887474047, "bacc": 0.5850840336134454, "bacc_std": 0.0432035729987184} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04110751809224775, "f1": 0.5441176470588236, "f1_std": 0.04192912448369498, "bacc": 0.5441176470588236, "bacc_std": 0.04186767393995724} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 21.54434690031882, "split": "test", "acc": 0.5080645161290323, "acc_std": 0.045309305253555074, "f1": 0.5041625696492953, "f1_std": 0.0454497409870625, "bacc": 0.5042016806722689, "bacc_std": 0.045313691914442664} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.044979527659021946, "f1": 0.5473272490221643, "f1_std": 0.04500918156484203, "bacc": 0.5488445378151261, "bacc_std": 0.045125245127530515} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 166.81005372000556, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04151331857544904, "f1": 0.5475675675675675, "f1_std": 0.0435652446528134, "bacc": 0.5509453781512605, "bacc_std": 0.0418444896660064} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.04163583701617468, "f1": 0.4972972972972973, "f1_std": 0.043536487093062674, "bacc": 0.5021008403361344, "bacc_std": 0.041915279964627454} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 2.782559402207126, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04389023533252272, "f1": 0.5197649572649572, "f1_std": 0.04557488235891501, "bacc": 0.5215336134453781, "bacc_std": 0.04448019178421936} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 1291.5496650148827, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.043522620812314707, "f1": 0.5626959247648903, "f1_std": 0.04373684121343223, "bacc": 0.5635504201680672, "bacc_std": 0.043855624599864106} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 1291.5496650148827, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04421572318643069, "f1": 0.5441176470588236, "f1_std": 0.04452513421200618, "bacc": 0.5441176470588236, "bacc_std": 0.04431630267818382} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04318111555871272, "f1": 0.5337093741606231, "f1_std": 0.044751098760600974, "bacc": 0.5362394957983193, "bacc_std": 0.043485948906254554} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04641479275481498, "f1": 0.5292707292707293, "f1_std": 0.047221258004933486, "bacc": 0.5304621848739496, "bacc_std": 0.04647878301009199} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04192573087579157, "f1": 0.5694444444444444, "f1_std": 0.04345186564870351, "bacc": 0.5703781512605042, "bacc_std": 0.04233421209744486} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04479807859712018, "f1": 0.5544708833299437, "f1_std": 0.04778610336684719, "bacc": 0.5582983193277311, "bacc_std": 0.045636862691834604} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04619394260058858, "f1": 0.5873947935016637, "f1_std": 0.046243546784963126, "bacc": 0.5887605042016807, "bacc_std": 0.0460513093147577} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.044350073057763145, "f1": 0.5765651155005022, "f1_std": 0.04662290971957915, "bacc": 0.5777310924369747, "bacc_std": 0.04516150895323189} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 166.81005372000556, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.043331872157120095, "f1": 0.5366764995083579, "f1_std": 0.04357148788371103, "bacc": 0.5367647058823529, "bacc_std": 0.04351076813726926} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04546692374734435, "f1": 0.5915678524374176, "f1_std": 0.04635690247737306, "bacc": 0.5913865546218487, "bacc_std": 0.046032149427751544} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.532258064516129, "acc_std": 0.045297846539815126, "f1": 0.5107482993197279, "f1_std": 0.048082712211417925, "bacc": 0.5168067226890757, "bacc_std": 0.04571351628469196} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.040363218232150305, "f1": 0.5931704050887178, "f1_std": 0.04223721759892342, "bacc": 0.5940126050420168, "bacc_std": 0.040949968416033405} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.0427083708746652, "f1": 0.5752305665349143, "f1_std": 0.043600070890850266, "bacc": 0.5751050420168067, "bacc_std": 0.04332722686337498} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 21.54434690031882, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04625052802240984, "f1": 0.5498646953996436, "f1_std": 0.04700164936463469, "bacc": 0.5498949579831933, "bacc_std": 0.046574576787682884} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.044380215614407165, "f1": 0.6025641025641025, "f1_std": 0.04597612069377747, "bacc": 0.6029411764705883, "bacc_std": 0.04491192886751429} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.044003819245840514, "f1": 0.5529334644378892, "f1_std": 0.04441113376689632, "bacc": 0.553046218487395, "bacc_std": 0.04434268957605878} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 166.81005372000556, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.043417317691749516, "f1": 0.5880957223239103, "f1_std": 0.0464279268058896, "bacc": 0.5908613445378151, "bacc_std": 0.04433107881938251} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04355679727631296, "f1": 0.5588932806324111, "f1_std": 0.04411240046637215, "bacc": 0.5588235294117647, "bacc_std": 0.04389249520316596} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04480494203081861, "f1": 0.5441176470588236, "f1_std": 0.04564049266497684, "bacc": 0.5441176470588236, "bacc_std": 0.04547978739892274} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04618177674843228, "f1": 0.5239442311578096, "f1_std": 0.04742992350814339, "bacc": 0.5273109243697479, "bacc_std": 0.046203307416906966} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.040992004499443714, "f1": 0.5880957223239103, "f1_std": 0.043919093087948745, "bacc": 0.5908613445378151, "bacc_std": 0.04182648833617171} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04289370195044404, "f1": 0.6017043592264831, "f1_std": 0.043255655210732276, "bacc": 0.601890756302521, "bacc_std": 0.04319377191498198} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 21.54434690031882, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042119506283589805, "f1": 0.5826018084614877, "f1_std": 0.04310218598977751, "bacc": 0.5824579831932774, "bacc_std": 0.042652699931173} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 0.000774263682681127, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.03626321209585334, "f1": 0.5814345991561182, "f1_std": 0.041316923060690874, "bacc": 0.5919117647058824, "bacc_std": 0.037228474592613955} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04592810685146317, "f1": 0.5634941329856584, "f1_std": 0.046220876806691326, "bacc": 0.5651260504201681, "bacc_std": 0.04627832685663943} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.041614466087180835, "f1": 0.5854473942969518, "f1_std": 0.04195262204733698, "bacc": 0.585609243697479, "bacc_std": 0.041796710550513094} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04358515771389211, "f1": 0.5782312925170068, "f1_std": 0.04682076854123017, "bacc": 0.5819327731092437, "bacc_std": 0.04453093328937262} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | abide_dx | train | 100 | 210.33 | 1049.3 | 0.84306 | 0.12838 | 0.83743 | 0.13439 | 0.83687 | 0.13409 | +| flat_mae | reg | logistic | abide_dx | test | 100 | 210.33 | 1049.3 | 0.56371 | 0.036329 | 0.55299 | 0.03692 | 0.55525 | 0.03607 | + + +done! total time: 0:05:30 diff --git a/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..207e2bc358ca8d66246cdeaeb52fa2895331426f --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..6f58b43ed45e991982a7affc1ed3374d50a4ecc9 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,train,0.736986301369863,0.022671072465599785,0.726027397260274,0.024026307106920976,0.7232246443182512,0.02351070571581182 +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,test,0.6307692307692307,0.05644710777649605,0.61,0.06109391751171119,0.6105212355212355,0.05829199236291242 +flat_mae,patch,logistic,adhd200_dx,1,0.046415888336127774,train,0.8410958904109589,0.01898728100419976,0.835891472868217,0.01987320270215835,0.8319594553336997,0.019942070504266093 +flat_mae,patch,logistic,adhd200_dx,1,0.046415888336127774,test,0.49230769230769234,0.06346966661234053,0.4844989185291997,0.06376734958371198,0.4845559845559846,0.06385728634023324 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,train,0.736986301369863,0.022377035651001173,0.7283720930232558,0.023350672748464123,0.7260945228063748,0.023089498615033618 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,test,0.676923076923077,0.0526765186900366,0.6500897205844656,0.05987911749736285,0.6510617760617761,0.05543656868978343 +flat_mae,patch,logistic,adhd200_dx,3,0.046415888336127774,train,0.8547945205479452,0.019631814590533782,0.8517863025873231,0.02016818591553664,0.8505526042620749,0.02039022304446573 +flat_mae,patch,logistic,adhd200_dx,3,0.046415888336127774,test,0.49230769230769234,0.059535114197297095,0.4844989185291997,0.06082565635246116,0.4845559845559846,0.06087637786403143 +flat_mae,patch,logistic,adhd200_dx,4,0.005994842503189409,train,0.7342465753424657,0.022483528551099086,0.7252636398202826,0.023660466860713537,0.7229498687183245,0.023422341465873697 +flat_mae,patch,logistic,adhd200_dx,4,0.005994842503189409,test,0.676923076923077,0.05429889665098908,0.6690909090909091,0.056265262037081584,0.6684362934362934,0.05601510831727781 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,train,0.7616438356164383,0.022003083425742743,0.752542372881356,0.02338303185864446,0.7493741222446113,0.023086928206288267 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,test,0.5076923076923077,0.0627418039627252,0.5019157088122606,0.06340110117435203,0.5024131274131274,0.0636619959059035 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,train,0.7479452054794521,0.022210021253891897,0.7380328278100231,0.023352905916340778,0.7350857910484215,0.022942470908238352 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,test,0.676923076923077,0.05893335118298301,0.6690909090909091,0.060831191550194876,0.6684362934362934,0.06036481614543186 +flat_mae,patch,logistic,adhd200_dx,7,0.3593813663804626,train,0.9671232876712329,0.009128594449846133,0.9664686428221461,0.00934357534824725,0.9651340294315198,0.009782695749385418 +flat_mae,patch,logistic,adhd200_dx,7,0.3593813663804626,test,0.6153846153846154,0.060461060871083015,0.61207925519217,0.06090581194746758,0.6143822393822393,0.06129245287722318 +flat_mae,patch,logistic,adhd200_dx,8,0.046415888336127774,train,0.8493150684931506,0.01819578764258218,0.8454116324377604,0.018851207667709077,0.8428283568419125,0.01904629499032738 +flat_mae,patch,logistic,adhd200_dx,8,0.046415888336127774,test,0.6461538461538462,0.06176769612041289,0.6336682185738789,0.0646208856459686,0.6327220077220077,0.06323641801879819 +flat_mae,patch,logistic,adhd200_dx,9,0.005994842503189409,train,0.7561643835616438,0.02279904789075827,0.7473969875817451,0.024029911191017744,0.7445197533125725,0.023771067565846873 +flat_mae,patch,logistic,adhd200_dx,9,0.005994842503189409,test,0.6,0.05836327774679137,0.570630081300813,0.06493302827828147,0.5748069498069498,0.060318293698718675 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,train,0.8575342465753425,0.018247890857181214,0.8534408203607611,0.0189805804587479,0.8501099102399707,0.019100950744978005 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,test,0.5076923076923077,0.06131377443047558,0.49317738791423005,0.06318566848161157,0.49372586872586877,0.06226510902650502 +flat_mae,patch,logistic,adhd200_dx,11,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,11,166.81005372000556,test,0.46153846153846156,0.059838828897188334,0.45691095726903797,0.060201859250893594,0.4575289575289575,0.06037687690621589 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,train,0.7506849315068493,0.022030425744240825,0.7422576414808837,0.02307924055597505,0.7396653843805336,0.022823558248011702 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,test,0.5692307692307692,0.0581691148046776,0.545,0.06367267066462992,0.5477799227799228,0.06021001444873945 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,train,0.7479452054794521,0.022415617963644503,0.7396899224806202,0.023530060040975073,0.7372381999145142,0.023320219446715963 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,test,0.6,0.05926970947742583,0.5921814671814671,0.060467843380877184,0.5921814671814671,0.06030194409209051 +flat_mae,patch,logistic,adhd200_dx,14,0.3593813663804626,train,0.958904109589041,0.010205582053359401,0.9581181870338497,0.01042528929346622,0.9571350064114307,0.010686320476029009 +flat_mae,patch,logistic,adhd200_dx,14,0.3593813663804626,test,0.6307692307692307,0.05512225036414153,0.61,0.06024211655178937,0.6105212355212355,0.0572356371628652 +flat_mae,patch,logistic,adhd200_dx,15,0.046415888336127774,train,0.8438356164383561,0.017773538411567907,0.839195863380249,0.018572506069954517,0.835821579043781,0.018762675512063114 +flat_mae,patch,logistic,adhd200_dx,15,0.046415888336127774,test,0.5230769230769231,0.06221654111380413,0.5226249703861644,0.06227191142422147,0.528957528957529,0.06284885593817233 +flat_mae,patch,logistic,adhd200_dx,16,0.000774263682681127,train,0.6904109589041096,0.022845130197235792,0.680590727102355,0.023703954559146614,0.6790926299077975,0.02335524231483464 +flat_mae,patch,logistic,adhd200_dx,16,0.000774263682681127,test,0.5846153846153846,0.06200573365356755,0.5830363506771205,0.06226449636624405,0.5873552123552124,0.06259884347180143 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,train,0.7534246575342466,0.02036819202550118,0.7437277663358921,0.021465903661172733,0.7406576296024913,0.02112677723754414 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,test,0.5692307692307692,0.05786588043349203,0.5190274841437632,0.06655771411859447,0.5347490347490347,0.05926772193030242 +flat_mae,patch,logistic,adhd200_dx,18,0.3593813663804626,train,0.9506849315068493,0.010898048842256657,0.9497029642332191,0.01115294589933462,0.9484185137693106,0.011490516196262464 +flat_mae,patch,logistic,adhd200_dx,18,0.3593813663804626,test,0.5846153846153846,0.06058473258573654,0.5810455956075435,0.060658633532547815,0.583011583011583,0.06082974823104945 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,train,0.7397260273972602,0.02279155670931678,0.7265917602996255,0.024681394433803655,0.7234994199181779,0.023943298042293254 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,test,0.7076923076923077,0.05071765212119338,0.677124183006536,0.060104572604927416,0.678088803088803,0.054254743928171006 +flat_mae,patch,logistic,adhd200_dx,20,0.046415888336127774,train,0.8438356164383561,0.017965501437032887,0.8388820481843737,0.018880542050266312,0.8351041094217501,0.019096944118937996 +flat_mae,patch,logistic,adhd200_dx,20,0.046415888336127774,test,0.6615384615384615,0.05558084894305564,0.6474358974358974,0.05908604396647506,0.6462355212355213,0.057597766628503265 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,train,0.8493150684931506,0.018855382107384158,0.8442216514444901,0.01979351211309589,0.8399584783537889,0.019874718438091184 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,test,0.6461538461538462,0.059686367666386776,0.644808743169399,0.0598042486417191,0.6500965250965252,0.05974803936272061 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,train,0.7342465753424657,0.02343105191903794,0.7222026065328092,0.025095725997464624,0.71936252060817,0.024439796745286624 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,test,0.5846153846153846,0.06195932240701229,0.578226387887527,0.06321031258420289,0.5786679536679536,0.06332420252019956 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,train,0.7589041095890411,0.020565226705446873,0.7488584474885844,0.021912479217794142,0.7455119985345301,0.021490270321397862 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,test,0.6,0.05290156986966036,0.5626293995859213,0.06011219713564719,0.5704633204633205,0.054513799997934605 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,train,0.736986301369863,0.021998402507721428,0.726642950758285,0.023393416170239576,0.7239421139402821,0.0230061650857203 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,test,0.6153846153846154,0.05396526605909029,0.5751633986928104,0.06257043338927759,0.583976833976834,0.055926309226842226 +flat_mae,patch,logistic,adhd200_dx,25,0.3593813663804626,train,0.9780821917808219,0.00748895778109515,0.9776799364031065,0.0076431089436857025,0.9769951761616902,0.007965834869762002 +flat_mae,patch,logistic,adhd200_dx,25,0.3593813663804626,test,0.6153846153846154,0.06072708178019476,0.6150201374081972,0.06104743528002603,0.6230694980694981,0.06147535762829122 +flat_mae,patch,logistic,adhd200_dx,26,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,26,2.782559402207126,test,0.5230769230769231,0.05807675802319897,0.49987589972697943,0.06048281525216684,0.502895752895753,0.058700483953276664 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,train,0.7452054794520548,0.022283644802205443,0.7365929742606834,0.023359809576144863,0.7340935458264639,0.023099528578929423 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,test,0.6153846153846154,0.05616472813421724,0.5966741126830479,0.05998318128833522,0.597007722007722,0.05748681059407874 +flat_mae,patch,logistic,adhd200_dx,28,0.000774263682681127,train,0.6904109589041096,0.022621288528495345,0.6778766176459103,0.02402477235156567,0.6762227514196739,0.02348347230967364 +flat_mae,patch,logistic,adhd200_dx,28,0.000774263682681127,test,0.5538461538461539,0.05498984091221119,0.5167905665214048,0.06118312058663456,0.5255791505791506,0.05634713413972269 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,train,0.7424657534246575,0.021659627323844792,0.7334855828983347,0.0227762576998466,0.7309488917384136,0.022491026934995394 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,test,0.5846153846153846,0.05880849272484415,0.5644080416976918,0.06277181937533093,0.5656370656370656,0.06034075757029706 +flat_mae,patch,logistic,adhd200_dx,30,0.005994842503189409,train,0.7150684931506849,0.023238590664626444,0.7051329853343276,0.02457189612509015,0.703089698968065,0.024199796729066667 +flat_mae,patch,logistic,adhd200_dx,30,0.005994842503189409,test,0.6,0.06106943569684725,0.588206627680312,0.06286218139269198,0.5878378378378378,0.06212568403779811 +flat_mae,patch,logistic,adhd200_dx,31,0.046415888336127774,train,0.8575342465753425,0.018402104850767414,0.853711925021581,0.01904704101815408,0.8508273798620016,0.019191912740595594 +flat_mae,patch,logistic,adhd200_dx,31,0.046415888336127774,test,0.6,0.057051696685327044,0.5775,0.06164073871817058,0.5791505791505791,0.058829090792381 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,train,0.8328767123287671,0.019748294129768314,0.8282352941176471,0.02049006195056883,0.8253953715576724,0.02056743948451976 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,test,0.5538461538461539,0.062494693739245814,0.5500119360229172,0.06278335547379611,0.5516409266409266,0.06306627942051389 +flat_mae,patch,logistic,adhd200_dx,33,0.3593813663804626,train,0.9616438356164384,0.009864986104149038,0.9609971301215119,0.01003770887706725,0.9609971301215119,0.01016517499791273 +flat_mae,patch,logistic,adhd200_dx,33,0.3593813663804626,test,0.5076923076923077,0.06222907861162714,0.5066413662239089,0.062375120045571,0.5111003861003861,0.06314012951513881 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,train,0.8191780821917808,0.018846666746099795,0.8143266740658528,0.019576431801346703,0.8118245099835135,0.01969303068314216 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,test,0.6,0.059823574937838644,0.5976190476190476,0.06038866409891794,0.6008687258687259,0.06101389118339152 +flat_mae,patch,logistic,adhd200_dx,35,0.046415888336127774,train,0.8191780821917808,0.02044405223795972,0.8139825796886583,0.021255021582324524,0.8111070403614825,0.02130973714480295 +flat_mae,patch,logistic,adhd200_dx,35,0.046415888336127774,test,0.6307692307692307,0.05904730026545713,0.6198830409356726,0.06131830300213717,0.6192084942084942,0.06047067536240479 +flat_mae,patch,logistic,adhd200_dx,36,0.000774263682681127,train,0.6767123287671233,0.02268551054783455,0.6616547259929613,0.024090781430180955,0.6604994809794223,0.023351259091518566 +flat_mae,patch,logistic,adhd200_dx,36,0.000774263682681127,test,0.5846153846153846,0.0573216813075907,0.5644080416976918,0.061183225213688026,0.5656370656370656,0.058440390555011684 +flat_mae,patch,logistic,adhd200_dx,37,0.3593813663804626,train,0.9561643835616438,0.010413281644280783,0.9551459293394777,0.010727963369363817,0.9525554130793186,0.011215977048308531 +flat_mae,patch,logistic,adhd200_dx,37,0.3593813663804626,test,0.5538461538461539,0.06058676403581364,0.5521501544309813,0.06061313197006737,0.555984555984556,0.06125868275669722 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,train,0.736986301369863,0.022441481040874678,0.7272387594968239,0.023688890044074522,0.724659583562313,0.023352387491527574 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,test,0.6,0.05771671278677644,0.5833333333333333,0.06018052941950577,0.5834942084942085,0.0583012178782073 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,train,0.7342465753424657,0.02138774032692531,0.720835797358565,0.02316007656302751,0.7179275813641082,0.022463613959903855 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,test,0.6307692307692307,0.05557134334513997,0.5962732919254659,0.06360166806099267,0.6018339768339769,0.05774939578125974 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,train,0.7506849315068493,0.021713386431177116,0.7399979647590196,0.023095349742332207,0.73679550589241,0.022657001661702633 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,test,0.6461538461538462,0.055046833207313996,0.6167649320687003,0.06327763037070143,0.6196911196911197,0.05784712085957968 +flat_mae,patch,logistic,adhd200_dx,41,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,41,2.782559402207126,test,0.5230769230769231,0.06312312754267828,0.521263958184842,0.06310035117410394,0.5246138996138996,0.06349741374939191 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,train,0.7397260273972602,0.022034823937147244,0.7291883068704555,0.023260962639654645,0.7263692984063015,0.022818241643245238 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,test,0.5692307692307692,0.06220817123214069,0.5691287878787878,0.062329257772546665,0.5781853281853282,0.06244866035129053 +flat_mae,patch,logistic,adhd200_dx,43,0.046415888336127774,train,0.8438356164383561,0.01884346516423556,0.8385569842242898,0.0198370497493733,0.8343866397997192,0.019972175145699777 +flat_mae,patch,logistic,adhd200_dx,43,0.046415888336127774,test,0.6615384615384615,0.05345489499397196,0.6515594541910331,0.055231601484318785,0.6505791505791505,0.05460599964563224 +flat_mae,patch,logistic,adhd200_dx,44,0.005994842503189409,train,0.736986301369863,0.02243594146272497,0.726642950758285,0.023955510679938057,0.7239421139402821,0.023545710904846924 +flat_mae,patch,logistic,adhd200_dx,44,0.005994842503189409,test,0.6923076923076923,0.05600516459701249,0.6832358674463938,0.0584889271458851,0.6819498069498069,0.05795302792475816 +flat_mae,patch,logistic,adhd200_dx,45,0.3593813663804626,train,0.9726027397260274,0.008326142340549934,0.9720999205038832,0.008501563877055213,0.9714233376076205,0.008811643382863315 +flat_mae,patch,logistic,adhd200_dx,45,0.3593813663804626,test,0.49230769230769234,0.0592998598040591,0.4595616024187452,0.06255599770317187,0.4671814671814672,0.059586941198270196 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,train,0.7205479452054795,0.021412909927848604,0.7060727594265143,0.023063906094483513,0.7036392501679184,0.02227857088354947 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,test,0.6615384615384615,0.05616627049064592,0.6366869918699187,0.06188476865140096,0.6375482625482626,0.0578532082547815 +flat_mae,patch,logistic,adhd200_dx,47,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,47,1291.5496650148827,test,0.4461538461538462,0.06208321195863332,0.4353281853281853,0.06221683377673098,0.4353281853281853,0.06187134648254613 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,train,0.8383561643835616,0.019413023546194553,0.8332287867171588,0.020285912066087358,0.8295322708676803,0.02034117140839064 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,test,0.7384615384615385,0.057188495553464586,0.7344388368180725,0.058010089952723,0.7355212355212355,0.05770930226510579 +flat_mae,patch,logistic,adhd200_dx,49,0.3593813663804626,train,0.9616438356164384,0.010038767413960403,0.9609398887054363,0.010237723144509263,0.960279660499481,0.010451363443225363 +flat_mae,patch,logistic,adhd200_dx,49,0.3593813663804626,test,0.47692307692307695,0.06214367014946067,0.47078544061302685,0.06227614531707869,0.471042471042471,0.06267683537982019 +flat_mae,patch,logistic,adhd200_dx,50,0.005994842503189409,train,0.7315068493150685,0.021202216307636076,0.7175993178803765,0.023074126998925318,0.7147829272760579,0.02234056923697512 +flat_mae,patch,logistic,adhd200_dx,50,0.005994842503189409,test,0.6307692307692307,0.058338851021095185,0.6198830409356726,0.060517607895642575,0.6192084942084942,0.05970006035320329 +flat_mae,patch,logistic,adhd200_dx,51,0.046415888336127774,train,0.8493150684931506,0.018395936467431942,0.8448381137879596,0.019209337793439484,0.8413934175978507,0.01940085577952177 +flat_mae,patch,logistic,adhd200_dx,51,0.046415888336127774,test,0.6,0.06060213834179148,0.588206627680312,0.06296038057677508,0.5878378378378378,0.06213841530038157 +flat_mae,patch,logistic,adhd200_dx,52,0.3593813663804626,train,0.9534246575342465,0.011159188975575532,0.9526729667681091,0.011328881093854683,0.9529981071014227,0.011303793406234474 +flat_mae,patch,logistic,adhd200_dx,52,0.3593813663804626,test,0.5846153846153846,0.06015379893761672,0.5745454545454545,0.061643760577646395,0.5743243243243243,0.06141562965815703 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,train,0.736986301369863,0.022304436529752083,0.7289100699387263,0.023322963993510706,0.7268119924284057,0.02315220139426738 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,test,0.6307692307692307,0.05473409417436958,0.5962732919254659,0.06330534820694765,0.6018339768339769,0.057216699588721484 +flat_mae,patch,logistic,adhd200_dx,54,0.000774263682681127,train,0.7013698630136986,0.022489382161408448,0.6946872433983838,0.02313721933352806,0.6938236551260915,0.023126412654679972 +flat_mae,patch,logistic,adhd200_dx,54,0.000774263682681127,test,0.5846153846153846,0.060374931119565746,0.5699583435432491,0.06342570900661158,0.5699806949806949,0.062065476222793084 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,train,0.7589041095890411,0.02233476278860868,0.7494227048617612,0.023670147636037454,0.746229468156561,0.023353961964844617 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,test,0.5846153846153846,0.061365132824978214,0.578226387887527,0.06272632256906939,0.5786679536679536,0.06257792413472815 +flat_mae,patch,logistic,adhd200_dx,56,0.046415888336127774,train,0.8465753424657534,0.019069897602191743,0.8421670373115888,0.019833573366169883,0.8389662331318313,0.019975607899248588 +flat_mae,patch,logistic,adhd200_dx,56,0.046415888336127774,test,0.6307692307692307,0.05902384634933429,0.6285714285714286,0.05917443196616547,0.6322393822393823,0.05917293796877482 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,train,0.7506849315068493,0.022401367140543773,0.7417205153925708,0.023546249644148417,0.7389479147585027,0.023220703875411117 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,test,0.5538461538461539,0.056174293432170105,0.5250692869740489,0.060803143173105714,0.5299227799227799,0.05748310715789043 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,train,0.7479452054794521,0.021763215766929314,0.7391561024111359,0.022690627854469105,0.7365207302924833,0.022376501248246264 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,test,0.5692307692307692,0.05713370594517378,0.5608108108108107,0.05826054684871688,0.5608108108108107,0.057933356271142836 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,train,0.7424657534246575,0.02224109184055689,0.734031007751938,0.023148706416558234,0.7316663613604445,0.022884717822337693 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,test,0.6307692307692307,0.058478540037952505,0.61,0.06326772974240158,0.6105212355212355,0.060426747779189965 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,train,0.7424657534246575,0.02228209257375057,0.734031007751938,0.02317448774349867,0.7316663613604445,0.0228891620445195 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,test,0.6,0.05975136650166729,0.5833333333333333,0.06230900776667523,0.5834942084942085,0.060738241701349914 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,train,0.7205479452054795,0.022534887945238315,0.7082288401253918,0.023901401613578115,0.7057916590340111,0.02329196835480933 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,test,0.6153846153846154,0.05708155900725054,0.5966741126830479,0.06083665109983289,0.597007722007722,0.058555694672886856 +flat_mae,patch,logistic,adhd200_dx,62,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,62,1291.5496650148827,test,0.5384615384615384,0.06289471619654378,0.5357142857142857,0.06326821451985225,0.5381274131274132,0.0637710570356842 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,train,0.7452054794520548,0.023712426738637295,0.7376353560414589,0.024657728405011667,0.7355284850705257,0.024548516724411006 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,test,0.6153846153846154,0.056151772353698136,0.5966741126830479,0.059562975713683096,0.597007722007722,0.05751246377892909 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,train,0.726027397260274,0.021732584475799194,0.7152530737065468,0.023033752694536905,0.7127984368321426,0.022657283971097588 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,test,0.6,0.05841957198176434,0.5775,0.06265739863100267,0.5791505791505791,0.05987853938558305 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,train,0.7342465753424657,0.021960530964612424,0.721529640320589,0.02352048861376513,0.7186450509861391,0.02290717376727102 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,test,0.6923076923076923,0.05481259303325821,0.6832358674463938,0.057284312506670154,0.6819498069498069,0.056757767462371635 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,train,0.7397260273972602,0.02329332727446008,0.7291883068704555,0.024712881896305935,0.7263692984063015,0.024221932288657398 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,test,0.6,0.05926281650511004,0.5833333333333333,0.062406521918864415,0.5834942084942085,0.06063199390081958 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,train,0.736986301369863,0.021744019912416968,0.7272387594968239,0.023132108177145897,0.724659583562313,0.022811464384917938 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,test,0.6461538461538462,0.0506793049741482,0.6003742314889067,0.0632871223715093,0.6110038610038611,0.05374128222830568 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,train,0.7452054794520548,0.022394055319265745,0.7309560985662088,0.024607698561027607,0.7276363192281858,0.023714090866786774 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,test,0.6923076923076923,0.055371998562920514,0.675,0.060691192310872694,0.6732625482625483,0.05793218202438802 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,train,0.7561643835616438,0.021677795449206182,0.7484298647089345,0.022741531611702986,0.7459546925566343,0.022635743945846727 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,test,0.5384615384615384,0.06335726818568381,0.5192307692307693,0.06590136385137899,0.5207528957528957,0.06398582668621695 +flat_mae,patch,logistic,adhd200_dx,70,0.000774263682681127,train,0.6931506849315069,0.02417692899574361,0.6780696780696781,0.02600271086467814,0.6764975270196006,0.02513127681626824 +flat_mae,patch,logistic,adhd200_dx,70,0.000774263682681127,test,0.6153846153846154,0.05657586413274538,0.5834401435529352,0.0641531600167423,0.5883204633204633,0.0584955240240973 +flat_mae,patch,logistic,adhd200_dx,71,0.046415888336127774,train,0.8383561643835616,0.018993537927870407,0.8328923170040894,0.01991557024639741,0.8288148012456494,0.019926591057252335 +flat_mae,patch,logistic,adhd200_dx,71,0.046415888336127774,test,0.5846153846153846,0.060380677970600406,0.5699583435432491,0.062415240514641535,0.5699806949806949,0.061080549458561485 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,train,0.736986301369863,0.021777034908134978,0.7272387594968239,0.02284135302857527,0.724659583562313,0.022468928506575846 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,test,0.6307692307692307,0.05876439770702855,0.6153846153846154,0.061694403404769234,0.6148648648648649,0.0600340155785898 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,train,0.6493150684931507,0.02460395762308798,0.6292063492063491,0.026438006683588103,0.6297704097209501,0.025255644727407107 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,test,0.5846153846153846,0.05988628276913942,0.5644080416976918,0.06289834954020679,0.5656370656370656,0.06084252420438804 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,train,0.7232876712328767,0.02161667864512175,0.7133381544466995,0.02265615762749864,0.7110887219881541,0.022288059887483433 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,test,0.6307692307692307,0.05987229802676625,0.6198830409356726,0.0617080769939448,0.6192084942084942,0.06049774823971548 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,train,0.7315068493150685,0.021605497517702,0.7203196347031964,0.022883744666750444,0.7176528057641814,0.022376828357438323 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,test,0.5692307692307692,0.06247195820630387,0.5666666666666667,0.06267517293474048,0.5694980694980695,0.06300016208765352 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,train,0.736986301369863,0.021995256999089496,0.726027397260274,0.023349044586800004,0.7232246443182512,0.02285512212664699 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,test,0.5384615384615384,0.05078694329714533,0.4846723044397463,0.057781043294810965,0.5033783783783784,0.05132175544024846 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,train,0.8328767123287671,0.019536971509686793,0.8272276497838891,0.020573652355880843,0.8232429626915796,0.02065443881036219 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,test,0.6307692307692307,0.059188124508555234,0.6198830409356726,0.06182880283584087,0.6192084942084942,0.06059147180978105 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,train,0.852054794520548,0.018528019081556123,0.847803928836175,0.01930032541611592,0.844538071685901,0.01951548343485174 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,test,0.5538461538461539,0.05757205107592361,0.5321419707123356,0.06247008871010618,0.5342664092664092,0.059503432631688245 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,train,0.7397260273972602,0.022191967528387598,0.7314700803072895,0.023046610658625857,0.7292391768944251,0.02275152657592622 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,test,0.6615384615384615,0.056460750361299196,0.6474358974358974,0.059891082985933396,0.6462355212355213,0.058099117943830575 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,train,0.7589041095890411,0.02139902400953617,0.7499688628720886,0.02256880064599966,0.7469469377785919,0.022299636377421047 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,test,0.6,0.062050752275763475,0.5833333333333333,0.06559790941373282,0.5834942084942085,0.06376988841160226 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,train,0.7534246575342466,0.021759825151351206,0.7453488372093022,0.022895150402121048,0.742810038468584,0.022733025738111503 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,test,0.6,0.05374562924296164,0.5626293995859213,0.06130198904583865,0.5704633204633205,0.05542340387438326 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,train,0.7342465753424657,0.023228627163694518,0.7240989674654198,0.024597942126075873,0.7215149294742627,0.024161735465065438 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,test,0.676923076923077,0.056837982133585215,0.6741465743614228,0.05735189513063677,0.6771235521235521,0.05735901507753353 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,train,0.7315068493150685,0.021341156693987857,0.7190013826043238,0.022734341000566063,0.7162178665201197,0.022075051932901796 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,test,0.7230769230769231,0.05376609432912422,0.7115384615384616,0.05721667524391386,0.708976833976834,0.05602954785781308 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,train,0.7342465753424657,0.021446591442025018,0.7246910988250481,0.022504082739348476,0.7222323990962936,0.022159505810369337 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,test,0.5384615384615384,0.06191420672255148,0.5357142857142857,0.06239070810887396,0.5381274131274132,0.06325492067059366 +flat_mae,patch,logistic,adhd200_dx,85,0.000774263682681127,train,0.6794520547945205,0.02265882357067674,0.6566818609362564,0.02542456986580551,0.6571869084691946,0.02370528116809493 +flat_mae,patch,logistic,adhd200_dx,85,0.000774263682681127,test,0.6307692307692307,0.055720987090744044,0.61,0.05982703118599223,0.6105212355212355,0.05728436661374922 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,train,0.726027397260274,0.022459602231731112,0.7152530737065468,0.023883994026181982,0.7127984368321426,0.023414489150963357 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,test,0.5846153846153846,0.06141344223843803,0.5644080416976918,0.06482630161447209,0.5656370656370656,0.06264175796256376 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,train,0.7452054794520548,0.022441697110233548,0.7336581691500129,0.023974162317456124,0.7305061977163094,0.02339096295800568 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,test,0.6153846153846154,0.056701907130089976,0.5905769715293525,0.06197558964448915,0.5926640926640927,0.05843548127416353 +flat_mae,patch,logistic,adhd200_dx,88,0.000774263682681127,train,0.6767123287671233,0.022774397730093203,0.66324200913242,0.02405759175371941,0.6619344202234841,0.023447925708690993 +flat_mae,patch,logistic,adhd200_dx,88,0.000774263682681127,test,0.6307692307692307,0.05778559841877027,0.6198830409356726,0.06006733660235526,0.6192084942084942,0.05942084302347426 +flat_mae,patch,logistic,adhd200_dx,89,0.3593813663804626,train,0.958904109589041,0.010885246987970287,0.9581808877109096,0.011085606004142759,0.9578524760334616,0.011204986165764652 +flat_mae,patch,logistic,adhd200_dx,89,0.3593813663804626,test,0.5846153846153846,0.06056193660774458,0.5745454545454545,0.06148901456792756,0.5743243243243243,0.060898362511979574 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,train,0.7342465753424657,0.02148399448753773,0.7222026065328092,0.023106928132849803,0.71936252060817,0.02253052987575046 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,test,0.7230769230769231,0.05589964803903129,0.7149122807017544,0.058135616864521795,0.7133204633204633,0.057311116346538314 +flat_mae,patch,logistic,adhd200_dx,91,0.005994842503189409,train,0.7479452054794521,0.021065679132306905,0.7362053795877326,0.02254515027163367,0.7329333821823288,0.022041972052992218 +flat_mae,patch,logistic,adhd200_dx,91,0.005994842503189409,test,0.5384615384615384,0.04934466632448289,0.4724025974025974,0.05674185993197115,0.49903474903474904,0.049460252488525966 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,train,0.7424657534246575,0.02086819586833093,0.732337889284154,0.022206506971558716,0.7295139524943518,0.021820443529538044 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,test,0.5846153846153846,0.05664296986831604,0.5501153550371699,0.0616835870549611,0.556949806949807,0.057542353004983976 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,train,0.7342465753424657,0.022722290724700694,0.7228549734244495,0.024142433895972946,0.7200799902302009,0.02362131078709122 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,test,0.6615384615384615,0.048109106766247016,0.6130952380952381,0.06154465273811954,0.6245173745173745,0.05145965075485483 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,train,0.7534246575342466,0.02200305067642311,0.7448266219239373,0.02320556951708245,0.7420925688465531,0.02298285844478475 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,test,0.6615384615384615,0.05071418927758622,0.622093023255814,0.06174997341569947,0.6288610038610039,0.053915474110534135 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,train,0.7780821917808219,0.020773148365150693,0.7710429105777943,0.02184611344384471,0.7682420467729132,0.021807454218072835 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,test,0.4461538461538462,0.05917239081125188,0.4298245614035088,0.06096780062254247,0.430984555984556,0.06003696039894371 +flat_mae,patch,logistic,adhd200_dx,96,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,96,1291.5496650148827,test,0.6,0.05862774753531889,0.599146110056926,0.05956710352079246,0.6182432432432432,0.05771964209037853 +flat_mae,patch,logistic,adhd200_dx,97,0.3593813663804626,train,0.9726027397260274,0.008191918265214307,0.9721798780487805,0.008310683342456332,0.9728582768516822,0.008230780151985593 +flat_mae,patch,logistic,adhd200_dx,97,0.3593813663804626,test,0.5384615384615384,0.06012130538292349,0.5125,0.06290093559558944,0.5164092664092664,0.06049343559696568 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,train,0.736986301369863,0.022183867352790747,0.7253918495297806,0.02350130287481903,0.7225071746962203,0.02295956719294616 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,test,0.6461538461538462,0.0573660188469027,0.6289401836684041,0.06018647528876045,0.6283783783783784,0.058143782733863336 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7506849315068493,0.02306887775034877,0.7432776064491695,0.02413026548374929,0.7411003236245954,0.02398669171910817 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.5384615384615384,0.05886017932404871,0.5192307692307693,0.06118129800929612,0.5207528957528957,0.059680682892341845 +flat_mae,patch,logistic,adhd200_dx,100,0.046415888336127774,train,0.8356164383561644,0.01892858768604589,0.8305687937117039,0.019929427981668812,0.8271050864016609,0.02014799817760946 +flat_mae,patch,logistic,adhd200_dx,100,0.046415888336127774,test,0.6461538461538462,0.05523462596347914,0.6289401836684041,0.05916063835314509,0.6283783783783784,0.057051485134966516 diff --git a/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..51280e448a480520c28ee45fa81075a226a94530 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:55:58 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:13:59 time: 5.5590 data: 4.5145 max mem: 2698 +extract (train) [ 20/151] eta: 0:01:04 time: 0.2409 data: 0.0879 max mem: 3005 +extract (train) [ 40/151] eta: 0:00:38 time: 0.1880 data: 0.0635 max mem: 3005 +extract (train) [ 60/151] eta: 0:00:26 time: 0.1743 data: 0.0614 max mem: 3005 +extract (train) [ 80/151] eta: 0:00:18 time: 0.1807 data: 0.0607 max mem: 3005 +extract (train) [100/151] eta: 0:00:12 time: 0.1736 data: 0.0569 max mem: 3005 +extract (train) [120/151] eta: 0:00:07 time: 0.1837 data: 0.0641 max mem: 3005 +extract (train) [140/151] eta: 0:00:02 time: 0.1582 data: 0.0506 max mem: 3005 +extract (train) [150/151] eta: 0:00:00 time: 0.1559 data: 0.0510 max mem: 3005 +extract (train) Total time: 0:00:33 (0.2212 s / it) +extract (validation) [ 0/32] eta: 0:02:34 time: 4.8332 data: 4.6941 max mem: 3005 +extract (validation) [20/32] eta: 0:00:04 time: 0.1831 data: 0.0628 max mem: 3005 +extract (validation) [31/32] eta: 0:00:00 time: 0.1515 data: 0.0463 max mem: 3005 +extract (validation) Total time: 0:00:10 (0.3296 s / it) +extract (test) [ 0/33] eta: 0:02:04 time: 3.7855 data: 3.6538 max mem: 3005 +extract (test) [20/33] eta: 0:00:04 time: 0.1881 data: 0.0639 max mem: 3005 +extract (test) [32/33] eta: 0:00:00 time: 0.1607 data: 0.0525 max mem: 3005 +extract (test) Total time: 0:00:09 (0.2948 s / it) +feature extraction time: 0:00:53 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | train | 0.73699 | 0.022671 | 0.72603 | 0.024026 | 0.72322 | 0.023511 | +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | test | 0.63077 | 0.056447 | 0.61 | 0.061094 | 0.61052 | 0.058292 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.06346966661234053, "f1": 0.4844989185291997, "f1_std": 0.06376734958371198, "bacc": 0.4845559845559846, "bacc_std": 0.06385728634023324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.0526765186900366, "f1": 0.6500897205844656, "f1_std": 0.05987911749736285, "bacc": 0.6510617760617761, "bacc_std": 0.05543656868978343} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.059535114197297095, "f1": 0.4844989185291997, "f1_std": 0.06082565635246116, "bacc": 0.4845559845559846, "bacc_std": 0.06087637786403143} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05429889665098908, "f1": 0.6690909090909091, "f1_std": 0.056265262037081584, "bacc": 0.6684362934362934, "bacc_std": 0.05601510831727781} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.0627418039627252, "f1": 0.5019157088122606, "f1_std": 0.06340110117435203, "bacc": 0.5024131274131274, "bacc_std": 0.0636619959059035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05893335118298301, "f1": 0.6690909090909091, "f1_std": 0.060831191550194876, "bacc": 0.6684362934362934, "bacc_std": 0.06036481614543186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.060461060871083015, "f1": 0.61207925519217, "f1_std": 0.06090581194746758, "bacc": 0.6143822393822393, "bacc_std": 0.06129245287722318} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.06176769612041289, "f1": 0.6336682185738789, "f1_std": 0.0646208856459686, "bacc": 0.6327220077220077, "bacc_std": 0.06323641801879819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05836327774679137, "f1": 0.570630081300813, "f1_std": 0.06493302827828147, "bacc": 0.5748069498069498, "bacc_std": 0.060318293698718675} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06131377443047558, "f1": 0.49317738791423005, "f1_std": 0.06318566848161157, "bacc": 0.49372586872586877, "bacc_std": 0.06226510902650502} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 166.81005372000556, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.059838828897188334, "f1": 0.45691095726903797, "f1_std": 0.060201859250893594, "bacc": 0.4575289575289575, "bacc_std": 0.06037687690621589} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0581691148046776, "f1": 0.545, "f1_std": 0.06367267066462992, "bacc": 0.5477799227799228, "bacc_std": 0.06021001444873945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05926970947742583, "f1": 0.5921814671814671, "f1_std": 0.060467843380877184, "bacc": 0.5921814671814671, "bacc_std": 0.06030194409209051} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05512225036414153, "f1": 0.61, "f1_std": 0.06024211655178937, "bacc": 0.6105212355212355, "bacc_std": 0.0572356371628652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06221654111380413, "f1": 0.5226249703861644, "f1_std": 0.06227191142422147, "bacc": 0.528957528957529, "bacc_std": 0.06284885593817233} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06200573365356755, "f1": 0.5830363506771205, "f1_std": 0.06226449636624405, "bacc": 0.5873552123552124, "bacc_std": 0.06259884347180143} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05786588043349203, "f1": 0.5190274841437632, "f1_std": 0.06655771411859447, "bacc": 0.5347490347490347, "bacc_std": 0.05926772193030242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.3593813663804626, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06058473258573654, "f1": 0.5810455956075435, "f1_std": 0.060658633532547815, "bacc": 0.583011583011583, "bacc_std": 0.06082974823104945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05071765212119338, "f1": 0.677124183006536, "f1_std": 0.060104572604927416, "bacc": 0.678088803088803, "bacc_std": 0.054254743928171006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05558084894305564, "f1": 0.6474358974358974, "f1_std": 0.05908604396647506, "bacc": 0.6462355212355213, "bacc_std": 0.057597766628503265} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.059686367666386776, "f1": 0.644808743169399, "f1_std": 0.0598042486417191, "bacc": 0.6500965250965252, "bacc_std": 0.05974803936272061} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06195932240701229, "f1": 0.578226387887527, "f1_std": 0.06321031258420289, "bacc": 0.5786679536679536, "bacc_std": 0.06332420252019956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05290156986966036, "f1": 0.5626293995859213, "f1_std": 0.06011219713564719, "bacc": 0.5704633204633205, "bacc_std": 0.054513799997934605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05396526605909029, "f1": 0.5751633986928104, "f1_std": 0.06257043338927759, "bacc": 0.583976833976834, "bacc_std": 0.055926309226842226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06072708178019476, "f1": 0.6150201374081972, "f1_std": 0.06104743528002603, "bacc": 0.6230694980694981, "bacc_std": 0.06147535762829122} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 2.782559402207126, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05807675802319897, "f1": 0.49987589972697943, "f1_std": 0.06048281525216684, "bacc": 0.502895752895753, "bacc_std": 0.058700483953276664} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05616472813421724, "f1": 0.5966741126830479, "f1_std": 0.05998318128833522, "bacc": 0.597007722007722, "bacc_std": 0.05748681059407874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.000774263682681127, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05498984091221119, "f1": 0.5167905665214048, "f1_std": 0.06118312058663456, "bacc": 0.5255791505791506, "bacc_std": 0.05634713413972269} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05880849272484415, "f1": 0.5644080416976918, "f1_std": 0.06277181937533093, "bacc": 0.5656370656370656, "bacc_std": 0.06034075757029706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06106943569684725, "f1": 0.588206627680312, "f1_std": 0.06286218139269198, "bacc": 0.5878378378378378, "bacc_std": 0.06212568403779811} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.057051696685327044, "f1": 0.5775, "f1_std": 0.06164073871817058, "bacc": 0.5791505791505791, "bacc_std": 0.058829090792381} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.062494693739245814, "f1": 0.5500119360229172, "f1_std": 0.06278335547379611, "bacc": 0.5516409266409266, "bacc_std": 0.06306627942051389} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06222907861162714, "f1": 0.5066413662239089, "f1_std": 0.062375120045571, "bacc": 0.5111003861003861, "bacc_std": 0.06314012951513881} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.059823574937838644, "f1": 0.5976190476190476, "f1_std": 0.06038866409891794, "bacc": 0.6008687258687259, "bacc_std": 0.06101389118339152} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05904730026545713, "f1": 0.6198830409356726, "f1_std": 0.06131830300213717, "bacc": 0.6192084942084942, "bacc_std": 0.06047067536240479} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.0573216813075907, "f1": 0.5644080416976918, "f1_std": 0.061183225213688026, "bacc": 0.5656370656370656, "bacc_std": 0.058440390555011684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06058676403581364, "f1": 0.5521501544309813, "f1_std": 0.06061313197006737, "bacc": 0.555984555984556, "bacc_std": 0.06125868275669722} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05771671278677644, "f1": 0.5833333333333333, "f1_std": 0.06018052941950577, "bacc": 0.5834942084942085, "bacc_std": 0.0583012178782073} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05557134334513997, "f1": 0.5962732919254659, "f1_std": 0.06360166806099267, "bacc": 0.6018339768339769, "bacc_std": 0.05774939578125974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.055046833207313996, "f1": 0.6167649320687003, "f1_std": 0.06327763037070143, "bacc": 0.6196911196911197, "bacc_std": 0.05784712085957968} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 2.782559402207126, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06312312754267828, "f1": 0.521263958184842, "f1_std": 0.06310035117410394, "bacc": 0.5246138996138996, "bacc_std": 0.06349741374939191} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06220817123214069, "f1": 0.5691287878787878, "f1_std": 0.062329257772546665, "bacc": 0.5781853281853282, "bacc_std": 0.06244866035129053} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05345489499397196, "f1": 0.6515594541910331, "f1_std": 0.055231601484318785, "bacc": 0.6505791505791505, "bacc_std": 0.05460599964563224} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05600516459701249, "f1": 0.6832358674463938, "f1_std": 0.0584889271458851, "bacc": 0.6819498069498069, "bacc_std": 0.05795302792475816} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.3593813663804626, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.0592998598040591, "f1": 0.4595616024187452, "f1_std": 0.06255599770317187, "bacc": 0.4671814671814672, "bacc_std": 0.059586941198270196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05616627049064592, "f1": 0.6366869918699187, "f1_std": 0.06188476865140096, "bacc": 0.6375482625482626, "bacc_std": 0.0578532082547815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 1291.5496650148827, "split": "test", "acc": 0.4461538461538462, "acc_std": 0.06208321195863332, "f1": 0.4353281853281853, "f1_std": 0.06221683377673098, "bacc": 0.4353281853281853, "bacc_std": 0.06187134648254613} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.057188495553464586, "f1": 0.7344388368180725, "f1_std": 0.058010089952723, "bacc": 0.7355212355212355, "bacc_std": 0.05770930226510579} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.47692307692307695, "acc_std": 0.06214367014946067, "f1": 0.47078544061302685, "f1_std": 0.06227614531707869, "bacc": 0.471042471042471, "bacc_std": 0.06267683537982019} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.058338851021095185, "f1": 0.6198830409356726, "f1_std": 0.060517607895642575, "bacc": 0.6192084942084942, "bacc_std": 0.05970006035320329} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.06060213834179148, "f1": 0.588206627680312, "f1_std": 0.06296038057677508, "bacc": 0.5878378378378378, "bacc_std": 0.06213841530038157} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06015379893761672, "f1": 0.5745454545454545, "f1_std": 0.061643760577646395, "bacc": 0.5743243243243243, "bacc_std": 0.06141562965815703} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05473409417436958, "f1": 0.5962732919254659, "f1_std": 0.06330534820694765, "bacc": 0.6018339768339769, "bacc_std": 0.057216699588721484} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.060374931119565746, "f1": 0.5699583435432491, "f1_std": 0.06342570900661158, "bacc": 0.5699806949806949, "bacc_std": 0.062065476222793084} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.061365132824978214, "f1": 0.578226387887527, "f1_std": 0.06272632256906939, "bacc": 0.5786679536679536, "bacc_std": 0.06257792413472815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05902384634933429, "f1": 0.6285714285714286, "f1_std": 0.05917443196616547, "bacc": 0.6322393822393823, "bacc_std": 0.05917293796877482} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.056174293432170105, "f1": 0.5250692869740489, "f1_std": 0.060803143173105714, "bacc": 0.5299227799227799, "bacc_std": 0.05748310715789043} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05713370594517378, "f1": 0.5608108108108107, "f1_std": 0.05826054684871688, "bacc": 0.5608108108108107, "bacc_std": 0.057933356271142836} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.058478540037952505, "f1": 0.61, "f1_std": 0.06326772974240158, "bacc": 0.6105212355212355, "bacc_std": 0.060426747779189965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05975136650166729, "f1": 0.5833333333333333, "f1_std": 0.06230900776667523, "bacc": 0.5834942084942085, "bacc_std": 0.060738241701349914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05708155900725054, "f1": 0.5966741126830479, "f1_std": 0.06083665109983289, "bacc": 0.597007722007722, "bacc_std": 0.058555694672886856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 1291.5496650148827, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06289471619654378, "f1": 0.5357142857142857, "f1_std": 0.06326821451985225, "bacc": 0.5381274131274132, "bacc_std": 0.0637710570356842} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.056151772353698136, "f1": 0.5966741126830479, "f1_std": 0.059562975713683096, "bacc": 0.597007722007722, "bacc_std": 0.05751246377892909} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05841957198176434, "f1": 0.5775, "f1_std": 0.06265739863100267, "bacc": 0.5791505791505791, "bacc_std": 0.05987853938558305} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05481259303325821, "f1": 0.6832358674463938, "f1_std": 0.057284312506670154, "bacc": 0.6819498069498069, "bacc_std": 0.056757767462371635} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05926281650511004, "f1": 0.5833333333333333, "f1_std": 0.062406521918864415, "bacc": 0.5834942084942085, "bacc_std": 0.06063199390081958} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.0506793049741482, "f1": 0.6003742314889067, "f1_std": 0.0632871223715093, "bacc": 0.6110038610038611, "bacc_std": 0.05374128222830568} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.055371998562920514, "f1": 0.675, "f1_std": 0.060691192310872694, "bacc": 0.6732625482625483, "bacc_std": 0.05793218202438802} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06335726818568381, "f1": 0.5192307692307693, "f1_std": 0.06590136385137899, "bacc": 0.5207528957528957, "bacc_std": 0.06398582668621695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05657586413274538, "f1": 0.5834401435529352, "f1_std": 0.0641531600167423, "bacc": 0.5883204633204633, "bacc_std": 0.0584955240240973} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.060380677970600406, "f1": 0.5699583435432491, "f1_std": 0.062415240514641535, "bacc": 0.5699806949806949, "bacc_std": 0.061080549458561485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05876439770702855, "f1": 0.6153846153846154, "f1_std": 0.061694403404769234, "bacc": 0.6148648648648649, "bacc_std": 0.0600340155785898} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05988628276913942, "f1": 0.5644080416976918, "f1_std": 0.06289834954020679, "bacc": 0.5656370656370656, "bacc_std": 0.06084252420438804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05987229802676625, "f1": 0.6198830409356726, "f1_std": 0.0617080769939448, "bacc": 0.6192084942084942, "bacc_std": 0.06049774823971548} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06247195820630387, "f1": 0.5666666666666667, "f1_std": 0.06267517293474048, "bacc": 0.5694980694980695, "bacc_std": 0.06300016208765352} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05078694329714533, "f1": 0.4846723044397463, "f1_std": 0.057781043294810965, "bacc": 0.5033783783783784, "bacc_std": 0.05132175544024846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.059188124508555234, "f1": 0.6198830409356726, "f1_std": 0.06182880283584087, "bacc": 0.6192084942084942, "bacc_std": 0.06059147180978105} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05757205107592361, "f1": 0.5321419707123356, "f1_std": 0.06247008871010618, "bacc": 0.5342664092664092, "bacc_std": 0.059503432631688245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.056460750361299196, "f1": 0.6474358974358974, "f1_std": 0.059891082985933396, "bacc": 0.6462355212355213, "bacc_std": 0.058099117943830575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.062050752275763475, "f1": 0.5833333333333333, "f1_std": 0.06559790941373282, "bacc": 0.5834942084942085, "bacc_std": 0.06376988841160226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05374562924296164, "f1": 0.5626293995859213, "f1_std": 0.06130198904583865, "bacc": 0.5704633204633205, "bacc_std": 0.05542340387438326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.056837982133585215, "f1": 0.6741465743614228, "f1_std": 0.05735189513063677, "bacc": 0.6771235521235521, "bacc_std": 0.05735901507753353} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05376609432912422, "f1": 0.7115384615384616, "f1_std": 0.05721667524391386, "bacc": 0.708976833976834, "bacc_std": 0.05602954785781308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06191420672255148, "f1": 0.5357142857142857, "f1_std": 0.06239070810887396, "bacc": 0.5381274131274132, "bacc_std": 0.06325492067059366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.055720987090744044, "f1": 0.61, "f1_std": 0.05982703118599223, "bacc": 0.6105212355212355, "bacc_std": 0.05728436661374922} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06141344223843803, "f1": 0.5644080416976918, "f1_std": 0.06482630161447209, "bacc": 0.5656370656370656, "bacc_std": 0.06264175796256376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.056701907130089976, "f1": 0.5905769715293525, "f1_std": 0.06197558964448915, "bacc": 0.5926640926640927, "bacc_std": 0.05843548127416353} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05778559841877027, "f1": 0.6198830409356726, "f1_std": 0.06006733660235526, "bacc": 0.6192084942084942, "bacc_std": 0.05942084302347426} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06056193660774458, "f1": 0.5745454545454545, "f1_std": 0.06148901456792756, "bacc": 0.5743243243243243, "bacc_std": 0.060898362511979574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05589964803903129, "f1": 0.7149122807017544, "f1_std": 0.058135616864521795, "bacc": 0.7133204633204633, "bacc_std": 0.057311116346538314} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.04934466632448289, "f1": 0.4724025974025974, "f1_std": 0.05674185993197115, "bacc": 0.49903474903474904, "bacc_std": 0.049460252488525966} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05664296986831604, "f1": 0.5501153550371699, "f1_std": 0.0616835870549611, "bacc": 0.556949806949807, "bacc_std": 0.057542353004983976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.048109106766247016, "f1": 0.6130952380952381, "f1_std": 0.06154465273811954, "bacc": 0.6245173745173745, "bacc_std": 0.05145965075485483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05071418927758622, "f1": 0.622093023255814, "f1_std": 0.06174997341569947, "bacc": 0.6288610038610039, "bacc_std": 0.053915474110534135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.4461538461538462, "acc_std": 0.05917239081125188, "f1": 0.4298245614035088, "f1_std": 0.06096780062254247, "bacc": 0.430984555984556, "bacc_std": 0.06003696039894371} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 1291.5496650148827, "split": "test", "acc": 0.6, "acc_std": 0.05862774753531889, "f1": 0.599146110056926, "f1_std": 0.05956710352079246, "bacc": 0.6182432432432432, "bacc_std": 0.05771964209037853} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06012130538292349, "f1": 0.5125, "f1_std": 0.06290093559558944, "bacc": 0.5164092664092664, "bacc_std": 0.06049343559696568} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.0573660188469027, "f1": 0.6289401836684041, "f1_std": 0.06018647528876045, "bacc": 0.6283783783783784, "bacc_std": 0.058143782733863336} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05886017932404871, "f1": 0.5192307692307693, "f1_std": 0.06118129800929612, "bacc": 0.5207528957528957, "bacc_std": 0.059680682892341845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05523462596347914, "f1": 0.6289401836684041, "f1_std": 0.05916063835314509, "bacc": 0.6283783783783784, "bacc_std": 0.057051485134966516} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 40.522 | 221.75 | 0.79581 | 0.094217 | 0.78801 | 0.098481 | 0.78573 | 0.098979 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 40.522 | 221.75 | 0.598 | 0.060585 | 0.58096 | 0.06139 | 0.58365 | 0.060032 | + + +done! total time: 0:04:58 diff --git a/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc3cda8d575d8ba918ffc6587ec68b0ae7c8c95d --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (adhd200_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic +model: flat_mae +representation: reg +dataset: adhd200_dx +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..44e05a6f04494104db6f7b318a8479341de01313 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,adhd200_dx,,0.005994842503189409,train,0.7726027397260274,0.02242123487490863,0.7649163103616852,0.023469520848672883,0.7619527385968126,0.023241533685078114 +flat_mae,reg,logistic,adhd200_dx,,0.005994842503189409,test,0.5846153846153846,0.060966720963889456,0.5699583435432491,0.06393931781659724,0.5699806949806949,0.06255849265689853 +flat_mae,reg,logistic,adhd200_dx,1,0.005994842503189409,train,0.7643835616438356,0.022419086847087103,0.7561676609495401,0.023539439662008527,0.7532362459546925,0.023278065753961536 +flat_mae,reg,logistic,adhd200_dx,1,0.005994842503189409,test,0.6153846153846154,0.05732111149031574,0.6018132810585641,0.05919993469100661,0.6013513513513513,0.05820367742224786 +flat_mae,reg,logistic,adhd200_dx,2,0.005994842503189409,train,0.7643835616438356,0.020693849841916554,0.7576136644427971,0.021521021179106706,0.7553886548207852,0.02144787075890727 +flat_mae,reg,logistic,adhd200_dx,2,0.005994842503189409,test,0.6461538461538462,0.05568870366579143,0.6233308138070043,0.06151467804298453,0.6240347490347491,0.05818744884052347 +flat_mae,reg,logistic,adhd200_dx,3,0.005994842503189409,train,0.7863013698630137,0.02211233878098768,0.7793023255813953,0.023182574663175255,0.7762410697930023,0.023116973878158475 +flat_mae,reg,logistic,adhd200_dx,3,0.005994842503189409,test,0.5692307692307692,0.05802538518134789,0.5512820512820513,0.060832088420983255,0.5521235521235521,0.058998912637704924 +flat_mae,reg,logistic,adhd200_dx,4,0.005994842503189409,train,0.7780821917808219,0.021925666399524528,0.7714888584877223,0.0229018393978762,0.7689595163949441,0.022862918824186576 +flat_mae,reg,logistic,adhd200_dx,4,0.005994842503189409,test,0.6615384615384615,0.05685797524633133,0.6474358974358974,0.061025246660579256,0.6462355212355213,0.05938107231457297 +flat_mae,reg,logistic,adhd200_dx,5,0.005994842503189409,train,0.7972602739726027,0.021732943672753014,0.7910348455777682,0.022760381306973804,0.7881022165231727,0.02273227479340616 +flat_mae,reg,logistic,adhd200_dx,5,0.005994842503189409,test,0.5076923076923077,0.06176955838594983,0.4980694980694981,0.06279079813216022,0.4980694980694981,0.06263820463683102 +flat_mae,reg,logistic,adhd200_dx,6,0.005994842503189409,train,0.7726027397260274,0.02089910997861613,0.7644264041492679,0.02196611029360956,0.7612352689747817,0.021709986235836125 +flat_mae,reg,logistic,adhd200_dx,6,0.005994842503189409,test,0.7538461538461538,0.05413849432748696,0.7490347490347491,0.055504711104001445,0.7490347490347491,0.05551664148669393 +flat_mae,reg,logistic,adhd200_dx,7,0.005994842503189409,train,0.7589041095890411,0.022975326130629397,0.7533026113671275,0.023633248474321522,0.7519692251328082,0.023649603231310212 +flat_mae,reg,logistic,adhd200_dx,7,0.005994842503189409,test,0.5846153846153846,0.05713151857076077,0.5644080416976918,0.06073755194638699,0.5656370656370656,0.05843368374853434 +flat_mae,reg,logistic,adhd200_dx,8,0.005994842503189409,train,0.7726027397260274,0.02126733112641918,0.7662873674059788,0.02229510392334506,0.7641051474629053,0.02231133808487044 +flat_mae,reg,logistic,adhd200_dx,8,0.005994842503189409,test,0.676923076923077,0.06055894287493943,0.6612062546537603,0.06426380848500225,0.6597490347490347,0.06216835393123916 +flat_mae,reg,logistic,adhd200_dx,9,0.000774263682681127,train,0.7013698630136986,0.024103149202203623,0.6912756364990804,0.02513359112490317,0.6895188373939061,0.02479190665930507 +flat_mae,reg,logistic,adhd200_dx,9,0.000774263682681127,test,0.5846153846153846,0.05543532721071086,0.5411764705882354,0.06396992755478527,0.5526061776061776,0.05717131925131015 +flat_mae,reg,logistic,adhd200_dx,10,0.005994842503189409,train,0.7917808219178082,0.02080872256734165,0.7865759832584478,0.0214708078843814,0.7846827868351957,0.021493928400244913 +flat_mae,reg,logistic,adhd200_dx,10,0.005994842503189409,test,0.5538461538461539,0.05876239186950146,0.5321419707123356,0.06281587871713505,0.5342664092664092,0.06032408962158811 +flat_mae,reg,logistic,adhd200_dx,11,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adhd200_dx,11,1291.5496650148827,test,0.5692307692307692,0.06234829446332278,0.5608108108108107,0.06341431018955371,0.5608108108108107,0.06325437148848224 +flat_mae,reg,logistic,adhd200_dx,12,0.005994842503189409,train,0.7698630136986301,0.022656059980858505,0.7632505559673832,0.02337051232569755,0.7609604933748549,0.023149724650501743 +flat_mae,reg,logistic,adhd200_dx,12,0.005994842503189409,test,0.5538461538461539,0.05702175231160816,0.5250692869740489,0.06307393531158556,0.5299227799227799,0.05884074633744766 +flat_mae,reg,logistic,adhd200_dx,13,0.005994842503189409,train,0.7890410958904109,0.0226484114744419,0.7819103120222861,0.023675584101364614,0.7786682542590218,0.023508936112537144 +flat_mae,reg,logistic,adhd200_dx,13,0.005994842503189409,test,0.5846153846153846,0.060776350118510826,0.5745454545454545,0.06219963786062973,0.5743243243243243,0.06146986933980504 +flat_mae,reg,logistic,adhd200_dx,14,0.3593813663804626,train,0.989041095890411,0.005681943429930224,0.9888399682015532,0.005797279184894289,0.9881388532698296,0.006140374665113788 +flat_mae,reg,logistic,adhd200_dx,14,0.3593813663804626,test,0.5692307692307692,0.0555246434973063,0.545,0.05966040164641109,0.5477799227799228,0.05681876628435885 +flat_mae,reg,logistic,adhd200_dx,15,0.000774263682681127,train,0.6986301369863014,0.02241065813510202,0.6786560379049815,0.024960814082324354,0.677764547841485,0.023514626576009768 +flat_mae,reg,logistic,adhd200_dx,15,0.000774263682681127,test,0.5538461538461539,0.062428759872126226,0.5469838981014179,0.06310839438938437,0.5472972972972974,0.06321125255149328 +flat_mae,reg,logistic,adhd200_dx,16,0.005994842503189409,train,0.7780821917808219,0.019874415999827748,0.772732513894334,0.02058320988792226,0.7711119252610368,0.02062270104793488 +flat_mae,reg,logistic,adhd200_dx,16,0.005994842503189409,test,0.5692307692307692,0.061803207438644654,0.5608108108108107,0.06283102964647934,0.5608108108108107,0.06256166394250548 +flat_mae,reg,logistic,adhd200_dx,17,0.046415888336127774,train,0.9068493150684932,0.014867775009097644,0.9045208346156213,0.015342205218048967,0.9016913964706601,0.015658904347816176 +flat_mae,reg,logistic,adhd200_dx,17,0.046415888336127774,test,0.6307692307692307,0.05716757470483715,0.6153846153846154,0.060258857982781064,0.6148648648648649,0.05865145152686348 +flat_mae,reg,logistic,adhd200_dx,18,0.000774263682681127,train,0.7232876712328767,0.022698067800013426,0.7062105019883489,0.025037768703292104,0.7039140257678451,0.023818551395195776 +flat_mae,reg,logistic,adhd200_dx,18,0.000774263682681127,test,0.5692307692307692,0.059640032621757176,0.5376016260162602,0.06529933092992857,0.5434362934362934,0.06105948071714534 +flat_mae,reg,logistic,adhd200_dx,19,0.005994842503189409,train,0.7780821917808219,0.022434867509417884,0.7705809775818855,0.02359990968375584,0.7675245771508823,0.02343848968216463 +flat_mae,reg,logistic,adhd200_dx,19,0.005994842503189409,test,0.6923076923076923,0.05280389980218816,0.6697154471544715,0.05895637130557245,0.6689189189189189,0.05549879523627344 +flat_mae,reg,logistic,adhd200_dx,20,0.005994842503189409,train,0.7534246575342466,0.02111951425884495,0.7458531905675558,0.022101320035783786,0.7435275080906149,0.02197733625243712 +flat_mae,reg,logistic,adhd200_dx,20,0.005994842503189409,test,0.6461538461538462,0.05449685078573009,0.6289401836684041,0.05882185801443634,0.6283783783783784,0.05660964798554247 +flat_mae,reg,logistic,adhd200_dx,21,0.005994842503189409,train,0.7698630136986301,0.021486168225927096,0.761333914559721,0.022793195712350302,0.7580906148867314,0.022540457870241466 +flat_mae,reg,logistic,adhd200_dx,21,0.005994842503189409,test,0.7538461538461538,0.05204753403204198,0.746588693957115,0.05405688992817761,0.7446911196911197,0.0537647861535753 +flat_mae,reg,logistic,adhd200_dx,22,0.005994842503189409,train,0.7780821917808219,0.021126148732204062,0.7701028763384421,0.02227271906273522,0.7668071075288514,0.02202836158481215 +flat_mae,reg,logistic,adhd200_dx,22,0.005994842503189409,test,0.5692307692307692,0.0622407313505654,0.5512820512820513,0.06530417432672812,0.5521235521235521,0.06349233228743814 +flat_mae,reg,logistic,adhd200_dx,23,0.005994842503189409,train,0.7808219178082192,0.021347384735687903,0.7736434108527132,0.02234758219205307,0.7706692312389326,0.022187338160718294 +flat_mae,reg,logistic,adhd200_dx,23,0.005994842503189409,test,0.6153846153846154,0.05396059048457297,0.5905769715293525,0.05887461511598266,0.5926640926640927,0.055383810651781884 +flat_mae,reg,logistic,adhd200_dx,24,0.005994842503189409,train,0.7753424657534247,0.020547503922202184,0.7693149586878776,0.02126179634458402,0.7672498015509556,0.021241797332712597 +flat_mae,reg,logistic,adhd200_dx,24,0.005994842503189409,test,0.6461538461538462,0.048876368607784514,0.6003742314889067,0.06012579225772673,0.6110038610038611,0.051387912493349196 +flat_mae,reg,logistic,adhd200_dx,25,0.000774263682681127,train,0.6767123287671233,0.023822262989141335,0.6499057094550656,0.027511052608812085,0.6518898455150516,0.025145558251698993 +flat_mae,reg,logistic,adhd200_dx,25,0.000774263682681127,test,0.6,0.05938760454812517,0.570630081300813,0.06522858468048676,0.5748069498069498,0.061064081542918196 +flat_mae,reg,logistic,adhd200_dx,26,0.005994842503189409,train,0.7780821917808219,0.021282770190918218,0.7705809775818855,0.022265067304853812,0.7675245771508823,0.02204904377457982 +flat_mae,reg,logistic,adhd200_dx,26,0.005994842503189409,test,0.5692307692307692,0.05744627745032229,0.5512820512820513,0.06067839554813978,0.5521235521235521,0.058737612160318795 +flat_mae,reg,logistic,adhd200_dx,27,0.005994842503189409,train,0.7808219178082192,0.021219746006178077,0.7745243390165555,0.0220638251834969,0.7721041704829944,0.02198136354451646 +flat_mae,reg,logistic,adhd200_dx,27,0.005994842503189409,test,0.6153846153846154,0.057541678231763545,0.6018132810585641,0.060328564764931016,0.6013513513513513,0.058931787610778635 +flat_mae,reg,logistic,adhd200_dx,28,0.000774263682681127,train,0.7232876712328767,0.02247342000826933,0.7120844104622738,0.023817952239028595,0.7096537827440923,0.023382704017744026 +flat_mae,reg,logistic,adhd200_dx,28,0.000774263682681127,test,0.5846153846153846,0.05695204252640105,0.5578231292517006,0.06177529825087155,0.5612934362934363,0.058389551517483367 +flat_mae,reg,logistic,adhd200_dx,29,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adhd200_dx,29,21.54434690031882,test,0.49230769230769234,0.06211441247155437,0.48000000000000004,0.06308432966278382,0.48021235521235517,0.06256674482047607 +flat_mae,reg,logistic,adhd200_dx,30,0.046415888336127774,train,0.8904109589041096,0.017054414035476296,0.8878648233486943,0.017591374531144868,0.8856933504304818,0.01793040605280599 +flat_mae,reg,logistic,adhd200_dx,30,0.046415888336127774,test,0.5538461538461539,0.060912656451482486,0.5521501544309813,0.060913506902595074,0.555984555984556,0.06141399000226569 +flat_mae,reg,logistic,adhd200_dx,31,0.046415888336127774,train,0.9013698630136986,0.015066675855545759,0.8992454913507545,0.0154793156731661,0.8975544971606522,0.01571361776358952 +flat_mae,reg,logistic,adhd200_dx,31,0.046415888336127774,test,0.5076923076923077,0.059359771606763284,0.48,0.06401069752289423,0.48503861003861004,0.06065439996251827 +flat_mae,reg,logistic,adhd200_dx,32,0.046415888336127774,train,0.8849315068493151,0.015887478236691187,0.8822580645161291,0.01636938993323832,0.8801215118764121,0.016635746884678216 +flat_mae,reg,logistic,adhd200_dx,32,0.046415888336127774,test,0.5076923076923077,0.061826058733041825,0.5066413662239089,0.06205957359483718,0.5111003861003861,0.06254516812737378 +flat_mae,reg,logistic,adhd200_dx,33,0.005994842503189409,train,0.7589041095890411,0.021954746955214884,0.7528774542992552,0.022749746269827224,0.7512517555107773,0.022770217399920828 +flat_mae,reg,logistic,adhd200_dx,33,0.005994842503189409,test,0.6153846153846154,0.06119345596756599,0.6139225469232596,0.06142016883113983,0.6187258687258688,0.061751777804244345 +flat_mae,reg,logistic,adhd200_dx,34,0.046415888336127774,train,0.9068493150684932,0.01513775761996904,0.9051397297132024,0.015472846354641731,0.9045612749587837,0.01572815432102199 +flat_mae,reg,logistic,adhd200_dx,34,0.046415888336127774,test,0.5692307692307692,0.05971744910669453,0.5666666666666667,0.06013660658282585,0.5694980694980695,0.06059751631650118 +flat_mae,reg,logistic,adhd200_dx,35,0.005994842503189409,train,0.7698630136986301,0.021474080961112484,0.7602739726027397,0.022826206255770067,0.7566556756426696,0.022463061879786604 +flat_mae,reg,logistic,adhd200_dx,35,0.005994842503189409,test,0.6,0.059036332209157945,0.5775,0.06332160848966392,0.5791505791505791,0.06056600402172626 +flat_mae,reg,logistic,adhd200_dx,36,0.000774263682681127,train,0.7068493150684931,0.023288808352576077,0.6949805140540921,0.024775973692324392,0.6929382670818831,0.024262215799061466 +flat_mae,reg,logistic,adhd200_dx,36,0.000774263682681127,test,0.49230769230769234,0.05942210056454549,0.46760982874162327,0.06078925045954281,0.47152509652509655,0.059165382520059394 +flat_mae,reg,logistic,adhd200_dx,37,0.005994842503189409,train,0.7698630136986301,0.02068127265109079,0.7618381804623415,0.021565163471886657,0.7588080845087622,0.021316996396039584 +flat_mae,reg,logistic,adhd200_dx,37,0.005994842503189409,test,0.6,0.05872423569522354,0.5921814671814671,0.05997256128337034,0.5921814671814671,0.05941916698153296 +flat_mae,reg,logistic,adhd200_dx,38,0.000774263682681127,train,0.7013698630136986,0.022800717677089785,0.6912756364990804,0.023961317670900654,0.6895188373939061,0.023666328770630768 +flat_mae,reg,logistic,adhd200_dx,38,0.000774263682681127,test,0.5230769230769231,0.0563342434248346,0.5062484685126194,0.05723667792795509,0.5072393822393823,0.05625512603169517 +flat_mae,reg,logistic,adhd200_dx,39,0.005994842503189409,train,0.7726027397260274,0.02186860985130681,0.7639197350477304,0.023082675648450502,0.7605177993527508,0.022773208232713824 +flat_mae,reg,logistic,adhd200_dx,39,0.005994842503189409,test,0.6307692307692307,0.05758612587673462,0.6036585365853658,0.06360174879325577,0.6061776061776062,0.05942221257132464 +flat_mae,reg,logistic,adhd200_dx,40,0.005994842503189409,train,0.7890410958904109,0.021474852037485635,0.7831822565091611,0.022225600729585564,0.7808206631251144,0.022160220847283134 +flat_mae,reg,logistic,adhd200_dx,40,0.005994842503189409,test,0.5846153846153846,0.05160867777129904,0.5411764705882354,0.060171622436753926,0.5526061776061776,0.05346578557115574 +flat_mae,reg,logistic,adhd200_dx,41,0.000774263682681127,train,0.6986301369863014,0.022589049569268285,0.6813492063492064,0.024813395985488682,0.6799169567075777,0.023656519052474713 +flat_mae,reg,logistic,adhd200_dx,41,0.000774263682681127,test,0.6,0.060256775994525606,0.588206627680312,0.061986682381927616,0.5878378378378378,0.0611511147579981 +flat_mae,reg,logistic,adhd200_dx,42,0.005994842503189409,train,0.7808219178082192,0.021281647214510493,0.7745243390165555,0.022073468317653364,0.7721041704829944,0.022013140255425497 +flat_mae,reg,logistic,adhd200_dx,42,0.005994842503189409,test,0.5384615384615384,0.06441712440927305,0.5357142857142857,0.06458082049819673,0.5381274131274132,0.06486043561218585 +flat_mae,reg,logistic,adhd200_dx,43,0.005994842503189409,train,0.7534246575342466,0.022270830963467443,0.7476958525345623,0.022856169501660056,0.7463973865787384,0.02284754310325412 +flat_mae,reg,logistic,adhd200_dx,43,0.005994842503189409,test,0.676923076923077,0.058278060260206904,0.6655231560891939,0.06072203630142716,0.6640926640926641,0.05957287560919078 +flat_mae,reg,logistic,adhd200_dx,44,0.005994842503189409,train,0.7561643835616438,0.021897330159584425,0.7484298647089345,0.022925576184923212,0.7459546925566343,0.02276169950546737 +flat_mae,reg,logistic,adhd200_dx,44,0.005994842503189409,test,0.6923076923076923,0.05575887224755831,0.6794871794871795,0.059478359877654,0.6776061776061776,0.058252363430950284 +flat_mae,reg,logistic,adhd200_dx,45,0.3593813663804626,train,0.9945205479452055,0.003759734721919631,0.9944281614459303,0.0038242367856955923,0.9944281614459303,0.003894802433903879 +flat_mae,reg,logistic,adhd200_dx,45,0.3593813663804626,test,0.4461538461538462,0.060070262607251367,0.4230769230769231,0.061665950771569125,0.4266409266409266,0.060139336851295445 +flat_mae,reg,logistic,adhd200_dx,46,0.005994842503189409,train,0.7643835616438356,0.021346765881183002,0.757148604320109,0.02221499903399535,0.7546711851987543,0.02204952788096454 +flat_mae,reg,logistic,adhd200_dx,46,0.005994842503189409,test,0.676923076923077,0.057735114375505166,0.656084656084656,0.06353924885680176,0.6554054054054055,0.05995392480634988 +flat_mae,reg,logistic,adhd200_dx,47,0.046415888336127774,train,0.915068493150685,0.014219889654114493,0.9130210855644981,0.014712248145756467,0.9104078891127801,0.015214936785315896 +flat_mae,reg,logistic,adhd200_dx,47,0.046415888336127774,test,0.5076923076923077,0.060764081573471136,0.5019157088122606,0.061357251541475505,0.5024131274131274,0.06153706105127399 +flat_mae,reg,logistic,adhd200_dx,48,0.000774263682681127,train,0.7068493150684931,0.02282267689375492,0.6887576605223101,0.025208013967375462,0.6871985101056359,0.023950222100525824 +flat_mae,reg,logistic,adhd200_dx,48,0.000774263682681127,test,0.6307692307692307,0.0565268454838498,0.5962732919254659,0.0643208208325403,0.6018339768339769,0.05876392741301429 +flat_mae,reg,logistic,adhd200_dx,49,0.005994842503189409,train,0.7808219178082192,0.02120385112859054,0.7726989662473533,0.02226614579075195,0.7692342919948708,0.022000609316220616 +flat_mae,reg,logistic,adhd200_dx,49,0.005994842503189409,test,0.6,0.06151796581766334,0.5921814671814671,0.0628369838229098,0.5921814671814671,0.06223577450185256 +flat_mae,reg,logistic,adhd200_dx,50,0.005994842503189409,train,0.7863013698630137,0.021474404634802234,0.7797394318252151,0.022490838718624725,0.7769585394150332,0.022472980264041546 +flat_mae,reg,logistic,adhd200_dx,50,0.005994842503189409,test,0.5846153846153846,0.05881250923573615,0.578226387887527,0.05973031176323162,0.5786679536679536,0.05928546449389097 +flat_mae,reg,logistic,adhd200_dx,51,0.005994842503189409,train,0.7890410958904109,0.02113657773555541,0.7846706660230921,0.0217451587335311,0.783690541613238,0.021886620378061113 +flat_mae,reg,logistic,adhd200_dx,51,0.005994842503189409,test,0.6153846153846154,0.05867526922763041,0.6018132810585641,0.06141517024658891,0.6013513513513513,0.06004794415114077 +flat_mae,reg,logistic,adhd200_dx,52,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adhd200_dx,52,2.782559402207126,test,0.47692307692307695,0.06410098541645788,0.4738095238095238,0.06407058935647114,0.4753861003861004,0.06475086751279212 +flat_mae,reg,logistic,adhd200_dx,53,0.005994842503189409,train,0.7698630136986301,0.021747195547190935,0.7618381804623415,0.022973477418009244,0.7588080845087622,0.022785659084270476 +flat_mae,reg,logistic,adhd200_dx,53,0.005994842503189409,test,0.6153846153846154,0.051532821735048924,0.5656241646618552,0.06244566809604247,0.5796332046332047,0.05393035918643237 +flat_mae,reg,logistic,adhd200_dx,54,0.000774263682681127,train,0.6931506849315069,0.02196844219824774,0.6849179923541744,0.022808280223041245,0.6836722232399096,0.022692977622895055 +flat_mae,reg,logistic,adhd200_dx,54,0.000774263682681127,test,0.5538461538461539,0.06097123193982843,0.5381034060279344,0.06297478497137203,0.5386100386100386,0.061625158808060625 +flat_mae,reg,logistic,adhd200_dx,55,0.005994842503189409,train,0.7863013698630137,0.02234734918872499,0.7801612305411416,0.023113319337981986,0.7776760090370641,0.022979788077389804 +flat_mae,reg,logistic,adhd200_dx,55,0.005994842503189409,test,0.5692307692307692,0.06229542899260572,0.5608108108108107,0.06417603741196264,0.5608108108108107,0.06377483672741105 +flat_mae,reg,logistic,adhd200_dx,56,0.046415888336127774,train,0.8931506849315068,0.016804573629732103,0.8905749140972719,0.017367676212905175,0.8881205348965012,0.01773543536006105 +flat_mae,reg,logistic,adhd200_dx,56,0.046415888336127774,test,0.6153846153846154,0.06222140273716016,0.606060606060606,0.0633600564493716,0.6056949806949807,0.06293525311300469 +flat_mae,reg,logistic,adhd200_dx,57,0.005994842503189409,train,0.7753424657534247,0.022139972672562805,0.7684440180726619,0.023175780014125805,0.7658148623068938,0.02305174941051608 +flat_mae,reg,logistic,adhd200_dx,57,0.005994842503189409,test,0.5692307692307692,0.05971010044100207,0.5376016260162602,0.06497698500465403,0.5434362934362934,0.06083256416702827 +flat_mae,reg,logistic,adhd200_dx,58,0.005994842503189409,train,0.7671232876712328,0.021015012371642716,0.7610907046765387,0.021738016060876962,0.7592507785308664,0.0216707756650606 +flat_mae,reg,logistic,adhd200_dx,58,0.005994842503189409,test,0.5846153846153846,0.05836327774679137,0.5745454545454545,0.06044242180347428,0.5743243243243243,0.05975191483880074 +flat_mae,reg,logistic,adhd200_dx,59,0.005994842503189409,train,0.7616438356164383,0.02066464831537482,0.7535869759212843,0.021760526044888043,0.7508090614886731,0.021603531869332646 +flat_mae,reg,logistic,adhd200_dx,59,0.005994842503189409,test,0.6153846153846154,0.056546270522990036,0.5905769715293525,0.06179568931075528,0.5926640926640927,0.05823338833932702 +flat_mae,reg,logistic,adhd200_dx,60,0.000774263682681127,train,0.7150684931506849,0.022974131674887075,0.7038631966548088,0.024132149356195034,0.7016547597240032,0.02367589394232996 +flat_mae,reg,logistic,adhd200_dx,60,0.000774263682681127,test,0.6307692307692307,0.055705939683092426,0.61,0.06027814739562973,0.6105212355212355,0.05745663647533252 +flat_mae,reg,logistic,adhd200_dx,61,0.005994842503189409,train,0.7452054794520548,0.02230715215671514,0.738129218900675,0.023019379830766674,0.7362459546925566,0.022883229041975588 +flat_mae,reg,logistic,adhd200_dx,61,0.005994842503189409,test,0.5846153846153846,0.060278611512850556,0.5699583435432491,0.06240800199590214,0.5699806949806949,0.06112224580298174 +flat_mae,reg,logistic,adhd200_dx,62,0.000774263682681127,train,0.7041095890410959,0.023007229358176225,0.693143604433927,0.024214952954936027,0.6912285522378946,0.023744600045135428 +flat_mae,reg,logistic,adhd200_dx,62,0.000774263682681127,test,0.5076923076923077,0.06278630235420468,0.4980694980694981,0.06352965379908398,0.4980694980694981,0.06328466198793517 +flat_mae,reg,logistic,adhd200_dx,63,0.005994842503189409,train,0.7917808219178082,0.021871372041859474,0.7861943519546184,0.022684965730070623,0.7839653172131648,0.02271690951955192 +flat_mae,reg,logistic,adhd200_dx,63,0.005994842503189409,test,0.5538461538461539,0.058265712511200524,0.543030303030303,0.05918160467559252,0.542953667953668,0.05866978195470122 +flat_mae,reg,logistic,adhd200_dx,64,0.005994842503189409,train,0.7698630136986301,0.020856340688010244,0.7636884942656308,0.021514134957239014,0.7616779629968858,0.02145815949339548 +flat_mae,reg,logistic,adhd200_dx,64,0.005994842503189409,test,0.6153846153846154,0.05965991190705361,0.606060606060606,0.06128097356207435,0.6056949806949807,0.06072572170258904 +flat_mae,reg,logistic,adhd200_dx,65,0.005994842503189409,train,0.7616438356164383,0.021332831278886268,0.7535869759212843,0.02232021410071776,0.7508090614886731,0.022105881015924124 +flat_mae,reg,logistic,adhd200_dx,65,0.005994842503189409,test,0.676923076923077,0.05637355542698447,0.6690909090909091,0.05844863582008846,0.6684362934362934,0.05805637048015969 +flat_mae,reg,logistic,adhd200_dx,66,0.046415888336127774,train,0.9178082191780822,0.014915437878529593,0.9160379094589621,0.01533378238605707,0.9142700128228614,0.015758936968037728 +flat_mae,reg,logistic,adhd200_dx,66,0.046415888336127774,test,0.47692307692307695,0.060746410515622594,0.4666988416988417,0.06120940501885036,0.4666988416988417,0.06089428202083259 +flat_mae,reg,logistic,adhd200_dx,67,0.005994842503189409,train,0.7753424657534247,0.021393130290918414,0.7693149586878776,0.022205827138254742,0.7672498015509556,0.022175787948051473 +flat_mae,reg,logistic,adhd200_dx,67,0.005994842503189409,test,0.6461538461538462,0.052124711626350295,0.6091503267973856,0.06218879448752807,0.6153474903474904,0.05487580436071236 +flat_mae,reg,logistic,adhd200_dx,68,0.000774263682681127,train,0.6657534246575343,0.023622351864281528,0.6414653784219002,0.026675744798737216,0.6428985772730048,0.024744428780926597 +flat_mae,reg,logistic,adhd200_dx,68,0.000774263682681127,test,0.6,0.05494949590844047,0.5626293995859213,0.06258288726070199,0.5704633204633205,0.05672177286415473 +flat_mae,reg,logistic,adhd200_dx,69,0.005994842503189409,train,0.7726027397260274,0.020729469605237565,0.7667120998606203,0.021522673185574347,0.7648226170849362,0.021545566229258843 +flat_mae,reg,logistic,adhd200_dx,69,0.005994842503189409,test,0.47692307692307695,0.059149146557338744,0.4475,0.06127820507524919,0.45366795366795365,0.05899998637966995 +flat_mae,reg,logistic,adhd200_dx,70,0.000774263682681127,train,0.7123287671232876,0.023649596275957075,0.6978119455943228,0.025740984753859743,0.6956402271478292,0.024847505233339184 +flat_mae,reg,logistic,adhd200_dx,70,0.000774263682681127,test,0.5846153846153846,0.05975485224535248,0.5644080416976918,0.06387092388195088,0.5656370656370656,0.06118382387951826 +flat_mae,reg,logistic,adhd200_dx,71,0.005994842503189409,train,0.7808219178082192,0.02233916893610557,0.7745243390165555,0.02326759297394261,0.7721041704829944,0.023176960739379315 +flat_mae,reg,logistic,adhd200_dx,71,0.005994842503189409,test,0.6307692307692307,0.0601238878773747,0.6198830409356726,0.06231875614111929,0.6192084942084942,0.06131328358778849 +flat_mae,reg,logistic,adhd200_dx,72,0.005994842503189409,train,0.7753424657534247,0.0216194459424802,0.7684440180726619,0.022639739919334698,0.7658148623068938,0.022558641936114076 +flat_mae,reg,logistic,adhd200_dx,72,0.005994842503189409,test,0.6461538461538462,0.05845667996005815,0.6407113674597452,0.058747436263322655,0.6414092664092663,0.058466684872039376 +flat_mae,reg,logistic,adhd200_dx,73,0.000774263682681127,train,0.673972602739726,0.024466162079147036,0.6583714144139183,0.02597717765626541,0.657354826891372,0.025134225167986007 +flat_mae,reg,logistic,adhd200_dx,73,0.000774263682681127,test,0.6153846153846154,0.06030985082479375,0.6018132810585641,0.06260837425227185,0.6013513513513513,0.06140188329000794 +flat_mae,reg,logistic,adhd200_dx,74,0.005994842503189409,train,0.7753424657534247,0.021263009992003954,0.7697267187788515,0.021890259971633404,0.7679672711729865,0.021829581245033224 +flat_mae,reg,logistic,adhd200_dx,74,0.005994842503189409,test,0.7076923076923077,0.05676448593888018,0.7006060606060607,0.0580909392037952,0.6998069498069499,0.057413121984807594 +flat_mae,reg,logistic,adhd200_dx,75,0.005994842503189409,train,0.7726027397260274,0.022332737524133128,0.7662873674059788,0.023111261899131017,0.7641051474629053,0.023003139476489916 +flat_mae,reg,logistic,adhd200_dx,75,0.005994842503189409,test,0.5538461538461539,0.06351818637209346,0.5521501544309813,0.06366112715187054,0.555984555984556,0.06412224915518626 +flat_mae,reg,logistic,adhd200_dx,76,0.005994842503189409,train,0.7780821917808219,0.02097814427115426,0.7705809775818855,0.0220893968328423,0.7675245771508823,0.021956614262994698 +flat_mae,reg,logistic,adhd200_dx,76,0.005994842503189409,test,0.6,0.054930885029985776,0.5626293995859213,0.06171869175497417,0.5704633204633205,0.05633010966890393 +flat_mae,reg,logistic,adhd200_dx,77,0.005994842503189409,train,0.7643835616438356,0.022281567728991265,0.7539968652037617,0.02382654406441794,0.7503663674665689,0.023353745816021225 +flat_mae,reg,logistic,adhd200_dx,77,0.005994842503189409,test,0.7384615384615385,0.04989303588263395,0.7215923406399596,0.055821323607093146,0.7181467181467182,0.05316993558308879 +flat_mae,reg,logistic,adhd200_dx,78,0.005994842503189409,train,0.8082191780821918,0.02038083941168246,0.803763440860215,0.020911765000818393,0.8021157721194359,0.020957463442400324 +flat_mae,reg,logistic,adhd200_dx,78,0.005994842503189409,test,0.5692307692307692,0.05712615749218991,0.545,0.0624503985548758,0.5477799227799228,0.059125495105952205 +flat_mae,reg,logistic,adhd200_dx,79,0.005994842503189409,train,0.7945205479452054,0.02056152975610163,0.789197680596946,0.021262472315474804,0.7871099713012151,0.021216706683251076 +flat_mae,reg,logistic,adhd200_dx,79,0.005994842503189409,test,0.6153846153846154,0.061059396826601574,0.6018132810585641,0.06377619717532886,0.6013513513513513,0.06225991387106898 +flat_mae,reg,logistic,adhd200_dx,80,0.000774263682681127,train,0.7068493150684931,0.02225730155075545,0.6935636999897998,0.023882789107453336,0.6915033278378213,0.023199915839297768 +flat_mae,reg,logistic,adhd200_dx,80,0.000774263682681127,test,0.5846153846153846,0.05950963325129925,0.5578231292517006,0.06512379398822098,0.5612934362934363,0.06135810484338013 +flat_mae,reg,logistic,adhd200_dx,81,0.046415888336127774,train,0.8958904109589041,0.015681671013629127,0.8936480186480187,0.016081585407836663,0.8919826586065824,0.01627445772068188 +flat_mae,reg,logistic,adhd200_dx,81,0.046415888336127774,test,0.5692307692307692,0.05475065372066563,0.5289855072463768,0.06144046767420895,0.5390926640926641,0.055975044492249174 +flat_mae,reg,logistic,adhd200_dx,82,0.005994842503189409,train,0.7616438356164383,0.021902906578116818,0.7550241080038573,0.022892135399957828,0.7529614703547658,0.022832419024004062 +flat_mae,reg,logistic,adhd200_dx,82,0.005994842503189409,test,0.7076923076923077,0.0559890099105997,0.7065811356616774,0.05599732287806977,0.7128378378378378,0.05578201261735133 +flat_mae,reg,logistic,adhd200_dx,83,0.005994842503189409,train,0.7506849315068493,0.022253437770100318,0.7427766032417196,0.02305671613328639,0.7403828540025645,0.022784370085206163 +flat_mae,reg,logistic,adhd200_dx,83,0.005994842503189409,test,0.6923076923076923,0.05746732748101874,0.6794871794871795,0.060383119204670925,0.6776061776061776,0.058925531117912405 +flat_mae,reg,logistic,adhd200_dx,84,0.005994842503189409,train,0.7753424657534247,0.021185364428742016,0.7665075204393684,0.02245326199260242,0.7629449838187702,0.022154646478791215 +flat_mae,reg,logistic,adhd200_dx,84,0.005994842503189409,test,0.6461538461538462,0.06059750614900988,0.6431129147767964,0.06130502928473608,0.6457528957528957,0.06162906996357857 +flat_mae,reg,logistic,adhd200_dx,85,0.000774263682681127,train,0.6821917808219178,0.02233525412114295,0.6580409640111132,0.025361295582925856,0.6588966233131831,0.02347206839393817 +flat_mae,reg,logistic,adhd200_dx,85,0.000774263682681127,test,0.676923076923077,0.056413721105721105,0.656084656084656,0.06189569505040753,0.6554054054054055,0.05883294585355072 +flat_mae,reg,logistic,adhd200_dx,86,0.005994842503189409,train,0.7698630136986301,0.021803765635989947,0.7636884942656308,0.022572614576559076,0.7616779629968858,0.02253072582743535 +flat_mae,reg,logistic,adhd200_dx,86,0.005994842503189409,test,0.5692307692307692,0.06051589648853143,0.545,0.06406063704468434,0.5477799227799228,0.06149700967028417 +flat_mae,reg,logistic,adhd200_dx,87,0.000774263682681127,train,0.6986301369863014,0.02318622545508041,0.6830196425187898,0.024678406593713714,0.6813518959516395,0.023821651803502597 +flat_mae,reg,logistic,adhd200_dx,87,0.000774263682681127,test,0.6307692307692307,0.05249198304525815,0.587737843551797,0.06372234762548432,0.5974903474903475,0.055459984854817616 +flat_mae,reg,logistic,adhd200_dx,88,0.005994842503189409,train,0.7808219178082192,0.02064400632593424,0.7736434108527132,0.021700901233164458,0.7706692312389326,0.02163518535348889 +flat_mae,reg,logistic,adhd200_dx,88,0.005994842503189409,test,0.6153846153846154,0.05826606998281765,0.6018132810585641,0.0610008410575349,0.6013513513513513,0.059678466842842184 +flat_mae,reg,logistic,adhd200_dx,89,0.005994842503189409,train,0.7917808219178082,0.019980437796487675,0.7869431643625192,0.020545385806167313,0.7854002564572266,0.02050045994438374 +flat_mae,reg,logistic,adhd200_dx,89,0.005994842503189409,test,0.6,0.0588278081005672,0.5833333333333333,0.061943750191823424,0.5834942084942085,0.060231712291767915 +flat_mae,reg,logistic,adhd200_dx,90,0.005994842503189409,train,0.7424657534246575,0.02259268585032154,0.734031007751938,0.02354366193504742,0.7316663613604445,0.023290595240852204 +flat_mae,reg,logistic,adhd200_dx,90,0.005994842503189409,test,0.676923076923077,0.05804316696019637,0.6612062546537603,0.06234514041378354,0.6597490347490347,0.05991734217854129 +flat_mae,reg,logistic,adhd200_dx,91,0.046415888336127774,train,0.9095890410958904,0.014338686422289781,0.9074095426976916,0.014835635136661982,0.9048360505587104,0.015289495183141663 +flat_mae,reg,logistic,adhd200_dx,91,0.046415888336127774,test,0.5846153846153846,0.05755080081594185,0.5578231292517006,0.06202383678910752,0.5612934362934363,0.058642516347298564 +flat_mae,reg,logistic,adhd200_dx,92,0.005994842503189409,train,0.7753424657534247,0.021066012644116748,0.7684440180726619,0.02206567668376042,0.7658148623068938,0.021982419496429935 +flat_mae,reg,logistic,adhd200_dx,92,0.005994842503189409,test,0.5846153846153846,0.05580435816791144,0.5501153550371699,0.06178487265216961,0.556949806949807,0.05715103673326448 +flat_mae,reg,logistic,adhd200_dx,93,0.005994842503189409,train,0.7753424657534247,0.0211544740732503,0.7675086999751429,0.022144832027527827,0.764379923062832,0.021927327126582175 +flat_mae,reg,logistic,adhd200_dx,93,0.005994842503189409,test,0.676923076923077,0.05262074646543777,0.6500897205844656,0.06064270123830016,0.6510617760617761,0.055678121708125965 +flat_mae,reg,logistic,adhd200_dx,94,0.005994842503189409,train,0.7780821917808219,0.020609788214300474,0.7705809775818855,0.02170463068142658,0.7675245771508823,0.021559789983932213 +flat_mae,reg,logistic,adhd200_dx,94,0.005994842503189409,test,0.6307692307692307,0.05167372376566886,0.587737843551797,0.061917648148998465,0.5974903474903475,0.0542452231430262 +flat_mae,reg,logistic,adhd200_dx,95,0.000774263682681127,train,0.7068493150684931,0.022866892486786472,0.6997824566258485,0.02352032703584416,0.6986780240581303,0.023456686856195914 +flat_mae,reg,logistic,adhd200_dx,95,0.000774263682681127,test,0.5076923076923077,0.06250056378443941,0.49317738791423005,0.06417652014908862,0.49372586872586877,0.0631084767862466 +flat_mae,reg,logistic,adhd200_dx,96,0.005994842503189409,train,0.7534246575342466,0.022333456100655073,0.7476958525345623,0.022919960907532764,0.7463973865787384,0.022895198694193215 +flat_mae,reg,logistic,adhd200_dx,96,0.005994842503189409,test,0.6923076923076923,0.0589264429647113,0.6862934362934363,0.0604700174178674,0.6862934362934363,0.06036773019755417 +flat_mae,reg,logistic,adhd200_dx,97,0.005994842503189409,train,0.7589041095890411,0.021761152486782766,0.7510077519379845,0.022809054921217143,0.7483818770226537,0.022623011079551423 +flat_mae,reg,logistic,adhd200_dx,97,0.005994842503189409,test,0.6,0.051700466389115594,0.5427489177489178,0.06382579315831705,0.5617760617760618,0.05383130104825736 +flat_mae,reg,logistic,adhd200_dx,98,0.000774263682681127,train,0.6986301369863014,0.021721429145969345,0.6813492063492064,0.02346260205844029,0.6799169567075777,0.022486324326565933 +flat_mae,reg,logistic,adhd200_dx,98,0.000774263682681127,test,0.6615384615384615,0.055268288092169576,0.6366869918699187,0.06184568915952588,0.6375482625482626,0.05753845878985159 +flat_mae,reg,logistic,adhd200_dx,99,0.005994842503189409,train,0.7753424657534247,0.02275591831485628,0.7697267187788515,0.023413407063021453,0.7679672711729865,0.02332154404869716 +flat_mae,reg,logistic,adhd200_dx,99,0.005994842503189409,test,0.5692307692307692,0.05907643429284977,0.545,0.06384405121374152,0.5477799227799228,0.06060157039772176 +flat_mae,reg,logistic,adhd200_dx,100,0.005994842503189409,train,0.7589041095890411,0.02244418543712821,0.7515008974438324,0.023510053923840177,0.7490993466446846,0.023409091063050787 +flat_mae,reg,logistic,adhd200_dx,100,0.005994842503189409,test,0.676923076923077,0.05337491377153075,0.6612062546537603,0.05689084672434203,0.6597490347490347,0.055042859509587656 diff --git a/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a61e07a6a32efb44db287d0bbc49cd39303a91e3 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:29:26 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (adhd200_dx reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic +model: flat_mae +representation: reg +dataset: adhd200_dx +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/adhd200_dx__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:11:16 time: 4.4828 data: 3.5218 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:49 time: 0.1733 data: 0.0589 max mem: 3005 +extract (train) [ 40/151] eta: 0:00:30 time: 0.1600 data: 0.0477 max mem: 3005 +extract (train) [ 60/151] eta: 0:00:20 time: 0.1409 data: 0.0387 max mem: 3005 +extract (train) [ 80/151] eta: 0:00:14 time: 0.1570 data: 0.0477 max mem: 3005 +extract (train) [100/151] eta: 0:00:10 time: 0.1542 data: 0.0467 max mem: 3005 +extract (train) [120/151] eta: 0:00:05 time: 0.1553 data: 0.0472 max mem: 3005 +extract (train) [140/151] eta: 0:00:02 time: 0.1377 data: 0.0377 max mem: 3005 +extract (train) [150/151] eta: 0:00:00 time: 0.1353 data: 0.0368 max mem: 3005 +extract (train) Total time: 0:00:27 (0.1841 s / it) +extract (validation) [ 0/32] eta: 0:01:57 time: 3.6734 data: 3.5264 max mem: 3005 +extract (validation) [20/32] eta: 0:00:04 time: 0.1785 data: 0.0604 max mem: 3005 +extract (validation) [31/32] eta: 0:00:00 time: 0.1304 data: 0.0333 max mem: 3005 +extract (validation) Total time: 0:00:08 (0.2784 s / it) +extract (test) [ 0/33] eta: 0:02:00 time: 3.6417 data: 3.5029 max mem: 3005 +extract (test) [20/33] eta: 0:00:04 time: 0.1695 data: 0.0535 max mem: 3005 +extract (test) [32/33] eta: 0:00:00 time: 0.1348 data: 0.0372 max mem: 3005 +extract (test) Total time: 0:00:08 (0.2700 s / it) +feature extraction time: 0:00:45 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | adhd200_dx | | 0.0059948 | train | 0.7726 | 0.022421 | 0.76492 | 0.02347 | 0.76195 | 0.023242 | +| flat_mae | reg | logistic | adhd200_dx | | 0.0059948 | test | 0.58462 | 0.060967 | 0.56996 | 0.063939 | 0.56998 | 0.062558 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05732111149031574, "f1": 0.6018132810585641, "f1_std": 0.05919993469100661, "bacc": 0.6013513513513513, "bacc_std": 0.05820367742224786} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05568870366579143, "f1": 0.6233308138070043, "f1_std": 0.06151467804298453, "bacc": 0.6240347490347491, "bacc_std": 0.05818744884052347} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05802538518134789, "f1": 0.5512820512820513, "f1_std": 0.060832088420983255, "bacc": 0.5521235521235521, "bacc_std": 0.058998912637704924} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05685797524633133, "f1": 0.6474358974358974, "f1_std": 0.061025246660579256, "bacc": 0.6462355212355213, "bacc_std": 0.05938107231457297} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06176955838594983, "f1": 0.4980694980694981, "f1_std": 0.06279079813216022, "bacc": 0.4980694980694981, "bacc_std": 0.06263820463683102} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.7538461538461538, "acc_std": 0.05413849432748696, "f1": 0.7490347490347491, "f1_std": 0.055504711104001445, "bacc": 0.7490347490347491, "bacc_std": 0.05551664148669393} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05713151857076077, "f1": 0.5644080416976918, "f1_std": 0.06073755194638699, "bacc": 0.5656370656370656, "bacc_std": 0.05843368374853434} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.06055894287493943, "f1": 0.6612062546537603, "f1_std": 0.06426380848500225, "bacc": 0.6597490347490347, "bacc_std": 0.06216835393123916} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05543532721071086, "f1": 0.5411764705882354, "f1_std": 0.06396992755478527, "bacc": 0.5526061776061776, "bacc_std": 0.05717131925131015} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05876239186950146, "f1": 0.5321419707123356, "f1_std": 0.06281587871713505, "bacc": 0.5342664092664092, "bacc_std": 0.06032408962158811} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 1291.5496650148827, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06234829446332278, "f1": 0.5608108108108107, "f1_std": 0.06341431018955371, "bacc": 0.5608108108108107, "bacc_std": 0.06325437148848224} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05702175231160816, "f1": 0.5250692869740489, "f1_std": 0.06307393531158556, "bacc": 0.5299227799227799, "bacc_std": 0.05884074633744766} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.060776350118510826, "f1": 0.5745454545454545, "f1_std": 0.06219963786062973, "bacc": 0.5743243243243243, "bacc_std": 0.06146986933980504} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0555246434973063, "f1": 0.545, "f1_std": 0.05966040164641109, "bacc": 0.5477799227799228, "bacc_std": 0.05681876628435885} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.000774263682681127, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.062428759872126226, "f1": 0.5469838981014179, "f1_std": 0.06310839438938437, "bacc": 0.5472972972972974, "bacc_std": 0.06321125255149328} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.061803207438644654, "f1": 0.5608108108108107, "f1_std": 0.06283102964647934, "bacc": 0.5608108108108107, "bacc_std": 0.06256166394250548} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05716757470483715, "f1": 0.6153846153846154, "f1_std": 0.060258857982781064, "bacc": 0.6148648648648649, "bacc_std": 0.05865145152686348} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.059640032621757176, "f1": 0.5376016260162602, "f1_std": 0.06529933092992857, "bacc": 0.5434362934362934, "bacc_std": 0.06105948071714534} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05280389980218816, "f1": 0.6697154471544715, "f1_std": 0.05895637130557245, "bacc": 0.6689189189189189, "bacc_std": 0.05549879523627344} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05449685078573009, "f1": 0.6289401836684041, "f1_std": 0.05882185801443634, "bacc": 0.6283783783783784, "bacc_std": 0.05660964798554247} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.7538461538461538, "acc_std": 0.05204753403204198, "f1": 0.746588693957115, "f1_std": 0.05405688992817761, "bacc": 0.7446911196911197, "bacc_std": 0.0537647861535753} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0622407313505654, "f1": 0.5512820512820513, "f1_std": 0.06530417432672812, "bacc": 0.5521235521235521, "bacc_std": 0.06349233228743814} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05396059048457297, "f1": 0.5905769715293525, "f1_std": 0.05887461511598266, "bacc": 0.5926640926640927, "bacc_std": 0.055383810651781884} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.048876368607784514, "f1": 0.6003742314889067, "f1_std": 0.06012579225772673, "bacc": 0.6110038610038611, "bacc_std": 0.051387912493349196} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.05938760454812517, "f1": 0.570630081300813, "f1_std": 0.06522858468048676, "bacc": 0.5748069498069498, "bacc_std": 0.061064081542918196} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05744627745032229, "f1": 0.5512820512820513, "f1_std": 0.06067839554813978, "bacc": 0.5521235521235521, "bacc_std": 0.058737612160318795} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.057541678231763545, "f1": 0.6018132810585641, "f1_std": 0.060328564764931016, "bacc": 0.6013513513513513, "bacc_std": 0.058931787610778635} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05695204252640105, "f1": 0.5578231292517006, "f1_std": 0.06177529825087155, "bacc": 0.5612934362934363, "bacc_std": 0.058389551517483367} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 21.54434690031882, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.06211441247155437, "f1": 0.48000000000000004, "f1_std": 0.06308432966278382, "bacc": 0.48021235521235517, "bacc_std": 0.06256674482047607} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.060912656451482486, "f1": 0.5521501544309813, "f1_std": 0.060913506902595074, "bacc": 0.555984555984556, "bacc_std": 0.06141399000226569} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.059359771606763284, "f1": 0.48, "f1_std": 0.06401069752289423, "bacc": 0.48503861003861004, "bacc_std": 0.06065439996251827} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.061826058733041825, "f1": 0.5066413662239089, "f1_std": 0.06205957359483718, "bacc": 0.5111003861003861, "bacc_std": 0.06254516812737378} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06119345596756599, "f1": 0.6139225469232596, "f1_std": 0.06142016883113983, "bacc": 0.6187258687258688, "bacc_std": 0.061751777804244345} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05971744910669453, "f1": 0.5666666666666667, "f1_std": 0.06013660658282585, "bacc": 0.5694980694980695, "bacc_std": 0.06059751631650118} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.059036332209157945, "f1": 0.5775, "f1_std": 0.06332160848966392, "bacc": 0.5791505791505791, "bacc_std": 0.06056600402172626} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.000774263682681127, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.05942210056454549, "f1": 0.46760982874162327, "f1_std": 0.06078925045954281, "bacc": 0.47152509652509655, "bacc_std": 0.059165382520059394} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05872423569522354, "f1": 0.5921814671814671, "f1_std": 0.05997256128337034, "bacc": 0.5921814671814671, "bacc_std": 0.05941916698153296} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.000774263682681127, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.0563342434248346, "f1": 0.5062484685126194, "f1_std": 0.05723667792795509, "bacc": 0.5072393822393823, "bacc_std": 0.05625512603169517} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05758612587673462, "f1": 0.6036585365853658, "f1_std": 0.06360174879325577, "bacc": 0.6061776061776062, "bacc_std": 0.05942221257132464} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05160867777129904, "f1": 0.5411764705882354, "f1_std": 0.060171622436753926, "bacc": 0.5526061776061776, "bacc_std": 0.05346578557115574} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.060256775994525606, "f1": 0.588206627680312, "f1_std": 0.061986682381927616, "bacc": 0.5878378378378378, "bacc_std": 0.0611511147579981} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06441712440927305, "f1": 0.5357142857142857, "f1_std": 0.06458082049819673, "bacc": 0.5381274131274132, "bacc_std": 0.06486043561218585} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.058278060260206904, "f1": 0.6655231560891939, "f1_std": 0.06072203630142716, "bacc": 0.6640926640926641, "bacc_std": 0.05957287560919078} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05575887224755831, "f1": 0.6794871794871795, "f1_std": 0.059478359877654, "bacc": 0.6776061776061776, "bacc_std": 0.058252363430950284} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.3593813663804626, "split": "test", "acc": 0.4461538461538462, "acc_std": 0.060070262607251367, "f1": 0.4230769230769231, "f1_std": 0.061665950771569125, "bacc": 0.4266409266409266, "bacc_std": 0.060139336851295445} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.057735114375505166, "f1": 0.656084656084656, "f1_std": 0.06353924885680176, "bacc": 0.6554054054054055, "bacc_std": 0.05995392480634988} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.060764081573471136, "f1": 0.5019157088122606, "f1_std": 0.061357251541475505, "bacc": 0.5024131274131274, "bacc_std": 0.06153706105127399} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.0565268454838498, "f1": 0.5962732919254659, "f1_std": 0.0643208208325403, "bacc": 0.6018339768339769, "bacc_std": 0.05876392741301429} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06151796581766334, "f1": 0.5921814671814671, "f1_std": 0.0628369838229098, "bacc": 0.5921814671814671, "bacc_std": 0.06223577450185256} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05881250923573615, "f1": 0.578226387887527, "f1_std": 0.05973031176323162, "bacc": 0.5786679536679536, "bacc_std": 0.05928546449389097} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05867526922763041, "f1": 0.6018132810585641, "f1_std": 0.06141517024658891, "bacc": 0.6013513513513513, "bacc_std": 0.06004794415114077} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 2.782559402207126, "split": "test", "acc": 0.47692307692307695, "acc_std": 0.06410098541645788, "f1": 0.4738095238095238, "f1_std": 0.06407058935647114, "bacc": 0.4753861003861004, "bacc_std": 0.06475086751279212} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.051532821735048924, "f1": 0.5656241646618552, "f1_std": 0.06244566809604247, "bacc": 0.5796332046332047, "bacc_std": 0.05393035918643237} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.000774263682681127, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06097123193982843, "f1": 0.5381034060279344, "f1_std": 0.06297478497137203, "bacc": 0.5386100386100386, "bacc_std": 0.061625158808060625} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06229542899260572, "f1": 0.5608108108108107, "f1_std": 0.06417603741196264, "bacc": 0.5608108108108107, "bacc_std": 0.06377483672741105} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06222140273716016, "f1": 0.606060606060606, "f1_std": 0.0633600564493716, "bacc": 0.6056949806949807, "bacc_std": 0.06293525311300469} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05971010044100207, "f1": 0.5376016260162602, "f1_std": 0.06497698500465403, "bacc": 0.5434362934362934, "bacc_std": 0.06083256416702827} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05836327774679137, "f1": 0.5745454545454545, "f1_std": 0.06044242180347428, "bacc": 0.5743243243243243, "bacc_std": 0.05975191483880074} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.056546270522990036, "f1": 0.5905769715293525, "f1_std": 0.06179568931075528, "bacc": 0.5926640926640927, "bacc_std": 0.05823338833932702} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.055705939683092426, "f1": 0.61, "f1_std": 0.06027814739562973, "bacc": 0.6105212355212355, "bacc_std": 0.05745663647533252} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.060278611512850556, "f1": 0.5699583435432491, "f1_std": 0.06240800199590214, "bacc": 0.5699806949806949, "bacc_std": 0.06112224580298174} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.000774263682681127, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06278630235420468, "f1": 0.4980694980694981, "f1_std": 0.06352965379908398, "bacc": 0.4980694980694981, "bacc_std": 0.06328466198793517} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.058265712511200524, "f1": 0.543030303030303, "f1_std": 0.05918160467559252, "bacc": 0.542953667953668, "bacc_std": 0.05866978195470122} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05965991190705361, "f1": 0.606060606060606, "f1_std": 0.06128097356207435, "bacc": 0.6056949806949807, "bacc_std": 0.06072572170258904} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05637355542698447, "f1": 0.6690909090909091, "f1_std": 0.05844863582008846, "bacc": 0.6684362934362934, "bacc_std": 0.05805637048015969} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.47692307692307695, "acc_std": 0.060746410515622594, "f1": 0.4666988416988417, "f1_std": 0.06120940501885036, "bacc": 0.4666988416988417, "bacc_std": 0.06089428202083259} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.052124711626350295, "f1": 0.6091503267973856, "f1_std": 0.06218879448752807, "bacc": 0.6153474903474904, "bacc_std": 0.05487580436071236} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.05494949590844047, "f1": 0.5626293995859213, "f1_std": 0.06258288726070199, "bacc": 0.5704633204633205, "bacc_std": 0.05672177286415473} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.47692307692307695, "acc_std": 0.059149146557338744, "f1": 0.4475, "f1_std": 0.06127820507524919, "bacc": 0.45366795366795365, "bacc_std": 0.05899998637966995} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05975485224535248, "f1": 0.5644080416976918, "f1_std": 0.06387092388195088, "bacc": 0.5656370656370656, "bacc_std": 0.06118382387951826} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.0601238878773747, "f1": 0.6198830409356726, "f1_std": 0.06231875614111929, "bacc": 0.6192084942084942, "bacc_std": 0.06131328358778849} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05845667996005815, "f1": 0.6407113674597452, "f1_std": 0.058747436263322655, "bacc": 0.6414092664092663, "bacc_std": 0.058466684872039376} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06030985082479375, "f1": 0.6018132810585641, "f1_std": 0.06260837425227185, "bacc": 0.6013513513513513, "bacc_std": 0.06140188329000794} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05676448593888018, "f1": 0.7006060606060607, "f1_std": 0.0580909392037952, "bacc": 0.6998069498069499, "bacc_std": 0.057413121984807594} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06351818637209346, "f1": 0.5521501544309813, "f1_std": 0.06366112715187054, "bacc": 0.555984555984556, "bacc_std": 0.06412224915518626} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.054930885029985776, "f1": 0.5626293995859213, "f1_std": 0.06171869175497417, "bacc": 0.5704633204633205, "bacc_std": 0.05633010966890393} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.04989303588263395, "f1": 0.7215923406399596, "f1_std": 0.055821323607093146, "bacc": 0.7181467181467182, "bacc_std": 0.05316993558308879} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05712615749218991, "f1": 0.545, "f1_std": 0.0624503985548758, "bacc": 0.5477799227799228, "bacc_std": 0.059125495105952205} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.061059396826601574, "f1": 0.6018132810585641, "f1_std": 0.06377619717532886, "bacc": 0.6013513513513513, "bacc_std": 0.06225991387106898} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05950963325129925, "f1": 0.5578231292517006, "f1_std": 0.06512379398822098, "bacc": 0.5612934362934363, "bacc_std": 0.06135810484338013} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05475065372066563, "f1": 0.5289855072463768, "f1_std": 0.06144046767420895, "bacc": 0.5390926640926641, "bacc_std": 0.055975044492249174} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.0559890099105997, "f1": 0.7065811356616774, "f1_std": 0.05599732287806977, "bacc": 0.7128378378378378, "bacc_std": 0.05578201261735133} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05746732748101874, "f1": 0.6794871794871795, "f1_std": 0.060383119204670925, "bacc": 0.6776061776061776, "bacc_std": 0.058925531117912405} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.06059750614900988, "f1": 0.6431129147767964, "f1_std": 0.06130502928473608, "bacc": 0.6457528957528957, "bacc_std": 0.06162906996357857} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.000774263682681127, "split": "test", "acc": 0.676923076923077, "acc_std": 0.056413721105721105, "f1": 0.656084656084656, "f1_std": 0.06189569505040753, "bacc": 0.6554054054054055, "bacc_std": 0.05883294585355072} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06051589648853143, "f1": 0.545, "f1_std": 0.06406063704468434, "bacc": 0.5477799227799228, "bacc_std": 0.06149700967028417} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05249198304525815, "f1": 0.587737843551797, "f1_std": 0.06372234762548432, "bacc": 0.5974903474903475, "bacc_std": 0.055459984854817616} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05826606998281765, "f1": 0.6018132810585641, "f1_std": 0.0610008410575349, "bacc": 0.6013513513513513, "bacc_std": 0.059678466842842184} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.0588278081005672, "f1": 0.5833333333333333, "f1_std": 0.061943750191823424, "bacc": 0.5834942084942085, "bacc_std": 0.060231712291767915} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05804316696019637, "f1": 0.6612062546537603, "f1_std": 0.06234514041378354, "bacc": 0.6597490347490347, "bacc_std": 0.05991734217854129} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05755080081594185, "f1": 0.5578231292517006, "f1_std": 0.06202383678910752, "bacc": 0.5612934362934363, "bacc_std": 0.058642516347298564} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05580435816791144, "f1": 0.5501153550371699, "f1_std": 0.06178487265216961, "bacc": 0.556949806949807, "bacc_std": 0.05715103673326448} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05262074646543777, "f1": 0.6500897205844656, "f1_std": 0.06064270123830016, "bacc": 0.6510617760617761, "bacc_std": 0.055678121708125965} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05167372376566886, "f1": 0.587737843551797, "f1_std": 0.061917648148998465, "bacc": 0.5974903474903475, "bacc_std": 0.0542452231430262} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.000774263682681127, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06250056378443941, "f1": 0.49317738791423005, "f1_std": 0.06417652014908862, "bacc": 0.49372586872586877, "bacc_std": 0.0631084767862466} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.0589264429647113, "f1": 0.6862934362934363, "f1_std": 0.0604700174178674, "bacc": 0.6862934362934363, "bacc_std": 0.06036773019755417} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.051700466389115594, "f1": 0.5427489177489178, "f1_std": 0.06382579315831705, "bacc": 0.5617760617760618, "bacc_std": 0.05383130104825736} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.000774263682681127, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.055268288092169576, "f1": 0.6366869918699187, "f1_std": 0.06184568915952588, "bacc": 0.6375482625482626, "bacc_std": 0.05753845878985159} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05907643429284977, "f1": 0.545, "f1_std": 0.06384405121374152, "bacc": 0.5477799227799228, "bacc_std": 0.06060157039772176} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05337491377153075, "f1": 0.6612062546537603, "f1_std": 0.05689084672434203, "bacc": 0.6597490347490347, "bacc_std": 0.055042859509587656} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | adhd200_dx | train | 100 | 13.175 | 129.15 | 0.78263 | 0.073259 | 0.77488 | 0.077197 | 0.77282 | 0.077266 | +| flat_mae | reg | logistic | adhd200_dx | test | 100 | 13.175 | 129.15 | 0.60077 | 0.062363 | 0.58213 | 0.063706 | 0.58434 | 0.062472 | + + +done! total time: 0:04:43 diff --git a/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9384011f3359a45ca97e1da6b1cc036d8259ba33 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..22403bfd955a4073b6870890a14ab242f328f273 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,,166.81005372000556,test,0.7073170731707317,0.06659771171170208,0.603225806451613,0.08352044837440238,0.6128472222222222,0.08986580271487443 +flat_mae,patch,logistic,adni_ad_vs_cn,1,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,1,21.54434690031882,test,0.8048780487804879,0.06079497145678979,0.7515151515151515,0.07631676201392011,0.7693548387096774,0.08243464481619936 +flat_mae,patch,logistic,adni_ad_vs_cn,2,0.046415888336127774,train,0.8997289972899729,0.013012504910229558,0.8378060515342671,0.024310835135681077,0.7970252280384584,0.025997745159662643 +flat_mae,patch,logistic,adni_ad_vs_cn,2,0.046415888336127774,test,0.6829268292682927,0.06497024441335794,0.5547201336675021,0.08614643690537145,0.5532258064516129,0.08362788215839026 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,test,0.6829268292682927,0.05033484547159764,0.4696517412935323,0.06851470244266303,0.4854838709677419,0.05592079829170615 +flat_mae,patch,logistic,adni_ad_vs_cn,4,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,4,21.54434690031882,test,0.8048780487804879,0.06078769091718673,0.7354838709677419,0.08100091574989322,0.7354838709677419,0.08325631473833399 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.3593813663804626,train,0.981029810298103,0.0069113895044137855,0.9726796763445977,0.010320393587492814,0.9593023255813953,0.014827341436794655 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.3593813663804626,test,0.6829268292682927,0.05075993586124509,0.4696517412935323,0.07049121063144048,0.4854838709677419,0.05858028021415383 +flat_mae,patch,logistic,adni_ad_vs_cn,6,0.046415888336127774,train,0.8888888888888888,0.014420400271077249,0.8221895751307516,0.026539407019590823,0.7859109211931958,0.028132124021067618 +flat_mae,patch,logistic,adni_ad_vs_cn,6,0.046415888336127774,test,0.7804878048780488,0.05466155671285311,0.6660633484162897,0.09068984003200109,0.6516129032258065,0.0828851665406192 +flat_mae,patch,logistic,adni_ad_vs_cn,7,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,7,166.81005372000556,test,0.7804878048780488,0.06120151324176142,0.6917293233082706,0.08732816291849894,0.685483870967742,0.08743596223634545 +flat_mae,patch,logistic,adni_ad_vs_cn,8,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,8,1291.5496650148827,test,0.6585365853658537,0.05911299537738394,0.5017361111111112,0.07445590617349368,0.5032258064516129,0.06836372120277645 +flat_mae,patch,logistic,adni_ad_vs_cn,9,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,9,166.81005372000556,test,0.7560975609756098,0.06971841042316852,0.7184065934065934,0.07345169170446748,0.7709677419354839,0.07806356011144812 +flat_mae,patch,logistic,adni_ad_vs_cn,10,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,10,166.81005372000556,test,0.7073170731707317,0.06362088410817263,0.603225806451613,0.08164177943527912,0.603225806451613,0.08249961851518396 +flat_mae,patch,logistic,adni_ad_vs_cn,11,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,11,2.782559402207126,test,0.8292682926829268,0.048179616346447775,0.7402714932126697,0.08055159804491613,0.717741935483871,0.07725730156536202 +flat_mae,patch,logistic,adni_ad_vs_cn,12,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,12,166.81005372000556,test,0.5853658536585366,0.07208534831710818,0.4863669859985261,0.07849937111508284,0.4887096774193548,0.08548234837353695 +flat_mae,patch,logistic,adni_ad_vs_cn,13,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,13,1291.5496650148827,test,0.7317073170731707,0.06038691428623988,0.6232247284878863,0.08645028065313905,0.6193548387096774,0.0840511030816394 +flat_mae,patch,logistic,adni_ad_vs_cn,14,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,14,2.782559402207126,test,0.7560975609756098,0.06550767287299236,0.6893939393939394,0.07896948400308917,0.7032258064516128,0.084241974640693 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.046415888336127774,train,0.8861788617886179,0.013357920133851774,0.8128260869565218,0.025582156737532037,0.7720026296326732,0.02616478534537021 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.046415888336127774,test,0.7317073170731707,0.05186299619938014,0.5512437810945273,0.08975076481899173,0.5516129032258065,0.07202611237380277 +flat_mae,patch,logistic,adni_ad_vs_cn,16,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,16,2.782559402207126,test,0.7804878048780488,0.05031962089952331,0.6328358208955224,0.0947270341183192,0.6177419354838709,0.07751251088974528 +flat_mae,patch,logistic,adni_ad_vs_cn,17,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,17,2.782559402207126,test,0.7073170731707317,0.07187410501731797,0.6272727272727273,0.08739019115511117,0.6370967741935484,0.0919715005285295 +flat_mae,patch,logistic,adni_ad_vs_cn,18,0.3593813663804626,train,0.983739837398374,0.0061611761346703365,0.9766829555986183,0.009083670995210362,0.9651162790697674,0.013217872056356696 +flat_mae,patch,logistic,adni_ad_vs_cn,18,0.3593813663804626,test,0.8292682926829268,0.04604361277328205,0.7144278606965174,0.09506656102972419,0.6838709677419355,0.08180272830389253 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,test,0.6829268292682927,0.06600156831190582,0.5547201336675021,0.08284137531755736,0.5532258064516129,0.08047273664012997 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.3593813663804626,train,0.986449864498645,0.0057789165887559895,0.9808134274809954,0.00828950113744657,0.9749774015942148,0.011011187885179446 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.3593813663804626,test,0.7317073170731707,0.06887381663222672,0.6479313036690086,0.0851714584910086,0.6532258064516129,0.08921113511502902 +flat_mae,patch,logistic,adni_ad_vs_cn,21,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,21,21.54434690031882,test,0.7560975609756098,0.055757164490570665,0.6117424242424243,0.09464435726170588,0.6016129032258064,0.08070539531199994 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.3593813663804626,train,0.991869918699187,0.004776230367648643,0.9884880564885973,0.006881890345470686,0.9825581395348837,0.010246680265478827 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.3593813663804626,test,0.7804878048780488,0.054107456923288604,0.6660633484162897,0.08650477214861539,0.6516129032258065,0.07911593963460453 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,test,0.6829268292682927,0.06874789056306317,0.6072218128224024,0.0813964073257685,0.6209677419354839,0.08862649771831659 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.005994842503189409,train,0.8211382113821138,0.013353740964603246,0.6671131765992345,0.032096990272646925,0.6405620839838935,0.02513860227161134 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.005994842503189409,test,0.7317073170731707,0.05050597057430525,0.5512437810945273,0.08688492578968747,0.5516129032258065,0.06976373059218836 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,train,0.986449864498645,0.006211758100965898,0.9808134274809954,0.00895175440156044,0.9749774015942148,0.012110589902827663 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,test,0.7073170731707317,0.060349960953024064,0.5729166666666666,0.08663307927595014,0.5693548387096774,0.081047382179615 +flat_mae,patch,logistic,adni_ad_vs_cn,26,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,26,2.782559402207126,test,0.7073170731707317,0.06667984006272958,0.603225806451613,0.08361058263975459,0.603225806451613,0.08362848937516268 +flat_mae,patch,logistic,adni_ad_vs_cn,27,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,27,166.81005372000556,test,0.7317073170731707,0.06322485674487899,0.6232247284878863,0.0858730039863233,0.6193548387096774,0.08345621045479806 +flat_mae,patch,logistic,adni_ad_vs_cn,28,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,28,21.54434690031882,test,0.7560975609756098,0.05761921402712905,0.6440972222222222,0.08382110168231488,0.635483870967742,0.0791656478018304 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,test,0.7317073170731707,0.06385459016630178,0.6232247284878863,0.08966046853871479,0.6193548387096774,0.08679746995646169 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.3593813663804626,train,0.991869918699187,0.004556957105907904,0.9884880564885973,0.006562160507939575,0.9825581395348837,0.00977626262837225 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.3593813663804626,test,0.7804878048780488,0.05476193678463647,0.6660633484162897,0.09192554303290096,0.6516129032258065,0.08401113417858935 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.3593813663804626,train,0.978319783197832,0.007088958488524521,0.9689106074648244,0.010417130211020233,0.9575355411290984,0.014133427662054629 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.3593813663804626,test,0.7073170731707317,0.04386282765860187,0.4831932773109243,0.06787352239816728,0.5016129032258064,0.051972279491936176 +flat_mae,patch,logistic,adni_ad_vs_cn,32,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,32,21.54434690031882,test,0.6341463414634146,0.0757246019996615,0.5684210526315789,0.08149618109984197,0.5887096774193548,0.09114699703028004 +flat_mae,patch,logistic,adni_ad_vs_cn,33,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,33,2.782559402207126,test,0.6829268292682927,0.07047428345288444,0.6410774410774411,0.07401949895288094,0.6887096774193548,0.08465310864539982 +flat_mae,patch,logistic,adni_ad_vs_cn,34,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,34,166.81005372000556,test,0.7560975609756098,0.06330752680862385,0.6693548387096775,0.08667448592636284,0.6693548387096775,0.08824294720559629 +flat_mae,patch,logistic,adni_ad_vs_cn,35,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,35,166.81005372000556,test,0.8048780487804879,0.044037299640950565,0.6554621848739496,0.09935620285180007,0.6338709677419355,0.07907414584491582 +flat_mae,patch,logistic,adni_ad_vs_cn,36,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,36,21.54434690031882,test,0.6341463414634146,0.07190290233824981,0.5199063231850116,0.08098997721534544,0.5209677419354839,0.08378844395422648 +flat_mae,patch,logistic,adni_ad_vs_cn,37,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,37,166.81005372000556,test,0.6341463414634146,0.0736944509440196,0.5684210526315789,0.07896200275317426,0.5887096774193548,0.09020108020377117 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.046415888336127774,train,0.8943089430894309,0.013375186037231077,0.8308632543926662,0.02440751246555143,0.7934916591338648,0.026067060548125243 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.046415888336127774,test,0.7073170731707317,0.07047718714866438,0.6272727272727273,0.08652154613456321,0.6370967741935484,0.09138377047793435 +flat_mae,patch,logistic,adni_ad_vs_cn,39,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,39,166.81005372000556,test,0.6341463414634146,0.06680797239469836,0.48621553884711777,0.07957532501539784,0.48709677419354835,0.0782979338910862 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,test,0.7560975609756098,0.07040570795096193,0.6893939393939394,0.08411994180597884,0.7032258064516128,0.08891078810479182 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.3593813663804626,train,0.991869918699187,0.004437336506682395,0.9884880564885973,0.006379982047314622,0.9825581395348837,0.009519634714917517 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.3593813663804626,test,0.7804878048780488,0.06458035497472517,0.7280766396462786,0.0766797607012638,0.7532258064516129,0.08446358228591917 +flat_mae,patch,logistic,adni_ad_vs_cn,42,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,42,166.81005372000556,test,0.7317073170731707,0.05846375550639864,0.5918552036199095,0.09099059760072874,0.5854838709677419,0.08281719466107057 +flat_mae,patch,logistic,adni_ad_vs_cn,43,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,43,1291.5496650148827,test,0.7560975609756098,0.0639846055250783,0.6693548387096775,0.08589982264167,0.6693548387096775,0.08746334299788845 +flat_mae,patch,logistic,adni_ad_vs_cn,44,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,44,2.782559402207126,test,0.7804878048780488,0.05381960305329092,0.6660633484162897,0.08775569710366908,0.6516129032258065,0.0810652349043266 +flat_mae,patch,logistic,adni_ad_vs_cn,45,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,45,166.81005372000556,test,0.7560975609756098,0.06024883953305914,0.6440972222222222,0.09193857547408515,0.635483870967742,0.08609523433762384 +flat_mae,patch,logistic,adni_ad_vs_cn,46,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,46,2.782559402207126,test,0.8292682926829268,0.034960979353200604,0.6800445930880714,0.09533760341734848,0.65,0.07167000767406125 +flat_mae,patch,logistic,adni_ad_vs_cn,47,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,47,166.81005372000556,test,0.7804878048780488,0.06286930628545062,0.6917293233082706,0.08720745016886557,0.685483870967742,0.08611854925934863 +flat_mae,patch,logistic,adni_ad_vs_cn,48,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,48,166.81005372000556,test,0.7073170731707317,0.06563633517300829,0.646551724137931,0.0741690183935421,0.6709677419354838,0.08122072519913281 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,49,166.81005372000556,test,0.7317073170731707,0.06655303423029467,0.6479313036690086,0.08453056562277879,0.6532258064516129,0.08733652039075417 +flat_mae,patch,logistic,adni_ad_vs_cn,50,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,50,166.81005372000556,test,0.8292682926829268,0.04986547041588738,0.7402714932126697,0.08692582706118779,0.717741935483871,0.08243122690834453 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.046415888336127774,train,0.8943089430894309,0.01333819176911933,0.8326335988835263,0.023610003916538745,0.79753882816994,0.02509110770006151 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.046415888336127774,test,0.6829268292682927,0.05827347372038849,0.5176470588235295,0.08315472676910456,0.5193548387096775,0.07321389550705097 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.046415888336127774,train,0.9051490514905149,0.013635585310926061,0.849799383613421,0.024164650560569278,0.8127003040512779,0.026492363187943378 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.046415888336127774,test,0.7560975609756098,0.045052127914508355,0.569327731092437,0.08977054207151601,0.567741935483871,0.06720767347237366 +flat_mae,patch,logistic,adni_ad_vs_cn,53,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,53,21.54434690031882,test,0.8048780487804879,0.059835189071975034,0.7354838709677419,0.08407657742551267,0.7354838709677419,0.08654874292064928 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.046415888336127774,train,0.8970189701897019,0.013423556201901304,0.8360690235690236,0.024182695467623395,0.7993056126222369,0.025907325512106488 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.046415888336127774,test,0.7560975609756098,0.06409932432515265,0.6693548387096775,0.08557404967542567,0.6693548387096775,0.08805939078787292 +flat_mae,patch,logistic,adni_ad_vs_cn,55,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,55,21.54434690031882,test,0.8292682926829268,0.057460350243356786,0.7885040530582166,0.06718145001331652,0.8193548387096774,0.0705154693325692 +flat_mae,patch,logistic,adni_ad_vs_cn,56,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,56,21.54434690031882,test,0.7804878048780488,0.05859060515554147,0.6660633484162897,0.0913618620570825,0.6516129032258065,0.08443953781437895 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,train,0.8970189701897019,0.01361197703772119,0.8343179884677191,0.025418722057300978,0.7952584435861616,0.027355932791768994 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,test,0.7804878048780488,0.05801802833719562,0.6660633484162897,0.09515233109140805,0.6516129032258065,0.08573536080037968 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,test,0.7317073170731707,0.06185542815041882,0.6232247284878863,0.08600915015869696,0.6193548387096774,0.08422982848280944 +flat_mae,patch,logistic,adni_ad_vs_cn,59,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,59,166.81005372000556,test,0.6341463414634146,0.06602896269317507,0.48621553884711777,0.07691364648259792,0.48709677419354835,0.0742378927087761 +flat_mae,patch,logistic,adni_ad_vs_cn,60,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,60,166.81005372000556,test,0.7317073170731707,0.06807930078751422,0.6835087719298245,0.07460852391503017,0.7209677419354839,0.08193881121006218 +flat_mae,patch,logistic,adni_ad_vs_cn,61,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,61,166.81005372000556,test,0.6097560975609756,0.0716349228056941,0.5030303030303029,0.08068207140686545,0.5048387096774194,0.0860830651771983 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,train,0.9024390243902439,0.012628157517694288,0.8446969696969697,0.023061835830965726,0.8068863505629058,0.025602824978778135 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,test,0.7073170731707317,0.05513988484440616,0.5340909090909092,0.08061714403777925,0.535483870967742,0.06908559892762273 +flat_mae,patch,logistic,adni_ad_vs_cn,63,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,63,2.782559402207126,test,0.7804878048780488,0.058978333169582065,0.6917293233082706,0.08569915622414821,0.685483870967742,0.08586611283869239 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.3593813663804626,train,0.978319783197832,0.007036915044173485,0.9689106074648244,0.010368393164033478,0.9575355411290984,0.014197759378198546 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.3593813663804626,test,0.7560975609756098,0.06225919225006996,0.6693548387096775,0.08598813135283102,0.6693548387096775,0.08815170320734522 +flat_mae,patch,logistic,adni_ad_vs_cn,65,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,65,2.782559402207126,test,0.7804878048780488,0.056919033052370935,0.6660633484162897,0.09136626602942338,0.6516129032258065,0.08340194689008903 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.3593813663804626,train,0.986449864498645,0.005909847754597149,0.9806516564069758,0.008656925730362952,0.9709302325581395,0.012678685008409017 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.3593813663804626,test,0.7560975609756098,0.06289231422501661,0.6693548387096775,0.08539934199671462,0.6693548387096775,0.08639384799801583 +flat_mae,patch,logistic,adni_ad_vs_cn,67,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,67,166.81005372000556,test,0.7804878048780488,0.0582515621878334,0.6917293233082706,0.08364599160728516,0.685483870967742,0.08440995121858703 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.3593813663804626,train,0.994579945799458,0.0038815876240197032,0.9924192620593311,0.005428738445172623,0.9924192620593311,0.006037167168640404 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.3593813663804626,test,0.6829268292682927,0.07407279728778225,0.6072218128224024,0.08568844316139154,0.6209677419354839,0.09379628661036565 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,test,0.7073170731707317,0.0712056618674036,0.646551724137931,0.08073908997077259,0.6709677419354838,0.08916030361015106 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.046415888336127774,train,0.8888888888888888,0.013358097169782682,0.8202715706190526,0.025021037589338994,0.7818637521571206,0.025893924033706706 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.046415888336127774,test,0.8536585365853658,0.03896031324730817,0.7415966386554622,0.09473202622716693,0.7,0.07986864215698174 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.3593813663804626,train,0.991869918699187,0.004370580609940401,0.9885825675299359,0.006167270308947905,0.986605308570959,0.007839817648690415 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.3593813663804626,test,0.6829268292682927,0.06376443859074103,0.5547201336675021,0.0834093823235783,0.5532258064516129,0.08069560899537369 +flat_mae,patch,logistic,adni_ad_vs_cn,72,0.046415888336127774,train,0.8997289972899729,0.012536579507600882,0.8395369336545807,0.02287312074479978,0.8010723970745337,0.024826301434941807 +flat_mae,patch,logistic,adni_ad_vs_cn,72,0.046415888336127774,test,0.7317073170731707,0.03999512165374124,0.4972129319955407,0.0719958327686336,0.5177419354838709,0.051018908065823286 +flat_mae,patch,logistic,adni_ad_vs_cn,73,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,73,2.782559402207126,test,0.7317073170731707,0.06853300919408768,0.6676492262343405,0.08025685169071795,0.6870967741935483,0.08690830714123987 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.3593813663804626,train,0.983739837398374,0.0064038439260993675,0.9768796992481203,0.009291203637536242,0.9691634481058427,0.012633303264636114 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.3593813663804626,test,0.7560975609756098,0.04543440925464816,0.569327731092437,0.0875924742866146,0.567741935483871,0.06601480299417731 +flat_mae,patch,logistic,adni_ad_vs_cn,75,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,75,2.782559402207126,test,0.7317073170731707,0.05085526698047084,0.5512437810945273,0.08912872672361447,0.5516129032258065,0.07158737004855159 +flat_mae,patch,logistic,adni_ad_vs_cn,76,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,76,21.54434690031882,test,0.6097560975609756,0.07448983337758959,0.5494505494505495,0.07799528984550821,0.5725806451612903,0.08787735345798961 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.3593813663804626,train,0.983739837398374,0.006644329910049955,0.9768796992481203,0.009598298579421615,0.9691634481058427,0.012788521353864485 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.3593813663804626,test,0.7804878048780488,0.054159326063412845,0.6660633484162897,0.08994371040408121,0.6516129032258065,0.08326188734021547 +flat_mae,patch,logistic,adni_ad_vs_cn,78,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,78,2.782559402207126,test,0.7073170731707317,0.06679847964443392,0.603225806451613,0.08613637455816871,0.603225806451613,0.08691172240955548 +flat_mae,patch,logistic,adni_ad_vs_cn,79,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,79,21.54434690031882,test,0.8292682926829268,0.060668181096036555,0.7759562841530054,0.07889166480981656,0.7854838709677419,0.08255657480603514 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,test,0.7073170731707317,0.05349907569795384,0.5340909090909092,0.0817724721574741,0.535483870967742,0.06871907129484064 +flat_mae,patch,logistic,adni_ad_vs_cn,81,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,81,166.81005372000556,test,0.7804878048780488,0.05564865990477135,0.6917293233082706,0.08157943930432904,0.685483870967742,0.08058712563367687 +flat_mae,patch,logistic,adni_ad_vs_cn,82,0.3593813663804626,train,0.989159891598916,0.005396762964194108,0.9847141673570836,0.007702542297785249,0.9807913550825869,0.010285584835780954 +flat_mae,patch,logistic,adni_ad_vs_cn,82,0.3593813663804626,test,0.7804878048780488,0.0522958681228696,0.6660633484162897,0.08866437731187976,0.6516129032258065,0.08184449202047435 +flat_mae,patch,logistic,adni_ad_vs_cn,83,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,1291.5496650148827,test,0.7804878048780488,0.0628143445387551,0.7119437939110069,0.08124378997025508,0.7193548387096774,0.08572956778687289 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,train,0.8997289972899729,0.01375432493100872,0.8412164912484736,0.024630521410827475,0.805119566110609,0.02690099899865939 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,test,0.7317073170731707,0.05783819149146154,0.5918552036199095,0.08839277403402,0.5854838709677419,0.07892250338187731 +flat_mae,patch,logistic,adni_ad_vs_cn,85,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,85,21.54434690031882,test,0.8048780487804879,0.0555817430512815,0.7152777777777778,0.0859018925936977,0.7016129032258065,0.08425874443978 +flat_mae,patch,logistic,adni_ad_vs_cn,86,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,86,2.782559402207126,test,0.5365853658536586,0.07037345776218691,0.4533333333333333,0.0699933611424567,0.4564516129032258,0.08031055642607103 +flat_mae,patch,logistic,adni_ad_vs_cn,87,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,87,2.782559402207126,test,0.8536585365853658,0.05360127765353744,0.8016129032258064,0.07519158491310474,0.8016129032258064,0.07969690241096064 +flat_mae,patch,logistic,adni_ad_vs_cn,88,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,88,21.54434690031882,test,0.7073170731707317,0.0665415025720408,0.6272727272727273,0.08064739979193049,0.6370967741935484,0.08573949461318543 +flat_mae,patch,logistic,adni_ad_vs_cn,89,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,89,21.54434690031882,test,0.7560975609756098,0.06521166744144605,0.6893939393939394,0.08110764068822261,0.7032258064516128,0.08735988787999803 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.3593813663804626,train,0.983739837398374,0.006198428582156594,0.9766829555986183,0.00915621362803192,0.9651162790697674,0.013297791551254533 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.3593813663804626,test,0.7804878048780488,0.0569583167876211,0.6917293233082706,0.08149273111292436,0.685483870967742,0.08052960307802506 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.005994842503189409,train,0.8346883468834688,0.012887544061799755,0.6944244579899811,0.031298259811406555,0.6615375133536034,0.02546594350848827 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.005994842503189409,test,0.6829268292682927,0.05969583820791552,0.5176470588235295,0.08333482773624097,0.5193548387096775,0.07543072819125546 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.3593813663804626,train,0.981029810298103,0.00664321563340119,0.9729123189697663,0.009699379059628999,0.9633494946174705,0.013112046657692791 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.3593813663804626,test,0.6341463414634146,0.0720192980945863,0.5199063231850116,0.08442623961101829,0.5209677419354839,0.08790833959673887 +flat_mae,patch,logistic,adni_ad_vs_cn,93,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,93,166.81005372000556,test,0.43902439024390244,0.07204749243362073,0.36498316498316496,0.06351487332038365,0.35806451612903223,0.07441329106831027 +flat_mae,patch,logistic,adni_ad_vs_cn,94,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,94,166.81005372000556,test,0.6585365853658537,0.07067299497628009,0.5370967741935484,0.08769383610880646,0.5370967741935484,0.08754463728390001 +flat_mae,patch,logistic,adni_ad_vs_cn,95,0.3593813663804626,train,0.986449864498645,0.0058388324740040205,0.9806516564069758,0.008537939237657362,0.9709302325581395,0.012526332458764465 +flat_mae,patch,logistic,adni_ad_vs_cn,95,0.3593813663804626,test,0.7804878048780488,0.05723917903756749,0.6660633484162897,0.09288828185500214,0.6516129032258065,0.08602098302932805 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,test,0.7317073170731707,0.0578590669129162,0.5918552036199095,0.09043784226259088,0.5854838709677419,0.08010534654606613 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.3593813663804626,train,0.983739837398374,0.006432935356072508,0.9768796992481203,0.009311664938852302,0.9691634481058427,0.012599785856372227 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.3593813663804626,test,0.8048780487804879,0.052097835022573265,0.6893939393939394,0.09483661389312227,0.667741935483871,0.08424724149972018 +flat_mae,patch,logistic,adni_ad_vs_cn,98,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,98,166.81005372000556,test,0.6585365853658537,0.06942442040608622,0.5370967741935484,0.08698632406695068,0.5370967741935484,0.0865605498048517 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.046415888336127774,train,0.8943089430894309,0.013385118770999913,0.8326335988835263,0.023780760889812784,0.79753882816994,0.02519280954254722 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.046415888336127774,test,0.7804878048780488,0.0471176082430308,0.6328358208955224,0.09214325179835789,0.6177419354838709,0.0749549483142708 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.046415888336127774,train,0.8997289972899729,0.01376677869442812,0.8395369336545807,0.025243231103992878,0.8010723970745337,0.027074188843069582 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.046415888336127774,test,0.6829268292682927,0.06446741495298078,0.5547201336675021,0.08646755138939115,0.5532258064516129,0.08388246441055443 diff --git a/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..19249186b131eb3d9fc63d8015aba169115f4649 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:56:56 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:13:09 time: 4.8159 data: 3.4965 max mem: 2698 +extract (train) [ 20/164] eta: 0:01:00 time: 0.1989 data: 0.0743 max mem: 3005 +extract (train) [ 40/164] eta: 0:00:37 time: 0.1736 data: 0.0582 max mem: 3005 +extract (train) [ 60/164] eta: 0:00:26 time: 0.1762 data: 0.0604 max mem: 3005 +extract (train) [ 80/164] eta: 0:00:20 time: 0.1868 data: 0.0647 max mem: 3005 +extract (train) [100/164] eta: 0:00:14 time: 0.1762 data: 0.0636 max mem: 3005 +extract (train) [120/164] eta: 0:00:09 time: 0.1910 data: 0.0660 max mem: 3005 +extract (train) [140/164] eta: 0:00:05 time: 0.1907 data: 0.0662 max mem: 3005 +extract (train) [160/164] eta: 0:00:00 time: 0.1804 data: 0.0600 max mem: 3005 +extract (train) [163/164] eta: 0:00:00 time: 0.1806 data: 0.0604 max mem: 3005 +extract (train) Total time: 0:00:35 (0.2141 s / it) +extract (validation) [ 0/21] eta: 0:01:18 time: 3.7406 data: 3.6112 max mem: 3005 +extract (validation) [20/21] eta: 0:00:00 time: 0.1738 data: 0.0550 max mem: 3005 +extract (validation) Total time: 0:00:07 (0.3569 s / it) +extract (test) [ 0/21] eta: 0:01:21 time: 3.8830 data: 3.7358 max mem: 3005 +extract (test) [20/21] eta: 0:00:00 time: 0.1769 data: 0.0563 max mem: 3005 +extract (test) Total time: 0:00:07 (0.3654 s / it) +feature extraction time: 0:00:50 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 166.81 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 166.81 | test | 0.70732 | 0.066598 | 0.60323 | 0.08352 | 0.61285 | 0.089866 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06079497145678979, "f1": 0.7515151515151515, "f1_std": 0.07631676201392011, "bacc": 0.7693548387096774, "bacc_std": 0.08243464481619936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06497024441335794, "f1": 0.5547201336675021, "f1_std": 0.08614643690537145, "bacc": 0.5532258064516129, "bacc_std": 0.08362788215839026} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05033484547159764, "f1": 0.4696517412935323, "f1_std": 0.06851470244266303, "bacc": 0.4854838709677419, "bacc_std": 0.05592079829170615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06078769091718673, "f1": 0.7354838709677419, "f1_std": 0.08100091574989322, "bacc": 0.7354838709677419, "bacc_std": 0.08325631473833399} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05075993586124509, "f1": 0.4696517412935323, "f1_std": 0.07049121063144048, "bacc": 0.4854838709677419, "bacc_std": 0.05858028021415383} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05466155671285311, "f1": 0.6660633484162897, "f1_std": 0.09068984003200109, "bacc": 0.6516129032258065, "bacc_std": 0.0828851665406192} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06120151324176142, "f1": 0.6917293233082706, "f1_std": 0.08732816291849894, "bacc": 0.685483870967742, "bacc_std": 0.08743596223634545} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 1291.5496650148827, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05911299537738394, "f1": 0.5017361111111112, "f1_std": 0.07445590617349368, "bacc": 0.5032258064516129, "bacc_std": 0.06836372120277645} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06971841042316852, "f1": 0.7184065934065934, "f1_std": 0.07345169170446748, "bacc": 0.7709677419354839, "bacc_std": 0.07806356011144812} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06362088410817263, "f1": 0.603225806451613, "f1_std": 0.08164177943527912, "bacc": 0.603225806451613, "bacc_std": 0.08249961851518396} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 2.782559402207126, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.048179616346447775, "f1": 0.7402714932126697, "f1_std": 0.08055159804491613, "bacc": 0.717741935483871, "bacc_std": 0.07725730156536202} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 166.81005372000556, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.07208534831710818, "f1": 0.4863669859985261, "f1_std": 0.07849937111508284, "bacc": 0.4887096774193548, "bacc_std": 0.08548234837353695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 1291.5496650148827, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06038691428623988, "f1": 0.6232247284878863, "f1_std": 0.08645028065313905, "bacc": 0.6193548387096774, "bacc_std": 0.0840511030816394} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06550767287299236, "f1": 0.6893939393939394, "f1_std": 0.07896948400308917, "bacc": 0.7032258064516128, "bacc_std": 0.084241974640693} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05186299619938014, "f1": 0.5512437810945273, "f1_std": 0.08975076481899173, "bacc": 0.5516129032258065, "bacc_std": 0.07202611237380277} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05031962089952331, "f1": 0.6328358208955224, "f1_std": 0.0947270341183192, "bacc": 0.6177419354838709, "bacc_std": 0.07751251088974528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07187410501731797, "f1": 0.6272727272727273, "f1_std": 0.08739019115511117, "bacc": 0.6370967741935484, "bacc_std": 0.0919715005285295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.04604361277328205, "f1": 0.7144278606965174, "f1_std": 0.09506656102972419, "bacc": 0.6838709677419355, "bacc_std": 0.08180272830389253} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06600156831190582, "f1": 0.5547201336675021, "f1_std": 0.08284137531755736, "bacc": 0.5532258064516129, "bacc_std": 0.08047273664012997} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06887381663222672, "f1": 0.6479313036690086, "f1_std": 0.0851714584910086, "bacc": 0.6532258064516129, "bacc_std": 0.08921113511502902} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.055757164490570665, "f1": 0.6117424242424243, "f1_std": 0.09464435726170588, "bacc": 0.6016129032258064, "bacc_std": 0.08070539531199994} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.054107456923288604, "f1": 0.6660633484162897, "f1_std": 0.08650477214861539, "bacc": 0.6516129032258065, "bacc_std": 0.07911593963460453} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06874789056306317, "f1": 0.6072218128224024, "f1_std": 0.0813964073257685, "bacc": 0.6209677419354839, "bacc_std": 0.08862649771831659} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05050597057430525, "f1": 0.5512437810945273, "f1_std": 0.08688492578968747, "bacc": 0.5516129032258065, "bacc_std": 0.06976373059218836} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.060349960953024064, "f1": 0.5729166666666666, "f1_std": 0.08663307927595014, "bacc": 0.5693548387096774, "bacc_std": 0.081047382179615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06667984006272958, "f1": 0.603225806451613, "f1_std": 0.08361058263975459, "bacc": 0.603225806451613, "bacc_std": 0.08362848937516268} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06322485674487899, "f1": 0.6232247284878863, "f1_std": 0.0858730039863233, "bacc": 0.6193548387096774, "bacc_std": 0.08345621045479806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05761921402712905, "f1": 0.6440972222222222, "f1_std": 0.08382110168231488, "bacc": 0.635483870967742, "bacc_std": 0.0791656478018304} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06385459016630178, "f1": 0.6232247284878863, "f1_std": 0.08966046853871479, "bacc": 0.6193548387096774, "bacc_std": 0.08679746995646169} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05476193678463647, "f1": 0.6660633484162897, "f1_std": 0.09192554303290096, "bacc": 0.6516129032258065, "bacc_std": 0.08401113417858935} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.04386282765860187, "f1": 0.4831932773109243, "f1_std": 0.06787352239816728, "bacc": 0.5016129032258064, "bacc_std": 0.051972279491936176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 21.54434690031882, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.0757246019996615, "f1": 0.5684210526315789, "f1_std": 0.08149618109984197, "bacc": 0.5887096774193548, "bacc_std": 0.09114699703028004} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07047428345288444, "f1": 0.6410774410774411, "f1_std": 0.07401949895288094, "bacc": 0.6887096774193548, "bacc_std": 0.08465310864539982} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06330752680862385, "f1": 0.6693548387096775, "f1_std": 0.08667448592636284, "bacc": 0.6693548387096775, "bacc_std": 0.08824294720559629} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.044037299640950565, "f1": 0.6554621848739496, "f1_std": 0.09935620285180007, "bacc": 0.6338709677419355, "bacc_std": 0.07907414584491582} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 21.54434690031882, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07190290233824981, "f1": 0.5199063231850116, "f1_std": 0.08098997721534544, "bacc": 0.5209677419354839, "bacc_std": 0.08378844395422648} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.0736944509440196, "f1": 0.5684210526315789, "f1_std": 0.07896200275317426, "bacc": 0.5887096774193548, "bacc_std": 0.09020108020377117} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07047718714866438, "f1": 0.6272727272727273, "f1_std": 0.08652154613456321, "bacc": 0.6370967741935484, "bacc_std": 0.09138377047793435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06680797239469836, "f1": 0.48621553884711777, "f1_std": 0.07957532501539784, "bacc": 0.48709677419354835, "bacc_std": 0.0782979338910862} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.07040570795096193, "f1": 0.6893939393939394, "f1_std": 0.08411994180597884, "bacc": 0.7032258064516128, "bacc_std": 0.08891078810479182} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06458035497472517, "f1": 0.7280766396462786, "f1_std": 0.0766797607012638, "bacc": 0.7532258064516129, "bacc_std": 0.08446358228591917} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05846375550639864, "f1": 0.5918552036199095, "f1_std": 0.09099059760072874, "bacc": 0.5854838709677419, "bacc_std": 0.08281719466107057} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 1291.5496650148827, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0639846055250783, "f1": 0.6693548387096775, "f1_std": 0.08589982264167, "bacc": 0.6693548387096775, "bacc_std": 0.08746334299788845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05381960305329092, "f1": 0.6660633484162897, "f1_std": 0.08775569710366908, "bacc": 0.6516129032258065, "bacc_std": 0.0810652349043266} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06024883953305914, "f1": 0.6440972222222222, "f1_std": 0.09193857547408515, "bacc": 0.635483870967742, "bacc_std": 0.08609523433762384} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 2.782559402207126, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.034960979353200604, "f1": 0.6800445930880714, "f1_std": 0.09533760341734848, "bacc": 0.65, "bacc_std": 0.07167000767406125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06286930628545062, "f1": 0.6917293233082706, "f1_std": 0.08720745016886557, "bacc": 0.685483870967742, "bacc_std": 0.08611854925934863} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06563633517300829, "f1": 0.646551724137931, "f1_std": 0.0741690183935421, "bacc": 0.6709677419354838, "bacc_std": 0.08122072519913281} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06655303423029467, "f1": 0.6479313036690086, "f1_std": 0.08453056562277879, "bacc": 0.6532258064516129, "bacc_std": 0.08733652039075417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.04986547041588738, "f1": 0.7402714932126697, "f1_std": 0.08692582706118779, "bacc": 0.717741935483871, "bacc_std": 0.08243122690834453} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05827347372038849, "f1": 0.5176470588235295, "f1_std": 0.08315472676910456, "bacc": 0.5193548387096775, "bacc_std": 0.07321389550705097} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.045052127914508355, "f1": 0.569327731092437, "f1_std": 0.08977054207151601, "bacc": 0.567741935483871, "bacc_std": 0.06720767347237366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.059835189071975034, "f1": 0.7354838709677419, "f1_std": 0.08407657742551267, "bacc": 0.7354838709677419, "bacc_std": 0.08654874292064928} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06409932432515265, "f1": 0.6693548387096775, "f1_std": 0.08557404967542567, "bacc": 0.6693548387096775, "bacc_std": 0.08805939078787292} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.057460350243356786, "f1": 0.7885040530582166, "f1_std": 0.06718145001331652, "bacc": 0.8193548387096774, "bacc_std": 0.0705154693325692} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05859060515554147, "f1": 0.6660633484162897, "f1_std": 0.0913618620570825, "bacc": 0.6516129032258065, "bacc_std": 0.08443953781437895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05801802833719562, "f1": 0.6660633484162897, "f1_std": 0.09515233109140805, "bacc": 0.6516129032258065, "bacc_std": 0.08573536080037968} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06185542815041882, "f1": 0.6232247284878863, "f1_std": 0.08600915015869696, "bacc": 0.6193548387096774, "bacc_std": 0.08422982848280944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06602896269317507, "f1": 0.48621553884711777, "f1_std": 0.07691364648259792, "bacc": 0.48709677419354835, "bacc_std": 0.0742378927087761} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06807930078751422, "f1": 0.6835087719298245, "f1_std": 0.07460852391503017, "bacc": 0.7209677419354839, "bacc_std": 0.08193881121006218} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 166.81005372000556, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.0716349228056941, "f1": 0.5030303030303029, "f1_std": 0.08068207140686545, "bacc": 0.5048387096774194, "bacc_std": 0.0860830651771983} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05513988484440616, "f1": 0.5340909090909092, "f1_std": 0.08061714403777925, "bacc": 0.535483870967742, "bacc_std": 0.06908559892762273} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.058978333169582065, "f1": 0.6917293233082706, "f1_std": 0.08569915622414821, "bacc": 0.685483870967742, "bacc_std": 0.08586611283869239} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06225919225006996, "f1": 0.6693548387096775, "f1_std": 0.08598813135283102, "bacc": 0.6693548387096775, "bacc_std": 0.08815170320734522} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.056919033052370935, "f1": 0.6660633484162897, "f1_std": 0.09136626602942338, "bacc": 0.6516129032258065, "bacc_std": 0.08340194689008903} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06289231422501661, "f1": 0.6693548387096775, "f1_std": 0.08539934199671462, "bacc": 0.6693548387096775, "bacc_std": 0.08639384799801583} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0582515621878334, "f1": 0.6917293233082706, "f1_std": 0.08364599160728516, "bacc": 0.685483870967742, "bacc_std": 0.08440995121858703} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07407279728778225, "f1": 0.6072218128224024, "f1_std": 0.08568844316139154, "bacc": 0.6209677419354839, "bacc_std": 0.09379628661036565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0712056618674036, "f1": 0.646551724137931, "f1_std": 0.08073908997077259, "bacc": 0.6709677419354838, "bacc_std": 0.08916030361015106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.03896031324730817, "f1": 0.7415966386554622, "f1_std": 0.09473202622716693, "bacc": 0.7, "bacc_std": 0.07986864215698174} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06376443859074103, "f1": 0.5547201336675021, "f1_std": 0.0834093823235783, "bacc": 0.5532258064516129, "bacc_std": 0.08069560899537369} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.03999512165374124, "f1": 0.4972129319955407, "f1_std": 0.0719958327686336, "bacc": 0.5177419354838709, "bacc_std": 0.051018908065823286} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06853300919408768, "f1": 0.6676492262343405, "f1_std": 0.08025685169071795, "bacc": 0.6870967741935483, "bacc_std": 0.08690830714123987} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04543440925464816, "f1": 0.569327731092437, "f1_std": 0.0875924742866146, "bacc": 0.567741935483871, "bacc_std": 0.06601480299417731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05085526698047084, "f1": 0.5512437810945273, "f1_std": 0.08912872672361447, "bacc": 0.5516129032258065, "bacc_std": 0.07158737004855159} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 21.54434690031882, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07448983337758959, "f1": 0.5494505494505495, "f1_std": 0.07799528984550821, "bacc": 0.5725806451612903, "bacc_std": 0.08787735345798961} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.054159326063412845, "f1": 0.6660633484162897, "f1_std": 0.08994371040408121, "bacc": 0.6516129032258065, "bacc_std": 0.08326188734021547} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06679847964443392, "f1": 0.603225806451613, "f1_std": 0.08613637455816871, "bacc": 0.603225806451613, "bacc_std": 0.08691172240955548} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.060668181096036555, "f1": 0.7759562841530054, "f1_std": 0.07889166480981656, "bacc": 0.7854838709677419, "bacc_std": 0.08255657480603514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05349907569795384, "f1": 0.5340909090909092, "f1_std": 0.0817724721574741, "bacc": 0.535483870967742, "bacc_std": 0.06871907129484064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05564865990477135, "f1": 0.6917293233082706, "f1_std": 0.08157943930432904, "bacc": 0.685483870967742, "bacc_std": 0.08058712563367687} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0522958681228696, "f1": 0.6660633484162897, "f1_std": 0.08866437731187976, "bacc": 0.6516129032258065, "bacc_std": 0.08184449202047435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 1291.5496650148827, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0628143445387551, "f1": 0.7119437939110069, "f1_std": 0.08124378997025508, "bacc": 0.7193548387096774, "bacc_std": 0.08572956778687289} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05783819149146154, "f1": 0.5918552036199095, "f1_std": 0.08839277403402, "bacc": 0.5854838709677419, "bacc_std": 0.07892250338187731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.0555817430512815, "f1": 0.7152777777777778, "f1_std": 0.0859018925936977, "bacc": 0.7016129032258065, "bacc_std": 0.08425874443978} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 2.782559402207126, "split": "test", "acc": 0.5365853658536586, "acc_std": 0.07037345776218691, "f1": 0.4533333333333333, "f1_std": 0.0699933611424567, "bacc": 0.4564516129032258, "bacc_std": 0.08031055642607103} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 2.782559402207126, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.05360127765353744, "f1": 0.8016129032258064, "f1_std": 0.07519158491310474, "bacc": 0.8016129032258064, "bacc_std": 0.07969690241096064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0665415025720408, "f1": 0.6272727272727273, "f1_std": 0.08064739979193049, "bacc": 0.6370967741935484, "bacc_std": 0.08573949461318543} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06521166744144605, "f1": 0.6893939393939394, "f1_std": 0.08110764068822261, "bacc": 0.7032258064516128, "bacc_std": 0.08735988787999803} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0569583167876211, "f1": 0.6917293233082706, "f1_std": 0.08149273111292436, "bacc": 0.685483870967742, "bacc_std": 0.08052960307802506} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05969583820791552, "f1": 0.5176470588235295, "f1_std": 0.08333482773624097, "bacc": 0.5193548387096775, "bacc_std": 0.07543072819125546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.0720192980945863, "f1": 0.5199063231850116, "f1_std": 0.08442623961101829, "bacc": 0.5209677419354839, "bacc_std": 0.08790833959673887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 166.81005372000556, "split": "test", "acc": 0.43902439024390244, "acc_std": 0.07204749243362073, "f1": 0.36498316498316496, "f1_std": 0.06351487332038365, "bacc": 0.35806451612903223, "bacc_std": 0.07441329106831027} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07067299497628009, "f1": 0.5370967741935484, "f1_std": 0.08769383610880646, "bacc": 0.5370967741935484, "bacc_std": 0.08754463728390001} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05723917903756749, "f1": 0.6660633484162897, "f1_std": 0.09288828185500214, "bacc": 0.6516129032258065, "bacc_std": 0.08602098302932805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.0578590669129162, "f1": 0.5918552036199095, "f1_std": 0.09043784226259088, "bacc": 0.5854838709677419, "bacc_std": 0.08010534654606613} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.052097835022573265, "f1": 0.6893939393939394, "f1_std": 0.09483661389312227, "bacc": 0.667741935483871, "bacc_std": 0.08424724149972018} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06942442040608622, "f1": 0.5370967741935484, "f1_std": 0.08698632406695068, "bacc": 0.5370967741935484, "bacc_std": 0.0865605498048517} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0471176082430308, "f1": 0.6328358208955224, "f1_std": 0.09214325179835789, "bacc": 0.6177419354838709, "bacc_std": 0.0749549483142708} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06446741495298078, "f1": 0.5547201336675021, "f1_std": 0.08646755138939115, "bacc": 0.5532258064516129, "bacc_std": 0.08388246441055443} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 103.63 | 254.5 | 0.97938 | 0.041684 | 0.96657 | 0.070243 | 0.95924 | 0.082686 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 103.63 | 254.5 | 0.73244 | 0.067885 | 0.6226 | 0.083814 | 0.62323 | 0.083315 | + + +done! total time: 0:04:54 diff --git a/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/config.yaml b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..482460e94bbbd2c45f407a18072573c2c4e4ac1c --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (adni_ad_vs_cn reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic +model: flat_mae +representation: reg +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..59c048f39f82fe6c65367b0019b9ac8288d13a3c --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,reg,logistic,adni_ad_vs_cn,,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,,166.81005372000556,test,0.6829268292682927,0.06926788045222577,0.5547201336675021,0.08291040320742897,0.5572916666666666,0.0867156208422202 +flat_mae,reg,logistic,adni_ad_vs_cn,1,0.046415888336127774,train,0.907859078590786,0.013179913602732019,0.8577551020408163,0.022512807497002556,0.8266085956118004,0.02534628326225723 +flat_mae,reg,logistic,adni_ad_vs_cn,1,0.046415888336127774,test,0.8536585365853658,0.04246973937571651,0.7670454545454546,0.07966367496754641,0.7338709677419355,0.07584546772210869 +flat_mae,reg,logistic,adni_ad_vs_cn,2,0.046415888336127774,train,0.9214092140921409,0.01255100594521618,0.8792716359985108,0.021120842549183005,0.8475840249815104,0.024302291089831434 +flat_mae,reg,logistic,adni_ad_vs_cn,2,0.046415888336127774,test,0.6097560975609756,0.06428750117909754,0.47096774193548385,0.07330687313411444,0.47096774193548385,0.07333902651094684 +flat_mae,reg,logistic,adni_ad_vs_cn,3,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,3,2.782559402207126,test,0.7073170731707317,0.060253262825694154,0.5729166666666666,0.08391054555891782,0.5693548387096774,0.0784432117610184 +flat_mae,reg,logistic,adni_ad_vs_cn,4,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,4,166.81005372000556,test,0.8048780487804879,0.06549575734064934,0.7515151515151515,0.08066889375104956,0.7693548387096774,0.08476927435769445 +flat_mae,reg,logistic,adni_ad_vs_cn,5,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,5,2.782559402207126,test,0.6829268292682927,0.04911000896323302,0.4696517412935323,0.06711498447474913,0.4854838709677419,0.05500726231497199 +flat_mae,reg,logistic,adni_ad_vs_cn,6,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,6,21.54434690031882,test,0.7804878048780488,0.06122468154251317,0.6917293233082706,0.08772254816417174,0.685483870967742,0.08723945606839408 +flat_mae,reg,logistic,adni_ad_vs_cn,7,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,7,21.54434690031882,test,0.7560975609756098,0.0610102340719244,0.6440972222222222,0.09095724643701052,0.635483870967742,0.085123666175654 +flat_mae,reg,logistic,adni_ad_vs_cn,8,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,8,21.54434690031882,test,0.7317073170731707,0.06071036945162666,0.5918552036199095,0.09130663897075328,0.5854838709677419,0.08147208940954899 +flat_mae,reg,logistic,adni_ad_vs_cn,9,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,9,166.81005372000556,test,0.6097560975609756,0.07335871786781714,0.5494505494505495,0.07626381791951264,0.5725806451612903,0.08859208417083059 +flat_mae,reg,logistic,adni_ad_vs_cn,10,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,10,0.3593813663804626,test,0.7073170731707317,0.061143943253192896,0.5729166666666666,0.08631645710543846,0.5693548387096774,0.07938469796221126 +flat_mae,reg,logistic,adni_ad_vs_cn,11,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,11,21.54434690031882,test,0.7804878048780488,0.045139087056007804,0.6328358208955224,0.08777565445034076,0.6177419354838709,0.07119973598087354 +flat_mae,reg,logistic,adni_ad_vs_cn,12,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,12,2.782559402207126,test,0.5853658536585366,0.07659555625849637,0.5108771929824562,0.08093448115011227,0.5225806451612903,0.09198756555340079 +flat_mae,reg,logistic,adni_ad_vs_cn,13,0.3593813663804626,train,0.994579945799458,0.0036143106272040065,0.9923570836785418,0.005154726480804928,0.9883720930232558,0.007753957101385369 +flat_mae,reg,logistic,adni_ad_vs_cn,13,0.3593813663804626,test,0.7560975609756098,0.05692530355135613,0.6440972222222222,0.08577582337107054,0.635483870967742,0.08293732347616613 +flat_mae,reg,logistic,adni_ad_vs_cn,14,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,14,2.782559402207126,test,0.7073170731707317,0.05940614301133882,0.5729166666666666,0.08629561918349811,0.5693548387096774,0.07934082383453664 +flat_mae,reg,logistic,adni_ad_vs_cn,15,0.046415888336127774,train,0.9051490514905149,0.013232198457006718,0.849799383613421,0.02382110186719757,0.8127003040512779,0.02603806745759029 +flat_mae,reg,logistic,adni_ad_vs_cn,15,0.046415888336127774,test,0.7317073170731707,0.04915020864770594,0.5512437810945273,0.08742828593990601,0.5516129032258065,0.06938353037429583 +flat_mae,reg,logistic,adni_ad_vs_cn,16,0.046415888336127774,train,0.9132791327913279,0.013138390260572683,0.8674330878390515,0.021958633639009306,0.8382365025885447,0.025185593338355367 +flat_mae,reg,logistic,adni_ad_vs_cn,16,0.046415888336127774,test,0.7804878048780488,0.05031962089952331,0.6328358208955224,0.0947270341183192,0.6177419354838709,0.07751251088974528 +flat_mae,reg,logistic,adni_ad_vs_cn,17,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,17,2.782559402207126,test,0.7804878048780488,0.056139941573332866,0.6660633484162897,0.09178952342310046,0.6516129032258065,0.08401838780954123 +flat_mae,reg,logistic,adni_ad_vs_cn,18,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,18,0.3593813663804626,test,0.7804878048780488,0.05600788165832082,0.6660633484162897,0.09256471973158738,0.6516129032258065,0.08446375494911447 +flat_mae,reg,logistic,adni_ad_vs_cn,19,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,19,166.81005372000556,test,0.6341463414634146,0.07340930213195257,0.5467943994104643,0.07761765894525781,0.5548387096774194,0.08337517048300243 +flat_mae,reg,logistic,adni_ad_vs_cn,20,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,20,166.81005372000556,test,0.7317073170731707,0.06575919150280754,0.6479313036690086,0.0812578547370611,0.6532258064516129,0.08582038232781777 +flat_mae,reg,logistic,adni_ad_vs_cn,21,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,21,0.3593813663804626,test,0.7560975609756098,0.05147252260173691,0.6117424242424243,0.08765865475080913,0.6016129032258064,0.0739363533698802 +flat_mae,reg,logistic,adni_ad_vs_cn,22,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,22,21.54434690031882,test,0.7317073170731707,0.06132978116949898,0.6232247284878863,0.0849193660950125,0.6193548387096774,0.08151630672587447 +flat_mae,reg,logistic,adni_ad_vs_cn,23,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,23,166.81005372000556,test,0.8048780487804879,0.05758352183491863,0.7354838709677419,0.08097727761751287,0.7354838709677419,0.0840171879248848 +flat_mae,reg,logistic,adni_ad_vs_cn,24,0.3593813663804626,train,0.997289972899729,0.0026479391755356513,0.9961941891766453,0.0037500533778046753,0.9941860465116279,0.005680753231236364 +flat_mae,reg,logistic,adni_ad_vs_cn,24,0.3593813663804626,test,0.7317073170731707,0.04050399323737977,0.4972129319955407,0.07571033536779759,0.5177419354838709,0.053754574957152634 +flat_mae,reg,logistic,adni_ad_vs_cn,25,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,25,166.81005372000556,test,0.6829268292682927,0.06761552785696907,0.5839188134270101,0.08196346087163776,0.5870967741935484,0.0853153337979838 +flat_mae,reg,logistic,adni_ad_vs_cn,26,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,26,166.81005372000556,test,0.6829268292682927,0.07226588489852294,0.5839188134270101,0.08737646011373293,0.5870967741935484,0.08950852998841387 +flat_mae,reg,logistic,adni_ad_vs_cn,27,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,27,21.54434690031882,test,0.7560975609756098,0.05857094520027637,0.6440972222222222,0.0873410981020601,0.635483870967742,0.08256686698314955 +flat_mae,reg,logistic,adni_ad_vs_cn,28,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,28,21.54434690031882,test,0.6097560975609756,0.06840539479370619,0.47096774193548385,0.07690609921397815,0.47096774193548385,0.0774699954673467 +flat_mae,reg,logistic,adni_ad_vs_cn,29,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,29,166.81005372000556,test,0.7073170731707317,0.06718989400228335,0.603225806451613,0.08791814169643879,0.603225806451613,0.08710965848547918 +flat_mae,reg,logistic,adni_ad_vs_cn,30,0.3593813663804626,train,0.997289972899729,0.002642386211324836,0.9961941891766453,0.0037394374828390886,0.9941860465116279,0.0056688401859236225 +flat_mae,reg,logistic,adni_ad_vs_cn,30,0.3593813663804626,test,0.6341463414634146,0.06596745362326167,0.48621553884711777,0.07849632115585517,0.48709677419354835,0.07558080238993464 +flat_mae,reg,logistic,adni_ad_vs_cn,31,0.046415888336127774,train,0.924119241192412,0.012594839213314044,0.88400395185917,0.021260044747329225,0.8533979784698824,0.02507868703319414 +flat_mae,reg,logistic,adni_ad_vs_cn,31,0.046415888336127774,test,0.7073170731707317,0.033563350738417125,0.4142857142857143,0.011710739365674917,0.46774193548387094,0.022195119036695205 +flat_mae,reg,logistic,adni_ad_vs_cn,32,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,32,0.3593813663804626,test,0.6585365853658537,0.07638113206099847,0.5876436781609196,0.08437576355815693,0.6048387096774194,0.09308638145330797 +flat_mae,reg,logistic,adni_ad_vs_cn,33,0.046415888336127774,train,0.9186991869918699,0.012787766487807092,0.8744897959183674,0.022002271679593274,0.8417700714931383,0.0256702911361443 +flat_mae,reg,logistic,adni_ad_vs_cn,33,0.046415888336127774,test,0.7073170731707317,0.06480419416366247,0.603225806451613,0.08426549569176049,0.603225806451613,0.08422059700226112 +flat_mae,reg,logistic,adni_ad_vs_cn,34,0.046415888336127774,train,0.907859078590786,0.013218173225033636,0.8563215758131013,0.02324376865569284,0.8225614265757252,0.02607784624320091 +flat_mae,reg,logistic,adni_ad_vs_cn,34,0.046415888336127774,test,0.8292682926829268,0.05072408479320517,0.7402714932126697,0.08488173186645732,0.717741935483871,0.08221607884853113 +flat_mae,reg,logistic,adni_ad_vs_cn,35,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,35,166.81005372000556,test,0.7804878048780488,0.051280776368782956,0.6328358208955224,0.09856085021573761,0.6177419354838709,0.08168068994114329 +flat_mae,reg,logistic,adni_ad_vs_cn,36,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,36,10000.0,test,0.6097560975609756,0.06454851207014735,0.47096774193548385,0.07140418157686343,0.47096774193548385,0.07151721564207174 +flat_mae,reg,logistic,adni_ad_vs_cn,37,0.3593813663804626,train,0.997289972899729,0.0024803721974553713,0.9961941891766453,0.003507160944691082,0.9941860465116279,0.005321263609657149 +flat_mae,reg,logistic,adni_ad_vs_cn,37,0.3593813663804626,test,0.6585365853658537,0.0716243090150253,0.5651515151515152,0.08548664642578424,0.5709677419354839,0.09147121591118461 +flat_mae,reg,logistic,adni_ad_vs_cn,38,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,38,2.782559402207126,test,0.6829268292682927,0.06460709972533739,0.5839188134270101,0.08158375041463249,0.5870967741935484,0.08441550177951193 +flat_mae,reg,logistic,adni_ad_vs_cn,39,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,39,2.782559402207126,test,0.6585365853658537,0.06026574107972258,0.5017361111111112,0.0774378234834809,0.5032258064516129,0.0719703149436664 +flat_mae,reg,logistic,adni_ad_vs_cn,40,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,40,166.81005372000556,test,0.6585365853658537,0.07220650866037549,0.5876436781609196,0.07901331293441857,0.6048387096774194,0.08817182750432619 +flat_mae,reg,logistic,adni_ad_vs_cn,41,0.046415888336127774,train,0.9214092140921409,0.011982094832422848,0.8780665671539749,0.020572398028269812,0.8435368559454351,0.023639970386061505 +flat_mae,reg,logistic,adni_ad_vs_cn,41,0.046415888336127774,test,0.7560975609756098,0.045159746860078724,0.569327731092437,0.09059377494295195,0.567741935483871,0.0680991939638229 +flat_mae,reg,logistic,adni_ad_vs_cn,42,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,42,2.782559402207126,test,0.6585365853658537,0.06765844866085342,0.5370967741935484,0.08261322133495091,0.5370967741935484,0.08346484177601435 +flat_mae,reg,logistic,adni_ad_vs_cn,43,0.005994842503189409,train,0.8238482384823849,0.013220723262870042,0.6743867175303078,0.031979920824775736,0.6463760374722656,0.02568154145384294 +flat_mae,reg,logistic,adni_ad_vs_cn,43,0.005994842503189409,test,0.8048780487804879,0.0452544922193601,0.6554621848739496,0.0945809020453021,0.6338709677419355,0.07569081605978394 +flat_mae,reg,logistic,adni_ad_vs_cn,44,0.046415888336127774,train,0.9214092140921409,0.012427908903164229,0.8780665671539749,0.02170057693958902,0.8435368559454351,0.0254089250118413 +flat_mae,reg,logistic,adni_ad_vs_cn,44,0.046415888336127774,test,0.7560975609756098,0.05396213447565202,0.6117424242424243,0.09160419027027639,0.6016129032258064,0.0794647999612559 +flat_mae,reg,logistic,adni_ad_vs_cn,45,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,45,166.81005372000556,test,0.7073170731707317,0.06876104205306721,0.6272727272727273,0.08321455766851589,0.6370967741935484,0.08759076226243769 +flat_mae,reg,logistic,adni_ad_vs_cn,46,0.046415888336127774,train,0.907859078590786,0.013035377966249474,0.8548454276194002,0.023060819235622167,0.81851425753965,0.025641043112166525 +flat_mae,reg,logistic,adni_ad_vs_cn,46,0.046415888336127774,test,0.7560975609756098,0.05241403829009051,0.6117424242424243,0.09105573728519119,0.6016129032258064,0.07796420801579182 +flat_mae,reg,logistic,adni_ad_vs_cn,47,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,47,166.81005372000556,test,0.7560975609756098,0.04823213745915314,0.569327731092437,0.09027907806196325,0.567741935483871,0.06876162650720788 +flat_mae,reg,logistic,adni_ad_vs_cn,48,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,48,166.81005372000556,test,0.6829268292682927,0.06458551322656533,0.5547201336675021,0.084868229073962,0.5532258064516129,0.08124803047320739 +flat_mae,reg,logistic,adni_ad_vs_cn,49,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,49,2.782559402207126,test,0.5853658536585366,0.06851411835555693,0.4558938329430133,0.07603454620164343,0.45483870967741935,0.07907818178579569 +flat_mae,reg,logistic,adni_ad_vs_cn,50,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,50,166.81005372000556,test,0.7560975609756098,0.06420614850811628,0.6693548387096775,0.0879841268705883,0.6693548387096775,0.08843296455260873 +flat_mae,reg,logistic,adni_ad_vs_cn,51,0.046415888336127774,train,0.926829268292683,0.011797798499782493,0.8886877828054298,0.019429827230317845,0.8592119319582545,0.022642946435212474 +flat_mae,reg,logistic,adni_ad_vs_cn,51,0.046415888336127774,test,0.6829268292682927,0.05120350504808049,0.4696517412935323,0.06624358035969148,0.4854838709677419,0.05558473053339689 +flat_mae,reg,logistic,adni_ad_vs_cn,52,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,52,166.81005372000556,test,0.7073170731707317,0.06369788523166461,0.5729166666666666,0.09015468291597781,0.5693548387096774,0.0828853548580687 +flat_mae,reg,logistic,adni_ad_vs_cn,53,0.046415888336127774,train,0.9159891598915989,0.012183191996701911,0.8683310887806899,0.021389051000678262,0.8319089489686909,0.02437854969253309 +flat_mae,reg,logistic,adni_ad_vs_cn,53,0.046415888336127774,test,0.8048780487804879,0.050329810525012314,0.6893939393939394,0.09019622445316228,0.667741935483871,0.07946292003676955 +flat_mae,reg,logistic,adni_ad_vs_cn,54,0.046415888336127774,train,0.924119241192412,0.012311780474619057,0.8828571428571428,0.021014903571522872,0.8493508094338073,0.024412502157140147 +flat_mae,reg,logistic,adni_ad_vs_cn,54,0.046415888336127774,test,0.7560975609756098,0.0608373847930633,0.6440972222222222,0.08748969394219276,0.635483870967742,0.08417500623901485 +flat_mae,reg,logistic,adni_ad_vs_cn,55,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,55,2.782559402207126,test,0.7317073170731707,0.06446372377951581,0.6479313036690086,0.08287953238163753,0.6532258064516129,0.08612990328181046 +flat_mae,reg,logistic,adni_ad_vs_cn,56,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,56,21.54434690031882,test,0.7073170731707317,0.06686244503173402,0.603225806451613,0.08677010666210364,0.603225806451613,0.08750997392240557 +flat_mae,reg,logistic,adni_ad_vs_cn,57,0.046415888336127774,train,0.9105691056910569,0.01285430274467883,0.8612481626234888,0.022021552125270568,0.8283753800640973,0.024815595163525665 +flat_mae,reg,logistic,adni_ad_vs_cn,57,0.046415888336127774,test,0.7073170731707317,0.06146178859637746,0.5729166666666666,0.08715676077478036,0.5693548387096774,0.08064418476828125 +flat_mae,reg,logistic,adni_ad_vs_cn,58,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,58,21.54434690031882,test,0.7560975609756098,0.06025130792836882,0.6440972222222222,0.08832488952950536,0.635483870967742,0.08398040471854697 +flat_mae,reg,logistic,adni_ad_vs_cn,59,0.046415888336127774,train,0.9295392953929539,0.011807106331834988,0.8922893838692294,0.019784867009969755,0.8609787164105513,0.02345494834725607 +flat_mae,reg,logistic,adni_ad_vs_cn,59,0.046415888336127774,test,0.6829268292682927,0.04892427588600224,0.4696517412935323,0.06816919518377128,0.4854838709677419,0.055605042012306996 +flat_mae,reg,logistic,adni_ad_vs_cn,60,0.046415888336127774,train,0.926829268292683,0.011603732252347903,0.8864757694192181,0.02012072959409575,0.851117593886104,0.02404005491015651 +flat_mae,reg,logistic,adni_ad_vs_cn,60,0.046415888336127774,test,0.7560975609756098,0.06397195994907733,0.6693548387096775,0.0878383258079867,0.6693548387096775,0.08966629421593329 +flat_mae,reg,logistic,adni_ad_vs_cn,61,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,61,21.54434690031882,test,0.7317073170731707,0.06384236610352571,0.6232247284878863,0.08653965121264724,0.6193548387096774,0.08409106356744614 +flat_mae,reg,logistic,adni_ad_vs_cn,62,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,62,166.81005372000556,test,0.7804878048780488,0.05488138743231746,0.6660633484162897,0.08732795161410309,0.6516129032258065,0.08032263142892551 +flat_mae,reg,logistic,adni_ad_vs_cn,63,0.046415888336127774,train,0.924119241192412,0.012215922038786194,0.88400395185917,0.020417115534075696,0.8533979784698824,0.02391466697300663 +flat_mae,reg,logistic,adni_ad_vs_cn,63,0.046415888336127774,test,0.7804878048780488,0.0402668079677833,0.5886287625418061,0.09444418088783978,0.5838709677419355,0.06743241693190179 +flat_mae,reg,logistic,adni_ad_vs_cn,64,0.046415888336127774,train,0.9132791327913279,0.013151799211117572,0.8674330878390515,0.022079047823421162,0.8382365025885447,0.025174698279671594 +flat_mae,reg,logistic,adni_ad_vs_cn,64,0.046415888336127774,test,0.7317073170731707,0.05123632708892544,0.5512437810945273,0.08564121739774407,0.5516129032258065,0.06900937961252603 +flat_mae,reg,logistic,adni_ad_vs_cn,65,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,65,166.81005372000556,test,0.6585365853658537,0.07072322909659103,0.5651515151515152,0.08391621533268244,0.5709677419354839,0.08842442554101251 +flat_mae,reg,logistic,adni_ad_vs_cn,66,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,66,0.3593813663804626,test,0.8048780487804879,0.054499747583040326,0.7152777777777778,0.08477707014025586,0.7016129032258065,0.08156309202862229 +flat_mae,reg,logistic,adni_ad_vs_cn,67,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,67,21.54434690031882,test,0.8292682926829268,0.05343302946413993,0.7402714932126697,0.0897878476943895,0.717741935483871,0.08511404230625354 +flat_mae,reg,logistic,adni_ad_vs_cn,68,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,68,21.54434690031882,test,0.7560975609756098,0.055510801988605814,0.6117424242424243,0.09384265129130459,0.6016129032258064,0.0800424894319517 +flat_mae,reg,logistic,adni_ad_vs_cn,69,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,69,21.54434690031882,test,0.7317073170731707,0.06782480844127155,0.6962962962962963,0.06986374509759251,0.7548387096774194,0.07577020485526309 +flat_mae,reg,logistic,adni_ad_vs_cn,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,70,2.782559402207126,test,0.6829268292682927,0.0641650904622394,0.5547201336675021,0.0846313058603222,0.5532258064516129,0.08219736305524777 +flat_mae,reg,logistic,adni_ad_vs_cn,71,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,71,2.782559402207126,test,0.6341463414634146,0.06637732172945972,0.48621553884711777,0.07529306311662956,0.48709677419354835,0.07279342830911865 +flat_mae,reg,logistic,adni_ad_vs_cn,72,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,72,166.81005372000556,test,0.7804878048780488,0.04813947117809216,0.6328358208955224,0.09381567777555355,0.6177419354838709,0.07705537956074696 +flat_mae,reg,logistic,adni_ad_vs_cn,73,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,73,21.54434690031882,test,0.7560975609756098,0.06674145913626933,0.7054597701149425,0.07421655076818885,0.7370967741935484,0.08085251407506179 +flat_mae,reg,logistic,adni_ad_vs_cn,74,0.046415888336127774,train,0.9186991869918699,0.012694785181991089,0.8732249198350893,0.022277454840662826,0.837722902457063,0.025754948569648144 +flat_mae,reg,logistic,adni_ad_vs_cn,74,0.046415888336127774,test,0.7560975609756098,0.03192983831243031,0.5119047619047619,0.07621817219903167,0.5338709677419355,0.048741740675514296 +flat_mae,reg,logistic,adni_ad_vs_cn,75,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,75,2.782559402207126,test,0.7317073170731707,0.05585898366349943,0.5918552036199095,0.08794997386381644,0.5854838709677419,0.07802504594016346 +flat_mae,reg,logistic,adni_ad_vs_cn,76,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,76,166.81005372000556,test,0.5121951219512195,0.07414009922376004,0.4368131868131868,0.07154041732004592,0.4403225806451613,0.08221994841127901 +flat_mae,reg,logistic,adni_ad_vs_cn,77,0.005994842503189409,train,0.8211382113821138,0.013193814739488345,0.6671131765992345,0.032168647378154176,0.6405620839838935,0.02530371258306664 +flat_mae,reg,logistic,adni_ad_vs_cn,77,0.005994842503189409,test,0.8048780487804879,0.033334459628007386,0.6095238095238095,0.10134122331440851,0.6,0.06833564223741516 +flat_mae,reg,logistic,adni_ad_vs_cn,78,0.046415888336127774,train,0.907859078590786,0.01320569928503724,0.8577551020408163,0.022366521477251322,0.8266085956118004,0.025023405989373037 +flat_mae,reg,logistic,adni_ad_vs_cn,78,0.046415888336127774,test,0.6585365853658537,0.0649036443183745,0.5017361111111112,0.08316483920080699,0.5032258064516129,0.07686236628335338 +flat_mae,reg,logistic,adni_ad_vs_cn,79,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,79,2.782559402207126,test,0.7804878048780488,0.06301281702997844,0.7119437939110069,0.07973530931667826,0.7193548387096774,0.0841344308578553 +flat_mae,reg,logistic,adni_ad_vs_cn,80,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,80,2.782559402207126,test,0.6097560975609756,0.07286113363628802,0.5030303030303029,0.08001243527763578,0.5048387096774194,0.08411439728979575 +flat_mae,reg,logistic,adni_ad_vs_cn,81,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,81,10000.0,test,0.7560975609756098,0.061017722043119164,0.6693548387096775,0.08078092785479944,0.6693548387096775,0.08110237431584238 +flat_mae,reg,logistic,adni_ad_vs_cn,82,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,82,166.81005372000556,test,0.6585365853658537,0.07408373480087516,0.5876436781609196,0.08195744348989137,0.6048387096774194,0.09164925874502186 +flat_mae,reg,logistic,adni_ad_vs_cn,83,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,83,2.782559402207126,test,0.7317073170731707,0.05192902308371983,0.5512437810945273,0.08735276692371553,0.5516129032258065,0.07097561510579674 +flat_mae,reg,logistic,adni_ad_vs_cn,84,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,84,166.81005372000556,test,0.7804878048780488,0.06460457676116294,0.7119437939110069,0.0833516513427059,0.7193548387096774,0.0874453062301559 +flat_mae,reg,logistic,adni_ad_vs_cn,85,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,85,2.782559402207126,test,0.8780487804878049,0.04644906634611655,0.8144796380090498,0.07977290446168186,0.7838709677419355,0.0810862694328127 +flat_mae,reg,logistic,adni_ad_vs_cn,86,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,86,166.81005372000556,test,0.6097560975609756,0.06553445674468462,0.47096774193548385,0.0746209839657893,0.47096774193548385,0.07473665587601347 +flat_mae,reg,logistic,adni_ad_vs_cn,87,0.046415888336127774,train,0.9105691056910569,0.012895401337332772,0.8583822759783684,0.023310957215050557,0.8202810419919468,0.02614962901463673 +flat_mae,reg,logistic,adni_ad_vs_cn,87,0.046415888336127774,test,0.8292682926829268,0.04523358635924959,0.7144278606965174,0.09299326804975666,0.6838709677419355,0.07927020375218946 +flat_mae,reg,logistic,adni_ad_vs_cn,88,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,88,0.3593813663804626,test,0.7560975609756098,0.06258778011806361,0.6693548387096775,0.08497220528378131,0.6693548387096775,0.08681423107994495 +flat_mae,reg,logistic,adni_ad_vs_cn,89,0.3593813663804626,train,0.997289972899729,0.002670409605778893,0.9961941891766453,0.0037812076270612653,0.9941860465116279,0.005728960142630293 +flat_mae,reg,logistic,adni_ad_vs_cn,89,0.3593813663804626,test,0.6829268292682927,0.06397500999319024,0.5547201336675021,0.08678941657076757,0.5532258064516129,0.0841513986694392 +flat_mae,reg,logistic,adni_ad_vs_cn,90,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,90,2.782559402207126,test,0.7560975609756098,0.05979292046136588,0.6440972222222222,0.08838239949653823,0.635483870967742,0.0833044024979851 +flat_mae,reg,logistic,adni_ad_vs_cn,91,0.046415888336127774,train,0.907859078590786,0.013306067479196475,0.8577551020408163,0.022440691594073074,0.8266085956118004,0.0247149650632593 +flat_mae,reg,logistic,adni_ad_vs_cn,91,0.046415888336127774,test,0.5853658536585366,0.06485924906420912,0.4177109440267335,0.060604328330125655,0.42096774193548386,0.06205206257928412 +flat_mae,reg,logistic,adni_ad_vs_cn,92,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,92,21.54434690031882,test,0.5853658536585366,0.07081233419820626,0.4558938329430133,0.07716069242714703,0.45483870967741935,0.08017244655281706 +flat_mae,reg,logistic,adni_ad_vs_cn,93,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,93,2.782559402207126,test,0.6585365853658537,0.07513170668364261,0.632051282051282,0.07423362325285586,0.7064516129032259,0.08294375681752789 +flat_mae,reg,logistic,adni_ad_vs_cn,94,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,94,2.782559402207126,test,0.6585365853658537,0.06442162945365022,0.5017361111111112,0.08171012609536353,0.5032258064516129,0.07615822970719052 +flat_mae,reg,logistic,adni_ad_vs_cn,95,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,95,166.81005372000556,test,0.6585365853658537,0.06966255027037495,0.5651515151515152,0.08331816430515195,0.5709677419354839,0.08798472694063383 +flat_mae,reg,logistic,adni_ad_vs_cn,96,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,96,2.782559402207126,test,0.7073170731707317,0.06039983768706203,0.5729166666666666,0.0857012477903012,0.5693548387096774,0.07975609697824308 +flat_mae,reg,logistic,adni_ad_vs_cn,97,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,97,2.782559402207126,test,0.6829268292682927,0.07099159990861434,0.6072218128224024,0.08128167811977693,0.6209677419354839,0.08929376479240445 +flat_mae,reg,logistic,adni_ad_vs_cn,98,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,reg,logistic,adni_ad_vs_cn,98,2.782559402207126,test,0.7073170731707317,0.05937208617102671,0.5729166666666666,0.08548764108265332,0.5693548387096774,0.0800707940534737 +flat_mae,reg,logistic,adni_ad_vs_cn,99,0.046415888336127774,train,0.924119241192412,0.011593103170755556,0.8828571428571428,0.019702821319220593,0.8493508094338073,0.023178832816151698 +flat_mae,reg,logistic,adni_ad_vs_cn,99,0.046415888336127774,test,0.8292682926829268,0.0355253259026262,0.6800445930880714,0.0968155086028259,0.65,0.07282691810038372 +flat_mae,reg,logistic,adni_ad_vs_cn,100,0.046415888336127774,train,0.9214092140921409,0.013114375284762272,0.8792716359985108,0.02208647722116736,0.8475840249815104,0.025218448751008166 +flat_mae,reg,logistic,adni_ad_vs_cn,100,0.046415888336127774,test,0.7073170731707317,0.06218658956298626,0.5729166666666666,0.08873908622344918,0.5693548387096774,0.08305867809111518 diff --git a/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/log.txt b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fc47372cff6a6fe1b5118840d5c90f5e62bbb5e2 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 21:29:55 +config: +output_root: experiments/decoders/output +name_prefix: eval_logistic +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (adni_ad_vs_cn reg logistic) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic +model: flat_mae +representation: reg +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/adni_ad_vs_cn__reg__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:10:29 time: 3.8383 data: 3.0360 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:52 time: 0.1890 data: 0.0617 max mem: 3005 +extract (train) [ 40/164] eta: 0:00:32 time: 0.1629 data: 0.0504 max mem: 3005 +extract (train) [ 60/164] eta: 0:00:24 time: 0.1812 data: 0.0611 max mem: 3005 +extract (train) [ 80/164] eta: 0:00:18 time: 0.1711 data: 0.0559 max mem: 3005 +extract (train) [100/164] eta: 0:00:13 time: 0.1810 data: 0.0622 max mem: 3005 +extract (train) [120/164] eta: 0:00:09 time: 0.2046 data: 0.0729 max mem: 3005 +extract (train) [140/164] eta: 0:00:04 time: 0.1784 data: 0.0575 max mem: 3005 +extract (train) [160/164] eta: 0:00:00 time: 0.1773 data: 0.0577 max mem: 3005 +extract (train) [163/164] eta: 0:00:00 time: 0.1777 data: 0.0578 max mem: 3005 +extract (train) Total time: 0:00:33 (0.2052 s / it) +extract (validation) [ 0/21] eta: 0:01:19 time: 3.7814 data: 3.6673 max mem: 3005 +extract (validation) [20/21] eta: 0:00:00 time: 0.1596 data: 0.0468 max mem: 3005 +extract (validation) Total time: 0:00:07 (0.3473 s / it) +extract (test) [ 0/21] eta: 0:01:23 time: 3.9536 data: 3.7998 max mem: 3005 +extract (test) [20/21] eta: 0:00:00 time: 0.1846 data: 0.0623 max mem: 3005 +extract (test) Total time: 0:00:07 (0.3756 s / it) +feature extraction time: 0:00:48 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | adni_ad_vs_cn | | 166.81 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | reg | logistic | adni_ad_vs_cn | | 166.81 | test | 0.68293 | 0.069268 | 0.55472 | 0.08291 | 0.55729 | 0.086716 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04246973937571651, "f1": 0.7670454545454546, "f1_std": 0.07966367496754641, "bacc": 0.7338709677419355, "bacc_std": 0.07584546772210869} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06428750117909754, "f1": 0.47096774193548385, "f1_std": 0.07330687313411444, "bacc": 0.47096774193548385, "bacc_std": 0.07333902651094684} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.060253262825694154, "f1": 0.5729166666666666, "f1_std": 0.08391054555891782, "bacc": 0.5693548387096774, "bacc_std": 0.0784432117610184} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06549575734064934, "f1": 0.7515151515151515, "f1_std": 0.08066889375104956, "bacc": 0.7693548387096774, "bacc_std": 0.08476927435769445} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.04911000896323302, "f1": 0.4696517412935323, "f1_std": 0.06711498447474913, "bacc": 0.4854838709677419, "bacc_std": 0.05500726231497199} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06122468154251317, "f1": 0.6917293233082706, "f1_std": 0.08772254816417174, "bacc": 0.685483870967742, "bacc_std": 0.08723945606839408} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0610102340719244, "f1": 0.6440972222222222, "f1_std": 0.09095724643701052, "bacc": 0.635483870967742, "bacc_std": 0.085123666175654} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06071036945162666, "f1": 0.5918552036199095, "f1_std": 0.09130663897075328, "bacc": 0.5854838709677419, "bacc_std": 0.08147208940954899} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 166.81005372000556, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07335871786781714, "f1": 0.5494505494505495, "f1_std": 0.07626381791951264, "bacc": 0.5725806451612903, "bacc_std": 0.08859208417083059} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.061143943253192896, "f1": 0.5729166666666666, "f1_std": 0.08631645710543846, "bacc": 0.5693548387096774, "bacc_std": 0.07938469796221126} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.045139087056007804, "f1": 0.6328358208955224, "f1_std": 0.08777565445034076, "bacc": 0.6177419354838709, "bacc_std": 0.07119973598087354} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 2.782559402207126, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.07659555625849637, "f1": 0.5108771929824562, "f1_std": 0.08093448115011227, "bacc": 0.5225806451612903, "bacc_std": 0.09198756555340079} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05692530355135613, "f1": 0.6440972222222222, "f1_std": 0.08577582337107054, "bacc": 0.635483870967742, "bacc_std": 0.08293732347616613} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05940614301133882, "f1": 0.5729166666666666, "f1_std": 0.08629561918349811, "bacc": 0.5693548387096774, "bacc_std": 0.07934082383453664} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04915020864770594, "f1": 0.5512437810945273, "f1_std": 0.08742828593990601, "bacc": 0.5516129032258065, "bacc_std": 0.06938353037429583} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05031962089952331, "f1": 0.6328358208955224, "f1_std": 0.0947270341183192, "bacc": 0.6177419354838709, "bacc_std": 0.07751251088974528} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.056139941573332866, "f1": 0.6660633484162897, "f1_std": 0.09178952342310046, "bacc": 0.6516129032258065, "bacc_std": 0.08401838780954123} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05600788165832082, "f1": 0.6660633484162897, "f1_std": 0.09256471973158738, "bacc": 0.6516129032258065, "bacc_std": 0.08446375494911447} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07340930213195257, "f1": 0.5467943994104643, "f1_std": 0.07761765894525781, "bacc": 0.5548387096774194, "bacc_std": 0.08337517048300243} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06575919150280754, "f1": 0.6479313036690086, "f1_std": 0.0812578547370611, "bacc": 0.6532258064516129, "bacc_std": 0.08582038232781777} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05147252260173691, "f1": 0.6117424242424243, "f1_std": 0.08765865475080913, "bacc": 0.6016129032258064, "bacc_std": 0.0739363533698802} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06132978116949898, "f1": 0.6232247284878863, "f1_std": 0.0849193660950125, "bacc": 0.6193548387096774, "bacc_std": 0.08151630672587447} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05758352183491863, "f1": 0.7354838709677419, "f1_std": 0.08097727761751287, "bacc": 0.7354838709677419, "bacc_std": 0.0840171879248848} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04050399323737977, "f1": 0.4972129319955407, "f1_std": 0.07571033536779759, "bacc": 0.5177419354838709, "bacc_std": 0.053754574957152634} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06761552785696907, "f1": 0.5839188134270101, "f1_std": 0.08196346087163776, "bacc": 0.5870967741935484, "bacc_std": 0.0853153337979838} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07226588489852294, "f1": 0.5839188134270101, "f1_std": 0.08737646011373293, "bacc": 0.5870967741935484, "bacc_std": 0.08950852998841387} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05857094520027637, "f1": 0.6440972222222222, "f1_std": 0.0873410981020601, "bacc": 0.635483870967742, "bacc_std": 0.08256686698314955} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 21.54434690031882, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06840539479370619, "f1": 0.47096774193548385, "f1_std": 0.07690609921397815, "bacc": 0.47096774193548385, "bacc_std": 0.0774699954673467} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06718989400228335, "f1": 0.603225806451613, "f1_std": 0.08791814169643879, "bacc": 0.603225806451613, "bacc_std": 0.08710965848547918} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06596745362326167, "f1": 0.48621553884711777, "f1_std": 0.07849632115585517, "bacc": 0.48709677419354835, "bacc_std": 0.07558080238993464} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.033563350738417125, "f1": 0.4142857142857143, "f1_std": 0.011710739365674917, "bacc": 0.46774193548387094, "bacc_std": 0.022195119036695205} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07638113206099847, "f1": 0.5876436781609196, "f1_std": 0.08437576355815693, "bacc": 0.6048387096774194, "bacc_std": 0.09308638145330797} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06480419416366247, "f1": 0.603225806451613, "f1_std": 0.08426549569176049, "bacc": 0.603225806451613, "bacc_std": 0.08422059700226112} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05072408479320517, "f1": 0.7402714932126697, "f1_std": 0.08488173186645732, "bacc": 0.717741935483871, "bacc_std": 0.08221607884853113} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.051280776368782956, "f1": 0.6328358208955224, "f1_std": 0.09856085021573761, "bacc": 0.6177419354838709, "bacc_std": 0.08168068994114329} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 10000.0, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06454851207014735, "f1": 0.47096774193548385, "f1_std": 0.07140418157686343, "bacc": 0.47096774193548385, "bacc_std": 0.07151721564207174} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0716243090150253, "f1": 0.5651515151515152, "f1_std": 0.08548664642578424, "bacc": 0.5709677419354839, "bacc_std": 0.09147121591118461} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06460709972533739, "f1": 0.5839188134270101, "f1_std": 0.08158375041463249, "bacc": 0.5870967741935484, "bacc_std": 0.08441550177951193} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06026574107972258, "f1": 0.5017361111111112, "f1_std": 0.0774378234834809, "bacc": 0.5032258064516129, "bacc_std": 0.0719703149436664} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07220650866037549, "f1": 0.5876436781609196, "f1_std": 0.07901331293441857, "bacc": 0.6048387096774194, "bacc_std": 0.08817182750432619} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.045159746860078724, "f1": 0.569327731092437, "f1_std": 0.09059377494295195, "bacc": 0.567741935483871, "bacc_std": 0.0680991939638229} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06765844866085342, "f1": 0.5370967741935484, "f1_std": 0.08261322133495091, "bacc": 0.5370967741935484, "bacc_std": 0.08346484177601435} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.0452544922193601, "f1": 0.6554621848739496, "f1_std": 0.0945809020453021, "bacc": 0.6338709677419355, "bacc_std": 0.07569081605978394} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05396213447565202, "f1": 0.6117424242424243, "f1_std": 0.09160419027027639, "bacc": 0.6016129032258064, "bacc_std": 0.0794647999612559} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06876104205306721, "f1": 0.6272727272727273, "f1_std": 0.08321455766851589, "bacc": 0.6370967741935484, "bacc_std": 0.08759076226243769} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05241403829009051, "f1": 0.6117424242424243, "f1_std": 0.09105573728519119, "bacc": 0.6016129032258064, "bacc_std": 0.07796420801579182} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04823213745915314, "f1": 0.569327731092437, "f1_std": 0.09027907806196325, "bacc": 0.567741935483871, "bacc_std": 0.06876162650720788} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06458551322656533, "f1": 0.5547201336675021, "f1_std": 0.084868229073962, "bacc": 0.5532258064516129, "bacc_std": 0.08124803047320739} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 2.782559402207126, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.06851411835555693, "f1": 0.4558938329430133, "f1_std": 0.07603454620164343, "bacc": 0.45483870967741935, "bacc_std": 0.07907818178579569} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06420614850811628, "f1": 0.6693548387096775, "f1_std": 0.0879841268705883, "bacc": 0.6693548387096775, "bacc_std": 0.08843296455260873} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05120350504808049, "f1": 0.4696517412935323, "f1_std": 0.06624358035969148, "bacc": 0.4854838709677419, "bacc_std": 0.05558473053339689} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06369788523166461, "f1": 0.5729166666666666, "f1_std": 0.09015468291597781, "bacc": 0.5693548387096774, "bacc_std": 0.0828853548580687} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.050329810525012314, "f1": 0.6893939393939394, "f1_std": 0.09019622445316228, "bacc": 0.667741935483871, "bacc_std": 0.07946292003676955} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0608373847930633, "f1": 0.6440972222222222, "f1_std": 0.08748969394219276, "bacc": 0.635483870967742, "bacc_std": 0.08417500623901485} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06446372377951581, "f1": 0.6479313036690086, "f1_std": 0.08287953238163753, "bacc": 0.6532258064516129, "bacc_std": 0.08612990328181046} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06686244503173402, "f1": 0.603225806451613, "f1_std": 0.08677010666210364, "bacc": 0.603225806451613, "bacc_std": 0.08750997392240557} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06146178859637746, "f1": 0.5729166666666666, "f1_std": 0.08715676077478036, "bacc": 0.5693548387096774, "bacc_std": 0.08064418476828125} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06025130792836882, "f1": 0.6440972222222222, "f1_std": 0.08832488952950536, "bacc": 0.635483870967742, "bacc_std": 0.08398040471854697} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.04892427588600224, "f1": 0.4696517412935323, "f1_std": 0.06816919518377128, "bacc": 0.4854838709677419, "bacc_std": 0.055605042012306996} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06397195994907733, "f1": 0.6693548387096775, "f1_std": 0.0878383258079867, "bacc": 0.6693548387096775, "bacc_std": 0.08966629421593329} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06384236610352571, "f1": 0.6232247284878863, "f1_std": 0.08653965121264724, "bacc": 0.6193548387096774, "bacc_std": 0.08409106356744614} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05488138743231746, "f1": 0.6660633484162897, "f1_std": 0.08732795161410309, "bacc": 0.6516129032258065, "bacc_std": 0.08032263142892551} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0402668079677833, "f1": 0.5886287625418061, "f1_std": 0.09444418088783978, "bacc": 0.5838709677419355, "bacc_std": 0.06743241693190179} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05123632708892544, "f1": 0.5512437810945273, "f1_std": 0.08564121739774407, "bacc": 0.5516129032258065, "bacc_std": 0.06900937961252603} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07072322909659103, "f1": 0.5651515151515152, "f1_std": 0.08391621533268244, "bacc": 0.5709677419354839, "bacc_std": 0.08842442554101251} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.054499747583040326, "f1": 0.7152777777777778, "f1_std": 0.08477707014025586, "bacc": 0.7016129032258065, "bacc_std": 0.08156309202862229} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05343302946413993, "f1": 0.7402714932126697, "f1_std": 0.0897878476943895, "bacc": 0.717741935483871, "bacc_std": 0.08511404230625354} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.055510801988605814, "f1": 0.6117424242424243, "f1_std": 0.09384265129130459, "bacc": 0.6016129032258064, "bacc_std": 0.0800424894319517} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06782480844127155, "f1": 0.6962962962962963, "f1_std": 0.06986374509759251, "bacc": 0.7548387096774194, "bacc_std": 0.07577020485526309} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0641650904622394, "f1": 0.5547201336675021, "f1_std": 0.0846313058603222, "bacc": 0.5532258064516129, "bacc_std": 0.08219736305524777} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 2.782559402207126, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06637732172945972, "f1": 0.48621553884711777, "f1_std": 0.07529306311662956, "bacc": 0.48709677419354835, "bacc_std": 0.07279342830911865} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04813947117809216, "f1": 0.6328358208955224, "f1_std": 0.09381567777555355, "bacc": 0.6177419354838709, "bacc_std": 0.07705537956074696} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06674145913626933, "f1": 0.7054597701149425, "f1_std": 0.07421655076818885, "bacc": 0.7370967741935484, "bacc_std": 0.08085251407506179} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.03192983831243031, "f1": 0.5119047619047619, "f1_std": 0.07621817219903167, "bacc": 0.5338709677419355, "bacc_std": 0.048741740675514296} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05585898366349943, "f1": 0.5918552036199095, "f1_std": 0.08794997386381644, "bacc": 0.5854838709677419, "bacc_std": 0.07802504594016346} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 166.81005372000556, "split": "test", "acc": 0.5121951219512195, "acc_std": 0.07414009922376004, "f1": 0.4368131868131868, "f1_std": 0.07154041732004592, "bacc": 0.4403225806451613, "bacc_std": 0.08221994841127901} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.033334459628007386, "f1": 0.6095238095238095, "f1_std": 0.10134122331440851, "bacc": 0.6, "bacc_std": 0.06833564223741516} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0649036443183745, "f1": 0.5017361111111112, "f1_std": 0.08316483920080699, "bacc": 0.5032258064516129, "bacc_std": 0.07686236628335338} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06301281702997844, "f1": 0.7119437939110069, "f1_std": 0.07973530931667826, "bacc": 0.7193548387096774, "bacc_std": 0.0841344308578553} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07286113363628802, "f1": 0.5030303030303029, "f1_std": 0.08001243527763578, "bacc": 0.5048387096774194, "bacc_std": 0.08411439728979575} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 10000.0, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.061017722043119164, "f1": 0.6693548387096775, "f1_std": 0.08078092785479944, "bacc": 0.6693548387096775, "bacc_std": 0.08110237431584238} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07408373480087516, "f1": 0.5876436781609196, "f1_std": 0.08195744348989137, "bacc": 0.6048387096774194, "bacc_std": 0.09164925874502186} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05192902308371983, "f1": 0.5512437810945273, "f1_std": 0.08735276692371553, "bacc": 0.5516129032258065, "bacc_std": 0.07097561510579674} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06460457676116294, "f1": 0.7119437939110069, "f1_std": 0.0833516513427059, "bacc": 0.7193548387096774, "bacc_std": 0.0874453062301559} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 2.782559402207126, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.04644906634611655, "f1": 0.8144796380090498, "f1_std": 0.07977290446168186, "bacc": 0.7838709677419355, "bacc_std": 0.0810862694328127} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 166.81005372000556, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06553445674468462, "f1": 0.47096774193548385, "f1_std": 0.0746209839657893, "bacc": 0.47096774193548385, "bacc_std": 0.07473665587601347} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.04523358635924959, "f1": 0.7144278606965174, "f1_std": 0.09299326804975666, "bacc": 0.6838709677419355, "bacc_std": 0.07927020375218946} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06258778011806361, "f1": 0.6693548387096775, "f1_std": 0.08497220528378131, "bacc": 0.6693548387096775, "bacc_std": 0.08681423107994495} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06397500999319024, "f1": 0.5547201336675021, "f1_std": 0.08678941657076757, "bacc": 0.5532258064516129, "bacc_std": 0.0841513986694392} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05979292046136588, "f1": 0.6440972222222222, "f1_std": 0.08838239949653823, "bacc": 0.635483870967742, "bacc_std": 0.0833044024979851} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.06485924906420912, "f1": 0.4177109440267335, "f1_std": 0.060604328330125655, "bacc": 0.42096774193548386, "bacc_std": 0.06205206257928412} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 21.54434690031882, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.07081233419820626, "f1": 0.4558938329430133, "f1_std": 0.07716069242714703, "bacc": 0.45483870967741935, "bacc_std": 0.08017244655281706} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07513170668364261, "f1": 0.632051282051282, "f1_std": 0.07423362325285586, "bacc": 0.7064516129032259, "bacc_std": 0.08294375681752789} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06442162945365022, "f1": 0.5017361111111112, "f1_std": 0.08171012609536353, "bacc": 0.5032258064516129, "bacc_std": 0.07615822970719052} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06966255027037495, "f1": 0.5651515151515152, "f1_std": 0.08331816430515195, "bacc": 0.5709677419354839, "bacc_std": 0.08798472694063383} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06039983768706203, "f1": 0.5729166666666666, "f1_std": 0.0857012477903012, "bacc": 0.5693548387096774, "bacc_std": 0.07975609697824308} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07099159990861434, "f1": 0.6072218128224024, "f1_std": 0.08128167811977693, "bacc": 0.6209677419354839, "bacc_std": 0.08929376479240445} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05937208617102671, "f1": 0.5729166666666666, "f1_std": 0.08548764108265332, "bacc": 0.5693548387096774, "bacc_std": 0.0800707940534737} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.0355253259026262, "f1": 0.6800445930880714, "f1_std": 0.0968155086028259, "bacc": 0.65, "bacc_std": 0.07282691810038372} +{"model": "flat_mae", "repr": "reg", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06218658956298626, "f1": 0.5729166666666666, "f1_std": 0.08873908622344918, "bacc": 0.5693548387096774, "bacc_std": 0.08305867809111518} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | reg | logistic | adni_ad_vs_cn | train | 100 | 242.29 | 1402.7 | 0.9764 | 0.041867 | 0.96239 | 0.069212 | 0.95381 | 0.082248 | +| flat_mae | reg | logistic | adni_ad_vs_cn | test | 100 | 242.29 | 1402.7 | 0.71805 | 0.068665 | 0.59497 | 0.083228 | 0.59508 | 0.079169 | + + +done! total time: 0:04:40 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/config.yaml b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64446e441e744f1b32cc507d800a9e91f276aee3 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..2f7487953c685cbb109f9da61c44bcaebbce5410 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 12, "eval/id_best": 27, "eval/lr_best": 0.00047999999999999996, "eval/wd_best": 0.05, "eval/train/loss": 0.004277614410966635, "eval/train/acc": 0.9994736565082373, "eval/train/acc_std": 0.00016324850350056674, "eval/train/f1": 0.9995795850603796, "eval/train/f1_std": 0.00013458352927284443, "eval/validation/loss": 0.025820115581154823, "eval/validation/acc": 0.9930555555555556, "eval/validation/acc_std": 0.0013263234979013492, "eval/validation/f1": 0.9914881652799175, "eval/validation/f1_std": 0.0018341402164751628, "eval/test/loss": 0.04545854777097702, "eval/test/acc": 0.9871031746031746, "eval/test/acc_std": 0.0015778031280899252, "eval/test/f1": 0.9836383856086093, "eval/test/f1_std": 0.0021758462627728976} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..37add78f4dc8d983d84bdfb622533c738437b5c8 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 12, "eval/best/id_best": 27, "eval/best/lr_best": 0.00047999999999999996, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.004277614410966635, "eval/best/train/acc": 0.9994736565082373, "eval/best/train/acc_std": 0.00016324850350056674, "eval/best/train/f1": 0.9995795850603796, "eval/best/train/f1_std": 0.00013458352927284443, "eval/best/validation/loss": 0.025820115581154823, "eval/best/validation/acc": 0.9930555555555556, "eval/best/validation/acc_std": 0.0013263234979013492, "eval/best/validation/f1": 0.9914881652799175, "eval/best/validation/f1_std": 0.0018341402164751628, "eval/best/test/loss": 0.04545854777097702, "eval/best/test/acc": 0.9871031746031746, "eval/best/test/acc_std": 0.0015778031280899252, "eval/best/test/f1": 0.9836383856086093, "eval/best/test/f1_std": 0.0021758462627728976} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..74f4495ea38a6ffc1059ebd6c7cdc5e8761461a4 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 24, "eval/last/lr_best": 0.0003, "eval/last/wd_best": 0.05, "eval/last/train/loss": 0.0082715954631567, "eval/last/train/acc": 0.9989473130164745, "eval/last/train/acc_std": 0.000225868067364036, "eval/last/train/f1": 0.9990137223998998, "eval/last/train/f1_std": 0.00021829280930141752, "eval/last/validation/loss": 0.025019746273756027, "eval/last/validation/acc": 0.9925595238095238, "eval/last/validation/acc_std": 0.0013347398601008705, "eval/last/validation/f1": 0.9909552748533937, "eval/last/validation/f1_std": 0.0018413725941259648, "eval/last/test/loss": 0.042015377432107925, "eval/last/test/acc": 0.9873015873015873, "eval/last/test/acc_std": 0.0015844108598273166, "eval/last/test/f1": 0.9840791627125695, "eval/last/test/f1_std": 0.002170612827924507} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..3ef849b7189f330f43dfafb274d4d9f0851ab51f --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,12,0.00047999999999999996,0.05,27,"[1.6, 1.0]",train,0.004277614410966635,0.9994736565082373,0.00016324850350056674,0.9995795850603796,0.00013458352927284443 +flat_mae,patch,attn,hcpya_task21,best,12,0.00047999999999999996,0.05,27,"[1.6, 1.0]",validation,0.025820115581154823,0.9930555555555556,0.0013263234979013492,0.9914881652799175,0.0018341402164751628 +flat_mae,patch,attn,hcpya_task21,best,12,0.00047999999999999996,0.05,27,"[1.6, 1.0]",test,0.04545854777097702,0.9871031746031746,0.0015778031280899252,0.9836383856086093,0.0021758462627728976 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..3ef849b7189f330f43dfafb274d4d9f0851ab51f --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,12,0.00047999999999999996,0.05,27,"[1.6, 1.0]",train,0.004277614410966635,0.9994736565082373,0.00016324850350056674,0.9995795850603796,0.00013458352927284443 +flat_mae,patch,attn,hcpya_task21,best,12,0.00047999999999999996,0.05,27,"[1.6, 1.0]",validation,0.025820115581154823,0.9930555555555556,0.0013263234979013492,0.9914881652799175,0.0018341402164751628 +flat_mae,patch,attn,hcpya_task21,best,12,0.00047999999999999996,0.05,27,"[1.6, 1.0]",test,0.04545854777097702,0.9871031746031746,0.0015778031280899252,0.9836383856086093,0.0021758462627728976 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..f088a92a442dcbb149b3263728b7451673756c52 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.0003,0.05,24,"[1, 1.0]",train,0.0082715954631567,0.9989473130164745,0.000225868067364036,0.9990137223998998,0.00021829280930141752 +flat_mae,patch,attn,hcpya_task21,last,19,0.0003,0.05,24,"[1, 1.0]",validation,0.025019746273756027,0.9925595238095238,0.0013347398601008705,0.9909552748533937,0.0018413725941259648 +flat_mae,patch,attn,hcpya_task21,last,19,0.0003,0.05,24,"[1, 1.0]",test,0.042015377432107925,0.9873015873015873,0.0015844108598273166,0.9840791627125695,0.002170612827924507 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/log.txt b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..be83a51aeb120cbf096e3c68ede5f74b93d8a39c --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,886 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 22:53:09 +config: +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:22:13 lr: nan time: 3.3338 data: 2.8531 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:47 lr: 0.000003 loss: 3.0500 (3.0489) grad: 0.0948 (0.0911) time: 0.4627 data: 0.0026 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:10 lr: 0.000006 loss: 3.0342 (3.0359) grad: 0.0906 (0.0890) time: 0.4534 data: 0.0043 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:50 lr: 0.000009 loss: 2.9926 (3.0119) grad: 0.0885 (0.0895) time: 0.4489 data: 0.0041 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:37 lr: 0.000012 loss: 2.9336 (2.9786) grad: 0.0885 (0.0889) time: 0.4627 data: 0.0046 max mem: 22446 +train: [0] [100/400] eta: 0:02:26 lr: 0.000015 loss: 2.8325 (2.9320) grad: 0.0902 (0.0895) time: 0.4733 data: 0.0041 max mem: 22446 +train: [0] [120/400] eta: 0:02:14 lr: 0.000018 loss: 2.6907 (2.8838) grad: 0.0918 (0.0893) time: 0.4481 data: 0.0042 max mem: 22446 +train: [0] [140/400] eta: 0:02:04 lr: 0.000021 loss: 2.5709 (2.8327) grad: 0.0843 (0.0884) time: 0.4682 data: 0.0044 max mem: 22446 +train: [0] [160/400] eta: 0:01:54 lr: 0.000024 loss: 2.4834 (2.7794) grad: 0.0856 (0.0881) time: 0.4405 data: 0.0040 max mem: 22446 +train: [0] [180/400] eta: 0:01:44 lr: 0.000027 loss: 2.3784 (2.7296) grad: 0.0814 (0.0872) time: 0.4548 data: 0.0043 max mem: 22446 +train: [0] [200/400] eta: 0:01:34 lr: 0.000030 loss: 2.2524 (2.6781) grad: 0.0814 (0.0869) time: 0.4550 data: 0.0044 max mem: 22446 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 2.1704 (2.6299) grad: 0.0825 (0.0866) time: 0.4520 data: 0.0042 max mem: 22446 +train: [0] [240/400] eta: 0:01:14 lr: 0.000036 loss: 2.1122 (2.5839) grad: 0.0792 (0.0862) time: 0.4510 data: 0.0042 max mem: 22446 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 2.0405 (2.5392) grad: 0.0786 (0.0857) time: 0.4463 data: 0.0041 max mem: 22446 +train: [0] [280/400] eta: 0:00:55 lr: 0.000042 loss: 1.9849 (2.4972) grad: 0.0775 (0.0851) time: 0.4470 data: 0.0041 max mem: 22446 +train: [0] [300/400] eta: 0:00:47 lr: 0.000045 loss: 1.9001 (2.4556) grad: 0.0779 (0.0848) time: 0.6134 data: 0.1730 max mem: 22446 +train: [0] [320/400] eta: 0:00:37 lr: 0.000048 loss: 1.8526 (2.4163) grad: 0.0778 (0.0843) time: 0.4473 data: 0.0030 max mem: 22446 +train: [0] [340/400] eta: 0:00:28 lr: 0.000051 loss: 1.8004 (2.3787) grad: 0.0766 (0.0841) time: 0.4390 data: 0.0039 max mem: 22446 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 1.7814 (2.3454) grad: 0.0760 (0.0837) time: 0.4515 data: 0.0044 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.7288 (2.3103) grad: 0.0763 (0.0835) time: 0.4515 data: 0.0040 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.6606 (2.2779) grad: 0.0761 (0.0829) time: 0.4555 data: 0.0042 max mem: 22446 +train: [0] Total time: 0:03:07 (0.4689 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.6606 (2.2779) grad: 0.0761 (0.0829) +eval (validation): [0] [ 0/63] eta: 0:03:12 time: 3.0629 data: 2.8243 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:19 time: 0.3296 data: 0.0067 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3498 data: 0.0031 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3316 data: 0.0035 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3294 data: 0.0037 max mem: 22446 +eval (validation): [0] Total time: 0:00:24 (0.3847 s / it) +cv: [0] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.050 acc: 0.984 f1: 0.981 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:21:16 lr: nan time: 3.1907 data: 2.8412 max mem: 22446 +train: [1] [ 20/400] eta: 0:03:45 lr: 0.000063 loss: 1.6330 (1.6261) grad: 0.0725 (0.0743) time: 0.4629 data: 0.0033 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:08 lr: 0.000066 loss: 1.6031 (1.6015) grad: 0.0729 (0.0753) time: 0.4531 data: 0.0042 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:50 lr: 0.000069 loss: 1.5475 (1.5839) grad: 0.0739 (0.0756) time: 0.4529 data: 0.0043 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:38 lr: 0.000072 loss: 1.5053 (1.5593) grad: 0.0757 (0.0765) time: 0.4732 data: 0.0043 max mem: 22446 +train: [1] [100/400] eta: 0:02:26 lr: 0.000075 loss: 1.4746 (1.5376) grad: 0.0757 (0.0757) time: 0.4614 data: 0.0043 max mem: 22446 +train: [1] [120/400] eta: 0:02:15 lr: 0.000078 loss: 1.4382 (1.5238) grad: 0.0711 (0.0747) time: 0.4738 data: 0.0044 max mem: 22446 +train: [1] [140/400] eta: 0:02:05 lr: 0.000081 loss: 1.4379 (1.5108) grad: 0.0689 (0.0746) time: 0.4562 data: 0.0043 max mem: 22446 +train: [1] [160/400] eta: 0:01:55 lr: 0.000084 loss: 1.4180 (1.4964) grad: 0.0707 (0.0741) time: 0.4694 data: 0.0044 max mem: 22446 +train: [1] [180/400] eta: 0:01:45 lr: 0.000087 loss: 1.3915 (1.4845) grad: 0.0686 (0.0734) time: 0.4583 data: 0.0042 max mem: 22446 +train: [1] [200/400] eta: 0:01:35 lr: 0.000090 loss: 1.3708 (1.4703) grad: 0.0685 (0.0735) time: 0.4728 data: 0.0045 max mem: 22446 +train: [1] [220/400] eta: 0:01:25 lr: 0.000093 loss: 1.3266 (1.4576) grad: 0.0718 (0.0732) time: 0.4455 data: 0.0043 max mem: 22446 +train: [1] [240/400] eta: 0:01:15 lr: 0.000096 loss: 1.3013 (1.4438) grad: 0.0699 (0.0732) time: 0.4631 data: 0.0043 max mem: 22446 +train: [1] [260/400] eta: 0:01:06 lr: 0.000099 loss: 1.2716 (1.4291) grad: 0.0683 (0.0729) time: 0.4605 data: 0.0043 max mem: 22446 +train: [1] [280/400] eta: 0:00:56 lr: 0.000102 loss: 1.2440 (1.4149) grad: 0.0688 (0.0729) time: 0.4521 data: 0.0043 max mem: 22446 +train: [1] [300/400] eta: 0:00:48 lr: 0.000105 loss: 1.2176 (1.4020) grad: 0.0713 (0.0728) time: 0.6334 data: 0.1764 max mem: 22446 +train: [1] [320/400] eta: 0:00:38 lr: 0.000108 loss: 1.2015 (1.3908) grad: 0.0739 (0.0729) time: 0.4497 data: 0.0040 max mem: 22446 +train: [1] [340/400] eta: 0:00:28 lr: 0.000111 loss: 1.2152 (1.3797) grad: 0.0706 (0.0727) time: 0.4542 data: 0.0041 max mem: 22446 +train: [1] [360/400] eta: 0:00:19 lr: 0.000114 loss: 1.1666 (1.3669) grad: 0.0687 (0.0726) time: 0.4521 data: 0.0042 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 1.1358 (1.3547) grad: 0.0687 (0.0725) time: 0.4595 data: 0.0041 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.1222 (1.3427) grad: 0.0702 (0.0727) time: 0.4576 data: 0.0043 max mem: 22446 +train: [1] Total time: 0:03:10 (0.4755 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.1222 (1.3427) grad: 0.0702 (0.0727) +eval (validation): [1] [ 0/63] eta: 0:03:14 time: 3.0896 data: 2.8565 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:20 time: 0.3583 data: 0.0145 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:09 time: 0.3316 data: 0.0027 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3438 data: 0.0035 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3469 data: 0.0035 max mem: 22446 +eval (validation): [1] Total time: 0:00:24 (0.3925 s / it) +cv: [1] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.042 acc: 0.987 f1: 0.984 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:21:29 lr: nan time: 3.2226 data: 2.8815 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:42 lr: 0.000123 loss: 1.0794 (1.0863) grad: 0.0704 (0.0732) time: 0.4526 data: 0.0033 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:07 lr: 0.000126 loss: 1.0833 (1.0856) grad: 0.0700 (0.0737) time: 0.4520 data: 0.0040 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:48 lr: 0.000129 loss: 1.0649 (1.0765) grad: 0.0726 (0.0743) time: 0.4487 data: 0.0042 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:35 lr: 0.000132 loss: 1.0510 (1.0706) grad: 0.0718 (0.0736) time: 0.4559 data: 0.0046 max mem: 22446 +train: [2] [100/400] eta: 0:02:24 lr: 0.000135 loss: 1.0396 (1.0652) grad: 0.0677 (0.0729) time: 0.4545 data: 0.0041 max mem: 22446 +train: [2] [120/400] eta: 0:02:14 lr: 0.000138 loss: 1.0314 (1.0619) grad: 0.0723 (0.0737) time: 0.4870 data: 0.0042 max mem: 22446 +train: [2] [140/400] eta: 0:02:04 lr: 0.000141 loss: 1.0176 (1.0547) grad: 0.0751 (0.0745) time: 0.4568 data: 0.0044 max mem: 22446 +train: [2] [160/400] eta: 0:01:54 lr: 0.000144 loss: 0.9947 (1.0467) grad: 0.0751 (0.0754) time: 0.4677 data: 0.0043 max mem: 22446 +train: [2] [180/400] eta: 0:01:44 lr: 0.000147 loss: 0.9805 (1.0417) grad: 0.0745 (0.0755) time: 0.4521 data: 0.0040 max mem: 22446 +train: [2] [200/400] eta: 0:01:34 lr: 0.000150 loss: 0.9604 (1.0344) grad: 0.0726 (0.0757) time: 0.4680 data: 0.0044 max mem: 22446 +train: [2] [220/400] eta: 0:01:25 lr: 0.000153 loss: 0.9378 (1.0259) grad: 0.0707 (0.0762) time: 0.4690 data: 0.0043 max mem: 22446 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 0.9284 (1.0179) grad: 0.0762 (0.0775) time: 0.4391 data: 0.0040 max mem: 22446 +train: [2] [260/400] eta: 0:01:05 lr: 0.000159 loss: 0.9094 (1.0090) grad: 0.0762 (0.0777) time: 0.4644 data: 0.0042 max mem: 22446 +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 0.9038 (1.0028) grad: 0.0756 (0.0776) time: 0.4523 data: 0.0044 max mem: 22446 +train: [2] [300/400] eta: 0:00:47 lr: 0.000165 loss: 0.8912 (0.9953) grad: 0.0742 (0.0779) time: 0.6021 data: 0.1693 max mem: 22446 +train: [2] [320/400] eta: 0:00:38 lr: 0.000168 loss: 0.8912 (0.9891) grad: 0.0835 (0.0788) time: 0.4773 data: 0.0037 max mem: 22446 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 0.8970 (0.9831) grad: 0.0901 (0.0796) time: 0.4609 data: 0.0044 max mem: 22446 +train: [2] [360/400] eta: 0:00:19 lr: 0.000174 loss: 0.8556 (0.9761) grad: 0.0840 (0.0794) time: 0.4574 data: 0.0046 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 0.8297 (0.9682) grad: 0.0716 (0.0790) time: 0.4634 data: 0.0043 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.8320 (0.9623) grad: 0.0774 (0.0795) time: 0.4553 data: 0.0042 max mem: 22446 +train: [2] Total time: 0:03:09 (0.4743 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.8320 (0.9623) grad: 0.0774 (0.0795) +eval (validation): [2] [ 0/63] eta: 0:03:15 time: 3.1039 data: 2.8589 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:21 time: 0.3647 data: 0.0032 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3396 data: 0.0029 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3398 data: 0.0034 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3420 data: 0.0035 max mem: 22446 +eval (validation): [2] Total time: 0:00:25 (0.3970 s / it) +cv: [2] best hparam: (1.6, 1.0) (027) ('027_lr1.6e+00_wd1.0e+00') loss: 0.052 acc: 0.986 f1: 0.983 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [3] [ 0/400] eta: 0:21:50 lr: nan time: 3.2772 data: 2.9266 max mem: 22446 +train: [3] [ 20/400] eta: 0:03:39 lr: 0.000183 loss: 0.8031 (0.8086) grad: 0.0742 (0.0740) time: 0.4435 data: 0.0027 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:05 lr: 0.000186 loss: 0.8034 (0.8190) grad: 0.0754 (0.0772) time: 0.4504 data: 0.0043 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:47 lr: 0.000189 loss: 0.7943 (0.8101) grad: 0.0875 (0.0874) time: 0.4469 data: 0.0043 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:35 lr: 0.000192 loss: 0.7790 (0.8058) grad: 0.0849 (0.0852) time: 0.4643 data: 0.0042 max mem: 22446 +train: [3] [100/400] eta: 0:02:24 lr: 0.000195 loss: 0.7654 (0.8037) grad: 0.0785 (0.0846) time: 0.4694 data: 0.0044 max mem: 22446 +train: [3] [120/400] eta: 0:02:14 lr: 0.000198 loss: 0.7818 (0.8024) grad: 0.0820 (0.0848) time: 0.4682 data: 0.0043 max mem: 22446 +train: [3] [140/400] eta: 0:02:04 lr: 0.000201 loss: 0.7722 (0.7985) grad: 0.0946 (0.0896) time: 0.4741 data: 0.0041 max mem: 22446 +train: [3] [160/400] eta: 0:01:54 lr: 0.000204 loss: 0.7722 (0.7970) grad: 0.1046 (0.0923) time: 0.4697 data: 0.0043 max mem: 22446 +train: [3] [180/400] eta: 0:01:44 lr: 0.000207 loss: 0.7547 (0.7902) grad: 0.1069 (0.0946) time: 0.4635 data: 0.0043 max mem: 22446 +train: [3] [200/400] eta: 0:01:34 lr: 0.000210 loss: 0.7539 (0.7944) grad: 0.1164 (0.0988) time: 0.4461 data: 0.0038 max mem: 22446 +train: [3] [220/400] eta: 0:01:25 lr: 0.000213 loss: 0.7619 (0.7916) grad: 0.1196 (0.1018) time: 0.4883 data: 0.0043 max mem: 22446 +train: [3] [240/400] eta: 0:01:15 lr: 0.000216 loss: 0.7546 (0.7879) grad: 0.1364 (0.1088) time: 0.4582 data: 0.0041 max mem: 22446 +train: [3] [260/400] eta: 0:01:05 lr: 0.000219 loss: 0.7477 (0.7855) grad: 0.1668 (0.1149) time: 0.4442 data: 0.0041 max mem: 22446 +train: [3] [280/400] eta: 0:00:56 lr: 0.000222 loss: 0.7335 (0.7861) grad: 0.1983 (0.1214) time: 0.4650 data: 0.0043 max mem: 22446 +train: [3] [300/400] eta: 0:00:48 lr: 0.000225 loss: 0.7385 (0.7850) grad: 0.2030 (0.1263) time: 0.6475 data: 0.1806 max mem: 22446 +train: [3] [320/400] eta: 0:00:38 lr: 0.000228 loss: 0.7342 (0.7831) grad: 0.2014 (0.1343) time: 0.4420 data: 0.0034 max mem: 22446 +train: [3] [340/400] eta: 0:00:28 lr: 0.000231 loss: 0.7832 (0.7868) grad: 0.2813 (0.1463) time: 0.4597 data: 0.0044 max mem: 22446 +train: [3] [360/400] eta: 0:00:19 lr: 0.000234 loss: 0.7961 (0.7877) grad: 0.2813 (0.1655) time: 0.4678 data: 0.0044 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 0.7311 (0.7893) grad: 0.2430 (0.1850) time: 0.4442 data: 0.0041 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 0.7946 (0.7935) grad: 0.2430 (0.1876) time: 0.4458 data: 0.0041 max mem: 22446 +train: [3] Total time: 0:03:10 (0.4756 s / it) +train: [3] Summary: lr: 0.000240 loss: 0.7946 (0.7935) grad: 0.2430 (0.1876) +eval (validation): [3] [ 0/63] eta: 0:03:01 time: 2.8770 data: 2.6495 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:20 time: 0.3522 data: 0.0030 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3238 data: 0.0031 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3248 data: 0.0034 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3268 data: 0.0034 max mem: 22446 +eval (validation): [3] Total time: 0:00:23 (0.3782 s / it) +cv: [3] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.037 acc: 0.990 f1: 0.987 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:20:54 lr: nan time: 3.1372 data: 2.8029 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:34 lr: 0.000243 loss: 0.7732 (0.7867) grad: 0.2320 (0.2988) time: 0.4364 data: 0.0025 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:01 lr: 0.000246 loss: 0.7732 (0.8042) grad: 0.2389 (0.3161) time: 0.4428 data: 0.0038 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:44 lr: 0.000249 loss: 0.8382 (0.8510) grad: 0.3047 (0.3309) time: 0.4440 data: 0.0041 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:31 lr: 0.000252 loss: 0.9138 (0.8788) grad: 0.2924 (0.3109) time: 0.4393 data: 0.0041 max mem: 22446 +train: [4] [100/400] eta: 0:02:20 lr: 0.000255 loss: 0.8381 (0.8732) grad: 0.2756 (0.3079) time: 0.4514 data: 0.0043 max mem: 22446 +train: [4] [120/400] eta: 0:02:10 lr: 0.000258 loss: 0.8109 (0.8706) grad: 0.2823 (0.3065) time: 0.4525 data: 0.0042 max mem: 22446 +train: [4] [140/400] eta: 0:02:00 lr: 0.000261 loss: 0.7566 (0.8494) grad: 0.2475 (0.3050) time: 0.4499 data: 0.0040 max mem: 22446 +train: [4] [160/400] eta: 0:01:51 lr: 0.000264 loss: 0.6631 (0.8332) grad: 0.2441 (0.3004) time: 0.4682 data: 0.0043 max mem: 22446 +train: [4] [180/400] eta: 0:01:42 lr: 0.000267 loss: 0.7439 (0.8337) grad: 0.2726 (0.2996) time: 0.4679 data: 0.0043 max mem: 22446 +train: [4] [200/400] eta: 0:01:32 lr: 0.000270 loss: 0.7594 (0.8325) grad: 0.2766 (0.3008) time: 0.4408 data: 0.0040 max mem: 22446 +train: [4] [220/400] eta: 0:01:23 lr: 0.000273 loss: 0.7372 (0.8323) grad: 0.2681 (0.3085) time: 0.4608 data: 0.0040 max mem: 22446 +train: [4] [240/400] eta: 0:01:13 lr: 0.000276 loss: 0.6958 (0.8265) grad: 0.2779 (0.3067) time: 0.4473 data: 0.0040 max mem: 22446 +train: [4] [260/400] eta: 0:01:04 lr: 0.000279 loss: 0.6811 (0.8269) grad: 0.3183 (0.3137) time: 0.4430 data: 0.0043 max mem: 22446 +train: [4] [280/400] eta: 0:00:55 lr: 0.000282 loss: 0.8783 (0.8472) grad: 0.3604 (0.3147) time: 0.4434 data: 0.0042 max mem: 22446 +train: [4] [300/400] eta: 0:00:46 lr: 0.000285 loss: 0.8693 (0.8412) grad: 0.3029 (0.3139) time: 0.6074 data: 0.1704 max mem: 22446 +train: [4] [320/400] eta: 0:00:37 lr: 0.000288 loss: 0.7349 (0.8338) grad: 0.2852 (0.3122) time: 0.4467 data: 0.0044 max mem: 22446 +train: [4] [340/400] eta: 0:00:27 lr: 0.000291 loss: 0.7798 (0.8364) grad: 0.3177 (0.3172) time: 0.4534 data: 0.0043 max mem: 22446 +train: [4] [360/400] eta: 0:00:18 lr: 0.000294 loss: 0.7798 (0.8391) grad: 0.3857 (0.3212) time: 0.4462 data: 0.0041 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.7908 (0.8454) grad: 0.3525 (0.3292) time: 0.4442 data: 0.0042 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 0.8838 (0.8500) grad: 0.3683 (0.3340) time: 0.4425 data: 0.0043 max mem: 22446 +train: [4] Total time: 0:03:05 (0.4637 s / it) +train: [4] Summary: lr: 0.000300 loss: 0.8838 (0.8500) grad: 0.3683 (0.3340) +eval (validation): [4] [ 0/63] eta: 0:03:12 time: 3.0517 data: 2.8179 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:19 time: 0.3358 data: 0.0072 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3325 data: 0.0028 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3371 data: 0.0035 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3345 data: 0.0035 max mem: 22446 +eval (validation): [4] Total time: 0:00:24 (0.3829 s / it) +cv: [4] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.035 acc: 0.988 f1: 0.985 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:21:36 lr: nan time: 3.2424 data: 2.8586 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:46 lr: 0.000300 loss: 0.8284 (0.9346) grad: 0.3610 (0.4834) time: 0.4632 data: 0.0038 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:08 lr: 0.000300 loss: 0.8253 (0.9519) grad: 0.3688 (0.4834) time: 0.4490 data: 0.0043 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:49 lr: 0.000300 loss: 0.7621 (0.8973) grad: 0.3463 (0.4374) time: 0.4485 data: 0.0043 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:35 lr: 0.000300 loss: 0.8403 (0.9322) grad: 0.3281 (0.4200) time: 0.4501 data: 0.0043 max mem: 22446 +train: [5] [100/400] eta: 0:02:25 lr: 0.000300 loss: 0.8523 (0.9402) grad: 0.3334 (0.4104) time: 0.4741 data: 0.0042 max mem: 22446 +train: [5] [120/400] eta: 0:02:15 lr: 0.000300 loss: 0.8373 (0.9166) grad: 0.3876 (0.3970) time: 0.4733 data: 0.0043 max mem: 22446 +train: [5] [140/400] eta: 0:02:04 lr: 0.000300 loss: 0.7269 (0.9026) grad: 0.3383 (0.3912) time: 0.4487 data: 0.0041 max mem: 22446 +train: [5] [160/400] eta: 0:01:54 lr: 0.000299 loss: 0.8154 (0.8982) grad: 0.3048 (0.3808) time: 0.4714 data: 0.0041 max mem: 22446 +train: [5] [180/400] eta: 0:01:44 lr: 0.000299 loss: 0.7072 (0.8662) grad: 0.2956 (0.3784) time: 0.4607 data: 0.0042 max mem: 22446 +train: [5] [200/400] eta: 0:01:34 lr: 0.000299 loss: 0.7102 (0.8671) grad: 0.3075 (0.3737) time: 0.4606 data: 0.0041 max mem: 22446 +train: [5] [220/400] eta: 0:01:24 lr: 0.000299 loss: 0.7102 (0.8538) grad: 0.2964 (0.3679) time: 0.4461 data: 0.0042 max mem: 22446 +train: [5] [240/400] eta: 0:01:15 lr: 0.000299 loss: 0.6123 (0.8516) grad: 0.3214 (0.3687) time: 0.4675 data: 0.0043 max mem: 22446 +train: [5] [260/400] eta: 0:01:05 lr: 0.000299 loss: 0.7433 (0.8531) grad: 0.3393 (0.3658) time: 0.4469 data: 0.0043 max mem: 22446 +train: [5] [280/400] eta: 0:00:56 lr: 0.000298 loss: 0.6717 (0.8385) grad: 0.2785 (0.3595) time: 0.4427 data: 0.0041 max mem: 22446 +train: [5] [300/400] eta: 0:00:47 lr: 0.000298 loss: 0.6592 (0.8353) grad: 0.2752 (0.3559) time: 0.5999 data: 0.1669 max mem: 22446 +train: [5] [320/400] eta: 0:00:38 lr: 0.000298 loss: 0.5315 (0.8189) grad: 0.2529 (0.3492) time: 0.4610 data: 0.0039 max mem: 22446 +train: [5] [340/400] eta: 0:00:28 lr: 0.000298 loss: 0.5315 (0.8083) grad: 0.2364 (0.3436) time: 0.4534 data: 0.0040 max mem: 22446 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 0.4998 (0.7976) grad: 0.2482 (0.3415) time: 0.4595 data: 0.0042 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.4509 (0.7794) grad: 0.2697 (0.3352) time: 0.4650 data: 0.0042 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 0.4810 (0.7721) grad: 0.2686 (0.3317) time: 0.4419 data: 0.0041 max mem: 22446 +train: [5] Total time: 0:03:08 (0.4718 s / it) +train: [5] Summary: lr: 0.000297 loss: 0.4810 (0.7721) grad: 0.2686 (0.3317) +eval (validation): [5] [ 0/63] eta: 0:03:12 time: 3.0596 data: 2.8258 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:20 time: 0.3391 data: 0.0087 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3272 data: 0.0030 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3258 data: 0.0033 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3262 data: 0.0033 max mem: 22446 +eval (validation): [5] Total time: 0:00:23 (0.3786 s / it) +cv: [5] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 0.038 acc: 0.989 f1: 0.987 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [6] [ 0/400] eta: 0:21:38 lr: nan time: 3.2467 data: 2.8622 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:41 lr: 0.000296 loss: 0.6271 (0.6280) grad: 0.2647 (0.2895) time: 0.4489 data: 0.0025 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:06 lr: 0.000296 loss: 0.5380 (0.5683) grad: 0.2516 (0.2715) time: 0.4508 data: 0.0041 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:49 lr: 0.000296 loss: 0.4472 (0.5788) grad: 0.2297 (0.2615) time: 0.4552 data: 0.0043 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:35 lr: 0.000295 loss: 0.4472 (0.5678) grad: 0.2325 (0.2554) time: 0.4460 data: 0.0044 max mem: 22446 +train: [6] [100/400] eta: 0:02:24 lr: 0.000295 loss: 0.4255 (0.5551) grad: 0.2169 (0.2469) time: 0.4611 data: 0.0043 max mem: 22446 +train: [6] [120/400] eta: 0:02:13 lr: 0.000295 loss: 0.4255 (0.5448) grad: 0.2262 (0.2492) time: 0.4647 data: 0.0041 max mem: 22446 +train: [6] [140/400] eta: 0:02:03 lr: 0.000294 loss: 0.4644 (0.5530) grad: 0.2622 (0.2518) time: 0.4619 data: 0.0040 max mem: 22446 +train: [6] [160/400] eta: 0:01:53 lr: 0.000294 loss: 0.4945 (0.5497) grad: 0.2457 (0.2495) time: 0.4594 data: 0.0042 max mem: 22446 +train: [6] [180/400] eta: 0:01:44 lr: 0.000293 loss: 0.5315 (0.5514) grad: 0.2252 (0.2494) time: 0.4736 data: 0.0044 max mem: 22446 +train: [6] [200/400] eta: 0:01:34 lr: 0.000293 loss: 0.5707 (0.5478) grad: 0.2517 (0.2506) time: 0.4503 data: 0.0041 max mem: 22446 +train: [6] [220/400] eta: 0:01:24 lr: 0.000292 loss: 0.5671 (0.5559) grad: 0.2517 (0.2511) time: 0.4651 data: 0.0042 max mem: 22446 +train: [6] [240/400] eta: 0:01:14 lr: 0.000292 loss: 0.5671 (0.5550) grad: 0.2637 (0.2524) time: 0.4471 data: 0.0042 max mem: 22446 +train: [6] [260/400] eta: 0:01:05 lr: 0.000291 loss: 0.4391 (0.5542) grad: 0.2588 (0.2553) time: 0.4533 data: 0.0041 max mem: 22446 +train: [6] [280/400] eta: 0:00:55 lr: 0.000291 loss: 0.4308 (0.5542) grad: 0.2263 (0.2529) time: 0.4436 data: 0.0042 max mem: 22446 +train: [6] [300/400] eta: 0:00:47 lr: 0.000290 loss: 0.5207 (0.5536) grad: 0.2263 (0.2530) time: 0.5976 data: 0.1706 max mem: 22446 +train: [6] [320/400] eta: 0:00:37 lr: 0.000290 loss: 0.3836 (0.5433) grad: 0.2587 (0.2550) time: 0.4481 data: 0.0032 max mem: 22446 +train: [6] [340/400] eta: 0:00:28 lr: 0.000289 loss: 0.3793 (0.5402) grad: 0.2391 (0.2537) time: 0.4517 data: 0.0042 max mem: 22446 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 0.3768 (0.5317) grad: 0.2004 (0.2520) time: 0.4479 data: 0.0043 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.3581 (0.5268) grad: 0.2118 (0.2526) time: 0.4508 data: 0.0042 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.3806 (0.5187) grad: 0.2111 (0.2486) time: 0.4488 data: 0.0042 max mem: 22446 +train: [6] Total time: 0:03:07 (0.4689 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.3806 (0.5187) grad: 0.2111 (0.2486) +eval (validation): [6] [ 0/63] eta: 0:03:15 time: 3.0970 data: 2.8595 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:20 time: 0.3525 data: 0.0098 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3291 data: 0.0027 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3241 data: 0.0032 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3221 data: 0.0032 max mem: 22446 +eval (validation): [6] Total time: 0:00:24 (0.3829 s / it) +cv: [6] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.033 acc: 0.990 f1: 0.989 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [7] [ 0/400] eta: 0:21:46 lr: nan time: 3.2657 data: 2.8632 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:51 lr: 0.000286 loss: 0.3314 (0.3606) grad: 0.1922 (0.1993) time: 0.4766 data: 0.0150 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:11 lr: 0.000286 loss: 0.3314 (0.3808) grad: 0.1922 (0.1922) time: 0.4532 data: 0.0041 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:52 lr: 0.000285 loss: 0.3501 (0.3844) grad: 0.1566 (0.1815) time: 0.4508 data: 0.0043 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:37 lr: 0.000284 loss: 0.3462 (0.3767) grad: 0.1492 (0.1752) time: 0.4546 data: 0.0043 max mem: 22446 +train: [7] [100/400] eta: 0:02:25 lr: 0.000284 loss: 0.3593 (0.3830) grad: 0.1540 (0.1754) time: 0.4564 data: 0.0042 max mem: 22446 +train: [7] [120/400] eta: 0:02:15 lr: 0.000283 loss: 0.3593 (0.3733) grad: 0.1540 (0.1760) time: 0.4675 data: 0.0042 max mem: 22446 +train: [7] [140/400] eta: 0:02:04 lr: 0.000282 loss: 0.2885 (0.3701) grad: 0.1507 (0.1707) time: 0.4640 data: 0.0043 max mem: 22446 +train: [7] [160/400] eta: 0:01:54 lr: 0.000282 loss: 0.2890 (0.3677) grad: 0.1657 (0.1723) time: 0.4703 data: 0.0044 max mem: 22446 +train: [7] [180/400] eta: 0:01:45 lr: 0.000281 loss: 0.3377 (0.3704) grad: 0.1944 (0.1741) time: 0.4869 data: 0.0043 max mem: 22446 +train: [7] [200/400] eta: 0:01:35 lr: 0.000280 loss: 0.3785 (0.3731) grad: 0.1886 (0.1748) time: 0.4658 data: 0.0040 max mem: 22446 +train: [7] [220/400] eta: 0:01:25 lr: 0.000279 loss: 0.3942 (0.3753) grad: 0.1842 (0.1759) time: 0.4450 data: 0.0041 max mem: 22446 +train: [7] [240/400] eta: 0:01:15 lr: 0.000278 loss: 0.3715 (0.3713) grad: 0.1629 (0.1753) time: 0.4533 data: 0.0040 max mem: 22446 +train: [7] [260/400] eta: 0:01:06 lr: 0.000278 loss: 0.3349 (0.3742) grad: 0.1675 (0.1761) time: 0.4643 data: 0.0041 max mem: 22446 +train: [7] [280/400] eta: 0:00:56 lr: 0.000277 loss: 0.3485 (0.3779) grad: 0.1784 (0.1795) time: 0.4473 data: 0.0041 max mem: 22446 +train: [7] [300/400] eta: 0:00:47 lr: 0.000276 loss: 0.3485 (0.3777) grad: 0.1765 (0.1792) time: 0.5980 data: 0.1710 max mem: 22446 +train: [7] [320/400] eta: 0:00:38 lr: 0.000275 loss: 0.3009 (0.3722) grad: 0.1415 (0.1759) time: 0.4395 data: 0.0033 max mem: 22446 +train: [7] [340/400] eta: 0:00:28 lr: 0.000274 loss: 0.2704 (0.3668) grad: 0.1226 (0.1733) time: 0.4378 data: 0.0042 max mem: 22446 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 0.2521 (0.3610) grad: 0.1226 (0.1701) time: 0.4522 data: 0.0041 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.2521 (0.3559) grad: 0.1166 (0.1676) time: 0.4489 data: 0.0043 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.2483 (0.3514) grad: 0.1176 (0.1658) time: 0.4496 data: 0.0043 max mem: 22446 +train: [7] Total time: 0:03:08 (0.4717 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.2483 (0.3514) grad: 0.1176 (0.1658) +eval (validation): [7] [ 0/63] eta: 0:03:15 time: 3.1018 data: 2.8675 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:20 time: 0.3417 data: 0.0040 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3327 data: 0.0029 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3259 data: 0.0036 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3223 data: 0.0036 max mem: 22446 +eval (validation): [7] Total time: 0:00:24 (0.3813 s / it) +cv: [7] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.027 acc: 0.991 f1: 0.990 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:21:02 lr: nan time: 3.1562 data: 2.8132 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:38 lr: 0.000270 loss: 0.2181 (0.2522) grad: 0.1266 (0.1284) time: 0.4461 data: 0.0035 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:04 lr: 0.000270 loss: 0.2489 (0.2675) grad: 0.1394 (0.1608) time: 0.4484 data: 0.0041 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:47 lr: 0.000269 loss: 0.2578 (0.2764) grad: 0.1270 (0.1465) time: 0.4495 data: 0.0042 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:34 lr: 0.000268 loss: 0.2541 (0.2749) grad: 0.1150 (0.1427) time: 0.4490 data: 0.0042 max mem: 22446 +train: [8] [100/400] eta: 0:02:22 lr: 0.000267 loss: 0.2541 (0.2740) grad: 0.1166 (0.1359) time: 0.4544 data: 0.0043 max mem: 22446 +train: [8] [120/400] eta: 0:02:12 lr: 0.000266 loss: 0.2691 (0.2772) grad: 0.1229 (0.1367) time: 0.4673 data: 0.0042 max mem: 22446 +train: [8] [140/400] eta: 0:02:02 lr: 0.000265 loss: 0.2732 (0.2806) grad: 0.1265 (0.1362) time: 0.4624 data: 0.0043 max mem: 22446 +train: [8] [160/400] eta: 0:01:52 lr: 0.000264 loss: 0.2732 (0.2806) grad: 0.1491 (0.1387) time: 0.4553 data: 0.0040 max mem: 22446 +train: [8] [180/400] eta: 0:01:44 lr: 0.000263 loss: 0.2579 (0.2774) grad: 0.1438 (0.1375) time: 0.4884 data: 0.0044 max mem: 22446 +train: [8] [200/400] eta: 0:01:34 lr: 0.000262 loss: 0.2706 (0.2789) grad: 0.1305 (0.1380) time: 0.4794 data: 0.0045 max mem: 22446 +train: [8] [220/400] eta: 0:01:24 lr: 0.000260 loss: 0.2596 (0.2769) grad: 0.1305 (0.1372) time: 0.4511 data: 0.0043 max mem: 22446 +train: [8] [240/400] eta: 0:01:15 lr: 0.000259 loss: 0.2293 (0.2735) grad: 0.1186 (0.1371) time: 0.4415 data: 0.0041 max mem: 22446 +train: [8] [260/400] eta: 0:01:05 lr: 0.000258 loss: 0.2083 (0.2717) grad: 0.1179 (0.1356) time: 0.4600 data: 0.0044 max mem: 22446 +train: [8] [280/400] eta: 0:00:56 lr: 0.000257 loss: 0.2497 (0.2743) grad: 0.1336 (0.1358) time: 0.4637 data: 0.0041 max mem: 22446 +train: [8] [300/400] eta: 0:00:47 lr: 0.000256 loss: 0.2388 (0.2715) grad: 0.1278 (0.1343) time: 0.6063 data: 0.1715 max mem: 22446 +train: [8] [320/400] eta: 0:00:38 lr: 0.000255 loss: 0.2204 (0.2698) grad: 0.1204 (0.1341) time: 0.4470 data: 0.0029 max mem: 22446 +train: [8] [340/400] eta: 0:00:28 lr: 0.000254 loss: 0.2310 (0.2673) grad: 0.1001 (0.1320) time: 0.4425 data: 0.0043 max mem: 22446 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 0.2130 (0.2652) grad: 0.0966 (0.1304) time: 0.4485 data: 0.0043 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.1945 (0.2618) grad: 0.0855 (0.1290) time: 0.4550 data: 0.0044 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.2055 (0.2598) grad: 0.1058 (0.1280) time: 0.4468 data: 0.0042 max mem: 22446 +train: [8] Total time: 0:03:08 (0.4704 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.2055 (0.2598) grad: 0.1058 (0.1280) +eval (validation): [8] [ 0/63] eta: 0:03:24 time: 3.2431 data: 3.0001 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:20 time: 0.3299 data: 0.0028 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3380 data: 0.0031 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3143 data: 0.0032 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3159 data: 0.0031 max mem: 22446 +eval (validation): [8] Total time: 0:00:23 (0.3794 s / it) +cv: [8] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.051 acc: 0.992 f1: 0.990 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [9] [ 0/400] eta: 0:21:16 lr: nan time: 3.1918 data: 2.8102 max mem: 22446 +train: [9] [ 20/400] eta: 0:03:35 lr: 0.000249 loss: 0.1976 (0.1998) grad: 0.0802 (0.0864) time: 0.4363 data: 0.0032 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:03 lr: 0.000248 loss: 0.2058 (0.2206) grad: 0.0932 (0.1009) time: 0.4501 data: 0.0039 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:47 lr: 0.000247 loss: 0.2058 (0.2182) grad: 0.1082 (0.1064) time: 0.4553 data: 0.0044 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:34 lr: 0.000246 loss: 0.1936 (0.2180) grad: 0.0960 (0.1071) time: 0.4532 data: 0.0042 max mem: 22446 +train: [9] [100/400] eta: 0:02:23 lr: 0.000244 loss: 0.1972 (0.2188) grad: 0.0972 (0.1090) time: 0.4639 data: 0.0041 max mem: 22446 +train: [9] [120/400] eta: 0:02:12 lr: 0.000243 loss: 0.1952 (0.2178) grad: 0.0931 (0.1059) time: 0.4538 data: 0.0041 max mem: 22446 +train: [9] [140/400] eta: 0:02:02 lr: 0.000242 loss: 0.1911 (0.2157) grad: 0.0861 (0.1052) time: 0.4482 data: 0.0042 max mem: 22446 +train: [9] [160/400] eta: 0:01:52 lr: 0.000241 loss: 0.1906 (0.2138) grad: 0.1204 (0.1063) time: 0.4594 data: 0.0044 max mem: 22446 +train: [9] [180/400] eta: 0:01:42 lr: 0.000240 loss: 0.1771 (0.2117) grad: 0.1016 (0.1054) time: 0.4560 data: 0.0041 max mem: 22446 +train: [9] [200/400] eta: 0:01:34 lr: 0.000238 loss: 0.1900 (0.2120) grad: 0.0887 (0.1053) time: 0.5008 data: 0.0047 max mem: 22446 +train: [9] [220/400] eta: 0:01:24 lr: 0.000237 loss: 0.2028 (0.2111) grad: 0.0941 (0.1047) time: 0.4624 data: 0.0044 max mem: 22446 +train: [9] [240/400] eta: 0:01:14 lr: 0.000236 loss: 0.1906 (0.2110) grad: 0.0880 (0.1051) time: 0.4321 data: 0.0041 max mem: 22446 +train: [9] [260/400] eta: 0:01:05 lr: 0.000234 loss: 0.1878 (0.2103) grad: 0.1002 (0.1049) time: 0.4572 data: 0.0042 max mem: 22446 +train: [9] [280/400] eta: 0:00:55 lr: 0.000233 loss: 0.1713 (0.2074) grad: 0.0825 (0.1037) time: 0.4476 data: 0.0044 max mem: 22446 +train: [9] [300/400] eta: 0:00:47 lr: 0.000232 loss: 0.1796 (0.2081) grad: 0.0853 (0.1051) time: 0.5932 data: 0.1655 max mem: 22446 +train: [9] [320/400] eta: 0:00:37 lr: 0.000230 loss: 0.1956 (0.2071) grad: 0.0853 (0.1039) time: 0.4524 data: 0.0032 max mem: 22446 +train: [9] [340/400] eta: 0:00:28 lr: 0.000229 loss: 0.1898 (0.2059) grad: 0.0776 (0.1028) time: 0.4429 data: 0.0042 max mem: 22446 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 0.1844 (0.2046) grad: 0.0802 (0.1017) time: 0.4387 data: 0.0041 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.1844 (0.2048) grad: 0.0871 (0.1025) time: 0.4433 data: 0.0043 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.1709 (0.2030) grad: 0.0741 (0.1011) time: 0.4536 data: 0.0044 max mem: 22446 +train: [9] Total time: 0:03:06 (0.4674 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.1709 (0.2030) grad: 0.0741 (0.1011) +eval (validation): [9] [ 0/63] eta: 0:03:10 time: 3.0262 data: 2.7467 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:20 time: 0.3550 data: 0.0043 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3185 data: 0.0028 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3250 data: 0.0033 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3242 data: 0.0034 max mem: 22446 +eval (validation): [9] Total time: 0:00:23 (0.3798 s / it) +cv: [9] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.033 acc: 0.992 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:20:55 lr: nan time: 3.1390 data: 2.7612 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:40 lr: 0.000224 loss: 0.1568 (0.1618) grad: 0.0650 (0.0686) time: 0.4524 data: 0.0041 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:05 lr: 0.000222 loss: 0.1568 (0.1768) grad: 0.0752 (0.0864) time: 0.4455 data: 0.0043 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:47 lr: 0.000221 loss: 0.1667 (0.1749) grad: 0.0994 (0.0955) time: 0.4434 data: 0.0042 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:34 lr: 0.000220 loss: 0.1603 (0.1728) grad: 0.0810 (0.0916) time: 0.4564 data: 0.0044 max mem: 22446 +train: [10] [100/400] eta: 0:02:23 lr: 0.000218 loss: 0.1485 (0.1709) grad: 0.0723 (0.0868) time: 0.4689 data: 0.0042 max mem: 22446 +train: [10] [120/400] eta: 0:02:13 lr: 0.000217 loss: 0.1504 (0.1711) grad: 0.0705 (0.0853) time: 0.4620 data: 0.0043 max mem: 22446 +train: [10] [140/400] eta: 0:02:03 lr: 0.000215 loss: 0.1565 (0.1716) grad: 0.0708 (0.0844) time: 0.4559 data: 0.0042 max mem: 22446 +train: [10] [160/400] eta: 0:01:53 lr: 0.000214 loss: 0.1511 (0.1711) grad: 0.0709 (0.0842) time: 0.4512 data: 0.0041 max mem: 22446 +train: [10] [180/400] eta: 0:01:43 lr: 0.000213 loss: 0.1548 (0.1701) grad: 0.0736 (0.0836) time: 0.4685 data: 0.0044 max mem: 22446 +train: [10] [200/400] eta: 0:01:34 lr: 0.000211 loss: 0.1587 (0.1697) grad: 0.0786 (0.0825) time: 0.4818 data: 0.0046 max mem: 22446 +train: [10] [220/400] eta: 0:01:24 lr: 0.000210 loss: 0.1680 (0.1703) grad: 0.0797 (0.0826) time: 0.4739 data: 0.0045 max mem: 22446 +train: [10] [240/400] eta: 0:01:15 lr: 0.000208 loss: 0.1482 (0.1692) grad: 0.0647 (0.0816) time: 0.4499 data: 0.0043 max mem: 22446 +train: [10] [260/400] eta: 0:01:05 lr: 0.000207 loss: 0.1536 (0.1708) grad: 0.0640 (0.0818) time: 0.4509 data: 0.0039 max mem: 22446 +train: [10] [280/400] eta: 0:00:56 lr: 0.000205 loss: 0.1655 (0.1701) grad: 0.0723 (0.0816) time: 0.4807 data: 0.0045 max mem: 22446 +train: [10] [300/400] eta: 0:00:48 lr: 0.000204 loss: 0.1506 (0.1696) grad: 0.0700 (0.0813) time: 0.6306 data: 0.1700 max mem: 22446 +train: [10] [320/400] eta: 0:00:38 lr: 0.000202 loss: 0.1408 (0.1684) grad: 0.0682 (0.0807) time: 0.4442 data: 0.0038 max mem: 22446 +train: [10] [340/400] eta: 0:00:28 lr: 0.000201 loss: 0.1463 (0.1683) grad: 0.0751 (0.0808) time: 0.4506 data: 0.0042 max mem: 22446 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 0.1467 (0.1670) grad: 0.0704 (0.0796) time: 0.4470 data: 0.0042 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.1357 (0.1659) grad: 0.0608 (0.0785) time: 0.4524 data: 0.0041 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.1461 (0.1657) grad: 0.0547 (0.0777) time: 0.4476 data: 0.0042 max mem: 22446 +train: [10] Total time: 0:03:09 (0.4729 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.1461 (0.1657) grad: 0.0547 (0.0777) +eval (validation): [10] [ 0/63] eta: 0:03:14 time: 3.0809 data: 2.8446 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:19 time: 0.3229 data: 0.0035 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3375 data: 0.0026 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3070 data: 0.0029 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3075 data: 0.0028 max mem: 22446 +eval (validation): [10] Total time: 0:00:23 (0.3712 s / it) +cv: [10] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.028 acc: 0.993 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [11] [ 0/400] eta: 0:21:32 lr: nan time: 3.2302 data: 2.8459 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:40 lr: 0.000195 loss: 0.1389 (0.1443) grad: 0.0435 (0.0512) time: 0.4466 data: 0.0025 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:05 lr: 0.000193 loss: 0.1397 (0.1448) grad: 0.0538 (0.0541) time: 0.4476 data: 0.0042 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:47 lr: 0.000192 loss: 0.1369 (0.1449) grad: 0.0540 (0.0555) time: 0.4463 data: 0.0042 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:33 lr: 0.000190 loss: 0.1408 (0.1476) grad: 0.0493 (0.0568) time: 0.4404 data: 0.0044 max mem: 22446 +train: [11] [100/400] eta: 0:02:22 lr: 0.000189 loss: 0.1431 (0.1464) grad: 0.0623 (0.0585) time: 0.4569 data: 0.0033 max mem: 22446 +train: [11] [120/400] eta: 0:02:12 lr: 0.000187 loss: 0.1422 (0.1475) grad: 0.0631 (0.0596) time: 0.4699 data: 0.0043 max mem: 22446 +train: [11] [140/400] eta: 0:02:02 lr: 0.000186 loss: 0.1363 (0.1476) grad: 0.0605 (0.0596) time: 0.4529 data: 0.0042 max mem: 22446 +train: [11] [160/400] eta: 0:01:52 lr: 0.000184 loss: 0.1320 (0.1463) grad: 0.0419 (0.0579) time: 0.4489 data: 0.0040 max mem: 22446 +train: [11] [180/400] eta: 0:01:43 lr: 0.000183 loss: 0.1365 (0.1453) grad: 0.0453 (0.0577) time: 0.4681 data: 0.0044 max mem: 22446 +train: [11] [200/400] eta: 0:01:33 lr: 0.000181 loss: 0.1403 (0.1452) grad: 0.0616 (0.0576) time: 0.4788 data: 0.0044 max mem: 22446 +train: [11] [220/400] eta: 0:01:24 lr: 0.000180 loss: 0.1372 (0.1459) grad: 0.0664 (0.0594) time: 0.4664 data: 0.0044 max mem: 22446 +train: [11] [240/400] eta: 0:01:14 lr: 0.000178 loss: 0.1321 (0.1439) grad: 0.0552 (0.0589) time: 0.4581 data: 0.0040 max mem: 22446 +train: [11] [260/400] eta: 0:01:05 lr: 0.000177 loss: 0.1343 (0.1440) grad: 0.0504 (0.0589) time: 0.4439 data: 0.0034 max mem: 22446 +train: [11] [280/400] eta: 0:00:56 lr: 0.000175 loss: 0.1248 (0.1432) grad: 0.0565 (0.0591) time: 0.4862 data: 0.0044 max mem: 22446 +train: [11] [300/400] eta: 0:00:47 lr: 0.000174 loss: 0.1205 (0.1425) grad: 0.0564 (0.0589) time: 0.6005 data: 0.1660 max mem: 22446 +train: [11] [320/400] eta: 0:00:38 lr: 0.000172 loss: 0.1280 (0.1426) grad: 0.0497 (0.0585) time: 0.4517 data: 0.0032 max mem: 22446 +train: [11] [340/400] eta: 0:00:28 lr: 0.000170 loss: 0.1265 (0.1415) grad: 0.0475 (0.0574) time: 0.4616 data: 0.0042 max mem: 22446 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 0.1192 (0.1412) grad: 0.0487 (0.0572) time: 0.4550 data: 0.0042 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.1280 (0.1404) grad: 0.0514 (0.0567) time: 0.4549 data: 0.0041 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.1241 (0.1398) grad: 0.0410 (0.0562) time: 0.4609 data: 0.0041 max mem: 22446 +train: [11] Total time: 0:03:08 (0.4723 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.1241 (0.1398) grad: 0.0410 (0.0562) +eval (validation): [11] [ 0/63] eta: 0:03:19 time: 3.1619 data: 2.8893 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:20 time: 0.3519 data: 0.0036 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3345 data: 0.0031 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3254 data: 0.0034 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3209 data: 0.0034 max mem: 22446 +eval (validation): [11] Total time: 0:00:24 (0.3863 s / it) +cv: [11] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.029 acc: 0.993 f1: 0.992 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:21:59 lr: nan time: 3.2994 data: 2.8998 max mem: 22446 +train: [12] [ 20/400] eta: 0:03:46 lr: 0.000164 loss: 0.1105 (0.1176) grad: 0.0309 (0.0419) time: 0.4599 data: 0.0026 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:09 lr: 0.000163 loss: 0.1248 (0.1271) grad: 0.0476 (0.0456) time: 0.4517 data: 0.0041 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:49 lr: 0.000161 loss: 0.1377 (0.1282) grad: 0.0516 (0.0475) time: 0.4417 data: 0.0044 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:35 lr: 0.000160 loss: 0.1226 (0.1259) grad: 0.0446 (0.0459) time: 0.4460 data: 0.0044 max mem: 22446 +train: [12] [100/400] eta: 0:02:23 lr: 0.000158 loss: 0.1263 (0.1294) grad: 0.0453 (0.0472) time: 0.4462 data: 0.0042 max mem: 22446 +train: [12] [120/400] eta: 0:02:13 lr: 0.000156 loss: 0.1382 (0.1298) grad: 0.0391 (0.0461) time: 0.4667 data: 0.0045 max mem: 22446 +train: [12] [140/400] eta: 0:02:02 lr: 0.000155 loss: 0.1271 (0.1288) grad: 0.0375 (0.0461) time: 0.4452 data: 0.0044 max mem: 22446 +train: [12] [160/400] eta: 0:01:51 lr: 0.000153 loss: 0.1239 (0.1280) grad: 0.0411 (0.0466) time: 0.4334 data: 0.0042 max mem: 22446 +train: [12] [180/400] eta: 0:01:42 lr: 0.000152 loss: 0.1178 (0.1269) grad: 0.0409 (0.0462) time: 0.4550 data: 0.0042 max mem: 22446 +train: [12] [200/400] eta: 0:01:32 lr: 0.000150 loss: 0.1156 (0.1262) grad: 0.0479 (0.0469) time: 0.4613 data: 0.0041 max mem: 22446 +train: [12] [220/400] eta: 0:01:23 lr: 0.000149 loss: 0.1098 (0.1257) grad: 0.0482 (0.0464) time: 0.4686 data: 0.0047 max mem: 22446 +train: [12] [240/400] eta: 0:01:14 lr: 0.000147 loss: 0.1171 (0.1262) grad: 0.0399 (0.0463) time: 0.4609 data: 0.0044 max mem: 22446 +train: [12] [260/400] eta: 0:01:04 lr: 0.000145 loss: 0.1297 (0.1261) grad: 0.0387 (0.0459) time: 0.4473 data: 0.0042 max mem: 22446 +train: [12] [280/400] eta: 0:00:55 lr: 0.000144 loss: 0.1233 (0.1259) grad: 0.0330 (0.0456) time: 0.4541 data: 0.0041 max mem: 22446 +train: [12] [300/400] eta: 0:00:47 lr: 0.000142 loss: 0.1171 (0.1253) grad: 0.0322 (0.0450) time: 0.6085 data: 0.1684 max mem: 22446 +train: [12] [320/400] eta: 0:00:37 lr: 0.000141 loss: 0.1131 (0.1255) grad: 0.0320 (0.0446) time: 0.4526 data: 0.0034 max mem: 22446 +train: [12] [340/400] eta: 0:00:28 lr: 0.000139 loss: 0.1123 (0.1249) grad: 0.0374 (0.0448) time: 0.4500 data: 0.0035 max mem: 22446 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 0.1146 (0.1244) grad: 0.0362 (0.0445) time: 0.4519 data: 0.0044 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.1182 (0.1240) grad: 0.0329 (0.0442) time: 0.4425 data: 0.0041 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.1150 (0.1240) grad: 0.0361 (0.0441) time: 0.4400 data: 0.0043 max mem: 22446 +train: [12] Total time: 0:03:06 (0.4669 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.1150 (0.1240) grad: 0.0361 (0.0441) +eval (validation): [12] [ 0/63] eta: 0:03:10 time: 3.0195 data: 2.7835 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:20 time: 0.3496 data: 0.0041 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3348 data: 0.0029 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3160 data: 0.0035 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3132 data: 0.0035 max mem: 22446 +eval (validation): [12] Total time: 0:00:23 (0.3801 s / it) +cv: [12] best hparam: (1.6, 1.0) (027) ('027_lr1.6e+00_wd1.0e+00') loss: 0.026 acc: 0.993 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [13] [ 0/400] eta: 0:21:00 lr: nan time: 3.1513 data: 2.8177 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:37 lr: 0.000133 loss: 0.0989 (0.1151) grad: 0.0407 (0.0455) time: 0.4439 data: 0.0034 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:02 lr: 0.000131 loss: 0.1136 (0.1182) grad: 0.0357 (0.0419) time: 0.4392 data: 0.0037 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:45 lr: 0.000130 loss: 0.1136 (0.1198) grad: 0.0316 (0.0410) time: 0.4461 data: 0.0044 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:32 lr: 0.000128 loss: 0.1094 (0.1168) grad: 0.0385 (0.0412) time: 0.4461 data: 0.0041 max mem: 22446 +train: [13] [100/400] eta: 0:02:21 lr: 0.000127 loss: 0.1056 (0.1145) grad: 0.0385 (0.0403) time: 0.4445 data: 0.0043 max mem: 22446 +train: [13] [120/400] eta: 0:02:11 lr: 0.000125 loss: 0.1105 (0.1150) grad: 0.0333 (0.0396) time: 0.4625 data: 0.0043 max mem: 22446 +train: [13] [140/400] eta: 0:02:01 lr: 0.000124 loss: 0.1213 (0.1167) grad: 0.0340 (0.0395) time: 0.4602 data: 0.0044 max mem: 22446 +train: [13] [160/400] eta: 0:01:51 lr: 0.000122 loss: 0.1217 (0.1167) grad: 0.0332 (0.0393) time: 0.4437 data: 0.0040 max mem: 22446 +train: [13] [180/400] eta: 0:01:42 lr: 0.000120 loss: 0.1058 (0.1161) grad: 0.0333 (0.0394) time: 0.4604 data: 0.0041 max mem: 22446 +train: [13] [200/400] eta: 0:01:32 lr: 0.000119 loss: 0.1055 (0.1158) grad: 0.0328 (0.0387) time: 0.4625 data: 0.0043 max mem: 22446 +train: [13] [220/400] eta: 0:01:23 lr: 0.000117 loss: 0.1078 (0.1155) grad: 0.0294 (0.0380) time: 0.4735 data: 0.0045 max mem: 22446 +train: [13] [240/400] eta: 0:01:14 lr: 0.000116 loss: 0.1078 (0.1151) grad: 0.0293 (0.0378) time: 0.4631 data: 0.0041 max mem: 22446 +train: [13] [260/400] eta: 0:01:04 lr: 0.000114 loss: 0.1096 (0.1150) grad: 0.0316 (0.0377) time: 0.4490 data: 0.0041 max mem: 22446 +train: [13] [280/400] eta: 0:00:55 lr: 0.000113 loss: 0.1103 (0.1146) grad: 0.0290 (0.0374) time: 0.4591 data: 0.0042 max mem: 22446 +train: [13] [300/400] eta: 0:00:47 lr: 0.000111 loss: 0.1031 (0.1141) grad: 0.0301 (0.0374) time: 0.6106 data: 0.1669 max mem: 22446 +train: [13] [320/400] eta: 0:00:37 lr: 0.000110 loss: 0.1001 (0.1135) grad: 0.0312 (0.0370) time: 0.4427 data: 0.0034 max mem: 22446 +train: [13] [340/400] eta: 0:00:28 lr: 0.000108 loss: 0.1001 (0.1134) grad: 0.0307 (0.0368) time: 0.4410 data: 0.0041 max mem: 22446 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 0.1068 (0.1131) grad: 0.0307 (0.0365) time: 0.4482 data: 0.0038 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.1065 (0.1124) grad: 0.0301 (0.0364) time: 0.4502 data: 0.0042 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.1054 (0.1126) grad: 0.0315 (0.0363) time: 0.4503 data: 0.0042 max mem: 22446 +train: [13] Total time: 0:03:06 (0.4671 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.1054 (0.1126) grad: 0.0315 (0.0363) +eval (validation): [13] [ 0/63] eta: 0:03:12 time: 3.0489 data: 2.7754 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:20 time: 0.3382 data: 0.0038 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3297 data: 0.0027 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3151 data: 0.0035 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3131 data: 0.0035 max mem: 22446 +eval (validation): [13] Total time: 0:00:23 (0.3747 s / it) +cv: [13] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 0.025 acc: 0.993 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:21:37 lr: nan time: 3.2437 data: 2.8956 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:36 lr: 0.000102 loss: 0.1025 (0.1087) grad: 0.0302 (0.0330) time: 0.4366 data: 0.0025 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:04 lr: 0.000101 loss: 0.1025 (0.1095) grad: 0.0305 (0.0348) time: 0.4508 data: 0.0038 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:46 lr: 0.000099 loss: 0.0972 (0.1069) grad: 0.0314 (0.0345) time: 0.4422 data: 0.0043 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:32 lr: 0.000098 loss: 0.1012 (0.1073) grad: 0.0317 (0.0345) time: 0.4421 data: 0.0043 max mem: 22446 +train: [14] [100/400] eta: 0:02:21 lr: 0.000096 loss: 0.1125 (0.1078) grad: 0.0338 (0.0347) time: 0.4464 data: 0.0043 max mem: 22446 +train: [14] [120/400] eta: 0:02:11 lr: 0.000095 loss: 0.1094 (0.1078) grad: 0.0320 (0.0345) time: 0.4626 data: 0.0043 max mem: 22446 +train: [14] [140/400] eta: 0:02:01 lr: 0.000093 loss: 0.1061 (0.1080) grad: 0.0310 (0.0344) time: 0.4617 data: 0.0040 max mem: 22446 +train: [14] [160/400] eta: 0:01:51 lr: 0.000092 loss: 0.1058 (0.1074) grad: 0.0294 (0.0340) time: 0.4443 data: 0.0040 max mem: 22446 +train: [14] [180/400] eta: 0:01:42 lr: 0.000090 loss: 0.1029 (0.1071) grad: 0.0286 (0.0336) time: 0.4750 data: 0.0041 max mem: 22446 +train: [14] [200/400] eta: 0:01:33 lr: 0.000089 loss: 0.1006 (0.1065) grad: 0.0276 (0.0330) time: 0.4660 data: 0.0043 max mem: 22446 +train: [14] [220/400] eta: 0:01:24 lr: 0.000088 loss: 0.0992 (0.1067) grad: 0.0287 (0.0328) time: 0.4717 data: 0.0043 max mem: 22446 +train: [14] [240/400] eta: 0:01:14 lr: 0.000086 loss: 0.1091 (0.1069) grad: 0.0304 (0.0327) time: 0.4680 data: 0.0044 max mem: 22446 +train: [14] [260/400] eta: 0:01:05 lr: 0.000085 loss: 0.1048 (0.1070) grad: 0.0292 (0.0325) time: 0.4487 data: 0.0042 max mem: 22446 +train: [14] [280/400] eta: 0:00:55 lr: 0.000083 loss: 0.1048 (0.1076) grad: 0.0292 (0.0324) time: 0.4564 data: 0.0041 max mem: 22446 +train: [14] [300/400] eta: 0:00:47 lr: 0.000082 loss: 0.1009 (0.1074) grad: 0.0298 (0.0323) time: 0.6241 data: 0.1782 max mem: 22446 +train: [14] [320/400] eta: 0:00:37 lr: 0.000081 loss: 0.0961 (0.1067) grad: 0.0284 (0.0320) time: 0.4442 data: 0.0031 max mem: 22446 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 0.0920 (0.1064) grad: 0.0287 (0.0320) time: 0.4462 data: 0.0042 max mem: 22446 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 0.0944 (0.1066) grad: 0.0297 (0.0320) time: 0.4483 data: 0.0039 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.1044 (0.1065) grad: 0.0313 (0.0321) time: 0.4496 data: 0.0043 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.1036 (0.1065) grad: 0.0318 (0.0322) time: 0.4447 data: 0.0044 max mem: 22446 +train: [14] Total time: 0:03:07 (0.4690 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.1036 (0.1065) grad: 0.0318 (0.0322) +eval (validation): [14] [ 0/63] eta: 0:03:13 time: 3.0688 data: 2.7871 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:21 time: 0.3799 data: 0.0044 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3391 data: 0.0035 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3136 data: 0.0031 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3135 data: 0.0031 max mem: 22446 +eval (validation): [14] Total time: 0:00:24 (0.3908 s / it) +cv: [14] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 0.025 acc: 0.993 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:22:01 lr: nan time: 3.3049 data: 2.8997 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:39 lr: 0.000074 loss: 0.0949 (0.1008) grad: 0.0295 (0.0313) time: 0.4416 data: 0.0024 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:04 lr: 0.000072 loss: 0.1023 (0.1040) grad: 0.0295 (0.0306) time: 0.4420 data: 0.0036 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:46 lr: 0.000071 loss: 0.0973 (0.1008) grad: 0.0282 (0.0309) time: 0.4477 data: 0.0041 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:33 lr: 0.000070 loss: 0.0962 (0.1012) grad: 0.0281 (0.0309) time: 0.4525 data: 0.0041 max mem: 22446 +train: [15] [100/400] eta: 0:02:21 lr: 0.000068 loss: 0.0945 (0.0991) grad: 0.0293 (0.0307) time: 0.4405 data: 0.0042 max mem: 22446 +train: [15] [120/400] eta: 0:02:11 lr: 0.000067 loss: 0.0915 (0.0995) grad: 0.0293 (0.0307) time: 0.4599 data: 0.0041 max mem: 22446 +train: [15] [140/400] eta: 0:02:01 lr: 0.000066 loss: 0.0968 (0.0996) grad: 0.0294 (0.0307) time: 0.4464 data: 0.0044 max mem: 22446 +train: [15] [160/400] eta: 0:01:51 lr: 0.000064 loss: 0.1018 (0.1006) grad: 0.0298 (0.0309) time: 0.4319 data: 0.0040 max mem: 22446 +train: [15] [180/400] eta: 0:01:41 lr: 0.000063 loss: 0.1018 (0.1006) grad: 0.0304 (0.0310) time: 0.4476 data: 0.0040 max mem: 22446 +train: [15] [200/400] eta: 0:01:32 lr: 0.000062 loss: 0.1035 (0.1008) grad: 0.0304 (0.0311) time: 0.4651 data: 0.0044 max mem: 22446 +train: [15] [220/400] eta: 0:01:23 lr: 0.000061 loss: 0.1059 (0.1014) grad: 0.0325 (0.0315) time: 0.4697 data: 0.0044 max mem: 22446 +train: [15] [240/400] eta: 0:01:14 lr: 0.000059 loss: 0.0976 (0.1010) grad: 0.0294 (0.0313) time: 0.4810 data: 0.0045 max mem: 22446 +train: [15] [260/400] eta: 0:01:04 lr: 0.000058 loss: 0.0957 (0.1011) grad: 0.0287 (0.0314) time: 0.4649 data: 0.0043 max mem: 22446 +train: [15] [280/400] eta: 0:00:55 lr: 0.000057 loss: 0.1009 (0.1011) grad: 0.0291 (0.0315) time: 0.4619 data: 0.0043 max mem: 22446 +train: [15] [300/400] eta: 0:00:47 lr: 0.000056 loss: 0.0986 (0.1008) grad: 0.0313 (0.0318) time: 0.6123 data: 0.1641 max mem: 22446 +train: [15] [320/400] eta: 0:00:37 lr: 0.000054 loss: 0.0983 (0.1012) grad: 0.0300 (0.0316) time: 0.4857 data: 0.0034 max mem: 22446 +train: [15] [340/400] eta: 0:00:28 lr: 0.000053 loss: 0.1024 (0.1013) grad: 0.0289 (0.0315) time: 0.4660 data: 0.0042 max mem: 22446 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 0.0999 (0.1017) grad: 0.0292 (0.0315) time: 0.4424 data: 0.0035 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.1041 (0.1021) grad: 0.0312 (0.0315) time: 0.4518 data: 0.0045 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.0930 (0.1017) grad: 0.0305 (0.0314) time: 0.4565 data: 0.0043 max mem: 22446 +train: [15] Total time: 0:03:08 (0.4710 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.0930 (0.1017) grad: 0.0305 (0.0314) +eval (validation): [15] [ 0/63] eta: 0:03:17 time: 3.1367 data: 2.8316 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:21 time: 0.3684 data: 0.0039 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:09 time: 0.3361 data: 0.0031 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3317 data: 0.0037 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3282 data: 0.0036 max mem: 22446 +eval (validation): [15] Total time: 0:00:24 (0.3942 s / it) +cv: [15] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 0.025 acc: 0.992 f1: 0.990 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:21:16 lr: nan time: 3.1903 data: 2.8320 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:52 lr: 0.000048 loss: 0.0936 (0.0981) grad: 0.0265 (0.0293) time: 0.4834 data: 0.0039 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:13 lr: 0.000047 loss: 0.0936 (0.0973) grad: 0.0287 (0.0307) time: 0.4610 data: 0.0045 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:53 lr: 0.000046 loss: 0.0983 (0.1017) grad: 0.0299 (0.0303) time: 0.4510 data: 0.0043 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:38 lr: 0.000045 loss: 0.1020 (0.1022) grad: 0.0299 (0.0303) time: 0.4481 data: 0.0044 max mem: 22446 +train: [16] [100/400] eta: 0:02:26 lr: 0.000044 loss: 0.1014 (0.1009) grad: 0.0280 (0.0303) time: 0.4557 data: 0.0044 max mem: 22446 +train: [16] [120/400] eta: 0:02:15 lr: 0.000043 loss: 0.0962 (0.1006) grad: 0.0287 (0.0303) time: 0.4790 data: 0.0042 max mem: 22446 +train: [16] [140/400] eta: 0:02:05 lr: 0.000042 loss: 0.1015 (0.1015) grad: 0.0292 (0.0302) time: 0.4601 data: 0.0043 max mem: 22446 +train: [16] [160/400] eta: 0:01:55 lr: 0.000041 loss: 0.0998 (0.1007) grad: 0.0290 (0.0301) time: 0.4620 data: 0.0044 max mem: 22446 +train: [16] [180/400] eta: 0:01:45 lr: 0.000040 loss: 0.0850 (0.0999) grad: 0.0290 (0.0303) time: 0.4657 data: 0.0044 max mem: 22446 +train: [16] [200/400] eta: 0:01:35 lr: 0.000039 loss: 0.0843 (0.0987) grad: 0.0283 (0.0300) time: 0.4619 data: 0.0043 max mem: 22446 +train: [16] [220/400] eta: 0:01:25 lr: 0.000038 loss: 0.0900 (0.0989) grad: 0.0279 (0.0300) time: 0.4733 data: 0.0045 max mem: 22446 +train: [16] [240/400] eta: 0:01:16 lr: 0.000036 loss: 0.1002 (0.0990) grad: 0.0288 (0.0301) time: 0.4660 data: 0.0044 max mem: 22446 +train: [16] [260/400] eta: 0:01:06 lr: 0.000035 loss: 0.0986 (0.0989) grad: 0.0285 (0.0300) time: 0.4591 data: 0.0041 max mem: 22446 +train: [16] [280/400] eta: 0:00:56 lr: 0.000034 loss: 0.0954 (0.0990) grad: 0.0291 (0.0303) time: 0.4420 data: 0.0040 max mem: 22446 +train: [16] [300/400] eta: 0:00:48 lr: 0.000033 loss: 0.0947 (0.0985) grad: 0.0301 (0.0302) time: 0.6304 data: 0.1624 max mem: 22446 +train: [16] [320/400] eta: 0:00:38 lr: 0.000032 loss: 0.0909 (0.0984) grad: 0.0275 (0.0302) time: 0.4710 data: 0.0034 max mem: 22446 +train: [16] [340/400] eta: 0:00:28 lr: 0.000031 loss: 0.0855 (0.0978) grad: 0.0272 (0.0302) time: 0.4621 data: 0.0042 max mem: 22446 +train: [16] [360/400] eta: 0:00:19 lr: 0.000031 loss: 0.0840 (0.0977) grad: 0.0295 (0.0302) time: 0.4473 data: 0.0043 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.0912 (0.0975) grad: 0.0304 (0.0302) time: 0.4584 data: 0.0042 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.0938 (0.0977) grad: 0.0287 (0.0303) time: 0.4679 data: 0.0043 max mem: 22446 +train: [16] Total time: 0:03:11 (0.4777 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.0938 (0.0977) grad: 0.0287 (0.0303) +eval (validation): [16] [ 0/63] eta: 0:03:20 time: 3.1842 data: 2.9448 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:20 time: 0.3530 data: 0.0035 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:09 time: 0.3453 data: 0.0034 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3369 data: 0.0035 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3342 data: 0.0034 max mem: 22446 +eval (validation): [16] Total time: 0:00:24 (0.3939 s / it) +cv: [16] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.029 acc: 0.993 f1: 0.992 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:21:44 lr: nan time: 3.2610 data: 2.8486 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:51 lr: 0.000028 loss: 0.1000 (0.1037) grad: 0.0287 (0.0319) time: 0.4758 data: 0.0024 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:11 lr: 0.000027 loss: 0.0892 (0.0964) grad: 0.0279 (0.0296) time: 0.4530 data: 0.0042 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:52 lr: 0.000026 loss: 0.0870 (0.0954) grad: 0.0276 (0.0296) time: 0.4575 data: 0.0043 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:38 lr: 0.000025 loss: 0.0915 (0.0962) grad: 0.0299 (0.0299) time: 0.4603 data: 0.0042 max mem: 22446 +train: [17] [100/400] eta: 0:02:26 lr: 0.000024 loss: 0.1019 (0.0979) grad: 0.0304 (0.0301) time: 0.4577 data: 0.0043 max mem: 22446 +train: [17] [120/400] eta: 0:02:15 lr: 0.000023 loss: 0.1034 (0.0992) grad: 0.0305 (0.0303) time: 0.4588 data: 0.0043 max mem: 22446 +train: [17] [140/400] eta: 0:02:05 lr: 0.000023 loss: 0.0908 (0.0989) grad: 0.0293 (0.0301) time: 0.4772 data: 0.0044 max mem: 22446 +train: [17] [160/400] eta: 0:01:55 lr: 0.000022 loss: 0.0925 (0.0997) grad: 0.0275 (0.0300) time: 0.4544 data: 0.0040 max mem: 22446 +train: [17] [180/400] eta: 0:01:45 lr: 0.000021 loss: 0.0936 (0.0993) grad: 0.0298 (0.0302) time: 0.4697 data: 0.0041 max mem: 22446 +train: [17] [200/400] eta: 0:01:35 lr: 0.000020 loss: 0.0936 (0.0989) grad: 0.0303 (0.0303) time: 0.4704 data: 0.0042 max mem: 22446 +train: [17] [220/400] eta: 0:01:25 lr: 0.000019 loss: 0.0899 (0.0987) grad: 0.0326 (0.0304) time: 0.4549 data: 0.0041 max mem: 22446 +train: [17] [240/400] eta: 0:01:15 lr: 0.000019 loss: 0.0893 (0.0984) grad: 0.0277 (0.0302) time: 0.4661 data: 0.0045 max mem: 22446 +train: [17] [260/400] eta: 0:01:06 lr: 0.000018 loss: 0.1003 (0.0990) grad: 0.0278 (0.0302) time: 0.4844 data: 0.0043 max mem: 22446 +train: [17] [280/400] eta: 0:00:56 lr: 0.000017 loss: 0.1012 (0.0993) grad: 0.0302 (0.0303) time: 0.4611 data: 0.0042 max mem: 22446 +train: [17] [300/400] eta: 0:00:48 lr: 0.000016 loss: 0.0925 (0.0987) grad: 0.0283 (0.0303) time: 0.6407 data: 0.1701 max mem: 22446 +train: [17] [320/400] eta: 0:00:38 lr: 0.000016 loss: 0.0929 (0.0991) grad: 0.0283 (0.0301) time: 0.4738 data: 0.0034 max mem: 22446 +train: [17] [340/400] eta: 0:00:29 lr: 0.000015 loss: 0.0989 (0.0992) grad: 0.0295 (0.0303) time: 0.4670 data: 0.0044 max mem: 22446 +train: [17] [360/400] eta: 0:00:19 lr: 0.000014 loss: 0.0940 (0.0991) grad: 0.0299 (0.0302) time: 0.4566 data: 0.0044 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.0905 (0.0987) grad: 0.0287 (0.0302) time: 0.4527 data: 0.0040 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.0924 (0.0989) grad: 0.0281 (0.0300) time: 0.4445 data: 0.0039 max mem: 22446 +train: [17] Total time: 0:03:11 (0.4795 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.0924 (0.0989) grad: 0.0281 (0.0300) +eval (validation): [17] [ 0/63] eta: 0:03:16 time: 3.1135 data: 2.8171 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3645 data: 0.0046 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3387 data: 0.0030 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3358 data: 0.0037 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3354 data: 0.0036 max mem: 22446 +eval (validation): [17] Total time: 0:00:24 (0.3939 s / it) +cv: [17] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 0.025 acc: 0.993 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:10 lr: nan time: 3.3271 data: 2.9548 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:47 lr: 0.000012 loss: 0.0893 (0.0938) grad: 0.0305 (0.0309) time: 0.4631 data: 0.0030 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:10 lr: 0.000012 loss: 0.0893 (0.0946) grad: 0.0302 (0.0303) time: 0.4526 data: 0.0036 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:52 lr: 0.000011 loss: 0.0874 (0.0933) grad: 0.0285 (0.0301) time: 0.4628 data: 0.0043 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:37 lr: 0.000011 loss: 0.0923 (0.0966) grad: 0.0283 (0.0302) time: 0.4473 data: 0.0042 max mem: 22446 +train: [18] [100/400] eta: 0:02:25 lr: 0.000010 loss: 0.0906 (0.0960) grad: 0.0269 (0.0296) time: 0.4549 data: 0.0043 max mem: 22446 +train: [18] [120/400] eta: 0:02:14 lr: 0.000009 loss: 0.0967 (0.0972) grad: 0.0286 (0.0297) time: 0.4606 data: 0.0043 max mem: 22446 +train: [18] [140/400] eta: 0:02:04 lr: 0.000009 loss: 0.1084 (0.0993) grad: 0.0297 (0.0300) time: 0.4718 data: 0.0045 max mem: 22446 +train: [18] [160/400] eta: 0:01:54 lr: 0.000008 loss: 0.1019 (0.0984) grad: 0.0301 (0.0299) time: 0.4752 data: 0.0043 max mem: 22446 +train: [18] [180/400] eta: 0:01:44 lr: 0.000008 loss: 0.0957 (0.0988) grad: 0.0299 (0.0300) time: 0.4577 data: 0.0043 max mem: 22446 +train: [18] [200/400] eta: 0:01:35 lr: 0.000007 loss: 0.0996 (0.0989) grad: 0.0299 (0.0299) time: 0.4704 data: 0.0044 max mem: 22446 +train: [18] [220/400] eta: 0:01:25 lr: 0.000007 loss: 0.0932 (0.0983) grad: 0.0276 (0.0298) time: 0.4809 data: 0.0044 max mem: 22446 +train: [18] [240/400] eta: 0:01:16 lr: 0.000006 loss: 0.0957 (0.0985) grad: 0.0285 (0.0300) time: 0.4730 data: 0.0044 max mem: 22446 +train: [18] [260/400] eta: 0:01:06 lr: 0.000006 loss: 0.0951 (0.0982) grad: 0.0281 (0.0298) time: 0.4826 data: 0.0043 max mem: 22446 +train: [18] [280/400] eta: 0:00:57 lr: 0.000006 loss: 0.0927 (0.0980) grad: 0.0286 (0.0298) time: 0.4698 data: 0.0040 max mem: 22446 +train: [18] [300/400] eta: 0:00:48 lr: 0.000005 loss: 0.0984 (0.0988) grad: 0.0303 (0.0299) time: 0.6227 data: 0.1802 max mem: 22446 +train: [18] [320/400] eta: 0:00:38 lr: 0.000005 loss: 0.0977 (0.0987) grad: 0.0289 (0.0299) time: 0.4614 data: 0.0031 max mem: 22446 +train: [18] [340/400] eta: 0:00:29 lr: 0.000004 loss: 0.0923 (0.0985) grad: 0.0289 (0.0299) time: 0.4681 data: 0.0044 max mem: 22446 +train: [18] [360/400] eta: 0:00:19 lr: 0.000004 loss: 0.0923 (0.0984) grad: 0.0297 (0.0300) time: 0.4757 data: 0.0042 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.0917 (0.0980) grad: 0.0294 (0.0300) time: 0.4742 data: 0.0041 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.0917 (0.0981) grad: 0.0291 (0.0300) time: 0.4754 data: 0.0045 max mem: 22446 +train: [18] Total time: 0:03:13 (0.4827 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.0917 (0.0981) grad: 0.0291 (0.0300) +eval (validation): [18] [ 0/63] eta: 0:03:28 time: 3.3105 data: 3.0068 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:21 time: 0.3612 data: 0.0038 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:09 time: 0.3536 data: 0.0028 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3404 data: 0.0038 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3369 data: 0.0037 max mem: 22446 +eval (validation): [18] Total time: 0:00:25 (0.4021 s / it) +cv: [18] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 0.025 acc: 0.993 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:35 lr: nan time: 3.3899 data: 3.0171 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:55 lr: 0.000003 loss: 0.0969 (0.1006) grad: 0.0275 (0.0303) time: 0.4806 data: 0.0040 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:18 lr: 0.000003 loss: 0.0912 (0.0970) grad: 0.0277 (0.0294) time: 0.4809 data: 0.0042 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:58 lr: 0.000002 loss: 0.0868 (0.0954) grad: 0.0280 (0.0297) time: 0.4664 data: 0.0042 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:43 lr: 0.000002 loss: 0.0939 (0.0957) grad: 0.0296 (0.0303) time: 0.4674 data: 0.0044 max mem: 22446 +train: [19] [100/400] eta: 0:02:30 lr: 0.000002 loss: 0.0932 (0.0947) grad: 0.0292 (0.0304) time: 0.4682 data: 0.0045 max mem: 22446 +train: [19] [120/400] eta: 0:02:18 lr: 0.000002 loss: 0.0919 (0.0965) grad: 0.0276 (0.0303) time: 0.4646 data: 0.0045 max mem: 22446 +train: [19] [140/400] eta: 0:02:08 lr: 0.000001 loss: 0.0936 (0.0966) grad: 0.0282 (0.0303) time: 0.4824 data: 0.0043 max mem: 22446 +train: [19] [160/400] eta: 0:01:57 lr: 0.000001 loss: 0.0912 (0.0967) grad: 0.0294 (0.0303) time: 0.4688 data: 0.0042 max mem: 22446 +train: [19] [180/400] eta: 0:01:47 lr: 0.000001 loss: 0.0912 (0.0964) grad: 0.0284 (0.0301) time: 0.4570 data: 0.0038 max mem: 22446 +train: [19] [200/400] eta: 0:01:36 lr: 0.000001 loss: 0.0856 (0.0958) grad: 0.0275 (0.0300) time: 0.4647 data: 0.0039 max mem: 22446 +train: [19] [220/400] eta: 0:01:26 lr: 0.000001 loss: 0.0886 (0.0958) grad: 0.0285 (0.0298) time: 0.4560 data: 0.0042 max mem: 22446 +train: [19] [240/400] eta: 0:01:17 lr: 0.000001 loss: 0.0873 (0.0953) grad: 0.0285 (0.0297) time: 0.4911 data: 0.0044 max mem: 22446 +train: [19] [260/400] eta: 0:01:07 lr: 0.000000 loss: 0.0871 (0.0950) grad: 0.0289 (0.0300) time: 0.4728 data: 0.0044 max mem: 22446 +train: [19] [280/400] eta: 0:00:57 lr: 0.000000 loss: 0.0964 (0.0955) grad: 0.0299 (0.0300) time: 0.4861 data: 0.0043 max mem: 22446 +train: [19] [300/400] eta: 0:00:49 lr: 0.000000 loss: 0.0982 (0.0960) grad: 0.0277 (0.0298) time: 0.6432 data: 0.1886 max mem: 22446 +train: [19] [320/400] eta: 0:00:39 lr: 0.000000 loss: 0.0917 (0.0963) grad: 0.0282 (0.0299) time: 0.4801 data: 0.0035 max mem: 22446 +train: [19] [340/400] eta: 0:00:29 lr: 0.000000 loss: 0.0940 (0.0963) grad: 0.0294 (0.0300) time: 0.4886 data: 0.0042 max mem: 22446 +train: [19] [360/400] eta: 0:00:19 lr: 0.000000 loss: 0.0963 (0.0966) grad: 0.0294 (0.0300) time: 0.4605 data: 0.0044 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.0966 (0.0967) grad: 0.0282 (0.0300) time: 0.4913 data: 0.0045 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.0942 (0.0965) grad: 0.0260 (0.0300) time: 0.4662 data: 0.0043 max mem: 22446 +train: [19] Total time: 0:03:15 (0.4897 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.0942 (0.0965) grad: 0.0260 (0.0300) +eval (validation): [19] [ 0/63] eta: 0:03:12 time: 3.0543 data: 2.7722 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:22 time: 0.3956 data: 0.0033 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:10 time: 0.3804 data: 0.0037 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3508 data: 0.0037 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3444 data: 0.0037 max mem: 22446 +eval (validation): [19] Total time: 0:00:26 (0.4217 s / it) +cv: [19] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 0.025 acc: 0.993 f1: 0.991 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9925595238095238, "hparam": [1, 1.0], "hparam_id": 24, "epoch": 19, "is_best": false, "best_score": 0.9930555555555556} +eval (train): [20] [ 0/297] eta: 0:15:32 time: 3.1401 data: 2.8952 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:28 time: 0.4061 data: 0.0048 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:02:00 time: 0.3955 data: 0.0034 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:42 time: 0.3599 data: 0.0035 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:29 time: 0.3584 data: 0.0036 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:20 time: 0.3803 data: 0.0036 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:12 time: 0.4066 data: 0.0039 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:02 time: 0.3641 data: 0.0036 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:54 time: 0.3621 data: 0.0035 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:45 time: 0.3614 data: 0.0038 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:37 time: 0.3497 data: 0.0035 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:29 time: 0.3862 data: 0.0038 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3496 data: 0.0035 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:14 time: 0.3730 data: 0.0033 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3691 data: 0.0038 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3474 data: 0.0033 max mem: 22446 +eval (train): [20] Total time: 0:01:53 (0.3831 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:07 time: 2.9813 data: 2.7387 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3445 data: 0.0029 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3707 data: 0.0033 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3575 data: 0.0035 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3559 data: 0.0036 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4045 s / it) +eval (test): [20] [ 0/79] eta: 0:03:44 time: 2.8388 data: 2.5904 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:28 time: 0.3696 data: 0.0038 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.4012 data: 0.0032 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3603 data: 0.0036 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3417 data: 0.0034 max mem: 22446 +eval (test): [20] Total time: 0:00:31 (0.4021 s / it) +evaluating best checkpoint: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9930555555555556, "hparam": [1.6, 1.0], "hparam_id": 27, "epoch": 12, "is_best": true, "best_score": 0.9930555555555556} +eval (train): [20] [ 0/297] eta: 0:16:06 time: 3.2552 data: 2.9352 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:26 time: 0.3934 data: 0.0030 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:56 time: 0.3768 data: 0.0030 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:40 time: 0.3600 data: 0.0038 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:27 time: 0.3387 data: 0.0032 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:17 time: 0.3561 data: 0.0033 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:08 time: 0.3634 data: 0.0031 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:59 time: 0.3426 data: 0.0037 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:51 time: 0.3577 data: 0.0034 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:43 time: 0.3317 data: 0.0031 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:36 time: 0.3496 data: 0.0032 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3481 data: 0.0032 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3490 data: 0.0033 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3478 data: 0.0033 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3287 data: 0.0030 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3269 data: 0.0028 max mem: 22446 +eval (train): [20] Total time: 0:01:47 (0.3633 s / it) +eval (validation): [20] [ 0/63] eta: 0:02:47 time: 2.6588 data: 2.4262 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:19 time: 0.3347 data: 0.0150 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:08 time: 0.3291 data: 0.0028 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3335 data: 0.0030 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3291 data: 0.0032 max mem: 22446 +eval (validation): [20] Total time: 0:00:23 (0.3733 s / it) +eval (test): [20] [ 0/79] eta: 0:03:35 time: 2.7270 data: 2.4607 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:25 time: 0.3245 data: 0.0034 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:15 time: 0.3315 data: 0.0026 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3585 data: 0.0033 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3248 data: 0.0034 max mem: 22446 +eval (test): [20] Total time: 0:00:29 (0.3686 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|----------:|--------:|-----------:|--------:|-----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 12 | 0.00048 | 0.05 | 27 | [1.6, 1.0] | train | 0.0042776 | 0.99947 | 0.00016325 | 0.99958 | 0.00013458 | +| flat_mae | patch | attn | hcpya_task21 | best | 12 | 0.00048 | 0.05 | 27 | [1.6, 1.0] | validation | 0.02582 | 0.99306 | 0.0013263 | 0.99149 | 0.0018341 | +| flat_mae | patch | attn | hcpya_task21 | best | 12 | 0.00048 | 0.05 | 27 | [1.6, 1.0] | test | 0.045459 | 0.9871 | 0.0015778 | 0.98364 | 0.0021758 | + + +done! total time: 1:19:33 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/train_log.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..503cccb037e1b4d0f01ed0dfa7a8701d809b1533 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.2778862291574478, "train/grad": 0.08294104691594839, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.047891845703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.04689697265625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.045220947265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.043516845703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.0419189453125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.039625244140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.036932373046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.034085693359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.03011962890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.02577392578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.02156494140625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.015003662109375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.008607177734375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.998878173828125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.989171142578125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.97959716796875, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.966693115234375, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.950782470703125, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.931590576171875, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.912415771484375, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.88695068359375, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.8584710693359376, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.8236846923828125, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.7831607055664063, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.7367518615722655, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.6751638793945314, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.613544235229492, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.551605224609375, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.4590423583984373, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.338661766052246, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.2250310134887696, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.1203454756736755, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.9812043523788452, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.862328494116664, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.7310979398339987, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.6120606955885888, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.4969760658871383, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.3978926107566805, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.2997137755062431, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.18994499781169, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.112290188902989, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.0492030616104602, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.9738545276131481, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.9149286270048469, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.852811311846599, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.7943385333288461, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.7502207144815475, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.699333465397358, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.6595284095965326, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012864935281686484, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012860317109152674, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012852419102564454, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012843766612932087, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012835452714934945, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01282385179772973, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012811504625715316, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01279832141008228, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.012780755185522139, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012762841396033764, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01274694554042071, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012723891204223037, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012704569133929908, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012679436667822301, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012659474383108318, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012643041275441646, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012626622482202947, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.012613200880587101, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.012604291546158492, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01259784349706024, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.012588870069012046, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.012572409561835229, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.012539839278906584, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01249155648984015, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.012439497942104936, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012392020411789417, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.012365733375772833, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.012347681075334549, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.012312457677908242, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.012229814622551204, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01210719888098538, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.011967351380735635, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.011756004386115819, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.011518234333489091, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.011156265125609934, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.01073394190520048, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.010321345860138536, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010052166819805279, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009785510586807504, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.009429996985127219, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00913913918484468, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.008935392973944545, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.008870028402889148, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.008926278293947688, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0088852481613867, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.008922585390973837, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.009100024955696426, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.009273541670991108, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.009373054224997759, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.0338010787963867, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.030791997909546, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0258097648620605, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.020803213119507, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.015831232070923, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.008852481842041, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.000905990600586, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.992030143737793, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.980203628540039, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9674811363220215, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.954700469970703, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.9352540969848633, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.915839672088623, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.886756658554077, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.8577089309692383, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.828749179840088, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.7901084423065186, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.7418372631073, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.683894157409668, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.626058578491211, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5494933128356934, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4646785259246826, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3628575801849365, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.244159460067749, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.1061806678771973, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.9168285131454468, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.721888780593872, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.5266985893249512, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.2469234466552734, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.9236162304878235, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.670493483543396, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.4800436794757843, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.28940191864967346, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.18124651908874512, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11469969898462296, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08732252568006516, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0746394693851471, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.0683898776769638, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.06493028253316879, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06060349568724632, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.05478741601109505, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0518418587744236, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.049830082803964615, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.04982311278581619, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.04953772574663162, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.05233185738325119, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.057052627205848694, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.05913866311311722, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.0570114366710186, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.11408730158730158, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.1388888888888889, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.1875, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.23660714285714285, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.2683531746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.2926587301587302, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.30009920634920634, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.3013392857142857, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.29216269841269843, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.28273809523809523, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.2720734126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.25223214285714285, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.24057539682539683, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.22271825396825398, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.21279761904761904, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.20436507936507936, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.1974206349206349, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.1922123015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.19246031746031747, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.19518849206349206, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.20337301587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.21949404761904762, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2584325396825397, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.3323412698412698, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.42237103174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.5843253968253969, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.6703869047619048, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.736359126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8271329365079365, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9107142857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9404761904761905, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.982390873015873, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9801587301587301, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.07328495010983185, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.082004366087204, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.09711177222324363, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.10902222127962848, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.11795234908944738, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.12670137508128554, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.13051406325602616, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.13588769750324434, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.13153638682039012, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.1263571079199276, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11818780987451474, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.10253713451021375, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09035075924094671, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07337144977666596, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06336123727111449, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.053687908439032365, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.04587133405325291, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.040270327369948004, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.03982698883496912, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.04318085201680078, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.05148988559763695, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.06767433334471139, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.10463934921721896, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.15903012721422122, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2333988920355351, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.3890195490006694, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.4605957917221425, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.5730254092960227, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.7340827997182571, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8909719635377107, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9357097924834216, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9552577608212728, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9617348087956158, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9673791421103126, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9736563672662671, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9778338932368855, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9791926317824352, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9793266483636012, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9789187940613803, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9792876052119229, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9798847043735949, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9788231685627748, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9805447603489067, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9794716249372319, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9788758237515671, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9776602727851502, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9736278914807516, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9720467165435952, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9774240480481131, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.9738545276131481, "validation/loss_best": 0.049830082803964615, "validation/acc_best": 0.9841269841269841, "validation/f1_best": 0.9805447603489067} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.34267585337162, "train/grad": 0.07265213660895825, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.007701416015625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.0008203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.989453125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.978111572265625, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.966717529296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.950889892578125, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.932803955078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.9125732421875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.885511474609375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.85638671875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.82711669921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.78203125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7370086669921876, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6696820068359375, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6025836181640627, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5358090209960937, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4464151000976564, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3336448669433594, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1963101959228517, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0575740814208983, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8740312194824218, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.6759849357604981, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.4535227251052856, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.2236274433135987, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.0029129569977522, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.7734204057231545, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.602586953472346, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.47495065746828913, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.3401693229936063, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.22920632774941624, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.16433003627695142, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1255344375409186, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.09489833754487335, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.080391629897058, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0719026478100568, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06768673471175134, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0647925228253007, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0628548842202872, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.06080068595707416, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05846758613362908, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05845715369097888, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.05839096374809742, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.05737725365906954, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.05776167938485741, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.05959592053666711, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.06303885296918452, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.06856238880194723, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.07558246633969247, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.12113429828546941, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01266843176446855, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01264222898054868, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012602713690139353, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012571112741716206, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012543834703974426, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012514548194594681, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012489261664450168, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01247009883634746, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.012459471677429975, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012460079505108296, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012468054112978279, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012483573220670223, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012492016931064427, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012478685285896064, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012442291802726687, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01240102550946176, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01235996580682695, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.012341789240017532, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.012337162592448294, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01231344288215041, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.012220440465025604, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.012039282242767513, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.011760111092589795, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01136571119306609, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.010787151067052036, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.009844012164976448, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.008926206131000071, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.008162267886800691, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.007289797348785214, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0065766987903043625, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006139520982978866, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0058210063917795195, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0055582248768769205, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.005422265146044083, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0053770929510938, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005439788567018695, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005583295119577087, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.005705207840073854, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005660262240562588, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.005599525347934104, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005884420496149687, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00603898933302844, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.005920028688851744, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005846753061632626, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.006031590071797837, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.006366162804479245, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006476433135539992, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.007114450882654637, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01383551687002182, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9745075702667236, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.962785243988037, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.9432969093322754, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.923846960067749, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.9044382572174072, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.877345085144043, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.846353530883789, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.8114962577819824, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.7649483680725098, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7144415378570557, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.663820266723633, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.586014986038208, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.508683919906616, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.394045114517212, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.2801430225372314, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.1654410362243652, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.0084409713745117, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.8046694993972778, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.5545438528060913, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.3107366561889648, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.016087532043457, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7438275218009949, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.4977424740791321, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.30744659900665283, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.18505971133708954, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11751501262187958, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.09252642095088959, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.07980996370315552, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.06913625448942184, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.061142805963754654, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.05623004585504532, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.05306050926446915, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0498163066804409, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.04778078943490982, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.04619619995355606, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.045455824583768845, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.04339234158396721, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.041850872337818146, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04454304650425911, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.05119267478585243, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.05226338282227516, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.04732132703065872, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.04701266810297966, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.0452929362654686, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.043845225125551224, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.05440860614180565, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.048217277973890305, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.07624026387929916, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.1581266075372696, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.30084325396825395, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2904265873015873, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2743055555555556, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.26240079365079366, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.2537202380952381, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.24305555555555555, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.234375, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.22891865079365079, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.22470238095238096, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.22371031746031747, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.22668650793650794, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.23487103174603174, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.25223214285714285, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2931547619047619, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.34499007936507936, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.39955357142857145, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.4861111111111111, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.5694444444444444, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.6721230158730159, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7849702380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8762400793650794, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9166666666666666, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9437003968253969, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9677579365079365, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9836309523809523, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9744543650793651, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.1321301391717712, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.12395911232952697, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.10925050522247053, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0994014142758514, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.09218559261937635, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.08397928389572246, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.07691734434456843, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.07142404265293428, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06820422915471575, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06753261876272482, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07063578586138364, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.07687377279001166, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09332338883147996, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12321251677359407, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1505090671355014, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18734923812035637, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.271754868732076, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.3461793531955846, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.5047056624146824, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.698046292346407, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8441592559708304, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9017079057569701, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9379082346851646, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.967893238281534, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9731831943922677, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9747162476572219, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9759333321548457, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9776676483034942, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9795077011136012, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9808683302457972, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9818579865799516, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9811963351794132, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9808611877031611, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9812756233398816, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9829454828398926, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9825190134427769, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9833067110230521, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9841143120680258, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9808959258709317, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9741941004775018, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.977694343369027, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9816423062711299, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9804849590713077, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9801268858627895, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9846056948672998, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9765328151279429, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9813653201511228, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9749304680970111, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9744870559988001, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.0628548842202872, "validation/loss_best": 0.041850872337818146, "validation/acc_best": 0.9866071428571429, "validation/f1_best": 0.9841143120680258} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 0.9622802817821503, "train/grad": 0.0794766661338508, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.92920166015625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.9108984375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.8801611328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.84965576171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.819013671875, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.7760406494140626, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.7268243408203126, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.6714312744140627, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.5977581787109374, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.5182070922851563, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.438818054199219, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.3162580871582032, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.191667327880859, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.0001133728027343, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.8051011276245117, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.6117917823791503, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.3666834545135498, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.093527340888977, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8242385315895081, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.616814852654934, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.4216312988102436, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.28399820350110533, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.18883586570620536, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.13220317675732077, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.1025491527095437, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.08596360069699585, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.07805338813923299, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.07303802802227438, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.06804564401507378, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.06369633466936647, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.06088034083135426, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.059056395450606945, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.057062055803835394, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.05556633229367435, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.05380115455016494, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.052178756427019836, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.051027573263272646, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05176953226327896, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.053167558182030915, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05504399018362165, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05483370326459408, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.05523125014267862, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06228973362594843, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06806981535628438, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.08625221531838179, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.10035758027806878, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.15100755091756582, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.3630776221025735, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.21884058103896678, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012432690914720297, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01241649323143065, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012404668359085918, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012408973989076913, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012423775144852697, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012451759786345065, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012483641230501235, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.012501811468973755, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01248438201379031, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012425052253529429, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012352399723604322, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012269914853386582, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012247331161051988, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01228157723788172, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0123085095314309, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012264011190272867, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012072775573469698, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011650614836253226, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.010913283354602754, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010019537841435522, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00877867401461117, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007583827896742151, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006544055873528123, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005762567967176438, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005299814878962934, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0050926790008088575, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005060764502850361, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005074772235821002, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00511850183684146, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005161089704488404, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00520033074382809, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005285328757017851, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.005392191216233186, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00546786596038146, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005493654425954446, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0054113537240482405, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005339270407130243, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0054104077005467845, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00550023781193886, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.005799257621401921, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005752420303178951, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0059806465476867745, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006493036649480928, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.006696754956792574, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.008131369292386807, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.009838820012519137, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.015266768215224148, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.030842671860882548, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.018185348505539878, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.8769683837890625, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.850738048553467, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.8070924282073975, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.7632055282592773, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.7193493843078613, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.6577250957489014, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.58735728263855, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.508732318878174, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.4048659801483154, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.293063163757324, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.1804587841033936, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.0021915435791016, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.816455602645874, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.5312150716781616, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.2556639909744263, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.0074824094772339, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.733770489692688, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.48337236046791077, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.29462969303131104, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.18902894854545593, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.12633845210075378, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09839241206645966, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0821484923362732, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.07200732827186584, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0652175173163414, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.05940956249833107, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.05489317700266838, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.051594506949186325, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04836685582995415, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04700532928109169, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.046185679733753204, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.045620668679475784, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04429532587528229, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.044111840426921844, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.04476851224899292, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04592549055814743, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.04946761950850487, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04490955173969269, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.050001855939626694, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.05227505788207054, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06018948554992676, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0650651827454567, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.09923691302537918, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06254587322473526, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.07837589085102081, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.12063896656036377, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.1498686522245407, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.21604061126708984, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.1372126340866089, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.24231150793650794, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.23586309523809523, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2286706349206349, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.22321428571428573, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.22247023809523808, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.22495039682539683, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.23363095238095238, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.25148809523809523, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2896825396825397, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.3427579365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.39186507936507936, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.4982638888888889, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5674603174603174, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6887400793650794, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8194444444444444, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8874007936507936, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9312996031746031, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9672619047619048, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.980406746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.982390873015873, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.984375, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.984375, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9836309523809523, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9828869047619048, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9665178571428571, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9809027777777778, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08510262918821825, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.08049950427968408, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.07299167309239578, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.06924552289261345, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.06927150262327017, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.07093734008937275, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.07882935598599092, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.09440732993024523, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.12464412546611041, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.1527343021793705, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.1821276417317314, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.29095009789127607, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.34481729735526184, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5410039861894959, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.7510347464097774, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8584187678691413, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9236645341887563, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9561103877158222, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.965412379467484, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9702336900159235, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.974587731165804, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9768069536899188, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9785035553176662, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9777017641820843, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9792451955703966, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9798689849525422, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9806659025196347, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9826662093083827, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9820642845443966, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9822538778897973, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9819147665656884, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.980684324068886, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9806845443580156, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.97947126287413, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9795323897722471, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9791213123090272, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9784242956411504, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9821959174845964, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9812513616560475, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9814559051526585, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.976875363241371, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9812991960311062, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9622017317988949, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9786548743546601, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9719291040781173, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9747448604575732, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9737761393300689, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.980699192664811, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9788173014879785, "id_best": 27, "lr_best": 0.00047999999999999996, "wd_best": 0.05, "train/loss_best": 0.07303802802227438, "validation/loss_best": 0.051594506949186325, "validation/acc_best": 0.9858630952380952, "validation/f1_best": 0.9826662093083827} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 0.7935499486327171, "train/grad": 0.1875840312987566, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.8128802490234377, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.7767724609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.716610107421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.6564385986328123, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.5964385986328127, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.512877197265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.417865905761719, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.3111485290527343, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.166881561279297, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.0063782501220704, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.8413402557373046, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5854406356811523, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.3387927436828613, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.0106778478622436, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.747121262550354, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.547495938539505, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3646460402011871, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.23199232250452043, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.15375415425747632, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.11803394759073854, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.09691900519654155, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.08566851833835244, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.07829508290626108, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0730115893483162, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.06888058376498521, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.06499449543654919, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.062025357149541376, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.05962619069032371, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.056799936573952436, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.05420699359849095, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.05252292203716934, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.051167278746142984, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.049911716394126414, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.049101631613448264, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.04811324615962803, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.04701897579245269, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0460598927270621, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04543348742648959, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.047375548724085094, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.04822674316354096, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.04948961012065411, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.05830594557337463, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06266530396416783, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.11264620653353631, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.3243889336381108, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.23070227227173745, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6360505972057581, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5089224294479936, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.801831303294748, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012617897358722985, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012648655562661587, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012700048075057566, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01272536812350154, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012713296846486628, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012640237202867865, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01251968728378415, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.012409665575250983, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.012360308319330215, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01240241929423064, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012460297923535108, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012439536079764366, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012235631528310477, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011669773422181607, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010905435637105255, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009985324337612837, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008629603327717632, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.007149402125505731, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005955216637812555, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005348676412831992, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00504180682823062, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0049482796061784025, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004942430006922223, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004948099625180475, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004939686948200688, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004922267866204493, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004921250840416178, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004929979177541099, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.004949434628360905, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0049570651614340025, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004983656454423908, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004992178533284459, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004985182424134109, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004992430406855419, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004966091491223778, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0049412250192835926, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004978830344043672, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004947199241869384, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005247066266456386, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0053539714582439045, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005176478549692547, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0066804241517820625, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00729224025599251, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.011342648310610458, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.026134998560555688, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02214177491939381, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04451445407690881, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03787413204434529, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.13709367095175368, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.7400686740875244, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.6931095123291016, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.614778995513916, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.536799430847168, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.459573984146118, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.352386474609375, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.2300336360931396, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.0897154808044434, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8952908515930176, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.677111029624939, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.4586029052734375, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.1429774761199951, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.871769905090332, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5636551380157471, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.360734224319458, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.23591874539852142, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.15074558556079865, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1088133156299591, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.08754853159189224, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.07641077041625977, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06751466542482376, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06110629811882973, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.05569560080766678, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.05118231847882271, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04767550900578499, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04426603764295578, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04204373061656952, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.0401947945356369, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03820933774113655, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03728668764233589, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.036473359912633896, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.036185186356306076, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.036791060119867325, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03776266798377037, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.037986110895872116, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03796837106347084, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.036895208060741425, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04335003346204758, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.03934212028980255, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.039029039442539215, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.05099719017744064, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.10351091623306274, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08663877099752426, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2861942648887634, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3196974992752075, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5755400061607361, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.3171056509017944, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.760279417037964, "validation/loss_048_lr5.0e+01_wd1.0e+00": 6.279492378234863, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.22098214285714285, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.22271825396825398, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.22767857142857142, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.23983134920634921, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.26165674603174605, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.30431547619047616, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.36259920634920634, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4459325396825397, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5639880952380952, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6416170634920635, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7586805555555556, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8824404761904762, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9260912698412699, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9518849206349206, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9665178571428571, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9699900793650794, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9744543650793651, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9776785714285714, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.980406746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9813988095238095, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9828869047619048, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.988343253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.988343253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9563492063492064, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9322916666666666, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9618055555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9484126984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9637896825396826, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0667636191399659, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06844377859396122, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.07227151595337833, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.08255083337202292, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.10178032103325557, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.13285921882991808, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.16268970120972726, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.2444505083214629, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.35345796451412803, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.4456645856944692, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6425995507319298, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8456280013809719, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9150525675368074, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.947680157476528, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.966979998592094, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9706666628147327, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9738743877675452, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9765972107511064, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9783948921267279, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9794913916665122, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9804565243130776, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9817051163215486, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9833009624009185, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9842820128105364, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9851137956732545, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9852320023565861, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9854237019857643, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9846467458527565, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9861647246647571, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.98604172721061, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9865869156232582, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9867632843893903, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.98747572817379, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9872058624112312, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9868250535785721, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9873690782102238, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9878484004920063, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9844550440075766, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9845861210147469, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9864762041972346, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9834739905182488, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9754590328016898, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9782738027608191, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9454483279708615, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9685646726924443, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9353292407093667, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.949455205849952, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8974127034723983, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.958049218539422, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.049911716394126414, "validation/loss_best": 0.036791060119867325, "validation/acc_best": 0.9898313492063492, "validation/f1_best": 0.98747572817379} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 0.8500415913760662, "train/grad": 0.3340397182479501, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.655653076171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.5960833740234377, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.497613830566406, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.3998318481445313, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.302303466796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.1642742919921876, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.002100830078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.814084014892578, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.560573196411133, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.2968320274353027, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.058198289871216, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.755639352798462, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5321702334284782, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.3176703851670027, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.20358740080147983, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.14631178019568325, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.11135583736002445, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09247113266959786, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0813116242364049, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0748677643109113, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.06921511400491, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.06463369180448353, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.06055696916766465, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.05702053476125002, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.05391679082065821, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.05090768437832594, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.04861525249667466, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.046990458639338616, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0453520911000669, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0442255702894181, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.04343180033378303, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.042601128164678814, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.041864117700606586, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.04148111216723919, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.04044208858162165, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03896633101627231, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04608313834294677, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05162827380001545, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.048881660765036944, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05573526845313609, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06794645491056145, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.12836771905422212, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.2345612796768546, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.2945973733998835, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.3376900241989642, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.2509524440951645, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.0013411922752855, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.853450196357444, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.827649111039936, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012617956628091634, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012612574230879546, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012529526222497225, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012407504785805941, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012313246219418943, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01228560543153435, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012365039358846842, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.012472378648817539, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.012472053123638034, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012236000304110348, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.011819557910785079, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.010980954985134303, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.009968077035155147, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008283372288569808, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0067909130326006565, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005777940404368564, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005083080949261785, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004778769629192539, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004664453685982153, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004638860669219866, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004630415822612121, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004605878712027334, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004579026894643903, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004553702126722783, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004539899482042528, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004521003427507822, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004497622277849587, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004501348665362457, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.004564627359213773, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.004647940338472835, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004692742149636615, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004681707658310188, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004707290088699665, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0047131332076969555, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004685354944813298, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004552273180452176, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005413157238363056, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0056568378371594005, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005386526112779393, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006426555920043029, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00738478497354663, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.012753718770904926, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.022906936365616276, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.024436285957936187, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.026748969906144993, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08200098160246853, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.10188877229689836, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1744293337643046, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.18541867545034676, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.5648653507232666, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.492302656173706, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.372772216796875, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.253750801086426, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.133134365081787, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.9589378833770752, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.7519773244857788, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.516223669052124, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.2159500122070312, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.9306509494781494, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.6982731223106384, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.44033992290496826, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2801710069179535, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.16081759333610535, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.1163686066865921, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.09591188281774521, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.08099234104156494, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.07058031111955643, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.06304312497377396, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.05812776833772659, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.05352308228611946, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.049996644258499146, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04702584072947502, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04482470080256462, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0428827665746212, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0410623736679554, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03992366045713425, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03867986798286438, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03577341511845589, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.035064611583948135, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03714507445693016, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03642301261425018, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.038152989000082016, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03865407407283783, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.037016503512859344, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03519653528928757, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.04525015875697136, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04484424740076065, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.07041206955909729, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.07846200466156006, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13603205978870392, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.10578984767198563, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.29830363392829895, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.35959893465042114, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.2995636761188507, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.1086881160736084, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.826390504837036, "validation/loss_047_lr4.3e+01_wd1.0e+00": 11.047004699707031, "validation/loss_048_lr5.0e+01_wd1.0e+00": 5.17241907119751, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2371031746031746, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.25297619047619047, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2976190476190476, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3551587301587302, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.42509920634920634, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5344742063492064, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.611359126984127, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7286706349206349, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8675595238095238, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9293154761904762, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.953125, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9652777777777778, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9742063492063492, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9776785714285714, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9809027777777778, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.984375, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.986359126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.986359126984127, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.984375, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9677579365079365, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9439484126984127, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9694940476190477, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.07961082871559622, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.09300169527226373, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.12755768641182566, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.157615269723835, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.21998138556029984, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.33051432193360464, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.39889680255861365, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.5940079792017909, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8257916941123865, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9228697107958997, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.952119408823236, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9659173994142588, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9707268153862736, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9738782031410208, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9769522210748762, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9783058867725161, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9784929232715738, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9786204003919756, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9816907440918305, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9824031621204947, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9821436029951256, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9822067660456675, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9837221247599559, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9833690171999157, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9843116364340154, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.984866019441587, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9843787041628266, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9855344295715031, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9844939366600336, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9840344244370101, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9809829485747353, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.981657604618988, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9816098649556283, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9815200503498746, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9835777488252317, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9853562619356507, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9823323461571728, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9786291116768137, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9714192932319341, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9747211206912066, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9681604972513311, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9805061336455949, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9724187519634401, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9716412920780794, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9657313604889775, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9774163878506401, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9684527689608244, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9231731845004121, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9641451927745169, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 0.03896633101627231, "validation/loss_best": 0.03519653528928757, "validation/acc_best": 0.9878472222222222, "validation/f1_best": 0.9853562619356507} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 0.772112193852663, "train/grad": 0.33169026602059604, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.473064270019531, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.3875982666015627, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.2449462890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.0996202087402343, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.9496499633789062, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.7334038543701171, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.485422477722168, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.220742473602295, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9135542774200439, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.6519091057777405, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.46072916507720946, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.2757063712179661, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.18009076416492462, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.12061763398349284, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0979047095309943, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.08654711901210249, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07800480332225561, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07174755450338126, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06690509215928614, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06335027472116053, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.05957717620767653, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.056096252985298634, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.05274775503203273, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.049645837107673285, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.046853156965225934, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.04395249119028449, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.04172834866680205, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.039561518356204035, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.03683103235438466, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.03452024744823575, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.03304197099991143, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.03248994109220803, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.032134561659768225, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.03168459753505885, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.029605817636474966, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03058524549007416, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0329315735027194, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.032808280130848286, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07589325016364455, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.04462236045859754, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.10134266087785364, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1587666260730475, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.30143771760165694, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.2759064702037722, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.21672791151329876, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.4761332553625106, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.2480420850217344, "train/loss_047_lr4.3e+01_wd1.0e+00": 8.21286598850973, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.3434496527165174, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012519287625327707, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012394210887141526, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012264356366358697, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01228114640340209, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01237625680398196, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01247942404821515, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012414944837801159, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01210165800061077, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01145444429013878, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.010601326441392302, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009577668157871812, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.00783727342961356, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006341572084929794, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00516023108153604, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004782237476902083, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004657940831966698, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004624178013182245, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004652363351196982, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004690485342871397, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004714554999372922, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004712412569788284, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004692078525549732, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004672082720207982, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004647057318361476, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004629262406961061, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004608042232139269, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004575645890508895, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004464146954123862, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.004280367415958608, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00417913597288134, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004085312937422714, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004052569988489268, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004039662379946094, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004042846507072682, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.003909559905441711, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004302084593946347, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004439468295295228, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004953646197973285, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009067411076684948, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.005840468461356068, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01181948095390453, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.016421029534744776, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.025537460383082345, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.023191510964233886, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.020900059223119237, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.12179905127450004, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.13883539008115492, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.2015589921246101, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.12649300853926648, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.37447452545166, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.2751309871673584, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.1073086261749268, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.9335211515426636, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.7540483474731445, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.5008257627487183, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.2245360612869263, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.9498358964920044, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6588361859321594, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.43517908453941345, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2897428870201111, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.16991528868675232, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.1214512437582016, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.09152669459581375, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.07737390697002411, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.06902486830949783, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.061855021864175797, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.05615541338920593, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.05162769928574562, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04858281463384628, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04570642113685608, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04323739930987358, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.040768638253211975, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03849509730935097, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03643007576465607, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.034933559596538544, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03427964821457863, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03487703576683998, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03566579520702362, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.038471803069114685, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.039246998727321625, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03845891356468201, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0420505665242672, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.043973810970783234, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0430179238319397, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09795565903186798, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05805066227912903, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04635461047291756, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.06060657277703285, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.07936292141675949, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.19957205653190613, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2681660056114197, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2338932454586029, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4015963077545166, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.15536226332187653, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.6738576889038086, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.542276859283447, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.76957893371582, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.6066641807556152, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2958829365079365, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.3454861111111111, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.4412202380952381, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5530753968253969, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6240079365079365, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7437996031746031, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8675595238095238, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9312996031746031, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.955109126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9652777777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9694940476190477, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9742063492063492, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9779265873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9809027777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.984375, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.988343253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.988343253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9694940476190477, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9689980158730159, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9749503968253969, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.12652958420521557, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.15282924050997393, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.24004390822947097, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.35040894711460047, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.4121219210598463, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.617878975560024, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8273647677341439, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9255487889469679, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9563228309894314, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9671766872598647, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9705531745378376, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9740048866337176, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9774870559150909, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9769737924616353, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.978399109678188, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.978771515020451, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9810182642571099, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9819009882559178, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.983588405594244, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9846040389254208, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9851131011449051, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9858351268447307, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9866232546648688, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9871465956681249, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.986941406470355, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9857844542969654, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9868539404777232, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.986127021583266, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9870549395521461, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9860320733812867, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9862161667836918, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9864223419114793, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9855494871221596, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.985016319786802, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.985716663518487, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9662335732834345, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9794641153564947, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9863901964740736, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9823467428901049, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9828550415835996, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9541504570952607, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9701496390064199, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9806841940921946, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9661510508550841, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9800198808292946, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9622932854626006, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9593180530552982, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9655093516909028, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9738602839794182, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 0.049645837107673285, "validation/loss_best": 0.03849509730935097, "validation/acc_best": 0.9893353174603174, "validation/f1_best": 0.9871465956681249} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.5186975293606519, "train/grad": 0.24863043367862703, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.2871112060546874, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.1729669189453125, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9769377136230468, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.7737247467041015, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5682779693603515, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.2905520629882812, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.0060448265075683, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7425105905532837, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.4850671648979187, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.30711542174220086, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.20393786072731018, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.1315887807123363, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.10321583312936128, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.08463310136459767, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.07537518562749028, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06962885465472937, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.06443297000601887, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05985317694023252, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05574841837398708, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.052529746191576125, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.049067708663642406, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.045893750861287115, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.04268609251827002, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.03972021664492786, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.03704085359349847, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.03417746739462018, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.03167662809602916, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.029627961572259665, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.02713550554588437, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.02488615095615387, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.023003793684765697, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.02172038031741977, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.02094679718837142, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.02070869630202651, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.021860416857525707, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.025109984371811153, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.02811943931505084, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.03134496155194938, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.03437629728578031, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05074847735464573, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.08431426135823131, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.09888286630623043, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.09743946640752256, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.13520241891033946, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.09944350504316389, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.6641652947850525, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.0957800053525717, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.259550800193101, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.7302966736722736, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012224695798940956, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012204038114286959, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01232204535510391, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012443452477455139, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012435468039475381, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012176676481030881, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011662732400000096, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010969716454856098, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00983569078380242, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008367625884711743, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.006962405075319112, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005605267762439326, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005084393183933571, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004819826867897063, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004737200835952535, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004708812065946404, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004701730617671274, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004698337997542695, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004685656967631076, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004651555026648566, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004602155102184043, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004547498373140116, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004490143376751803, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004449774369422812, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0044117408583406355, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00435328810752253, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0042786238408007195, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004193871614406816, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.004034311121358769, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0038885171088622883, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0037387423557811417, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.003623158848786261, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.003615570092006237, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0036577143061367677, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.003876830860172049, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004767551622899191, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004751773574171238, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.005270129546242969, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005577139267697931, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00805849348049378, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.011260199704927344, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010734159049569598, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.012215281404154985, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.016873526721726165, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0123439949161501, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.09461529293352303, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1070599841293256, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.12666841822082817, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.11269587296442136, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.1920690536499023, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.0629591941833496, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.8403831720352173, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.6122616529464722, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.3879656791687012, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.098024845123291, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8179116249084473, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5740787386894226, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3538450598716736, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.21632008254528046, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.14822979271411896, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.10500790178775787, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.08608565479516983, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07169555127620697, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.0638032779097557, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.05868560075759888, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.05399366468191147, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04996830224990845, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04669015854597092, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04431809484958649, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.041881460696458817, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04000231996178627, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.038358841091394424, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.037375159561634064, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0368027463555336, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03535926714539528, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03390902653336525, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.0325763002038002, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03134332224726677, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.032181497663259506, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03337524086236954, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03609614819288254, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04185543581843376, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.0480068065226078, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03859392926096916, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03630933538079262, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.04644893482327461, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06216548755764961, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.07485858350992203, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.07727620005607605, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.12845081090927124, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.14583875238895416, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.18798168003559113, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.22544421255588531, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.1628819853067398, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.271456480026245, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1183133125305176, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.9977993965148926, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.730219602584839, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.3881448412698413, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.47172619047619047, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5944940476190477, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6865079365079365, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8018353174603174, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9000496031746031, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9424603174603174, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9558531746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9665178571428571, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9724702380952381, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9739583333333334, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9756944444444444, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9818948412698413, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.986359126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.988343253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9722222222222222, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.1829393829530818, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.27486197752867436, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.38354573497781186, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5184465879669486, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7105051328555143, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8795080381266499, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9389315433484337, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9531431051129458, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9675716421793469, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9733005467096625, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9737759592996407, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9751974303860784, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9775092907296894, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9784192860816932, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9800968416741762, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9822883473887375, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9826282950330741, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9838360409173044, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9839193503974089, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9841278461210294, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9834474699624984, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9844315970710682, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9843160580244724, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9842676326583542, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9852750529275401, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9873094515116759, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9876626691605239, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9876595460864112, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9885092161230279, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9883600972676673, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9887642147188881, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9874506877579255, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9848820137507268, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9829153444352015, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9879753784824439, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9877224966220168, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9866664204036935, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9836241181972787, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9849867329394241, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9837894649714443, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9753139673203461, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9772330982729452, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.982374682977722, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9781298915270159, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9808906122987905, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9747658275624822, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.971600807293768, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.970311889625373, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9650860043383076, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.023003793684765697, "validation/loss_best": 0.03337524086236954, "validation/acc_best": 0.9895833333333334, "validation/f1_best": 0.9887642147188881} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.3513592702895403, "train/grad": 0.16579218082129954, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.1119728088378906, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.967205810546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.7187590789794922, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.4703409576416016, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.2346778106689453, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9432768630981445, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.676525502204895, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.45819506883621214, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.27660572588443755, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.1761092695221305, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.13138158669695257, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10217503737658262, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.08853246401064098, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07758192277513444, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.07105354624800383, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06644950466230512, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.061830096850171684, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.057508509112522005, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05343072924762964, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.05011810795404017, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04660824475809932, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.043314158683642745, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.040020157471299174, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.036799736488610506, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.03371188122779131, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.030218858029693364, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.027310085482895374, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.024948113914579153, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.022632214557379484, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.02049176556058228, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.018166803577914833, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.016219463935121893, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.014123806161805988, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.013052658038213849, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.013843154786154627, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.011341706728562712, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.023090523909777404, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.021222902685403822, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.02581069662235677, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.056792971957474944, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.04188010435551405, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04088148533366621, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.057286968538537625, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.07117967863567173, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.058578940518200395, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.5988775894232095, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.2246631392929703, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.4013903441186994, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.3884160931315273, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012204061774536968, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012307395529933273, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012433947115205228, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012350195939652621, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012076766439713537, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01153260954655707, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010797633910551668, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.009723510609474033, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.007933836246374995, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.006146075110882521, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005192374482285231, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004675953143741935, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0045167144050356, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004444780034828, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004431565263075754, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00442065329873003, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004407916458440013, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004390813292120583, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004348161869856994, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004303769628168083, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004248557940591127, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004184834292391315, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004096429438213818, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003979726094985381, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.003843183216813486, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0036732709166244605, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0035336081460991407, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.003426528694690205, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0034181508338951973, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.003337023757703719, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0032072312743548536, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.003003516607641359, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0027314037540782012, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0025373979571122617, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.003047115087229031, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0026270149287211096, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004238650898646484, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0038073934926205768, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.004408856151126201, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008171142302860517, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006023023492530228, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0063622051302239056, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0088405575652002, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.009603018015951896, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.007654791404912942, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05134604278095901, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07016760647250149, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0776962280845631, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.07958642860141069, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.0167551040649414, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.8569694757461548, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.5861892700195312, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.3233301639556885, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.082423448562622, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7968932390213013, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5475879311561584, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.3550885021686554, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.20674782991409302, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.13558633625507355, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.10492731630802155, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.08292660117149353, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.0717066153883934, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.062204062938690186, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.05639708787202835, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.05230403319001198, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04844837635755539, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04497630521655083, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.041849784553050995, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03978265821933746, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03773792088031769, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.036456674337387085, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03558540344238281, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03437958285212517, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03218245878815651, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03007359430193901, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.028811117634177208, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.028209663927555084, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.02770022675395012, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.028228657320141792, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.028507718816399574, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.02866503782570362, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.027367176488041878, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.027318133041262627, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.033293452113866806, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03251639008522034, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03617129102349281, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.042687755078077316, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.05289435014128685, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.07259658724069595, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09987577795982361, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.10621203482151031, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1527271866798401, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.1459275633096695, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.12731072306632996, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8547219038009644, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.7314962148666382, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.0688862800598145, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.1646015644073486, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5034722222222222, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5778769841269841, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6850198412698413, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8224206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8973214285714286, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9389880952380952, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9595734126984127, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9670138888888888, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9724702380952381, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9759424603174603, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9776785714285714, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.980406746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.986359126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.984375, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.984375, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9779265873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.30474960884516444, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3671034423768479, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.522609426323232, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7543197368995782, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.875346443160831, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9358947580239845, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.959666814426179, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9675293744889886, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9732608397827672, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9757642573616346, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9766770131061131, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9781287574406798, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9791935382536949, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9807758731302707, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9815851707095964, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9819221978477178, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.982307588916049, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9831882702006832, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9856161420525387, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9858061767809589, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9869036164346635, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9865433316932307, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9856677181811168, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9859267378398302, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.986816667812315, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9865141675047459, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9866906760733811, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9892756849644355, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9893590403901185, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9888591068686596, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.988166439366438, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9879589281264494, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9898650895305017, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9892667288432726, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9888724478161758, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9887533353930116, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9866479495164077, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9884285636474279, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.98729337218985, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9806151128687647, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9810144891898742, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9811940855907748, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9826013506587282, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.982416273948935, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.981930494292658, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9825035137314531, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9815629256738043, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9766067775090261, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9750726070208163, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.014123806161805988, "validation/loss_best": 0.027367176488041878, "validation/acc_best": 0.9910714285714286, "validation/f1_best": 0.9898650895305017} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.25983168110251426, "train/grad": 0.12804629554972052, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9303738403320312, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.7557355499267577, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4655469512939454, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1934638977050782, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9533683776855468, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6800143718719482, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.45341760039329526, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.28875368312001226, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.17253657571971417, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.12223993070423603, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.10041180012747646, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.08448991279117762, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.07607210450805724, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06871737590059639, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06391822271980345, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06030596920289099, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05643939402885735, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.052587780402973294, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04879003000445664, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04570984377525747, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.042266202736645934, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.039037440121173855, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.035721470518037673, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.03235918847844005, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0290992892999202, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.025613499572500587, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.022805041810497643, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.02045267205685377, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.017268243664875626, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.014228886971250177, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.012331050420179963, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0105514701269567, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.009617899926379323, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.010127484481781721, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.007822531163692475, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.01263092298991978, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.015447486629709602, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.014042326398193836, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.017519600838422775, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.028069515265524387, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.020667627938091754, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.03520240066573024, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.02883432846516371, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.034507607966661455, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.043514987463131545, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.3412900395318866, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.5508049824740737, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.8224820117000491, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.7645432188361884, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012357434923760593, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01244566563051194, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01235774314031005, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01200347937643528, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011522168531082571, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.010757785118184983, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.009642687099985779, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.008090271637775004, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006138778936583549, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005037584939273075, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0046229161706287415, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004417558462009765, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004365841295802966, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004369238613871857, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004375895572011359, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004369874771218747, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004345893811550923, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004291028081788681, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004218582255998626, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004150346233509481, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0040831882078782656, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004005165334092453, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003935775397112593, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0038286529315519147, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0036758736034971664, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0035050557932117953, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0033874875963374506, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.003286194243846694, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.003042854214872932, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0027323643274212374, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0025601532560176566, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0023858809130160807, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.002422144117172138, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.002488529741003731, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0022619231014232356, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0029085909703644576, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.003235068069575391, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0030842394546880315, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.003536220979375457, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.005182689897637545, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.004208845196121729, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.005238022679413916, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.005130085489442251, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005535499234560177, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.006364322347859961, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03405861687549285, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04172971214973413, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06107874735615547, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.06129727983291725, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.8520058393478394, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.6662583351135254, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3630961179733276, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0868120193481445, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.849496603012085, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5871740579605103, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.3782629370689392, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.23429599404335022, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.14261463284492493, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.1043376550078392, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.08640590310096741, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.07214746624231339, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.06408701837062836, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05711135268211365, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.05252331867814064, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04923846945166588, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04611297324299812, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.043249960988759995, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04061301425099373, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03869590535759926, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03664093092083931, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03481588140130043, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.033211417496204376, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.031917303800582886, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.031299445778131485, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03119587153196335, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.0314488522708416, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03145437687635422, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03182905167341232, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03178616240620613, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.032249633222818375, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.031573839485645294, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03689464554190636, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.039454687386751175, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.040608860552310944, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.050399962812662125, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06237397715449333, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.05088125541806221, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.060547199100255966, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09420739859342575, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09424277395009995, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0755542516708374, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.12724849581718445, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.09087403863668442, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.10051541030406952, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8483759760856628, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.577569842338562, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.47528076171875, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.9139065742492676, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5853174603174603, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6502976190476191, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8077876984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8985615079365079, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9384920634920635, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9573412698412699, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9672619047619048, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9714781746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9744543650793651, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9779265873015873, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9789186507936508, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.986359126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.986359126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9789186507936508, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.37268530675184414, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.46285747951239414, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7220962499113333, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8779005490215426, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9355066411065247, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9570453516570496, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9684894811777696, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9722594470724013, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.974570990568063, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9773649804252906, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9780956032439606, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9789913866892167, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9809005595182199, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9815615605253735, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9829974211459651, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.982995440789083, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9840704000985733, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9839538652178798, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9842011906759954, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9851067630395777, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9849645072053729, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9853708520660159, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9865070359198741, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9872608069893467, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.987000464972373, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9870435187546956, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9861356200963833, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9863396524971266, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.987655666787241, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9875402615740728, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9884168404925915, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891739899337018, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.986270379225485, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9883799129218421, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9865069791342749, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9872516288406787, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9820814789650331, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9897770097416757, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9840038592770577, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9847635639668759, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9815572901315294, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9849998827684978, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9806692506831406, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9844700844097751, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9819068972463102, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9801796671590869, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9821886438782512, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9782208828316582, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.976102593060261, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.014042326398193836, "validation/loss_best": 0.05088125541806221, "validation/acc_best": 0.9915674603174603, "validation/f1_best": 0.9897770097416757} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.20300505064427854, "train/grad": 0.10107957961037756, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.770359115600586, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.5740962982177735, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2610231781005858, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9834704780578614, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7518397617340088, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5051204586029052, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.31986254304647443, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.20067966125905515, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.13194569185376168, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.10359265863895416, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.09004238899797201, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.07902954732999205, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.07265825378708542, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06638066221959889, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.061881982926279305, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.05831979189999401, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.054362506922334436, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05031984876841307, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04640074019320309, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04307294264435768, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03933217904530466, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03582672004587948, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.032135662781074645, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.028350841281935574, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.024708368042483927, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.020732527887448667, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.01743827308528125, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.014910529283806682, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.012148570315912367, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.009259263155981898, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.006977401738986373, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.005457876473665238, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.005318205673247576, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.004143341099843383, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0076799365878105165, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0068086906522512435, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.003277140995487571, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.010141536286100745, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.007519707614555955, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.01914324700832367, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00933252883143723, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.018519945675507188, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.016389876175671815, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0185979225859046, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.033063797373324634, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.15608137998729943, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.23435819949954748, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5113376028090716, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.4137979597505182, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012406471576541662, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012367309425026179, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012007119143381715, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011458975011482835, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010857428875751793, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009859322551637888, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00840129008051008, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006750781612936407, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005387367149814963, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.004856645485851914, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004660436225822196, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0045605703821638595, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004531762608094141, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0045072495756903665, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004488360209506936, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004463734198361635, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004417546975309961, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004358120609540492, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004281459187041037, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004204523674852681, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0041086735427961684, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003985771635780111, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0038209230480424593, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003613252674258547, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0034072833159734727, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0031679820708814077, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002961410932475701, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.002781819569063373, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.002537783596017107, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.002184548603872827, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0018532904775202042, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0015980020170354692, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0016811711936588836, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0012068709990603567, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0017453970775864035, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0017973185999562702, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0012608984910161781, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0026637797395613915, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.002099272604646103, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00531549502508426, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0019122430387845668, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0033525959333621813, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0031944327845271347, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00402150336522908, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.006066703656802001, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02175890737474636, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03262567938572547, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04792558299676095, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03892830685592788, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.704456090927124, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4998568296432495, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1801789999008179, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9042382836341858, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.6791938543319702, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.445255309343338, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.2746444344520569, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.17046508193016052, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.11388873308897018, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.08908192068338394, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.07641223818063736, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.06563633680343628, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.05932922288775444, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05324697867035866, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04916217550635338, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.046161290258169174, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04319370910525322, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04021679610013962, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03768342360854149, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.035646673291921616, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0336722657084465, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03181840479373932, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.029930274933576584, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.028458189219236374, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.02753022313117981, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.026782751083374023, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.026528717949986458, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.026745418086647987, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.027718650177121162, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.02933458983898163, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.031202062964439392, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03301379084587097, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0328170508146286, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03522671386599541, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03643948957324028, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.0381101593375206, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.041157472878694534, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.042334575206041336, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.06148253381252289, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0829622820019722, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.07075968384742737, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.07471809536218643, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08901505172252655, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08446740359067917, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.13165897130966187, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5989099144935608, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1338282823562622, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.3546539545059204, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.3342856168746948, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6344246031746031, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7415674603174603, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8809523809523809, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9340277777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9523809523809523, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9642857142857143, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9702380952380952, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9734623015873016, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9774305555555556, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.984375, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.988343253968254, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.984375, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9831349206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4317748245527777, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6165776725038109, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8475391294187601, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9292112950729996, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9504725546342074, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9654847033629589, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9709080791547317, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9740045898965103, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9770313371713054, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9782875915220046, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9786615310041733, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.980769360367455, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9815600668557783, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9821994868285602, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9828931654987065, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9837237364556964, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9852021676371744, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9851071585901844, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9860308363760701, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9867540683808629, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.986872843440169, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9877609381105104, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9874192278746057, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9880299178283558, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9890069865205146, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9894118837472414, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9896254880945089, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9896615607642099, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9893558438398126, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9896485400343897, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9895473099883044, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9892109426215338, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9910625274177686, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9897048911762618, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9891613858829131, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.989584062222717, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9865267419505136, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9889706884124482, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9887186422225369, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9834429784210592, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9857337057706126, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9859443813127787, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9852451300253077, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.986393632474898, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9817301973316643, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9815974049438484, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9829240374615673, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9791625684378021, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9814061174420966, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.005318205673247576, "validation/loss_best": 0.0328170508146286, "validation/acc_best": 0.9915674603174603, "validation/f1_best": 0.9910625274177686} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.16569634240120648, "train/grad": 0.0776956123765558, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.6262360382080079, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4140575408935547, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.090771141052246, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8208265113830566, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.6071217823028564, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.39244153738021853, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.24170015662908553, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.15601278133690358, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.11134714958257973, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.09146764378994704, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.08108684641309083, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.07188035205937922, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06606833402998745, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06019002594985068, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.05579329497180879, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.052307369774207475, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04844707523472607, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.044467794047668575, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04063330431468785, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03748834890313447, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03393174758180976, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03061382895335555, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.027130638333037495, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.023614835627377033, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.020229444289579988, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.016420880071818827, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.013415664685890078, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.010954338004812597, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.008190498370677233, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.005487235272303223, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0039015924651175735, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.002936504501849413, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0024684187304228546, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.002130395406857133, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.005473061241209507, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0012345492746680974, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0024871918838471176, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0017939798440784215, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.004203204140067101, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.008949866481125356, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.007293584607541561, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00839145353063941, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.009182828273624181, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.01080643675290048, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.017709980830550195, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1218685719743371, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.10685493932105601, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.2247203857265413, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.2763798754569143, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012404700824990868, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.012278648209758103, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.011800531432963907, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011200508968904614, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010523797343485058, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009269242570735515, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.007558154531288892, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.005945725763449445, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.005034534573787823, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.004733503659954294, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004637384337838739, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004584865988581441, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004561904158908874, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004533990394556895, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004498227786971256, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004458773355872836, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004399312215100508, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004312390064587817, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004193443900439888, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00407968875428196, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0039333061291836205, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0037684341850399506, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0035782430588733403, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003376312684486038, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0031823899431037716, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0029495677141676423, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0027267858977938885, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.002489680960752594, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0021425741532584653, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.001657708241345972, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0013512856047600509, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.001082921593924766, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0008869551560383115, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0007124699439009419, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00152174963566722, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0005296954052710134, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0009047723915364259, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.000933434891098841, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0014106041998316243, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.002497894213561587, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0017153289089283418, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.001804992299657684, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0022032812820568016, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.002691670705842455, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0036271700585015676, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.015579957665418887, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01856651023063477, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.028992354073676882, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03328081564357576, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5774118900299072, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.3611642122268677, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0352903604507446, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.7668325304985046, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5566487312316895, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.35022735595703125, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.21081236004829407, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.13697300851345062, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.09823587536811829, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.08003772795200348, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.0699748694896698, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.0610625296831131, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.055551156401634216, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.050231028348207474, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04647092893719673, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04375271871685982, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.0409867949783802, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03846508637070656, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03637305274605751, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03479873761534691, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03328508511185646, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03187937289476395, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.030131172388792038, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.028679031878709793, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.02729044295847416, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.026260733604431152, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02589656412601471, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02583450824022293, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.02586652711033821, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.026454908773303032, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.02749716490507126, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.027915211394429207, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.02816298045217991, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.02948920987546444, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0383269302546978, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.035428378731012344, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.036214858293533325, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04852140322327614, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.05973237007856369, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.07847614586353302, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06475555151700974, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.07894203811883926, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08071207255125046, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.09315367043018341, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08016134053468704, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3977068066596985, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7558904886245728, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8966665863990784, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.1177217960357666, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6949404761904762, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8100198412698413, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9107142857142857, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9451884920634921, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9593253968253969, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.96875, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9717261904761905, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9769345238095238, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9779265873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9799107142857143, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9816468253968254, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.986359126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.988343253968254, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9928075396825397, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.988343253968254, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9826388888888888, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5362070249285457, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7274373826535258, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8939626026457375, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9442251277798073, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9592759984929667, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9693762499875958, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9727193061961662, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9765035567192958, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9773874216510751, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9790280336152142, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9800356912499133, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.981387489336577, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9826376510850773, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9841480155257167, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9836133714588717, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9835650640056353, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.984328157346415, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9849139372735023, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9854787002046473, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9868340819613941, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9866892332298302, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9868415198801669, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9875575508942331, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9879972299843546, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9896175334713468, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9912928224622658, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.991112534654958, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9895889424636197, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.991358605212814, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9906803565756496, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9903185339614569, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9910623086794347, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.990603352639299, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9911469423516112, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9888120942896252, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9908256332600593, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9899555930920588, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9888536286068176, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9860243475386943, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9872446081058079, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9852063982224099, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.986961652268868, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9875347559127343, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9843922437849745, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9850432514607872, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9869513767699893, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9870006474321484, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9837733045479871, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9829731241633668, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.002936504501849413, "validation/loss_best": 0.027915211394429207, "validation/acc_best": 0.9928075396825397, "validation/f1_best": 0.9910623086794347} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.1397585577145219, "train/grad": 0.0562178856972605, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5217549896240234, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.301326789855957, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.975125904083252, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.7113662242889405, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5093535017967225, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.31661360859870913, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.19206540271639824, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.13053299333900215, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.09884704031050205, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.08376592112705111, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.07542606882750988, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06778360051102936, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0627546835038811, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05736865230835974, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.053322057146579026, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04996480979956686, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04618229381740093, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.042316560940817, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03850060223601758, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.035345293823629616, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.031710971556603906, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.028262236416339875, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.024649866772815585, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.021010420406237244, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.01736393122933805, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.013269412154331804, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.009999686330556869, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.007497112238779664, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.004924838170409202, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.003010246269404888, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.001858212063089013, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.001213648747652769, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.000811731768772006, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0011589272413402795, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.002363047683611512, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0006957458425313235, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0002835835888981819, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0003565128147602081, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.001862375969067216, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0031433916743844746, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0016972895711660386, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00486349712125957, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.002585562728345394, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0036510893516242503, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.012217069854959845, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.041264330968260765, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.04709339926019311, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.08855416420847177, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.1010801558662206, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.012235557432286442, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.011961382189765573, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.011319309147074819, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.010609854017384351, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.009757915218360723, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.008238394826184959, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0064429205353371795, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0051868198136799035, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.004649402464274317, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.004509357828646898, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004470452812965959, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004461707494338043, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004451552510145121, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0044221307215048, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004390826298185857, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004354033386771334, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004295947971259011, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00421785249360255, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004112891112090438, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.003994140498471097, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003814062622768688, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0036302796402742387, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0034162616466346662, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003193103995145066, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0029566373111447317, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.002630123784620082, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0023084221384488046, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0019753208604379323, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0015023584405207658, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0010308422896378032, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0007200192996788246, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0005020808309018321, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0003103817668670672, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0003664851553708104, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0008866695485903619, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00025148533360493276, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0001772394263764454, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00025978418429417616, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0006180811335926251, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0016622918636385986, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0005702127114753708, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0012733733586353679, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.001066095594182972, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.001725686673025688, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0026917383434231735, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.009963233870713796, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.011984777555269917, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01659536051436489, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01514780547132421, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.4731786251068115, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2501276731491089, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.9245623350143433, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6656768918037415, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.46998390555381775, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.2865281403064728, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.17265719175338745, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.11806611716747284, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.08896497637033463, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.07443348318338394, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.06628040969371796, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.058771438896656036, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.05393217131495476, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04923732206225395, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04572639241814613, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04305887967348099, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.040418580174446106, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03781610727310181, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03580571338534355, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03423807770013809, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03263096511363983, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03132084757089615, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.030370330438017845, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.029596947133541107, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.029185041785240173, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02924737147986889, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03020334616303444, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02941592037677765, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.02722190134227276, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.027806080877780914, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0283803828060627, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.028361964970827103, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0290589090436697, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.0290914885699749, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03576745092868805, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03448450192809105, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03775473311543465, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04825262725353241, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.05009612441062927, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06886868923902512, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06485866755247116, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06838073581457138, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.07237900793552399, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.09167707711458206, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.07991594821214676, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.32157450914382935, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.8089921474456787, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8541068434715271, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.8428679704666138, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7559523809523809, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8559027777777778, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9280753968253969, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9513888888888888, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9632936507936508, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9692460317460317, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9729662698412699, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9774305555555556, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9801587301587301, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9813988095238095, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9898313492063492, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9848710317460317, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6412290393908612, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8113372409399424, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9224557479112215, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9498685859746807, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9641831825175021, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9701906139956202, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9734695554514013, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9765713254600448, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9777316653170535, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9788454371574685, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.979796011595552, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9816897459023398, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.983349954245134, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9831973930957464, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9834684074104045, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9844647235025754, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9846581128128244, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9849282365737182, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9862455657244893, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9864712077519775, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9867446463582109, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9871190919125701, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9877399279913126, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9888284706144825, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9898267075637932, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9905758391467794, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9900751980393252, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9898863020705859, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9903857809932876, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9907899498383864, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9911508602189297, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9909218739315426, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9918778081212671, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913944469641378, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9891264512821684, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9906491670112633, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9888918480019141, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9895363954915906, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.988827723564289, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9883262541364014, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9863681232697753, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9873451931949552, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9875022393023172, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9865173754229046, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9885001015682972, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9889936186803518, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9839260491152801, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9824223029860045, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9820647318731103, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.000811731768772006, "validation/loss_best": 0.0290589090436697, "validation/acc_best": 0.9925595238095238, "validation/f1_best": 0.9918778081212671} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.12399668481200933, "train/grad": 0.04408538022078574, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4268150329589844, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2014744186401367, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.8774178409576416, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6243081140518189, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4369469749927521, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.2656234785914421, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.16371139910072088, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.11704692190513015, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.09216285785660147, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07978559870272875, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.07273436586372554, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0660447069723159, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06150082061998546, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05640510406345129, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.05243681701831519, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04914353125728667, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04539576241746545, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.041567195225507024, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.037725166818127036, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03463776059448719, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.031017884984612464, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.027461731527000665, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.023679324919357897, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.019702406683936715, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.015763084683567286, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.011559906248003245, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.00830746553838253, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.005922013595700264, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.003550451099872589, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0018261256255209446, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0010814893618226052, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0007529708836227655, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0004436987265944481, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00034110781736671923, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0006211933307349682, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00022123008966445923, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00018198758363723754, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0002243109978735447, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00021147231571376325, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0011524578835815191, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0003090427443385124, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0007994839083403349, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0008439506310969591, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0013724380824714898, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0012649621348828077, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.008791590351611376, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.03423000027425587, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.03583160617388785, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.03548841536976397, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01205506970640272, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.011690691830590367, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.010960895651951432, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.010169754684902728, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.009164006968494504, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.007474756981246174, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005725330151617527, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.004784580752020702, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.004410887118428946, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.004314101614872925, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0042960964411031454, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004299330648500473, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004299624088453129, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004283815957023762, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004245888146688231, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004199797422043048, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004130028165527619, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004029169123095926, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.003904346659546718, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0037916255203890616, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003641387051611673, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0034770514238334728, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0032808182790176944, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0030312869235058315, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0027221459038264586, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0023269202246592614, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0019404052043682895, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.001585761487694981, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0010993582885930665, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0006258937329039327, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00038699162105785943, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0002714886322382881, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00016074496544149497, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00012847365878030815, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0003561714536942873, "train/grad_035_lr6.0e+00_wd1.0e+00": 9.52037942977313e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 7.419916258953663e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00014563015316980453, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00012477656459907393, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0005313061305418998, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0001567711841786945, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0002846126861380327, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00046853750386574686, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0005656908877534761, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0006627196850845962, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00296548196372374, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006662950692920677, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.009979569044384124, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.009316416273830766, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3906704187393188, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1642003059387207, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.8417575359344482, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5918409824371338, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.40874093770980835, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.24392713606357574, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.1493338644504547, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.10635780543088913, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.08240014314651489, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06996007263660431, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.06277970224618912, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05582452192902565, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.05144951865077019, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04681459814310074, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04356186464428902, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04097114875912666, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03847861289978027, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0360095240175724, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.033896442502737045, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03236177936196327, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03092673420906067, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.029631027951836586, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.028847582638263702, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.0282193161547184, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.027354935184121132, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.026492951437830925, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02600734867155552, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.025820115581154823, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.02611633948981762, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.02670464664697647, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.02764452062547207, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.028315454721450806, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.02861056476831436, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.029259102419018745, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03245852515101433, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.034162215888500214, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03574410080909729, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.046711284667253494, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04906163364648819, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06564734876155853, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.062350016087293625, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06761561334133148, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.07269996404647827, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.07749737799167633, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.06870267540216446, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3258790671825409, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6718766689300537, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.6711013317108154, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7440070509910583, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7958829365079365, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8816964285714286, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9375, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9548611111111112, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9657738095238095, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9712301587301587, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9739583333333334, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9769345238095238, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9809027777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.984375, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.988343253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9928075396825397, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9930555555555556, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9898313492063492, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9846230158730159, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7022960569915406, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8521461344707235, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9333780356539023, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9542447312177784, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9673340271840435, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9720809293971174, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9739004539623588, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9760896856483819, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9780956032439606, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9798190241151684, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9806335347286715, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9825859332971157, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.983224361538795, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9833114423634964, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9839259002522047, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9858774675313794, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9863655306133782, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9869217828151255, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9869219468128201, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9873281717743578, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9872706329423222, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9874408748547704, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9872375082378501, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9883640631070959, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.989803953523755, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9905763916041633, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9909355035911115, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9914881652799175, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9905447202877479, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9905918345692551, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.990513837659312, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9910426622879427, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9915120019885636, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9912122216273828, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9902654297883635, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9898087664365728, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9892494680782882, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9893071354469412, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989539509417716, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9894885828093204, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9862994183319045, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9881917620837851, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9860243868706797, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.988724314591964, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9891940295236995, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.98716635459602, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9861031456172858, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9811621048429529, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9825278570697245, "id_best": 27, "lr_best": 0.00047999999999999996, "wd_best": 0.05, "train/loss_best": 0.005922013595700264, "validation/loss_best": 0.025820115581154823, "validation/acc_best": 0.9930555555555556, "validation/f1_best": 0.9914881652799175} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.11262614034116268, "train/grad": 0.03627897100523114, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3529941177368163, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1243158531188966, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.8026320934295654, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5575145244598388, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.3817726439237595, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.22745545566082, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.14304840859025716, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.10596826785244047, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.08569361431524157, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07511749744415283, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06895703756250442, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0628230209928006, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05858097380958498, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05373863631859422, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.049952167021110654, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04674564843997359, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04320173604413867, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.0395359377656132, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03579346302896738, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03263130290433765, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.028970323586836457, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.025245415037497877, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.021308503011241556, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.017286714389920233, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.013452501809224486, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.009346680147573352, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.006308778338134289, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.004267424792051316, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.002501188209280372, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0013623276352882384, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0008559871651232242, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005929511319845915, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00040222995914518834, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0003040701150894165, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0002417601365596056, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00017439641058444977, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00015802938491106034, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.00011936819180846214, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0001251038536429405, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.00018008901737630366, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00019170401617884635, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00041888797655701635, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0005223722849041223, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0008606214076280593, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0002815551869571209, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0017915610689669848, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.009800588618963957, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.003523223167285323, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.01961425730958581, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.011959722079336644, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.011539398855529726, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.010759870465844869, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.009897181214764714, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.008782919747754931, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.007008619441185146, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0054206156311556696, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.004724146436201409, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.004493740785401315, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.00445268300245516, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004461843318422325, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0044712918775621805, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004467273953487165, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004434652712661773, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004380333633744158, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004312770260148682, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004228594297019299, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004116962821572087, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.003979046746098902, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0038484509616682774, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0036823000393633263, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0034825733146863058, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003272051124513382, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0030250052731571487, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002730755428056, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00227375087531982, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0017655202685273252, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0013231752967840294, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0008656421166779182, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0005038166690792422, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0003122747672159676, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.000212495331993523, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00014486606273749202, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0001122797431798972, "train/grad_034_lr5.1e+00_wd1.0e+00": 9.825317562956571e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 7.480102469287431e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 6.148472618747292e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00012649118576007367, "train/grad_038_lr9.8e+00_wd1.0e+00": 5.7050957128268465e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.000100363106448631, "train/grad_040_lr1.4e+01_wd1.0e+00": 8.34285826179837e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0002793102844019302, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0002518789547180988, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00039030876043618436, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0002856871293455754, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0017234156230857827, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0024228139435973042, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.002504890832279973, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.005017956516853902, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3278919458389282, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1000958681106567, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7812466025352478, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5391480922698975, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.36633315682411194, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.21567285060882568, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.1350575089454651, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.09911151975393295, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07838600128889084, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06736073642969131, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.06066806986927986, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05428660660982132, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.05007234960794449, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04556770622730255, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.0424254834651947, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.039970602840185165, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03756178915500641, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03522917255759239, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03309984877705574, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03152510151267052, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.029938196763396263, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.02845270000398159, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.027316590771079063, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.026178322732448578, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.025485562160611153, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02504231035709381, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.025017239153385162, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.025109054520726204, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.025668542832136154, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.02665509097278118, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.02794850803911686, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.02858155220746994, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.028944678604602814, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.029405491426587105, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03139141947031021, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.034272659569978714, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.035485316067934036, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04649882763624191, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04827351123094559, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06252538412809372, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.060701921582221985, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06737793982028961, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06906759738922119, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06828977167606354, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.0626981183886528, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.28612715005874634, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6287369132041931, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.5374912023544312, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.5679724216461182, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8249007936507936, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8970734126984127, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9449404761904762, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9595734126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.966765873015873, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9722222222222222, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9761904761904762, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9781746031746031, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9799107142857143, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9818948412698413, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.986359126984127, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.988343253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.988343253968254, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.986359126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9856150793650794, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7538685178555778, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8752870426004012, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9437311044755043, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9598183483445022, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9681307371467027, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9728430160149403, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9757934102334583, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9773433229043427, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9790293868237908, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9805259671524654, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9818754865363177, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9824060078378027, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9833797422342191, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9843980871755809, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9855504280155074, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9866750931598993, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.987032500316897, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.98759401779201, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9874663413763805, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9882860698863193, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9882777723007966, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9882003358061562, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9885543438759502, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9891885164302463, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9908239029784992, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.990722943685378, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9905463084833754, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9909505408350422, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9904559661556073, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9898277154308565, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9904231019530736, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9901682545195223, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9909759297720099, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9915728741581242, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9904424096692946, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9903903415936423, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9894282332798161, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9892635441280078, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989539509417716, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9885181190850271, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.986494866138199, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9883187521170794, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9868029208020989, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9886066007056977, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9886061182181611, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9874981254542936, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.98498306728422, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9843179561093097, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9845526158570765, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 0.009346680147573352, "validation/loss_best": 0.02504231035709381, "validation/acc_best": 0.9925595238095238, "validation/f1_best": 0.990722943685378} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.10646779254078866, "train/grad": 0.03218758095987141, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3089826202392578, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.0783067703247071, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7582872295379639, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5189389443397522, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.3511526596546173, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.20780881702899934, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.13382257718592883, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.10175114194862545, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.0836092568282038, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07388857690617442, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06816764709539712, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.062188508138060566, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05790313166566193, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05305702218785882, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04907689205370844, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04568524044007063, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.041967376256361606, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.037988177612423894, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.034063491970300674, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.030715699661523103, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02685342764481902, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.023055860362946986, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01898641562089324, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.014965609339997172, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.01126467641443014, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.00765228078700602, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0051972299441695215, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.003511833194643259, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.002012459635734558, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.001118765827268362, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007179251592606306, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005005642026662826, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.000343426950275898, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.000276249460875988, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00020812109112739564, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00015973879024386406, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00014773034490644932, "train/loss_037_lr8.3e+00_wd1.0e+00": 9.577889926731586e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00010858846828341484, "train/loss_039_lr1.2e+01_wd1.0e+00": 5.4932944476604464e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00015419390983879567, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00020675860345363616, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00018032082356512547, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0004836450982838869, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00030320020392537116, "train/loss_045_lr3.1e+01_wd1.0e+00": 8.571013808250427e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.40190190076828e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.000400865999981761, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.00045980270951986315, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01198302469216287, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01153982653748244, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.010745642483234405, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.009830265787895769, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.008638890858273953, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.006811176431365311, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005351603608578444, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.004767828551121056, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00456004825537093, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.004514750686939806, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004513920946628787, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004514576795045287, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004495011730468832, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004455525099183432, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004397911084233783, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004326097032462712, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004234335592482239, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0040996332801296375, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.003938638564141002, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.003786426561709959, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0035913774151413234, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003396958131925203, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0031391612748848273, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0028311484506411944, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0024553693577763626, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00195630822890962, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0014932192726701031, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.001088512932183221, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0006635127439767529, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.000382439032982802, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002509951840102076, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00017411272820027078, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001224513433953689, "train/grad_033_lr4.3e+00_wd1.0e+00": 9.841374352390631e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 8.137600020575065e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 6.647286834606802e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 5.69748483951571e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 4.436857662383886e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 5.1909624678501134e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 3.5652540371984284e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 6.776149006292087e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 9.882488722780636e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 8.659438345894444e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00020244196492399168, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00012519921180143, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0003254683445039442, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0007539108724805578, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0003269932404130396, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0010445816615311072, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2825161218643188, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.0542147159576416, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7387641072273254, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5028808116912842, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.33763909339904785, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.19747833907604218, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.12620744109153748, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.0944572165608406, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07576540112495422, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06553641706705093, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05938863381743431, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.053349923342466354, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04927600175142288, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04508921131491661, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04207128286361694, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03956002742052078, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03717370331287384, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.034743864089250565, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.0326564721763134, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.030845554545521736, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.029120678082108498, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.027744991704821587, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.026651306077837944, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.02588074840605259, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.02535656839609146, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.024943837895989418, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.024807928130030632, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.025052260607481003, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.0259646438062191, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.0269638579338789, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.028135770931839943, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.028757264837622643, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.02897733822464943, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.029360760003328323, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03108896315097809, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03451773524284363, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0356299988925457, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04595477133989334, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04736018925905228, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06028754264116287, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.05910377949476242, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06621591001749039, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06490882486104965, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.0663253515958786, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.05819445475935936, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.2588757872581482, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5495610237121582, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4732886254787445, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.5590435862541199, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.84375, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9060019841269841, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9474206349206349, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9615575396825397, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9682539682539683, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9724702380952381, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9766865079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9786706349206349, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.980406746031746, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9821428571428571, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9903273809523809, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.986359126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9853670634920635, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7906204532139975, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8883798013829602, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9464900039372901, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9620490935621928, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9692934524172487, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9730936747748726, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9758345967051728, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9779933713734386, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9793897076046447, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9804656938166303, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9824174401730635, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9840836029313607, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9843609377062369, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9842015735463564, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9849707285120867, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9859282847773481, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9865661512491031, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.988370178682018, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9889542087423225, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9886461179181005, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.989058164406654, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9885587917847987, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9887761474991217, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9897203989597992, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9910067888916727, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9912740979712243, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9912753499481837, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9912770923845613, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9912770923845613, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9906488416598104, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9906488416598104, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9911672552360754, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9913332998785135, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913925523144659, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.990669787539713, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9898535058784868, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9892494680782882, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9890399529906998, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989539509417716, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9901839189355846, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9857280477432282, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9869405982613552, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9869822851364767, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9884529917952187, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9895771698334958, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9873195722610844, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9861715683450912, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9838176052827663, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9837146396225604, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 0.00765228078700602, "validation/loss_best": 0.024943837895989418, "validation/acc_best": 0.9925595238095238, "validation/f1_best": 0.9912740979712243} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.10171843450516463, "train/grad": 0.0314426083676517, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2627516555786134, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.0332295417785644, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7187783432006836, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.48661505460739135, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.3262116366624832, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.19261831298470497, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.12678458746522664, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09794286378659307, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.08124458592385053, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.0722227725200355, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06673640566878021, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0610318607930094, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.057050076508894564, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05243274389766157, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04862353509292006, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04545904085971415, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04177851374261081, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03796001194044948, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03403174638748169, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0305669120978564, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02652098536491394, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.022508708890527487, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01823125947266817, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.013988355286419391, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.010123572144657374, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.006377936070784926, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.004047975484281778, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.00266880271025002, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0015836456324905158, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0009289589896798134, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0006238447688519954, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00044725488871335986, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00031011033803224564, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00025145492516458033, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00018976470455527305, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00014568754471838474, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00013504075817763806, "train/loss_037_lr8.3e+00_wd1.0e+00": 9.512493386864662e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00011769924312829972, "train/loss_039_lr1.2e+01_wd1.0e+00": 6.020808592438698e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00014557735994458199, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.000135493203997612, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00011843938380479813, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00018971425481140614, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00016736481338739396, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.0347611755132677e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.4901161193847656e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.792748510837555e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.6253162175416947e-05, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.011898794178850949, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.011424378589726985, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.010608448763377964, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.00965173428878188, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.008419798112008721, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.00660826733103022, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005322341558057815, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.004867036361247301, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.004736850708723068, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.004721011229557917, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004731587574933655, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004728964219102636, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004718928300426342, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004674249642994255, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0046003948041470726, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00451841763802804, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004393082489259541, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004243188594118692, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004051045462692855, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0038644356925215105, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003633744306425797, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0033766031148843466, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003061052223711158, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0026936221127107276, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002271880868647713, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0017301303856220329, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0012537348795012805, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0008966572803547024, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.000565098018887511, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0003351818952069152, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002215178561709763, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00015575387493754533, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00011068828752058835, "train/grad_033_lr4.3e+00_wd1.0e+00": 9.01622867218066e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 7.2958293933425e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.944042179436337e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 5.234498507945773e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 4.308028243883655e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 4.924154378912249e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 3.146608899742276e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 6.271909181570834e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 6.697346832396534e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 7.37939812385946e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00011000765995108352, "train/grad_044_lr2.6e+01_wd1.0e+00": 8.488561209908596e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.6215993905513616e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 3.0666242939157305e-07, "train/grad_047_lr4.3e+01_wd1.0e+00": 2.5655293957549065e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 5.695793629607137e-05, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2518738508224487, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.023359775543213, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7105422019958496, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.4791303873062134, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.31887805461883545, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1857437938451767, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.12054033577442169, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.09138815104961395, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07402876019477844, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06433306634426117, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05843450501561165, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05263243615627289, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04867497831583023, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.044392574578523636, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04143046960234642, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.039097923785448074, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03664863482117653, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.034296609461307526, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03218412026762962, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.030484085902571678, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028895661234855652, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.02746587060391903, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.026467569172382355, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.025632869452238083, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.025004900991916656, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02458341233432293, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.024593383073806763, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02510138601064682, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.025921564549207687, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.026940181851387024, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.028053630143404007, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.028798891231417656, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.029228847473859787, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.02944904752075672, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.031111449003219604, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03453335911035538, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03561081737279892, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04537222161889076, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04678579792380333, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.059077657759189606, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.057775627821683884, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06440755724906921, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06288366764783859, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06370667368173599, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.05599780008196831, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.24265225231647491, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5202101469039917, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4387242794036865, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.5025207996368408, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8568948412698413, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9097222222222222, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9486607142857143, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9625496031746031, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9682539682539683, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9724702380952381, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9766865079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9809027777777778, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9828869047619048, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9918154761904762, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9900793650793651, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9856150793650794, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8143336196555828, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8951438738000671, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9479638266024045, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9642361703661422, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9693893781311422, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.972908786962103, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9758371255018679, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9781725877984078, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9796460201432059, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9810045091131452, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9825972091722546, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9831828161908391, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9836784773277957, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.984021540585774, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9857805690305894, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9865084406851264, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9868776476029583, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9883756525922155, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9889040132174252, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9886864792703831, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9889979505685998, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9893800565753, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9892753815380454, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9904215637190605, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.990457818357905, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9907705612592782, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9905013002259734, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9905013002259734, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9903235940312152, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9908741574896063, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9908741574896063, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9909713790898529, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9909739336538851, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913925523144659, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.990669787539713, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9905726319680603, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9892494680782882, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9888609448825811, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989539509417716, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9902269326821102, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9859131544058564, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9878265783689977, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9869809966984457, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9884282844790807, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9895850247811476, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9874426274606807, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9863504087146033, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9838176052827663, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9837131390502698, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 0.010123572144657374, "validation/loss_best": 0.025004900991916656, "validation/acc_best": 0.9923115079365079, "validation/f1_best": 0.990457818357905} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.0976882504671812, "train/grad": 0.030293477112427355, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2323704147338868, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.0029336738586425, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6911684083938598, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.46357826232910154, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.30850775629281996, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.1814691825211048, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.12048672430217267, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09383563500829041, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07820464473217725, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06956511492840946, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06413252553902567, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05852607615292072, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05433629188686609, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.049594142381101845, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.045728208562359214, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04247803423553705, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03881330687552691, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03488327207975089, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.030970689253881575, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02763894378207624, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.023756452659144996, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.019997849613428115, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.016067584110423923, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.01229581075720489, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.008879809714853764, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.005623897770419717, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.003561295745894313, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.002330551324412227, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0013727831933647394, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0008064467366784811, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005398108065128326, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003854616824537516, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00027383505366742613, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00021632283926010132, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00016033058986067772, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00013222090899944305, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00012424411252141, "train/loss_037_lr8.3e+00_wd1.0e+00": 8.071547374129296e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00010094890370965004, "train/loss_039_lr1.2e+01_wd1.0e+00": 5.625822581350803e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0001356474868953228, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00011828596703708172, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0001272113062441349, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0001914344262331724, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00015870669856667518, "train/loss_045_lr3.1e+01_wd1.0e+00": 6.5390486270189285e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.5029294192790985e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.279477193951607e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.36465847492218e-08, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01175507687497884, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.011267456850036979, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01043582208454609, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.009440189562737941, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.008191673760302364, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0063733844528906045, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005125587965594604, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.004667161799152382, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0045084935473278165, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0044717274379217995, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004460287388064898, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.00444877847970929, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004414698878536001, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004358428616542369, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0042736479814630005, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004195270211494062, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004079799221653957, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.003934992397844326, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0037707010877784343, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0036107579598319717, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0034076858914340847, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0031806350604165344, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0029023253278865012, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0025556862614757848, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0021353727470705052, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0015916107656812528, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0011283622000701143, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0008016198183395318, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005050248038605787, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00030183808440597203, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00020252017535767663, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00014551954132457467, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001073334392367542, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.48725496575753e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.723001601386613e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.5912188580578e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.8391893318466825e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.855629316319664e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 4.6546080105258626e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 3.0064453748650522e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 6.0936343304547334e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 6.298070732100314e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 7.562107849066635e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00010307901834806899, "train/grad_044_lr2.6e+01_wd1.0e+00": 8.169646428211053e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.600438396508022e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 6.550331314738271e-07, "train/grad_047_lr4.3e+01_wd1.0e+00": 5.286995170986344e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 2.194117439332638e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.233054757118225, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.0046287775039673, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6933676600456238, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.4646908640861511, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.3076647222042084, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1790783405303955, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.11719390004873276, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.08949122577905655, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07289361208677292, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.0635199323296547, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05782369151711464, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05209704861044884, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04829343780875206, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.044142499566078186, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04117860645055771, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03880878910422325, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03648398444056511, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.034198347479104996, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03207672759890556, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.030421381816267967, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028922969475388527, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.027586251497268677, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.026739947497844696, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.02572847343981266, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.025377757847309113, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.025047145783901215, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02509026601910591, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.0255338903516531, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.026290766894817352, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.027573172003030777, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.02861141972243786, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.029176268726587296, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.029351240023970604, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.02954782173037529, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.031206073239445686, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03450043126940727, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.035567961633205414, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04517325386404991, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04655594751238823, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.05796663835644722, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.057453859597444534, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06361645460128784, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06183503195643425, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06240846961736679, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.05465594306588173, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.23323367536067963, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5024458169937134, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4184606373310089, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.4802277982234955, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8608630952380952, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9154265873015873, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9506448412698413, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9630456349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.96875, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9729662698412699, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9769345238095238, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9809027777777778, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9828869047619048, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.988343253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9920634920634921, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9928075396825397, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9900793650793651, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9856150793650794, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8203084695794186, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9031489000759085, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.948984460232892, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9642823726456268, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9696700116520353, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9734058327503464, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.975652214292885, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9777332717663187, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9796460201432059, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9810045091131452, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9825972091722546, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9837238001323103, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9829076828336772, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9842013892967783, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9857805690305894, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9863283384275329, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9865183217091452, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9880583194810726, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.989309453161654, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9892800245437153, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.988550244458654, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9894494414746043, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9898354261371878, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9902790069178012, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9906305819142525, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9907705612592782, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9909505408350422, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9906812798017371, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9912253341317265, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9902333444136505, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9915985985504101, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9910213710415627, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9913356114817599, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9913023092038955, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.990669787539713, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9903903415936423, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9892494680782882, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.989405520517553, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989539509417716, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.99021295434277, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9864360060289881, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9878265783689977, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.987159695660034, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9886066007056977, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9893984766644477, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9876094042271061, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9864611966813438, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9838176052827663, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9837131390502698, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.0005398108065128326, "validation/loss_best": 0.02861141972243786, "validation/acc_best": 0.9928075396825397, "validation/f1_best": 0.9915985985504101} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.09894469723105431, "train/grad": 0.030046923868358135, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2243629837036132, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9949231147766113, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6843797874450683, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.45912292957305906, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.3065479776263237, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.1830295892804861, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.12489216733723879, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09960808334872126, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.0846360132098198, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07624409382231534, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.07085335937328636, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06505692578852176, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0608108764141798, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.055703073237091304, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.051468937313184145, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.047977853687480095, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04400910742580891, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03975875403732061, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03544504257850349, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03170995845459402, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02732434505596757, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.022899952074512838, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.018148521361872554, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.013459016820415854, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.009282959569245576, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.005602632379159331, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.003503094473853707, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.002322232620790601, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0013876913860440254, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0008428243733942509, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005771948024630546, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00042266235686838627, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00029760784469544887, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00023799458518624305, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00017810841090977193, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0001398136094212532, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00013296819292008878, "train/loss_037_lr8.3e+00_wd1.0e+00": 8.864366449415684e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 9.947311133146286e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 5.756632424890995e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00014448391273617746, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00013088482432067395, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00011987778358161449, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00018554427661001682, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0001498014573007822, "train/loss_045_lr3.1e+01_wd1.0e+00": 8.696727454662324e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.175290137529373e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.574378788471222e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.2715347111225128e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.011781731522642076, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.011311366120353342, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.010498251770623029, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.00948115428443998, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.008154177262913436, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0062020091409794986, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0049642453272826965, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.004548786827945151, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.004417342623928562, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.004401658234419301, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004397175117046573, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004385964488028549, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00435447405994637, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004293355313420761, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.004208656296832487, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004119136379449629, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.003998826546885539, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0038439134357031433, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0036613088056037667, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00347984807100147, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003246497173240641, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0029935476317768917, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0026815960391104454, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0023168444586917757, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0018939335588220275, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0013794433009024943, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0009704517565114657, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006933109066449106, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0004359805476269685, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00026907649096756357, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00018282555248333665, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00013418132442893693, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.750965792136412e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.934578821391369e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.273859816701587e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.173149711538372e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.541553150289701e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.7552013271806575e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 4.221433704387323e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.9092448514078662e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 5.562263776536014e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 6.16677658948106e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 7.306424235167697e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 9.03398402724065e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 7.453444029216883e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 2.1539612412721366e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 7.427647613461191e-07, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.2484114575967167e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 5.075845515429475e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2233003377914429, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9950628280639648, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6845938563346863, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.45724472403526306, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.30183520913124084, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1755058616399765, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.11547023802995682, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.0885825827717781, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07229142636060715, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06311670690774918, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05754169076681137, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.051775023341178894, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04803895950317383, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04392852634191513, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.040994539856910706, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03867783769965172, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.036262914538383484, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.033961907029151917, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03192134201526642, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.030271215364336967, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028649406507611275, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.027294572442770004, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.02641669660806656, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.02550816722214222, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.02497098781168461, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.02470974251627922, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02475971169769764, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.025279978290200233, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.02609674260020256, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.02725937031209469, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.028306063264608383, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.029013022780418396, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.029405197128653526, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.029641814529895782, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.031059877946972847, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03455270081758499, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03550262749195099, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04488144442439079, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04640418663620949, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.05762213096022606, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.057154763489961624, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0632498636841774, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.061490099877119064, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.06196937710046768, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.05418762192130089, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.2271297574043274, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.49400758743286133, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4073861837387085, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.4673122763633728, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8653273809523809, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.917906746031746, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9513888888888888, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9630456349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9694940476190477, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9732142857142857, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9771825396825397, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9784226190476191, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9806547619047619, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9831349206349206, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.984375, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9905753968253969, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9915674603174603, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9928075396825397, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9908234126984127, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9903273809523809, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.986359126984127, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9858630952380952, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8269907937279675, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9072142433493566, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.950090203301765, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9642804799672763, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9702040692245714, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9736764020719073, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9762450316046335, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9774518234708273, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9791044722405945, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9811862013999215, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.982777590953013, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9837238001323103, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9836788919690566, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9842015735463564, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9853849727893188, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9868665854774167, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9869216468144396, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.988192187076348, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.988450467002907, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9886450751939526, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9886791432665583, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.98916552241553, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9897921909239811, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9898656792081685, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9910005927897927, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9911746142913879, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9907718629965362, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9905013002259734, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9901432704612337, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9910528922055125, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9911913635329035, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9909713790898529, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9915120019885636, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9911219873602372, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.990669787539713, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9902107004436373, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9892494680782882, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9895845286256717, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989539509417716, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9903913054087334, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9862678573608347, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9878265783689977, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.987159695660034, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9886073606584831, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9895771698334958, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9876094042271061, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9866396806366536, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.984721579786523, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9839383139953167, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 0.005602632379159331, "validation/loss_best": 0.02470974251627922, "validation/acc_best": 0.9928075396825397, "validation/f1_best": 0.9911746142913879} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.09812149243429304, "train/grad": 0.029984241304919123, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2164644622802734, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9882491493225097, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.67994215965271, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.4563879871368408, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.3047683137655258, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.1823403598368168, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.12492108877748251, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09970728708431124, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.08456446015276015, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07587241659872233, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.07025854110717773, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06420141875743866, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05965273255482316, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.0543719947617501, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0501386586856097, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04654177590273321, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.042553099291399124, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03835525118745863, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.034117137314751744, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.030449864380061628, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02611844999715686, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.021797700822353362, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.017316173836588858, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.012921297335997223, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.009052088875323534, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0055997393559664484, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.003525685602799058, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0023533133137971164, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.001413559541106224, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0008533050213009119, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005862193927168846, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00042728278785943987, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003065201826393604, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0002485215943306685, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00018779834732413292, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0001576335821300745, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.000141034834086895, "train/loss_037_lr8.3e+00_wd1.0e+00": 9.119657799601555e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00011038299649953842, "train/loss_039_lr1.2e+01_wd1.0e+00": 5.985955707728863e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00016102151945233346, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00013438045978546144, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00013945164158940315, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00020058605819940568, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00017426530830562116, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.2889867648482322e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.343036562204361e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.3062171638011932e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.9730331152677536e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.011710727941244841, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.011206840532831847, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.010347864124923944, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.009300385718233883, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.007971704059746116, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.006089747298974543, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.004954728835728019, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.004620835749665275, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.004550017955480144, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0045596307521918785, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004565065315109678, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004545357133029029, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004497124045738019, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0044108720408985395, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0043091386288870125, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004212466846220195, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004083235290891025, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.003929284970508888, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00374070382094942, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0035622294951463116, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0033219406992429867, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0030534029396949336, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0027405952552362577, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002368254164030077, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0019225807556358632, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0013907764255418442, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0009711703579523601, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006895207234629197, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0004361441608671157, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00027011746154130377, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00018547988616774092, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00013471129763274802, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.757983873782905e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.028638870143822e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.391216126047539e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.3560937213319446e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.58941828657089e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.683728441970402e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 4.35409500639139e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.950559400916042e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 5.968641342054326e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 6.277139808812393e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 7.359471561624531e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 9.774914629900877e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 7.868963358909298e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 2.1620682393391508e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.1706837429787343e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 8.093210453277069e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 4.688163635358225e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.219679594039917, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9913527369499207, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6813752055168152, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.4544730484485626, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.29971975088119507, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.17410211265087128, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.11487752199172974, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.08824189752340317, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07205493748188019, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06295572221279144, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.057483065873384476, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.051747411489486694, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04800897464156151, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.043906789273023605, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.040870700031518936, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.038611993193626404, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.036157239228487015, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.033895716071128845, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.031835224479436874, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.030202820897102356, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028532912954688072, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.027302172034978867, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.026336319744586945, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.0254579558968544, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.024960417300462723, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.024728193879127502, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02485371194779873, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.025358056649565697, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.02612987905740738, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.027275750413537025, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.028370510786771774, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.028956277295947075, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.02946189045906067, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.029691582545638084, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03103761374950409, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03447764739394188, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.03552946820855141, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04476145654916763, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04627930000424385, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.05742701143026352, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.05700746551156044, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.06316883116960526, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.061225976794958115, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.061680082231760025, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.05397019162774086, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.225893035531044, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.4893556237220764, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.40284526348114014, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.463546484708786, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8655753968253969, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9188988095238095, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9503968253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9627976190476191, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9689980158730159, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9732142857142857, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9769345238095238, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9809027777777778, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9833829365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9910714285714286, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9908234126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9910714285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9903273809523809, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9858630952380952, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.82818799206795, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9086937341066142, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9490938851956062, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.964145786566939, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.969914178375039, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.973651402760387, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9763782865849009, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9780956032439606, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9794626141752374, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9813658774503716, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9824167681274674, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9837238001323103, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9839899694442984, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9842497367902057, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9857481273554589, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9863283384275329, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9869216468144396, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9885492595306412, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.988450467002907, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9886450751939526, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9884526306890539, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9895692243828835, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9900161881943403, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9896399248629192, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9909552748533937, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9907705612592782, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9909958866925835, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9905013002259734, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9901432704612337, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9910528922055125, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9911913635329035, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9909713790898529, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9915120019885636, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9912122216273828, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.990669787539713, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9902107004436373, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9892494680782882, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9895845286256717, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989539509417716, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9906137581054937, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9860384974113969, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9878265783689977, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9875173128579225, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9886066007056977, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9895771698334958, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9876094042271061, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9864611966813438, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9841765818316235, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9839383139953167, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 0.009052088875323534, "validation/loss_best": 0.024960417300462723, "validation/acc_best": 0.9925595238095238, "validation/f1_best": 0.9909552748533937} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.09651363894343376, "train/grad": 0.02996470235288143, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.218430519104004, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9886644554138183, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6775776433944702, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.4517499399185181, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.29892319947481155, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.17600117594003678, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.11903787303715944, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09417126188054681, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07940745723433792, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07109170289710164, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06583811153657734, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.060208344152197245, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05598356442525983, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05110026089474559, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04711523749865591, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04375463509932161, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.039913686849176885, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03590418592095375, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.031831393847242, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.028322030594572424, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02416755922138691, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.020103129977360367, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.015794143779203296, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.011577912429347635, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.00789700111374259, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004804722219705582, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0030232111923396586, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0020197176095098257, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0012373822648078204, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0007664770260453224, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005215883534401655, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003808057773858309, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0002724148984998465, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0002163008600473404, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00016061222180724144, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00013578969985246657, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00012032007798552513, "train/loss_037_lr8.3e+00_wd1.0e+00": 8.169385604560375e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 9.915890172123909e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.885543137788773e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00013301925733685493, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00012328103184700012, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00011419394053518772, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00018198151141405106, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0001519697066396475, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.901682794094085e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.0742416381835937e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.1396932899951933e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.2506451457738875e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01169351120479405, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.011182402893900871, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.010323372916318477, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.009280787503812463, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.00798313443781808, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.006155306464061141, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.005023971473565325, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.004650386898429133, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.004535548851708882, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.004515818508225493, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.004504137998155784, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.004479777275992092, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.004435785762034357, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.004362241396156605, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0042775755090406166, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0041834698704769835, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004055162313161418, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.003895021876669489, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.003709997359837871, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0035178731894120576, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003275575719162589, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003017553783429321, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0026976251952146413, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002305689040440484, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0018527656230071443, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0013281100062522454, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0009251207932902617, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006519982232202892, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00041225725465665164, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002573086073471131, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00017727102394019312, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00012963759227773152, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.649410435258688e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.773886364475401e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.17228588140506e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.111221149803669e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.390028555292247e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.5385580602707026e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 4.224172616460464e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.842351476654237e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 5.617827834015543e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 6.061721453704649e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 6.882706506720737e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 9.173717413304416e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 7.656297006001012e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.940598934852164e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.5766374582488807e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.3652057367119289e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 3.980430706059412e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2192089557647705, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.990852952003479, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6808809638023376, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.45406419038772583, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.29940834641456604, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1738983392715454, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.11480585485696793, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.08816023916006088, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07204388082027435, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06294558197259903, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05744877830147743, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05172625184059143, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.047965023666620255, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.043835852295160294, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04091082885861397, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03857305273413658, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03622213006019592, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03384615108370781, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03183631971478462, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.030262215062975883, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.028541142120957375, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.027350589632987976, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.026310592889785767, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.02543993853032589, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.025019746273756027, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.024732647463679314, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.02487768791615963, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.02529023587703705, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.02618173323571682, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.02731155790388584, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.028344282880425453, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0289817713201046, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.02949586883187294, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.029657529667019844, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.030991630628705025, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.034493543207645416, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.035535965114831924, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04472190514206886, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04631747677922249, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0574009045958519, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.056938257068395615, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0631791204214096, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.06118815392255783, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.061687372624874115, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.05395936220884323, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.22576534748077393, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.48923954367637634, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4025026261806488, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.462842732667923, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8653273809523809, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.917906746031746, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9503968253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9630456349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.96875, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9732142857142857, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9771825396825397, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9809027777777778, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9833829365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9908234126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9913194444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9918154761904762, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9925595238095238, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9923115079365079, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9920634920634921, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9915674603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9913194444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9910714285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9895833333333334, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9903273809523809, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9858630952380952, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8278949965483022, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.9068429241166656, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.94923492381971, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9646869831918794, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.969736237082907, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.973651402760387, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9765566181167549, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9780956032439606, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9794626141752374, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9813658774503716, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9825972091722546, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9837238001323103, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9839899694442984, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9846093932589348, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9857481273554589, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9863283384275329, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9869216468144396, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9883695439585396, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.988450467002907, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9886450751939526, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9884526306890539, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9893452271125243, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9900161881943403, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9896399248629192, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9909552748533937, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9909958866925835, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9909958866925835, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9905013002259734, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9901432704612337, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9910528922055125, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9911913635329035, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9909713790898529, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9909739336538851, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9912122216273828, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.990669787539713, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9898535058784868, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9892494680782882, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.989405520517553, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.989539509417716, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9906137581054937, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9862678573608347, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9880049464133556, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9875173128579225, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9886073606584831, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9895771698334958, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9876094042271061, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9864611966813438, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9841765818316235, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9839383139953167, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 0.00789700111374259, "validation/loss_best": 0.025019746273756027, "validation/acc_best": 0.9925595238095238, "validation/f1_best": 0.9909552748533937} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/config.yaml b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e423231a4a89fe0d24aea5b18ee09666964804d1 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (hcpya_task21 patch linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear +model: flat_mae +representation: patch +classifier: linear +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..8f2a4bc6938bbcd6e4b83de31939d70224a113cf --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 18, "eval/id_best": 48, "eval/lr_best": 0.015, "eval/wd_best": 0.05, "eval/train/loss": 0.04504745826125145, "eval/train/acc": 0.9927890941628507, "eval/train/acc_std": 0.0005673411646758755, "eval/train/f1": 0.9935910504032479, "eval/train/f1_std": 0.000555172191450433, "eval/validation/loss": 0.07688092440366745, "eval/validation/acc": 0.9794146825396826, "eval/validation/acc_std": 0.002293781395177082, "eval/validation/f1": 0.9776367535338889, "eval/validation/f1_std": 0.0028010960080500644, "eval/test/loss": 0.09627865999937057, "eval/test/acc": 0.9720238095238095, "eval/test/acc_std": 0.0022132335214132617, "eval/test/f1": 0.9665086098368507, "eval/test/f1_std": 0.0029953135730193085} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_best.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..a682e1f96cb62db540d8f90f6d6af5334e3ab7d0 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 18, "eval/best/id_best": 48, "eval/best/lr_best": 0.015, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.04504745826125145, "eval/best/train/acc": 0.9927890941628507, "eval/best/train/acc_std": 0.0005673411646758755, "eval/best/train/f1": 0.9935910504032479, "eval/best/train/f1_std": 0.000555172191450433, "eval/best/validation/loss": 0.07688092440366745, "eval/best/validation/acc": 0.9794146825396826, "eval/best/validation/acc_std": 0.002293781395177082, "eval/best/validation/f1": 0.9776367535338889, "eval/best/validation/f1_std": 0.0028010960080500644, "eval/best/test/loss": 0.09627865999937057, "eval/best/test/acc": 0.9720238095238095, "eval/best/test/acc_std": 0.0022132335214132617, "eval/best/test/f1": 0.9665086098368507, "eval/best/test/f1_std": 0.0029953135730193085} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_last.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..4737fab38d412215def2158e68ce311576dc6fb4 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 46, "eval/last/lr_best": 0.010799999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 0.04962094500660896, "eval/last/train/acc": 0.9914732354334439, "eval/last/train/acc_std": 0.0006049668452250976, "eval/last/train/f1": 0.9923889332331616, "eval/last/train/f1_std": 0.0006120415774154669, "eval/last/validation/loss": 0.07877406477928162, "eval/last/validation/acc": 0.9791666666666666, "eval/last/validation/acc_std": 0.002302999276114693, "eval/last/validation/f1": 0.9770337881793244, "eval/last/validation/f1_std": 0.0028598650083941435, "eval/last/test/loss": 0.09756624698638916, "eval/last/test/acc": 0.9722222222222222, "eval/last/test/acc_std": 0.002207655310280523, "eval/last/test/f1": 0.9665906867473781, "eval/last/test/f1_std": 0.003014212636362539} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..9140ec569e66a2aaa97ab47130e7bab382467aad --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,linear,hcpya_task21,best,18,0.015,0.05,48,"[50, 1.0]",train,0.04504745826125145,0.9927890941628507,0.0005673411646758755,0.9935910504032479,0.000555172191450433 +flat_mae,patch,linear,hcpya_task21,best,18,0.015,0.05,48,"[50, 1.0]",validation,0.07688092440366745,0.9794146825396826,0.002293781395177082,0.9776367535338889,0.0028010960080500644 +flat_mae,patch,linear,hcpya_task21,best,18,0.015,0.05,48,"[50, 1.0]",test,0.09627865999937057,0.9720238095238095,0.0022132335214132617,0.9665086098368507,0.0029953135730193085 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_best.csv b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..9140ec569e66a2aaa97ab47130e7bab382467aad --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,linear,hcpya_task21,best,18,0.015,0.05,48,"[50, 1.0]",train,0.04504745826125145,0.9927890941628507,0.0005673411646758755,0.9935910504032479,0.000555172191450433 +flat_mae,patch,linear,hcpya_task21,best,18,0.015,0.05,48,"[50, 1.0]",validation,0.07688092440366745,0.9794146825396826,0.002293781395177082,0.9776367535338889,0.0028010960080500644 +flat_mae,patch,linear,hcpya_task21,best,18,0.015,0.05,48,"[50, 1.0]",test,0.09627865999937057,0.9720238095238095,0.0022132335214132617,0.9665086098368507,0.0029953135730193085 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_last.csv b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..aabde31e6d17ed2da9c8eea29c87e3643216aa2b --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,linear,hcpya_task21,last,19,0.010799999999999999,0.05,46,"[36, 1.0]",train,0.04962094500660896,0.9914732354334439,0.0006049668452250976,0.9923889332331616,0.0006120415774154669 +flat_mae,patch,linear,hcpya_task21,last,19,0.010799999999999999,0.05,46,"[36, 1.0]",validation,0.07877406477928162,0.9791666666666666,0.002302999276114693,0.9770337881793244,0.0028598650083941435 +flat_mae,patch,linear,hcpya_task21,last,19,0.010799999999999999,0.05,46,"[36, 1.0]",test,0.09756624698638916,0.9722222222222222,0.002207655310280523,0.9665906867473781,0.003014212636362539 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/log.txt b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f7030ad8e7e43a723c974270c01275f298a154c7 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/log.txt @@ -0,0 +1,889 @@ +fMRI foundation model probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: aef99c83a386cf95c3d8ca503ecc968d8d5694af, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-03-07 23:07:32 +config: +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (hcpya_task21 patch linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear +model: flat_mae +representation: patch +classifier: linear +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x LinearClassifier( + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 0.8M (0.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:24:04 lr: nan time: 3.6107 data: 3.2496 max mem: 3910 +train: [0] [ 20/400] eta: 0:03:17 lr: 0.000003 loss: 3.0327 (3.0368) grad: 0.1109 (0.1150) time: 0.3663 data: 0.0037 max mem: 3951 +train: [0] [ 40/400] eta: 0:02:48 lr: 0.000006 loss: 3.0299 (3.0337) grad: 0.1103 (0.1128) time: 0.4106 data: 0.0032 max mem: 3951 +train: [0] [ 60/400] eta: 0:02:30 lr: 0.000009 loss: 3.0237 (3.0283) grad: 0.1109 (0.1132) time: 0.3900 data: 0.0038 max mem: 3951 +train: [0] [ 80/400] eta: 0:02:16 lr: 0.000012 loss: 3.0016 (3.0200) grad: 0.1119 (0.1127) time: 0.3769 data: 0.0035 max mem: 3951 +train: [0] [100/400] eta: 0:02:04 lr: 0.000015 loss: 2.9834 (3.0105) grad: 0.1065 (0.1110) time: 0.3674 data: 0.0034 max mem: 3951 +train: [0] [120/400] eta: 0:01:54 lr: 0.000018 loss: 2.9574 (2.9992) grad: 0.1077 (0.1106) time: 0.3794 data: 0.0033 max mem: 3951 +train: [0] [140/400] eta: 0:01:44 lr: 0.000021 loss: 2.9271 (2.9859) grad: 0.1077 (0.1099) time: 0.3644 data: 0.0033 max mem: 3951 +train: [0] [160/400] eta: 0:01:35 lr: 0.000024 loss: 2.8912 (2.9721) grad: 0.1020 (0.1083) time: 0.3550 data: 0.0033 max mem: 3951 +train: [0] [180/400] eta: 0:01:26 lr: 0.000027 loss: 2.8566 (2.9564) grad: 0.0989 (0.1079) time: 0.3621 data: 0.0034 max mem: 3951 +train: [0] [200/400] eta: 0:01:18 lr: 0.000030 loss: 2.8142 (2.9408) grad: 0.1022 (0.1072) time: 0.3730 data: 0.0035 max mem: 3951 +train: [0] [220/400] eta: 0:01:10 lr: 0.000033 loss: 2.7748 (2.9227) grad: 0.1022 (0.1068) time: 0.3770 data: 0.0033 max mem: 3951 +train: [0] [240/400] eta: 0:01:02 lr: 0.000036 loss: 2.7243 (2.9051) grad: 0.0989 (0.1060) time: 0.3924 data: 0.0034 max mem: 3951 +train: [0] [260/400] eta: 0:00:54 lr: 0.000039 loss: 2.6887 (2.8869) grad: 0.0989 (0.1056) time: 0.3846 data: 0.0034 max mem: 3951 +train: [0] [280/400] eta: 0:00:46 lr: 0.000042 loss: 2.6582 (2.8693) grad: 0.0943 (0.1047) time: 0.3578 data: 0.0033 max mem: 3951 +train: [0] [300/400] eta: 0:00:39 lr: 0.000045 loss: 2.6003 (2.8502) grad: 0.0902 (0.1041) time: 0.5251 data: 0.1765 max mem: 3951 +train: [0] [320/400] eta: 0:00:31 lr: 0.000048 loss: 2.5734 (2.8319) grad: 0.0906 (0.1033) time: 0.3744 data: 0.0031 max mem: 3951 +train: [0] [340/400] eta: 0:00:23 lr: 0.000051 loss: 2.5436 (2.8135) grad: 0.0942 (0.1029) time: 0.3511 data: 0.0033 max mem: 3951 +train: [0] [360/400] eta: 0:00:15 lr: 0.000054 loss: 2.4999 (2.7950) grad: 0.0949 (0.1026) time: 0.3788 data: 0.0035 max mem: 3951 +train: [0] [380/400] eta: 0:00:07 lr: 0.000057 loss: 2.4534 (2.7763) grad: 0.0913 (0.1020) time: 0.3857 data: 0.0036 max mem: 3951 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 2.4158 (2.7567) grad: 0.0918 (0.1015) time: 0.3829 data: 0.0034 max mem: 3951 +train: [0] Total time: 0:02:36 (0.3911 s / it) +train: [0] Summary: lr: 0.000060 loss: 2.4158 (2.7567) grad: 0.0918 (0.1015) +eval (validation): [0] [ 0/63] eta: 0:03:44 time: 3.5598 data: 3.3288 max mem: 3951 +eval (validation): [0] [20/63] eta: 0:00:22 time: 0.3594 data: 0.0030 max mem: 3951 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3517 data: 0.0032 max mem: 3951 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3324 data: 0.0032 max mem: 3951 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3297 data: 0.0032 max mem: 3951 +eval (validation): [0] Total time: 0:00:25 (0.4020 s / it) +cv: [0] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.512 acc: 0.936 f1: 0.929 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:23:07 lr: nan time: 3.4690 data: 3.2265 max mem: 3951 +train: [1] [ 20/400] eta: 0:03:17 lr: 0.000063 loss: 2.3607 (2.3553) grad: 0.0915 (0.0926) time: 0.3723 data: 0.0031 max mem: 3951 +train: [1] [ 40/400] eta: 0:02:37 lr: 0.000066 loss: 2.3561 (2.3520) grad: 0.0906 (0.0917) time: 0.3512 data: 0.0027 max mem: 3951 +train: [1] [ 60/400] eta: 0:02:19 lr: 0.000069 loss: 2.3272 (2.3392) grad: 0.0904 (0.0928) time: 0.3516 data: 0.0037 max mem: 3951 +train: [1] [ 80/400] eta: 0:02:07 lr: 0.000072 loss: 2.3062 (2.3265) grad: 0.0899 (0.0915) time: 0.3664 data: 0.0034 max mem: 3951 +train: [1] [100/400] eta: 0:01:56 lr: 0.000075 loss: 2.2782 (2.3160) grad: 0.0853 (0.0899) time: 0.3453 data: 0.0033 max mem: 3951 +train: [1] [120/400] eta: 0:01:47 lr: 0.000078 loss: 2.2525 (2.3023) grad: 0.0853 (0.0900) time: 0.3728 data: 0.0035 max mem: 3951 +train: [1] [140/400] eta: 0:01:39 lr: 0.000081 loss: 2.2057 (2.2867) grad: 0.0870 (0.0898) time: 0.3753 data: 0.0035 max mem: 3951 +train: [1] [160/400] eta: 0:01:31 lr: 0.000084 loss: 2.1999 (2.2760) grad: 0.0865 (0.0892) time: 0.3477 data: 0.0033 max mem: 3951 +train: [1] [180/400] eta: 0:01:23 lr: 0.000087 loss: 2.1773 (2.2639) grad: 0.0831 (0.0891) time: 0.3800 data: 0.0043 max mem: 3951 +train: [1] [200/400] eta: 0:01:15 lr: 0.000090 loss: 2.1549 (2.2526) grad: 0.0831 (0.0884) time: 0.3681 data: 0.0033 max mem: 3951 +train: [1] [220/400] eta: 0:01:07 lr: 0.000093 loss: 2.1137 (2.2376) grad: 0.0857 (0.0882) time: 0.3569 data: 0.0033 max mem: 3951 +train: [1] [240/400] eta: 0:01:00 lr: 0.000096 loss: 2.0979 (2.2251) grad: 0.0866 (0.0880) time: 0.3840 data: 0.0033 max mem: 3951 +train: [1] [260/400] eta: 0:00:52 lr: 0.000099 loss: 2.0900 (2.2128) grad: 0.0854 (0.0879) time: 0.3794 data: 0.0034 max mem: 3951 +train: [1] [280/400] eta: 0:00:45 lr: 0.000102 loss: 2.0758 (2.2017) grad: 0.0798 (0.0874) time: 0.3548 data: 0.0033 max mem: 3951 +train: [1] [300/400] eta: 0:00:39 lr: 0.000105 loss: 2.0284 (2.1896) grad: 0.0792 (0.0872) time: 0.5961 data: 0.2493 max mem: 3951 +train: [1] [320/400] eta: 0:00:31 lr: 0.000108 loss: 2.0151 (2.1788) grad: 0.0792 (0.0867) time: 0.3725 data: 0.0033 max mem: 3951 +train: [1] [340/400] eta: 0:00:23 lr: 0.000111 loss: 1.9807 (2.1662) grad: 0.0827 (0.0868) time: 0.3532 data: 0.0029 max mem: 3951 +train: [1] [360/400] eta: 0:00:15 lr: 0.000114 loss: 1.9590 (2.1543) grad: 0.0853 (0.0866) time: 0.3707 data: 0.0033 max mem: 3951 +train: [1] [380/400] eta: 0:00:07 lr: 0.000117 loss: 1.9425 (2.1425) grad: 0.0832 (0.0865) time: 0.3663 data: 0.0033 max mem: 3951 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.9195 (2.1322) grad: 0.0808 (0.0862) time: 0.3544 data: 0.0035 max mem: 3951 +train: [1] Total time: 0:02:33 (0.3840 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.9195 (2.1322) grad: 0.0808 (0.0862) +eval (validation): [1] [ 0/63] eta: 0:03:40 time: 3.5054 data: 3.2336 max mem: 3951 +eval (validation): [1] [20/63] eta: 0:00:23 time: 0.3926 data: 0.0034 max mem: 3951 +eval (validation): [1] [40/63] eta: 0:00:10 time: 0.3456 data: 0.0038 max mem: 3951 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3463 data: 0.0036 max mem: 3951 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3401 data: 0.0036 max mem: 3951 +eval (validation): [1] Total time: 0:00:26 (0.4155 s / it) +cv: [1] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.213 acc: 0.960 f1: 0.955 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:23:40 lr: nan time: 3.5522 data: 3.2589 max mem: 3951 +train: [2] [ 20/400] eta: 0:03:27 lr: 0.000123 loss: 1.8869 (1.8992) grad: 0.0784 (0.0804) time: 0.3966 data: 0.0037 max mem: 3951 +train: [2] [ 40/400] eta: 0:02:43 lr: 0.000126 loss: 1.8869 (1.8944) grad: 0.0787 (0.0805) time: 0.3569 data: 0.0032 max mem: 3951 +train: [2] [ 60/400] eta: 0:02:25 lr: 0.000129 loss: 1.8504 (1.8807) grad: 0.0786 (0.0798) time: 0.3730 data: 0.0035 max mem: 3951 +train: [2] [ 80/400] eta: 0:02:11 lr: 0.000132 loss: 1.8575 (1.8778) grad: 0.0775 (0.0790) time: 0.3610 data: 0.0036 max mem: 3951 +train: [2] [100/400] eta: 0:02:00 lr: 0.000135 loss: 1.8575 (1.8722) grad: 0.0758 (0.0785) time: 0.3617 data: 0.0034 max mem: 3951 +train: [2] [120/400] eta: 0:01:50 lr: 0.000138 loss: 1.8422 (1.8645) grad: 0.0779 (0.0790) time: 0.3656 data: 0.0035 max mem: 3951 +train: [2] [140/400] eta: 0:01:41 lr: 0.000141 loss: 1.8307 (1.8591) grad: 0.0800 (0.0788) time: 0.3597 data: 0.0035 max mem: 3951 +train: [2] [160/400] eta: 0:01:32 lr: 0.000144 loss: 1.8151 (1.8544) grad: 0.0742 (0.0781) time: 0.3471 data: 0.0034 max mem: 3951 +train: [2] [180/400] eta: 0:01:24 lr: 0.000147 loss: 1.7969 (1.8456) grad: 0.0720 (0.0776) time: 0.3646 data: 0.0035 max mem: 3951 +train: [2] [200/400] eta: 0:01:16 lr: 0.000150 loss: 1.7732 (1.8377) grad: 0.0727 (0.0774) time: 0.3566 data: 0.0036 max mem: 3951 +train: [2] [220/400] eta: 0:01:08 lr: 0.000153 loss: 1.7409 (1.8280) grad: 0.0743 (0.0772) time: 0.3586 data: 0.0034 max mem: 3951 +train: [2] [240/400] eta: 0:01:00 lr: 0.000156 loss: 1.7382 (1.8214) grad: 0.0742 (0.0772) time: 0.3737 data: 0.0035 max mem: 3951 +train: [2] [260/400] eta: 0:00:52 lr: 0.000159 loss: 1.7356 (1.8137) grad: 0.0745 (0.0771) time: 0.3754 data: 0.0034 max mem: 3951 +train: [2] [280/400] eta: 0:00:45 lr: 0.000162 loss: 1.7085 (1.8071) grad: 0.0739 (0.0769) time: 0.3628 data: 0.0033 max mem: 3951 +train: [2] [300/400] eta: 0:00:38 lr: 0.000165 loss: 1.7058 (1.8000) grad: 0.0738 (0.0767) time: 0.5524 data: 0.2001 max mem: 3951 +train: [2] [320/400] eta: 0:00:30 lr: 0.000168 loss: 1.7026 (1.7944) grad: 0.0688 (0.0761) time: 0.3609 data: 0.0036 max mem: 3951 +train: [2] [340/400] eta: 0:00:23 lr: 0.000171 loss: 1.6736 (1.7871) grad: 0.0678 (0.0758) time: 0.3509 data: 0.0031 max mem: 3951 +train: [2] [360/400] eta: 0:00:15 lr: 0.000174 loss: 1.6678 (1.7815) grad: 0.0711 (0.0755) time: 0.3538 data: 0.0034 max mem: 3951 +train: [2] [380/400] eta: 0:00:07 lr: 0.000177 loss: 1.6632 (1.7752) grad: 0.0721 (0.0754) time: 0.3735 data: 0.0033 max mem: 3951 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 1.6414 (1.7682) grad: 0.0731 (0.0754) time: 0.3732 data: 0.0033 max mem: 3951 +train: [2] Total time: 0:02:32 (0.3820 s / it) +train: [2] Summary: lr: 0.000180 loss: 1.6414 (1.7682) grad: 0.0731 (0.0754) +eval (validation): [2] [ 0/63] eta: 0:03:52 time: 3.6957 data: 3.4112 max mem: 3951 +eval (validation): [2] [20/63] eta: 0:00:22 time: 0.3718 data: 0.0026 max mem: 3951 +eval (validation): [2] [40/63] eta: 0:00:10 time: 0.3910 data: 0.0036 max mem: 3951 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3289 data: 0.0032 max mem: 3951 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3254 data: 0.0030 max mem: 3951 +eval (validation): [2] Total time: 0:00:26 (0.4197 s / it) +cv: [2] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 0.155 acc: 0.967 f1: 0.964 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:23:22 lr: nan time: 3.5060 data: 3.2115 max mem: 3951 +train: [3] [ 20/400] eta: 0:03:13 lr: 0.000183 loss: 1.6080 (1.6074) grad: 0.0738 (0.0737) time: 0.3587 data: 0.0044 max mem: 3951 +train: [3] [ 40/400] eta: 0:02:34 lr: 0.000186 loss: 1.6086 (1.6035) grad: 0.0732 (0.0729) time: 0.3458 data: 0.0028 max mem: 3951 +train: [3] [ 60/400] eta: 0:02:17 lr: 0.000189 loss: 1.5967 (1.6029) grad: 0.0690 (0.0723) time: 0.3561 data: 0.0036 max mem: 3951 +train: [3] [ 80/400] eta: 0:02:05 lr: 0.000192 loss: 1.5963 (1.6018) grad: 0.0672 (0.0709) time: 0.3561 data: 0.0034 max mem: 3951 +train: [3] [100/400] eta: 0:01:54 lr: 0.000195 loss: 1.5904 (1.5977) grad: 0.0672 (0.0707) time: 0.3437 data: 0.0035 max mem: 3951 +train: [3] [120/400] eta: 0:01:46 lr: 0.000198 loss: 1.5726 (1.5916) grad: 0.0694 (0.0707) time: 0.3737 data: 0.0036 max mem: 3951 +train: [3] [140/400] eta: 0:01:38 lr: 0.000201 loss: 1.5510 (1.5847) grad: 0.0699 (0.0710) time: 0.3534 data: 0.0034 max mem: 3951 +train: [3] [160/400] eta: 0:01:29 lr: 0.000204 loss: 1.5434 (1.5812) grad: 0.0672 (0.0706) time: 0.3433 data: 0.0033 max mem: 3951 +train: [3] [180/400] eta: 0:01:22 lr: 0.000207 loss: 1.5496 (1.5780) grad: 0.0667 (0.0708) time: 0.3716 data: 0.0035 max mem: 3951 +train: [3] [200/400] eta: 0:01:14 lr: 0.000210 loss: 1.5245 (1.5728) grad: 0.0696 (0.0708) time: 0.3622 data: 0.0035 max mem: 3951 +train: [3] [220/400] eta: 0:01:06 lr: 0.000213 loss: 1.5129 (1.5673) grad: 0.0694 (0.0708) time: 0.3460 data: 0.0033 max mem: 3951 +train: [3] [240/400] eta: 0:00:59 lr: 0.000216 loss: 1.5068 (1.5621) grad: 0.0668 (0.0706) time: 0.3623 data: 0.0033 max mem: 3951 +train: [3] [260/400] eta: 0:00:51 lr: 0.000219 loss: 1.5093 (1.5574) grad: 0.0657 (0.0702) time: 0.3524 data: 0.0032 max mem: 3951 +train: [3] [280/400] eta: 0:00:44 lr: 0.000222 loss: 1.5123 (1.5536) grad: 0.0653 (0.0699) time: 0.3621 data: 0.0034 max mem: 3951 +train: [3] [300/400] eta: 0:00:37 lr: 0.000225 loss: 1.5003 (1.5490) grad: 0.0656 (0.0698) time: 0.5205 data: 0.1882 max mem: 3951 +train: [3] [320/400] eta: 0:00:30 lr: 0.000228 loss: 1.4704 (1.5431) grad: 0.0659 (0.0696) time: 0.3747 data: 0.0042 max mem: 3951 +train: [3] [340/400] eta: 0:00:22 lr: 0.000231 loss: 1.4446 (1.5376) grad: 0.0670 (0.0694) time: 0.3345 data: 0.0040 max mem: 3951 +train: [3] [360/400] eta: 0:00:14 lr: 0.000234 loss: 1.4447 (1.5330) grad: 0.0630 (0.0691) time: 0.3723 data: 0.0033 max mem: 3951 +train: [3] [380/400] eta: 0:00:07 lr: 0.000237 loss: 1.4258 (1.5267) grad: 0.0677 (0.0692) time: 0.3768 data: 0.0034 max mem: 3951 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 1.4313 (1.5233) grad: 0.0650 (0.0689) time: 0.3612 data: 0.0035 max mem: 3951 +train: [3] Total time: 0:02:29 (0.3745 s / it) +train: [3] Summary: lr: 0.000240 loss: 1.4313 (1.5233) grad: 0.0650 (0.0689) +eval (validation): [3] [ 0/63] eta: 0:03:43 time: 3.5555 data: 3.2695 max mem: 3951 +eval (validation): [3] [20/63] eta: 0:00:22 time: 0.3622 data: 0.0060 max mem: 3951 +eval (validation): [3] [40/63] eta: 0:00:10 time: 0.3752 data: 0.0036 max mem: 3951 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3419 data: 0.0031 max mem: 3951 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3420 data: 0.0033 max mem: 3951 +eval (validation): [3] Total time: 0:00:26 (0.4150 s / it) +cv: [3] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.120 acc: 0.971 f1: 0.968 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:24:11 lr: nan time: 3.6298 data: 3.3796 max mem: 3951 +train: [4] [ 20/400] eta: 0:03:19 lr: 0.000243 loss: 1.4321 (1.4192) grad: 0.0661 (0.0670) time: 0.3702 data: 0.0028 max mem: 3951 +train: [4] [ 40/400] eta: 0:02:40 lr: 0.000246 loss: 1.4144 (1.4182) grad: 0.0652 (0.0651) time: 0.3639 data: 0.0029 max mem: 3951 +train: [4] [ 60/400] eta: 0:02:23 lr: 0.000249 loss: 1.4149 (1.4217) grad: 0.0643 (0.0659) time: 0.3750 data: 0.0035 max mem: 3951 +train: [4] [ 80/400] eta: 0:02:11 lr: 0.000252 loss: 1.3944 (1.4108) grad: 0.0640 (0.0654) time: 0.3700 data: 0.0035 max mem: 3951 +train: [4] [100/400] eta: 0:01:59 lr: 0.000255 loss: 1.3789 (1.4082) grad: 0.0637 (0.0657) time: 0.3568 data: 0.0035 max mem: 3951 +train: [4] [120/400] eta: 0:01:51 lr: 0.000258 loss: 1.3935 (1.4041) grad: 0.0639 (0.0654) time: 0.3832 data: 0.0034 max mem: 3951 +train: [4] [140/400] eta: 0:01:42 lr: 0.000261 loss: 1.3887 (1.4003) grad: 0.0646 (0.0653) time: 0.3712 data: 0.0035 max mem: 3951 +train: [4] [160/400] eta: 0:01:33 lr: 0.000264 loss: 1.3587 (1.3959) grad: 0.0618 (0.0648) time: 0.3534 data: 0.0033 max mem: 3951 +train: [4] [180/400] eta: 0:01:24 lr: 0.000267 loss: 1.3558 (1.3904) grad: 0.0616 (0.0650) time: 0.3659 data: 0.0036 max mem: 3951 +train: [4] [200/400] eta: 0:01:16 lr: 0.000270 loss: 1.3185 (1.3830) grad: 0.0634 (0.0650) time: 0.3611 data: 0.0033 max mem: 3951 +train: [4] [220/400] eta: 0:01:08 lr: 0.000273 loss: 1.3255 (1.3799) grad: 0.0631 (0.0646) time: 0.3754 data: 0.0036 max mem: 3951 +train: [4] [240/400] eta: 0:01:00 lr: 0.000276 loss: 1.3128 (1.3752) grad: 0.0603 (0.0643) time: 0.3630 data: 0.0031 max mem: 3951 +train: [4] [260/400] eta: 0:00:53 lr: 0.000279 loss: 1.3115 (1.3706) grad: 0.0594 (0.0641) time: 0.3588 data: 0.0035 max mem: 3951 +train: [4] [280/400] eta: 0:00:45 lr: 0.000282 loss: 1.3152 (1.3660) grad: 0.0604 (0.0640) time: 0.3648 data: 0.0034 max mem: 3951 +train: [4] [300/400] eta: 0:00:38 lr: 0.000285 loss: 1.3077 (1.3629) grad: 0.0599 (0.0637) time: 0.5319 data: 0.1951 max mem: 3951 +train: [4] [320/400] eta: 0:00:30 lr: 0.000288 loss: 1.2951 (1.3569) grad: 0.0622 (0.0638) time: 0.3695 data: 0.0031 max mem: 3951 +train: [4] [340/400] eta: 0:00:23 lr: 0.000291 loss: 1.2578 (1.3523) grad: 0.0622 (0.0636) time: 0.3635 data: 0.0033 max mem: 3951 +train: [4] [360/400] eta: 0:00:15 lr: 0.000294 loss: 1.2796 (1.3489) grad: 0.0598 (0.0635) time: 0.3518 data: 0.0034 max mem: 3951 +train: [4] [380/400] eta: 0:00:07 lr: 0.000297 loss: 1.2587 (1.3439) grad: 0.0623 (0.0635) time: 0.3788 data: 0.0034 max mem: 3951 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 1.2303 (1.3390) grad: 0.0643 (0.0635) time: 0.3717 data: 0.0034 max mem: 3951 +train: [4] Total time: 0:02:33 (0.3834 s / it) +train: [4] Summary: lr: 0.000300 loss: 1.2303 (1.3390) grad: 0.0643 (0.0635) +eval (validation): [4] [ 0/63] eta: 0:03:37 time: 3.4458 data: 3.2126 max mem: 3951 +eval (validation): [4] [20/63] eta: 0:00:22 time: 0.3711 data: 0.0034 max mem: 3951 +eval (validation): [4] [40/63] eta: 0:00:10 time: 0.3589 data: 0.0035 max mem: 3951 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3324 data: 0.0033 max mem: 3951 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3303 data: 0.0033 max mem: 3951 +eval (validation): [4] Total time: 0:00:25 (0.4069 s / it) +cv: [4] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 0.108 acc: 0.974 f1: 0.971 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:23:39 lr: nan time: 3.5497 data: 3.3061 max mem: 3951 +train: [5] [ 20/400] eta: 0:03:23 lr: 0.000300 loss: 1.2632 (1.2515) grad: 0.0586 (0.0605) time: 0.3861 data: 0.0037 max mem: 3951 +train: [5] [ 40/400] eta: 0:02:42 lr: 0.000300 loss: 1.2563 (1.2559) grad: 0.0595 (0.0601) time: 0.3639 data: 0.0030 max mem: 3951 +train: [5] [ 60/400] eta: 0:02:26 lr: 0.000300 loss: 1.2519 (1.2575) grad: 0.0560 (0.0589) time: 0.3876 data: 0.0035 max mem: 3951 +train: [5] [ 80/400] eta: 0:02:13 lr: 0.000300 loss: 1.2505 (1.2582) grad: 0.0581 (0.0597) time: 0.3721 data: 0.0034 max mem: 3951 +train: [5] [100/400] eta: 0:02:01 lr: 0.000300 loss: 1.2270 (1.2476) grad: 0.0581 (0.0592) time: 0.3616 data: 0.0034 max mem: 3951 +train: [5] [120/400] eta: 0:01:52 lr: 0.000300 loss: 1.2297 (1.2474) grad: 0.0567 (0.0592) time: 0.3810 data: 0.0035 max mem: 3951 +train: [5] [140/400] eta: 0:01:43 lr: 0.000300 loss: 1.2205 (1.2412) grad: 0.0610 (0.0594) time: 0.3814 data: 0.0038 max mem: 3951 +train: [5] [160/400] eta: 0:01:35 lr: 0.000299 loss: 1.2126 (1.2382) grad: 0.0613 (0.0598) time: 0.3756 data: 0.0033 max mem: 3951 +train: [5] [180/400] eta: 0:01:26 lr: 0.000299 loss: 1.2133 (1.2344) grad: 0.0608 (0.0601) time: 0.3813 data: 0.0034 max mem: 3951 +train: [5] [200/400] eta: 0:01:18 lr: 0.000299 loss: 1.1886 (1.2291) grad: 0.0608 (0.0602) time: 0.3943 data: 0.0035 max mem: 3951 +train: [5] [220/400] eta: 0:01:10 lr: 0.000299 loss: 1.1919 (1.2261) grad: 0.0575 (0.0601) time: 0.3739 data: 0.0035 max mem: 3951 +train: [5] [240/400] eta: 0:01:02 lr: 0.000299 loss: 1.1995 (1.2248) grad: 0.0566 (0.0598) time: 0.3966 data: 0.0037 max mem: 3951 +train: [5] [260/400] eta: 0:00:54 lr: 0.000299 loss: 1.1788 (1.2211) grad: 0.0564 (0.0598) time: 0.3847 data: 0.0036 max mem: 3951 +train: [5] [280/400] eta: 0:00:47 lr: 0.000298 loss: 1.1788 (1.2202) grad: 0.0569 (0.0596) time: 0.3909 data: 0.0036 max mem: 3951 +train: [5] [300/400] eta: 0:00:40 lr: 0.000298 loss: 1.1771 (1.2169) grad: 0.0565 (0.0593) time: 0.6265 data: 0.2494 max mem: 3951 +train: [5] [320/400] eta: 0:00:32 lr: 0.000298 loss: 1.1605 (1.2130) grad: 0.0531 (0.0590) time: 0.3944 data: 0.0037 max mem: 3951 +train: [5] [340/400] eta: 0:00:24 lr: 0.000298 loss: 1.1411 (1.2097) grad: 0.0565 (0.0591) time: 0.3668 data: 0.0029 max mem: 3951 +train: [5] [360/400] eta: 0:00:16 lr: 0.000297 loss: 1.1368 (1.2063) grad: 0.0578 (0.0589) time: 0.3742 data: 0.0035 max mem: 3951 +train: [5] [380/400] eta: 0:00:08 lr: 0.000297 loss: 1.1363 (1.2032) grad: 0.0559 (0.0588) time: 0.4001 data: 0.0037 max mem: 3951 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 1.1391 (1.1999) grad: 0.0564 (0.0588) time: 0.3747 data: 0.0036 max mem: 3951 +train: [5] Total time: 0:02:40 (0.4015 s / it) +train: [5] Summary: lr: 0.000297 loss: 1.1391 (1.1999) grad: 0.0564 (0.0588) +eval (validation): [5] [ 0/63] eta: 0:03:52 time: 3.6911 data: 3.3986 max mem: 3951 +eval (validation): [5] [20/63] eta: 0:00:24 time: 0.4083 data: 0.0032 max mem: 3951 +eval (validation): [5] [40/63] eta: 0:00:10 time: 0.3788 data: 0.0031 max mem: 3951 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3649 data: 0.0033 max mem: 3951 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3546 data: 0.0033 max mem: 3951 +eval (validation): [5] Total time: 0:00:27 (0.4408 s / it) +cv: [5] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.098 acc: 0.975 f1: 0.970 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:23:18 lr: nan time: 3.4966 data: 3.2598 max mem: 3951 +train: [6] [ 20/400] eta: 0:03:31 lr: 0.000296 loss: 1.1316 (1.1394) grad: 0.0561 (0.0569) time: 0.4090 data: 0.0138 max mem: 3951 +train: [6] [ 40/400] eta: 0:02:48 lr: 0.000296 loss: 1.1303 (1.1328) grad: 0.0579 (0.0590) time: 0.3736 data: 0.0029 max mem: 3951 +train: [6] [ 60/400] eta: 0:02:31 lr: 0.000296 loss: 1.1223 (1.1237) grad: 0.0579 (0.0581) time: 0.3972 data: 0.0036 max mem: 3951 +train: [6] [ 80/400] eta: 0:02:18 lr: 0.000295 loss: 1.1423 (1.1317) grad: 0.0524 (0.0561) time: 0.4002 data: 0.0036 max mem: 3951 +train: [6] [100/400] eta: 0:02:06 lr: 0.000295 loss: 1.1437 (1.1299) grad: 0.0531 (0.0562) time: 0.3764 data: 0.0033 max mem: 3951 +train: [6] [120/400] eta: 0:01:58 lr: 0.000295 loss: 1.1057 (1.1270) grad: 0.0573 (0.0566) time: 0.4190 data: 0.0034 max mem: 3951 +train: [6] [140/400] eta: 0:01:48 lr: 0.000294 loss: 1.1232 (1.1288) grad: 0.0573 (0.0566) time: 0.3983 data: 0.0035 max mem: 3951 +train: [6] [160/400] eta: 0:01:39 lr: 0.000294 loss: 1.1326 (1.1283) grad: 0.0566 (0.0565) time: 0.3895 data: 0.0032 max mem: 3951 +train: [6] [180/400] eta: 0:01:31 lr: 0.000293 loss: 1.1289 (1.1255) grad: 0.0542 (0.0563) time: 0.4188 data: 0.0034 max mem: 3951 +train: [6] [200/400] eta: 0:01:22 lr: 0.000293 loss: 1.0942 (1.1231) grad: 0.0530 (0.0561) time: 0.3957 data: 0.0034 max mem: 3951 +train: [6] [220/400] eta: 0:01:13 lr: 0.000292 loss: 1.0909 (1.1200) grad: 0.0545 (0.0559) time: 0.3859 data: 0.0034 max mem: 3951 +train: [6] [240/400] eta: 0:01:05 lr: 0.000292 loss: 1.1028 (1.1195) grad: 0.0556 (0.0560) time: 0.3853 data: 0.0034 max mem: 3951 +train: [6] [260/400] eta: 0:00:57 lr: 0.000291 loss: 1.0913 (1.1150) grad: 0.0559 (0.0560) time: 0.3928 data: 0.0035 max mem: 3951 +train: [6] [280/400] eta: 0:00:48 lr: 0.000291 loss: 1.0780 (1.1147) grad: 0.0526 (0.0557) time: 0.3840 data: 0.0034 max mem: 3951 +train: [6] [300/400] eta: 0:00:41 lr: 0.000290 loss: 1.0951 (1.1131) grad: 0.0523 (0.0557) time: 0.5842 data: 0.2305 max mem: 3951 +train: [6] [320/400] eta: 0:00:33 lr: 0.000290 loss: 1.0881 (1.1110) grad: 0.0525 (0.0553) time: 0.4037 data: 0.0032 max mem: 3951 +train: [6] [340/400] eta: 0:00:24 lr: 0.000289 loss: 1.0613 (1.1085) grad: 0.0530 (0.0553) time: 0.3800 data: 0.0030 max mem: 3951 +train: [6] [360/400] eta: 0:00:16 lr: 0.000288 loss: 1.0548 (1.1052) grad: 0.0524 (0.0553) time: 0.3734 data: 0.0035 max mem: 3951 +train: [6] [380/400] eta: 0:00:08 lr: 0.000288 loss: 1.0580 (1.1039) grad: 0.0520 (0.0552) time: 0.3726 data: 0.0035 max mem: 3951 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 1.0671 (1.1013) grad: 0.0526 (0.0550) time: 0.4018 data: 0.0035 max mem: 3951 +train: [6] Total time: 0:02:44 (0.4101 s / it) +train: [6] Summary: lr: 0.000287 loss: 1.0671 (1.1013) grad: 0.0526 (0.0550) +eval (validation): [6] [ 0/63] eta: 0:03:58 time: 3.7788 data: 3.4666 max mem: 3951 +eval (validation): [6] [20/63] eta: 0:00:25 time: 0.4380 data: 0.0031 max mem: 3951 +eval (validation): [6] [40/63] eta: 0:00:11 time: 0.3947 data: 0.0037 max mem: 3951 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3542 data: 0.0035 max mem: 3951 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3505 data: 0.0034 max mem: 3951 +eval (validation): [6] Total time: 0:00:28 (0.4526 s / it) +cv: [6] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 0.096 acc: 0.976 f1: 0.972 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:24:42 lr: nan time: 3.7067 data: 3.4743 max mem: 3951 +train: [7] [ 20/400] eta: 0:03:40 lr: 0.000286 loss: 1.0672 (1.0711) grad: 0.0496 (0.0540) time: 0.4241 data: 0.0039 max mem: 3951 +train: [7] [ 40/400] eta: 0:02:53 lr: 0.000286 loss: 1.0585 (1.0592) grad: 0.0533 (0.0538) time: 0.3760 data: 0.0033 max mem: 3951 +train: [7] [ 60/400] eta: 0:02:31 lr: 0.000285 loss: 1.0202 (1.0410) grad: 0.0533 (0.0547) time: 0.3762 data: 0.0035 max mem: 3951 +train: [7] [ 80/400] eta: 0:02:17 lr: 0.000284 loss: 1.0258 (1.0494) grad: 0.0522 (0.0535) time: 0.3799 data: 0.0037 max mem: 3951 +train: [7] [100/400] eta: 0:02:04 lr: 0.000284 loss: 1.0388 (1.0481) grad: 0.0500 (0.0539) time: 0.3576 data: 0.0033 max mem: 3951 +train: [7] [120/400] eta: 0:01:54 lr: 0.000283 loss: 1.0385 (1.0482) grad: 0.0514 (0.0533) time: 0.3678 data: 0.0039 max mem: 3951 +train: [7] [140/400] eta: 0:01:44 lr: 0.000282 loss: 1.0201 (1.0429) grad: 0.0526 (0.0538) time: 0.3635 data: 0.0033 max mem: 3951 +train: [7] [160/400] eta: 0:01:35 lr: 0.000282 loss: 1.0164 (1.0405) grad: 0.0539 (0.0537) time: 0.3614 data: 0.0033 max mem: 3951 +train: [7] [180/400] eta: 0:01:26 lr: 0.000281 loss: 1.0248 (1.0381) grad: 0.0532 (0.0537) time: 0.3782 data: 0.0035 max mem: 3951 +train: [7] [200/400] eta: 0:01:18 lr: 0.000280 loss: 1.0270 (1.0372) grad: 0.0501 (0.0533) time: 0.3806 data: 0.0034 max mem: 3951 +train: [7] [220/400] eta: 0:01:10 lr: 0.000279 loss: 1.0207 (1.0350) grad: 0.0481 (0.0530) time: 0.3669 data: 0.0034 max mem: 3951 +train: [7] [240/400] eta: 0:01:02 lr: 0.000278 loss: 1.0171 (1.0325) grad: 0.0495 (0.0529) time: 0.3911 data: 0.0035 max mem: 3951 +train: [7] [260/400] eta: 0:00:54 lr: 0.000278 loss: 0.9998 (1.0302) grad: 0.0523 (0.0529) time: 0.3849 data: 0.0035 max mem: 3951 +train: [7] [280/400] eta: 0:00:46 lr: 0.000277 loss: 0.9839 (1.0267) grad: 0.0527 (0.0528) time: 0.3703 data: 0.0033 max mem: 3951 +train: [7] [300/400] eta: 0:00:39 lr: 0.000276 loss: 0.9973 (1.0261) grad: 0.0501 (0.0526) time: 0.5482 data: 0.2079 max mem: 3951 +train: [7] [320/400] eta: 0:00:31 lr: 0.000275 loss: 0.9992 (1.0233) grad: 0.0501 (0.0525) time: 0.3922 data: 0.0034 max mem: 3951 +train: [7] [340/400] eta: 0:00:23 lr: 0.000274 loss: 0.9875 (1.0227) grad: 0.0492 (0.0523) time: 0.3613 data: 0.0034 max mem: 3951 +train: [7] [360/400] eta: 0:00:15 lr: 0.000273 loss: 0.9918 (1.0203) grad: 0.0490 (0.0522) time: 0.3525 data: 0.0034 max mem: 3951 +train: [7] [380/400] eta: 0:00:07 lr: 0.000272 loss: 0.9648 (1.0179) grad: 0.0521 (0.0523) time: 0.3625 data: 0.0031 max mem: 3951 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.9832 (1.0166) grad: 0.0521 (0.0523) time: 0.3816 data: 0.0035 max mem: 3951 +train: [7] Total time: 0:02:36 (0.3924 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.9832 (1.0166) grad: 0.0521 (0.0523) +eval (validation): [7] [ 0/63] eta: 0:03:41 time: 3.5182 data: 3.2351 max mem: 3951 +eval (validation): [7] [20/63] eta: 0:00:23 time: 0.4094 data: 0.0048 max mem: 3951 +eval (validation): [7] [40/63] eta: 0:00:10 time: 0.3594 data: 0.0036 max mem: 3951 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3377 data: 0.0035 max mem: 3951 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3324 data: 0.0035 max mem: 3951 +eval (validation): [7] Total time: 0:00:26 (0.4235 s / it) +cv: [7] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.092 acc: 0.974 f1: 0.972 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:23:18 lr: nan time: 3.4964 data: 3.2431 max mem: 3951 +train: [8] [ 20/400] eta: 0:03:29 lr: 0.000270 loss: 1.0003 (1.0111) grad: 0.0473 (0.0495) time: 0.4029 data: 0.0040 max mem: 3951 +train: [8] [ 40/400] eta: 0:02:46 lr: 0.000270 loss: 0.9839 (0.9848) grad: 0.0508 (0.0517) time: 0.3687 data: 0.0035 max mem: 3951 +train: [8] [ 60/400] eta: 0:02:26 lr: 0.000269 loss: 0.9702 (0.9780) grad: 0.0523 (0.0520) time: 0.3705 data: 0.0034 max mem: 3951 +train: [8] [ 80/400] eta: 0:02:13 lr: 0.000268 loss: 0.9702 (0.9763) grad: 0.0510 (0.0518) time: 0.3774 data: 0.0034 max mem: 3951 +train: [8] [100/400] eta: 0:02:02 lr: 0.000267 loss: 0.9529 (0.9708) grad: 0.0509 (0.0518) time: 0.3724 data: 0.0033 max mem: 3951 +train: [8] [120/400] eta: 0:01:54 lr: 0.000266 loss: 0.9455 (0.9675) grad: 0.0533 (0.0521) time: 0.3974 data: 0.0034 max mem: 3951 +train: [8] [140/400] eta: 0:01:45 lr: 0.000265 loss: 0.9624 (0.9681) grad: 0.0534 (0.0519) time: 0.3928 data: 0.0035 max mem: 3951 +train: [8] [160/400] eta: 0:01:36 lr: 0.000264 loss: 0.9783 (0.9683) grad: 0.0527 (0.0522) time: 0.3903 data: 0.0035 max mem: 3951 +train: [8] [180/400] eta: 0:01:28 lr: 0.000263 loss: 0.9754 (0.9677) grad: 0.0517 (0.0518) time: 0.4115 data: 0.0034 max mem: 3951 +train: [8] [200/400] eta: 0:01:20 lr: 0.000262 loss: 0.9571 (0.9671) grad: 0.0491 (0.0516) time: 0.3848 data: 0.0033 max mem: 3951 +train: [8] [220/400] eta: 0:01:12 lr: 0.000260 loss: 0.9555 (0.9658) grad: 0.0500 (0.0513) time: 0.3896 data: 0.0033 max mem: 3951 +train: [8] [240/400] eta: 0:01:04 lr: 0.000259 loss: 0.9578 (0.9648) grad: 0.0490 (0.0513) time: 0.4228 data: 0.0033 max mem: 3951 +train: [8] [260/400] eta: 0:00:56 lr: 0.000258 loss: 0.9556 (0.9631) grad: 0.0495 (0.0513) time: 0.3887 data: 0.0035 max mem: 3951 +train: [8] [280/400] eta: 0:00:47 lr: 0.000257 loss: 0.9319 (0.9606) grad: 0.0494 (0.0513) time: 0.3620 data: 0.0035 max mem: 3951 +train: [8] [300/400] eta: 0:00:41 lr: 0.000256 loss: 0.9339 (0.9612) grad: 0.0487 (0.0510) time: 0.5926 data: 0.2095 max mem: 3951 +train: [8] [320/400] eta: 0:00:32 lr: 0.000255 loss: 0.9390 (0.9603) grad: 0.0482 (0.0509) time: 0.4129 data: 0.0039 max mem: 3951 +train: [8] [340/400] eta: 0:00:24 lr: 0.000254 loss: 0.9374 (0.9599) grad: 0.0484 (0.0508) time: 0.4022 data: 0.0024 max mem: 3951 +train: [8] [360/400] eta: 0:00:16 lr: 0.000253 loss: 0.9328 (0.9578) grad: 0.0494 (0.0509) time: 0.3666 data: 0.0036 max mem: 3951 +train: [8] [380/400] eta: 0:00:08 lr: 0.000252 loss: 0.9229 (0.9571) grad: 0.0500 (0.0508) time: 0.3935 data: 0.0036 max mem: 3951 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.9346 (0.9565) grad: 0.0480 (0.0506) time: 0.4081 data: 0.0036 max mem: 3951 +train: [8] Total time: 0:02:43 (0.4084 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.9346 (0.9565) grad: 0.0480 (0.0506) +eval (validation): [8] [ 0/63] eta: 0:04:00 time: 3.8223 data: 3.5826 max mem: 3951 +eval (validation): [8] [20/63] eta: 0:00:23 time: 0.3723 data: 0.0038 max mem: 3951 +eval (validation): [8] [40/63] eta: 0:00:10 time: 0.3881 data: 0.0034 max mem: 3951 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3419 data: 0.0035 max mem: 3951 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3409 data: 0.0035 max mem: 3951 +eval (validation): [8] Total time: 0:00:26 (0.4280 s / it) +cv: [8] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 0.093 acc: 0.975 f1: 0.972 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:26:46 lr: nan time: 4.0155 data: 3.6946 max mem: 3951 +train: [9] [ 20/400] eta: 0:03:48 lr: 0.000249 loss: 0.9291 (0.9332) grad: 0.0484 (0.0482) time: 0.4308 data: 0.0030 max mem: 3951 +train: [9] [ 40/400] eta: 0:02:58 lr: 0.000248 loss: 0.9292 (0.9340) grad: 0.0484 (0.0481) time: 0.3852 data: 0.0036 max mem: 3951 +train: [9] [ 60/400] eta: 0:02:36 lr: 0.000247 loss: 0.9292 (0.9307) grad: 0.0487 (0.0488) time: 0.3831 data: 0.0035 max mem: 3951 +train: [9] [ 80/400] eta: 0:02:20 lr: 0.000246 loss: 0.9351 (0.9358) grad: 0.0496 (0.0488) time: 0.3739 data: 0.0033 max mem: 3951 +train: [9] [100/400] eta: 0:02:08 lr: 0.000244 loss: 0.9284 (0.9310) grad: 0.0479 (0.0485) time: 0.3935 data: 0.0034 max mem: 3951 +train: [9] [120/400] eta: 0:01:57 lr: 0.000243 loss: 0.9231 (0.9290) grad: 0.0479 (0.0486) time: 0.3809 data: 0.0038 max mem: 3951 +train: [9] [140/400] eta: 0:01:47 lr: 0.000242 loss: 0.9281 (0.9277) grad: 0.0488 (0.0487) time: 0.3741 data: 0.0030 max mem: 3951 +train: [9] [160/400] eta: 0:01:38 lr: 0.000241 loss: 0.9129 (0.9271) grad: 0.0491 (0.0487) time: 0.3826 data: 0.0035 max mem: 3951 +train: [9] [180/400] eta: 0:01:29 lr: 0.000240 loss: 0.9120 (0.9264) grad: 0.0488 (0.0486) time: 0.3831 data: 0.0030 max mem: 3951 +train: [9] [200/400] eta: 0:01:20 lr: 0.000238 loss: 0.9107 (0.9249) grad: 0.0481 (0.0484) time: 0.3778 data: 0.0034 max mem: 3951 +train: [9] [220/400] eta: 0:01:12 lr: 0.000237 loss: 0.9131 (0.9233) grad: 0.0482 (0.0484) time: 0.3627 data: 0.0033 max mem: 3951 +train: [9] [240/400] eta: 0:01:03 lr: 0.000236 loss: 0.9131 (0.9223) grad: 0.0469 (0.0483) time: 0.3855 data: 0.0033 max mem: 3951 +train: [9] [260/400] eta: 0:00:55 lr: 0.000234 loss: 0.8984 (0.9200) grad: 0.0459 (0.0482) time: 0.3632 data: 0.0033 max mem: 3951 +train: [9] [280/400] eta: 0:00:47 lr: 0.000233 loss: 0.8842 (0.9183) grad: 0.0480 (0.0483) time: 0.3694 data: 0.0034 max mem: 3951 +train: [9] [300/400] eta: 0:00:40 lr: 0.000232 loss: 0.9062 (0.9176) grad: 0.0468 (0.0482) time: 0.5379 data: 0.1960 max mem: 3951 +train: [9] [320/400] eta: 0:00:32 lr: 0.000230 loss: 0.9035 (0.9156) grad: 0.0468 (0.0483) time: 0.3940 data: 0.0038 max mem: 3951 +train: [9] [340/400] eta: 0:00:24 lr: 0.000229 loss: 0.8868 (0.9143) grad: 0.0476 (0.0482) time: 0.3889 data: 0.0033 max mem: 3951 +train: [9] [360/400] eta: 0:00:16 lr: 0.000228 loss: 0.8943 (0.9133) grad: 0.0480 (0.0483) time: 0.3551 data: 0.0032 max mem: 3951 +train: [9] [380/400] eta: 0:00:07 lr: 0.000226 loss: 0.8781 (0.9120) grad: 0.0490 (0.0483) time: 0.3784 data: 0.0033 max mem: 3951 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.8873 (0.9112) grad: 0.0460 (0.0484) time: 0.3783 data: 0.0033 max mem: 3951 +train: [9] Total time: 0:02:39 (0.3983 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.8873 (0.9112) grad: 0.0460 (0.0484) +eval (validation): [9] [ 0/63] eta: 0:03:44 time: 3.5606 data: 3.3152 max mem: 3951 +eval (validation): [9] [20/63] eta: 0:00:22 time: 0.3675 data: 0.0038 max mem: 3951 +eval (validation): [9] [40/63] eta: 0:00:10 time: 0.3674 data: 0.0027 max mem: 3951 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3397 data: 0.0032 max mem: 3951 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3340 data: 0.0031 max mem: 3951 +eval (validation): [9] Total time: 0:00:26 (0.4131 s / it) +cv: [9] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.085 acc: 0.976 f1: 0.974 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [10] [ 0/400] eta: 0:23:52 lr: nan time: 3.5803 data: 3.2835 max mem: 3951 +train: [10] [ 20/400] eta: 0:03:28 lr: 0.000224 loss: 0.8690 (0.8857) grad: 0.0482 (0.0498) time: 0.3974 data: 0.0041 max mem: 3951 +train: [10] [ 40/400] eta: 0:02:47 lr: 0.000222 loss: 0.8908 (0.8924) grad: 0.0469 (0.0477) time: 0.3747 data: 0.0033 max mem: 3951 +train: [10] [ 60/400] eta: 0:02:27 lr: 0.000221 loss: 0.8809 (0.8894) grad: 0.0457 (0.0482) time: 0.3739 data: 0.0032 max mem: 3951 +train: [10] [ 80/400] eta: 0:02:13 lr: 0.000220 loss: 0.8704 (0.8848) grad: 0.0484 (0.0482) time: 0.3648 data: 0.0034 max mem: 3951 +train: [10] [100/400] eta: 0:02:01 lr: 0.000218 loss: 0.8690 (0.8834) grad: 0.0474 (0.0479) time: 0.3576 data: 0.0033 max mem: 3951 +train: [10] [120/400] eta: 0:01:51 lr: 0.000217 loss: 0.8682 (0.8780) grad: 0.0480 (0.0478) time: 0.3653 data: 0.0032 max mem: 3951 +train: [10] [140/400] eta: 0:01:42 lr: 0.000215 loss: 0.8682 (0.8792) grad: 0.0486 (0.0481) time: 0.3695 data: 0.0033 max mem: 3951 +train: [10] [160/400] eta: 0:01:33 lr: 0.000214 loss: 0.8912 (0.8820) grad: 0.0468 (0.0478) time: 0.3677 data: 0.0034 max mem: 3951 +train: [10] [180/400] eta: 0:01:25 lr: 0.000213 loss: 0.8912 (0.8822) grad: 0.0468 (0.0477) time: 0.3497 data: 0.0034 max mem: 3951 +train: [10] [200/400] eta: 0:01:16 lr: 0.000211 loss: 0.8705 (0.8799) grad: 0.0468 (0.0477) time: 0.3645 data: 0.0033 max mem: 3951 +train: [10] [220/400] eta: 0:01:08 lr: 0.000210 loss: 0.8677 (0.8791) grad: 0.0459 (0.0478) time: 0.3681 data: 0.0034 max mem: 3951 +train: [10] [240/400] eta: 0:01:01 lr: 0.000208 loss: 0.8677 (0.8789) grad: 0.0452 (0.0475) time: 0.3740 data: 0.0033 max mem: 3951 +train: [10] [260/400] eta: 0:00:53 lr: 0.000207 loss: 0.8696 (0.8793) grad: 0.0444 (0.0473) time: 0.3957 data: 0.0034 max mem: 3951 +train: [10] [280/400] eta: 0:00:45 lr: 0.000205 loss: 0.8672 (0.8789) grad: 0.0459 (0.0473) time: 0.3685 data: 0.0034 max mem: 3951 +train: [10] [300/400] eta: 0:00:39 lr: 0.000204 loss: 0.8603 (0.8772) grad: 0.0459 (0.0471) time: 0.5727 data: 0.2081 max mem: 3951 +train: [10] [320/400] eta: 0:00:31 lr: 0.000202 loss: 0.8521 (0.8767) grad: 0.0459 (0.0472) time: 0.3674 data: 0.0033 max mem: 3951 +train: [10] [340/400] eta: 0:00:23 lr: 0.000201 loss: 0.8521 (0.8756) grad: 0.0463 (0.0471) time: 0.3652 data: 0.0033 max mem: 3951 +train: [10] [360/400] eta: 0:00:15 lr: 0.000199 loss: 0.8667 (0.8754) grad: 0.0463 (0.0471) time: 0.3493 data: 0.0032 max mem: 3951 +train: [10] [380/400] eta: 0:00:07 lr: 0.000198 loss: 0.8739 (0.8753) grad: 0.0440 (0.0469) time: 0.3598 data: 0.0034 max mem: 3951 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.8411 (0.8733) grad: 0.0428 (0.0467) time: 0.3568 data: 0.0034 max mem: 3951 +train: [10] Total time: 0:02:34 (0.3865 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.8411 (0.8733) grad: 0.0428 (0.0467) +eval (validation): [10] [ 0/63] eta: 0:04:09 time: 3.9679 data: 3.7355 max mem: 3951 +eval (validation): [10] [20/63] eta: 0:00:22 time: 0.3497 data: 0.0030 max mem: 3951 +eval (validation): [10] [40/63] eta: 0:00:10 time: 0.3454 data: 0.0035 max mem: 3951 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3367 data: 0.0032 max mem: 3951 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3341 data: 0.0023 max mem: 3951 +eval (validation): [10] Total time: 0:00:25 (0.4059 s / it) +cv: [10] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.092 acc: 0.978 f1: 0.976 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [11] [ 0/400] eta: 0:23:20 lr: nan time: 3.5008 data: 3.2463 max mem: 3951 +train: [11] [ 20/400] eta: 0:03:15 lr: 0.000195 loss: 0.8356 (0.8376) grad: 0.0443 (0.0455) time: 0.3656 data: 0.0042 max mem: 3951 +train: [11] [ 40/400] eta: 0:02:39 lr: 0.000193 loss: 0.8485 (0.8514) grad: 0.0449 (0.0462) time: 0.3702 data: 0.0033 max mem: 3951 +train: [11] [ 60/400] eta: 0:02:22 lr: 0.000192 loss: 0.8638 (0.8541) grad: 0.0448 (0.0461) time: 0.3710 data: 0.0034 max mem: 3951 +train: [11] [ 80/400] eta: 0:02:11 lr: 0.000190 loss: 0.8606 (0.8565) grad: 0.0446 (0.0460) time: 0.3805 data: 0.0035 max mem: 3951 +train: [11] [100/400] eta: 0:02:00 lr: 0.000189 loss: 0.8493 (0.8537) grad: 0.0452 (0.0463) time: 0.3663 data: 0.0034 max mem: 3951 +train: [11] [120/400] eta: 0:01:50 lr: 0.000187 loss: 0.8424 (0.8522) grad: 0.0446 (0.0464) time: 0.3681 data: 0.0034 max mem: 3951 +train: [11] [140/400] eta: 0:01:42 lr: 0.000186 loss: 0.8571 (0.8535) grad: 0.0462 (0.0464) time: 0.3750 data: 0.0034 max mem: 3951 +train: [11] [160/400] eta: 0:01:34 lr: 0.000184 loss: 0.8354 (0.8508) grad: 0.0458 (0.0461) time: 0.3931 data: 0.0032 max mem: 3951 +train: [11] [180/400] eta: 0:01:25 lr: 0.000183 loss: 0.8354 (0.8505) grad: 0.0440 (0.0459) time: 0.3596 data: 0.0032 max mem: 3951 +train: [11] [200/400] eta: 0:01:17 lr: 0.000181 loss: 0.8402 (0.8499) grad: 0.0439 (0.0458) time: 0.3666 data: 0.0034 max mem: 3951 +train: [11] [220/400] eta: 0:01:09 lr: 0.000180 loss: 0.8522 (0.8505) grad: 0.0448 (0.0457) time: 0.3662 data: 0.0035 max mem: 3951 +train: [11] [240/400] eta: 0:01:01 lr: 0.000178 loss: 0.8494 (0.8491) grad: 0.0461 (0.0459) time: 0.3582 data: 0.0033 max mem: 3951 +train: [11] [260/400] eta: 0:00:53 lr: 0.000177 loss: 0.8421 (0.8498) grad: 0.0461 (0.0459) time: 0.3660 data: 0.0033 max mem: 3951 +train: [11] [280/400] eta: 0:00:45 lr: 0.000175 loss: 0.8346 (0.8475) grad: 0.0454 (0.0460) time: 0.3678 data: 0.0033 max mem: 3951 +train: [11] [300/400] eta: 0:00:39 lr: 0.000174 loss: 0.7952 (0.8446) grad: 0.0467 (0.0459) time: 0.5236 data: 0.1822 max mem: 3951 +train: [11] [320/400] eta: 0:00:31 lr: 0.000172 loss: 0.8151 (0.8444) grad: 0.0444 (0.0458) time: 0.3882 data: 0.0038 max mem: 3951 +train: [11] [340/400] eta: 0:00:23 lr: 0.000170 loss: 0.8181 (0.8428) grad: 0.0415 (0.0457) time: 0.3678 data: 0.0025 max mem: 3951 +train: [11] [360/400] eta: 0:00:15 lr: 0.000169 loss: 0.8181 (0.8425) grad: 0.0430 (0.0456) time: 0.3678 data: 0.0036 max mem: 3951 +train: [11] [380/400] eta: 0:00:07 lr: 0.000167 loss: 0.8173 (0.8414) grad: 0.0451 (0.0457) time: 0.3730 data: 0.0032 max mem: 3951 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.8173 (0.8409) grad: 0.0451 (0.0456) time: 0.3676 data: 0.0036 max mem: 3951 +train: [11] Total time: 0:02:34 (0.3861 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.8173 (0.8409) grad: 0.0451 (0.0456) +eval (validation): [11] [ 0/63] eta: 0:03:34 time: 3.4045 data: 3.1668 max mem: 3951 +eval (validation): [11] [20/63] eta: 0:00:22 time: 0.3717 data: 0.0046 max mem: 3951 +eval (validation): [11] [40/63] eta: 0:00:10 time: 0.3591 data: 0.0029 max mem: 3951 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3457 data: 0.0036 max mem: 3951 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3458 data: 0.0036 max mem: 3951 +eval (validation): [11] Total time: 0:00:25 (0.4113 s / it) +cv: [11] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 0.085 acc: 0.977 f1: 0.974 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:28:03 lr: nan time: 4.2077 data: 3.9181 max mem: 3951 +train: [12] [ 20/400] eta: 0:03:29 lr: 0.000164 loss: 0.8311 (0.8338) grad: 0.0435 (0.0443) time: 0.3694 data: 0.0037 max mem: 3951 +train: [12] [ 40/400] eta: 0:02:44 lr: 0.000163 loss: 0.8291 (0.8264) grad: 0.0438 (0.0442) time: 0.3573 data: 0.0024 max mem: 3951 +train: [12] [ 60/400] eta: 0:02:24 lr: 0.000161 loss: 0.8256 (0.8273) grad: 0.0440 (0.0444) time: 0.3617 data: 0.0035 max mem: 3951 +train: [12] [ 80/400] eta: 0:02:11 lr: 0.000160 loss: 0.8299 (0.8270) grad: 0.0441 (0.0444) time: 0.3619 data: 0.0038 max mem: 3951 +train: [12] [100/400] eta: 0:01:59 lr: 0.000158 loss: 0.8227 (0.8258) grad: 0.0446 (0.0448) time: 0.3569 data: 0.0032 max mem: 3951 +train: [12] [120/400] eta: 0:01:49 lr: 0.000156 loss: 0.8200 (0.8257) grad: 0.0457 (0.0450) time: 0.3582 data: 0.0034 max mem: 3951 +train: [12] [140/400] eta: 0:01:41 lr: 0.000155 loss: 0.8200 (0.8242) grad: 0.0426 (0.0447) time: 0.3691 data: 0.0035 max mem: 3951 +train: [12] [160/400] eta: 0:01:32 lr: 0.000153 loss: 0.8088 (0.8220) grad: 0.0431 (0.0450) time: 0.3640 data: 0.0035 max mem: 3951 +train: [12] [180/400] eta: 0:01:24 lr: 0.000152 loss: 0.8138 (0.8212) grad: 0.0437 (0.0448) time: 0.3725 data: 0.0033 max mem: 3951 +train: [12] [200/400] eta: 0:01:16 lr: 0.000150 loss: 0.8138 (0.8234) grad: 0.0437 (0.0448) time: 0.3752 data: 0.0036 max mem: 3951 +train: [12] [220/400] eta: 0:01:08 lr: 0.000149 loss: 0.8154 (0.8230) grad: 0.0442 (0.0449) time: 0.3689 data: 0.0035 max mem: 3951 +train: [12] [240/400] eta: 0:01:00 lr: 0.000147 loss: 0.8169 (0.8228) grad: 0.0436 (0.0447) time: 0.3621 data: 0.0034 max mem: 3951 +train: [12] [260/400] eta: 0:00:53 lr: 0.000145 loss: 0.8242 (0.8230) grad: 0.0437 (0.0448) time: 0.3662 data: 0.0034 max mem: 3951 +train: [12] [280/400] eta: 0:00:45 lr: 0.000144 loss: 0.8266 (0.8243) grad: 0.0434 (0.0447) time: 0.3723 data: 0.0031 max mem: 3951 +train: [12] [300/400] eta: 0:00:38 lr: 0.000142 loss: 0.8266 (0.8243) grad: 0.0428 (0.0446) time: 0.5262 data: 0.1828 max mem: 3951 +train: [12] [320/400] eta: 0:00:31 lr: 0.000141 loss: 0.8157 (0.8230) grad: 0.0437 (0.0446) time: 0.3935 data: 0.0039 max mem: 3951 +train: [12] [340/400] eta: 0:00:23 lr: 0.000139 loss: 0.8009 (0.8215) grad: 0.0435 (0.0445) time: 0.3728 data: 0.0025 max mem: 3951 +train: [12] [360/400] eta: 0:00:15 lr: 0.000138 loss: 0.8013 (0.8214) grad: 0.0445 (0.0446) time: 0.3610 data: 0.0033 max mem: 3951 +train: [12] [380/400] eta: 0:00:07 lr: 0.000136 loss: 0.8217 (0.8206) grad: 0.0453 (0.0446) time: 0.3558 data: 0.0035 max mem: 3951 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.8124 (0.8200) grad: 0.0451 (0.0445) time: 0.3745 data: 0.0036 max mem: 3951 +train: [12] Total time: 0:02:33 (0.3849 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.8124 (0.8200) grad: 0.0451 (0.0445) +eval (validation): [12] [ 0/63] eta: 0:03:44 time: 3.5679 data: 3.2813 max mem: 3951 +eval (validation): [12] [20/63] eta: 0:00:22 time: 0.3680 data: 0.0047 max mem: 3951 +eval (validation): [12] [40/63] eta: 0:00:10 time: 0.3644 data: 0.0029 max mem: 3951 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3474 data: 0.0035 max mem: 3951 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3465 data: 0.0035 max mem: 3951 +eval (validation): [12] Total time: 0:00:26 (0.4148 s / it) +cv: [12] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.093 acc: 0.977 f1: 0.974 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:24:18 lr: nan time: 3.6455 data: 3.3396 max mem: 3951 +train: [13] [ 20/400] eta: 0:03:32 lr: 0.000133 loss: 0.8143 (0.8205) grad: 0.0415 (0.0419) time: 0.4052 data: 0.0030 max mem: 3951 +train: [13] [ 40/400] eta: 0:02:48 lr: 0.000131 loss: 0.8104 (0.8087) grad: 0.0425 (0.0430) time: 0.3728 data: 0.0035 max mem: 3951 +train: [13] [ 60/400] eta: 0:02:31 lr: 0.000130 loss: 0.7823 (0.7994) grad: 0.0437 (0.0440) time: 0.3977 data: 0.0037 max mem: 3951 +train: [13] [ 80/400] eta: 0:02:18 lr: 0.000128 loss: 0.7780 (0.7959) grad: 0.0441 (0.0438) time: 0.3916 data: 0.0035 max mem: 3951 +train: [13] [100/400] eta: 0:02:07 lr: 0.000127 loss: 0.7972 (0.7986) grad: 0.0432 (0.0440) time: 0.3965 data: 0.0034 max mem: 3951 +train: [13] [120/400] eta: 0:01:56 lr: 0.000125 loss: 0.7994 (0.7980) grad: 0.0432 (0.0440) time: 0.3764 data: 0.0033 max mem: 3951 +train: [13] [140/400] eta: 0:01:46 lr: 0.000124 loss: 0.7941 (0.7989) grad: 0.0430 (0.0439) time: 0.3788 data: 0.0036 max mem: 3951 +train: [13] [160/400] eta: 0:01:37 lr: 0.000122 loss: 0.7933 (0.7980) grad: 0.0441 (0.0441) time: 0.3780 data: 0.0032 max mem: 3951 +train: [13] [180/400] eta: 0:01:29 lr: 0.000120 loss: 0.7888 (0.7973) grad: 0.0443 (0.0442) time: 0.3882 data: 0.0034 max mem: 3951 +train: [13] [200/400] eta: 0:01:20 lr: 0.000119 loss: 0.7856 (0.7962) grad: 0.0449 (0.0444) time: 0.3734 data: 0.0033 max mem: 3951 +train: [13] [220/400] eta: 0:01:11 lr: 0.000117 loss: 0.7998 (0.7978) grad: 0.0451 (0.0443) time: 0.3727 data: 0.0034 max mem: 3951 +train: [13] [240/400] eta: 0:01:03 lr: 0.000116 loss: 0.7998 (0.7990) grad: 0.0438 (0.0442) time: 0.3742 data: 0.0035 max mem: 3951 +train: [13] [260/400] eta: 0:00:55 lr: 0.000114 loss: 0.7960 (0.7990) grad: 0.0438 (0.0443) time: 0.3829 data: 0.0035 max mem: 3951 +train: [13] [280/400] eta: 0:00:47 lr: 0.000113 loss: 0.8046 (0.7995) grad: 0.0445 (0.0442) time: 0.4108 data: 0.0037 max mem: 3951 +train: [13] [300/400] eta: 0:00:40 lr: 0.000111 loss: 0.8054 (0.7995) grad: 0.0430 (0.0442) time: 0.5708 data: 0.1903 max mem: 3951 +train: [13] [320/400] eta: 0:00:32 lr: 0.000110 loss: 0.8048 (0.7999) grad: 0.0427 (0.0442) time: 0.3879 data: 0.0037 max mem: 3951 +train: [13] [340/400] eta: 0:00:24 lr: 0.000108 loss: 0.8048 (0.8010) grad: 0.0434 (0.0443) time: 0.3741 data: 0.0031 max mem: 3951 +train: [13] [360/400] eta: 0:00:16 lr: 0.000107 loss: 0.7932 (0.8004) grad: 0.0441 (0.0443) time: 0.3715 data: 0.0035 max mem: 3951 +train: [13] [380/400] eta: 0:00:08 lr: 0.000105 loss: 0.7915 (0.7996) grad: 0.0433 (0.0441) time: 0.3552 data: 0.0033 max mem: 3951 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.7738 (0.7986) grad: 0.0431 (0.0441) time: 0.3791 data: 0.0033 max mem: 3951 +train: [13] Total time: 0:02:40 (0.4003 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.7738 (0.7986) grad: 0.0431 (0.0441) +eval (validation): [13] [ 0/63] eta: 0:03:38 time: 3.4729 data: 3.2391 max mem: 3951 +eval (validation): [13] [20/63] eta: 0:00:21 time: 0.3569 data: 0.0042 max mem: 3951 +eval (validation): [13] [40/63] eta: 0:00:10 time: 0.3680 data: 0.0029 max mem: 3951 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3534 data: 0.0034 max mem: 3951 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3500 data: 0.0034 max mem: 3951 +eval (validation): [13] Total time: 0:00:26 (0.4133 s / it) +cv: [13] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 0.083 acc: 0.977 f1: 0.974 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:23:53 lr: nan time: 3.5848 data: 3.3267 max mem: 3951 +train: [14] [ 20/400] eta: 0:03:35 lr: 0.000102 loss: 0.8107 (0.8239) grad: 0.0441 (0.0443) time: 0.4174 data: 0.0042 max mem: 3951 +train: [14] [ 40/400] eta: 0:02:53 lr: 0.000101 loss: 0.8107 (0.8044) grad: 0.0431 (0.0437) time: 0.3925 data: 0.0029 max mem: 3951 +train: [14] [ 60/400] eta: 0:02:33 lr: 0.000099 loss: 0.7817 (0.7957) grad: 0.0431 (0.0438) time: 0.3855 data: 0.0033 max mem: 3951 +train: [14] [ 80/400] eta: 0:02:19 lr: 0.000098 loss: 0.7935 (0.7964) grad: 0.0447 (0.0442) time: 0.3849 data: 0.0033 max mem: 3951 +train: [14] [100/400] eta: 0:02:07 lr: 0.000096 loss: 0.7935 (0.7963) grad: 0.0447 (0.0441) time: 0.3870 data: 0.0034 max mem: 3951 +train: [14] [120/400] eta: 0:01:56 lr: 0.000095 loss: 0.7791 (0.7919) grad: 0.0456 (0.0443) time: 0.3705 data: 0.0033 max mem: 3951 +train: [14] [140/400] eta: 0:01:46 lr: 0.000093 loss: 0.7623 (0.7875) grad: 0.0456 (0.0444) time: 0.3757 data: 0.0034 max mem: 3951 +train: [14] [160/400] eta: 0:01:38 lr: 0.000092 loss: 0.7717 (0.7863) grad: 0.0447 (0.0443) time: 0.4005 data: 0.0036 max mem: 3951 +train: [14] [180/400] eta: 0:01:29 lr: 0.000090 loss: 0.7781 (0.7855) grad: 0.0447 (0.0444) time: 0.4085 data: 0.0033 max mem: 3951 +train: [14] [200/400] eta: 0:01:21 lr: 0.000089 loss: 0.7781 (0.7861) grad: 0.0435 (0.0443) time: 0.3763 data: 0.0034 max mem: 3951 +train: [14] [220/400] eta: 0:01:12 lr: 0.000088 loss: 0.7861 (0.7875) grad: 0.0435 (0.0442) time: 0.3780 data: 0.0034 max mem: 3951 +train: [14] [240/400] eta: 0:01:04 lr: 0.000086 loss: 0.7800 (0.7859) grad: 0.0435 (0.0441) time: 0.3793 data: 0.0034 max mem: 3951 +train: [14] [260/400] eta: 0:00:56 lr: 0.000085 loss: 0.7756 (0.7857) grad: 0.0430 (0.0441) time: 0.3998 data: 0.0034 max mem: 3951 +train: [14] [280/400] eta: 0:00:47 lr: 0.000083 loss: 0.7756 (0.7852) grad: 0.0419 (0.0440) time: 0.3737 data: 0.0035 max mem: 3951 +train: [14] [300/400] eta: 0:00:40 lr: 0.000082 loss: 0.7802 (0.7854) grad: 0.0428 (0.0439) time: 0.5584 data: 0.1805 max mem: 3951 +train: [14] [320/400] eta: 0:00:32 lr: 0.000081 loss: 0.7812 (0.7847) grad: 0.0430 (0.0440) time: 0.3787 data: 0.0045 max mem: 3951 +train: [14] [340/400] eta: 0:00:24 lr: 0.000079 loss: 0.7663 (0.7846) grad: 0.0424 (0.0439) time: 0.3758 data: 0.0031 max mem: 3951 +train: [14] [360/400] eta: 0:00:16 lr: 0.000078 loss: 0.7722 (0.7847) grad: 0.0440 (0.0439) time: 0.3723 data: 0.0029 max mem: 3951 +train: [14] [380/400] eta: 0:00:08 lr: 0.000076 loss: 0.7841 (0.7849) grad: 0.0434 (0.0438) time: 0.3583 data: 0.0033 max mem: 3951 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.7628 (0.7837) grad: 0.0425 (0.0438) time: 0.3823 data: 0.0034 max mem: 3951 +train: [14] Total time: 0:02:40 (0.4011 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.7628 (0.7837) grad: 0.0425 (0.0438) +eval (validation): [14] [ 0/63] eta: 0:03:47 time: 3.6145 data: 3.3107 max mem: 3951 +eval (validation): [14] [20/63] eta: 0:00:23 time: 0.3919 data: 0.0044 max mem: 3951 +eval (validation): [14] [40/63] eta: 0:00:10 time: 0.3424 data: 0.0030 max mem: 3951 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3454 data: 0.0034 max mem: 3951 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3429 data: 0.0033 max mem: 3951 +eval (validation): [14] Total time: 0:00:26 (0.4153 s / it) +cv: [14] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.082 acc: 0.977 f1: 0.974 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:23:54 lr: nan time: 3.5875 data: 3.2884 max mem: 3951 +train: [15] [ 20/400] eta: 0:03:29 lr: 0.000074 loss: 0.7770 (0.7905) grad: 0.0441 (0.0440) time: 0.4005 data: 0.0029 max mem: 3951 +train: [15] [ 40/400] eta: 0:02:48 lr: 0.000072 loss: 0.7719 (0.7832) grad: 0.0430 (0.0431) time: 0.3771 data: 0.0034 max mem: 3951 +train: [15] [ 60/400] eta: 0:02:29 lr: 0.000071 loss: 0.7751 (0.7856) grad: 0.0429 (0.0438) time: 0.3807 data: 0.0036 max mem: 3951 +train: [15] [ 80/400] eta: 0:02:15 lr: 0.000070 loss: 0.7795 (0.7853) grad: 0.0445 (0.0439) time: 0.3822 data: 0.0035 max mem: 3951 +train: [15] [100/400] eta: 0:02:04 lr: 0.000068 loss: 0.7661 (0.7795) grad: 0.0445 (0.0440) time: 0.3726 data: 0.0033 max mem: 3951 +train: [15] [120/400] eta: 0:01:54 lr: 0.000067 loss: 0.7639 (0.7796) grad: 0.0417 (0.0437) time: 0.3751 data: 0.0033 max mem: 3951 +train: [15] [140/400] eta: 0:01:45 lr: 0.000066 loss: 0.7777 (0.7801) grad: 0.0424 (0.0438) time: 0.3819 data: 0.0034 max mem: 3951 +train: [15] [160/400] eta: 0:01:36 lr: 0.000064 loss: 0.7763 (0.7800) grad: 0.0430 (0.0435) time: 0.3847 data: 0.0036 max mem: 3951 +train: [15] [180/400] eta: 0:01:28 lr: 0.000063 loss: 0.7763 (0.7801) grad: 0.0430 (0.0436) time: 0.4144 data: 0.0036 max mem: 3951 +train: [15] [200/400] eta: 0:01:19 lr: 0.000062 loss: 0.7834 (0.7818) grad: 0.0433 (0.0435) time: 0.3683 data: 0.0033 max mem: 3951 +train: [15] [220/400] eta: 0:01:11 lr: 0.000061 loss: 0.7849 (0.7810) grad: 0.0430 (0.0435) time: 0.3732 data: 0.0034 max mem: 3951 +train: [15] [240/400] eta: 0:01:03 lr: 0.000059 loss: 0.7849 (0.7817) grad: 0.0430 (0.0434) time: 0.3757 data: 0.0035 max mem: 3951 +train: [15] [260/400] eta: 0:00:55 lr: 0.000058 loss: 0.7848 (0.7821) grad: 0.0427 (0.0434) time: 0.3774 data: 0.0035 max mem: 3951 +train: [15] [280/400] eta: 0:00:47 lr: 0.000057 loss: 0.7817 (0.7814) grad: 0.0408 (0.0432) time: 0.3773 data: 0.0037 max mem: 3951 +train: [15] [300/400] eta: 0:00:40 lr: 0.000056 loss: 0.7837 (0.7815) grad: 0.0418 (0.0432) time: 0.5329 data: 0.1872 max mem: 3951 +train: [15] [320/400] eta: 0:00:32 lr: 0.000054 loss: 0.7594 (0.7797) grad: 0.0413 (0.0432) time: 0.3695 data: 0.0036 max mem: 3951 +train: [15] [340/400] eta: 0:00:23 lr: 0.000053 loss: 0.7551 (0.7788) grad: 0.0420 (0.0432) time: 0.3636 data: 0.0034 max mem: 3951 +train: [15] [360/400] eta: 0:00:15 lr: 0.000052 loss: 0.7621 (0.7787) grad: 0.0433 (0.0432) time: 0.3626 data: 0.0033 max mem: 3951 +train: [15] [380/400] eta: 0:00:07 lr: 0.000051 loss: 0.7621 (0.7779) grad: 0.0431 (0.0432) time: 0.3429 data: 0.0032 max mem: 3951 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.7704 (0.7778) grad: 0.0424 (0.0432) time: 0.3732 data: 0.0034 max mem: 3951 +train: [15] Total time: 0:02:37 (0.3926 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.7704 (0.7778) grad: 0.0424 (0.0432) +eval (validation): [15] [ 0/63] eta: 0:03:37 time: 3.4495 data: 3.2176 max mem: 3951 +eval (validation): [15] [20/63] eta: 0:00:22 time: 0.3673 data: 0.0040 max mem: 3951 +eval (validation): [15] [40/63] eta: 0:00:10 time: 0.3531 data: 0.0028 max mem: 3951 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3340 data: 0.0031 max mem: 3951 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3325 data: 0.0026 max mem: 3951 +eval (validation): [15] Total time: 0:00:25 (0.4048 s / it) +cv: [15] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.080 acc: 0.978 f1: 0.976 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:24:24 lr: nan time: 3.6615 data: 3.3715 max mem: 3951 +train: [16] [ 20/400] eta: 0:03:18 lr: 0.000048 loss: 0.7881 (0.7924) grad: 0.0377 (0.0389) time: 0.3665 data: 0.0030 max mem: 3951 +train: [16] [ 40/400] eta: 0:02:39 lr: 0.000047 loss: 0.7790 (0.7799) grad: 0.0402 (0.0413) time: 0.3611 data: 0.0030 max mem: 3951 +train: [16] [ 60/400] eta: 0:02:21 lr: 0.000046 loss: 0.7603 (0.7728) grad: 0.0425 (0.0425) time: 0.3616 data: 0.0034 max mem: 3951 +train: [16] [ 80/400] eta: 0:02:09 lr: 0.000045 loss: 0.7753 (0.7755) grad: 0.0420 (0.0424) time: 0.3632 data: 0.0035 max mem: 3951 +train: [16] [100/400] eta: 0:01:58 lr: 0.000044 loss: 0.7737 (0.7754) grad: 0.0429 (0.0427) time: 0.3632 data: 0.0035 max mem: 3951 +train: [16] [120/400] eta: 0:01:48 lr: 0.000043 loss: 0.7647 (0.7710) grad: 0.0429 (0.0427) time: 0.3552 data: 0.0034 max mem: 3951 +train: [16] [140/400] eta: 0:01:40 lr: 0.000042 loss: 0.7647 (0.7728) grad: 0.0422 (0.0429) time: 0.3809 data: 0.0035 max mem: 3951 +train: [16] [160/400] eta: 0:01:32 lr: 0.000041 loss: 0.7780 (0.7735) grad: 0.0436 (0.0432) time: 0.3755 data: 0.0035 max mem: 3951 +train: [16] [180/400] eta: 0:01:24 lr: 0.000040 loss: 0.7684 (0.7732) grad: 0.0425 (0.0430) time: 0.3724 data: 0.0034 max mem: 3951 +train: [16] [200/400] eta: 0:01:16 lr: 0.000039 loss: 0.7626 (0.7729) grad: 0.0399 (0.0431) time: 0.3648 data: 0.0032 max mem: 3951 +train: [16] [220/400] eta: 0:01:08 lr: 0.000038 loss: 0.7495 (0.7723) grad: 0.0414 (0.0430) time: 0.3632 data: 0.0033 max mem: 3951 +train: [16] [240/400] eta: 0:01:00 lr: 0.000036 loss: 0.7605 (0.7732) grad: 0.0417 (0.0431) time: 0.3628 data: 0.0034 max mem: 3951 +train: [16] [260/400] eta: 0:00:53 lr: 0.000035 loss: 0.7591 (0.7715) grad: 0.0435 (0.0431) time: 0.3970 data: 0.0035 max mem: 3951 +train: [16] [280/400] eta: 0:00:45 lr: 0.000034 loss: 0.7522 (0.7710) grad: 0.0406 (0.0428) time: 0.3826 data: 0.0036 max mem: 3951 +train: [16] [300/400] eta: 0:00:39 lr: 0.000033 loss: 0.7667 (0.7707) grad: 0.0402 (0.0428) time: 0.5380 data: 0.1824 max mem: 3951 +train: [16] [320/400] eta: 0:00:31 lr: 0.000032 loss: 0.7782 (0.7711) grad: 0.0434 (0.0429) time: 0.3841 data: 0.0037 max mem: 3951 +train: [16] [340/400] eta: 0:00:23 lr: 0.000031 loss: 0.7796 (0.7711) grad: 0.0421 (0.0428) time: 0.3680 data: 0.0031 max mem: 3951 +train: [16] [360/400] eta: 0:00:15 lr: 0.000031 loss: 0.7663 (0.7705) grad: 0.0411 (0.0428) time: 0.3877 data: 0.0034 max mem: 3951 +train: [16] [380/400] eta: 0:00:07 lr: 0.000030 loss: 0.7477 (0.7702) grad: 0.0425 (0.0429) time: 0.3700 data: 0.0034 max mem: 3951 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.7520 (0.7705) grad: 0.0431 (0.0429) time: 0.3594 data: 0.0035 max mem: 3951 +train: [16] Total time: 0:02:34 (0.3874 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.7520 (0.7705) grad: 0.0431 (0.0429) +eval (validation): [16] [ 0/63] eta: 0:03:50 time: 3.6659 data: 3.4057 max mem: 3951 +eval (validation): [16] [20/63] eta: 0:00:22 time: 0.3703 data: 0.0050 max mem: 3951 +eval (validation): [16] [40/63] eta: 0:00:10 time: 0.3750 data: 0.0029 max mem: 3951 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3386 data: 0.0034 max mem: 3951 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3351 data: 0.0034 max mem: 3951 +eval (validation): [16] Total time: 0:00:26 (0.4171 s / it) +cv: [16] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.086 acc: 0.978 f1: 0.975 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:24:17 lr: nan time: 3.6441 data: 3.3613 max mem: 3951 +train: [17] [ 20/400] eta: 0:03:20 lr: 0.000028 loss: 0.7738 (0.7571) grad: 0.0436 (0.0433) time: 0.3732 data: 0.0037 max mem: 3951 +train: [17] [ 40/400] eta: 0:02:45 lr: 0.000027 loss: 0.7660 (0.7589) grad: 0.0436 (0.0435) time: 0.3885 data: 0.0032 max mem: 3951 +train: [17] [ 60/400] eta: 0:02:28 lr: 0.000026 loss: 0.7660 (0.7609) grad: 0.0452 (0.0443) time: 0.3903 data: 0.0034 max mem: 3951 +train: [17] [ 80/400] eta: 0:02:16 lr: 0.000025 loss: 0.7674 (0.7623) grad: 0.0444 (0.0440) time: 0.3893 data: 0.0035 max mem: 3951 +train: [17] [100/400] eta: 0:02:04 lr: 0.000024 loss: 0.7578 (0.7617) grad: 0.0438 (0.0441) time: 0.3799 data: 0.0036 max mem: 3951 +train: [17] [120/400] eta: 0:01:54 lr: 0.000023 loss: 0.7556 (0.7621) grad: 0.0434 (0.0437) time: 0.3730 data: 0.0034 max mem: 3951 +train: [17] [140/400] eta: 0:01:45 lr: 0.000023 loss: 0.7634 (0.7630) grad: 0.0417 (0.0436) time: 0.3708 data: 0.0036 max mem: 3951 +train: [17] [160/400] eta: 0:01:36 lr: 0.000022 loss: 0.7690 (0.7646) grad: 0.0430 (0.0435) time: 0.3847 data: 0.0035 max mem: 3951 +train: [17] [180/400] eta: 0:01:27 lr: 0.000021 loss: 0.7824 (0.7660) grad: 0.0436 (0.0435) time: 0.3851 data: 0.0034 max mem: 3951 +train: [17] [200/400] eta: 0:01:19 lr: 0.000020 loss: 0.7590 (0.7644) grad: 0.0436 (0.0435) time: 0.3815 data: 0.0032 max mem: 3951 +train: [17] [220/400] eta: 0:01:11 lr: 0.000019 loss: 0.7644 (0.7667) grad: 0.0424 (0.0434) time: 0.3664 data: 0.0033 max mem: 3951 +train: [17] [240/400] eta: 0:01:02 lr: 0.000019 loss: 0.7664 (0.7650) grad: 0.0427 (0.0434) time: 0.3748 data: 0.0034 max mem: 3951 +train: [17] [260/400] eta: 0:00:54 lr: 0.000018 loss: 0.7569 (0.7651) grad: 0.0425 (0.0432) time: 0.3810 data: 0.0035 max mem: 3951 +train: [17] [280/400] eta: 0:00:47 lr: 0.000017 loss: 0.7687 (0.7672) grad: 0.0415 (0.0432) time: 0.3928 data: 0.0036 max mem: 3951 +train: [17] [300/400] eta: 0:00:40 lr: 0.000016 loss: 0.7945 (0.7691) grad: 0.0418 (0.0431) time: 0.5397 data: 0.1830 max mem: 3951 +train: [17] [320/400] eta: 0:00:32 lr: 0.000016 loss: 0.7695 (0.7679) grad: 0.0411 (0.0430) time: 0.4032 data: 0.0039 max mem: 3951 +train: [17] [340/400] eta: 0:00:23 lr: 0.000015 loss: 0.7522 (0.7682) grad: 0.0419 (0.0430) time: 0.3631 data: 0.0031 max mem: 3951 +train: [17] [360/400] eta: 0:00:15 lr: 0.000014 loss: 0.7524 (0.7671) grad: 0.0416 (0.0429) time: 0.3812 data: 0.0036 max mem: 3951 +train: [17] [380/400] eta: 0:00:07 lr: 0.000014 loss: 0.7540 (0.7669) grad: 0.0409 (0.0428) time: 0.3658 data: 0.0034 max mem: 3951 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.7657 (0.7670) grad: 0.0406 (0.0427) time: 0.3556 data: 0.0033 max mem: 3951 +train: [17] Total time: 0:02:38 (0.3955 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.7657 (0.7670) grad: 0.0406 (0.0427) +eval (validation): [17] [ 0/63] eta: 0:03:40 time: 3.4971 data: 3.2672 max mem: 3951 +eval (validation): [17] [20/63] eta: 0:00:23 time: 0.3880 data: 0.0048 max mem: 3951 +eval (validation): [17] [40/63] eta: 0:00:10 time: 0.3596 data: 0.0031 max mem: 3951 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3475 data: 0.0035 max mem: 3951 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3398 data: 0.0035 max mem: 3951 +eval (validation): [17] Total time: 0:00:26 (0.4177 s / it) +cv: [17] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.077 acc: 0.979 f1: 0.977 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [18] [ 0/400] eta: 0:23:16 lr: nan time: 3.4922 data: 3.2299 max mem: 3951 +train: [18] [ 20/400] eta: 0:03:18 lr: 0.000012 loss: 0.7629 (0.7727) grad: 0.0436 (0.0446) time: 0.3749 data: 0.0039 max mem: 3951 +train: [18] [ 40/400] eta: 0:02:40 lr: 0.000012 loss: 0.7708 (0.7737) grad: 0.0418 (0.0429) time: 0.3640 data: 0.0030 max mem: 3951 +train: [18] [ 60/400] eta: 0:02:22 lr: 0.000011 loss: 0.7652 (0.7637) grad: 0.0417 (0.0437) time: 0.3622 data: 0.0034 max mem: 3951 +train: [18] [ 80/400] eta: 0:02:08 lr: 0.000011 loss: 0.7401 (0.7562) grad: 0.0413 (0.0429) time: 0.3540 data: 0.0035 max mem: 3951 +train: [18] [100/400] eta: 0:01:58 lr: 0.000010 loss: 0.7322 (0.7530) grad: 0.0407 (0.0429) time: 0.3574 data: 0.0034 max mem: 3951 +train: [18] [120/400] eta: 0:01:48 lr: 0.000009 loss: 0.7351 (0.7540) grad: 0.0415 (0.0428) time: 0.3591 data: 0.0035 max mem: 3951 +train: [18] [140/400] eta: 0:01:40 lr: 0.000009 loss: 0.7391 (0.7547) grad: 0.0420 (0.0428) time: 0.3776 data: 0.0036 max mem: 3951 +train: [18] [160/400] eta: 0:01:32 lr: 0.000008 loss: 0.7659 (0.7574) grad: 0.0420 (0.0427) time: 0.3821 data: 0.0036 max mem: 3951 +train: [18] [180/400] eta: 0:01:24 lr: 0.000008 loss: 0.7662 (0.7587) grad: 0.0430 (0.0430) time: 0.3656 data: 0.0034 max mem: 3951 +train: [18] [200/400] eta: 0:01:16 lr: 0.000007 loss: 0.7812 (0.7620) grad: 0.0430 (0.0430) time: 0.3581 data: 0.0033 max mem: 3951 +train: [18] [220/400] eta: 0:01:08 lr: 0.000007 loss: 0.7643 (0.7623) grad: 0.0414 (0.0429) time: 0.3509 data: 0.0034 max mem: 3951 +train: [18] [240/400] eta: 0:01:00 lr: 0.000006 loss: 0.7626 (0.7634) grad: 0.0405 (0.0429) time: 0.3697 data: 0.0034 max mem: 3951 +train: [18] [260/400] eta: 0:00:52 lr: 0.000006 loss: 0.7600 (0.7620) grad: 0.0419 (0.0429) time: 0.3824 data: 0.0034 max mem: 3951 +train: [18] [280/400] eta: 0:00:45 lr: 0.000006 loss: 0.7489 (0.7626) grad: 0.0424 (0.0430) time: 0.3700 data: 0.0036 max mem: 3951 +train: [18] [300/400] eta: 0:00:38 lr: 0.000005 loss: 0.7762 (0.7645) grad: 0.0430 (0.0431) time: 0.5240 data: 0.1853 max mem: 3951 +train: [18] [320/400] eta: 0:00:30 lr: 0.000005 loss: 0.7733 (0.7656) grad: 0.0435 (0.0432) time: 0.3823 data: 0.0034 max mem: 3951 +train: [18] [340/400] eta: 0:00:23 lr: 0.000004 loss: 0.7601 (0.7647) grad: 0.0430 (0.0431) time: 0.3664 data: 0.0032 max mem: 3951 +train: [18] [360/400] eta: 0:00:15 lr: 0.000004 loss: 0.7566 (0.7649) grad: 0.0419 (0.0431) time: 0.3644 data: 0.0030 max mem: 3951 +train: [18] [380/400] eta: 0:00:07 lr: 0.000004 loss: 0.7566 (0.7647) grad: 0.0418 (0.0430) time: 0.3603 data: 0.0032 max mem: 3951 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.7568 (0.7645) grad: 0.0425 (0.0431) time: 0.3466 data: 0.0033 max mem: 3951 +train: [18] Total time: 0:02:32 (0.3817 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.7568 (0.7645) grad: 0.0425 (0.0431) +eval (validation): [18] [ 0/63] eta: 0:03:42 time: 3.5331 data: 3.3016 max mem: 3951 +eval (validation): [18] [20/63] eta: 0:00:22 time: 0.3748 data: 0.0153 max mem: 3951 +eval (validation): [18] [40/63] eta: 0:00:10 time: 0.3636 data: 0.0033 max mem: 3951 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3371 data: 0.0036 max mem: 3951 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3344 data: 0.0036 max mem: 3951 +eval (validation): [18] Total time: 0:00:26 (0.4133 s / it) +cv: [18] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.077 acc: 0.979 f1: 0.978 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +train: [19] [ 0/400] eta: 0:22:00 lr: nan time: 3.3002 data: 3.0794 max mem: 3951 +train: [19] [ 20/400] eta: 0:03:18 lr: 0.000003 loss: 0.7579 (0.7602) grad: 0.0420 (0.0431) time: 0.3844 data: 0.0075 max mem: 3951 +train: [19] [ 40/400] eta: 0:02:44 lr: 0.000003 loss: 0.7556 (0.7568) grad: 0.0420 (0.0426) time: 0.3895 data: 0.0033 max mem: 3951 +train: [19] [ 60/400] eta: 0:02:26 lr: 0.000002 loss: 0.7556 (0.7659) grad: 0.0419 (0.0423) time: 0.3740 data: 0.0031 max mem: 3951 +train: [19] [ 80/400] eta: 0:02:12 lr: 0.000002 loss: 0.7763 (0.7665) grad: 0.0416 (0.0424) time: 0.3669 data: 0.0035 max mem: 3951 +train: [19] [100/400] eta: 0:02:01 lr: 0.000002 loss: 0.7448 (0.7587) grad: 0.0420 (0.0427) time: 0.3599 data: 0.0034 max mem: 3951 +train: [19] [120/400] eta: 0:01:51 lr: 0.000002 loss: 0.7455 (0.7604) grad: 0.0431 (0.0426) time: 0.3661 data: 0.0034 max mem: 3951 +train: [19] [140/400] eta: 0:01:42 lr: 0.000001 loss: 0.7561 (0.7584) grad: 0.0430 (0.0429) time: 0.3705 data: 0.0034 max mem: 3951 +train: [19] [160/400] eta: 0:01:34 lr: 0.000001 loss: 0.7524 (0.7578) grad: 0.0413 (0.0425) time: 0.3926 data: 0.0035 max mem: 3951 +train: [19] [180/400] eta: 0:01:25 lr: 0.000001 loss: 0.7566 (0.7586) grad: 0.0415 (0.0425) time: 0.3677 data: 0.0035 max mem: 3951 +train: [19] [200/400] eta: 0:01:17 lr: 0.000001 loss: 0.7625 (0.7584) grad: 0.0438 (0.0424) time: 0.3557 data: 0.0033 max mem: 3951 +train: [19] [220/400] eta: 0:01:09 lr: 0.000001 loss: 0.7496 (0.7581) grad: 0.0431 (0.0426) time: 0.3566 data: 0.0033 max mem: 3951 +train: [19] [240/400] eta: 0:01:01 lr: 0.000001 loss: 0.7504 (0.7572) grad: 0.0437 (0.0427) time: 0.3803 data: 0.0033 max mem: 3951 +train: [19] [260/400] eta: 0:00:53 lr: 0.000000 loss: 0.7639 (0.7590) grad: 0.0439 (0.0428) time: 0.3688 data: 0.0033 max mem: 3951 +train: [19] [280/400] eta: 0:00:45 lr: 0.000000 loss: 0.7643 (0.7573) grad: 0.0434 (0.0429) time: 0.3626 data: 0.0032 max mem: 3951 +train: [19] [300/400] eta: 0:00:39 lr: 0.000000 loss: 0.7600 (0.7591) grad: 0.0413 (0.0428) time: 0.5742 data: 0.1982 max mem: 3951 +train: [19] [320/400] eta: 0:00:31 lr: 0.000000 loss: 0.7627 (0.7595) grad: 0.0413 (0.0428) time: 0.3969 data: 0.0035 max mem: 3951 +train: [19] [340/400] eta: 0:00:23 lr: 0.000000 loss: 0.7570 (0.7592) grad: 0.0418 (0.0427) time: 0.3630 data: 0.0032 max mem: 3951 +train: [19] [360/400] eta: 0:00:15 lr: 0.000000 loss: 0.7570 (0.7595) grad: 0.0411 (0.0425) time: 0.3593 data: 0.0036 max mem: 3951 +train: [19] [380/400] eta: 0:00:07 lr: 0.000000 loss: 0.7446 (0.7592) grad: 0.0403 (0.0425) time: 0.3709 data: 0.0032 max mem: 3951 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.7429 (0.7592) grad: 0.0413 (0.0425) time: 0.3560 data: 0.0033 max mem: 3951 +train: [19] Total time: 0:02:35 (0.3883 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.7429 (0.7592) grad: 0.0413 (0.0425) +eval (validation): [19] [ 0/63] eta: 0:03:37 time: 3.4473 data: 3.2038 max mem: 3951 +eval (validation): [19] [20/63] eta: 0:00:22 time: 0.3753 data: 0.0039 max mem: 3951 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3431 data: 0.0032 max mem: 3951 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3469 data: 0.0035 max mem: 3951 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3388 data: 0.0033 max mem: 3951 +eval (validation): [19] Total time: 0:00:25 (0.4084 s / it) +cv: [19] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.079 acc: 0.979 f1: 0.977 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +evaluating last checkpoint: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-last.pth +eval model info: +{"score": 0.9791666666666666, "hparam": [36, 1.0], "hparam_id": 46, "epoch": 19, "is_best": false, "best_score": 0.9794146825396826} +eval (train): [20] [ 0/297] eta: 0:17:39 time: 3.5676 data: 3.3259 max mem: 3951 +eval (train): [20] [ 20/297] eta: 0:02:19 time: 0.3492 data: 0.0029 max mem: 3951 +eval (train): [20] [ 40/297] eta: 0:01:52 time: 0.3691 data: 0.0036 max mem: 3951 +eval (train): [20] [ 60/297] eta: 0:01:38 time: 0.3692 data: 0.0037 max mem: 3951 +eval (train): [20] [ 80/297] eta: 0:01:27 time: 0.3654 data: 0.0034 max mem: 3951 +eval (train): [20] [100/297] eta: 0:01:17 time: 0.3479 data: 0.0035 max mem: 3951 +eval (train): [20] [120/297] eta: 0:01:08 time: 0.3626 data: 0.0035 max mem: 3951 +eval (train): [20] [140/297] eta: 0:01:00 time: 0.3592 data: 0.0037 max mem: 3951 +eval (train): [20] [160/297] eta: 0:00:52 time: 0.3644 data: 0.0035 max mem: 3951 +eval (train): [20] [180/297] eta: 0:00:44 time: 0.3553 data: 0.0036 max mem: 3951 +eval (train): [20] [200/297] eta: 0:00:36 time: 0.3716 data: 0.0039 max mem: 3951 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3478 data: 0.0034 max mem: 3951 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3656 data: 0.0034 max mem: 3951 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3497 data: 0.0040 max mem: 3951 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3446 data: 0.0033 max mem: 3951 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3319 data: 0.0034 max mem: 3951 +eval (train): [20] Total time: 0:01:49 (0.3697 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:26 time: 3.2815 data: 3.0503 max mem: 3951 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3545 data: 0.0051 max mem: 3951 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3399 data: 0.0035 max mem: 3951 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3289 data: 0.0030 max mem: 3951 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3240 data: 0.0033 max mem: 3951 +eval (validation): [20] Total time: 0:00:24 (0.3915 s / it) +eval (test): [20] [ 0/79] eta: 0:04:21 time: 3.3111 data: 3.0952 max mem: 3951 +eval (test): [20] [20/79] eta: 0:00:29 time: 0.3603 data: 0.0040 max mem: 3951 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3259 data: 0.0031 max mem: 3951 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3298 data: 0.0035 max mem: 3951 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3286 data: 0.0032 max mem: 3951 +eval (test): [20] Total time: 0:00:29 (0.3789 s / it) +evaluating best checkpoint: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/checkpoint-best.pth +eval model info: +{"score": 0.9794146825396826, "hparam": [50, 1.0], "hparam_id": 48, "epoch": 18, "is_best": true, "best_score": 0.9794146825396826} +eval (train): [20] [ 0/297] eta: 0:17:07 time: 3.4597 data: 3.2201 max mem: 3951 +eval (train): [20] [ 20/297] eta: 0:02:22 time: 0.3661 data: 0.0034 max mem: 3951 +eval (train): [20] [ 40/297] eta: 0:01:50 time: 0.3386 data: 0.0034 max mem: 3951 +eval (train): [20] [ 60/297] eta: 0:01:33 time: 0.3224 data: 0.0034 max mem: 3951 +eval (train): [20] [ 80/297] eta: 0:01:22 time: 0.3424 data: 0.0034 max mem: 3951 +eval (train): [20] [100/297] eta: 0:01:13 time: 0.3428 data: 0.0036 max mem: 3951 +eval (train): [20] [120/297] eta: 0:01:05 time: 0.3619 data: 0.0034 max mem: 3951 +eval (train): [20] [140/297] eta: 0:00:57 time: 0.3372 data: 0.0033 max mem: 3951 +eval (train): [20] [160/297] eta: 0:00:49 time: 0.3219 data: 0.0031 max mem: 3951 +eval (train): [20] [180/297] eta: 0:00:42 time: 0.3525 data: 0.0035 max mem: 3951 +eval (train): [20] [200/297] eta: 0:00:34 time: 0.3329 data: 0.0037 max mem: 3951 +eval (train): [20] [220/297] eta: 0:00:27 time: 0.3622 data: 0.0036 max mem: 3951 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3894 data: 0.0034 max mem: 3951 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3331 data: 0.0031 max mem: 3951 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3472 data: 0.0033 max mem: 3951 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3284 data: 0.0033 max mem: 3951 +eval (train): [20] Total time: 0:01:46 (0.3577 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:39 time: 3.4898 data: 3.1983 max mem: 3951 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3361 data: 0.0051 max mem: 3951 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3546 data: 0.0035 max mem: 3951 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3276 data: 0.0033 max mem: 3951 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3278 data: 0.0033 max mem: 3951 +eval (validation): [20] Total time: 0:00:24 (0.3938 s / it) +eval (test): [20] [ 0/79] eta: 0:04:16 time: 3.2451 data: 3.0299 max mem: 3951 +eval (test): [20] [20/79] eta: 0:00:29 time: 0.3642 data: 0.0115 max mem: 3951 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3163 data: 0.0031 max mem: 3951 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3445 data: 0.0029 max mem: 3951 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3236 data: 0.0029 max mem: 3951 +eval (test): [20] Total time: 0:00:29 (0.3766 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:-------|:-------------|:-------|--------:|------:|-----:|------------:|:----------|:-----------|---------:|--------:|-----------:|--------:|-----------:| +| flat_mae | patch | linear | hcpya_task21 | best | 18 | 0.015 | 0.05 | 48 | [50, 1.0] | train | 0.045047 | 0.99279 | 0.00056734 | 0.99359 | 0.00055517 | +| flat_mae | patch | linear | hcpya_task21 | best | 18 | 0.015 | 0.05 | 48 | [50, 1.0] | validation | 0.076881 | 0.97941 | 0.0022938 | 0.97764 | 0.0028011 | +| flat_mae | patch | linear | hcpya_task21 | best | 18 | 0.015 | 0.05 | 48 | [50, 1.0] | test | 0.096279 | 0.97202 | 0.0022132 | 0.96651 | 0.0029953 | + + +done! total time: 1:07:43 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/train_log.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..9875ab9fc831bcc7160930b351e40a2a55ed2e55 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__patch__linear/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.7566785049438476, "train/grad": 0.10151078660041093, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.03873291015625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.038541259765625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.03817138671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.03782958984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.03747802734375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.036962890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.036361083984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.03572265625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.034849853515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.033958740234375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.033050537109375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.03166259765625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.030225830078125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.028084716796875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.02599609375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.023946533203125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.021064453125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.017618408203125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.01345458984375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.009266357421875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.003701171875, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.997568359375, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.990103759765625, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.981307373046875, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.97121826171875, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.958043212890625, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.9450390625, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.9322314453125, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.913154296875, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.8883685302734374, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.8642181396484374, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.84062744140625, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.80622802734375, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.7730194091796876, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.73025146484375, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.6842425537109373, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.6307318115234377, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.575453186035156, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.51065673828125, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.4229972839355467, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3501902770996095, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.2831985473632814, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.1921941375732423, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.111136627197266, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.016056480407715, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.9140431785583496, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.8267965126037597, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.7237499523162843, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.63774405002594, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015841846284456552, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01584047740790993, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015838161432184278, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015836008377373218, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015833571199327708, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01583030289504677, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01582667370326817, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01582278262358159, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01581734753213823, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015811217338778077, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01580480839125812, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.015795382489450276, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.015785744106397034, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.015771669331006707, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01575686854775995, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.015743484278209506, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01572474390733987, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.015702615100890396, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0156745127402246, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.015647189356386662, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.015611079977825284, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0155715801846236, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.015523192975670098, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015466926130466163, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015402749781496822, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.015318397236987948, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01523471985012293, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01515328892506659, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01503257071133703, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.014877252103760839, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.014725166149437427, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.014580584489740431, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.014369708891026676, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01416903200559318, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.013914881832897663, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.013649519276805222, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.013350731716491282, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.013054075061809271, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.012722135547082871, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.012295849795918912, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.011955167558044195, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.011648673897143453, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.011241416924167424, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.01088255195878446, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.010465005973819642, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.010020887525752187, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.009644005203153939, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.009201095445314422, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.008832439456600696, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.033613443374634, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.032975912094116, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0319061279296875, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.0307838916778564, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0296878814697266, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0281975269317627, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.02640438079834, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.024510145187378, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.021908760070801, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0191171169281006, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0162699222564697, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.012009859085083, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0076704025268555, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.0013177394866943, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.994961977005005, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.988543748855591, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.9800820350646973, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.9695956707000732, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.95709490776062, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.944643974304199, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.9282314777374268, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.90997576713562, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.8877758979797363, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.8620481491088867, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.832761526107788, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7945668697357178, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.757150888442993, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7205874919891357, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6670079231262207, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.598198413848877, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5321059226989746, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4685187339782715, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.3773996829986572, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.291187286376953, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.1830952167510986, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.070190191268921, "validation/loss_036_lr7.1e+00_wd1.0e+00": 1.9432262182235718, "validation/loss_037_lr8.3e+00_wd1.0e+00": 1.8171570301055908, "validation/loss_038_lr9.8e+00_wd1.0e+00": 1.6757303476333618, "validation/loss_039_lr1.2e+01_wd1.0e+00": 1.496660590171814, "validation/loss_040_lr1.4e+01_wd1.0e+00": 1.3589094877243042, "validation/loss_041_lr1.6e+01_wd1.0e+00": 1.2411096096038818, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.095522165298462, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.9790608882904053, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.857990026473999, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7451270222663879, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6611500978469849, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.5750276446342468, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.5123617649078369, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.10615079365079365, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.10788690476190477, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.11235119047619048, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.11507936507936507, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.11904761904761904, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.1232638888888889, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.1284722222222222, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.13392857142857142, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.14037698412698413, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1440972222222222, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1470734126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.15426587301587302, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1579861111111111, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.16294642857142858, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.16741071428571427, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.17063492063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.17261904761904762, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.17336309523809523, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.1753472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.1775793650793651, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.17807539682539683, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.1800595238095238, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.18452380952380953, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.1892361111111111, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.1939484126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.20337301587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21378968253968253, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2279265873015873, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2604166666666667, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.3142361111111111, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.3658234126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.4124503968253968, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.4593253968253968, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.4945436507936508, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.5374503968253969, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.5922619047619048, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.6416170634920635, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.6832837301587301, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.7395833333333334, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8273809523809523, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8683035714285714, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8896329365079365, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9089781746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9164186507936508, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9206349206349206, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9243551587301587, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9280753968253969, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9322916666666666, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9357638888888888, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.018548569099798004, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.018985411704419416, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.019568726602789053, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.019965875656721255, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.020566987365040378, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02117260272059323, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021702651606233563, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0225283105030286, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02432849072722564, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.025259407067572466, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02554285817114527, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.02744741435192552, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.028390732489165398, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.030465582486051346, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.03293111172928806, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.03371112536947179, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.035478173565459396, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.03310281051091631, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.033004892046065715, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.033401158011194865, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.03148056145350731, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.032200738664782826, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.039122328314464196, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.04556330143364904, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.05208078675545182, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.063732859197195, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.0768941091555013, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.09255790372144483, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1249131839701689, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.16804455149668623, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.2060214182853868, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.23760522449525728, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.27485499972847083, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.3089385775754031, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.35466606075491774, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.4117711567723172, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.46541079538236746, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.5178500409106178, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.6215703392378514, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.79490827334412, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.853755906855841, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8807652283441924, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9035110136979857, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9116204653522434, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9142685217991946, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9181012867839013, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9217144803329829, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9254819869138576, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9290045692177996, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 1.63774405002594, "validation/loss_best": 0.5123617649078369, "validation/acc_best": 0.9357638888888888, "validation/f1_best": 0.9290045692177996} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.1321961343288423, "train/grad": 0.08621496837586165, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.028682861328125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.02717529296875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.02455810546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.022059326171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.019581298828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.016051025390625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.011895751953125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.00754150390625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.001475830078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.995015869140625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.988585205078125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9786865234375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.968880615234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.954376220703125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.93997802734375, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.92560546875, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.906851806640625, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.883582763671875, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.856236572265625, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.8291357421875, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.7940240478515626, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.755455322265625, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.7096478271484377, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.65708740234375, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.5986676025390625, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.52421142578125, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.4533489990234374, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.3856689453125, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.2900823974609374, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.172022399902344, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.063881378173828, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9643988037109374, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.829368133544922, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7094144439697265, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5696402740478517, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4350652694702148, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.297013931274414, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.172626075744629, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.046464900970459, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.9042097949981689, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.8058456563949585, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.7280382800102234, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.6384759497642517, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.5711095654964446, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.5037595635652542, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.44286518812179565, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.3980987536907196, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.35231865659356115, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.318846003562212, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015987484171055256, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015977460937574506, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01596002794802189, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015942567214369773, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01592570201959461, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.015902415160089733, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01587574417702854, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.015846375660039484, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.015807839911431074, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015765716889873146, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01572426859755069, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.015660770647227765, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01559783460572362, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.015505007561296224, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.015413698637858034, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.015323328706435858, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.015204913481138647, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.015057433741167187, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.014884553863666952, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.014715681788511575, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.014496909421868622, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014257172602228821, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013975512650795281, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013656308138743044, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.013304561655968428, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012865429865196348, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.012457861220464111, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.012080639079213142, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.011566929500550032, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.010969028442632407, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.010453306995332241, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.01000243023270741, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.009419770608656109, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.008923013366293163, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.008360736563336104, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007828413106035442, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007286201470997185, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006795303407125175, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006289124295581133, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.005706339825410396, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005292754534166306, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.004960113613633439, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0045689593430142846, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00426747869933024, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.003958250724244863, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0036722954513970763, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0034581951232394203, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.003234707790543325, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0030722813884494827, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.0205047130584717, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.0179076194763184, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0135912895202637, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.0092523097991943, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0050063133239746, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.9989240169525146, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.992156982421875, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.984431505203247, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9743154048919678, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.963371992111206, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9525294303894043, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.935931921005249, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.91953182220459, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.8952252864837646, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.8712241649627686, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.8475868701934814, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.8165717124938965, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.7786312103271484, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.7342004776000977, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.690837860107422, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.6347544193267822, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5738298892974854, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5023787021636963, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.421713352203369, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3334951400756836, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.2231626510620117, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.1204051971435547, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.024571657180786, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.8923484086990356, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.7353354692459106, "validation/loss_030_lr2.7e+00_wd1.0e+00": 1.5971890687942505, "validation/loss_031_lr3.1e+00_wd1.0e+00": 1.4758256673812866, "validation/loss_032_lr3.7e+00_wd1.0e+00": 1.3199903964996338, "validation/loss_033_lr4.3e+00_wd1.0e+00": 1.1910271644592285, "validation/loss_034_lr5.1e+00_wd1.0e+00": 1.0515745878219604, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.9289550185203552, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.8142226338386536, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.719735860824585, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.6317421197891235, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5406355857849121, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4817730784416199, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4371677041053772, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3872634768486023, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.35055384039878845, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3143467903137207, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.2813509404659271, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.25702518224716187, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.23178359866142273, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.21328803896903992, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.14211309523809523, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.1463293650793651, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.15302579365079366, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.15823412698412698, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.16071428571428573, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.16493055555555555, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.16939484126984128, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.171875, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1728670634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1736111111111111, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17633928571428573, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.17683531746031747, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1783234126984127, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1817956349206349, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.18526785714285715, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.1884920634920635, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.1939484126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.20262896825396826, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.21428571428571427, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.23462301587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26537698412698413, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.30927579365079366, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.35813492063492064, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.4166666666666667, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.46750992063492064, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.5240575396825397, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.5768849206349206, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.6165674603174603, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.6594742063492064, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.7105654761904762, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.7673611111111112, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8296130952380952, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8717757936507936, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8898809523809523, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9035218253968254, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9112103174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9136904761904762, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.917906746031746, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9223710317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9250992063492064, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9280753968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9315476190476191, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9367559523809523, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.939484126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9444444444444444, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9506448412698413, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9548611111111112, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9598214285714286, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.024692842263008125, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.02574749449287892, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.027002956484641, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02835813878036061, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02898830764625849, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.031047855764479864, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.032507779744778274, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.034903860397618496, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03301378743281997, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03133251921905004, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.033288164285481425, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.029387558831732174, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.029955900602433777, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.03484553244804981, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.039835123863742036, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.044517357360128525, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.051638922290754424, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.06204695244313298, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.07677811489258601, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.09832632342081403, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.12747623631928318, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1627423964135745, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20203347189188692, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.248889935135853, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.29654631954203864, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.3548867706463259, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.40483238545639044, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.43898543878627533, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.48366143905487635, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.5667040379950525, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.6890905336596892, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.803989800449834, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8603594733167631, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8800450252003484, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8950745590929643, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9041216136393976, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9058820193750159, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9102883733435378, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9140658604825175, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9172475069427245, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9212338228026388, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.924444796975606, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.930101107243543, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.932274161887309, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9372483427091844, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9441098157432078, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9486316824348916, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.953404271236771, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.955059143484357, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.318846003562212, "validation/loss_best": 0.21328803896903992, "validation/acc_best": 0.9598214285714286, "validation/f1_best": 0.955059143484357} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 1.7681930410861968, "train/grad": 0.07536336382851004, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.011151123046875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.00707763671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.000360107421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.993731689453125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.9869384765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.977659912109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.96705810546875, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.955269775390625, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.939625244140625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.92287353515625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.906265869140625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.8810595703125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.856357421875, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.819864501953125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.784193115234375, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.7492919921875, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.7038970947265626, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.6489501953125, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.58534423828125, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.5240826416015625, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.445980224609375, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.3625689697265626, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.2663629150390623, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.1601629638671875, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.046976318359375, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.9094789123535156, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7857785034179687, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6742768859863282, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5269197082519532, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3619998931884765, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2260753631591796, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1134729385375977, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9778263950347901, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.8721321105957032, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.763897967338562, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.6728768420219421, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.5906299066543579, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.524364881515503, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.4634921360015869, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.40075163513422013, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.36022808492183683, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.3293116606771946, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.29458512246608737, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.26887675777077674, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.24317104287445546, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.21981149811297654, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.20246734861284493, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.18458884991705418, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.17134252149611712, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015644802395254374, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015617816979065537, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01557428490370512, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015529699358157813, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015486189848743379, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.015425849943421782, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.015356515124440194, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.015280549000017345, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.015179481212981046, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015072422879748047, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014967685365118086, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014807554506696761, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.014650122211314738, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.014417993286624551, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.014193696407601238, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.013975717704743146, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01369495503604412, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0133589038066566, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.012977829007431864, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01261669846251607, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.012166859484277665, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01170207602903247, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.011191667448729277, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.010660559544339776, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.010133183388970792, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.009541874749120325, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.009045404724311083, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.008617405120749027, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.008066316938493402, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.007451594090089202, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0069363016006536785, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006498082461766899, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00595486699603498, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00551849857554771, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005060390998842194, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004667082867817953, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004305714317597449, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004011418306035921, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0037397492781747134, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0034570452343905345, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.003270534670446068, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0031237169209634885, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0029539461142849177, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.002825517190503888, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.002694015765446238, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0025755407201359048, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0024885018926579503, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0024012901284731925, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.002336630924255587, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9990170001983643, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.9933316707611084, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.983738899230957, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.974249839782715, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.9648189544677734, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.951700210571289, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.936833381652832, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.920259714126587, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.8983466625213623, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8750193119049072, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.8519489765167236, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.817125082015991, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7830681800842285, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.733114004135132, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6846561431884766, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.637514591217041, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5766818523406982, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.503636598587036, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4200949668884277, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3406827449798584, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.2406914234161377, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.1356191635131836, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.016711950302124, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.8884572982788086, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.754848599433899, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.5981310606002808, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.4624521732330322, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.3447299003601074, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.196777105331421, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.0411434173583984, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.9211080074310303, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.8267954587936401, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.7191488146781921, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.6390419602394104, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.5599817037582397, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.4953409731388092, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.4378049075603485, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.391976535320282, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.349869966506958, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3063494861125946, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.27826988697052, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2568669617176056, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.23278021812438965, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.21465183794498444, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.19635164737701416, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.17950843274593353, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.16716866195201874, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.15464811027050018, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.14596568048000336, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.16517857142857142, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.1684027777777778, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.17162698412698413, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.17336309523809523, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.1746031746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.17683531746031747, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.17683531746031747, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.17782738095238096, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.17906746031746032, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.18377976190476192, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.18700396825396826, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.19146825396825398, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1996527777777778, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2160218253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2378472222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2690972222222222, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.31746031746031744, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.3680555555555556, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.4330357142857143, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.4739583333333333, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.5280257936507936, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.5840773809523809, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.6349206349206349, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.6750992063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.7185019841269841, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.7889384920634921, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8407738095238095, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8720238095238095, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8901289682539683, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9032738095238095, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9084821428571429, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9134424603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9188988095238095, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9228670634920635, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9298115079365079, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9332837301587301, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9389880952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9429563492063492, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9469246031746031, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9503968253968254, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9533730158730159, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9556051587301587, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9575892857142857, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9645337301587301, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9662698412698413, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9670138888888888, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.031129883007619215, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.03248109965070235, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.034311419078629786, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.03347379634252054, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.03270345321764619, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03365209221008646, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.029727947878755677, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.029004127280523404, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.0300465788889678, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.037475514337631556, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.04250973340091161, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.0483662239434172, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.05898298690338151, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07927759479957573, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.10321333760507215, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.13209377444529127, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.16931283603189445, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20937616971970674, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2619480041901074, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.30133285436481294, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.3553265529504709, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.4098503644006096, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.45576574571946327, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.5028264807994618, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.5821268368694531, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7281609208192966, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8189685446745869, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8614505404676246, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8819366524373148, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8975092517431953, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9039167730234183, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9083082735008816, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9127531240000799, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9165082817665796, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9232686574934661, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9266120540823738, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9320489458928717, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9359357974408977, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.940916131222086, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9439893857689994, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9478460131711299, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9504269045957436, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9524578977532985, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9561469620205074, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9589521896839879, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9607021078847888, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9631959351889847, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9640258293272491, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9640922107818807, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 0.18458884991705418, "validation/loss_best": 0.15464811027050018, "validation/acc_best": 0.9670138888888888, "validation/f1_best": 0.9640258293272491} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 1.5233405882120132, "train/grad": 0.06889101319015026, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.98516357421875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.97747802734375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.964547119140625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.95173095703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.938968505859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.921441650390625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.9015283203125, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.87935791015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.85037841796875, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.819466552734375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7892138671875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.743740234375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6993634033203127, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.634906005859375, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5730157470703126, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.51337646484375, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.43710693359375, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3465960693359373, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.24465087890625, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.149203186035156, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0310443115234373, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9093612670898437, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7751034545898436, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6343443298339844, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.4928154754638672, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.3336856842041016, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.2022481536865235, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.092881546020508, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.9609939193725586, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.8286821746826172, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.7304277992248536, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.6551487803459167, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.5707466554641724, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.5087742698192597, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.4480443751811981, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.3984492862224579, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.35430836766958235, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.31894361212849615, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.2863791424036026, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.2525721528381109, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.23059394590556623, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.21360819961875677, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.19447034630924465, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.18006603337824345, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.16572105702012777, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1525602897629142, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.14273299442604184, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.13261205285787583, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.12513667114079, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015601047812961042, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015551244658418, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015466636195778847, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01538353626616299, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015300900042057038, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01518836078234017, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.015062334309332072, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.014921517008915544, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.014737734259106218, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.014540480482392013, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014348293025977909, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014062775024212897, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013788186255842447, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013394905868917704, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01302262555807829, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012669224445708095, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012227310109883547, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011720100203529, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.011173784327693283, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010690798610448837, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.010133801714982838, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.009602379947900773, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.009055187983904034, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008510567920748145, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00797586410306394, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007372141524683684, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006862118793651462, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0064262835402041675, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0058859807101543996, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005325296238297596, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004897361340699718, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004562646795529872, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004179544267244637, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0038929733354598284, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0036076706496533007, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.003369347071275115, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.003155731875449419, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.002981179492198862, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0028164297190960497, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0026427898852853103, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.002528466766816564, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00244087656843476, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0023410685564158483, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.002264003786840476, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00218357844161801, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0021042479039169847, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0020431984285824, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0019806846586288883, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0019372413543169388, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9693846702575684, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.9593167304992676, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.9426679611206055, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.9262185096740723, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.9099552631378174, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.8874032497406006, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.8619656562805176, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.8338711261749268, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.797083854675293, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7581582069396973, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.720069408416748, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6631195545196533, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6080660820007324, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.528777837753296, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4530694484710693, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.380810260772705, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.289364814758301, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.182119607925415, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.0630767345428467, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.9533151388168335, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.8200323581695557, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.6857410669326782, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.5414413213729858, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.3951598405838013, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.2537468671798706, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.10164213180542, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.9813917875289917, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.8853256106376648, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.773570716381073, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.6656205654144287, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.5876423120498657, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.5288092494010925, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.46367284655570984, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.41605907678604126, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.36969855427742004, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.3317183256149292, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.29790568351745605, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.27078157663345337, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2457507848739624, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.219864621758461, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2029837965965271, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.19011737406253815, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.17535997927188873, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.16395315527915955, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.15211117267608643, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.1415058672428131, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.133829265832901, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.12601609528064728, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.1200002059340477, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.17311507936507936, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.1753472222222222, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.17683531746031747, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.1775793650793651, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.17906746031746032, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.1810515873015873, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.1857638888888889, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.1892361111111111, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.19444444444444445, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.2058531746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.21875, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.24975198412698413, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.29191468253968256, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.35168650793650796, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.40327380952380953, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.44642857142857145, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.4965277777777778, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.546875, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.6116071428571429, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.65625, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7018849206349206, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.7504960317460317, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8167162698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8623511904761905, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8854166666666666, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9027777777777778, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9114583333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9146825396825397, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9176587301587301, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9221230158730159, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9273313492063492, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9315476190476191, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9352678571428571, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9377480158730159, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9432043650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9466765873015873, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9578373015873016, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9598214285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9608134920634921, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9623015873015873, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9652777777777778, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9675099206349206, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9697420634920635, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9699900793650794, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9704861111111112, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9714781746031746, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.03217765998203122, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.03319358476352894, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.03144051550384439, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02974881994662933, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.030214735761553424, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03310143110131449, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.04033157962425279, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.045704090032245306, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05220792131995674, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06680833637462881, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.08202461990792755, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11498122582088782, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15084687256312854, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1950728133460499, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.2319668178992566, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.26989044653565436, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.3203533769881232, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.3715667870076957, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.4386956916785551, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.4843421145412855, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.5469887524339933, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.6399460595559373, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.773353573830185, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8450821946195597, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8748128703699729, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8950744164343696, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9052861013390214, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9085213700232312, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9117483058472275, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9151923956030702, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.920780184898088, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9258477937355961, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9296780149666753, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9319847000847202, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9383160001374213, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9421286457802308, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9458460044932304, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9479007301441642, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9541492224840805, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9562501278308084, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9576477576477315, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9592255402973312, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9601222282088461, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.961843620094041, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9636146204646359, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9662156951337522, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9668331078725685, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9673957210760616, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9682634251099594, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.12513667114079, "validation/loss_best": 0.1200002059340477, "validation/acc_best": 0.9714781746031746, "validation/f1_best": 0.9682634251099594} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 1.339006580710411, "train/grad": 0.0635307465866208, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.951348876953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.938671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.9179345703125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.897386474609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.876973876953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.848892822265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.817432861328125, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.78271240234375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.7375537109375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.6899249267578127, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.6435797119140627, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5747454833984373, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.508802490234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4145672607421873, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3255548095703125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2414559936523437, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.136141052246094, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0145745849609376, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.8818402099609375, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.7617703247070313, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.6191287231445313, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.4793608093261719, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.3340579223632814, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.1918736267089844, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.0592899322509766, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.9219269943237305, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.8168959045410156, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.7348294115066528, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.6412342405319214, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.5522209024429321, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.4884150755405426, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.44059963822364806, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.38747643887996674, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.3486745992302895, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.3104989781975746, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.27917116045951845, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.25103390745818616, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.22829600393772126, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.20713245641440153, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1848787785321474, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.17025916114449502, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1589252021163702, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1460692584887147, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.1363684083148837, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.1267226078733802, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.11786666136234998, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.11140486724674702, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.10475745039060712, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.10009038038551807, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015447852946817876, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01536658830009401, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015233525931835174, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015102638909593225, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01497328736819327, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014795277989469469, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.014594290014356375, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0143715056264773, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.014088156535290181, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.013793545519001782, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.013509320886805653, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013090336825698615, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012695761620998383, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01214693400543183, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011644505183212459, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01118988033849746, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01065185239072889, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010076429937034845, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009500452810898423, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009016824157442897, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008470994492527098, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007949003668036312, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007403027324471623, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00685475428821519, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006327898558229208, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005761613175272942, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005315642340574414, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004957923842594028, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.004542374832089991, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.004138114688685164, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.003842656119959429, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0036167105799540878, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0033627285447437316, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0031739079864928498, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.002982515709591098, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0028223290084861217, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.002674150616512634, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0025497782160528006, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0024303064128616825, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0022996454493841158, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0022112123551778495, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.002139243751880713, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0020545884157763795, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0019899682665709406, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0019255025035818107, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0018710378400282933, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0018373141597840003, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0018109900891431607, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0018004973494680598, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9319047927856445, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.916499614715576, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.8912618160247803, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.866382360458374, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.8418612480163574, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.808067798614502, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.7703497409820557, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.728792190551758, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.675088882446289, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.618685722351074, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.5642154216766357, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.4836690425872803, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.4070560932159424, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.298511266708374, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.1971960067749023, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.1022164821624756, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.9848908185958862, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.8513550758361816, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.708113431930542, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.5812251567840576, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.4340696334838867, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.294176459312439, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.153228759765625, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.0200371742248535, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.9000986218452454, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.7796595692634583, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.6900964975357056, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.6214376091957092, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.5440536141395569, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.471383661031723, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4194863438606262, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.38066837191581726, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.3377901315689087, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3063490688800812, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2754136919975281, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.2499537318944931, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2271033227443695, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.20860959589481354, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.19129426777362823, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.17298854887485504, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.16092438995838165, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.15152707695960999, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1410568654537201, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.13322412967681885, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.12530899047851562, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.11809880286455154, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.11301405727863312, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.10796860605478287, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.10474081337451935, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.1775793650793651, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.17857142857142858, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.1800595238095238, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.18501984126984128, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.18824404761904762, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.19345238095238096, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.20262896825396826, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2160218253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.24305555555555555, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.28273809523809523, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.3273809523809524, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.3864087301587302, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.43725198412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.4928075396825397, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.5404265873015873, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.5873015873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.6418650793650794, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.6870039682539683, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7418154761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.798859126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8551587301587301, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8821924603174603, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9017857142857143, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9112103174603174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9171626984126984, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9206349206349206, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9243551587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9273313492063492, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9325396825396826, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9370039682539683, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9409722222222222, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9444444444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9471726190476191, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.957093253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9595734126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9613095238095238, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9665178571428571, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9680059523809523, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9692460317460317, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9697420634920635, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.970734126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9737103174603174, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9734623015873016, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.030768015363565417, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.029691955840373045, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.03154870025757256, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.039204629074454565, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.04422950539677929, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.050977174866312674, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.06305926327571669, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.07945249941330605, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.10895941849543486, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.14420871593177143, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.1781358231748427, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.22438013006350946, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.26296230253724756, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.3156299953833985, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.3637739617757756, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.4117667825187715, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.46538161692567936, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.5203048386966549, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.628707245209987, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7400729277037906, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.838824685245206, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8738608625109042, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8956904988460511, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9073815987891352, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9131022294385293, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9146237395700327, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9180219614967972, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9201615856140118, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9258580585426425, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9306812545235102, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9341923254558221, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9381886816142879, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9408407974697726, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9440347924966384, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9473654740312218, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9515812364297301, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9548244886553237, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9569672630211356, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.960313489933787, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9636074396924238, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9651540165071321, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9660625406568679, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9665149876370209, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9672595233729735, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9681261614268476, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9691697348165983, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9695647156996288, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9710015542685783, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9704381475818581, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 0.10475745039060712, "validation/loss_best": 0.10796860605478287, "validation/acc_best": 0.9737103174603174, "validation/f1_best": 0.9710015542685783} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 1.1998834031820298, "train/grad": 0.05876503499224782, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.911800537109375, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.89366943359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.863865966796875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.8345703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.805736083984375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.7661962890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.72210693359375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.6739178466796876, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.611728515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.546949462890625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.484532470703125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.3929833984375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.3064495849609377, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.185191345214844, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.073128662109375, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.96923583984375, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.8423306274414062, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.7003427124023438, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.5510011291503907, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.4217291259765625, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.2756119537353516, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.1405020141601563, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.0083081817626953, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.886792221069336, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.7798960208892822, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.6745793199539185, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.5973767232894898, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.5385383439064025, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.47277050495147704, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.41085817337036135, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.3667377561330795, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3336393272876739, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.2967013722658157, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.26954158425331115, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.24268894299864768, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.22045269295573233, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.2003309354186058, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.18395223818719386, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.16870034776628018, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1525871202349663, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.14192869856953622, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1336845536902547, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.12432936754077673, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.11727096667513251, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.11024997981265187, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1038790457881987, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.09918718283064663, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.09449943497776986, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.09122640407644213, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01512474749237299, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015010494426824152, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.014822996193543077, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014637332786805927, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014453404410742223, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014204773204401135, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.013932097223587334, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.013639744045212865, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.013262434992939234, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012876230166293681, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012512248498387635, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01199024686589837, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.011514524333178996, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010881845108233392, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010340059446170925, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009874217347241938, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009345946600660682, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008793738198000937, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008234706001821905, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0077520633302628995, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007196556848939508, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006666190382093191, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006129564119037241, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005619459012523293, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005157217886298895, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004691193107282743, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004340140416752547, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004068723847158253, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0037578431132715196, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0034571917250286787, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0032374300441006198, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0030650282977148892, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.002868824787437916, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.002720239852787927, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0025672448810655623, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0024359787174034865, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0023149184760404753, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.002214875413919799, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.002118986625573598, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.002015087568142917, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.001944594405940734, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0018903175537707284, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0018288659796235152, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0017852450243663043, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0017438334703911095, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0017141204871586524, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0016957186249783262, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.001686164197162725, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0016844586574006825, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.891427755355835, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.870488166809082, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.8362319469451904, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.8026185035705566, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.7697200775146484, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.7246413230895996, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.674531936645508, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.619952440261841, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.5498147010803223, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.4770164489746094, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.4073827266693115, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.3058371543884277, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.2104687690734863, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.0780532360076904, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.9570035934448242, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.84613835811615, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.7122102975845337, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.5648735761642456, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.413131833076477, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.28469717502594, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.1429706811904907, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.0152409076690674, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.8933442831039429, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.783612847328186, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.688780665397644, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5969365835189819, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5300148725509644, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.47932031750679016, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.4226611852645874, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.36947187781333923, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.3315577805042267, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.30301374197006226, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.27118659019470215, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.24777397513389587, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.22468073666095734, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.20557166635990143, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.18838591873645782, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17451359331607819, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1616448163986206, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14822308719158173, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13936954736709595, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.13262684643268585, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.12497249990701675, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.11950407922267914, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.11422240734100342, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.10973133891820908, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.10607701539993286, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.10162059217691422, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.09847506135702133, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.18055555555555555, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.1840277777777778, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.18898809523809523, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.1939484126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.20362103174603174, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.2170138888888889, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.2435515873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2847222222222222, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.3382936507936508, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.38913690476190477, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.4365079365079365, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.4905753968253968, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5334821428571429, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6046626984126984, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6517857142857143, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6877480158730159, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.738343253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8082837301587301, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.859375, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8809523809523809, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8973214285714286, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9087301587301587, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9134424603174603, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.917906746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9201388888888888, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9260912698412699, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9315476190476191, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9357638888888888, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9397321428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9432043650793651, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9459325396825397, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9484126984126984, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9615575396825397, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.966765873015873, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9699900793650794, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.970734126984127, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9747023809523809, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9749503968253969, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.032417399102567815, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.03778006888940469, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.04518131948467697, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.05160835885784282, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.06396906253891951, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.08034814452582295, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.10839800780443852, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.14537470125728127, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.18457235484556944, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.2246038882504878, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.2622404660545267, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.31627004212726206, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.35830684849946404, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.42996973422141255, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.4792186193326047, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.525967901546611, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6229259325944186, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7608260223548173, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8441386036031466, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8711221572533591, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8891477111827335, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9036299553656477, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9077450569937608, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9118175332377414, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9135459503723908, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9202307421707271, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9254676736836901, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.929510450917525, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9329565444043385, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.93618988795532, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9391237476688249, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9421167113156308, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.947442599610113, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.949887772020203, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9521698564681428, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9548140032395018, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.956469653470769, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9591013846492683, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9607032003493262, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9624945392922479, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9662337477294246, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9660535220002014, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9667389212071863, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9679768307703223, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9680956044594426, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9688294804339167, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9694816723880115, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9710071756791411, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9704299645066382, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.09122640407644213, "validation/loss_best": 0.09847506135702133, "validation/acc_best": 0.9749503968253969, "validation/f1_best": 0.9704299645066382} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 1.1012568894028663, "train/grad": 0.05503761235624552, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.874442138671875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.851168212890625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.81307373046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.775782470703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.73936767578125, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.689517822265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.634447021484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.5744866943359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.4976910400390624, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.418504638671875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.34284423828125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.2330694580078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.130676574707031, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.989404296875, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.8614642333984375, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.7451510620117188, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.6064906311035156, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.456111602783203, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.3041912841796874, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.1780762481689453, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.041635971069336, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.921053695678711, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.8078888702392578, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.7076458740234375, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.6219408512115479, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.5393615341186524, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.47970383405685424, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.4343654417991638, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.38379647850990295, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.33624556064605715, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.3021790689229965, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.2765058958530426, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.24779270499944686, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.22661906115710737, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.20548787645995617, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.18792352460324765, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.17198750555515288, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.15898148719221353, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1467158142849803, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1337793319672346, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.12517193593084813, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.11852513652294874, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.110918089132756, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.10517647905275225, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.09942605629563332, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.09417384393513202, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.09033363208174705, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.08648846827447415, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.08380338844843209, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01478943506255746, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014640465364791452, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01439639215823263, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014158435864374042, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.013931183097884058, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01362650987226516, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.013289908999577165, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.012928484613075853, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.012478396552614868, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012024171752855181, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.011604140265844762, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.011023261887021362, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.010517933717928828, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.009877596686128526, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.009349067159928382, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008899580196011812, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008383990752045066, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.007830073027871549, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007260732783470303, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006773593781981617, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006229187536519021, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00573239368153736, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005251853958470747, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004814450102858245, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004432058056117967, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00405484797549434, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00377544017508626, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.003559114800300449, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.003310810043476522, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0030706589203327896, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0028928341460414233, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0027545913209905846, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0025956788240000606, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0024756959918886424, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0023528706125216558, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.002247373694553971, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.002151280324906111, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0020736734289675952, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0019975625193910675, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0019194120599422605, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.001870611184858717, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0018325143383117392, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0017926908831577748, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0017608954090974293, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0017275376088218763, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.001693339376943186, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0016630278574302792, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0016317855118541046, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.001619416467146948, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.8527557849884033, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.8266818523406982, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.7840895652770996, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.742494583129883, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.701874256134033, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.646684408187866, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.5856502056121826, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.519641399383545, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.435516357421875, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.3490967750549316, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.2670695781707764, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.148952007293701, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.039731740951538, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.8904625177383423, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.7566959857940674, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.636789321899414, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.4957720041275024, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.345440149307251, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.196815848350525, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.0756456851959229, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.9469420313835144, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.8354000449180603, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.732123851776123, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6415964961051941, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.5649492740631104, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.49138733744621277, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.43842044472694397, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.3983297646045685, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.35346540808677673, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.3113064467906952, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.2812078297138214, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.2585299015045166, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.2331515997648239, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.21436333656311035, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.19580237567424774, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1804545819759369, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.16662365198135376, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1554528772830963, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1450703740119934, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13414707779884338, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.12692396342754364, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.12134268134832382, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.11493454873561859, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.11034873127937317, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.1061558797955513, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.10259880870580673, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.09951892495155334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.09638011455535889, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.09460582584142685, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.1875, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.19171626984126985, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.20089285714285715, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2152777777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.23015873015873015, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.26264880952380953, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3100198412698413, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.3586309523809524, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.41617063492063494, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.4645337301587302, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5101686507936508, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5642361111111112, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6235119047619048, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6768353174603174, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.722718253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7765376984126984, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8407738095238095, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.875, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8958333333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9072420634920635, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9131944444444444, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9184027777777778, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9201388888888888, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9248511904761905, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9290674603174603, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9372519841269841, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9412202380952381, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9446924603174603, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9476686507936508, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9513888888888888, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9568452380952381, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9593253968253969, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9613095238095238, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.96875, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.972718253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.972718253968254, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9754464285714286, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.04296236336524898, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.04905573798725632, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.06015853884701547, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.07844988075915492, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.09454072987601678, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.12620105083337177, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.1641786539526764, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.19975700884383127, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.24457990386922374, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.2861155762084658, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.3337563956584873, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.3897886828187295, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.448203277942181, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5101629642767352, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5875979064384471, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6982150190304757, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8143338742186983, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8648168605865513, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8882993098663237, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9018638695537534, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.908170523706887, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9136596712543023, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9137824790478507, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9188468016009481, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9225489155555755, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9306202371235883, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9343749044722036, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9378529525584386, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9410158567009604, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.944842649922551, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9495784591277253, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9517426822684582, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9550743291890398, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.957062692662825, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9583125156217472, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.960318675719176, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9620220215094225, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9651396631010721, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9660334309071682, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.966552775619497, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9688676729639896, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9698717475646893, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.969553031592695, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9688212460041764, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9685880436647059, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9695631095433516, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9703071601011288, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9723832660377583, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9717642709011727, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 0.08648846827447415, "validation/loss_best": 0.09638011455535889, "validation/acc_best": 0.9756944444444444, "validation/f1_best": 0.9723832660377583} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 1.0166446959972382, "train/grad": 0.052307908870279786, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.8335205078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.804801025390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.758089599609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.7126513671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.6683660888671876, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.6082452392578124, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.542120361328125, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.470655517578125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.3800323486328123, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.287259521484375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.199766845703125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.0745205688476562, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.9593862915039062, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.8034912109375, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.6652886962890625, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.5426983642578125, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.4002947998046875, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.250889129638672, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.1056316375732422, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.989214973449707, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.8673603630065918, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.7629976081848144, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6676075839996338, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5846672964096069, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5147038125991821, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.4477569603919983, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.3995014750957489, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.36282014071941376, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.32182946145534513, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.28309379249811173, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.2552355928719044, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.23413412794470786, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.2103733079135418, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.19275448277592658, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.17514331839978695, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.1604458174854517, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.1471323462203145, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.1362346814200282, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1259134216606617, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.11499513186514378, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.10775038911029697, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1020806274190545, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.09562694821506738, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.09076948300004005, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.08606324610300362, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.08188427271321416, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.07905009065754712, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.07634953300468623, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.07439139618538321, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.014621498016640543, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014438826013356448, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.014142302656546235, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.013862637686543167, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01359299457166344, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013225462683476508, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01282898630015552, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.012410356123000383, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.011890811249613761, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.011378145683556795, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01091677650809288, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.010300943898037075, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0097808442520909, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00913858960615471, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008609391476493328, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008153471285477281, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007624497250653803, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.007057045476976782, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006485643505584449, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006011873271781951, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005499349208548665, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005046568788820878, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004621608454035595, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004242625825572759, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.003913940746570006, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0035922211315482853, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0033528462646063417, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.003167593880207278, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.002953451258945279, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.002745888290228322, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0025917364365886895, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0024720481940312313, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.002333478588843718, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.002227984720375389, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.002120741233229637, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0020284024585271255, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0019450550494366325, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0018773229388170875, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.001810820714745205, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0017396465694764628, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0016921146033564583, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0016562507580965757, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.001618428133369889, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0015936948204762302, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.001576180765114259, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0015654925446142443, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0015683486801572144, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0015749007012345827, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0015772369320620782, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.816511869430542, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.785661220550537, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.735409736633301, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.6866509914398193, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.6392035484313965, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.574967622756958, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.5044543743133545, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.428584337234497, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.332665205001831, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.2349579334259033, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.143115758895874, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.012265920639038, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.8927663564682007, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.7321282625198364, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.591237187385559, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.467413306236267, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.3254928588867188, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.178650140762329, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.0380022525787354, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.9268816709518433, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.8119739294052124, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7145205736160278, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.6260581612586975, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.5495346188545227, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.48524317145347595, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.42385444045066833, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.3795205056667328, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.3460347056388855, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.30850058794021606, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.2731536328792572, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.24773848056793213, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.22851529717445374, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.20691397786140442, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.19097371399402618, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.17501665651798248, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.16183078289031982, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14985291659832, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.14025186002254486, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13124948740005493, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.12200847268104553, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.11597384512424469, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.11147428303956985, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.10648969560861588, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.10276349633932114, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.09940085560083389, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.09727659821510315, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.09616463631391525, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.09424501657485962, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.0920066088438034, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.19295634920634921, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.19915674603174602, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2160218253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.23759920634920634, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.26884920634920634, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3177083333333333, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3655753968253968, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.41889880952380953, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.46899801587301587, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5198412698412699, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5632440476190477, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6267361111111112, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6693948412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7249503968253969, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7896825396825397, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8420138888888888, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8737599206349206, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8931051587301587, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9064980158730159, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9139384920634921, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.917906746031746, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9203869047619048, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9231150793650794, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9298115079365079, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9365079365079365, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9417162698412699, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9454365079365079, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9496527777777778, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9548611111111112, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9580853174603174, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9734623015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9739583333333334, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.05037112301077656, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.05809501733365385, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.07899643679732529, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.10200118645439608, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.1311410308919388, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.16935673289031156, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2039387276447338, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.24428122727227208, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.28792285230072373, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3421761742284821, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.387836275999666, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.4490722424953906, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5004342225980197, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5958741536019063, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.727992568846578, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8193095204368293, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8636565598657362, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8847147151073147, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9004699173780479, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9091826616094808, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9124048622480966, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9137207126350753, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9169994811031056, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9241299905128719, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.931113375148161, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9358856434535185, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.940413761332857, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9437735420129107, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9458606093943809, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9492838918446205, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9529193370715163, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9560062995354648, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.957343253260676, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9589424077229034, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.962465511501929, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.96441686499539, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9658007036328516, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9651623832592539, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9669581556485044, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9681969369622584, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9691079383713127, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.970575832683159, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9705612065765262, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.96980125700592, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9695145320066917, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.970479658712272, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9704153635033541, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.969919457974591, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9716082526736752, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.07439139618538321, "validation/loss_best": 0.0920066088438034, "validation/acc_best": 0.9739583333333334, "validation/f1_best": 0.9716082526736752} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.9565231141448021, "train/grad": 0.05062313282862305, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.80054443359375, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.7673974609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.713603515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.6614007568359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.61091552734375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.542449951171875, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.467535400390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.38702880859375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.2856842041015626, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.1828271484375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.0865289306640626, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.9499786376953125, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.8259176635742187, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.6606204223632812, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.5168991088867188, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.3918801879882812, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.2501297760009766, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.1055136871337892, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.9689292144775391, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8622649002075196, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.7531156158447265, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.6614566230773926, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.5788492584228515, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5077217102050782, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.4480832028388977, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.39115586042404177, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.34991812467575073, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.31867185592651365, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.2835516917705536, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.25026516914367675, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.22623929649591445, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.20795216500759126, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.1873491483926773, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.17198381662368775, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.1565995966643095, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.14375266406685114, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.13202168192714453, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.12252797190099955, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.11351320140063763, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.10400732414796948, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.09768050441518426, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.09278318136930466, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.08710845850408078, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.08287857020273805, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.07862519895657898, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.07479699166491628, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.07201464904472232, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.06930110037326813, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.06765918859280645, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.014464086662046611, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014256477151066064, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01392731714528054, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.013612695941701532, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.013306167176924646, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012901180516928434, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012466586739756168, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.012010720982216299, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.011456476645544172, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.010924807912670077, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01045895448885858, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009849968827329575, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00934281320311129, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008710192833095789, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00817597086308524, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007707259617745876, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0071629828284494575, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006586830497253686, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00602085494901985, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005564784419257194, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005085481272544712, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004673107517883182, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004294577146647498, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003961170961847529, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0036768694303464145, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0033990648633334787, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0031919074663892388, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.003033120628679171, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0028476159332785755, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.002664360218332149, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.002527549532824196, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0024182006606133653, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00229118910850957, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0021906975470483304, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.002086232952424325, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0019969104905612767, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0019117824878776446, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0018428106335340998, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0017733169451821596, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0017011710684164427, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0016500100059784017, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0016123006920679473, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.001568831424228847, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0015395055526460056, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0015095841827860567, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0014853569894330576, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0014714602949970868, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0014671131101204082, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0014726692267868202, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.7832632064819336, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.7481491565704346, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.6911184787750244, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.6359832286834717, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.5826258659362793, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.5105783939361572, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.431892156600952, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.347661018371582, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.2419795989990234, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.1351778507232666, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.035597324371338, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.8951621055603027, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.7682262659072876, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.6005946397781372, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.4562993049621582, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.3320286273956299, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.1924951076507568, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.0518851280212402, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.9206444025039673, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.8191760182380676, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.7161195278167725, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.6299978494644165, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.5527236461639404, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.4863151013851166, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.4308012127876282, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.37768295407295227, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.3394447863101959, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.31051182746887207, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2780742347240448, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.24738463759422302, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.22531022131443024, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.20850805938243866, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.18979130685329437, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1758882850408554, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1618744432926178, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.15029819309711456, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13978779315948486, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13125182688236237, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12319055944681168, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.11509248614311218, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.10973714292049408, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.10583311319351196, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.10156167298555374, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.09858319163322449, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.09597600251436234, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.09349477291107178, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.0918624997138977, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.09020285308361053, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08917371183633804, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.1996527777777778, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.21205357142857142, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.234375, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.26959325396825395, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3125, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.36259920634920634, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.4164186507936508, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4585813492063492, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5119047619047619, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5610119047619048, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6128472222222222, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6651785714285714, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7078373015873016, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7795138888888888, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8390376984126984, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8683035714285714, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8888888888888888, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9045138888888888, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9112103174603174, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9174107142857143, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9193948412698413, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9233630952380952, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9288194444444444, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9362599206349206, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.941468253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9464285714285714, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9486607142857143, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9588293650793651, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9610615079365079, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.970734126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9732142857142857, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.05870714271921841, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.07409127703226663, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.09866372288396158, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1312623276629921, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.1651844737749825, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.20166569976297544, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.24168997443354975, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.27684652053655573, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.33097483621402524, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.38269908403016456, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.4351465013540924, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.4945272974137228, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5695594559413394, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7113767301050549, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.815556878763594, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8560758496225072, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8805897876083599, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8990867067960444, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9058153945945924, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9120407955322866, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9126907366493012, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9175685202930227, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9225787742779875, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9300941819073049, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9357381988320487, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9404024316328982, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9424996038283159, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9451166394955777, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9488606348697094, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9548737927734201, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9571480103089036, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9596236620191056, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9618365887924402, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9624690173322582, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9652874348592932, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9655536068284699, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9668721299332711, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9684106385937219, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.970475959893391, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9705347256509333, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9706286238451108, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9706299521546788, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9708884549654658, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9711445247837245, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9710693452985152, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9717400207272863, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9713226511942146, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9702445575730255, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9702003223131386, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 0.07479699166491628, "validation/loss_best": 0.09349477291107178, "validation/acc_best": 0.9754464285714286, "validation/f1_best": 0.9717400207272863} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.9112441694736481, "train/grad": 0.04836361777037382, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.769351806640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.7322900390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.672325439453125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.614376220703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.5583154296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.4828790283203124, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.4005987548828127, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.312642822265625, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.2026812744140627, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.0917510986328125, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.9886077880859374, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.8436837768554688, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.7133811950683593, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.5424612426757813, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.396663360595703, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.272315216064453, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.1341159439086914, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.9964116287231445, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8693160247802735, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.7720390224456787, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.6739703989028931, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.5927091360092163, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.5200042486190796, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.45771456003189087, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.40554471373558043, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.3557212096452713, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.31972292453050616, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.2922177056968212, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.26138160571455954, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.2319806831330061, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.21060827903449536, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1943109380453825, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.1758395079523325, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.16205647140741347, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.14820801571011544, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.13651879038661718, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.1257992072403431, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.11699303846806287, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.10864843415096402, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.09972764680162072, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.09368973217904568, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.08905565893277526, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0837167052179575, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.07968799487687647, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0756368291284889, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.07193240151740611, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.06933172222226858, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.06681342734023929, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0652161196153611, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01415737890638411, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013928013085387648, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.013562802597880364, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.013208411931991577, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012871552584692835, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012429545354098081, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011956643820740282, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.011468403488397599, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010889048571698368, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.010347125441767276, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009882005439139903, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009283337176311761, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008785069303121418, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008156950469128788, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007620515793096274, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007151641808450222, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006613988792523741, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006058186998125166, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005527080898173153, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005108806730713695, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004676562808454037, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004310488387709484, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003975197300314903, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00368040768080391, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.003428207732504234, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.003180021804291755, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002992566336179152, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0028464705182705075, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0026746796566294506, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0025039294961607082, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.002374720515217632, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.002272675033309497, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0021530661528231577, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.002062677178764716, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.001968900909414515, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.001887599161709659, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0018113148113479839, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0017473835046985186, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0016844612022396178, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0016155416634865106, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0015685756455059163, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0015308952386840246, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0014886152418330312, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0014570582853048108, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0014235130252200179, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0013952572431298904, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0013803600112441928, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0013789568378706463, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0013877791332197376, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.753544807434082, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.714660406112671, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.6517493724823, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.5910677909851074, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.5324082374572754, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.4537599086761475, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.368211030960083, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.277078628540039, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.163421392440796, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.0493478775024414, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.943648338317871, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.7959522008895874, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.6640729904174805, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.4922759532928467, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.3471758365631104, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.2242923974990845, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.089032530784607, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.9552240967750549, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.832970917224884, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.7397674918174744, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.6463455557823181, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.5690922737121582, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.49999868869781494, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.44102105498313904, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.39168885350227356, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.34454113245010376, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.31057286262512207, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2847880423069, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.25579240918159485, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.22839288413524628, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.2085856795310974, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.19351960718631744, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1765260100364685, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1639452427625656, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1514502763748169, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.14106664061546326, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13166914880275726, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.12405389547348022, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.11709415912628174, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.10980094969272614, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.10517650097608566, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.10165408253669739, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.09777703881263733, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.0949682667851448, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.09219588339328766, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08950897306203842, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.08760044723749161, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.085783451795578, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08514457941055298, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2095734126984127, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.22271825396825398, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.25892857142857145, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.30580357142857145, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.34771825396825395, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4000496031746032, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.44841269841269843, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4937996031746032, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5486111111111112, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6068948412698413, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6473214285714286, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6999007936507936, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7509920634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8308531746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8670634920634921, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.886656746031746, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9027777777777778, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9124503968253969, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9166666666666666, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9196428571428571, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9241071428571429, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9305555555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9362599206349206, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9419642857142857, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9439484126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9486607142857143, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9506448412698413, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.953125, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9573412698412699, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9613095238095238, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.970734126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9737103174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9737103174603174, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9761904761904762, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0710583506793666, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.08600052174020793, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.12252774044524624, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.16029788339739864, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.1897803048858258, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.22983692382674242, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2679633128603868, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3141853321868398, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3705726200339127, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.42960171032121913, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.47373040830798097, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.550669222991471, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.6494720410851651, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8017193487655008, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8551468915306735, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.877442926971497, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8969532790472682, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9073316169902667, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9111416150237006, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.913714355010343, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9177546624061896, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9243960117053027, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9308474821093436, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9356686920178665, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9373515468239256, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9421281732638734, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9444940770801671, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9472453150320786, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9528295034688057, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.957006117943843, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9581866055409337, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9599841625926466, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9629668056848588, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9639512792571848, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.964572627783262, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9666899446916096, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9683786499960304, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9678079636740871, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9694056163972242, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9714539605657583, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9716653011466427, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.97088292494947, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9717770724797655, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9718102676811851, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9722199300589313, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9735531912317986, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9728395262196011, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.973070939502225, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9738158637107711, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.0652161196153611, "validation/loss_best": 0.08514457941055298, "validation/acc_best": 0.9761904761904762, "validation/f1_best": 0.9738158637107711} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.873306038081646, "train/grad": 0.04673099294304848, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.740987548828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.700440673828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.634820556640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.5716302490234373, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.510694580078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.42898681640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.3403448486328124, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.2461260986328124, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.1288214111328125, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.011463623046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.90307861328125, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.7521983337402345, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.6181561279296874, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.4446533203125, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.2993326568603516, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.1773929595947266, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.044071578979492, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.9135919952392578, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.7950971031188965, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.7054706287384033, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.6160561990737915, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.5422955107688904, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.4764748811721802, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.4201716423034668, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.37297490119934085, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.3279211500287056, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.2951879373192787, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.2702544529736042, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.24204951763153076, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.21517707504332065, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.19570236161351204, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1807673393934965, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.16388108905404805, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.15125994760543107, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.13861907612532376, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.12796572368592024, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.11818748313933611, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.11012129163369537, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1024917732551694, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.09432850178331137, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.08881777444854379, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.08450910653918982, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.079525791015476, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.07580338330008089, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.07203950766474009, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.06871467638760805, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.06635644129477442, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.06417809197679163, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0628042378090322, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013985551171936094, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013735543019138277, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.013330216463655233, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012942544296383857, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01257868311367929, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012097967672161758, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011590418382547795, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.011074156807735562, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010473074037581681, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009921573172323405, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009454160244204105, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008855690478812904, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008354248295072466, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.007715637509245425, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007170303002931177, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006699634250253439, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006168346689082682, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005631291116587818, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005128909104969353, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004739843094721436, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004343747100792825, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004009503404377029, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0037050437566358597, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0034384479140862822, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0032096148643177003, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0029838518594624474, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0028157344687497242, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0026835723355179654, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0025294840103015303, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0023766054096631705, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0022610183601500466, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0021702945313882083, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.002064169334480539, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00198301452153828, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0018997386563569308, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0018279866891680286, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0017610140284523368, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0017073637337307445, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0016552492108894512, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0015984444614150562, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0015594877011608332, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.001529021227324847, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0014932308380957693, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00146617611637339, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0014380631499807351, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0014146943550440482, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0013968998711789027, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0013769262767164036, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0013635159951809327, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.727410078048706, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.685293197631836, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.617400646209717, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.5519533157348633, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.489020824432373, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.4047303199768066, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.3134713172912598, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.2167258262634277, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.096620798110962, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.976797342300415, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.866568922996521, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.7135196924209595, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.57845938205719, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.4046964645385742, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.2602869272232056, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.1397444009780884, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.0089423656463623, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.8818991780281067, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.7670613527297974, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.6807203888893127, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.5947480201721191, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.5241528749465942, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.4613538384437561, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.40761682391166687, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.3627113401889801, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.320039838552475, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2891118824481964, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2656039297580719, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.23915688693523407, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.21405786275863647, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.19603604078292847, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.18228267133235931, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.16679905354976654, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1554017961025238, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.14396433532238007, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.134535014629364, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1260446161031723, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11908911168575287, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.11257949471473694, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.10585476458072662, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.10148005932569504, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.09813527762889862, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.09437716007232666, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.09179119765758514, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08934470266103745, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08707422018051147, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.08561381697654724, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.08439019322395325, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08420876413583755, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2170138888888889, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2371031746031746, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2839781746031746, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.33382936507936506, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.375, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4280753968253968, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.4739583333333333, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5190972222222222, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5783730158730159, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6324404761904762, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6713789682539683, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7254464285714286, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7879464285714286, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8551587301587301, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8804563492063492, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8985615079365079, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.908234126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9156746031746031, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9196428571428571, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9228670634920635, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9285714285714286, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9337797619047619, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9402281746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9432043650793651, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9464285714285714, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9565972222222222, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9593253968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9613095238095238, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9784226190476191, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9784226190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9776785714285714, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.07986212998836435, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.10132032815039545, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.14339981759199757, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.17972986728169957, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.21089371924511766, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.24987259461041475, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.29251637017079524, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.33941034125570757, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4011257525803758, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.4553734175401359, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5023523687165321, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.599102007451493, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7264504445158324, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8378010327498059, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8717214243257703, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8913579747228756, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9033242485365908, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9108938799322692, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9140560145341977, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.916504882771634, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9224385325114186, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9275384174068674, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9337727807313314, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.937039980399136, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9401217016786904, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9436433530313115, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9480501297107454, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9513577849280838, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9550944423948216, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9571392762455617, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9587362407224672, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9605835149151117, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9635026402632166, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9655020645662515, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9664113604716038, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9679521988400258, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9691136233836957, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9717518092189579, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9728788080424712, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9730028218067901, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9728829698671604, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9741312352216938, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9753414567270272, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9758141435716186, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9755471468033832, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9742466136038056, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9747813900333304, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9756638036332845, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9753096771083489, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.07580338330008089, "validation/loss_best": 0.09179119765758514, "validation/acc_best": 0.9784226190476191, "validation/f1_best": 0.9758141435716186} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.8409173011779785, "train/grad": 0.045604637525975704, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.715484619140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.672025146484375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.601690673828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.5342034912109375, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.4693804931640626, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.3825445556640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.2886810302734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.18917236328125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.065981750488281, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.9433245849609375, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.8307485961914063, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.6749655151367187, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.538080596923828, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3629624938964844, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.2184285736083984, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.0987142181396485, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.9696704483032227, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.8452394866943359, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.7336725234985352, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.6501084613800049, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.5673032236099244, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.4994583487510681, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.439207569360733, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.38779086887836456, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.3448058348894119, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.30367164194583895, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.2738232153654099, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.25110243186354636, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.22548182874917985, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.20101669915020465, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.18320481218397616, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1695570834353566, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.15410808235406875, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.14250348266214133, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.1307739362679422, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.12091662783175706, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.1118606260791421, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.10424021931365132, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09698783818632364, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08918660284020007, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.08389633479528129, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0797600961010903, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07495578821748496, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.07133415755815804, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.06768043862655759, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.06444429903291166, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.062182186422869566, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.060001162569969894, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.058613846525549886, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01384141634684056, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013574296296574175, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01313848395831883, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012730049570091068, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01234409047756344, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.011838510348461569, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01131055552046746, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010781127624213696, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010175476814620197, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.00962900449987501, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009169182861223818, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008579140957444907, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008079745976720005, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.007438435852527618, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006891988378483802, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006423821228090674, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005901561609935015, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005381612475030124, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004899692834587768, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004530869581503794, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004156917632790282, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0038421459146775307, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003557229139842093, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0033071902336087077, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0030921699979808183, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00287892117514275, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0027192649338394406, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.002594328041886911, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.002447675383882597, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0023019919131183997, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0021926321706268936, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0021063544758362696, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0020053300022846087, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0019284368067746982, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0018482004385441543, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0017803607942187228, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0017154010536614805, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0016593479787115938, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.001604262275795918, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0015415314625715837, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0014982905657961966, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0014645037465379573, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0014243990724207834, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.001393227453227155, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0013641590237966738, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0013398071777191945, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0013287734062760138, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0013224205725418869, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0013285625337448436, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.7052600383758545, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.660377025604248, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.5881783962249756, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.518923759460449, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.452380657196045, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.3635480403900146, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.267641544342041, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.1663665771484375, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.041140556335449, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.916849970817566, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.8030692338943481, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.646316647529602, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.5091757774353027, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.3348078727722168, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.1915911436080933, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.0737415552139282, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.9472793340682983, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.8258814215660095, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.7174158692359924, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.6364251971244812, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.5563350319862366, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.49081936478614807, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.4325866997241974, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.382940411567688, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.34154781699180603, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.3019911050796509, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2733314037322998, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.25154995918273926, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.22707048058509827, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.20376484096050262, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.18696174025535583, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.17394308745861053, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.15942983329296112, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.148611918091774, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1378890722990036, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12894879281520844, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12087900191545486, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11426833271980286, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10832037031650543, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1021997332572937, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09839030355215073, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0955255776643753, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.09227416664361954, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08985928446054459, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08742610365152359, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08538618683815002, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.0839279443025589, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.08266284316778183, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08204476535320282, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2251984126984127, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2517361111111111, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3053075396825397, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3556547619047619, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.39831349206349204, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.44841269841269843, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.4945436507936508, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5399305555555556, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6044146825396826, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6532738095238095, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.691468253968254, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7544642857142857, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8246527777777778, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8707837301587301, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8911210317460317, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9025297619047619, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9129464285714286, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9169146825396826, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9201388888888888, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9246031746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9315476190476191, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9365079365079365, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9454365079365079, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9474206349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9513888888888888, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9593253968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.972718253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9771825396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08881351965301976, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.1157793643216313, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.15965350226055589, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.19661634469123893, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.22827353298655012, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.2678339480477019, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3130113183110462, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3621717231531547, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.42847444758411235, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.48097098570367114, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5356629978532257, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.6584016782444175, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7911832436490642, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8599428659608741, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.883599408054569, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8972441248664156, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.908985659443272, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.911705670169899, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9138579874770856, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9187020890946117, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9257534613145508, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.930579010718, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.935733861589243, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9393248128996883, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9413532395490798, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9454384801727402, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.95046066274488, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9532691996586079, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.954670155868949, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9587164565869208, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9615779715776591, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9627386101852606, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.964964444252219, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9673033532032269, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9687425502288782, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9697800357677366, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9712047396721756, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9726353350264064, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9733274395887196, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.97437112377716, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9740490810595895, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9740988203785702, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9746363795797154, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9739976025904221, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9740112867852327, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9743890995766997, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9741604955358713, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9739794251631932, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9737552319160369, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 0.06444429903291166, "validation/loss_best": 0.08538618683815002, "validation/acc_best": 0.9774305555555556, "validation/f1_best": 0.9743890995766997} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.8200215065479278, "train/grad": 0.044526982437819246, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.697841796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.651953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.57815185546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.507362060546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.4394244384765624, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.3487640380859376, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.2509942626953126, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.1477508544921875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.0203656005859374, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.8940484619140625, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.778662109375, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.6198475646972657, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.4813841247558595, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3060296630859376, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.1629210662841798, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.0456347274780273, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.9205764770507813, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.801098747253418, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6950094699859619, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.6161587715148926, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.5383222818374633, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.47468058347702025, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.4182392275333405, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.3700715732574463, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.32967966079711913, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.29110868126153944, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.26300456672906875, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.24164665400981902, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.21742708891630172, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.19423027351498603, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.1773463447391987, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.164404231980443, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.1497007393091917, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.13863085623830557, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.12744385719299317, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.11792959559708834, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.10919269066303969, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.10193754501640796, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09493741663172842, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08739195748232305, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.08230639647692442, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07826337911188602, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07357992525212467, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0700417102035135, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.06642764875665307, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.06313180124387145, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.06076718042604625, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.05838373695500195, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.056847823252901435, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01368448322173208, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013400138560682536, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012943322490900754, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012517174361273647, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.012115488997660578, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01159363201353699, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011056076125241817, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010525484131649136, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009926133588887751, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009388149878941477, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008935546877328306, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008346193353645504, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007840324626304209, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.007184837458189577, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006628620116971433, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006156031582504511, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005635743360035122, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005122160849859938, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004653508856426925, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004298003293806687, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003941141458926723, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0036438007361721246, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003374799583107233, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003140107069630176, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0029392193467356266, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.002741341589135118, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002592924548080191, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0024776327481959015, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0023425931320525707, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00220907413342502, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0021090034849476067, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0020294567622477187, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0019369219179498033, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.001865496339451056, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0017919641200569458, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0017281381782959216, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.001667922572232783, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0016165590594755487, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0015663715283153578, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0015096577102667653, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0014707049896242096, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0014380559339770115, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0014010763727128506, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0013727826791000552, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0013441181025700645, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0013192066435294691, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0013039449478674215, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0012900740184704773, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0012812150047102476, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.6870486736297607, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.6400270462036133, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.5643372535705566, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.4919233322143555, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.422487735748291, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.3300180435180664, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.2304465770721436, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.125554323196411, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.996421217918396, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.8687769174575806, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.7523998022079468, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.5929226875305176, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.4545269012451172, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.280124306678772, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.1386163234710693, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.0231677293777466, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.9004255533218384, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.7836319208145142, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.6801844239234924, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.6034662127494812, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.5277002453804016, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.4660874009132385, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.41122645139694214, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.36465325951576233, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.32574740052223206, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.28844210505485535, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2614481449127197, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.24102988839149475, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.21786190569400787, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1958400160074234, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.17983128130435944, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.16762849688529968, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.15394127368927002, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.14378446340560913, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.13360761106014252, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12514539062976837, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.11748398095369339, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11138039082288742, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10569052398204803, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0996304303407669, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09572909772396088, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.09277031570672989, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08959732204675674, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08751870691776276, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08584275096654892, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08481264859437943, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.08418471366167068, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.08319268375635147, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.0822635367512703, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.23412698412698413, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.265625, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.32242063492063494, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.37028769841269843, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4164186507936508, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4632936507936508, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5126488095238095, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5577876984126984, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6245039682539683, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6684027777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7085813492063492, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7782738095238095, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8415178571428571, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.878968253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8980654761904762, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9060019841269841, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9141865079365079, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9188988095238095, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9221230158730159, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9275793650793651, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9345238095238095, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9397321428571429, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9441964285714286, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9469246031746031, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9496527777777778, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9585813492063492, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.970734126984127, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9761904761904762, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.09782156163805411, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.1282089412558258, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.17150160860918048, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.20727161544218756, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.24054300186905625, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.28084507632692446, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3319380466379161, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.37893563910447814, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.44643360119957964, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.4999801626476569, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5699833977439682, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7075007441835868, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8177195228100005, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8697763580188678, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8913548916916776, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9013739255637064, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9098837321217733, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9134664446137445, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9158056638052778, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9212615641386234, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9280880761344681, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9330418094014925, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9376234171145386, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9405868585882728, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9431876937993854, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9475681575195514, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9493957357047468, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9543799145377401, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9570978743652583, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9607072505660235, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9616237799536821, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9632945911055415, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9658164187873463, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9672386932795864, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9676927083341741, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9690858628834762, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9702578679018634, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9712153413744533, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9722079711148544, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9726010594570644, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9737491988773839, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9738909039431131, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9732007923419019, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.97218180266942, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.972106457684603, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9731532179585403, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9728237226555876, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9726236388358155, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9728061495883542, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.07826337911188602, "validation/loss_best": 0.09277031570672989, "validation/acc_best": 0.9769345238095238, "validation/f1_best": 0.9738909039431131} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.7986099332571029, "train/grad": 0.04414778927341104, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.6785546875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6304168701171875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.55304443359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.4790625, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.4082275390625, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.3139324951171876, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.212548828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.1058468627929687, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.9746389770507813, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.8451605224609375, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.727313232421875, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5664976501464845, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.4272024536132812, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.252537384033203, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.111576385498047, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9971098709106445, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.8759699630737304, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.7612607192993164, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6600866031646728, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.5851995515823364, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.5114951014518738, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.45138758420944214, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.39804107546806333, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.352479053735733, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.31431017845869064, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.27770873039960864, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.2510561916232109, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.2306952005624771, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.20757835857570173, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.1854290709644556, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.16920410331338645, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.15680768609046936, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.14266387790441512, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.1320227089524269, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.121241539940238, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.11216409750282764, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.10376873318105936, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09675654916092753, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09011714827269315, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08283774020150304, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07792365187779068, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07408741226419807, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06961840697564185, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0662778886500746, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.062926872279495, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05989316442981362, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.057747917976230384, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.055573456110432747, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.05388654527254402, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013675058321096003, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01337451822590083, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0128994551114738, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0124558478128165, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01203827228397131, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01149860274977982, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010947231948375703, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010405839015729725, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009799245903268456, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009257239219732583, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0088000698806718, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008204065265599638, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007689699234906584, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0070291217835620045, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006475191796198487, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006011707882862538, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005507014775648713, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005016796309500932, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004573915048968047, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004239980818238109, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003905310274567455, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003626135195372626, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003370988217648119, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0031467570457607507, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002951957545010373, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0027559325512265787, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0026075766643043606, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.002489973905030638, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0023495010286569596, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0022106363432249053, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.002105158363119699, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.002021916016819887, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0019247189169982449, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0018505533860297874, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0017730127304093912, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0017067009391030297, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0016441732263774611, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0015917016155435704, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0015398075280245393, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.001482585817866493, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0014432858070358634, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.001410786166379694, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0013753445839392953, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.001347116072429344, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0013214400850120002, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0013010779056639877, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0012884924870741087, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00127426728446153, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0012599554329062812, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.6727304458618164, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.62396240234375, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.545625925064087, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.4707529544830322, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.3990917205810547, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.3037517070770264, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.201441764831543, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.093820810317993, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.9616706371307373, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.831498622894287, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.7132664918899536, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.552060604095459, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.4127528667449951, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.2386857271194458, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.0986839532852173, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9852813482284546, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.865658700466156, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.7524359822273254, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.6528084874153137, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.5790846943855286, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.506602942943573, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.44774186611175537, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.39546796679496765, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.3510209321975708, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.31376612186431885, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2782653272151947, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.25264787673950195, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2328987568616867, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.21075798571109772, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.18969888985157013, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.174362450838089, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.16268733143806458, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1495792716741562, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13985852897167206, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1300516128540039, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12209174782037735, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1147589460015297, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1088547483086586, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10345659404993057, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09794896841049194, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.0944395661354065, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.09177163988351822, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08886082470417023, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.0867018923163414, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.0847649797797203, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08301525563001633, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.08195295929908752, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.08101402968168259, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08059918135404587, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.24305555555555555, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2775297619047619, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.33407738095238093, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3851686507936508, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.42931547619047616, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.47371031746031744, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5210813492063492, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5726686507936508, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6329365079365079, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6803075396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7232142857142857, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8003472222222222, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8516865079365079, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8839285714285714, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9020337301587301, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9089781746031746, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9151785714285714, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9208829365079365, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.923859126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9290674603174603, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9367559523809523, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9409722222222222, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9456845238095238, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9479166666666666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9546130952380952, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9699900793650794, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9761904761904762, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.10672729211203649, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.13842415953571127, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.17974152224611328, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.21747893115507724, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.2513108326212354, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.2912289431596857, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.34104742490501994, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.395402865403635, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.45660046455491327, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5156956129766914, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.596178703360922, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7503453941916738, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8333126022887895, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8756605485452764, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8960436786928176, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.904677896843575, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9107632993033822, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9149440488089192, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.91744636390691, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9228586235880363, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.930417855717065, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9341285665949576, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9391836314588848, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9415113029168939, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9446668802550792, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9494142018602775, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9534816350898089, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9561809847310265, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9581332578404872, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9602471849758597, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9629688943628558, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9647875183769756, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9660929033532069, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9676119603544057, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9683930692229012, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9694247991067313, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9706892496010724, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9717233547478008, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9726671888425434, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9731581397174303, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9719512651899449, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9715367385293485, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9723876224658268, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9723876224658268, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9727524875191668, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.973551310827769, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9738088260620794, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9732228796031748, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9727876115467247, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 0.05989316442981362, "validation/loss_best": 0.08301525563001633, "validation/acc_best": 0.9771825396825397, "validation/f1_best": 0.973551310827769} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.7837058693170548, "train/grad": 0.043807745445519686, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.6668316650390627, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6172747802734375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5376275634765624, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.461656494140625, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.388853759765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.2920928955078126, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.18823974609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.0791586303710936, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.9452999877929686, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.8134109497070312, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6937649536132813, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5309342956542968, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.3907821655273438, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.2160552215576172, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.0760629272460938, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9630978775024414, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.8443496322631836, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.7324769878387452, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6343988132476807, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.56194420337677, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.4908863806724548, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.43306599140167235, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.3818129801750183, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.33811198711395263, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.30142959982156753, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.26640077754855157, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.24081949539482594, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.22133930183947087, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.19921337999403477, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.17804692257195712, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.16258802030235528, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.15064832165837289, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.13720326717942954, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.12703461445868014, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11679699676111341, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10809086134657264, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.100068732611835, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09337156658992171, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0869803387671709, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07997302890755237, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07525935474783182, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07152588182128966, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06713430132716894, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06375722330994904, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.060301515702158213, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.057169100483879444, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.054834807561710476, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.05251286685466766, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.05089698489755392, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013565461291000247, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01325928513891995, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012779261549003423, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012330612153746187, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011911492496728897, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.011372361020185053, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010828161165118217, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010298297381959856, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009709231862798333, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009182976786978542, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.00873626375105232, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008145179436542093, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007631215946748853, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0069680917961522934, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006414818540215492, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005953702491242438, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005454758594278246, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004972723856335506, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004539615518879146, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004212608330417425, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003884792316239327, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003611454621423036, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003362486035330221, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003143811676418409, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002953252756269649, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.002765271378448233, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002621869085705839, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.002507638974348083, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0023749071423662827, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0022430725663434713, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.002143476374621969, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0020633655990241096, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0019705904484726487, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.001899261809012387, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0018248546362156048, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0017604053308605217, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0016973986025550403, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0016424879193073139, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0015896981075638906, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00152740441437345, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0014839154781657272, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0014476716604258399, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0014045571107999422, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0013693300119484776, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0013310747387004086, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0012968123238533734, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.001269442135526333, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0012430255020444746, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0012269179488066584, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.6619696617126465, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.611992597579956, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.5317342281341553, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.454984664916992, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.3817646503448486, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.2843403816223145, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.1799330711364746, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.070301055908203, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.9360675811767578, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.8040423393249512, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.68437659740448, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.521860957145691, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.3822051286697388, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.208542823791504, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.0697641372680664, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9580040574073792, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.8404394388198853, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.7299700975418091, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.6330593228340149, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.561633825302124, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.4914981722831726, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.43457603454589844, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3840882182121277, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.34121599793434143, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.3053019642829895, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2709619700908661, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.24615181982517242, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.22716763615608215, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.20585674047470093, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.18544210493564606, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.17063558101654053, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.15941444039344788, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.14674872159957886, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13734978437423706, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1279575675725937, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12016543745994568, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.11314232647418976, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10752499103546143, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1023012027144432, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09693518280982971, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09359195083379745, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.09105711430311203, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08823899179697037, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08622962981462479, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08442369848489761, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.082757368683815, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.08156429976224899, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.08030828088521957, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.07937771081924438, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.24875992063492064, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.28596230158730157, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.34424603174603174, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.39533730158730157, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.43799603174603174, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4826388888888889, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5305059523809523, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5848214285714286, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6413690476190477, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6872519841269841, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.734375, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8142361111111112, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8601190476190477, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8888888888888888, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9037698412698413, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9114583333333334, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9181547619047619, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9213789682539683, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9253472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9317956349206349, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9379960317460317, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.941468253968254, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9449404761904762, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9479166666666666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9580853174603174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9593253968253969, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.964781746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9747023809523809, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9771825396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.11259200693930546, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.14437006822173074, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.18689764438956424, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.22554141453134108, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.25864148197139464, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3023900124377501, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3510079298514169, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.4090968747956802, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4680784497173524, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5269536696106554, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6167211792849132, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7746602859918696, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8450028441958831, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8812490979053289, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8982606678388096, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9077238828092772, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9131532918587132, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9154101128470292, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9191330486730813, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9260025020915269, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9314928602475153, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9353012292595579, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9385059576424033, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9416663851919624, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9457111400374145, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9501265016281208, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9533732437218202, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9547372957114751, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9575854611730653, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9613808923407972, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9632521335942482, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9632976460298524, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9655924827140591, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9671522678340564, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9680860035442104, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9694528295921194, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9711170298488474, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9718595587243612, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9722208830559007, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9727331478612493, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9729122405589178, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9727815646934608, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9722921060299341, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9729166145222498, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9733215636248482, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.974113104978265, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9744402672224908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9744449487372124, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9738164536750405, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.054834807561710476, "validation/loss_best": 0.08156429976224899, "validation/acc_best": 0.9774305555555556, "validation/f1_best": 0.9744402672224908} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.7778202545642853, "train/grad": 0.04318402502685785, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.662900390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6127093505859373, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.532330322265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.4553900146484375, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.3818499755859377, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.283985595703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.17912109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.0689517211914064, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.9335372924804688, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.80039306640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6795945739746094, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.515479736328125, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.3744927978515624, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.1994147491455078, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.0597671508789062, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9475513458251953, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.8299543190002442, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.7197418975830078, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6233561277389527, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.552404899597168, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.4829505467414856, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.42641672372817996, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.3762928521633148, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.3335598233342171, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.29771839261054994, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.26341091632843017, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.2384018338471651, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.21927324794232844, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.19755008384585382, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.17673446655273437, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.16148572333157063, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.14977603781968354, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.1363844022527337, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.12625424016267062, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11595521751791239, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.1071899814158678, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.09905948292464017, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09217932285740972, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08565701058134437, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07844531105831265, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07357003091834485, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06973409306257963, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06524187030270695, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06181003235280514, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0583176773507148, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.055120131280273196, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.05276066267862916, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0503524408955127, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.04866415682248771, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01348081742413342, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013168328152969479, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012679360676556825, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01222292949911207, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011796104214154184, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01125067996326834, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010702363098971546, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010172019614838063, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009585992135107517, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009063785083126276, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008619486950337887, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008028409210965037, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007513655640650541, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006850235103629529, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006298050486948341, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0058384838188067075, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005342058683745563, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004862888847710565, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0044315899221692235, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004106352732051164, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00377981525962241, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003506588489981368, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0032566176273394376, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0030360341281630097, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0028454672411317005, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.002656615257728845, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0025131031806813553, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.002400494783069007, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0022688714746618645, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0021371237334096805, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.002037801367114298, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00195948657928966, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0018663368711713702, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.001794895684288349, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0017196079334826208, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0016529651387827472, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0015898536107852124, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00153387229249347, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0014795910220709628, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0014161153373424896, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.001371533238561824, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.001335990020306781, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0012916451232740656, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.001256527611258207, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0012207897070038599, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0011864646623143927, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0011601766779494938, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0011338672872807365, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0011154011652979535, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.6546247005462646, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.6036794185638428, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.522024393081665, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.4441347122192383, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.3696999549865723, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.2708258628845215, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.16499662399292, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.0540900230407715, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.9183558225631714, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7850297689437866, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.6644258499145508, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.5011643171310425, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.3612161874771118, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.1879314184188843, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.049890398979187, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9391385316848755, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.8232641816139221, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.7146210670471191, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.6196444630622864, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.5495681166648865, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.48121026158332825, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.42552047967910767, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3762768805027008, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.33437034487724304, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.29939642548561096, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2659394443035126, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.24153399467468262, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.22303560376167297, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.20210330188274384, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.18216291069984436, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.16774560511112213, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.15680328011512756, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1442975550889969, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13503548502922058, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12587805092334747, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11820894479751587, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1113051176071167, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1057448610663414, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10063840448856354, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09527724981307983, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09195666015148163, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.08949097990989685, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08673197031021118, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08478865772485733, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.0829746350646019, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08133061230182648, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.0801527351140976, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.07881705462932587, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.07779790461063385, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2542162698412698, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2916666666666667, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.34970238095238093, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.40302579365079366, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4427083333333333, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4885912698412698, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5372023809523809, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.591765873015873, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6505456349206349, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6964285714285714, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7433035714285714, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8251488095238095, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8640873015873016, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8921130952380952, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9064980158730159, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9134424603174603, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9184027777777778, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.921875, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9263392857142857, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9389880952380952, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9456845238095238, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9486607142857143, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9506448412698413, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9585813492063492, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9603174603174603, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9769345238095238, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.11798213296053889, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.1492026908755136, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.19094437353753377, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.23187693871068446, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.26250430195841645, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.306763223184194, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.35876542001197514, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.41588743963026953, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4779099874702012, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5433723214600124, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6363851555215823, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7911047843548876, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8500439068424018, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8847463092208985, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9012242728823322, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9097061383551467, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9133758647330905, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9153789531568854, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9199397060015587, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9270104709705821, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9331472853446057, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.935670551785937, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9393054314328159, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9424711810811617, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9447694486074092, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9505752968017444, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9539236681305493, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9561463032819193, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9593058141019067, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9629728201914134, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9643352369024663, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9659204709108099, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9658892557856698, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9682470195703339, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9690196339019268, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.971851015606015, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9729777072738737, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9727561700832911, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9727700374771531, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9734230774701654, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9741043643466476, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9738822927116498, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9741545357787644, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.97379796604523, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9747783193057341, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9751430776521158, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9761262885203915, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9754667433885486, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9753792739021405, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.05276066267862916, "validation/loss_best": 0.0801527351140976, "validation/acc_best": 0.9776785714285714, "validation/f1_best": 0.9761262885203915} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.7705281060934067, "train/grad": 0.042947174534201625, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.6539935302734374, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.603006591796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5212432861328127, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.4431475830078124, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.36856201171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.269361572265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.16315185546875, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.0518511962890624, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.9155416870117188, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.7815866088867187, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6603797912597655, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.4961019897460937, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.3553436279296875, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.1810562896728516, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.0424870681762695, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9314416313171386, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.8153494071960449, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.7068020629882813, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6120481729507447, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.5424182200431824, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.47432486414909364, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.41891003608703614, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.36990646898746493, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.3281059667468071, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.29309631034731864, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.2595221465826035, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.23503063134849073, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.21631636343896388, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.19508255537599326, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.17466121651232241, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.1597376037016511, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.14820586334913968, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.13510576900094748, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.12517770782113075, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11504718644544482, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10645679730921984, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.09847611976787447, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09172074573114514, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08528087770566345, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07822537560015917, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07338361592963338, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06952099623158574, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0650568266864866, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06159217309206724, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.058046557772904635, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.054750581253319976, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0523253146559, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.04984730750322342, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.04808971427381039, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013476558378897607, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013159528779797256, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01266337771434337, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012198622915893792, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011765325861051678, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.011211153385229409, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010654593547806145, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010116814360953868, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009522210145369171, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008991724282968789, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.00853975365171209, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007940534802619368, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007419351448770612, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006750900715123862, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006197585961781442, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005739193621557206, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0052447603479959075, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004769631893141195, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0043425196968019004, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004021153136854991, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0036983964464161545, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0034287745039910077, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003184053808217868, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00296890651457943, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002783137104124762, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0026003451383439825, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002462782104848884, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0023559755005408078, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0022309197584399953, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0021074330393457785, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0020149557088734583, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0019418383645825087, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0018569719957304187, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0017909889359725639, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0017215487774228677, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0016615854031988419, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0016024147521238774, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0015510146084125153, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.001500648305809591, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0014428254071390256, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0014007654314627872, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0013674959039781242, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0013254916315781884, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0012919101855368353, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.001256280362867983, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0012203150936693418, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0011922077718190848, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0011641271178086754, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0011436936430982314, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.650043249130249, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.598540782928467, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.5160319805145264, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.4373762607574463, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.3621699810028076, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.262463092803955, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.1557610034942627, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.044009208679199, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.9072586297988892, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7732043266296387, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.651993989944458, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.488179087638855, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.348095417022705, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.175062894821167, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.0375776290893555, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9274862408638, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.8125391006469727, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.7051089406013489, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.6111677289009094, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.5422838926315308, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.474789559841156, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.4199484586715698, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3714367151260376, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.3302544355392456, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.29569047689437866, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.26274803280830383, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.23896366357803345, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2205989956855774, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.20004813373088837, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1803612858057022, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.16617199778556824, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.15529826283454895, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.14307184517383575, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13401083648204803, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1249723955988884, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11733074486255646, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1106579601764679, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10511589050292969, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1001020297408104, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09481923282146454, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09149975329637527, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.088884636759758, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.0859563872218132, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08395648747682571, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08190205693244934, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.0800718441605568, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.07879520952701569, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.07750165462493896, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.07658170163631439, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.25570436507936506, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.29538690476190477, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.35441468253968256, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.40600198412698413, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4466765873015873, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.49330357142857145, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5414186507936508, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5954861111111112, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6527777777777778, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.699156746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7477678571428571, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8308531746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8670634920634921, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8928571428571429, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9064980158730159, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9136904761904762, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9191468253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9226190476190477, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9263392857142857, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9327876984126984, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9382440476190477, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9422123015873016, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9461805555555556, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.949156746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9590773809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.96875, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9774305555555556, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.11946195356797483, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.1519571440632041, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.19465200649366382, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.23316740832314053, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.2663151476403034, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3119363826095408, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.36349533898686437, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.4200968682769012, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4811212850747916, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5508360667670227, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6447529213481329, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8009683543935212, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.853874469104565, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8862576132266755, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9014493832194587, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.90986847921868, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9143129275471021, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9161967357301694, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.920157548826136, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9271221776661743, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9316217142192974, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9360419045481303, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9400370217059619, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9429300455556129, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9458511918509005, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9505076098590474, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.954742122991061, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9565958853949758, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9593950440415132, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.962417137113796, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9651054019111427, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.965097824479071, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9661925445152248, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9683178045991809, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9683683584840821, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9711306237520709, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9725717046671256, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9740781742573853, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9736643967636912, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9742665022057806, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9739938444394718, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9745458703531531, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9753658991652853, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9755349589355969, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9754787120793289, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9754839575872589, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9756645597879793, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.975614407469528, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9753780280662477, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.0650568266864866, "validation/loss_best": 0.0859563872218132, "validation/acc_best": 0.9779265873015873, "validation/f1_best": 0.9753658991652853} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.7669641187787056, "train/grad": 0.04273238845169544, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.652403564453125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.601109619140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5188909912109376, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.4403192138671876, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.365394287109375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.2656817626953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.159097900390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.04718994140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.9101773071289063, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.7756130981445313, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6539897155761718, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.4892759704589844, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.3482447052001953, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.174101333618164, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.0359447479248047, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9253960990905762, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.8100680160522461, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.7022676849365235, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6082470083236694, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.5391291856765748, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.471451176404953, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.4163294959068298, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.36747972905635834, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.3257613179087639, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2907049968838692, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.2569623763859272, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.23232987754046916, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.21352643594145776, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.19211902685463428, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.17144215144217015, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.15641444407403468, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1448009065538645, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.13155735343694686, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.12156477849930525, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11147459127008914, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10286922253668308, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.09496004305779934, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0883545489422977, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08201546279713512, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07517229808494448, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07048117782920599, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06681882010772824, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.062517429580912, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.059210270270705224, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0557719250023365, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05263318099081516, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.05023944734595716, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.047759151626378296, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.04597874419763684, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013394149127416313, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013075535465031863, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01258060115389526, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012117480500601232, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01168620069976896, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.011138284159824253, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010590280047617853, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010063381167128682, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009482543366029859, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008964891687501222, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008522422234527767, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007931976991239936, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007415836392901838, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006751875402405858, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0062007867847569285, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005744448765181005, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005254283555550501, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004782091871602461, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004359432858182117, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004041260788217187, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003722568452358246, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0034550067374948413, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003209839182673022, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0029931749647948893, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0028048408910399305, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.002615726294461638, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002472860959242098, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0023599488876061515, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.002228946211980656, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00209760909085162, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0019998057733755558, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0019220971286995337, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0018320413818582893, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0017624989149044269, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0016891027899691834, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.001625068499124609, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0015628377834218555, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0015091658337041737, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.001454134161467664, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.001392691149667371, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0013468102202750742, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0013099654237157664, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0012637950241332874, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.001227119953255169, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0011872177550685593, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.001150642809952842, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0011204543082567397, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0010898310272023082, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0010679221936152317, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.6475868225097656, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5957884788513184, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.5128531455993652, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.4336965084075928, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.358229160308838, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.2580111026763916, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.1508536338806152, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.0387117862701416, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.9014142751693726, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7670024633407593, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.6454737186431885, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.4813717603683472, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.3411386013031006, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.168205738067627, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.0309745073318481, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9213948249816895, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.8069096803665161, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.6999427676200867, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.606661319732666, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.5382040739059448, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.471286416053772, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.416925847530365, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3688075542449951, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.3278813064098358, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.29377782344818115, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2610592544078827, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.23738634586334229, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.2192278802394867, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1988641321659088, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1794169545173645, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.16523095965385437, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.15443159639835358, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.14236392080783844, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13338512182235718, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1244143694639206, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11689170449972153, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.11013088375329971, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10474611073732376, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09973370283842087, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09465344250202179, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09132950007915497, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.08883789926767349, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08609963208436966, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08404947817325592, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08214544504880905, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08036451041698456, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.07911582291126251, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.07795951515436172, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.07713925093412399, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.25818452380952384, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2978670634920635, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3559027777777778, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.4069940476190476, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4469246031746032, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4937996031746032, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.544890873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5992063492063492, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6557539682539683, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7023809523809523, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7504960317460317, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8343253968253969, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8678075396825397, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8940972222222222, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.90625, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9149305555555556, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9188988095238095, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9221230158730159, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9270833333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9392361111111112, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9424603174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9464285714285714, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.949156746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9789186507936508, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.1219094223955686, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.15408951909215465, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.19572252981572708, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.23409909359942332, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.26612556771782436, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3120806172188164, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3673158828893951, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.4238291801639485, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.48447245993142357, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5570047754442885, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6506851823611975, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8059401924210257, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8552320238339577, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8872512669829814, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9014081800026508, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9110370760654657, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9136783895200625, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9160273347839805, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9212159826780024, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9273389134809096, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9331624049942552, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9364997232150045, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.940239667985396, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.942841709692793, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9461795613997936, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9505783807270406, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.953755035931417, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9562798055380975, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9593451537122555, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.962241986293346, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9646571711088431, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9652780963622958, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9663602150840903, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9685855068065747, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.968915379224082, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9711265881173455, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9735178012878634, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9738546136935545, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9740270054742544, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9739519356471521, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9740649839737895, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9746035751591011, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9753263221570696, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.975496215158814, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9752752003831321, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9759463229956173, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9758556741960049, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9765834556241787, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9769045024063892, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.04597874419763684, "validation/loss_best": 0.07713925093412399, "validation/acc_best": 0.9789186507936508, "validation/f1_best": 0.9769045024063892} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.7644814255833626, "train/grad": 0.043051154240965846, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.649783935546875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.59796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.51515380859375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.4360308837890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.3606256103515624, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.2602532958984374, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.15298583984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.040406494140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.9025967407226563, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.7672735595703124, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6450895690917968, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.4797023010253907, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.3384583282470703, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.164254150390625, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.0261054992675782, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9159468650817871, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.8010524559020996, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.6938223266601562, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6004880142211914, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.5321759963035584, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.4653372824192047, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.4110234439373016, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.36297205328941345, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.3220343589782715, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2877886000275612, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.25494610965251924, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.2311067308485508, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.21278334401547908, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.19211005091667174, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.17222841456532478, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.15772740237414837, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.14651400659233332, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.13380201037973166, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.1242053847387433, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11440777972340584, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10603716505691409, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.09823166351765394, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0916407491452992, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08528890583664178, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07832810661755502, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07352340467274189, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06965396753512323, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06513675858266652, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06161972393281758, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.057967856898903845, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05454611081629992, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.051945003094151614, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.049265381889417766, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.04724430561065674, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013463041260838509, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.013143851608037948, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012645884933881461, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01218246496282518, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011751784314401447, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01120166759006679, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010652360077947377, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010124913705512882, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00954343678895384, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009025642278138547, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008581269790884107, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007989707915112377, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007472093678079546, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006806263995822519, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006254225296434015, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005797635377384722, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005306188223185018, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00483229985460639, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0044079142587725075, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004088591814506799, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003767820138018578, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0034996134764514863, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003254865659400821, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0030383214849280193, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0028505651908926665, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0026617326075211167, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00251968216500245, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0024073061323724686, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0022758782270830124, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.002143948646262288, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0020452545350417494, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.001966874763602391, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0018743396518402732, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0018029688930255362, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0017275073347263969, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0016617251679417677, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0015980611846316607, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0015424538144725374, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0014869671562337317, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0014228455771808513, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.001376336476241704, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0013370037425193004, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0012895862362347543, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.001251326454075752, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.001209213669935707, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0011684692667040509, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0011361548697459512, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.001101076533086598, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0010746493680926505, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.6466612815856934, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.594820976257324, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.511629819869995, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.432375907897949, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.356796979904175, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.2564496994018555, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.1490349769592285, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.036742687225342, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8992356061935425, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7645442485809326, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.643044114112854, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.4788086414337158, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.3385097980499268, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.1655654907226562, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.028439998626709, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9190153479576111, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.8046978116035461, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.6980145573616028, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.6048807501792908, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.5366619229316711, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.46994781494140625, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.41571566462516785, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.3677366077899933, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.32707858085632324, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.29300054907798767, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.260471373796463, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.23683954775333405, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.21874168515205383, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1984059363603592, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1790522336959839, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.16489295661449432, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.15416273474693298, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1420619934797287, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13311021029949188, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12421312928199768, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11668642610311508, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10996078699827194, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10457711666822433, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09956744313240051, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09450133144855499, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09112256020307541, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.08865134418010712, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08592206239700317, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08387559652328491, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.0819091796875, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.08013195544481277, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.07892843335866928, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.07774805277585983, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.07688092440366745, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.25892857142857145, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.29836309523809523, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.35639880952380953, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.40773809523809523, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4486607142857143, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.49503968253968256, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5441468253968254, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6009424603174603, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6555059523809523, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.703125, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7522321428571429, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8343253968253969, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8680555555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8943452380952381, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9087301587301587, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9144345238095238, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9186507936507936, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9223710317460317, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9280753968253969, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9389880952380952, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9424603174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9464285714285714, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9489087301587301, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9563492063492064, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9794146825396826, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.12267823128241705, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.15406627135737846, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.1959923437790658, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.23498611587856746, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.2674774913530457, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3135010843862129, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.36709729668274316, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.42533558440393343, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4847975182122058, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5581494734318088, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.653612479561969, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8063664260025336, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8554444977927615, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8875534066458968, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9037923061473064, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9100991977384374, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.913492494975432, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9162998362945812, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9218685255844925, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9273529192041488, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.932703109288691, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9361368156333372, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.940062153409776, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9426539435590439, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9461795613997936, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9509511157607635, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9537452523522396, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9562798055380975, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.959117466320901, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9624352823599949, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9648774178602931, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9652780963622958, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9663602150840903, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9682470195703339, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.968915379224082, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9711265881173455, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9735143594894152, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9738546136935545, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9740270054742544, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9744491892621909, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9740649839737895, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9746035751591011, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9745272337337573, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9755937747112133, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9757713942014774, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9755907392902607, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9768545160894467, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9770921091976484, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9776367535338889, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.04724430561065674, "validation/loss_best": 0.07688092440366745, "validation/acc_best": 0.9794146825396826, "validation/f1_best": 0.9776367535338889} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.7591868236660957, "train/grad": 0.04248551351949573, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.64652587890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.594298095703125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5106768798828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.430955810546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.3548065185546876, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.2537664794921874, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.1457293701171873, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.032510986328125, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.8940316772460937, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.7583746337890624, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6357778930664062, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.470166473388672, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.328876953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.1547492218017579, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.0169381713867187, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9070598983764648, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.7926061820983886, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.6860075378417969, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.5931809043884277, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.5252165031433106, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.45891362071037295, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.4050434398651123, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.3575333946943283, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.3168946409225464, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.28297921374440194, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.2504180993139744, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.22672387309372424, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.2085861226171255, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.18803925365209578, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.16824531845748425, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.15384901002049445, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.14271029759198428, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.1300399585440755, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.1204572244361043, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.11076232802122832, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10245957676321268, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.09474363218992948, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.08824754921719431, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08197660718113184, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07505142142064869, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07029276038520038, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06649336792528629, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06203383601270616, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0585721074603498, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.05500349433161318, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05165597286075354, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.04911243747919798, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.04648694405332208, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.044573738407343626, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.013351579247973859, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01303115051239729, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012530996655113995, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012066876753233374, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011636444628238677, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.011087080682627856, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010538437054492533, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010015371008776128, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009437580916564912, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008923272911924868, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008481405617203563, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007888783568050712, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007370372137520462, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006702763719949871, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006149441071320325, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005692260889336467, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005201179661089555, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004730131500400603, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0043080459022894506, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0039906479814089835, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00367408569669351, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0034076673025265334, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0031663243006914855, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0029513127682730556, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0027665447461185976, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0025823588657658547, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002444689746480435, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0023349140642676502, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.002207753239781596, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0020798932161414994, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.001984786343527958, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0019091518281493336, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.001821715016849339, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0017538754569250158, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0016824023198569193, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.001620564843469765, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0015613245189888402, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.001509527676680591, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0014575185748981311, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0013966628001071512, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0013520344797871076, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0013159692837507464, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0012693728461454156, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.001231759658548981, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0011911923316074535, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0011508435632276814, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0011190090405580123, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0010847963305423037, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0010580768361978699, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.6465420722961426, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.594694137573242, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.5114874839782715, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.4321959018707275, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.356570243835449, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.2561984062194824, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.1487648487091064, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.036475658416748, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8989242315292358, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7642630338668823, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.642651915550232, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.4784718751907349, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.3380781412124634, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.1651991605758667, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.0280745029449463, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9186115860939026, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.8044165372848511, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.697765588760376, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.6046707630157471, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.5364307165145874, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.46976304054260254, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.41552552580833435, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.36760061979293823, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.32691845297813416, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.2929011285305023, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2603795826435089, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.2367115467786789, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.21866537630558014, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.19832462072372437, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1789872795343399, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.16482537984848022, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.15412554144859314, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.14200656116008759, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13310647010803223, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12413676083087921, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11663393676280975, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10992405563592911, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10447844117879868, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09949740767478943, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09439193457365036, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.09105432033538818, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.08856841176748276, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.08578947186470032, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.08373662829399109, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.08180144429206848, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.0800030380487442, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.07877406477928162, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.07758765667676926, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.07667368650436401, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2586805555555556, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.298859126984127, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3568948412698413, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.40773809523809523, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4489087301587302, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4947916666666667, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5436507936507936, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6014384920634921, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6555059523809523, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.703125, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7524801587301587, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8343253968253969, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8683035714285714, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8948412698412699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9087301587301587, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9144345238095238, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9188988095238095, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9223710317460317, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9280753968253969, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9332837301587301, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9389880952380952, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9424603174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9464285714285714, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.949156746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9561011904761905, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9608134920634921, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9781746031746031, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9789186507936508, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.12238234482211603, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.15450179913699905, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.1966402608085793, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.23498611587856746, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.2680008350551808, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3135522590811231, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3665309405364944, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.4255798414731848, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.48460731927392764, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.558208616431392, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.65368119589922, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8066855628580698, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8557354719666308, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8879568178368348, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.903865845807483, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9100991977384374, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9136675786960302, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9162106196285911, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9218685255844925, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.927608563166257, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.932703109288691, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9361368156333372, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.940062153409776, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9428365565139756, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9461795613997936, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9507702187829237, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.953755035931417, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9568240232251723, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9592978464627344, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9624352823599949, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9648697715041069, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9652780963622958, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9663602150840903, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9682470195703339, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.968915379224082, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9711265881173455, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9733354935927659, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.973475489373388, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9738478386405217, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9740426144800655, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.97388574743874, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9746035751591011, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9745271229389819, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9755937747112133, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9757713942014774, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9763048379304626, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9770337881793244, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9767330665202432, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9769126028497026, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.04911243747919798, "validation/loss_best": 0.07877406477928162, "validation/acc_best": 0.9791666666666666, "validation/f1_best": 0.9770337881793244} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/config.yaml b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e55efe26625b57d1f67379337a25d4aff5b7d62 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (hcpya_task21 reg linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear +model: flat_mae +representation: reg +classifier: linear +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..386655c4484650bab516134adabf72c994a3948c --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 18, "eval/id_best": 42, "eval/lr_best": 0.005699999999999999, "eval/wd_best": 0.05, "eval/train/loss": 0.1243719831109047, "eval/train/acc": 0.9766829833149113, "eval/train/acc_std": 0.0010892024586996512, "eval/train/f1": 0.9775164879968898, "eval/train/f1_std": 0.0011385074183406598, "eval/validation/loss": 0.3291154205799103, "eval/validation/acc": 0.8998015873015873, "eval/validation/acc_std": 0.004768571618616205, "eval/validation/f1": 0.8803089883930048, "eval/validation/f1_std": 0.0063966199112998855, "eval/test/loss": 0.36137253046035767, "eval/test/acc": 0.8944444444444445, "eval/test/acc_std": 0.004214791823105744, "eval/test/f1": 0.8744955577277829, "eval/test/f1_std": 0.00539248157171559} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_best.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..0fe98e3463f12dcc4c447141666b79bb9f9ea907 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 18, "eval/best/id_best": 42, "eval/best/lr_best": 0.005699999999999999, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.1243719831109047, "eval/best/train/acc": 0.9766829833149113, "eval/best/train/acc_std": 0.0010892024586996512, "eval/best/train/f1": 0.9775164879968898, "eval/best/train/f1_std": 0.0011385074183406598, "eval/best/validation/loss": 0.3291154205799103, "eval/best/validation/acc": 0.8998015873015873, "eval/best/validation/acc_std": 0.004768571618616205, "eval/best/validation/f1": 0.8803089883930048, "eval/best/validation/f1_std": 0.0063966199112998855, "eval/best/test/loss": 0.36137253046035767, "eval/best/test/acc": 0.8944444444444445, "eval/best/test/acc_std": 0.004214791823105744, "eval/best/test/f1": 0.8744955577277829, "eval/best/test/f1_std": 0.00539248157171559} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_last.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..14522e364e5f2c171dacb7a66a7a6a9972d77c9d --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 42, "eval/last/lr_best": 0.005699999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 0.12431035935878754, "eval/last/train/acc": 0.9768935207116164, "eval/last/train/acc_std": 0.0010830091317216785, "eval/last/train/f1": 0.9776986261955776, "eval/last/train/f1_std": 0.001134368300206932, "eval/last/validation/loss": 0.32918018102645874, "eval/last/validation/acc": 0.8993055555555556, "eval/last/validation/acc_std": 0.004828502653002829, "eval/last/validation/f1": 0.8793307180531877, "eval/last/validation/f1_std": 0.0065135611975825, "eval/last/test/loss": 0.36121904850006104, "eval/last/test/acc": 0.8940476190476191, "eval/last/test/acc_std": 0.004234710671450838, "eval/last/test/f1": 0.8740199845934365, "eval/last/test/f1_std": 0.005408460598865317} diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..c17cc2cb782bf65f684f6e33df795d52eaf97064 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,reg,linear,hcpya_task21,best,18,0.005699999999999999,0.05,42,"[19, 1.0]",train,0.1243719831109047,0.9766829833149113,0.0010892024586996512,0.9775164879968898,0.0011385074183406598 +flat_mae,reg,linear,hcpya_task21,best,18,0.005699999999999999,0.05,42,"[19, 1.0]",validation,0.3291154205799103,0.8998015873015873,0.004768571618616205,0.8803089883930048,0.0063966199112998855 +flat_mae,reg,linear,hcpya_task21,best,18,0.005699999999999999,0.05,42,"[19, 1.0]",test,0.36137253046035767,0.8944444444444445,0.004214791823105744,0.8744955577277829,0.00539248157171559 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_best.csv b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..c17cc2cb782bf65f684f6e33df795d52eaf97064 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,reg,linear,hcpya_task21,best,18,0.005699999999999999,0.05,42,"[19, 1.0]",train,0.1243719831109047,0.9766829833149113,0.0010892024586996512,0.9775164879968898,0.0011385074183406598 +flat_mae,reg,linear,hcpya_task21,best,18,0.005699999999999999,0.05,42,"[19, 1.0]",validation,0.3291154205799103,0.8998015873015873,0.004768571618616205,0.8803089883930048,0.0063966199112998855 +flat_mae,reg,linear,hcpya_task21,best,18,0.005699999999999999,0.05,42,"[19, 1.0]",test,0.36137253046035767,0.8944444444444445,0.004214791823105744,0.8744955577277829,0.00539248157171559 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_last.csv b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..6f651ba5457a4f8a75b810f1259bf07c0d5fc1fd --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,reg,linear,hcpya_task21,last,19,0.005699999999999999,0.05,42,"[19, 1.0]",train,0.12431035935878754,0.9768935207116164,0.0010830091317216785,0.9776986261955776,0.001134368300206932 +flat_mae,reg,linear,hcpya_task21,last,19,0.005699999999999999,0.05,42,"[19, 1.0]",validation,0.32918018102645874,0.8993055555555556,0.004828502653002829,0.8793307180531877,0.0065135611975825 +flat_mae,reg,linear,hcpya_task21,last,19,0.005699999999999999,0.05,42,"[19, 1.0]",test,0.36121904850006104,0.8940476190476191,0.004234710671450838,0.8740199845934365,0.005408460598865317 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/log.txt b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..28e782743a10322086a29f790aeaf822c235b628 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/log.txt @@ -0,0 +1,890 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 21:45:18 +config: +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (hcpya_task21 reg linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear +model: flat_mae +representation: reg +classifier: linear +dataset: hcpya_task21 +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=False, reg_tokens=1, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (reg): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x LinearClassifier( + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 0.8M (0.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:20:48 lr: nan time: 3.1223 data: 2.7551 max mem: 3910 +train: [0] [ 20/400] eta: 0:02:57 lr: 0.000003 loss: 3.1031 (3.1165) grad: 0.3937 (0.3984) time: 0.3349 data: 0.0061 max mem: 3951 +train: [0] [ 40/400] eta: 0:02:26 lr: 0.000006 loss: 3.0978 (3.0987) grad: 0.3755 (0.3874) time: 0.3441 data: 0.0036 max mem: 3951 +train: [0] [ 60/400] eta: 0:02:11 lr: 0.000009 loss: 3.0828 (3.0871) grad: 0.3792 (0.3848) time: 0.3463 data: 0.0036 max mem: 3951 +train: [0] [ 80/400] eta: 0:01:59 lr: 0.000012 loss: 3.0430 (3.0744) grad: 0.3792 (0.3790) time: 0.3339 data: 0.0038 max mem: 3951 +train: [0] [100/400] eta: 0:01:50 lr: 0.000015 loss: 3.0081 (3.0600) grad: 0.3457 (0.3698) time: 0.3520 data: 0.0038 max mem: 3951 +train: [0] [120/400] eta: 0:01:42 lr: 0.000018 loss: 2.9826 (3.0416) grad: 0.3457 (0.3657) time: 0.3391 data: 0.0038 max mem: 3951 +train: [0] [140/400] eta: 0:01:33 lr: 0.000021 loss: 2.9259 (3.0246) grad: 0.3451 (0.3616) time: 0.3404 data: 0.0042 max mem: 3951 +train: [0] [160/400] eta: 0:01:25 lr: 0.000024 loss: 2.8981 (3.0064) grad: 0.3108 (0.3548) time: 0.3284 data: 0.0041 max mem: 3951 +train: [0] [180/400] eta: 0:01:18 lr: 0.000027 loss: 2.8495 (2.9869) grad: 0.3095 (0.3518) time: 0.3402 data: 0.0043 max mem: 3951 +train: [0] [200/400] eta: 0:01:10 lr: 0.000030 loss: 2.8239 (2.9683) grad: 0.3196 (0.3485) time: 0.3267 data: 0.0042 max mem: 3951 +train: [0] [220/400] eta: 0:01:03 lr: 0.000033 loss: 2.7564 (2.9471) grad: 0.3193 (0.3458) time: 0.3570 data: 0.0041 max mem: 3951 +train: [0] [240/400] eta: 0:00:56 lr: 0.000036 loss: 2.7231 (2.9284) grad: 0.3045 (0.3420) time: 0.3567 data: 0.0041 max mem: 3951 +train: [0] [260/400] eta: 0:00:49 lr: 0.000039 loss: 2.6931 (2.9091) grad: 0.3045 (0.3398) time: 0.3445 data: 0.0042 max mem: 3951 +train: [0] [280/400] eta: 0:00:42 lr: 0.000042 loss: 2.6645 (2.8909) grad: 0.2918 (0.3361) time: 0.3393 data: 0.0037 max mem: 3951 +train: [0] [300/400] eta: 0:00:36 lr: 0.000045 loss: 2.5851 (2.8701) grad: 0.2814 (0.3330) time: 0.5145 data: 0.1973 max mem: 3951 +train: [0] [320/400] eta: 0:00:28 lr: 0.000048 loss: 2.5755 (2.8511) grad: 0.2754 (0.3299) time: 0.3355 data: 0.0039 max mem: 3951 +train: [0] [340/400] eta: 0:00:21 lr: 0.000051 loss: 2.5373 (2.8323) grad: 0.2898 (0.3279) time: 0.3273 data: 0.0031 max mem: 3951 +train: [0] [360/400] eta: 0:00:14 lr: 0.000054 loss: 2.5028 (2.8143) grad: 0.2898 (0.3262) time: 0.3697 data: 0.0041 max mem: 3951 +train: [0] [380/400] eta: 0:00:07 lr: 0.000057 loss: 2.4693 (2.7955) grad: 0.2777 (0.3239) time: 0.3315 data: 0.0043 max mem: 3951 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 2.4230 (2.7754) grad: 0.2762 (0.3216) time: 0.3396 data: 0.0039 max mem: 3951 +train: [0] Total time: 0:02:23 (0.3577 s / it) +train: [0] Summary: lr: 0.000060 loss: 2.4230 (2.7754) grad: 0.2762 (0.3216) +eval (validation): [0] [ 0/63] eta: 0:03:28 time: 3.3048 data: 3.0385 max mem: 3951 +eval (validation): [0] [20/63] eta: 0:00:20 time: 0.3410 data: 0.0048 max mem: 3951 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3137 data: 0.0033 max mem: 3951 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3122 data: 0.0035 max mem: 3951 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3067 data: 0.0032 max mem: 3951 +eval (validation): [0] Total time: 0:00:23 (0.3743 s / it) +cv: [0] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.851 acc: 0.773 f1: 0.717 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:21:03 lr: nan time: 3.1588 data: 2.8999 max mem: 3951 +train: [1] [ 20/400] eta: 0:03:09 lr: 0.000063 loss: 2.3680 (2.3736) grad: 0.2839 (0.2819) time: 0.3669 data: 0.0029 max mem: 3951 +train: [1] [ 40/400] eta: 0:02:33 lr: 0.000066 loss: 2.3680 (2.3766) grad: 0.2802 (0.2806) time: 0.3512 data: 0.0039 max mem: 3951 +train: [1] [ 60/400] eta: 0:02:16 lr: 0.000069 loss: 2.3664 (2.3709) grad: 0.2802 (0.2832) time: 0.3460 data: 0.0044 max mem: 3951 +train: [1] [ 80/400] eta: 0:02:03 lr: 0.000072 loss: 2.3493 (2.3594) grad: 0.2751 (0.2796) time: 0.3472 data: 0.0040 max mem: 3951 +train: [1] [100/400] eta: 0:01:53 lr: 0.000075 loss: 2.3100 (2.3502) grad: 0.2677 (0.2748) time: 0.3389 data: 0.0038 max mem: 3951 +train: [1] [120/400] eta: 0:01:44 lr: 0.000078 loss: 2.2809 (2.3347) grad: 0.2596 (0.2743) time: 0.3488 data: 0.0042 max mem: 3951 +train: [1] [140/400] eta: 0:01:35 lr: 0.000081 loss: 2.2283 (2.3173) grad: 0.2597 (0.2733) time: 0.3277 data: 0.0043 max mem: 3951 +train: [1] [160/400] eta: 0:01:27 lr: 0.000084 loss: 2.2515 (2.3130) grad: 0.2568 (0.2716) time: 0.3429 data: 0.0042 max mem: 3951 +train: [1] [180/400] eta: 0:01:19 lr: 0.000087 loss: 2.2541 (2.3027) grad: 0.2551 (0.2710) time: 0.3369 data: 0.0042 max mem: 3951 +train: [1] [200/400] eta: 0:01:11 lr: 0.000090 loss: 2.2032 (2.2924) grad: 0.2536 (0.2688) time: 0.3192 data: 0.0041 max mem: 3951 +train: [1] [220/400] eta: 0:01:04 lr: 0.000093 loss: 2.1350 (2.2776) grad: 0.2523 (0.2676) time: 0.3544 data: 0.0046 max mem: 3951 +train: [1] [240/400] eta: 0:00:56 lr: 0.000096 loss: 2.1331 (2.2654) grad: 0.2592 (0.2667) time: 0.3524 data: 0.0045 max mem: 3951 +train: [1] [260/400] eta: 0:00:49 lr: 0.000099 loss: 2.1495 (2.2534) grad: 0.2592 (0.2660) time: 0.3414 data: 0.0043 max mem: 3951 +train: [1] [280/400] eta: 0:00:42 lr: 0.000102 loss: 2.1486 (2.2439) grad: 0.2363 (0.2643) time: 0.3340 data: 0.0036 max mem: 3951 +train: [1] [300/400] eta: 0:00:36 lr: 0.000105 loss: 2.0900 (2.2322) grad: 0.2332 (0.2632) time: 0.5248 data: 0.1811 max mem: 3951 +train: [1] [320/400] eta: 0:00:29 lr: 0.000108 loss: 2.0549 (2.2218) grad: 0.2343 (0.2618) time: 0.3333 data: 0.0044 max mem: 3951 +train: [1] [340/400] eta: 0:00:21 lr: 0.000111 loss: 2.0210 (2.2083) grad: 0.2486 (0.2616) time: 0.3207 data: 0.0037 max mem: 3951 +train: [1] [360/400] eta: 0:00:14 lr: 0.000114 loss: 2.0028 (2.1970) grad: 0.2524 (0.2607) time: 0.3512 data: 0.0039 max mem: 3951 +train: [1] [380/400] eta: 0:00:07 lr: 0.000117 loss: 1.9930 (2.1859) grad: 0.2487 (0.2603) time: 0.3425 data: 0.0042 max mem: 3951 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.9858 (2.1764) grad: 0.2437 (0.2592) time: 0.3439 data: 0.0044 max mem: 3951 +train: [1] Total time: 0:02:23 (0.3588 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.9858 (2.1764) grad: 0.2437 (0.2592) +eval (validation): [1] [ 0/63] eta: 0:03:27 time: 3.2969 data: 3.0836 max mem: 3951 +eval (validation): [1] [20/63] eta: 0:00:19 time: 0.3157 data: 0.0029 max mem: 3951 +eval (validation): [1] [40/63] eta: 0:00:08 time: 0.3166 data: 0.0032 max mem: 3951 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3178 data: 0.0037 max mem: 3951 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3160 data: 0.0036 max mem: 3951 +eval (validation): [1] Total time: 0:00:23 (0.3682 s / it) +cv: [1] best hparam: (50, 1.0) (048) ('048_lr5.0e+01_wd1.0e+00') loss: 0.534 acc: 0.838 f1: 0.807 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:21:31 lr: nan time: 3.2277 data: 2.9602 max mem: 3951 +train: [2] [ 20/400] eta: 0:03:10 lr: 0.000123 loss: 1.9446 (1.9498) grad: 0.2313 (0.2374) time: 0.3649 data: 0.0038 max mem: 3951 +train: [2] [ 40/400] eta: 0:02:34 lr: 0.000126 loss: 1.9335 (1.9381) grad: 0.2323 (0.2378) time: 0.3540 data: 0.0037 max mem: 3951 +train: [2] [ 60/400] eta: 0:02:15 lr: 0.000129 loss: 1.9129 (1.9244) grad: 0.2316 (0.2350) time: 0.3387 data: 0.0045 max mem: 3951 +train: [2] [ 80/400] eta: 0:02:03 lr: 0.000132 loss: 1.9241 (1.9260) grad: 0.2243 (0.2330) time: 0.3425 data: 0.0041 max mem: 3951 +train: [2] [100/400] eta: 0:01:52 lr: 0.000135 loss: 1.9241 (1.9233) grad: 0.2231 (0.2321) time: 0.3402 data: 0.0042 max mem: 3951 +train: [2] [120/400] eta: 0:01:44 lr: 0.000138 loss: 1.9099 (1.9174) grad: 0.2316 (0.2330) time: 0.3472 data: 0.0038 max mem: 3951 +train: [2] [140/400] eta: 0:01:35 lr: 0.000141 loss: 1.8886 (1.9160) grad: 0.2346 (0.2328) time: 0.3525 data: 0.0043 max mem: 3951 +train: [2] [160/400] eta: 0:01:27 lr: 0.000144 loss: 1.8938 (1.9136) grad: 0.2196 (0.2310) time: 0.3424 data: 0.0038 max mem: 3951 +train: [2] [180/400] eta: 0:01:20 lr: 0.000147 loss: 1.8558 (1.9062) grad: 0.2114 (0.2298) time: 0.3483 data: 0.0043 max mem: 3951 +train: [2] [200/400] eta: 0:01:11 lr: 0.000150 loss: 1.8421 (1.8982) grad: 0.2149 (0.2289) time: 0.3245 data: 0.0039 max mem: 3951 +train: [2] [220/400] eta: 0:01:04 lr: 0.000153 loss: 1.7985 (1.8884) grad: 0.2119 (0.2282) time: 0.3434 data: 0.0037 max mem: 3951 +train: [2] [240/400] eta: 0:00:57 lr: 0.000156 loss: 1.7985 (1.8832) grad: 0.2121 (0.2279) time: 0.3621 data: 0.0043 max mem: 3951 +train: [2] [260/400] eta: 0:00:50 lr: 0.000159 loss: 1.8136 (1.8761) grad: 0.2197 (0.2273) time: 0.3495 data: 0.0040 max mem: 3951 +train: [2] [280/400] eta: 0:00:42 lr: 0.000162 loss: 1.7794 (1.8710) grad: 0.2111 (0.2264) time: 0.3387 data: 0.0041 max mem: 3951 +train: [2] [300/400] eta: 0:00:36 lr: 0.000165 loss: 1.7848 (1.8656) grad: 0.2128 (0.2256) time: 0.5427 data: 0.1911 max mem: 3951 +train: [2] [320/400] eta: 0:00:29 lr: 0.000168 loss: 1.7963 (1.8597) grad: 0.2147 (0.2241) time: 0.3388 data: 0.0044 max mem: 3951 +train: [2] [340/400] eta: 0:00:21 lr: 0.000171 loss: 1.7325 (1.8521) grad: 0.2031 (0.2230) time: 0.3419 data: 0.0037 max mem: 3951 +train: [2] [360/400] eta: 0:00:14 lr: 0.000174 loss: 1.7287 (1.8467) grad: 0.2032 (0.2221) time: 0.3640 data: 0.0042 max mem: 3951 +train: [2] [380/400] eta: 0:00:07 lr: 0.000177 loss: 1.7548 (1.8414) grad: 0.2102 (0.2216) time: 0.3382 data: 0.0038 max mem: 3951 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 1.7247 (1.8349) grad: 0.2185 (0.2212) time: 0.3397 data: 0.0033 max mem: 3951 +train: [2] Total time: 0:02:25 (0.3634 s / it) +train: [2] Summary: lr: 0.000180 loss: 1.7247 (1.8349) grad: 0.2185 (0.2212) +eval (validation): [2] [ 0/63] eta: 0:03:28 time: 3.3049 data: 3.0350 max mem: 3951 +eval (validation): [2] [20/63] eta: 0:00:21 time: 0.3655 data: 0.0197 max mem: 3951 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3304 data: 0.0032 max mem: 3951 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3212 data: 0.0037 max mem: 3951 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3134 data: 0.0034 max mem: 3951 +eval (validation): [2] Total time: 0:00:24 (0.3894 s / it) +cv: [2] best hparam: (43, 1.0) (047) ('047_lr4.3e+01_wd1.0e+00') loss: 0.442 acc: 0.866 f1: 0.840 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:20:11 lr: nan time: 3.0292 data: 2.8155 max mem: 3951 +train: [3] [ 20/400] eta: 0:02:59 lr: 0.000183 loss: 1.6395 (1.6480) grad: 0.2060 (0.2101) time: 0.3441 data: 0.0041 max mem: 3951 +train: [3] [ 40/400] eta: 0:02:27 lr: 0.000186 loss: 1.6762 (1.6605) grad: 0.2048 (0.2077) time: 0.3463 data: 0.0038 max mem: 3951 +train: [3] [ 60/400] eta: 0:02:12 lr: 0.000189 loss: 1.6896 (1.6630) grad: 0.1999 (0.2078) time: 0.3491 data: 0.0041 max mem: 3951 +train: [3] [ 80/400] eta: 0:02:01 lr: 0.000192 loss: 1.6802 (1.6625) grad: 0.1949 (0.2040) time: 0.3442 data: 0.0044 max mem: 3951 +train: [3] [100/400] eta: 0:01:51 lr: 0.000195 loss: 1.6620 (1.6620) grad: 0.1946 (0.2034) time: 0.3376 data: 0.0045 max mem: 3951 +train: [3] [120/400] eta: 0:01:42 lr: 0.000198 loss: 1.6508 (1.6579) grad: 0.2012 (0.2040) time: 0.3450 data: 0.0040 max mem: 3951 +train: [3] [140/400] eta: 0:01:35 lr: 0.000201 loss: 1.6102 (1.6509) grad: 0.2065 (0.2050) time: 0.3588 data: 0.0040 max mem: 3951 +train: [3] [160/400] eta: 0:01:26 lr: 0.000204 loss: 1.6343 (1.6514) grad: 0.1971 (0.2037) time: 0.3268 data: 0.0043 max mem: 3951 +train: [3] [180/400] eta: 0:01:18 lr: 0.000207 loss: 1.6343 (1.6483) grad: 0.1945 (0.2046) time: 0.3402 data: 0.0042 max mem: 3951 +train: [3] [200/400] eta: 0:01:11 lr: 0.000210 loss: 1.6198 (1.6449) grad: 0.2014 (0.2043) time: 0.3558 data: 0.0044 max mem: 3951 +train: [3] [220/400] eta: 0:01:03 lr: 0.000213 loss: 1.6022 (1.6399) grad: 0.1988 (0.2043) time: 0.3276 data: 0.0042 max mem: 3951 +train: [3] [240/400] eta: 0:00:56 lr: 0.000216 loss: 1.5887 (1.6349) grad: 0.1937 (0.2035) time: 0.3530 data: 0.0046 max mem: 3951 +train: [3] [260/400] eta: 0:00:49 lr: 0.000219 loss: 1.5959 (1.6316) grad: 0.1883 (0.2021) time: 0.3547 data: 0.0042 max mem: 3951 +train: [3] [280/400] eta: 0:00:42 lr: 0.000222 loss: 1.6223 (1.6308) grad: 0.1867 (0.2015) time: 0.3438 data: 0.0043 max mem: 3951 +train: [3] [300/400] eta: 0:00:36 lr: 0.000225 loss: 1.5868 (1.6265) grad: 0.1867 (0.2010) time: 0.5116 data: 0.1818 max mem: 3951 +train: [3] [320/400] eta: 0:00:29 lr: 0.000228 loss: 1.5401 (1.6203) grad: 0.1885 (0.2000) time: 0.3287 data: 0.0035 max mem: 3951 +train: [3] [340/400] eta: 0:00:21 lr: 0.000231 loss: 1.5064 (1.6137) grad: 0.1916 (0.1995) time: 0.3188 data: 0.0033 max mem: 3951 +train: [3] [360/400] eta: 0:00:14 lr: 0.000234 loss: 1.5217 (1.6098) grad: 0.1859 (0.1988) time: 0.3528 data: 0.0034 max mem: 3951 +train: [3] [380/400] eta: 0:00:07 lr: 0.000237 loss: 1.5217 (1.6035) grad: 0.1902 (0.1986) time: 0.3387 data: 0.0041 max mem: 3951 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 1.5462 (1.6016) grad: 0.1887 (0.1980) time: 0.3517 data: 0.0045 max mem: 3951 +train: [3] Total time: 0:02:23 (0.3587 s / it) +train: [3] Summary: lr: 0.000240 loss: 1.5462 (1.6016) grad: 0.1887 (0.1980) +eval (validation): [3] [ 0/63] eta: 0:03:28 time: 3.3056 data: 3.0592 max mem: 3951 +eval (validation): [3] [20/63] eta: 0:00:22 time: 0.3748 data: 0.0354 max mem: 3951 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3243 data: 0.0042 max mem: 3951 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3249 data: 0.0103 max mem: 3951 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3066 data: 0.0018 max mem: 3951 +eval (validation): [3] Total time: 0:00:24 (0.3909 s / it) +cv: [3] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.426 acc: 0.870 f1: 0.845 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:21:42 lr: nan time: 3.2554 data: 3.0284 max mem: 3951 +train: [4] [ 20/400] eta: 0:03:13 lr: 0.000243 loss: 1.4929 (1.4896) grad: 0.1929 (0.1929) time: 0.3712 data: 0.0139 max mem: 3951 +train: [4] [ 40/400] eta: 0:02:34 lr: 0.000246 loss: 1.4972 (1.4923) grad: 0.1916 (0.1882) time: 0.3475 data: 0.0036 max mem: 3951 +train: [4] [ 60/400] eta: 0:02:14 lr: 0.000249 loss: 1.5080 (1.5109) grad: 0.1898 (0.1911) time: 0.3239 data: 0.0034 max mem: 3951 +train: [4] [ 80/400] eta: 0:02:01 lr: 0.000252 loss: 1.4713 (1.4946) grad: 0.1835 (0.1884) time: 0.3343 data: 0.0040 max mem: 3951 +train: [4] [100/400] eta: 0:01:51 lr: 0.000255 loss: 1.4467 (1.4943) grad: 0.1827 (0.1888) time: 0.3407 data: 0.0042 max mem: 3951 +train: [4] [120/400] eta: 0:01:43 lr: 0.000258 loss: 1.4841 (1.4925) grad: 0.1870 (0.1877) time: 0.3457 data: 0.0041 max mem: 3951 +train: [4] [140/400] eta: 0:01:34 lr: 0.000261 loss: 1.4568 (1.4895) grad: 0.1827 (0.1868) time: 0.3333 data: 0.0033 max mem: 3951 +train: [4] [160/400] eta: 0:01:25 lr: 0.000264 loss: 1.4568 (1.4864) grad: 0.1782 (0.1857) time: 0.3193 data: 0.0035 max mem: 3951 +train: [4] [180/400] eta: 0:01:18 lr: 0.000267 loss: 1.4366 (1.4796) grad: 0.1770 (0.1854) time: 0.3395 data: 0.0040 max mem: 3951 +train: [4] [200/400] eta: 0:01:10 lr: 0.000270 loss: 1.4044 (1.4703) grad: 0.1740 (0.1850) time: 0.3226 data: 0.0039 max mem: 3951 +train: [4] [220/400] eta: 0:01:02 lr: 0.000273 loss: 1.4044 (1.4661) grad: 0.1739 (0.1840) time: 0.3198 data: 0.0038 max mem: 3951 +train: [4] [240/400] eta: 0:00:55 lr: 0.000276 loss: 1.3984 (1.4617) grad: 0.1697 (0.1831) time: 0.3118 data: 0.0041 max mem: 3951 +train: [4] [260/400] eta: 0:00:48 lr: 0.000279 loss: 1.3966 (1.4586) grad: 0.1660 (0.1823) time: 0.3341 data: 0.0042 max mem: 3951 +train: [4] [280/400] eta: 0:00:41 lr: 0.000282 loss: 1.4193 (1.4553) grad: 0.1730 (0.1820) time: 0.3309 data: 0.0042 max mem: 3951 +train: [4] [300/400] eta: 0:00:35 lr: 0.000285 loss: 1.4060 (1.4535) grad: 0.1736 (0.1816) time: 0.4942 data: 0.1670 max mem: 3951 +train: [4] [320/400] eta: 0:00:28 lr: 0.000288 loss: 1.3798 (1.4467) grad: 0.1737 (0.1817) time: 0.3320 data: 0.0034 max mem: 3951 +train: [4] [340/400] eta: 0:00:21 lr: 0.000291 loss: 1.3328 (1.4412) grad: 0.1728 (0.1812) time: 0.3274 data: 0.0037 max mem: 3951 +train: [4] [360/400] eta: 0:00:14 lr: 0.000294 loss: 1.3614 (1.4371) grad: 0.1734 (0.1810) time: 0.3334 data: 0.0043 max mem: 3951 +train: [4] [380/400] eta: 0:00:06 lr: 0.000297 loss: 1.3488 (1.4318) grad: 0.1758 (0.1807) time: 0.3245 data: 0.0039 max mem: 3951 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 1.3199 (1.4275) grad: 0.1811 (0.1809) time: 0.3229 data: 0.0037 max mem: 3951 +train: [4] Total time: 0:02:19 (0.3483 s / it) +train: [4] Summary: lr: 0.000300 loss: 1.3199 (1.4275) grad: 0.1811 (0.1809) +eval (validation): [4] [ 0/63] eta: 0:03:07 time: 2.9753 data: 2.7816 max mem: 3951 +eval (validation): [4] [20/63] eta: 0:00:19 time: 0.3254 data: 0.0045 max mem: 3951 +eval (validation): [4] [40/63] eta: 0:00:08 time: 0.2943 data: 0.0027 max mem: 3951 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.2981 data: 0.0036 max mem: 3951 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.2945 data: 0.0035 max mem: 3951 +eval (validation): [4] Total time: 0:00:22 (0.3522 s / it) +cv: [4] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.396 acc: 0.883 f1: 0.858 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:24:05 lr: nan time: 3.6147 data: 3.3486 max mem: 3951 +train: [5] [ 20/400] eta: 0:03:04 lr: 0.000300 loss: 1.3440 (1.3341) grad: 0.1660 (0.1695) time: 0.3290 data: 0.0022 max mem: 3951 +train: [5] [ 40/400] eta: 0:02:27 lr: 0.000300 loss: 1.3589 (1.3469) grad: 0.1694 (0.1724) time: 0.3305 data: 0.0039 max mem: 3951 +train: [5] [ 60/400] eta: 0:02:13 lr: 0.000300 loss: 1.3633 (1.3560) grad: 0.1664 (0.1697) time: 0.3557 data: 0.0041 max mem: 3951 +train: [5] [ 80/400] eta: 0:02:02 lr: 0.000300 loss: 1.3520 (1.3563) grad: 0.1662 (0.1710) time: 0.3488 data: 0.0042 max mem: 3951 +train: [5] [100/400] eta: 0:01:52 lr: 0.000300 loss: 1.3314 (1.3436) grad: 0.1636 (0.1689) time: 0.3476 data: 0.0042 max mem: 3951 +train: [5] [120/400] eta: 0:01:43 lr: 0.000300 loss: 1.3008 (1.3432) grad: 0.1573 (0.1683) time: 0.3371 data: 0.0042 max mem: 3951 +train: [5] [140/400] eta: 0:01:34 lr: 0.000300 loss: 1.3002 (1.3338) grad: 0.1676 (0.1685) time: 0.3315 data: 0.0041 max mem: 3951 +train: [5] [160/400] eta: 0:01:26 lr: 0.000299 loss: 1.2899 (1.3311) grad: 0.1744 (0.1694) time: 0.3391 data: 0.0039 max mem: 3951 +train: [5] [180/400] eta: 0:01:19 lr: 0.000299 loss: 1.3230 (1.3282) grad: 0.1749 (0.1700) time: 0.3555 data: 0.0041 max mem: 3951 +train: [5] [200/400] eta: 0:01:11 lr: 0.000299 loss: 1.2622 (1.3232) grad: 0.1693 (0.1699) time: 0.3553 data: 0.0040 max mem: 3951 +train: [5] [220/400] eta: 0:01:04 lr: 0.000299 loss: 1.2622 (1.3202) grad: 0.1617 (0.1699) time: 0.3368 data: 0.0041 max mem: 3951 +train: [5] [240/400] eta: 0:00:56 lr: 0.000299 loss: 1.3162 (1.3218) grad: 0.1631 (0.1698) time: 0.3300 data: 0.0040 max mem: 3951 +train: [5] [260/400] eta: 0:00:49 lr: 0.000299 loss: 1.2809 (1.3171) grad: 0.1673 (0.1702) time: 0.3296 data: 0.0038 max mem: 3951 +train: [5] [280/400] eta: 0:00:42 lr: 0.000298 loss: 1.2742 (1.3178) grad: 0.1680 (0.1703) time: 0.3495 data: 0.0043 max mem: 3951 +train: [5] [300/400] eta: 0:00:36 lr: 0.000298 loss: 1.2909 (1.3149) grad: 0.1668 (0.1698) time: 0.4967 data: 0.1826 max mem: 3951 +train: [5] [320/400] eta: 0:00:28 lr: 0.000298 loss: 1.2433 (1.3102) grad: 0.1617 (0.1691) time: 0.3460 data: 0.0097 max mem: 3951 +train: [5] [340/400] eta: 0:00:21 lr: 0.000298 loss: 1.2297 (1.3065) grad: 0.1640 (0.1692) time: 0.3302 data: 0.0034 max mem: 3951 +train: [5] [360/400] eta: 0:00:14 lr: 0.000297 loss: 1.2222 (1.3022) grad: 0.1630 (0.1685) time: 0.3392 data: 0.0040 max mem: 3951 +train: [5] [380/400] eta: 0:00:07 lr: 0.000297 loss: 1.2360 (1.2988) grad: 0.1551 (0.1681) time: 0.3411 data: 0.0044 max mem: 3951 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 1.2413 (1.2954) grad: 0.1551 (0.1677) time: 0.3395 data: 0.0043 max mem: 3951 +train: [5] Total time: 0:02:22 (0.3571 s / it) +train: [5] Summary: lr: 0.000297 loss: 1.2413 (1.2954) grad: 0.1551 (0.1677) +eval (validation): [5] [ 0/63] eta: 0:03:40 time: 3.5055 data: 3.2670 max mem: 3951 +eval (validation): [5] [20/63] eta: 0:00:20 time: 0.3324 data: 0.0103 max mem: 3951 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3312 data: 0.0086 max mem: 3951 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3218 data: 0.0037 max mem: 3951 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3209 data: 0.0038 max mem: 3951 +eval (validation): [5] Total time: 0:00:24 (0.3834 s / it) +cv: [5] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 0.388 acc: 0.884 f1: 0.863 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:22:24 lr: nan time: 3.3601 data: 3.0919 max mem: 3951 +train: [6] [ 20/400] eta: 0:03:28 lr: 0.000296 loss: 1.2598 (1.2442) grad: 0.1601 (0.1591) time: 0.4084 data: 0.0040 max mem: 3951 +train: [6] [ 40/400] eta: 0:02:37 lr: 0.000296 loss: 1.2479 (1.2227) grad: 0.1606 (0.1622) time: 0.3226 data: 0.0038 max mem: 3951 +train: [6] [ 60/400] eta: 0:02:17 lr: 0.000296 loss: 1.1900 (1.2068) grad: 0.1571 (0.1600) time: 0.3354 data: 0.0039 max mem: 3951 +train: [6] [ 80/400] eta: 0:02:05 lr: 0.000295 loss: 1.2393 (1.2238) grad: 0.1514 (0.1579) time: 0.3547 data: 0.0037 max mem: 3951 +train: [6] [100/400] eta: 0:01:55 lr: 0.000295 loss: 1.2444 (1.2226) grad: 0.1577 (0.1584) time: 0.3487 data: 0.0041 max mem: 3951 +train: [6] [120/400] eta: 0:01:45 lr: 0.000295 loss: 1.2157 (1.2224) grad: 0.1595 (0.1598) time: 0.3459 data: 0.0042 max mem: 3951 +train: [6] [140/400] eta: 0:01:36 lr: 0.000294 loss: 1.2326 (1.2247) grad: 0.1584 (0.1597) time: 0.3313 data: 0.0040 max mem: 3951 +train: [6] [160/400] eta: 0:01:28 lr: 0.000294 loss: 1.2513 (1.2274) grad: 0.1564 (0.1598) time: 0.3465 data: 0.0036 max mem: 3951 +train: [6] [180/400] eta: 0:01:20 lr: 0.000293 loss: 1.2283 (1.2244) grad: 0.1524 (0.1590) time: 0.3423 data: 0.0041 max mem: 3951 +train: [6] [200/400] eta: 0:01:12 lr: 0.000293 loss: 1.1912 (1.2224) grad: 0.1512 (0.1587) time: 0.3465 data: 0.0043 max mem: 3951 +train: [6] [220/400] eta: 0:01:05 lr: 0.000292 loss: 1.1856 (1.2177) grad: 0.1509 (0.1585) time: 0.3567 data: 0.0043 max mem: 3951 +train: [6] [240/400] eta: 0:00:57 lr: 0.000292 loss: 1.1856 (1.2176) grad: 0.1590 (0.1590) time: 0.3460 data: 0.0041 max mem: 3951 +train: [6] [260/400] eta: 0:00:50 lr: 0.000291 loss: 1.1856 (1.2126) grad: 0.1675 (0.1593) time: 0.3270 data: 0.0038 max mem: 3951 +train: [6] [280/400] eta: 0:00:43 lr: 0.000291 loss: 1.1867 (1.2138) grad: 0.1576 (0.1592) time: 0.3550 data: 0.0044 max mem: 3951 +train: [6] [300/400] eta: 0:00:36 lr: 0.000290 loss: 1.2087 (1.2121) grad: 0.1518 (0.1589) time: 0.4980 data: 0.1792 max mem: 3951 +train: [6] [320/400] eta: 0:00:29 lr: 0.000290 loss: 1.1802 (1.2098) grad: 0.1521 (0.1583) time: 0.3376 data: 0.0043 max mem: 3951 +train: [6] [340/400] eta: 0:00:21 lr: 0.000289 loss: 1.1688 (1.2074) grad: 0.1534 (0.1584) time: 0.3254 data: 0.0034 max mem: 3951 +train: [6] [360/400] eta: 0:00:14 lr: 0.000288 loss: 1.1382 (1.2034) grad: 0.1527 (0.1582) time: 0.3806 data: 0.0042 max mem: 3951 +train: [6] [380/400] eta: 0:00:07 lr: 0.000288 loss: 1.1636 (1.2025) grad: 0.1507 (0.1579) time: 0.3511 data: 0.0044 max mem: 3951 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 1.1611 (1.1992) grad: 0.1507 (0.1574) time: 0.3451 data: 0.0045 max mem: 3951 +train: [6] Total time: 0:02:25 (0.3634 s / it) +train: [6] Summary: lr: 0.000287 loss: 1.1611 (1.1992) grad: 0.1507 (0.1574) +eval (validation): [6] [ 0/63] eta: 0:03:26 time: 3.2763 data: 3.0667 max mem: 3951 +eval (validation): [6] [20/63] eta: 0:00:21 time: 0.3602 data: 0.0167 max mem: 3951 +eval (validation): [6] [40/63] eta: 0:00:10 time: 0.3745 data: 0.0036 max mem: 3951 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3062 data: 0.0033 max mem: 3951 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3053 data: 0.0033 max mem: 3951 +eval (validation): [6] Total time: 0:00:24 (0.3967 s / it) +cv: [6] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.372 acc: 0.884 f1: 0.862 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [7] [ 0/400] eta: 0:22:06 lr: nan time: 3.3170 data: 3.0506 max mem: 3951 +train: [7] [ 20/400] eta: 0:03:17 lr: 0.000286 loss: 1.1482 (1.1710) grad: 0.1519 (0.1591) time: 0.3792 data: 0.0047 max mem: 3951 +train: [7] [ 40/400] eta: 0:02:35 lr: 0.000286 loss: 1.1496 (1.1583) grad: 0.1508 (0.1564) time: 0.3390 data: 0.0037 max mem: 3951 +train: [7] [ 60/400] eta: 0:02:14 lr: 0.000285 loss: 1.1414 (1.1375) grad: 0.1491 (0.1566) time: 0.3265 data: 0.0042 max mem: 3951 +train: [7] [ 80/400] eta: 0:02:03 lr: 0.000284 loss: 1.1414 (1.1499) grad: 0.1484 (0.1549) time: 0.3565 data: 0.0040 max mem: 3951 +train: [7] [100/400] eta: 0:01:53 lr: 0.000284 loss: 1.1715 (1.1480) grad: 0.1506 (0.1556) time: 0.3449 data: 0.0041 max mem: 3951 +train: [7] [120/400] eta: 0:01:44 lr: 0.000283 loss: 1.1566 (1.1485) grad: 0.1445 (0.1542) time: 0.3415 data: 0.0037 max mem: 3951 +train: [7] [140/400] eta: 0:01:35 lr: 0.000282 loss: 1.1345 (1.1428) grad: 0.1477 (0.1549) time: 0.3277 data: 0.0034 max mem: 3951 +train: [7] [160/400] eta: 0:01:27 lr: 0.000282 loss: 1.1282 (1.1412) grad: 0.1576 (0.1550) time: 0.3512 data: 0.0040 max mem: 3951 +train: [7] [180/400] eta: 0:01:19 lr: 0.000281 loss: 1.1380 (1.1401) grad: 0.1551 (0.1551) time: 0.3405 data: 0.0041 max mem: 3951 +train: [7] [200/400] eta: 0:01:12 lr: 0.000280 loss: 1.1282 (1.1393) grad: 0.1515 (0.1548) time: 0.3550 data: 0.0046 max mem: 3951 +train: [7] [220/400] eta: 0:01:04 lr: 0.000279 loss: 1.1218 (1.1372) grad: 0.1505 (0.1541) time: 0.3439 data: 0.0045 max mem: 3951 +train: [7] [240/400] eta: 0:00:57 lr: 0.000278 loss: 1.1110 (1.1351) grad: 0.1406 (0.1535) time: 0.3523 data: 0.0045 max mem: 3951 +train: [7] [260/400] eta: 0:00:50 lr: 0.000278 loss: 1.1103 (1.1338) grad: 0.1447 (0.1530) time: 0.3445 data: 0.0042 max mem: 3951 +train: [7] [280/400] eta: 0:00:42 lr: 0.000277 loss: 1.0750 (1.1290) grad: 0.1463 (0.1523) time: 0.3423 data: 0.0044 max mem: 3951 +train: [7] [300/400] eta: 0:00:36 lr: 0.000276 loss: 1.1108 (1.1287) grad: 0.1460 (0.1521) time: 0.5006 data: 0.1846 max mem: 3951 +train: [7] [320/400] eta: 0:00:29 lr: 0.000275 loss: 1.1154 (1.1246) grad: 0.1431 (0.1516) time: 0.3398 data: 0.0168 max mem: 3951 +train: [7] [340/400] eta: 0:00:21 lr: 0.000274 loss: 1.0863 (1.1242) grad: 0.1401 (0.1510) time: 0.3761 data: 0.0024 max mem: 3951 +train: [7] [360/400] eta: 0:00:14 lr: 0.000273 loss: 1.0812 (1.1208) grad: 0.1420 (0.1505) time: 0.3448 data: 0.0042 max mem: 3951 +train: [7] [380/400] eta: 0:00:07 lr: 0.000272 loss: 1.0589 (1.1171) grad: 0.1487 (0.1506) time: 0.3489 data: 0.0040 max mem: 3951 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 1.0775 (1.1162) grad: 0.1445 (0.1504) time: 0.3391 data: 0.0043 max mem: 3951 +train: [7] Total time: 0:02:25 (0.3627 s / it) +train: [7] Summary: lr: 0.000271 loss: 1.0775 (1.1162) grad: 0.1445 (0.1504) +eval (validation): [7] [ 0/63] eta: 0:03:35 time: 3.4252 data: 3.1392 max mem: 3951 +eval (validation): [7] [20/63] eta: 0:00:20 time: 0.3391 data: 0.0041 max mem: 3951 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3325 data: 0.0037 max mem: 3951 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.2941 data: 0.0030 max mem: 3951 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.2898 data: 0.0017 max mem: 3951 +eval (validation): [7] Total time: 0:00:23 (0.3741 s / it) +cv: [7] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 0.359 acc: 0.892 f1: 0.872 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:22:30 lr: nan time: 3.3756 data: 3.0793 max mem: 3951 +train: [8] [ 20/400] eta: 0:03:10 lr: 0.000270 loss: 1.1356 (1.1340) grad: 0.1424 (0.1449) time: 0.3563 data: 0.0046 max mem: 3951 +train: [8] [ 40/400] eta: 0:02:32 lr: 0.000270 loss: 1.1172 (1.0912) grad: 0.1442 (0.1462) time: 0.3409 data: 0.0034 max mem: 3951 +train: [8] [ 60/400] eta: 0:02:15 lr: 0.000269 loss: 1.0618 (1.0882) grad: 0.1471 (0.1484) time: 0.3491 data: 0.0038 max mem: 3951 +train: [8] [ 80/400] eta: 0:02:01 lr: 0.000268 loss: 1.0840 (1.0830) grad: 0.1489 (0.1487) time: 0.3222 data: 0.0036 max mem: 3951 +train: [8] [100/400] eta: 0:01:51 lr: 0.000267 loss: 1.0460 (1.0719) grad: 0.1499 (0.1480) time: 0.3430 data: 0.0044 max mem: 3951 +train: [8] [120/400] eta: 0:01:43 lr: 0.000266 loss: 1.0433 (1.0672) grad: 0.1532 (0.1492) time: 0.3516 data: 0.0044 max mem: 3951 +train: [8] [140/400] eta: 0:01:34 lr: 0.000265 loss: 1.0519 (1.0703) grad: 0.1515 (0.1499) time: 0.3366 data: 0.0036 max mem: 3951 +train: [8] [160/400] eta: 0:01:27 lr: 0.000264 loss: 1.0670 (1.0698) grad: 0.1519 (0.1506) time: 0.3506 data: 0.0041 max mem: 3951 +train: [8] [180/400] eta: 0:01:19 lr: 0.000263 loss: 1.0511 (1.0693) grad: 0.1509 (0.1500) time: 0.3431 data: 0.0043 max mem: 3951 +train: [8] [200/400] eta: 0:01:11 lr: 0.000262 loss: 1.0630 (1.0688) grad: 0.1426 (0.1493) time: 0.3476 data: 0.0038 max mem: 3951 +train: [8] [220/400] eta: 0:01:04 lr: 0.000260 loss: 1.0695 (1.0691) grad: 0.1402 (0.1486) time: 0.3490 data: 0.0043 max mem: 3951 +train: [8] [240/400] eta: 0:00:57 lr: 0.000259 loss: 1.0651 (1.0681) grad: 0.1416 (0.1487) time: 0.3468 data: 0.0040 max mem: 3951 +train: [8] [260/400] eta: 0:00:49 lr: 0.000258 loss: 1.0630 (1.0677) grad: 0.1476 (0.1489) time: 0.3486 data: 0.0039 max mem: 3951 +train: [8] [280/400] eta: 0:00:42 lr: 0.000257 loss: 1.0506 (1.0645) grad: 0.1493 (0.1489) time: 0.3453 data: 0.0039 max mem: 3951 +train: [8] [300/400] eta: 0:00:36 lr: 0.000256 loss: 1.0518 (1.0663) grad: 0.1452 (0.1487) time: 0.5323 data: 0.1905 max mem: 3951 +train: [8] [320/400] eta: 0:00:29 lr: 0.000255 loss: 1.0940 (1.0654) grad: 0.1399 (0.1481) time: 0.3269 data: 0.0037 max mem: 3951 +train: [8] [340/400] eta: 0:00:21 lr: 0.000254 loss: 1.0307 (1.0628) grad: 0.1426 (0.1480) time: 0.3347 data: 0.0033 max mem: 3951 +train: [8] [360/400] eta: 0:00:14 lr: 0.000253 loss: 1.0086 (1.0603) grad: 0.1446 (0.1479) time: 0.3467 data: 0.0041 max mem: 3951 +train: [8] [380/400] eta: 0:00:07 lr: 0.000252 loss: 1.0299 (1.0599) grad: 0.1446 (0.1480) time: 0.3418 data: 0.0042 max mem: 3951 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 1.0454 (1.0606) grad: 0.1429 (0.1478) time: 0.3421 data: 0.0038 max mem: 3951 +train: [8] Total time: 0:02:24 (0.3610 s / it) +train: [8] Summary: lr: 0.000250 loss: 1.0454 (1.0606) grad: 0.1429 (0.1478) +eval (validation): [8] [ 0/63] eta: 0:03:29 time: 3.3330 data: 3.0620 max mem: 3951 +eval (validation): [8] [20/63] eta: 0:00:21 time: 0.3464 data: 0.0042 max mem: 3951 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3391 data: 0.0034 max mem: 3951 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3043 data: 0.0035 max mem: 3951 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3013 data: 0.0035 max mem: 3951 +eval (validation): [8] Total time: 0:00:24 (0.3819 s / it) +cv: [8] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.358 acc: 0.892 f1: 0.871 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:21:50 lr: nan time: 3.2761 data: 3.0211 max mem: 3951 +train: [9] [ 20/400] eta: 0:03:04 lr: 0.000249 loss: 1.0422 (1.0383) grad: 0.1405 (0.1463) time: 0.3463 data: 0.0032 max mem: 3951 +train: [9] [ 40/400] eta: 0:02:29 lr: 0.000248 loss: 1.0359 (1.0342) grad: 0.1393 (0.1436) time: 0.3407 data: 0.0031 max mem: 3951 +train: [9] [ 60/400] eta: 0:02:12 lr: 0.000247 loss: 1.0099 (1.0253) grad: 0.1423 (0.1458) time: 0.3416 data: 0.0046 max mem: 3951 +train: [9] [ 80/400] eta: 0:02:01 lr: 0.000246 loss: 1.0067 (1.0337) grad: 0.1423 (0.1451) time: 0.3439 data: 0.0038 max mem: 3951 +train: [9] [100/400] eta: 0:01:49 lr: 0.000244 loss: 1.0183 (1.0296) grad: 0.1365 (0.1434) time: 0.3149 data: 0.0040 max mem: 3951 +train: [9] [120/400] eta: 0:01:41 lr: 0.000243 loss: 1.0310 (1.0302) grad: 0.1396 (0.1436) time: 0.3495 data: 0.0040 max mem: 3951 +train: [9] [140/400] eta: 0:01:33 lr: 0.000242 loss: 1.0427 (1.0309) grad: 0.1396 (0.1433) time: 0.3365 data: 0.0045 max mem: 3951 +train: [9] [160/400] eta: 0:01:25 lr: 0.000241 loss: 1.0191 (1.0282) grad: 0.1413 (0.1431) time: 0.3324 data: 0.0037 max mem: 3951 +train: [9] [180/400] eta: 0:01:18 lr: 0.000240 loss: 0.9994 (1.0276) grad: 0.1411 (0.1428) time: 0.3521 data: 0.0043 max mem: 3951 +train: [9] [200/400] eta: 0:01:10 lr: 0.000238 loss: 0.9968 (1.0276) grad: 0.1364 (0.1421) time: 0.3258 data: 0.0039 max mem: 3951 +train: [9] [220/400] eta: 0:01:03 lr: 0.000237 loss: 1.0088 (1.0266) grad: 0.1360 (0.1419) time: 0.3545 data: 0.0042 max mem: 3951 +train: [9] [240/400] eta: 0:00:56 lr: 0.000236 loss: 0.9944 (1.0239) grad: 0.1360 (0.1414) time: 0.3455 data: 0.0042 max mem: 3951 +train: [9] [260/400] eta: 0:00:49 lr: 0.000234 loss: 0.9902 (1.0213) grad: 0.1311 (0.1411) time: 0.3513 data: 0.0042 max mem: 3951 +train: [9] [280/400] eta: 0:00:42 lr: 0.000233 loss: 0.9810 (1.0207) grad: 0.1397 (0.1414) time: 0.3396 data: 0.0044 max mem: 3951 +train: [9] [300/400] eta: 0:00:36 lr: 0.000232 loss: 1.0128 (1.0205) grad: 0.1369 (0.1410) time: 0.4951 data: 0.1830 max mem: 3951 +train: [9] [320/400] eta: 0:00:28 lr: 0.000230 loss: 1.0081 (1.0187) grad: 0.1385 (0.1414) time: 0.3444 data: 0.0117 max mem: 3951 +train: [9] [340/400] eta: 0:00:21 lr: 0.000229 loss: 0.9879 (1.0173) grad: 0.1434 (0.1414) time: 0.3354 data: 0.0027 max mem: 3951 +train: [9] [360/400] eta: 0:00:14 lr: 0.000228 loss: 0.9879 (1.0163) grad: 0.1434 (0.1418) time: 0.3571 data: 0.0043 max mem: 3951 +train: [9] [380/400] eta: 0:00:07 lr: 0.000226 loss: 0.9879 (1.0152) grad: 0.1416 (0.1418) time: 0.3389 data: 0.0042 max mem: 3951 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.9877 (1.0144) grad: 0.1402 (0.1421) time: 0.3350 data: 0.0042 max mem: 3951 +train: [9] Total time: 0:02:22 (0.3569 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.9877 (1.0144) grad: 0.1402 (0.1421) +eval (validation): [9] [ 0/63] eta: 0:03:25 time: 3.2631 data: 3.0051 max mem: 3951 +eval (validation): [9] [20/63] eta: 0:00:20 time: 0.3380 data: 0.0037 max mem: 3951 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3731 data: 0.0039 max mem: 3951 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3070 data: 0.0036 max mem: 3951 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3030 data: 0.0036 max mem: 3951 +eval (validation): [9] Total time: 0:00:24 (0.3902 s / it) +cv: [9] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 0.359 acc: 0.891 f1: 0.873 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:21:39 lr: nan time: 3.2477 data: 3.0247 max mem: 3951 +train: [10] [ 20/400] eta: 0:03:01 lr: 0.000224 loss: 0.9492 (0.9843) grad: 0.1445 (0.1455) time: 0.3385 data: 0.0036 max mem: 3951 +train: [10] [ 40/400] eta: 0:02:29 lr: 0.000222 loss: 0.9844 (0.9987) grad: 0.1413 (0.1408) time: 0.3530 data: 0.0033 max mem: 3951 +train: [10] [ 60/400] eta: 0:02:14 lr: 0.000221 loss: 0.9821 (0.9932) grad: 0.1408 (0.1408) time: 0.3494 data: 0.0038 max mem: 3951 +train: [10] [ 80/400] eta: 0:02:03 lr: 0.000220 loss: 0.9691 (0.9893) grad: 0.1402 (0.1400) time: 0.3566 data: 0.0042 max mem: 3951 +train: [10] [100/400] eta: 0:01:52 lr: 0.000218 loss: 0.9688 (0.9873) grad: 0.1348 (0.1397) time: 0.3411 data: 0.0042 max mem: 3951 +train: [10] [120/400] eta: 0:01:43 lr: 0.000217 loss: 0.9574 (0.9775) grad: 0.1391 (0.1392) time: 0.3293 data: 0.0040 max mem: 3951 +train: [10] [140/400] eta: 0:01:35 lr: 0.000215 loss: 0.9574 (0.9774) grad: 0.1421 (0.1404) time: 0.3523 data: 0.0043 max mem: 3951 +train: [10] [160/400] eta: 0:01:27 lr: 0.000214 loss: 0.9924 (0.9826) grad: 0.1407 (0.1397) time: 0.3420 data: 0.0041 max mem: 3951 +train: [10] [180/400] eta: 0:01:19 lr: 0.000213 loss: 1.0195 (0.9837) grad: 0.1407 (0.1399) time: 0.3594 data: 0.0041 max mem: 3951 +train: [10] [200/400] eta: 0:01:12 lr: 0.000211 loss: 0.9707 (0.9809) grad: 0.1388 (0.1397) time: 0.3495 data: 0.0045 max mem: 3951 +train: [10] [220/400] eta: 0:01:04 lr: 0.000210 loss: 0.9481 (0.9798) grad: 0.1388 (0.1402) time: 0.3393 data: 0.0041 max mem: 3951 +train: [10] [240/400] eta: 0:00:57 lr: 0.000208 loss: 0.9797 (0.9820) grad: 0.1355 (0.1394) time: 0.3530 data: 0.0044 max mem: 3951 +train: [10] [260/400] eta: 0:00:50 lr: 0.000207 loss: 0.9776 (0.9828) grad: 0.1323 (0.1391) time: 0.3477 data: 0.0040 max mem: 3951 +train: [10] [280/400] eta: 0:00:42 lr: 0.000205 loss: 0.9776 (0.9828) grad: 0.1380 (0.1396) time: 0.3319 data: 0.0043 max mem: 3951 +train: [10] [300/400] eta: 0:00:36 lr: 0.000204 loss: 0.9774 (0.9811) grad: 0.1329 (0.1388) time: 0.5182 data: 0.1744 max mem: 3951 +train: [10] [320/400] eta: 0:00:29 lr: 0.000202 loss: 0.9860 (0.9813) grad: 0.1301 (0.1386) time: 0.3526 data: 0.0037 max mem: 3951 +train: [10] [340/400] eta: 0:00:21 lr: 0.000201 loss: 0.9491 (0.9794) grad: 0.1319 (0.1382) time: 0.3272 data: 0.0037 max mem: 3951 +train: [10] [360/400] eta: 0:00:14 lr: 0.000199 loss: 0.9490 (0.9793) grad: 0.1337 (0.1382) time: 0.3566 data: 0.0040 max mem: 3951 +train: [10] [380/400] eta: 0:00:07 lr: 0.000198 loss: 0.9790 (0.9798) grad: 0.1337 (0.1377) time: 0.3416 data: 0.0041 max mem: 3951 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.9650 (0.9780) grad: 0.1282 (0.1371) time: 0.3414 data: 0.0034 max mem: 3951 +train: [10] Total time: 0:02:24 (0.3619 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.9650 (0.9780) grad: 0.1282 (0.1371) +eval (validation): [10] [ 0/63] eta: 0:03:23 time: 3.2276 data: 3.0129 max mem: 3951 +eval (validation): [10] [20/63] eta: 0:00:21 time: 0.3678 data: 0.0049 max mem: 3951 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3501 data: 0.0035 max mem: 3951 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3341 data: 0.0037 max mem: 3951 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3230 data: 0.0038 max mem: 3951 +eval (validation): [10] Total time: 0:00:25 (0.4000 s / it) +cv: [10] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.344 acc: 0.892 f1: 0.873 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:21:25 lr: nan time: 3.2135 data: 2.9919 max mem: 3951 +train: [11] [ 20/400] eta: 0:03:02 lr: 0.000195 loss: 0.9306 (0.9331) grad: 0.1326 (0.1350) time: 0.3441 data: 0.0044 max mem: 3951 +train: [11] [ 40/400] eta: 0:02:26 lr: 0.000193 loss: 0.9594 (0.9511) grad: 0.1326 (0.1363) time: 0.3320 data: 0.0029 max mem: 3951 +train: [11] [ 60/400] eta: 0:02:13 lr: 0.000192 loss: 0.9771 (0.9592) grad: 0.1405 (0.1373) time: 0.3607 data: 0.0039 max mem: 3951 +train: [11] [ 80/400] eta: 0:02:01 lr: 0.000190 loss: 0.9479 (0.9548) grad: 0.1374 (0.1368) time: 0.3345 data: 0.0046 max mem: 3951 +train: [11] [100/400] eta: 0:01:52 lr: 0.000189 loss: 0.9265 (0.9521) grad: 0.1344 (0.1370) time: 0.3639 data: 0.0038 max mem: 3951 +train: [11] [120/400] eta: 0:01:43 lr: 0.000187 loss: 0.9411 (0.9523) grad: 0.1379 (0.1383) time: 0.3369 data: 0.0040 max mem: 3951 +train: [11] [140/400] eta: 0:01:34 lr: 0.000186 loss: 0.9676 (0.9564) grad: 0.1372 (0.1380) time: 0.3285 data: 0.0040 max mem: 3951 +train: [11] [160/400] eta: 0:01:26 lr: 0.000184 loss: 0.9616 (0.9563) grad: 0.1329 (0.1376) time: 0.3437 data: 0.0040 max mem: 3951 +train: [11] [180/400] eta: 0:01:18 lr: 0.000183 loss: 0.9620 (0.9577) grad: 0.1314 (0.1370) time: 0.3432 data: 0.0043 max mem: 3951 +train: [11] [200/400] eta: 0:01:11 lr: 0.000181 loss: 0.9622 (0.9567) grad: 0.1307 (0.1361) time: 0.3406 data: 0.0042 max mem: 3951 +train: [11] [220/400] eta: 0:01:03 lr: 0.000180 loss: 0.9756 (0.9595) grad: 0.1307 (0.1361) time: 0.3304 data: 0.0034 max mem: 3951 +train: [11] [240/400] eta: 0:00:56 lr: 0.000178 loss: 0.9544 (0.9570) grad: 0.1379 (0.1365) time: 0.3473 data: 0.0037 max mem: 3951 +train: [11] [260/400] eta: 0:00:49 lr: 0.000177 loss: 0.9455 (0.9576) grad: 0.1350 (0.1362) time: 0.3533 data: 0.0042 max mem: 3951 +train: [11] [280/400] eta: 0:00:42 lr: 0.000175 loss: 0.9151 (0.9540) grad: 0.1372 (0.1366) time: 0.3546 data: 0.0041 max mem: 3951 +train: [11] [300/400] eta: 0:00:36 lr: 0.000174 loss: 0.8960 (0.9501) grad: 0.1381 (0.1365) time: 0.4903 data: 0.1700 max mem: 3951 +train: [11] [320/400] eta: 0:00:28 lr: 0.000172 loss: 0.8994 (0.9488) grad: 0.1333 (0.1361) time: 0.3424 data: 0.0072 max mem: 3951 +train: [11] [340/400] eta: 0:00:21 lr: 0.000170 loss: 0.9020 (0.9467) grad: 0.1222 (0.1354) time: 0.3383 data: 0.0028 max mem: 3951 +train: [11] [360/400] eta: 0:00:14 lr: 0.000169 loss: 0.9362 (0.9477) grad: 0.1299 (0.1353) time: 0.3424 data: 0.0044 max mem: 3951 +train: [11] [380/400] eta: 0:00:07 lr: 0.000167 loss: 0.9384 (0.9463) grad: 0.1352 (0.1351) time: 0.3312 data: 0.0039 max mem: 3951 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.8982 (0.9450) grad: 0.1280 (0.1348) time: 0.3498 data: 0.0038 max mem: 3951 +train: [11] Total time: 0:02:23 (0.3582 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.8982 (0.9450) grad: 0.1280 (0.1348) +eval (validation): [11] [ 0/63] eta: 0:03:31 time: 3.3527 data: 3.0885 max mem: 3951 +eval (validation): [11] [20/63] eta: 0:00:22 time: 0.3796 data: 0.0040 max mem: 3951 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3212 data: 0.0028 max mem: 3951 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3085 data: 0.0034 max mem: 3951 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3067 data: 0.0022 max mem: 3951 +eval (validation): [11] Total time: 0:00:24 (0.3885 s / it) +cv: [11] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.347 acc: 0.894 f1: 0.871 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [12] [ 0/400] eta: 0:22:22 lr: nan time: 3.3572 data: 3.0806 max mem: 3951 +train: [12] [ 20/400] eta: 0:03:05 lr: 0.000164 loss: 0.9188 (0.9352) grad: 0.1267 (0.1275) time: 0.3454 data: 0.0038 max mem: 3951 +train: [12] [ 40/400] eta: 0:02:29 lr: 0.000163 loss: 0.9163 (0.9263) grad: 0.1267 (0.1293) time: 0.3360 data: 0.0032 max mem: 3951 +train: [12] [ 60/400] eta: 0:02:12 lr: 0.000161 loss: 0.9141 (0.9231) grad: 0.1319 (0.1305) time: 0.3362 data: 0.0032 max mem: 3951 +train: [12] [ 80/400] eta: 0:02:02 lr: 0.000160 loss: 0.9417 (0.9270) grad: 0.1344 (0.1315) time: 0.3633 data: 0.0039 max mem: 3951 +train: [12] [100/400] eta: 0:01:54 lr: 0.000158 loss: 0.9144 (0.9218) grad: 0.1315 (0.1315) time: 0.3716 data: 0.0043 max mem: 3951 +train: [12] [120/400] eta: 0:01:45 lr: 0.000156 loss: 0.9228 (0.9227) grad: 0.1291 (0.1319) time: 0.3563 data: 0.0042 max mem: 3951 +train: [12] [140/400] eta: 0:01:36 lr: 0.000155 loss: 0.9272 (0.9231) grad: 0.1257 (0.1311) time: 0.3327 data: 0.0041 max mem: 3951 +train: [12] [160/400] eta: 0:01:27 lr: 0.000153 loss: 0.8883 (0.9208) grad: 0.1264 (0.1315) time: 0.3300 data: 0.0036 max mem: 3951 +train: [12] [180/400] eta: 0:01:19 lr: 0.000152 loss: 0.9163 (0.9223) grad: 0.1293 (0.1311) time: 0.3479 data: 0.0041 max mem: 3951 +train: [12] [200/400] eta: 0:01:12 lr: 0.000150 loss: 0.9179 (0.9264) grad: 0.1333 (0.1319) time: 0.3532 data: 0.0040 max mem: 3951 +train: [12] [220/400] eta: 0:01:05 lr: 0.000149 loss: 0.9137 (0.9242) grad: 0.1350 (0.1319) time: 0.3523 data: 0.0043 max mem: 3951 +train: [12] [240/400] eta: 0:00:57 lr: 0.000147 loss: 0.9126 (0.9248) grad: 0.1315 (0.1321) time: 0.3364 data: 0.0042 max mem: 3951 +train: [12] [260/400] eta: 0:00:50 lr: 0.000145 loss: 0.9143 (0.9255) grad: 0.1343 (0.1329) time: 0.3538 data: 0.0042 max mem: 3951 +train: [12] [280/400] eta: 0:00:42 lr: 0.000144 loss: 0.9163 (0.9277) grad: 0.1306 (0.1325) time: 0.3440 data: 0.0042 max mem: 3951 +train: [12] [300/400] eta: 0:00:36 lr: 0.000142 loss: 0.9297 (0.9282) grad: 0.1281 (0.1323) time: 0.5035 data: 0.1803 max mem: 3951 +train: [12] [320/400] eta: 0:00:29 lr: 0.000141 loss: 0.8988 (0.9266) grad: 0.1296 (0.1323) time: 0.3910 data: 0.0189 max mem: 3951 +train: [12] [340/400] eta: 0:00:22 lr: 0.000139 loss: 0.8976 (0.9251) grad: 0.1308 (0.1322) time: 0.3698 data: 0.0038 max mem: 3951 +train: [12] [360/400] eta: 0:00:14 lr: 0.000138 loss: 0.9205 (0.9245) grad: 0.1284 (0.1320) time: 0.3602 data: 0.0044 max mem: 3951 +train: [12] [380/400] eta: 0:00:07 lr: 0.000136 loss: 0.9172 (0.9226) grad: 0.1277 (0.1321) time: 0.3517 data: 0.0041 max mem: 3951 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.9013 (0.9213) grad: 0.1232 (0.1318) time: 0.3507 data: 0.0043 max mem: 3951 +train: [12] Total time: 0:02:26 (0.3672 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.9013 (0.9213) grad: 0.1232 (0.1318) +eval (validation): [12] [ 0/63] eta: 0:03:33 time: 3.3876 data: 3.1089 max mem: 3951 +eval (validation): [12] [20/63] eta: 0:00:21 time: 0.3625 data: 0.0043 max mem: 3951 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3330 data: 0.0037 max mem: 3951 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3335 data: 0.0038 max mem: 3951 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3305 data: 0.0037 max mem: 3951 +eval (validation): [12] Total time: 0:00:24 (0.3945 s / it) +cv: [12] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.339 acc: 0.895 f1: 0.876 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [13] [ 0/400] eta: 0:22:28 lr: nan time: 3.3709 data: 3.0806 max mem: 3951 +train: [13] [ 20/400] eta: 0:03:11 lr: 0.000133 loss: 0.9236 (0.9400) grad: 0.1243 (0.1272) time: 0.3604 data: 0.0032 max mem: 3951 +train: [13] [ 40/400] eta: 0:02:32 lr: 0.000131 loss: 0.9234 (0.9191) grad: 0.1257 (0.1284) time: 0.3380 data: 0.0040 max mem: 3951 +train: [13] [ 60/400] eta: 0:02:14 lr: 0.000130 loss: 0.8799 (0.9110) grad: 0.1292 (0.1292) time: 0.3411 data: 0.0045 max mem: 3951 +train: [13] [ 80/400] eta: 0:02:02 lr: 0.000128 loss: 0.8768 (0.9068) grad: 0.1292 (0.1290) time: 0.3463 data: 0.0045 max mem: 3951 +train: [13] [100/400] eta: 0:01:54 lr: 0.000127 loss: 0.8871 (0.9091) grad: 0.1339 (0.1308) time: 0.3669 data: 0.0041 max mem: 3951 +train: [13] [120/400] eta: 0:01:45 lr: 0.000125 loss: 0.9043 (0.9047) grad: 0.1271 (0.1298) time: 0.3540 data: 0.0046 max mem: 3951 +train: [13] [140/400] eta: 0:01:36 lr: 0.000124 loss: 0.8971 (0.9057) grad: 0.1254 (0.1305) time: 0.3322 data: 0.0042 max mem: 3951 +train: [13] [160/400] eta: 0:01:27 lr: 0.000122 loss: 0.8971 (0.9041) grad: 0.1328 (0.1310) time: 0.3324 data: 0.0038 max mem: 3951 +train: [13] [180/400] eta: 0:01:20 lr: 0.000120 loss: 0.8778 (0.9020) grad: 0.1339 (0.1315) time: 0.3532 data: 0.0045 max mem: 3951 +train: [13] [200/400] eta: 0:01:12 lr: 0.000119 loss: 0.8823 (0.8985) grad: 0.1369 (0.1319) time: 0.3508 data: 0.0042 max mem: 3951 +train: [13] [220/400] eta: 0:01:05 lr: 0.000117 loss: 0.8898 (0.9020) grad: 0.1321 (0.1318) time: 0.3529 data: 0.0039 max mem: 3951 +train: [13] [240/400] eta: 0:00:57 lr: 0.000116 loss: 0.8899 (0.9021) grad: 0.1279 (0.1313) time: 0.3387 data: 0.0038 max mem: 3951 +train: [13] [260/400] eta: 0:00:50 lr: 0.000114 loss: 0.8838 (0.9020) grad: 0.1299 (0.1319) time: 0.3640 data: 0.0041 max mem: 3951 +train: [13] [280/400] eta: 0:00:43 lr: 0.000113 loss: 0.8958 (0.9035) grad: 0.1340 (0.1320) time: 0.3599 data: 0.0043 max mem: 3951 +train: [13] [300/400] eta: 0:00:37 lr: 0.000111 loss: 0.8961 (0.9037) grad: 0.1327 (0.1320) time: 0.5196 data: 0.1930 max mem: 3951 +train: [13] [320/400] eta: 0:00:29 lr: 0.000110 loss: 0.9018 (0.9041) grad: 0.1293 (0.1320) time: 0.3204 data: 0.0033 max mem: 3951 +train: [13] [340/400] eta: 0:00:21 lr: 0.000108 loss: 0.9155 (0.9054) grad: 0.1301 (0.1321) time: 0.3462 data: 0.0034 max mem: 3951 +train: [13] [360/400] eta: 0:00:14 lr: 0.000107 loss: 0.9036 (0.9044) grad: 0.1296 (0.1316) time: 0.3506 data: 0.0047 max mem: 3951 +train: [13] [380/400] eta: 0:00:07 lr: 0.000105 loss: 0.8868 (0.9040) grad: 0.1232 (0.1314) time: 0.3429 data: 0.0045 max mem: 3951 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.9011 (0.9035) grad: 0.1299 (0.1314) time: 0.3509 data: 0.0040 max mem: 3951 +train: [13] Total time: 0:02:25 (0.3642 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.9011 (0.9035) grad: 0.1299 (0.1314) +eval (validation): [13] [ 0/63] eta: 0:03:26 time: 3.2748 data: 3.0674 max mem: 3951 +eval (validation): [13] [20/63] eta: 0:00:20 time: 0.3407 data: 0.0095 max mem: 3951 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3487 data: 0.0032 max mem: 3951 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3237 data: 0.0030 max mem: 3951 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3182 data: 0.0022 max mem: 3951 +eval (validation): [13] Total time: 0:00:24 (0.3896 s / it) +cv: [13] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.339 acc: 0.898 f1: 0.877 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [14] [ 0/400] eta: 0:22:31 lr: nan time: 3.3783 data: 3.1503 max mem: 3951 +train: [14] [ 20/400] eta: 0:03:14 lr: 0.000102 loss: 0.9344 (0.9238) grad: 0.1329 (0.1347) time: 0.3697 data: 0.0082 max mem: 3951 +train: [14] [ 40/400] eta: 0:02:40 lr: 0.000101 loss: 0.8962 (0.9029) grad: 0.1329 (0.1318) time: 0.3751 data: 0.0041 max mem: 3951 +train: [14] [ 60/400] eta: 0:02:21 lr: 0.000099 loss: 0.8912 (0.9011) grad: 0.1309 (0.1329) time: 0.3563 data: 0.0043 max mem: 3951 +train: [14] [ 80/400] eta: 0:02:08 lr: 0.000098 loss: 0.9103 (0.9067) grad: 0.1364 (0.1355) time: 0.3624 data: 0.0047 max mem: 3951 +train: [14] [100/400] eta: 0:01:58 lr: 0.000096 loss: 0.8895 (0.9009) grad: 0.1369 (0.1348) time: 0.3542 data: 0.0042 max mem: 3951 +train: [14] [120/400] eta: 0:01:49 lr: 0.000095 loss: 0.8759 (0.8951) grad: 0.1310 (0.1344) time: 0.3801 data: 0.0039 max mem: 3951 +train: [14] [140/400] eta: 0:01:39 lr: 0.000093 loss: 0.8678 (0.8905) grad: 0.1310 (0.1341) time: 0.3337 data: 0.0038 max mem: 3951 +train: [14] [160/400] eta: 0:01:30 lr: 0.000092 loss: 0.8613 (0.8881) grad: 0.1328 (0.1334) time: 0.3434 data: 0.0035 max mem: 3951 +train: [14] [180/400] eta: 0:01:22 lr: 0.000090 loss: 0.8584 (0.8855) grad: 0.1268 (0.1328) time: 0.3481 data: 0.0040 max mem: 3951 +train: [14] [200/400] eta: 0:01:14 lr: 0.000089 loss: 0.8687 (0.8867) grad: 0.1284 (0.1330) time: 0.3566 data: 0.0038 max mem: 3951 +train: [14] [220/400] eta: 0:01:07 lr: 0.000088 loss: 0.8940 (0.8879) grad: 0.1332 (0.1328) time: 0.3658 data: 0.0040 max mem: 3951 +train: [14] [240/400] eta: 0:00:59 lr: 0.000086 loss: 0.8697 (0.8847) grad: 0.1318 (0.1324) time: 0.3459 data: 0.0042 max mem: 3951 +train: [14] [260/400] eta: 0:00:51 lr: 0.000085 loss: 0.8617 (0.8848) grad: 0.1244 (0.1322) time: 0.3553 data: 0.0040 max mem: 3951 +train: [14] [280/400] eta: 0:00:44 lr: 0.000083 loss: 0.8796 (0.8855) grad: 0.1249 (0.1322) time: 0.3525 data: 0.0039 max mem: 3951 +train: [14] [300/400] eta: 0:00:37 lr: 0.000082 loss: 0.8944 (0.8863) grad: 0.1296 (0.1322) time: 0.5438 data: 0.1928 max mem: 3951 +train: [14] [320/400] eta: 0:00:30 lr: 0.000081 loss: 0.8875 (0.8849) grad: 0.1288 (0.1321) time: 0.3327 data: 0.0040 max mem: 3951 +train: [14] [340/400] eta: 0:00:22 lr: 0.000079 loss: 0.8651 (0.8853) grad: 0.1281 (0.1319) time: 0.3649 data: 0.0043 max mem: 3951 +train: [14] [360/400] eta: 0:00:15 lr: 0.000078 loss: 0.8683 (0.8852) grad: 0.1301 (0.1319) time: 0.3623 data: 0.0044 max mem: 3951 +train: [14] [380/400] eta: 0:00:07 lr: 0.000076 loss: 0.8715 (0.8860) grad: 0.1291 (0.1315) time: 0.3485 data: 0.0042 max mem: 3951 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.8512 (0.8841) grad: 0.1268 (0.1315) time: 0.3395 data: 0.0041 max mem: 3951 +train: [14] Total time: 0:02:29 (0.3727 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.8512 (0.8841) grad: 0.1268 (0.1315) +eval (validation): [14] [ 0/63] eta: 0:03:32 time: 3.3764 data: 3.1023 max mem: 3951 +eval (validation): [14] [20/63] eta: 0:00:20 time: 0.3341 data: 0.0039 max mem: 3951 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3419 data: 0.0040 max mem: 3951 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3433 data: 0.0033 max mem: 3951 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3308 data: 0.0036 max mem: 3951 +eval (validation): [14] Total time: 0:00:24 (0.3928 s / it) +cv: [14] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.342 acc: 0.899 f1: 0.879 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [15] [ 0/400] eta: 0:25:57 lr: nan time: 3.8928 data: 3.6597 max mem: 3951 +train: [15] [ 20/400] eta: 0:03:16 lr: 0.000074 loss: 0.8992 (0.9067) grad: 0.1286 (0.1291) time: 0.3493 data: 0.0123 max mem: 3951 +train: [15] [ 40/400] eta: 0:02:36 lr: 0.000072 loss: 0.8660 (0.8911) grad: 0.1286 (0.1274) time: 0.3448 data: 0.0031 max mem: 3951 +train: [15] [ 60/400] eta: 0:02:17 lr: 0.000071 loss: 0.8660 (0.8904) grad: 0.1286 (0.1294) time: 0.3408 data: 0.0033 max mem: 3951 +train: [15] [ 80/400] eta: 0:02:04 lr: 0.000070 loss: 0.8712 (0.8880) grad: 0.1344 (0.1301) time: 0.3485 data: 0.0033 max mem: 3951 +train: [15] [100/400] eta: 0:01:54 lr: 0.000068 loss: 0.8649 (0.8805) grad: 0.1309 (0.1304) time: 0.3481 data: 0.0037 max mem: 3951 +train: [15] [120/400] eta: 0:01:46 lr: 0.000067 loss: 0.8678 (0.8792) grad: 0.1276 (0.1302) time: 0.3751 data: 0.0041 max mem: 3951 +train: [15] [140/400] eta: 0:01:37 lr: 0.000066 loss: 0.8769 (0.8824) grad: 0.1294 (0.1315) time: 0.3298 data: 0.0038 max mem: 3951 +train: [15] [160/400] eta: 0:01:29 lr: 0.000064 loss: 0.8970 (0.8840) grad: 0.1294 (0.1310) time: 0.3583 data: 0.0040 max mem: 3951 +train: [15] [180/400] eta: 0:01:20 lr: 0.000063 loss: 0.8919 (0.8829) grad: 0.1285 (0.1307) time: 0.3341 data: 0.0042 max mem: 3951 +train: [15] [200/400] eta: 0:01:12 lr: 0.000062 loss: 0.8833 (0.8842) grad: 0.1289 (0.1305) time: 0.3352 data: 0.0042 max mem: 3951 +train: [15] [220/400] eta: 0:01:05 lr: 0.000061 loss: 0.8739 (0.8829) grad: 0.1247 (0.1304) time: 0.3553 data: 0.0042 max mem: 3951 +train: [15] [240/400] eta: 0:00:57 lr: 0.000059 loss: 0.8691 (0.8834) grad: 0.1247 (0.1304) time: 0.3455 data: 0.0039 max mem: 3951 +train: [15] [260/400] eta: 0:00:50 lr: 0.000058 loss: 0.8763 (0.8835) grad: 0.1275 (0.1301) time: 0.3450 data: 0.0038 max mem: 3951 +train: [15] [280/400] eta: 0:00:43 lr: 0.000057 loss: 0.8763 (0.8834) grad: 0.1270 (0.1298) time: 0.3577 data: 0.0043 max mem: 3951 +train: [15] [300/400] eta: 0:00:36 lr: 0.000056 loss: 0.9004 (0.8847) grad: 0.1275 (0.1300) time: 0.4975 data: 0.1829 max mem: 3951 +train: [15] [320/400] eta: 0:00:29 lr: 0.000054 loss: 0.8933 (0.8833) grad: 0.1270 (0.1295) time: 0.3449 data: 0.0045 max mem: 3951 +train: [15] [340/400] eta: 0:00:21 lr: 0.000053 loss: 0.8633 (0.8823) grad: 0.1270 (0.1294) time: 0.3445 data: 0.0038 max mem: 3951 +train: [15] [360/400] eta: 0:00:14 lr: 0.000052 loss: 0.8608 (0.8814) grad: 0.1313 (0.1294) time: 0.3674 data: 0.0045 max mem: 3951 +train: [15] [380/400] eta: 0:00:07 lr: 0.000051 loss: 0.8659 (0.8808) grad: 0.1314 (0.1296) time: 0.3454 data: 0.0041 max mem: 3951 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.8668 (0.8808) grad: 0.1324 (0.1299) time: 0.3433 data: 0.0041 max mem: 3951 +train: [15] Total time: 0:02:25 (0.3650 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.8668 (0.8808) grad: 0.1324 (0.1299) +eval (validation): [15] [ 0/63] eta: 0:03:34 time: 3.3984 data: 3.1298 max mem: 3951 +eval (validation): [15] [20/63] eta: 0:00:21 time: 0.3521 data: 0.0037 max mem: 3951 +eval (validation): [15] [40/63] eta: 0:00:09 time: 0.3396 data: 0.0032 max mem: 3951 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3164 data: 0.0035 max mem: 3951 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3154 data: 0.0035 max mem: 3951 +eval (validation): [15] Total time: 0:00:24 (0.3881 s / it) +cv: [15] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.331 acc: 0.898 f1: 0.877 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:21:46 lr: nan time: 3.2669 data: 3.0419 max mem: 3951 +train: [16] [ 20/400] eta: 0:03:13 lr: 0.000048 loss: 0.8950 (0.8991) grad: 0.1199 (0.1202) time: 0.3712 data: 0.0122 max mem: 3951 +train: [16] [ 40/400] eta: 0:02:40 lr: 0.000047 loss: 0.8915 (0.8784) grad: 0.1254 (0.1246) time: 0.3788 data: 0.0036 max mem: 3951 +train: [16] [ 60/400] eta: 0:02:23 lr: 0.000046 loss: 0.8788 (0.8716) grad: 0.1299 (0.1277) time: 0.3711 data: 0.0042 max mem: 3951 +train: [16] [ 80/400] eta: 0:02:09 lr: 0.000045 loss: 0.8788 (0.8775) grad: 0.1297 (0.1275) time: 0.3577 data: 0.0045 max mem: 3951 +train: [16] [100/400] eta: 0:01:58 lr: 0.000044 loss: 0.8891 (0.8776) grad: 0.1297 (0.1290) time: 0.3486 data: 0.0046 max mem: 3951 +train: [16] [120/400] eta: 0:01:48 lr: 0.000043 loss: 0.8441 (0.8714) grad: 0.1263 (0.1286) time: 0.3597 data: 0.0043 max mem: 3951 +train: [16] [140/400] eta: 0:01:39 lr: 0.000042 loss: 0.8644 (0.8744) grad: 0.1260 (0.1287) time: 0.3370 data: 0.0041 max mem: 3951 +train: [16] [160/400] eta: 0:01:30 lr: 0.000041 loss: 0.8894 (0.8754) grad: 0.1312 (0.1294) time: 0.3541 data: 0.0045 max mem: 3951 +train: [16] [180/400] eta: 0:01:23 lr: 0.000040 loss: 0.8721 (0.8766) grad: 0.1304 (0.1288) time: 0.3823 data: 0.0042 max mem: 3951 +train: [16] [200/400] eta: 0:01:14 lr: 0.000039 loss: 0.8721 (0.8759) grad: 0.1241 (0.1290) time: 0.3370 data: 0.0041 max mem: 3951 +train: [16] [220/400] eta: 0:01:06 lr: 0.000038 loss: 0.8495 (0.8739) grad: 0.1241 (0.1283) time: 0.3417 data: 0.0042 max mem: 3951 +train: [16] [240/400] eta: 0:00:59 lr: 0.000036 loss: 0.8771 (0.8757) grad: 0.1266 (0.1288) time: 0.3496 data: 0.0043 max mem: 3951 +train: [16] [260/400] eta: 0:00:51 lr: 0.000035 loss: 0.8712 (0.8724) grad: 0.1286 (0.1288) time: 0.3449 data: 0.0042 max mem: 3951 +train: [16] [280/400] eta: 0:00:44 lr: 0.000034 loss: 0.8423 (0.8721) grad: 0.1225 (0.1281) time: 0.3650 data: 0.0044 max mem: 3951 +train: [16] [300/400] eta: 0:00:37 lr: 0.000033 loss: 0.8423 (0.8705) grad: 0.1201 (0.1278) time: 0.5011 data: 0.1878 max mem: 3951 +train: [16] [320/400] eta: 0:00:29 lr: 0.000032 loss: 0.8695 (0.8718) grad: 0.1261 (0.1281) time: 0.3471 data: 0.0125 max mem: 3951 +train: [16] [340/400] eta: 0:00:22 lr: 0.000031 loss: 0.8991 (0.8727) grad: 0.1313 (0.1283) time: 0.3333 data: 0.0030 max mem: 3951 +train: [16] [360/400] eta: 0:00:14 lr: 0.000031 loss: 0.8900 (0.8717) grad: 0.1244 (0.1281) time: 0.3637 data: 0.0045 max mem: 3951 +train: [16] [380/400] eta: 0:00:07 lr: 0.000030 loss: 0.8444 (0.8710) grad: 0.1249 (0.1284) time: 0.3511 data: 0.0045 max mem: 3951 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.8446 (0.8713) grad: 0.1292 (0.1284) time: 0.3525 data: 0.0043 max mem: 3951 +train: [16] Total time: 0:02:28 (0.3703 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.8446 (0.8713) grad: 0.1292 (0.1284) +eval (validation): [16] [ 0/63] eta: 0:03:25 time: 3.2619 data: 3.0489 max mem: 3951 +eval (validation): [16] [20/63] eta: 0:00:22 time: 0.3751 data: 0.0527 max mem: 3951 +eval (validation): [16] [40/63] eta: 0:00:09 time: 0.3262 data: 0.0113 max mem: 3951 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3529 data: 0.0203 max mem: 3951 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3364 data: 0.0027 max mem: 3951 +eval (validation): [16] Total time: 0:00:25 (0.4026 s / it) +cv: [16] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.331 acc: 0.898 f1: 0.878 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:26 lr: nan time: 3.3662 data: 3.1333 max mem: 3951 +train: [17] [ 20/400] eta: 0:03:22 lr: 0.000028 loss: 0.8655 (0.8469) grad: 0.1273 (0.1314) time: 0.3924 data: 0.0050 max mem: 3951 +train: [17] [ 40/400] eta: 0:02:39 lr: 0.000027 loss: 0.8655 (0.8521) grad: 0.1273 (0.1295) time: 0.3502 data: 0.0035 max mem: 3951 +train: [17] [ 60/400] eta: 0:02:19 lr: 0.000026 loss: 0.8470 (0.8552) grad: 0.1319 (0.1326) time: 0.3433 data: 0.0045 max mem: 3951 +train: [17] [ 80/400] eta: 0:02:06 lr: 0.000025 loss: 0.8542 (0.8593) grad: 0.1376 (0.1322) time: 0.3436 data: 0.0042 max mem: 3951 +train: [17] [100/400] eta: 0:01:55 lr: 0.000024 loss: 0.8542 (0.8599) grad: 0.1332 (0.1328) time: 0.3487 data: 0.0044 max mem: 3951 +train: [17] [120/400] eta: 0:01:46 lr: 0.000023 loss: 0.8613 (0.8621) grad: 0.1308 (0.1312) time: 0.3506 data: 0.0041 max mem: 3951 +train: [17] [140/400] eta: 0:01:37 lr: 0.000023 loss: 0.8672 (0.8632) grad: 0.1254 (0.1310) time: 0.3378 data: 0.0042 max mem: 3951 +train: [17] [160/400] eta: 0:01:29 lr: 0.000022 loss: 0.8642 (0.8656) grad: 0.1254 (0.1302) time: 0.3582 data: 0.0043 max mem: 3951 +train: [17] [180/400] eta: 0:01:21 lr: 0.000021 loss: 0.8642 (0.8667) grad: 0.1281 (0.1308) time: 0.3412 data: 0.0042 max mem: 3951 +train: [17] [200/400] eta: 0:01:13 lr: 0.000020 loss: 0.8608 (0.8648) grad: 0.1312 (0.1306) time: 0.3394 data: 0.0038 max mem: 3951 +train: [17] [220/400] eta: 0:01:05 lr: 0.000019 loss: 0.8629 (0.8688) grad: 0.1280 (0.1306) time: 0.3365 data: 0.0040 max mem: 3951 +train: [17] [240/400] eta: 0:00:57 lr: 0.000019 loss: 0.8574 (0.8656) grad: 0.1277 (0.1304) time: 0.3458 data: 0.0040 max mem: 3951 +train: [17] [260/400] eta: 0:00:50 lr: 0.000018 loss: 0.8346 (0.8657) grad: 0.1267 (0.1300) time: 0.3344 data: 0.0044 max mem: 3951 +train: [17] [280/400] eta: 0:00:43 lr: 0.000017 loss: 0.8658 (0.8680) grad: 0.1267 (0.1300) time: 0.3704 data: 0.0042 max mem: 3951 +train: [17] [300/400] eta: 0:00:37 lr: 0.000016 loss: 0.8958 (0.8700) grad: 0.1279 (0.1299) time: 0.5247 data: 0.1917 max mem: 3951 +train: [17] [320/400] eta: 0:00:29 lr: 0.000016 loss: 0.8671 (0.8686) grad: 0.1261 (0.1296) time: 0.3392 data: 0.0043 max mem: 3951 +train: [17] [340/400] eta: 0:00:22 lr: 0.000015 loss: 0.8473 (0.8689) grad: 0.1270 (0.1297) time: 0.3447 data: 0.0029 max mem: 3951 +train: [17] [360/400] eta: 0:00:14 lr: 0.000014 loss: 0.8425 (0.8675) grad: 0.1207 (0.1292) time: 0.3498 data: 0.0033 max mem: 3951 +train: [17] [380/400] eta: 0:00:07 lr: 0.000014 loss: 0.8563 (0.8672) grad: 0.1207 (0.1289) time: 0.3490 data: 0.0035 max mem: 3951 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.8675 (0.8678) grad: 0.1220 (0.1286) time: 0.3568 data: 0.0042 max mem: 3951 +train: [17] Total time: 0:02:26 (0.3660 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.8675 (0.8678) grad: 0.1220 (0.1286) +eval (validation): [17] [ 0/63] eta: 0:04:22 time: 4.1699 data: 3.9012 max mem: 3951 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3168 data: 0.0027 max mem: 3951 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3270 data: 0.0058 max mem: 3951 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3352 data: 0.0027 max mem: 3951 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3279 data: 0.0030 max mem: 3951 +eval (validation): [17] Total time: 0:00:24 (0.3923 s / it) +cv: [17] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.336 acc: 0.898 f1: 0.877 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:05 lr: nan time: 3.3149 data: 3.0407 max mem: 3951 +train: [18] [ 20/400] eta: 0:03:06 lr: 0.000012 loss: 0.8853 (0.8787) grad: 0.1256 (0.1314) time: 0.3497 data: 0.0044 max mem: 3951 +train: [18] [ 40/400] eta: 0:02:31 lr: 0.000012 loss: 0.9056 (0.8920) grad: 0.1243 (0.1279) time: 0.3453 data: 0.0035 max mem: 3951 +train: [18] [ 60/400] eta: 0:02:14 lr: 0.000011 loss: 0.8537 (0.8678) grad: 0.1254 (0.1293) time: 0.3431 data: 0.0044 max mem: 3951 +train: [18] [ 80/400] eta: 0:02:02 lr: 0.000011 loss: 0.8222 (0.8619) grad: 0.1220 (0.1271) time: 0.3498 data: 0.0043 max mem: 3951 +train: [18] [100/400] eta: 0:01:52 lr: 0.000010 loss: 0.8382 (0.8582) grad: 0.1210 (0.1258) time: 0.3427 data: 0.0044 max mem: 3951 +train: [18] [120/400] eta: 0:01:44 lr: 0.000009 loss: 0.8525 (0.8640) grad: 0.1231 (0.1261) time: 0.3511 data: 0.0044 max mem: 3951 +train: [18] [140/400] eta: 0:01:35 lr: 0.000009 loss: 0.8461 (0.8633) grad: 0.1276 (0.1267) time: 0.3293 data: 0.0041 max mem: 3951 +train: [18] [160/400] eta: 0:01:27 lr: 0.000008 loss: 0.8610 (0.8664) grad: 0.1289 (0.1274) time: 0.3619 data: 0.0042 max mem: 3951 +train: [18] [180/400] eta: 0:01:20 lr: 0.000008 loss: 0.8610 (0.8653) grad: 0.1289 (0.1281) time: 0.3794 data: 0.0044 max mem: 3951 +train: [18] [200/400] eta: 0:01:13 lr: 0.000007 loss: 0.8759 (0.8684) grad: 0.1338 (0.1286) time: 0.3646 data: 0.0043 max mem: 3951 +train: [18] [220/400] eta: 0:01:05 lr: 0.000007 loss: 0.8780 (0.8684) grad: 0.1306 (0.1287) time: 0.3517 data: 0.0040 max mem: 3951 +train: [18] [240/400] eta: 0:00:57 lr: 0.000006 loss: 0.8651 (0.8694) grad: 0.1273 (0.1288) time: 0.3299 data: 0.0039 max mem: 3951 +train: [18] [260/400] eta: 0:00:50 lr: 0.000006 loss: 0.8633 (0.8664) grad: 0.1296 (0.1289) time: 0.3503 data: 0.0040 max mem: 3951 +train: [18] [280/400] eta: 0:00:43 lr: 0.000006 loss: 0.8756 (0.8676) grad: 0.1235 (0.1289) time: 0.3540 data: 0.0039 max mem: 3951 +train: [18] [300/400] eta: 0:00:37 lr: 0.000005 loss: 0.8796 (0.8693) grad: 0.1292 (0.1297) time: 0.5418 data: 0.1877 max mem: 3951 +train: [18] [320/400] eta: 0:00:29 lr: 0.000005 loss: 0.8705 (0.8709) grad: 0.1325 (0.1297) time: 0.3286 data: 0.0044 max mem: 3951 +train: [18] [340/400] eta: 0:00:22 lr: 0.000004 loss: 0.8536 (0.8698) grad: 0.1262 (0.1295) time: 0.3521 data: 0.0030 max mem: 3951 +train: [18] [360/400] eta: 0:00:14 lr: 0.000004 loss: 0.8712 (0.8707) grad: 0.1262 (0.1295) time: 0.3416 data: 0.0041 max mem: 3951 +train: [18] [380/400] eta: 0:00:07 lr: 0.000004 loss: 0.8733 (0.8705) grad: 0.1279 (0.1294) time: 0.3722 data: 0.0046 max mem: 3951 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.8475 (0.8701) grad: 0.1279 (0.1298) time: 0.3802 data: 0.0047 max mem: 3951 +train: [18] Total time: 0:02:27 (0.3689 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.8475 (0.8701) grad: 0.1279 (0.1298) +eval (validation): [18] [ 0/63] eta: 0:03:31 time: 3.3519 data: 3.0757 max mem: 3951 +eval (validation): [18] [20/63] eta: 0:00:23 time: 0.4036 data: 0.0056 max mem: 3951 +eval (validation): [18] [40/63] eta: 0:00:10 time: 0.3426 data: 0.0031 max mem: 3951 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3198 data: 0.0035 max mem: 3951 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3154 data: 0.0034 max mem: 3951 +eval (validation): [18] Total time: 0:00:25 (0.4069 s / it) +cv: [18] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.329 acc: 0.900 f1: 0.880 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +saving best checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +train: [19] [ 0/400] eta: 0:22:39 lr: nan time: 3.3999 data: 3.1202 max mem: 3951 +train: [19] [ 20/400] eta: 0:03:22 lr: 0.000003 loss: 0.8480 (0.8601) grad: 0.1217 (0.1261) time: 0.3905 data: 0.0038 max mem: 3951 +train: [19] [ 40/400] eta: 0:02:38 lr: 0.000003 loss: 0.8440 (0.8532) grad: 0.1220 (0.1263) time: 0.3399 data: 0.0035 max mem: 3951 +train: [19] [ 60/400] eta: 0:02:20 lr: 0.000002 loss: 0.8440 (0.8616) grad: 0.1229 (0.1260) time: 0.3556 data: 0.0041 max mem: 3951 +train: [19] [ 80/400] eta: 0:02:06 lr: 0.000002 loss: 0.8866 (0.8664) grad: 0.1265 (0.1267) time: 0.3467 data: 0.0044 max mem: 3951 +train: [19] [100/400] eta: 0:01:57 lr: 0.000002 loss: 0.8234 (0.8576) grad: 0.1277 (0.1267) time: 0.3775 data: 0.0044 max mem: 3951 +train: [19] [120/400] eta: 0:01:47 lr: 0.000002 loss: 0.8317 (0.8577) grad: 0.1268 (0.1269) time: 0.3421 data: 0.0039 max mem: 3951 +train: [19] [140/400] eta: 0:01:37 lr: 0.000001 loss: 0.8457 (0.8559) grad: 0.1268 (0.1278) time: 0.3336 data: 0.0040 max mem: 3951 +train: [19] [160/400] eta: 0:01:29 lr: 0.000001 loss: 0.8504 (0.8563) grad: 0.1254 (0.1266) time: 0.3522 data: 0.0044 max mem: 3951 +train: [19] [180/400] eta: 0:01:21 lr: 0.000001 loss: 0.8700 (0.8578) grad: 0.1219 (0.1270) time: 0.3649 data: 0.0045 max mem: 3951 +train: [19] [200/400] eta: 0:01:14 lr: 0.000001 loss: 0.8759 (0.8585) grad: 0.1240 (0.1267) time: 0.3558 data: 0.0041 max mem: 3951 +train: [19] [220/400] eta: 0:01:06 lr: 0.000001 loss: 0.8536 (0.8571) grad: 0.1240 (0.1271) time: 0.3422 data: 0.0040 max mem: 3951 +train: [19] [240/400] eta: 0:00:58 lr: 0.000001 loss: 0.8365 (0.8550) grad: 0.1295 (0.1274) time: 0.3469 data: 0.0040 max mem: 3951 +train: [19] [260/400] eta: 0:00:51 lr: 0.000000 loss: 0.8553 (0.8577) grad: 0.1309 (0.1279) time: 0.3530 data: 0.0043 max mem: 3951 +train: [19] [280/400] eta: 0:00:43 lr: 0.000000 loss: 0.8636 (0.8542) grad: 0.1301 (0.1281) time: 0.3465 data: 0.0043 max mem: 3951 +train: [19] [300/400] eta: 0:00:37 lr: 0.000000 loss: 0.8636 (0.8578) grad: 0.1285 (0.1281) time: 0.4992 data: 0.1760 max mem: 3951 +train: [19] [320/400] eta: 0:00:29 lr: 0.000000 loss: 0.8825 (0.8580) grad: 0.1269 (0.1281) time: 0.3495 data: 0.0038 max mem: 3951 +train: [19] [340/400] eta: 0:00:22 lr: 0.000000 loss: 0.8474 (0.8570) grad: 0.1264 (0.1281) time: 0.3473 data: 0.0038 max mem: 3951 +train: [19] [360/400] eta: 0:00:14 lr: 0.000000 loss: 0.8702 (0.8585) grad: 0.1194 (0.1275) time: 0.3376 data: 0.0029 max mem: 3951 +train: [19] [380/400] eta: 0:00:07 lr: 0.000000 loss: 0.8453 (0.8572) grad: 0.1155 (0.1272) time: 0.3503 data: 0.0041 max mem: 3951 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.8303 (0.8576) grad: 0.1234 (0.1272) time: 0.3606 data: 0.0043 max mem: 3951 +train: [19] Total time: 0:02:27 (0.3678 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.8303 (0.8576) grad: 0.1234 (0.1272) +eval (validation): [19] [ 0/63] eta: 0:03:36 time: 3.4356 data: 3.1514 max mem: 3951 +eval (validation): [19] [20/63] eta: 0:00:21 time: 0.3621 data: 0.0034 max mem: 3951 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3344 data: 0.0044 max mem: 3951 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3272 data: 0.0029 max mem: 3951 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3259 data: 0.0032 max mem: 3951 +eval (validation): [19] Total time: 0:00:24 (0.3942 s / it) +cv: [19] best hparam: (19, 1.0) (042) ('042_lr1.9e+01_wd1.0e+00') loss: 0.329 acc: 0.899 f1: 0.879 +saving checkpoint experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +evaluating last checkpoint: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-last.pth +eval model info: +{"score": 0.8993055555555556, "hparam": [19, 1.0], "hparam_id": 42, "epoch": 19, "is_best": false, "best_score": 0.8998015873015873} +eval (train): [20] [ 0/297] eta: 0:16:52 time: 3.4103 data: 3.1350 max mem: 3951 +eval (train): [20] [ 20/297] eta: 0:02:18 time: 0.3542 data: 0.0174 max mem: 3951 +eval (train): [20] [ 40/297] eta: 0:01:52 time: 0.3698 data: 0.0046 max mem: 3951 +eval (train): [20] [ 60/297] eta: 0:01:35 time: 0.3383 data: 0.0031 max mem: 3951 +eval (train): [20] [ 80/297] eta: 0:01:24 time: 0.3390 data: 0.0035 max mem: 3951 +eval (train): [20] [100/297] eta: 0:01:14 time: 0.3354 data: 0.0038 max mem: 3951 +eval (train): [20] [120/297] eta: 0:01:04 time: 0.3067 data: 0.0035 max mem: 3951 +eval (train): [20] [140/297] eta: 0:00:57 time: 0.3477 data: 0.0036 max mem: 3951 +eval (train): [20] [160/297] eta: 0:00:49 time: 0.3617 data: 0.0040 max mem: 3951 +eval (train): [20] [180/297] eta: 0:00:42 time: 0.3400 data: 0.0035 max mem: 3951 +eval (train): [20] [200/297] eta: 0:00:34 time: 0.3268 data: 0.0038 max mem: 3951 +eval (train): [20] [220/297] eta: 0:00:27 time: 0.3428 data: 0.0031 max mem: 3951 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3299 data: 0.0035 max mem: 3951 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3286 data: 0.0036 max mem: 3951 +eval (train): [20] [280/297] eta: 0:00:05 time: 0.3469 data: 0.0036 max mem: 3951 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3037 data: 0.0033 max mem: 3951 +eval (train): [20] Total time: 0:01:44 (0.3509 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:09 time: 3.0157 data: 2.7680 max mem: 3951 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3540 data: 0.0045 max mem: 3951 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3284 data: 0.0033 max mem: 3951 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3083 data: 0.0036 max mem: 3951 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.2998 data: 0.0036 max mem: 3951 +eval (validation): [20] Total time: 0:00:23 (0.3761 s / it) +eval (test): [20] [ 0/79] eta: 0:03:54 time: 2.9634 data: 2.7263 max mem: 3951 +eval (test): [20] [20/79] eta: 0:00:26 time: 0.3277 data: 0.0032 max mem: 3951 +eval (test): [20] [40/79] eta: 0:00:15 time: 0.3319 data: 0.0036 max mem: 3951 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3232 data: 0.0040 max mem: 3951 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.2926 data: 0.0029 max mem: 3951 +eval (test): [20] Total time: 0:00:28 (0.3567 s / it) +evaluating best checkpoint: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/checkpoint-best.pth +eval model info: +{"score": 0.8998015873015873, "hparam": [19, 1.0], "hparam_id": 42, "epoch": 18, "is_best": true, "best_score": 0.8998015873015873} +eval (train): [20] [ 0/297] eta: 0:15:22 time: 3.1046 data: 2.8575 max mem: 3951 +eval (train): [20] [ 20/297] eta: 0:02:13 time: 0.3518 data: 0.0029 max mem: 3951 +eval (train): [20] [ 40/297] eta: 0:01:48 time: 0.3586 data: 0.0035 max mem: 3951 +eval (train): [20] [ 60/297] eta: 0:01:32 time: 0.3227 data: 0.0033 max mem: 3951 +eval (train): [20] [ 80/297] eta: 0:01:20 time: 0.3159 data: 0.0035 max mem: 3951 +eval (train): [20] [100/297] eta: 0:01:10 time: 0.3109 data: 0.0031 max mem: 3951 +eval (train): [20] [120/297] eta: 0:01:01 time: 0.2996 data: 0.0032 max mem: 3951 +eval (train): [20] [140/297] eta: 0:00:53 time: 0.3100 data: 0.0032 max mem: 3951 +eval (train): [20] [160/297] eta: 0:00:47 time: 0.3410 data: 0.0032 max mem: 3951 +eval (train): [20] [180/297] eta: 0:00:40 time: 0.3427 data: 0.0040 max mem: 3951 +eval (train): [20] [200/297] eta: 0:00:33 time: 0.3338 data: 0.0038 max mem: 3951 +eval (train): [20] [220/297] eta: 0:00:26 time: 0.3009 data: 0.0034 max mem: 3951 +eval (train): [20] [240/297] eta: 0:00:19 time: 0.3495 data: 0.0035 max mem: 3951 +eval (train): [20] [260/297] eta: 0:00:12 time: 0.3232 data: 0.0036 max mem: 3951 +eval (train): [20] [280/297] eta: 0:00:05 time: 0.3607 data: 0.0037 max mem: 3951 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3253 data: 0.0036 max mem: 3951 +eval (train): [20] Total time: 0:01:40 (0.3401 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:12 time: 3.0513 data: 2.8406 max mem: 3951 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3435 data: 0.0033 max mem: 3951 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3753 data: 0.0043 max mem: 3951 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.2957 data: 0.0032 max mem: 3951 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.2981 data: 0.0032 max mem: 3951 +eval (validation): [20] Total time: 0:00:24 (0.3865 s / it) +eval (test): [20] [ 0/79] eta: 0:04:14 time: 3.2253 data: 2.9661 max mem: 3951 +eval (test): [20] [20/79] eta: 0:00:29 time: 0.3632 data: 0.0037 max mem: 3951 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3239 data: 0.0033 max mem: 3951 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3290 data: 0.0037 max mem: 3951 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3088 data: 0.0035 max mem: 3951 +eval (test): [20] Total time: 0:00:29 (0.3702 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:-------|:-------------|:-------|--------:|-------:|-----:|------------:|:----------|:-----------|--------:|--------:|----------:|--------:|----------:| +| flat_mae | reg | linear | hcpya_task21 | best | 18 | 0.0057 | 0.05 | 42 | [19, 1.0] | train | 0.12437 | 0.97668 | 0.0010892 | 0.97752 | 0.0011385 | +| flat_mae | reg | linear | hcpya_task21 | best | 18 | 0.0057 | 0.05 | 42 | [19, 1.0] | validation | 0.32912 | 0.8998 | 0.0047686 | 0.88031 | 0.0063966 | +| flat_mae | reg | linear | hcpya_task21 | best | 18 | 0.0057 | 0.05 | 42 | [19, 1.0] | test | 0.36137 | 0.89444 | 0.0042148 | 0.8745 | 0.0053925 | + + +done! total time: 1:03:07 diff --git a/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/train_log.json b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..3919585e6d6d07e9acfeb301ad5c734ef90b1123 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/hcpya_task21__reg__linear/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.775433964729309, "train/grad": 0.3216345778852701, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11481201171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.114185791015625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1130029296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11195556640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.110872802734375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10940673828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10768798828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.105748291015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.103197021484375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.10055419921875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.09789306640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.09376220703125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.089813232421875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.083946533203125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.07827880859375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.072596435546875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0653076171875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.056490478515625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.04646240234375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.036778564453125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.024442138671875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.01136962890625, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.9964208984375, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.97989501953125, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.96221435546875, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.94046142578125, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.920614013671875, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.9021282958984376, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.87634765625, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.844859619140625, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.8159475708007813, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.7887722778320314, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.7506134033203127, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.7150204467773436, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.670701904296875, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.6246385192871093, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.5728243255615233, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.5210504150390625, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.462162399291992, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.385234251022339, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.323131561279297, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.2672269201278685, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.1929534602165224, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.127885320186615, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.0525205093622207, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.9723538875579834, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.9039209187030792, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.8230111414194108, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.754791695177555, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05460002359002829, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.05457122908905149, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.05452266920357943, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.05447512181475758, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.054428623616695405, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.054360556453466415, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.05428382163867354, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.05419919162988663, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.05408382765948772, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.053961977884173395, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.05384174380451441, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.05365453876554966, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.053472987357527015, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.05320010263472796, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.05293361511081457, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.052669433057308195, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.05232682945206761, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.051911414787173274, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.05142414866015315, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.05095484673976898, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.05034963555634022, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04970203034579754, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.048953792499378326, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04812506320886314, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04724331243894994, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04618761598132551, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.045255002761259676, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04444562468677759, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.043418483110144734, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04231938309967518, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04144692968577147, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04072111871093512, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.039825451225042344, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03908345552161336, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03825775878503919, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.037490666480734944, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03671575111337006, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.03601767980493605, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.035290305027738215, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03440713844262064, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03373106987215579, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03314346536062658, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03239562536589801, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.031775341210886834, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.031095765801146626, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.030417151255533098, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.02987903212662786, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.029281651261262597, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.028792240573093295, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1099140644073486, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1079838275909424, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1046948432922363, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1014180183410645, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.098101854324341, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0935540199279785, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0884482860565186, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0828053951263428, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.07529616355896, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.067426919937134, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0592877864837646, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.047802209854126, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0361812114715576, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.019589424133301, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.003708600997925, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.988327741622925, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.968763589859009, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.945657253265381, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.919879913330078, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.895789384841919, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.8659632205963135, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.835484504699707, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.801537036895752, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.765355110168457, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.727935791015625, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6832327842712402, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6428775787353516, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.605466842651367, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.553004264831543, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.487353801727295, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.425236225128174, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.3664047718048096, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.2836577892303467, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.207176685333252, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.1141200065612793, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.0201237201690674, "validation/loss_036_lr7.1e+00_wd1.0e+00": 1.918304681777954, "validation/loss_037_lr8.3e+00_wd1.0e+00": 1.8209047317504883, "validation/loss_038_lr9.8e+00_wd1.0e+00": 1.715285062789917, "validation/loss_039_lr1.2e+01_wd1.0e+00": 1.5860463380813599, "validation/loss_040_lr1.4e+01_wd1.0e+00": 1.4890222549438477, "validation/loss_041_lr1.6e+01_wd1.0e+00": 1.4075167179107666, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.3070378303527832, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.2257992029190063, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.1383731365203857, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.0527682304382324, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.9847490787506104, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9093542098999023, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.8511525988578796, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.030753968253968252, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.031001984126984128, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.031746031746031744, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.032242063492063495, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.032242063492063495, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.03298611111111111, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.03422619047619048, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.03521825396825397, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.03670634920634921, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.03844246031746032, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.04017857142857143, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.042162698412698416, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.046378968253968256, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.05009920634920635, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.05828373015873016, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06870039682539683, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.08680555555555555, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.10094246031746032, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.12103174603174603, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.13814484126984128, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.1644345238095238, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.18303571428571427, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.20362103174603174, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.20634920634920634, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.20734126984126985, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.20362103174603174, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.20634920634920634, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.20932539682539683, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21899801587301587, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23387896825396826, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2537202380952381, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.27802579365079366, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.3189484126984127, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.35987103174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.4114583333333333, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.46850198412698413, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.5200892857142857, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.5577876984126984, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.5932539682539683, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.625, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.6545138888888888, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.6681547619047619, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.6889880952380952, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.7065972222222222, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.7204861111111112, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.7373511904761905, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.7472718253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.7614087301587301, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.7725694444444444, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.02085204316305537, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.021044800939976915, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.02141794351676945, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02165555867107473, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02165138909772838, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02229507361676466, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.023501642706336076, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.024065587366158773, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.024365739800658655, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.025792219550534578, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02687046761122226, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.02689423270603279, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.02940378473588412, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.030255743801204764, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.03439940379566073, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.039482614589512424, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.046386754619401595, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.05228878941159735, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.0582753798416065, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.06233398304874807, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.06691893264551788, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.06968078546975458, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.07051132716762384, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.06581381962383288, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.06504159395059554, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.0623449609818829, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.06548265331482896, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.07084432726381881, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.08184229738187167, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.09680391177400104, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.11591344016290493, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1418245744583126, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1838802827120204, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.22149923143178396, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.2729287153336312, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.328342316380511, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.36928008536136636, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.40971515321220536, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.45332416231064265, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.4908894372743903, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.532606108821601, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.5553285630577721, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.5862901870873019, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.6161597156141599, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.6336756980636282, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.661225089957961, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.6741044809891092, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.6971184874270998, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.7169680804938472, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 1.754791695177555, "validation/loss_best": 0.8511525988578796, "validation/acc_best": 0.7725694444444444, "validation/f1_best": 0.7169680804938472} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.1763607126474382, "train/grad": 0.2591730758547783, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.088089599609375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.083612060546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.076273193359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.06908935546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.061895751953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.052242431640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.041458740234375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.029615478515625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.01452392578125, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.998807373046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.983699951171875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.961715087890625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9411090087890623, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.912333984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.885814208984375, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.861317138671875, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.831241455078125, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.797091064453125, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.7600885009765626, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.7264910888671876, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.6854257202148437, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.64318603515625, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.5955410766601563, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.543257141113281, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.4869989013671874, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.4173760986328126, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.3527916717529296, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2924180603027344, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.2088550186157225, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.1085493659973142, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.019209623336792, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9389422750473022, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8330201363563539, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7410251808166504, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6356868493556975, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5356563156843186, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.4334535139799118, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.3410010904073715, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.24597048163414, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.1359777736663819, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.0572882878780365, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9929248198866845, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.9155385795235634, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.854719632267952, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.7915634663403034, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.7317221282422542, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6861817520856858, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.6382372862100602, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.6026489342749118, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05410612704232335, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.05391017310321331, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.053584656454622744, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.05326390845701098, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.052947371117770675, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.052509709298610686, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.05201877642422915, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.051479506734758616, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.05077898314222693, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.05004332097247243, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.049330448396503926, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04827575189992785, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04727312451228499, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.045858764750882984, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04454836397431791, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.043331839386373756, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04186185440048575, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04025155862793326, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03863010010682046, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037304641446098685, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.035907428236678245, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03471650516614318, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03363377914763987, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03270868154242635, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0319323964510113, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.031182622173801064, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.030615629013627767, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03014379134401679, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02953783266246319, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.028838508101180195, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.028221581615507602, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02766483132727444, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02692746138200164, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.026281490502879024, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025535689955577253, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02482073930092156, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02408922358416021, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.023423210750333965, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.022751833898946642, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.021991487462073564, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.021431739316321908, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.020935422223992646, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.020284546040929854, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.01974802856799215, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.019209541464224458, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.018722391123883426, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01836699844803661, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.018052579024806618, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.017923824610188602, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.0711374282836914, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.0637292861938477, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0518624782562256, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.040195941925049, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0290637016296387, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0135653018951416, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.9967572689056396, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.9787192344665527, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.955942153930664, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.932497978210449, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9107723236083984, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.8796982765197754, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.851130247116089, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.812558174133301, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.7779595851898193, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.746486186981201, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.7086262702941895, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.6661217212677, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.6196463108062744, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.576643228530884, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.522636890411377, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.465106248855591, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3986217975616455, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.325110912322998, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.246065139770508, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.15008544921875, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.0633063316345215, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.9845198392868042, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.8795205354690552, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.7594714164733887, "validation/loss_030_lr2.7e+00_wd1.0e+00": 1.6575183868408203, "validation/loss_031_lr3.1e+00_wd1.0e+00": 1.5697296857833862, "validation/loss_032_lr3.7e+00_wd1.0e+00": 1.4590858221054077, "validation/loss_033_lr4.3e+00_wd1.0e+00": 1.3679354190826416, "validation/loss_034_lr5.1e+00_wd1.0e+00": 1.2682734727859497, "validation/loss_035_lr6.0e+00_wd1.0e+00": 1.178483009338379, "validation/loss_036_lr7.1e+00_wd1.0e+00": 1.0910135507583618, "validation/loss_037_lr8.3e+00_wd1.0e+00": 1.0152602195739746, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.9409914016723633, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.8590161800384521, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.8019651770591736, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7556512355804443, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7020412683486938, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6632546186447144, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6278303265571594, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.600577712059021, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5803281664848328, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.5559502243995667, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.5339255332946777, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.03720238095238095, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.03943452380952381, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.041666666666666664, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.04538690476190476, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.047371031746031744, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.053323412698412696, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06374007936507936, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.078125, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09771825396825397, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.11755952380952381, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.13740079365079366, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.16294642857142858, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1800595238095238, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.20238095238095238, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2065972222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.20535714285714285, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.20634920634920634, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.20634920634920634, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2078373015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.21453373015873015, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2261904761904762, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24305555555555555, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26587301587301587, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.3013392857142857, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.3385416666666667, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.3834325396825397, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.4365079365079365, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.48735119047619047, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.5396825396825397, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.5813492063492064, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.6106150793650794, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.6329365079365079, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.6577380952380952, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.6793154761904762, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.7021329365079365, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.7197420634920635, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.7338789682539683, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.7497519841269841, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.7725694444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.7899305555555556, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.800843253968254, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8120039682539683, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8236607142857143, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8308531746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8291170634920635, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8278769841269841, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8303571428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8313492063492064, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8380456349206349, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.02556531330893308, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.026369709907758856, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.027100028280420945, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02903288387449405, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02937162149850173, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03229812766018868, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03663015460336326, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04258317598943265, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05045990892504283, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.05675906243351026, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06332298662791988, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.06715058639446375, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06962981977347421, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07039394640174856, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06618176336101782, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.06419107128899941, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.06493590801967587, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.0659508048159252, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.06940331142439933, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.07771925633769425, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.08991448644581619, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.10698651647479991, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.12906823160590725, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1620074998077171, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1968289652736945, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2395764582126458, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.29267452553422785, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.33971083842383354, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.38159490667074264, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.4259790514241934, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.46339122900076696, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.49265194069361357, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.5309310958484191, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.5620034138542659, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.6060497030698113, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.6341523659370631, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.6596447040322028, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.683924548290566, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.7161178671936494, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.7433244536210373, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.7564934517118685, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.7728891761196396, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.7891781332795105, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.7992987662488037, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.7969253990366092, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.7940172950343791, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.7954113372823146, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.7964053861367094, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8065477403700014, "id_best": 48, "lr_best": 0.015, "wd_best": 0.05, "train/loss_best": 0.6026489342749118, "validation/loss_best": 0.5339255332946777, "validation/acc_best": 0.8380456349206349, "validation/f1_best": 0.8065477403700014} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 1.8349058198928834, "train/grad": 0.22118994928896427, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.038687744140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.02817138671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.011097412109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.99490966796875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.979317626953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.9585443115234376, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.9362481689453124, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.91272216796875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.883818359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.854993896484375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.828592529296875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.791524658203125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.758052978515625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7129010009765624, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.672320556640625, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.6347412109375, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.5879995727539065, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.5334292602539064, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.4720535278320312, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.414165496826172, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.3416439056396485, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.2657469177246092, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.180558319091797, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.089097747802734, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9944971656799317, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8838204908370972, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7874279308319092, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7028550028800964, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5936897611618042, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4735112929344176, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3751367777585983, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2928594422340394, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1917183208465576, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1102154126763344, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0230160808563233, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.945861949622631, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8719202643632888, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8092200094461441, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.7483332505822182, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.6817367532849312, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.6359597006440163, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.5996673011779785, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.5577598299086094, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.5259076197445393, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.4931409573554993, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.46331844985485077, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.44207683607935905, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.4201923830062151, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.4052060548961163, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05123569004237652, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0507382439263165, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04993371713906527, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.049150304030627014, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04839514717459679, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04737672468647361, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04626545490697026, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04508084027096629, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.043605557596310976, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0421303855907172, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.040786330802366134, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.03895271330140531, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.037397820744663475, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03554167540743947, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03416979094035923, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.033168367259204384, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03223017220385373, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.031452611368149516, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.030828169230371714, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03036318549886346, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.029846723480150102, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.029331800593063236, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.028758705742657185, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02814155480824411, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.027499219393357633, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.026748877111822368, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.026094076950103046, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.025517566027119756, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.024772906424477697, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.023945184061303734, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.023259789608418943, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.022674845326691867, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.021938325138762593, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.021323941480368376, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02063994322437793, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02002533811610192, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.019445365592837334, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.018953043553046882, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01844606654252857, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01785647834185511, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.017423155554570257, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.017118231896311044, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.016885874518193303, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.016750145112164317, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.016523774131201208, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.016297225477173924, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.016237513967789708, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.016235143104568125, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.016300801406614482, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.0139808654785156, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.9998772144317627, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.977386474609375, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.9561660289764404, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.936054229736328, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.9097139835357666, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.881822347640991, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.852961540222168, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.818049192428589, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.784027576446533, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7530293464660645, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7101123332977295, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6713621616363525, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6188035011291504, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.570295572280884, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.52451229095459, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.466526746749878, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3981425762176514, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3213717937469482, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.249894142150879, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.1621692180633545, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.07277250289917, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.974993109703064, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.873169183731079, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.7710189819335938, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.6553105115890503, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.557789921760559, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.474487066268921, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.3700470924377441, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.2585787773132324, "validation/loss_030_lr2.7e+00_wd1.0e+00": 1.169980525970459, "validation/loss_031_lr3.1e+00_wd1.0e+00": 1.0976521968841553, "validation/loss_032_lr3.7e+00_wd1.0e+00": 1.0111932754516602, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.9429853558540344, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.8716498613357544, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.8096461892127991, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.7518613338470459, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.7042080760002136, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.659213662147522, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.6107403635978699, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.5770026445388794, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5506349802017212, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5210583209991455, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.49771401286125183, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.47810813784599304, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.4646787941455841, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.4499824047088623, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4416109621524811, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.4392016530036926, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.053323412698412696, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06101190476190476, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0798611111111111, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.09647817460317461, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.11259920634920635, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.13467261904761904, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.1589781746031746, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.17782738095238096, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1996527777777778, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.20709325396825398, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.20610119047619047, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20734126984126985, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.20610119047619047, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21006944444444445, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2177579365079365, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.22817460317460317, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2482638888888889, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2740575396825397, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.3117559523809524, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.34399801587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.4000496031746032, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.44990079365079366, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.5052083333333334, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.5483630952380952, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.5828373015873016, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.6121031746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.6361607142857143, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.6577380952380952, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.6840277777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.7085813492063492, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.7313988095238095, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.746031746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.7619047619047619, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.7760416666666666, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.792906746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8077876984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8167162698412699, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8231646825396826, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8296130952380952, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8392857142857143, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8432539682539683, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8487103174603174, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8516865079365079, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8551587301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8546626984126984, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8546626984126984, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8618551587301587, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8655753968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8630952380952381, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.03246610662398916, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.03660983429825685, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.04330317005010052, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.050030205120016505, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.055637032974092705, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.06265372377275791, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.06639012982259647, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.06827171950148765, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06946837804000983, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06812217488141421, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06495041478472593, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.06412115778897629, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06439344489679837, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07102931099640285, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.0807380448405963, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.09218810567332561, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.11322949353771186, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1404552753067294, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1727872331857651, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2040460963849969, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.25232251266114125, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.30022607797906004, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.3493681089536413, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.3893724622676062, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.43001456047393416, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.4705596830679887, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.5045131034921292, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.5372269509390996, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.5812867729858991, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.622589043948531, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.6584485538950595, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.6809830331586545, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.705150436062271, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.7235982134535482, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.7468868351149134, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.7660264577090342, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.7766958639445896, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.7833402861755525, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.7917558655076565, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8049412367874692, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8111408044302538, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8191777295557164, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8248860630784471, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8299983499418101, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8302692431710336, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8279600145136321, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8344024035099206, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8404255124441097, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8367602598733621, "id_best": 47, "lr_best": 0.012899999999999998, "wd_best": 0.05, "train/loss_best": 0.4201923830062151, "validation/loss_best": 0.4416109621524811, "validation/acc_best": 0.8655753968253969, "validation/f1_best": 0.8404255124441097} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 1.6016118443012237, "train/grad": 0.1980294243246317, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.9787158203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.961212158203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.933974609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.9085382080078124, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.8850384521484376, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.8545550537109374, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.822822265625, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.790364990234375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.7513250732421874, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.7132122802734373, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.6784527587890623, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6294366455078126, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5840020751953126, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5205267333984374, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4608544921875, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.404318084716797, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.333357696533203, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.250923919677734, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1604895401000976, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.078261585235596, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9795246696472169, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8812374591827392, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7764202499389647, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6698994040489197, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5655851590633392, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4499761545658112, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3546893388032912, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2745775198936462, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1757172363996506, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.0720899528265, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.9909550729393959, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.9254780712723732, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.8476669818162919, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.7867697796225548, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.7234181433916091, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.6686585642397403, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.6174977089464665, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.5748254269361496, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.5339850597083569, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.48952570974826815, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.45913576550781726, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.43518979646265504, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.407908995822072, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.38747321248054506, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.36769612662494183, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.35116481587290765, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.3413280650228262, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.3356889332085848, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.33451620951294897, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.048598096221685406, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.047743984293192626, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04639367589727044, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.045120875295251606, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.043925916329026225, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04237312998622656, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.040769302854314446, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.039166121566668155, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.037358296681195495, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.035785529986023905, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03456945273093879, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.03324016419239342, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03236322843469679, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.031522715287283065, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03095580207183957, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.030507883774116635, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02998912855051458, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.029400533279404045, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.028755635693669318, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.028164958199486136, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.027456238558515908, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.026746887341141702, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.025985140651464463, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.025210235537961125, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02444487739354372, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.023593288492411376, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022884607771411537, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0222807096876204, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0215296753263101, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.020724613182246686, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.020086234007030725, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.019553617099300025, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.018880634042434393, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.018303677369840444, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.017654466507956386, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.017086290931329132, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01658296092879027, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.016209165886975824, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.015881384550593795, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01549055014271289, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.015131791182793676, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.014805632452480495, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.014481384730897844, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014314642632380128, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.014246866656467318, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.014290153379552067, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.014466924578882753, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.014881594916805624, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01519341953098774, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.94543719291687, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.924602746963501, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.8924481868743896, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.8631722927093506, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.8360726833343506, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.8016197681427, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.766099452972412, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.730048179626465, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.6868889331817627, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.644603967666626, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.605452299118042, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.5492937564849854, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.496354579925537, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.42170786857605, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.3518245220184326, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.286262273788452, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.2051219940185547, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.112630605697632, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.013296127319336, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.9248493909835815, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.821112036705017, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.720291256904602, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.6151024103164673, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.510663390159607, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.4108175039291382, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.3025133609771729, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.215147852897644, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.1430822610855103, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.055861234664917, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.9666905403137207, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.898327112197876, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.8439856767654419, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.7798567414283752, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.7299770712852478, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.678457498550415, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.6353880167007446, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.5961280465126038, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.5648626685142517, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.5360822081565857, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5034065842628479, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.480014443397522, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4617908298969269, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.44380834698677063, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.432192862033844, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.42610689997673035, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.42725470662117004, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.43958720564842224, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.46424436569213867, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.5052030682563782, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.10491071428571429, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.1212797619047619, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.1480654761904762, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.1691468253968254, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.18501984126984128, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.203125, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.20510912698412698, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.20535714285714285, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.20461309523809523, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.2065972222222222, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.2105654761904762, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.22247023809523808, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23462301587301587, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25818452380952384, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2914186507936508, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.3211805555555556, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.3687996031746032, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.42658730158730157, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.4809027777777778, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.5260416666666666, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.5667162698412699, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.5969742063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.6254960317460317, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.6510416666666666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.6733630952380952, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.7006448412698413, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.7232142857142857, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.7413194444444444, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.7569444444444444, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.7713293650793651, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.7827380952380952, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.7956349206349206, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8097718253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8169642857142857, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.826140873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8325892857142857, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8395337301587301, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8449900793650794, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.847718253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8541666666666666, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8549107142857143, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8625992063492064, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8650793650793651, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8668154761904762, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8695436507936508, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8678075396825397, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8628472222222222, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8546626984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8474702380952381, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0532358375000369, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.058084972910630514, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.06307292049375664, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.065734269525799, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.06714411655796448, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0681183112047741, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.06447560518285998, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.06337079374537617, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06347875875100277, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06574717946593422, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07149847529676757, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.08575099161631912, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09871265572378686, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1209184660552835, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15346609157683228, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18282526335799695, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.22163329941116322, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.28082643298792653, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.3252090826833549, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.3656030455411492, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.40509667062312105, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.4412723128425846, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.47928088949213093, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.5173370576252585, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.5512503787944857, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.5961267575328288, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.6383050017129375, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.6726158778283646, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.6965051216906302, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.7194558577067309, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.7342019967899643, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.7507041258275222, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.7714616962978333, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.7808886658689561, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.7934254423408935, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.7999915293530733, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8089870742478121, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8146436168428293, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8166471213118696, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8238045617623097, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8250075693311933, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8345206066965716, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8396984757388272, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.841645912727132, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8454847675848504, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8429822416610827, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8382639019850185, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.826606971665454, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8161331222046941, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 0.36769612662494183, "validation/loss_best": 0.42610689997673035, "validation/acc_best": 0.8695436507936508, "validation/f1_best": 0.8454847675848504} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 1.4274735915660859, "train/grad": 0.18094574011862277, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.9044891357421876, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.8808917236328124, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.84491455078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.8122821044921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.78255615234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.7449322509765626, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.706119384765625, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.666556396484375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.618463439941406, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.5700677490234374, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.5245111083984373, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.4583584594726564, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.3959765625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.3086265563964843, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.228116302490234, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.1536981582641603, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.063004493713379, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.961735897064209, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.8549966239929199, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.761770634651184, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.654193422794342, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.5515045070648192, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.4463863724470138, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.3437437564134598, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.24691952586174, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.1435094264149666, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.061014261841774, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.9935488894581794, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.9124030551314354, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.8297898915410041, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.7663714715838432, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.715842177271843, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.6566771778464318, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.6109071187674999, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.5638658013939858, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.5235906811058522, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.48642099007964135, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.4554068198800087, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.4259772452712059, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.3943613787740469, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.37293272249400616, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.35666821904480456, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.3387872575968504, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.3259557043015957, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.3133743767440319, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.3032054220885038, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.29851596415042875, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.30063607163727285, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.3116300565749407, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.045366554129868744, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.044170444579795, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.042328437007963654, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04066840436309576, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03918700403533876, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03740465388633311, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03575903008691966, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03435730546712876, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.033061005892232063, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03216527798213065, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03156790071167052, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.030937756486237047, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030457711154595016, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029835573034361004, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.029273744886741043, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.028754599476233125, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.028119764421135188, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02740885779261589, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.026655607828870415, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02599408630281687, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02522235964424908, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.024479700196534396, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.023708568261936305, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0229410532861948, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022197877373546362, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021377885593101383, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02070194052532315, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.020131610329262913, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0194277562526986, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.018682389431633056, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01807647167239338, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.017554222429171203, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01690123380627483, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01636932538356632, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.01580554957035929, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.01533501646015793, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.014932817537337542, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.014630869585089386, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014387949169613421, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.014155442258343101, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01395057128276676, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.013812684658914805, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01369777740444988, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.013656776342540979, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.013705900739878416, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.013841330157592893, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.014058716418221593, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01456066274549812, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.015299829633440822, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.8726284503936768, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.846473217010498, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.8069491386413574, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.771669864654541, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.7395079135894775, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.699056625366211, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.657017707824707, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.6138908863067627, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.5603435039520264, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.5058839321136475, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.454198122024536, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.379047393798828, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.308917760848999, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.212043046951294, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.1240789890289307, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.0441410541534424, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.948351502418518, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.8432661294937134, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.7345890998840332, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.641497015953064, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.5358405113220215, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.4369040727615356, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.3371717929840088, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.2414851188659668, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.1525343656539917, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.0589334964752197, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.9852558970451355, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.9254021048545837, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.854455292224884, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.783151388168335, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.7295600771903992, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.6878990530967712, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.6395045518875122, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.6016898155212402, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.5627719759941101, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.5295841097831726, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.49978870153427124, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.47575512528419495, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.45359811186790466, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.4309101104736328, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.41698527336120605, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4051717519760132, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3956409990787506, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3907434642314911, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.39004936814308167, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3956034779548645, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.4102989435195923, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.4414198100566864, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.4764115810394287, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.16319444444444445, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.17981150793650794, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.20287698412698413, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2065972222222222, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.20858134920634921, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.20634920634920634, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.20758928571428573, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.21106150793650794, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2197420634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.2326388888888889, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.25024801587301587, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2785218253968254, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.3115079365079365, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.3663194444444444, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.419890873015873, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.46378968253968256, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.5143849206349206, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.5500992063492064, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.5828373015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.6108630952380952, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.6371527777777778, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.6631944444444444, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.6850198412698413, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.7108134920634921, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.7299107142857143, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.7490079365079365, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.7609126984126984, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.7785218253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.7934027777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8100198412698413, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8184523809523809, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8266369047619048, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8345734126984127, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.84375, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8536706349206349, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8625992063492064, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8675595238095238, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.871031746031746, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.875, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8742559523809523, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8777281746031746, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8819444444444444, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8831845238095238, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8821924603174603, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8812003968253969, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.876984126984127, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8720238095238095, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.859375, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8482142857142857, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.06655372569508207, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06864828003361248, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.06846377284994767, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.06607469258097932, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.06621663355742118, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.06369574257745578, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.06650403411599735, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.07228451513652019, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.08235974154800269, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09659292455988647, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11257340075521097, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.14140908908564245, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.17263344007312204, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.22133502110717912, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.2695222995849707, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.3051590371791471, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.3516390273072069, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.3848175205659312, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.42059568647060325, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.46102090342709295, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.4978959448599696, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.5379168077462056, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.5748745888911296, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.6210648817355607, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.6524456066437415, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.68515073085853, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.7033583123761341, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.7293805789490682, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.7482412898590863, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.7697575310559367, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.780344938125012, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.790512559021411, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8023449043772469, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8151840654771659, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.825920576077501, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8361239079423081, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8421286183740491, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8450272793080623, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.850806515507972, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8509577193438093, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8548940290410402, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8575570076471402, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8580472192593609, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8582212645939793, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8582609328358078, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8525040239932634, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8468584973693836, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8345627113405982, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8199135124481591, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.3387872575968504, "validation/loss_best": 0.3956409990787506, "validation/acc_best": 0.8831845238095238, "validation/f1_best": 0.8580472192593609} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 1.2953539383411408, "train/grad": 0.1677071090787649, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.833524169921875, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.8054217529296874, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.7632464599609374, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.72576904296875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.6915496826171874, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.6479693603515626, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.60210693359375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.5539227294921876, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.4935968017578123, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.43179443359375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.3733834838867187, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.2892225646972655, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.211475143432617, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.1056375885009766, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.011077461242676, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.9262663459777831, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.8258478450775146, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.7171458530426025, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.6064537024497987, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.5124901735782623, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.4071423226594926, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.3093753051757813, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.2116979533433914, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.1184850293397903, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.0325160717964172, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.9424924272298812, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.8719309389591217, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.8150028717517853, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.7473159536719323, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.6793549709022045, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.6278157341480255, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.587150074839592, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.5398364782333374, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.5033295720815658, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.4656198328733444, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.43326699316501616, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.4031879584491253, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.3781546415388584, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.35432640932500364, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.3291086158156395, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.31260092064738276, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.30007383063435555, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.2862910944968462, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.27707453653216363, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.270328079611063, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.2693164239823818, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.2744349958002567, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.2882904291898012, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.30892245210707187, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.041610444318503144, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04020881633274257, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03816969969309866, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03650569282472134, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03518011200241745, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03381829352118075, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.032782297376543285, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.032037288621068, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.031393738156184556, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.030887817097827792, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.030458420822396873, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029860353032127022, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02930896235629916, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02855213004164398, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02787230039946735, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.027259695855900646, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.026530032940208913, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02573720134794712, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02491643065586686, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.024209093963727356, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02340051794424653, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022629940127953888, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021832402385771273, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.021041603535413744, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02028493197169155, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.019458424802869558, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01878530679270625, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01822494141291827, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.017540441658347845, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.016832277830690145, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.016282577575184407, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.015843615266494453, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.015344073446467519, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.014945400338619947, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.014499648604542018, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.01409329361282289, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.013698499761521815, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.013355312021449208, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.013024088139645755, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.012715182981919497, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.012560416178312153, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.012483762581832708, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.012462967867031693, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.012503268183209001, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.012636871291324497, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.013077517366036772, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.013729476106818765, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.014719751877710224, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01575373960658908, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.8073465824127197, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.7774658203125, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.732670545578003, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.6927647590637207, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.65639066696167, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.6093976497650146, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.559537649154663, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.5067739486694336, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.440300464630127, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.372495174407959, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.308759927749634, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.217851400375366, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.134897470474243, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.023491621017456, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.925490379333496, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.838584542274475, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.7370198965072632, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.6288982629776, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.5201948881149292, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.4293144941329956, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.3286436796188354, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.236780047416687, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.1461586952209473, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.0608563423156738, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.9831137657165527, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.9030144214630127, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.8412611484527588, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.7920461297035217, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.7344604134559631, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.6772051453590393, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.6341760754585266, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.6002220511436462, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.5609811544418335, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.5311388373374939, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.500010073184967, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.4725043773651123, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.44733139872550964, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.4284457266330719, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.41259050369262695, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3968469500541687, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.38801440596580505, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3840460181236267, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3816131055355072, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.37959980964660645, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3772972822189331, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3833217918872833, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3883315920829773, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3965892195701599, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.4168311357498169, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.20238095238095238, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.20560515873015872, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2078373015873016, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.20709325396825398, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.2078373015873016, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.2113095238095238, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.22296626984126985, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2333829365079365, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2569444444444444, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.2849702380952381, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.31820436507936506, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.36334325396825395, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.4166666666666667, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.4791666666666667, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.5285218253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.564484126984127, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.5974702380952381, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.6264880952380952, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.6515376984126984, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.6721230158730159, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.6964285714285714, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.7172619047619048, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.7361111111111112, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.7534722222222222, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.767609126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.7837301587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.7951388888888888, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8040674603174603, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8154761904761905, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8256448412698413, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8328373015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8412698412698413, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8492063492063492, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8544146825396826, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8591269841269841, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8663194444444444, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8740079365079365, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8772321428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8792162698412699, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8809523809523809, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8844246031746031, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8844246031746031, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8841765873015873, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8839285714285714, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8841765873015873, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8824404761904762, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.878968253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8754960317460317, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8722718253968254, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0689387210772362, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06585287631200286, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.06544532443738829, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.06598823489763143, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0677661293870529, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.07244351901302702, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.08652480963984745, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.09729931711928448, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.12134449710760525, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.14941898514491697, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.18080263122972942, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.21805385797433138, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.27131466675563704, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.3236281978352123, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.3694895357677398, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.40478803043556255, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.4455791280956103, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.48315478167547826, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.5187192937301901, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.5477987591782563, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.5889505195811292, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.6237285103108116, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.6585908725166776, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.6856630087977577, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7060517689544048, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7269413306578814, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.7452852246877872, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.7582171723617319, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.7739508741714739, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.7882974953447828, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.7981833497019418, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8091544426674974, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.819658514081739, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8278901268830217, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8341090526271552, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8422383560748095, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8511703024177723, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.854613185865273, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8575059326583295, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8596211356937966, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8628231522714783, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8638489957679598, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8640786629031924, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8617480755219594, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8605272677959837, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8580430443419326, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8552304091609907, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8522300501778245, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8506128403279521, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 0.31260092064738276, "validation/loss_best": 0.38801440596580505, "validation/acc_best": 0.8844246031746031, "validation/f1_best": 0.8628231522714783} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 1.1991504761576652, "train/grad": 0.1574253772944212, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.7829327392578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.7519500732421873, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.705572509765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.663779296875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.6251031494140626, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.5744549560546877, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.519927978515625, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.4615740966796875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.3882015991210936, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.3138088989257812, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.2443252563476563, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.145950927734375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.057136306762695, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.9391083908081055, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.8361513137817382, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.7457826137542725, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.64121591091156, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.5306752300262452, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.4207486820220947, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.3294515490531922, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.2291043609380723, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.137698295712471, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.0480862510204316, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.9640926015377045, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.8877931895852089, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.8092340806126594, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.7484378457069397, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.6998036679625511, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.642449537217617, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.5851290453970432, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.5418341080844402, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.5077239362895489, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.467862718552351, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.43705672860145567, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.4050919127464294, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.377619062513113, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.3521150732040405, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.33113357797265053, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.31122126176953313, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.2901499692350626, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.27661528773605826, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.26640653491020205, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.25559605527669194, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.2493901639804244, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.2448322969675064, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.2431417280808091, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.24645386405289174, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.25659273786470294, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.2678573777526617, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.038375321635976435, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.036932270731776955, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.035041125817224385, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0336956219933927, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.032770928824320436, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0319344125688076, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.031331702144816516, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03084106247872114, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.030307557294145228, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.029790373109281062, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02930709121748805, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028618377353996038, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02799150115810335, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.027150839800015092, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.026407319521531462, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02574743394739926, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.024976098584011196, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02414711005985737, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.023307982729747892, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022595604294911028, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.021795723559334873, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.021040719402953982, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0202742698835209, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01952889814041555, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.018820593943819405, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.018056433480232954, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.017437457866035403, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.016923563121818006, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.016301453094929457, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.015668623559176923, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01518864975310862, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.014806034569628536, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.014353271652944387, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.013988741636276246, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.01358988333493471, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.013231024760752916, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.012908053840510548, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01266940426081419, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.012457328899763524, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.012238081542309374, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.012093154322355986, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.011990188546478748, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.011910396185703576, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.012014304620679468, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.012119750380516053, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.012217169729992747, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.012515333741903304, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.013116654208861291, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.013593213292770088, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.7534241676330566, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.7206687927246094, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.671581506729126, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.6273090839385986, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.58625864982605, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.5320677757263184, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.473627805709839, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.411436080932617, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.333709239959717, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.2553696632385254, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.182933807373047, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.0816097259521484, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.9908233880996704, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.8716415166854858, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.7690860033035278, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.6798640489578247, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.5775686502456665, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.470678448677063, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.3654555082321167, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.2789313793182373, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.1847944259643555, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.0998873710632324, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.0174047946929932, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.9408231973648071, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.8718416094779968, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.8013026714324951, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.7471367716789246, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.7041618824005127, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.6539612412452698, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.6040797829627991, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.5665639042854309, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.5372576713562012, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.5034802556037903, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.47831371426582336, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.4536648988723755, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.43364110589027405, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.41686102747917175, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.4047996997833252, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.39441609382629395, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3844257593154907, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3770219087600708, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.37215664982795715, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3711809813976288, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3710583746433258, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3687586188316345, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3661680817604065, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3717973530292511, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3842858076095581, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3991813063621521, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.20808531746031747, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.20684523809523808, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2065972222222222, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.21006944444444445, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.21651785714285715, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.22767857142857142, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.24627976190476192, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.26612103174603174, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.30456349206349204, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.3420138888888889, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.3851686507936508, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.4437003968253968, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.49875992063492064, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5496031746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.5875496031746031, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6148313492063492, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.6411210317460317, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.6649305555555556, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.6912202380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7125496031746031, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7346230158730159, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.7495039682539683, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.7646329365079365, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.7817460317460317, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.7978670634920635, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8097718253968254, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.816468253968254, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8239087301587301, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8315972222222222, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8447420634920635, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8511904761904762, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.857390873015873, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8648313492063492, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8685515873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8732638888888888, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.875, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8777281746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8764880952380952, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8787202380952381, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8777281746031746, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8802083333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8821924603174603, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.878968253968254, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8799603174603174, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8794642857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8834325396825397, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8836805555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8809523809523809, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8787202380952381, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.06618683651829098, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06397466315939629, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.06556674949584226, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.07033876463571669, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.07848325965821135, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0908360639259832, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.1091821331101347, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.12890520595118407, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.1657629111223357, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.19962617956626158, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.2397829304014426, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.2944208409567305, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.3421082319992996, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.38907319385882777, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.4374260561633963, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.4718045300970877, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.5049954544063162, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.5356660131886641, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.5776939772758725, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.6206307854640818, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.6623781363612627, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.6844067978288945, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7078697398585957, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.7303229226402559, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7556601230176869, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7708045803548571, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.7781204283668411, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.7887480693041996, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.7987166462621476, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8138205794450535, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8215481565879825, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8288528614613468, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8396362537623904, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8436201597292926, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8493237944696161, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8516900008498912, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8550952547679629, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8539853655907867, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8568936607794484, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8564132742969025, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8592274954958443, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8619942033486361, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.85895029522986, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8584158807024487, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8571495922875815, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8616078717343967, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8620658312995333, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8612461454359535, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8595631042564972, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.24645386405289174, "validation/loss_best": 0.3717973530292511, "validation/acc_best": 0.8836805555555556, "validation/f1_best": 0.8620658312995333} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 1.116202174127102, "train/grad": 0.1504000385478139, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.7245208740234377, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.690291748046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.6386724853515626, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.5916925048828126, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.5476718139648438, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.4893063354492186, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.42630859375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.3593310546875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.2760113525390624, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.1928163146972657, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.1161220550537108, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.009729118347168, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.9152261924743652, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.7918838691711425, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.686510181427002, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.595467176437378, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.4917717218399047, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.3838951075077057, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.2782391387224197, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.1916208136081696, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.097566116452217, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.0128721410036088, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.930632677078247, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.8543807375431061, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.7855108472704887, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.7150912058353424, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.6609099483489991, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.6176623816788197, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.5668503980338574, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.5160970096290112, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.4776767802238464, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.44745316848158834, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.4121853405237198, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.3850887278467417, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.35723025754094123, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.3333108917623758, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.311105587631464, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.2927203890681267, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.2752040825039148, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.2560783175379038, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.24288874093443155, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.2332132337614894, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.22373362358659507, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.21763822693377732, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.21324450351297855, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.21080913387238978, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.20982706170529128, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.21396220922470094, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.22587562628090382, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.036201382484287024, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.034857218973338606, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03326862019486725, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.032275288263335825, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.031637051533907654, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.031041053384542466, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03053370981477201, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.030052684182301163, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.029471048628911378, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.028886596113443373, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.028344421442598106, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.027586439652368427, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026909194495528935, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02601673537865281, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.025247581787407398, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02457368588075042, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02379638929851353, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022970215352252125, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02214167852886021, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021443391991779207, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.020657728360965848, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.019926761221140624, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01918256198987365, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01846288166474551, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.017781450939364733, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.017053418047726154, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.016474948883987962, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.015996605921536684, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.015417503933422267, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.014810068788938225, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.014339999337680638, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.013953310321085155, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.013503056718036532, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.013155686515383423, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.012824889968615026, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.012552833585068583, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.012287915411870926, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.012037032144144177, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.011781940867658705, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.011507085703779013, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.011289042041171342, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.011212426205165684, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.011307467590086162, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.011400891372468323, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01159993099514395, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.01175431981915608, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.011748694330453873, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.012049560358282179, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.012685355155263096, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.7082479000091553, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.672978639602661, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.619788885116577, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.570805072784424, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.524688959121704, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.46328067779541, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.397049903869629, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.327139377593994, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.240459680557251, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.154574394226074, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.076045036315918, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.9678261280059814, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.8727558851242065, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.7497682571411133, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.645707130432129, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.556514024734497, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.4559398889541626, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.352110743522644, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.2514053583145142, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.169620156288147, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.0814929008483887, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.0028704404830933, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.9272178411483765, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.8576636910438538, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.7955306172370911, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.732297420501709, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.6841131448745728, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.6461006999015808, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.601813793182373, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.5584043264389038, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.5259628295898438, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.5009127855300903, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.47249141335487366, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.4515853822231293, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.4314705431461334, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.41519540548324585, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.40000417828559875, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.3874361515045166, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.37636706233024597, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.36580514907836914, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3593710660934448, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3563663065433502, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3565995991230011, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.35877177119255066, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.36139827966690063, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.36666426062583923, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3732099235057831, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.39048945903778076, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.40904465317726135, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.20709325396825398, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.20560515873015872, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2095734126984127, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.21750992063492064, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.22743055555555555, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.24677579365079366, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.2683531746031746, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.30654761904761907, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.345734126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.3968253968253968, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.44047619047619047, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5047123015873016, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5461309523809523, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5875496031746031, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6192956349206349, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6403769841269841, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.6622023809523809, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.6870039682539683, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7113095238095238, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7341269841269841, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7475198412698413, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.7633928571428571, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.78125, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.7973710317460317, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8100198412698413, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8196924603174603, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.830109126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8360615079365079, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8435019841269841, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8521825396825397, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8608630952380952, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8663194444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8720238095238095, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.875, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8774801587301587, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8799603174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8824404761904762, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8869047619047619, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8893849206349206, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8901289682539683, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8923611111111112, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8916170634920635, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8918650793650794, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8901289682539683, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8883928571428571, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8844246031746031, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8831845238095238, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8792162698412699, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8757440476190477, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.06411904661067883, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06411201375861071, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.06981176983438406, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0796814253303337, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.09092694359803336, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.10926384070285122, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.13197012697748065, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.1685956568421286, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.20345454930272683, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.24988462668172334, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.2896984931346771, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.34262853173624314, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.382176702396691, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.4366187624527559, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.4747522142557093, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.5056295653143158, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.5386322997852815, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.5791442306012669, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.6241993227689584, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.6647672595766275, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.6855612554501842, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7077549475576029, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7316869207393463, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.7542421648355102, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7696830721043213, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7839033868034806, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.7966638147287409, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8037399718385594, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8136315402061427, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8224847924833072, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8324473680781185, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8400155272798076, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8472455872419191, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8507992922015866, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8542350047009095, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8574431951557581, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8612481595981394, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.865689798418772, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8681280899469473, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8687286842196048, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8719692580660822, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8706994408985919, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.87194824308358, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8700811209884565, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8674556850508585, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8628069237025581, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8612722509951192, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.853201571292395, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8482639753368124, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 0.24288874093443155, "validation/loss_best": 0.3593710660934448, "validation/acc_best": 0.8923611111111112, "validation/f1_best": 0.8719692580660822} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 1.060621630549431, "train/grad": 0.1478108098357916, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.6864239501953127, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6499560546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5942071533203124, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.5424737548828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.4934091186523437, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.4283651733398437, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.358175048828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.2843218994140626, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.1933919525146486, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.1036126708984373, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.0220399856567384, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.910118637084961, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.8122503471374511, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.686393928527832, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.5805699181556703, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.490190486907959, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.3883380365371705, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.2837066334486007, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.182187163233757, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.0997308108210564, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.0109921240806579, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.9316062596440315, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.8550229054689408, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.7843720450997352, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.7209858059883117, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.6561672529578209, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.6063218128681183, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.5665418510138989, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.5197204850614071, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.4729177913069725, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.4375001556426287, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.4096211487054825, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.37703047901391984, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.3519720397889614, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.326116348952055, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.30393865652382374, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.2832261121273041, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.2661226526275277, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.24952296741306781, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.23149606082588434, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.21987541384994982, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.21091747790575027, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.2014004145935178, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.19529986958950757, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.19125453628599642, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.19064041234552861, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.19396197225898504, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.20290552515536547, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.21314766120165587, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0350178109575063, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.033918955521658065, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.032734496928751466, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03203349884599447, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.031559203350916505, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03104533036239445, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03053835210390389, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.030014952896162867, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0293678080663085, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.028717737877741456, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.028121664244681596, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02729935573413968, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026570399440824986, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.025615211203694345, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.024802642101421954, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.024093867791816593, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02327993532642722, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022422180473804475, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021566930329427123, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020848864531144498, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.020049785594455898, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.019309168034233153, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.018572745430283248, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.017860310073010623, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01720225647557527, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01650288819335401, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.015945780789479613, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.015484412941150367, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.014920439054258168, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.014325794801115989, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.013854420031420886, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.013473402340896428, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.013012002394534648, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.012664420674555003, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.012311598611995577, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.012023049076087772, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011773947500623763, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.011592951135244221, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.011384765533730388, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.011123395483009517, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.011011493916157633, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010953510815743356, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.010942627876065671, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.011026740185916423, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01127903048414737, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.011614513525273651, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.012028728690929712, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.012705404264852404, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.013238630911801011, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.670037269592285, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.632533311843872, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.5748658180236816, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.5211236476898193, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.46998929977417, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.402312755584717, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.329627513885498, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.2532901763916016, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.160027265548706, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.068584442138672, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.9860472679138184, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.873572826385498, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.7759064435958862, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.651415228843689, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.5476096868515015, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.45969557762146, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.3612498044967651, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.2609769105911255, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.164798617362976, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.0872093439102173, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.0044012069702148, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.9311067461967468, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.8611671924591064, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.7969777584075928, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.740052342414856, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.682396650314331, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.6384198069572449, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.603887677192688, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.5634111762046814, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.5241403579711914, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.49522751569747925, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.4734269082546234, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.44853323698043823, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.430245041847229, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.41212403774261475, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.3976047933101654, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.38506272435188293, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.37537556886672974, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.36649465560913086, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3583831191062927, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.35498493909835815, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.35498952865600586, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3569594621658325, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3600277602672577, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3654530346393585, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.376432865858078, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3893158733844757, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.40986090898513794, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.44413769245147705, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.20560515873015872, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.20858134920634921, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.21626984126984128, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.22817460317460317, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.24305555555555555, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.26686507936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.30505952380952384, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.34052579365079366, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.3948412698412698, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.4476686507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.49702380952380953, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5456349206349206, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5783730158730159, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6145833333333334, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6418650793650794, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6629464285714286, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.6870039682539683, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7113095238095238, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7380952380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.75, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7661210317460317, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.7834821428571429, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.7983630952380952, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8122519841269841, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8201884920634921, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8308531746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8355654761904762, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8439980158730159, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8519345238095238, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8566468253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8628472222222222, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8668154761904762, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8715277777777778, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8757440476190477, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8787202380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8814484126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8854166666666666, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8864087301587301, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8878968253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8918650793650794, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.890625, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8891369047619048, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8861607142857143, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8864087301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8854166666666666, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8814484126984127, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8777281746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8715277777777778, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8700396825396826, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.06402038508684628, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.06874986255251991, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.07831807059426286, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.09123140334461916, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.10552522327047209, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.13047578519261976, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.16582401129738203, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.19848396641035457, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.24717194353234712, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.2947785515748017, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.33620279518573104, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.38266521541127313, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.4230466547784482, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.470795021571137, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5064970442189966, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.5354550622274198, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.5772010227808186, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.6231755328952501, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.6689841720190257, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.6883131218170344, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7116752942154874, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7344431787591016, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7574306610821386, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.7748240167332635, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7852774401875716, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7977169952899432, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.803028909954593, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8135947053629972, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8244595995588093, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8304632787821801, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8378085939995646, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8425123530198291, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8488523990823756, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8525964236840989, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8560833433083869, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8591602530926578, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8631476956340429, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8650089929706355, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8673196545750306, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.871278182113245, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.86974952272124, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8681620998945386, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8656904873519793, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.866572578416452, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8659496766740162, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8627550978556997, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8578043543880237, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8457146815813181, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8387544082155168, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.23149606082588434, "validation/loss_best": 0.3583831191062927, "validation/acc_best": 0.8918650793650794, "validation/f1_best": 0.871278182113245} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 1.0143712398409843, "train/grad": 0.14208621848374606, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.6515411376953124, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6126275634765626, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5523077392578126, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.49568115234375, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.4416668701171873, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.370205078125, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.293638153076172, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.213671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.1162123107910156, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.021337127685547, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.9357910919189454, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.819955587387085, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.719701690673828, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.592678542137146, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.487069320678711, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.397915269136429, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.2984075045585632, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.197213265299797, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.1000875088572502, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.021800702214241, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.9382746574282647, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.864184699356556, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.7931591960787773, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.728108591735363, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.6697529691457749, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.6104197931289673, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.5648090934753418, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.5284331050515175, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.48555314898490903, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.44255371317267417, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.409904892295599, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.38400091230869293, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.35366025067865847, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.3300593077391386, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.30565235994756224, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.2844507161527872, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.26461180455982686, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.2479043124616146, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.23215519733726978, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.21542802587151527, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.20471296783536672, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.19702112436294555, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.18910481180995703, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.18429034538567066, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.18094182316213847, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1804020368680358, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.1834188650920987, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.190762487500906, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.2009510774165392, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.033483534390106796, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.032603647001087664, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03170432776212692, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.031151804765686394, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.030729529578238726, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03022140053100884, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.029689911417663098, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02912998203188181, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.028444456420838832, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0277714613545686, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.027162592755630613, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02633024745620787, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02560432012192905, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024674337701871992, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023884512400254607, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02320513553917408, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02242593524977565, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.021603159578517078, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020781314158812167, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020088786371052266, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.019314424023032187, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0185920200496912, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.017864319062791765, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.017171723921783268, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.016522442176938056, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.015830457070842387, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.015287185003980994, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.014832232967019082, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.014282143414020539, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.013707694238983095, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.013250112482346595, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.012883224543184042, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.012428135648369788, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.012058998835273086, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.011682256853673608, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.011355753573589027, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011058163719717413, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010791755362879485, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010518763980362564, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.010236208036076277, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010111065541859716, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010060326368547977, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.010090999016538262, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.010206776419654489, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.010416919530835003, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.010755792618729174, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.011155541015323251, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01160474275238812, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.012167976405471563, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.6378660202026367, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5979840755462646, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.535998821258545, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.477736711502075, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.4223666191101074, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.349243640899658, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.2712464332580566, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.190058469772339, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.0917913913726807, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.9965301752090454, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.911164402961731, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.7960660457611084, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.6972904205322266, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.572646975517273, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.469727873802185, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.3833801746368408, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.2874829769134521, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.1906806230545044, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.0983431339263916, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.0246217250823975, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.9461901783943176, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.877301812171936, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.8116877675056458, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.7519192099571228, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.6986807584762573, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.6449903249740601, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.6040887832641602, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.5719486474990845, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.5347075462341309, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.4985065162181854, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.471882164478302, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.45167842507362366, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.42884090542793274, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.412244588136673, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.3956213593482971, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.38235902786254883, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.37133610248565674, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.3632834851741791, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.35764873027801514, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3548029959201813, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3542691171169281, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3525662124156952, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3500378429889679, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3499228358268738, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.353679358959198, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.35941454768180847, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3682982325553894, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.37610137462615967, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.38288843631744385, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.20833333333333334, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.21180555555555555, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.22569444444444445, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.24131944444444445, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.25992063492063494, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.2934027777777778, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3308531746031746, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.37648809523809523, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.4330357142857143, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.4915674603174603, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.533234126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5756448412698413, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6078869047619048, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.642609126984127, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6626984126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6857638888888888, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7105654761904762, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7309027777777778, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7497519841269841, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7624007936507936, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7802579365079365, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.7931547619047619, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8062996031746031, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.816468253968254, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8253968253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8353174603174603, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8447420634920635, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8519345238095238, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8586309523809523, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8690476190476191, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.873015873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8767361111111112, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8792162698412699, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8821924603174603, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8844246031746031, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8874007936507936, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8888888888888888, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.890625, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8911210317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8903769841269841, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8891369047619048, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8903769841269841, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8903769841269841, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8896329365079365, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8908730158730159, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8913690476190477, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8874007936507936, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8864087301587301, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8841765873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0682312515559427, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0727489260239447, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.08923082509783023, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.10496851062271363, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.12229534644943792, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.1554912721982317, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.19104070921509336, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.23039193030394556, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.28594308755730935, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3333526865063484, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.3740961915254837, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.4198092479571281, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.46030928121024467, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5055180145639193, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5359399919693028, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.5720980459407885, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.619675082290772, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.6562011874689933, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.6862731200749422, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7048480825385032, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7285543488748872, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7483180996888713, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7659337952899719, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.779076116229467, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7900280310595045, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8023280291838015, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8152053965203836, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8222767546188118, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8303245438931354, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8439729265972676, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8499791397809964, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8544349826404691, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8569601192854783, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8607643568672152, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8624928162076839, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8659365924509397, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8683890359318863, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8704385562242898, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8715944506006382, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8707390301597121, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8684383109041883, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8706301797915559, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8700037383972801, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8695141101828952, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8712975109236797, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8731416739892195, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.870972052921753, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8699800858958594, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8678505555566806, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 0.1804020368680358, "validation/loss_best": 0.35941454768180847, "validation/acc_best": 0.8913690476190477, "validation/f1_best": 0.8731416739892195} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.9779655906558037, "train/grad": 0.13710477124899625, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.6197479248046873, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.5784530639648438, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.513982849121094, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.453238220214844, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.395413818359375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.319173583984375, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.238015899658203, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.1537064361572265, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.0520692825317384, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.9537776565551759, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.8657845878601074, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.7477814960479736, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.6465186977386475, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.519275119304657, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.4145478761196137, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.3267634719610215, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.2294968181848527, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.1312477988004686, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.0376547610759734, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.9627916440367699, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.8833079180121421, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.8132906344532966, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.7464830788969994, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.6854889732599259, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.6310023362934589, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.5756148375570774, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.5331752204895019, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.49922497987747194, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.4592157828807831, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.41900712698698045, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.38846868485212327, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.364216428399086, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.3357795200496912, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.31380678541958335, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.2911338544636965, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.27154623821377755, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.25326011307537555, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.23818524982780218, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.22382031854242088, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.20864827640354633, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.19863132648169995, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.19078258503228426, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.18235491815954447, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.1768133182823658, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.17299306664615868, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.17166712608188392, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.17264248043298722, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.1769212855398655, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.18339128758758305, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.032503930833190676, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03176015078090131, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03099305722862482, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.030470215938985346, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.030038597527891397, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.029492345387116075, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02891389879398048, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02830867676064372, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.027572429990395903, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02686019577085972, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.026218944313004613, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02535540605895221, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.024608553079888226, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023660723231732846, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02286914429627359, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022192102828994392, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02142501113936305, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020626554796472192, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01983665722422302, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01917788912076503, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.018447392568923533, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.017772752260789275, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.017102361549623312, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.016455591730773447, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015856486582197248, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.015212648441083729, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01469898536335677, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.014269028678536416, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.013747009779326618, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.013194271954707801, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.012757507436908781, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.012392898686230183, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01195027734618634, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.011602243701927363, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.011246911277994513, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.010949195004068315, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.010671762858983129, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010454778231214733, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010264249036554247, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.010101096779108048, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.009988629571162165, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.009873739448376, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00975453418912366, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00970956582808867, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.009761198721826077, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00995048784185201, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.010244439037051052, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.010704273080918938, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01114917085506022, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.610459804534912, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5683703422546387, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.5024056434631348, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.4403276443481445, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.381462574005127, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.303847074508667, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.221738338470459, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.1368982791900635, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.034968614578247, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.936867356300354, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.849670648574829, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.7332650423049927, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.6338822841644287, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.509861707687378, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.408224105834961, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.3235691785812378, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.2301253080368042, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.1362254619598389, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.047268271446228, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.9764838814735413, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.9017143845558167, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.8362585306167603, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.7740294337272644, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.7176589965820312, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.6678590178489685, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.6178130507469177, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5800480246543884, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.5502521991729736, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.5160776376724243, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.4828232526779175, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.45869705080986023, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.44017401337623596, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.4195207357406616, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.40427568554878235, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.38919132947921753, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.37681785225868225, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3656594455242157, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.35688960552215576, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.34983935952186584, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.34411513805389404, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.34253910183906555, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.34261175990104675, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.34456881880760193, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.34695684909820557, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.35171523690223694, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3589867055416107, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.36479485034942627, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3705276846885681, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3758752644062042, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.21006944444444445, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.21651785714285715, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2316468253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2509920634920635, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.2740575396825397, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3149801587301587, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3576388888888889, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.41021825396825395, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.4660218253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5198412698412699, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5518353174603174, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5932539682539683, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6212797619047619, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6537698412698413, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6770833333333334, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6974206349206349, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7175099206349206, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7435515873015873, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7589285714285714, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7715773809523809, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7876984126984127, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8020833333333334, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8127480158730159, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8226686507936508, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8306051587301587, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8395337301587301, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8497023809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8556547619047619, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8640873015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8702876984126984, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8740079365079365, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8784722222222222, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8787202380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8819444444444444, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8829365079365079, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8849206349206349, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.886656746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8893849206349206, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8918650793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8923611111111112, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8908730158730159, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.890625, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8891369047619048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8901289682539683, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8883928571428571, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8849206349206349, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8851686507936508, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8826884920634921, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8824404761904762, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.07057313887776936, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.07871189106722098, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.09532668266530088, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.11346505996778107, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.1375556631969767, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.17642538771296762, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.21258427522501414, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.2620383563096691, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3095576754113357, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3582066754106522, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.38755683981142186, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.43896109053414245, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.4793996135699228, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5227424944747954, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5552786259482159, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.5899665961138554, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6324147722409836, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.675395103141505, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.6984768216046805, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7172829983874527, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7400755018447137, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.75980508486451, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7744157583100291, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.786938071517973, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7971018890632264, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8073160872361838, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8195702649262502, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8259831379940549, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.836626346953816, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8468629790984342, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8504531140758923, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8558043499041761, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8563612802170288, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.860010426290446, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8613768333143287, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8637104383489473, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8659550341244449, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8695981425763677, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8712900285086326, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.87250779064872, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8700709969449343, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8707254580652178, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8691186537177212, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8696647809089132, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8671925405923273, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8612372304155043, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8623536965299907, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8596230824500518, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8582258130822431, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.20864827640354633, "validation/loss_best": 0.34411513805389404, "validation/acc_best": 0.8923611111111112, "validation/f1_best": 0.87250779064872} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.9449706307053566, "train/grad": 0.1348352812975645, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.594322509765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.5507431030273438, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.4821957397460936, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.41755126953125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.3564108276367186, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.2758209228515627, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.1907138061523437, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.10312744140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.998148307800293, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.8974179649353027, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.808053321838379, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.6888512516021728, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.5875187778472901, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.4610939717292786, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.3577243304252624, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.271421108841896, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.176197996735573, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.0803879097104072, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.9894399026036262, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.9169912806153298, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.8401241075992584, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.7726221948862075, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.7083599624037743, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.6496009151637554, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5973358975350856, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.544214034229517, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.503507388830185, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.47121220037341116, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.4330276131629944, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.39478849299252033, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.36581775844097136, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3428444080799818, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.31582234762609007, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.2948105655610561, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.2729256986826658, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.2539554441720247, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.23624676384031773, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.22149182468652726, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.2074615580588579, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.19301830500364303, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.18334152530878783, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.17561076913028956, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1669750588387251, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.16134150691330432, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.15717793498188257, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.15561137855052948, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.1562488828599453, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.15946923976764082, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.1644664687104523, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03199030011892319, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0313624188862741, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.030688698794692756, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03019238233566284, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.029760483680292965, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.029197802180424334, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02859770894050598, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027977171493694185, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02723199193365872, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02651082198135555, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025868447329849005, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025004575923085214, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.024258826756849886, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02331240554340184, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02251769885420799, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021838437216356397, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02106261278502643, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020251779658719896, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0194443763140589, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.018774757124483586, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0180299447895959, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.017345441710203885, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.016663007736206055, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01601299918722361, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015408743685111404, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.014763450352475047, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.014246117644943298, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.013821385083720088, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.013307214407250285, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.012771642431616784, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.012351449811831117, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.012009409982711076, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.011591257876716554, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.011255531599745154, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.010900465867016464, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.010589494495652616, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.010300007183104753, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010064567239023745, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009880852492060513, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.009746125433593988, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00963607280049473, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00950019472045824, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.009342130047734827, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.009306528051383793, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.009424851324874908, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.009658142239786685, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.009906176244840026, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.010279403969179838, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.010658684195950627, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.5877315998077393, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.543619394302368, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.474212169647217, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.408851146697998, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.347043991088867, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.266016721725464, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.1806235313415527, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.0929975509643555, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.9883238077163696, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.8882086277008057, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.7998336553573608, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.682395100593567, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.5830414295196533, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.4596972465515137, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.359553337097168, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.2761398553848267, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.1847866773605347, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.093457818031311, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.007277011871338, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.9389936923980713, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.8671773672103882, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.8044090867042542, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.7450960874557495, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6912384629249573, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.6437090635299683, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5960935950279236, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5600698590278625, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.5317614674568176, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.49925532937049866, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.4676833152770996, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4445563852787018, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.4270194172859192, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.407441645860672, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.39339470863342285, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.3800697922706604, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.3700200617313385, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3622452914714813, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.3569623529911041, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.35284221172332764, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3492906987667084, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3477480113506317, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3465317487716675, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.34603002667427063, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.34742167592048645, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.35219839215278625, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.35854047536849976, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.36466971039772034, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.37530088424682617, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3873119652271271, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2123015873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.22321428571428573, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.24032738095238096, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2638888888888889, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.2934027777777778, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3315972222222222, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3819444444444444, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4322916666666667, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.4928075396825397, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5401785714285714, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5706845238095238, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6116071428571429, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6346726190476191, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6661706349206349, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6907242063492064, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7100694444444444, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.734375, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7502480158730159, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.767609126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7782738095238095, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7971230158730159, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8107638888888888, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8184523809523809, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.828125, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8365575396825397, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8474702380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8536706349206349, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8571428571428571, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8638392857142857, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8727678571428571, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8772321428571429, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8797123015873016, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8821924603174603, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8869047619047619, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8881448412698413, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8883928571428571, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8883928571428571, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.890625, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8911210317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8916170634920635, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8921130952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8938492063492064, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8938492063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8938492063492064, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8916170634920635, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8891369047619048, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8878968253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8854166666666666, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8814484126984127, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.07362886977905037, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.08657648547398081, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.10339840083170782, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.12708563205396414, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.15532705382363435, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.1915930526373027, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.23449928580167687, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.2841184640403067, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.33196903255243493, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.38030772345280905, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.4136865988902504, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.4652747363610777, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.49622832186221516, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5429299892373043, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5828707425479352, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6212712271288178, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.663916998231278, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.6883846964550443, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.713201388468655, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7285452339082039, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7565139667225125, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7753602934840552, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7840181576470177, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.7945884662700238, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8044167461064502, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8180657777785908, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8247382659059872, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8296631758593664, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8391746064494361, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.850144109777104, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8541817021684056, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.856911059839217, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8605269886719301, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.866170214001337, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.867599903035561, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8676379060831673, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.867053384442244, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.869375213441815, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8697733096333742, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8698636223501133, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8700008212941519, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8713076870477895, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8706468357338982, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8711647003285101, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8684918850959626, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8648202789610799, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8638325783281495, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8620038908077813, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8576304900933436, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.17561076913028956, "validation/loss_best": 0.3465317487716675, "validation/acc_best": 0.8938492063492064, "validation/f1_best": 0.8713076870477895} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.9212883222103119, "train/grad": 0.13177342347800733, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.5743344116210936, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.528795166015625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.457162170410156, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.389451904296875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.325450134277344, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.241660461425781, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.153509063720703, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.063312644958496, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.9556474304199218, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.8529358291625977, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.7623919582366943, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.642380452156067, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.5409904503822327, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.4152493584156036, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.313166081905365, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.2284580618143082, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.1354064098000527, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.042345179617405, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.9545109257102012, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8846725082397461, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.8110300007462502, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.7464106100797653, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6848902028799057, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.6286742796003818, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5785762663185596, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.5274841955304146, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.4881968435645103, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.45678247839212416, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.41966145873069766, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.38224073342978954, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.35375550739467143, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3311078620702028, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.30456256680190563, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.28393612645566463, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.26261728540062906, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.2441171943768859, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.2267754466459155, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.21237119480967523, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.19832282163202763, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.18317990981042384, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.17296653993427755, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.16510918466374278, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.15658793989568948, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.1506348816677928, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.14549176136031747, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.14193853804841638, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.14051573034375905, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.1407845743186772, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.14257585402578116, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.031602962370961905, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.031074783047661186, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.030467066895216704, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02997663985937834, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02952860403805971, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02893888724967837, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.028311801152303816, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02766727834008634, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02689402331598103, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026152391042560338, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025494493870064618, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024613886028528213, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023860775670036673, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02291032752022147, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02212156781926751, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0214474000222981, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02068266035988927, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019887261651456357, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019095424641855062, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01843908093869686, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.017707528779283166, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.017030479316599668, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.016354657337069513, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015702396584674715, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015091745192185044, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.014440229902975261, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01391566501930356, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01348029931075871, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.012946735722944141, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.012383489753119648, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011936789148021489, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.011574277132749557, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.011130059398710727, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010780016682110727, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.010413930066861213, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.010091553365346045, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009777820960152894, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.009513157224282622, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00922275656601414, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008868224038742482, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00861386738019064, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.008419853911036626, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00825993275619112, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.008196132284356282, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.008211709548486396, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.008325337312417105, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.00847561978502199, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.008651788345305248, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.008848594421287999, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.569340467453003, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5235111713409424, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.4511606693267822, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3832428455352783, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.3191540241241455, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.2353758811950684, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.1475651264190674, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.0579137802124023, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.9511626958847046, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.8497824668884277, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.760608196258545, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.642917275428772, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.5436633825302124, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.4212744235992432, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.3221113681793213, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.2400882244110107, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.1505080461502075, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.0612833499908447, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.977486789226532, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.9112377762794495, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.8416593670845032, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7810689806938171, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.7238340973854065, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6722391247749329, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.6265251636505127, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5810014605522156, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5465133786201477, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.5196434855461121, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.4887433350086212, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.458772748708725, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4369988739490509, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.42066213488578796, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.40292125940322876, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.390414297580719, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.37869566679000854, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.3693186342716217, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3604530394077301, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.35261014103889465, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.34629690647125244, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.34170615673065186, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.34004828333854675, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.33948203921318054, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3404768109321594, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3424244225025177, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3454928398132324, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3501313328742981, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3550868034362793, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3636733591556549, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.37155210971832275, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.21577380952380953, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2269345238095238, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.24851190476190477, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.27232142857142855, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3072916666666667, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3462301587301587, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.40153769841269843, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.45188492063492064, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.513640873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5545634920634921, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5840773809523809, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.623015873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6470734126984127, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6750992063492064, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6996527777777778, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7189980158730159, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7410714285714286, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7571924603174603, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7713293650793651, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.785218253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8025793650793651, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8132440476190477, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8226686507936508, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8333333333333334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8402777777777778, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8492063492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8556547619047619, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8621031746031746, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8692956349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8747519841269841, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8802083333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8826884920634921, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8846726190476191, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8856646825396826, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8849206349206349, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8864087301587301, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.888640873015873, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8913690476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.892609126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8936011904761905, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8938492063492064, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8948412698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8936011904761905, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8938492063492064, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8936011904761905, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8936011904761905, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8921130952380952, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8893849206349206, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8876488095238095, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0777377963913565, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.090278974882644, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.11062467485216103, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.13481695575552857, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.16889400698006354, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.20288274071130977, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.25421843347364864, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3005826718972277, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.35392062209546143, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3919205630464453, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.4297630155163072, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.4796782358391121, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.512446463576675, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5545923951126064, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.5959987468499661, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6310770419892255, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6699855600220341, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.6961428918055771, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7182646662553773, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7374094569406364, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7633716339858959, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7782911431520593, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7892008868753665, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8026884767947258, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8097259253159528, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8205264209732109, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8291169449904968, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8363271447152831, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8457877709813585, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.851356386359951, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8576426427891367, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8613901640293006, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8642262231993362, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8652349055494549, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8652992150398605, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8673374992001348, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8695697509381378, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8719624497849372, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8729703232658876, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8751950041274312, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8752117211826157, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8758188593370628, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8742355365038366, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8729334442926163, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8734156154765034, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8738599894834018, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8724092830198277, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8677982836271712, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8671320134186002, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.16510918466374278, "validation/loss_best": 0.33948203921318054, "validation/acc_best": 0.8948412698412699, "validation/f1_best": 0.8758188593370628} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.9034824693202972, "train/grad": 0.1314333088696003, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.5548858642578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.5078195190429686, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.433682556152344, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.363895263671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.298206787109375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.2123904418945313, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.1225196075439454, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.030832862854004, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.9219516563415526, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.8186088275909424, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.727749752998352, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.6078419399261474, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.5069558906555176, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3823448812961578, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.2813818502426146, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.1977936619520186, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.1061831921339036, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.0147511062026024, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.9287199717760086, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8604671528935433, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.7885143733024598, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.7254808792471885, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6656157568097114, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.6110400587320328, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5624141502380371, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.5129181982576847, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.47490776583552363, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.44443670377135275, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.40848740816116336, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.37224013932049277, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.3445589420944452, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3225872810930014, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.29660311616957186, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.2764286940544844, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.2553573005646467, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.23715726010501384, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.22004954375326632, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.2056114285811782, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.19166113510727883, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.17651462703943252, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.16637143183499575, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.15867623139172793, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.15018856827169658, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.14428250584751368, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.13917712263762952, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.135504050552845, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.13405515383929015, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.13437172666192054, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.13644718555733562, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03160035792738199, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.031106389677152037, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.030503538278862832, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.030002199364826083, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.029532392118126152, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02891964415088296, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0282699890807271, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027603473411872984, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026805128352716565, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026046522380784153, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025373401483520866, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024477087147533893, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023713318007066844, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022755870362743735, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021958754556253554, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02128235007636249, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020516033302992582, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019718253454193473, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01893242008984089, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01827857781201601, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.017555020470172166, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016890880917198956, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.016226167562417685, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0155882349004969, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.014992775404825806, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.014352873982861638, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013836823091842233, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01340580923948437, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0128728981083259, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.012309952382929623, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011863788021728397, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.011499667498283088, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.011056104286108166, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.01070419997908175, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.01033156894147396, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.010006460035219789, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009689353206194937, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.009404421758372337, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009116060780361295, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008813602360896766, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.008611492235213519, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00846320930402726, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.008339818753302097, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00828867253381759, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.008304632229264825, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.008361829475034029, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.008481789689976722, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.008712747185491025, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.009014254952780902, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.5549075603485107, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5077764987945557, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.4333529472351074, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.36331844329834, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.29748272895813, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.211714029312134, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.121986150741577, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.0306992530822754, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.9226528406143188, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.8203295469284058, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.7306689023971558, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.6126503944396973, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.5136562585830688, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.3919315338134766, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.2939140796661377, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.213002324104309, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.124849557876587, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.037227988243103, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.9551640152931213, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.8905742764472961, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.8228417634963989, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7638366222381592, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.7082640528678894, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6579659581184387, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.6135672330856323, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5693023204803467, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5358020067214966, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.5095633864402771, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.4794667959213257, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.4502376914024353, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4290601909160614, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.41275909543037415, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.3949953317642212, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.38216736912727356, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.36989814043045044, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.36045926809310913, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.35299912095069885, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.34778472781181335, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3439933955669403, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3405276834964752, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3389955163002014, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3384020924568176, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.33865073323249817, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.340271532535553, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.34256669878959656, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3454603850841522, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3485368490219116, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3527281880378723, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3576549291610718, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.21850198412698413, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.22991071428571427, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2534722222222222, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2839781746031746, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3152281746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.36235119047619047, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.41592261904761907, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.46899801587301587, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5238095238095238, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5647321428571429, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5902777777777778, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6272321428571429, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6500496031746031, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6805555555555556, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7043650793650794, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7251984126984127, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.746031746031746, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7609126984126984, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7750496031746031, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7896825396825397, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8040674603174603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8137400793650794, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8258928571428571, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8333333333333334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.841765873015873, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8504464285714286, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8566468253968254, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8638392857142857, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8715277777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8759920634920635, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8792162698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8821924603174603, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8834325396825397, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8861607142857143, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8901289682539683, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.890625, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8921130952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8911210317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.892609126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8948412698412699, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8936011904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8965773809523809, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8975694444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8970734126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8963293650793651, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8950892857142857, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.894593253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8953373015873016, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8943452380952381, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08109820076240543, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.09328954653419418, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.11604660642293284, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1463478407178144, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.1765417073480308, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.21616673270486433, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2681498204547193, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3137141332089848, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3625112277377914, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.404815516770793, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.4367138021373781, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.48714041231692323, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5179393676997059, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5624304448529701, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6086626703184205, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6459758377552164, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6768658868228773, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7010533156545046, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7229164365805392, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7450598136381706, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7644934515583601, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7767234618993216, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7924788672462169, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8011722056180075, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8113999437468319, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8223951436856208, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.830266643129111, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8390313895788796, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8488969564483355, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.853103348880006, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8570668761936495, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8610495699635722, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.862800315259243, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8652404042393698, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8695593812999662, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8707380227663671, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8731927924187549, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8717423321659096, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8730267939371776, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8750677371681167, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8735732688075598, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8761772690869223, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8769317290772934, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8761854553258617, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8756579442469588, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8746636011656234, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8736719640420889, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8734757781213991, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8725354866212723, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.15018856827169658, "validation/loss_best": 0.33865073323249817, "validation/acc_best": 0.8975694444444444, "validation/f1_best": 0.8769317290772934} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.8840845331549645, "train/grad": 0.13145329497754574, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.5401602172851563, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.4917779541015626, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.4151437377929685, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.343013916015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.275335235595703, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.1871783447265627, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.095171890258789, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.0015612030029297, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.890773391723633, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.7859474945068359, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6941564083099365, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5734249448776245, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.4722434973716736, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3476356589794158, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.2472377038002014, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.1644007557630538, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.0738304793834685, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.9837695172429085, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8992435792088509, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8322642809152603, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.7620408338308334, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.7004214277863503, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6421178844571114, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.588974437713623, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5415655890107155, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.4934781217575073, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.4565728145837784, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.4271580192446709, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.3923615078628063, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.3573720995336771, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.3307094895839691, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3095634862035513, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.28457006320357325, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.2652086967974901, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.24482861921191215, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.22716575246304274, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.21041478995233776, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.19640204701572656, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.18295496590435506, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.16852912079542876, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.15886210456490515, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1514392140135169, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.14322979846969247, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.13734941951930524, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.13191912680864334, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.12746138963848352, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.12502650966867804, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.12394404562190176, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.12423163468018174, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03162886504083872, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.031187975807115437, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.030618653362616897, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03012367813847959, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.029653175035491586, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.029037528512999414, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.028384771943092347, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027717613531276584, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026923150280490516, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026164347967132927, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02549540936946869, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02460236323066056, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023839781107380987, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02287786983884871, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022077844869345428, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021394352475181223, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020618289057165385, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019807367082685233, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019007607786916195, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.018342349934391676, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.017606559838168324, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016929563558660446, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0162544612493366, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015614103409461677, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015011176350526512, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01436706141103059, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013849084074608982, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.013418574770912527, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01288505740929395, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01232126819435507, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0118678168929182, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.01148959285346791, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.011021627108566462, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010643456485122443, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.010227849613875151, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009858812901657075, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009495348515920342, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.009182220194488763, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008888685314450412, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008578175224829464, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.008377962745726109, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00822036103112623, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.008058845987543464, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.007956764136906713, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.007893626685254276, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.007877650349400937, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.007930552843026817, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.008082860577851535, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.008234041139949112, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.5443203449249268, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.4960620403289795, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.419891357421875, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3485097885131836, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2813992500305176, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.1940526962280273, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.103161334991455, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.0106887817382812, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.9016510248184204, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7987443208694458, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.7087849378585815, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.590571403503418, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.4919342994689941, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.3707464933395386, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.273333191871643, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.1931979656219482, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.1059393882751465, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.0194613933563232, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.9386041760444641, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.8749077916145325, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.8082994222640991, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7503388524055481, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.6958074569702148, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6464849710464478, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.6030853986740112, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5596582889556885, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5268441438674927, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.5012710094451904, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.4718880355358124, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.4435129463672638, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4229351282119751, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.4073876738548279, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.39014577865600586, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3776603639125824, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.36585816740989685, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.35669565200805664, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3492441773414612, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.34393566846847534, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.33964085578918457, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.336539089679718, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.33531758189201355, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.33533647656440735, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.33605054020881653, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.33734023571014404, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.33879491686820984, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.34017109870910645, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3419618010520935, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.34575673937797546, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3505784571170807, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.22247023809523808, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.23239087301587302, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2594246031746032, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.2914186507936508, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.32217261904761907, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3722718253968254, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.42509920634920634, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4816468253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5344742063492064, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5694444444444444, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5962301587301587, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6316964285714286, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6532738095238095, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6840277777777778, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7063492063492064, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7284226190476191, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7485119047619048, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7648809523809523, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7787698412698413, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7938988095238095, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8082837301587301, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8199404761904762, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8288690476190477, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8373015873015873, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8442460317460317, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8534226190476191, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8606150793650794, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8668154761904762, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8712797619047619, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8772321428571429, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8804563492063492, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8846726190476191, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.888640873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8923611111111112, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8916170634920635, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.892609126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8938492063492064, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8938492063492064, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8948412698412699, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8963293650793651, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8975694444444444, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8968253968253969, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8978174603174603, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8978174603174603, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8978174603174603, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8980654761904762, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8985615079365079, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8975694444444444, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8968253968253969, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08538518794693485, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.09567950145589599, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.12221622594275545, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1540343052213354, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.18241380918832087, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.22422246324224643, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2756626786056048, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.32266262857889977, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3716355986036273, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.41049248249542686, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.44589116555115005, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.49346659743651694, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5242611617453485, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5690558616556358, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6119597493984985, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6517796849672929, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6826823091653976, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.70927435742057, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7298553466939405, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7517565278039824, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.770766977329739, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7852464075367644, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7959825629315732, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.805945145015752, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8155244220270591, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8261857676199978, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.834213518411707, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8434314648423379, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8490060598149297, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8550466349682813, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8591188430477655, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8632346232315878, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8674235947183381, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8719345729223414, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8721366459481834, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8724897376815364, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8736662965531792, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8735711371099917, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8743930655905597, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8760165552718329, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8767804980851446, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8771178276368611, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.877487111630091, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8776051131231808, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.877077947622172, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8786100083352967, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8787455158090209, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8787511046647927, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.877590110308708, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.12502650966867804, "validation/loss_best": 0.3419618010520935, "validation/acc_best": 0.8985615079365079, "validation/f1_best": 0.8787455158090209} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.8807502651214599, "train/grad": 0.1298759240284562, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.5481332397460936, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.499432373046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.422391662597656, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.3497955322265627, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.2814430236816405, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.192451629638672, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.099497604370117, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.0050839614868163, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.8933456802368165, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.787644853591919, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6951148509979248, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5735777473449708, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.471862268447876, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3466034209728241, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.245709729194641, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.162460122704506, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.071479970216751, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.9810051038861275, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8959914797544479, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8288531655073166, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.7582409527897834, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.6964361101388932, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.63785465285182, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5845884290337563, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5371445274353027, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.48895214036107065, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.45190586790442466, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.42237984135746953, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.3874494153261185, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.3521930070966482, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.3253463859856129, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3039301815629005, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.2786649303138256, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.25894044257700444, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.2384254266321659, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.2204854115471244, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.20363379068672657, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.18941346485167743, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.17577531918883324, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1609750624746084, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.15094052165746688, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1432487393170595, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1344912968017161, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.12818194704130292, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.12213619120419025, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.11702103305608035, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.11383226696401834, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.11160690234974027, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.11069221582263708, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03148254604078829, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03106165141798556, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03050890360958874, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03001780523918569, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.029548731995746494, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.028932523196563125, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02828115594573319, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027610683515667915, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026811186457052827, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026045781522989274, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025367031628265976, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02446040532551706, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023686857027933002, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022711165258660914, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021895671356469394, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021203290997073056, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020417190399020912, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019597729812376202, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01879108661785722, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01811968975234777, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0173763825930655, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01669759040698409, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01602036375552416, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015371554587036372, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.014762978600338102, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.014110917332582176, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013585768607445061, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.013145689610391855, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01260356864426285, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01202491934876889, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01155915015609935, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.011176675499882549, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01070326087065041, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010316446346696467, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00989968994865194, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009518233772832901, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009144944595173001, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008817278938367963, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008480828176252543, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008102810743730515, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007834976797457784, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.007633296181447804, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.007401766052935272, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00724303757539019, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.007096638880902902, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.006996295081917197, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006974927824921906, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.007022301171673462, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.007088244812330231, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.53702449798584, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.487961769104004, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.4107298851013184, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.338164806365967, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2701687812805176, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.181858539581299, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.09006404876709, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.9969805479049683, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.88723623752594, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7837682962417603, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.6936379671096802, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.5754750967025757, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.4768681526184082, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.356161117553711, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.2592519521713257, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.1796064376831055, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.0930286645889282, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.007368564605713, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.9273304343223572, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.8644629120826721, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.7987216114997864, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7416607737541199, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.6878364086151123, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6393970251083374, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.5967141389846802, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5541582107543945, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5220361948013306, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.49679890275001526, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.46804866194725037, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.43998873233795166, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4197613596916199, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.404496967792511, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.3873516321182251, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.37507304549217224, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.36351898312568665, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.35400107502937317, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3465026617050171, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.3410164415836334, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.33667802810668945, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.33313316106796265, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.33133718371391296, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.33077558875083923, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.33035412430763245, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.33083483576774597, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3318536877632141, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3336944878101349, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3356703221797943, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3384133279323578, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.341313898563385, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.22420634920634921, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.23462301587301587, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.26339285714285715, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.29910714285714285, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.33110119047619047, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3819444444444444, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.43427579365079366, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.49107142857142855, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5391865079365079, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5753968253968254, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6026785714285714, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6374007936507936, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.660218253968254, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6902281746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7127976190476191, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7351190476190477, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7524801587301587, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7678571428571429, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7839781746031746, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.798859126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8112599206349206, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8189484126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.830109126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8377976190476191, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.845734126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8563988095238095, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8625992063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8680555555555556, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8725198412698413, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8794642857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8821924603174603, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8849206349206349, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8871527777777778, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8893849206349206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8908730158730159, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8923611111111112, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8936011904761905, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.894593253968254, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8948412698412699, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8938492063492064, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8965773809523809, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8980654761904762, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8975694444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8978174603174603, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8960813492063492, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8948412698412699, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8958333333333334, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8968253968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8973214285714286, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08736551834040111, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0983170634908183, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.1267922679303244, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1605403271417598, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.19109432046244304, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.2346611465028238, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2842023095984737, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3295278171273968, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3766556786773803, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.4196501401375939, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.4541865293902778, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.49893871044775695, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5320054955329442, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5797667477881774, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6220628756516356, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6621094942387952, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6891200166505983, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7137562566839832, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7354453803185816, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7569534460860053, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7751287060803755, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7840926330611634, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7976103598897195, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8052683063951765, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8162712233813343, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8288948225419833, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8370418240134596, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8445573260128114, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8495384663047875, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8571728627024261, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8609655394325317, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.86308492998813, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8662431669453283, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8686304487420551, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8709050563976881, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8722982752263149, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8738761397635479, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8751638494277801, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.874421844616806, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8734766837006303, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8751613005487437, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8768517269657528, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8763717265906953, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8768850438109077, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8748579911176606, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8729174118988843, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.873990271335144, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.875720509208865, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8761155863105093, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.1432487393170595, "validation/loss_best": 0.33077558875083923, "validation/acc_best": 0.8980654761904762, "validation/f1_best": 0.8768517269657528} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.8712626564502716, "train/grad": 0.12836264740675687, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.533388977050781, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.483456726074219, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.404695739746094, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.330704650878906, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.261120147705078, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.170952377319336, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.076993980407715, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.9817266845703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.8693177223205566, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.76344801902771, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6710922384262086, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.549996430873871, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.4488793468475343, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3250129681825638, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.225533223748207, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.1435792118310928, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.054375101029873, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.9658930283784867, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8829834577441216, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8176252564787865, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.74895927131176, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.688973782658577, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6320722360908985, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.580102836638689, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5338250812888146, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.4866618400812149, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.4502741466462612, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.42109082207083703, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.38659084260463716, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.35155199982225893, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.32478051163256166, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3033549651503563, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.2780717435479164, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.25820648938417434, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.23754438675940037, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.21952727064490318, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.20249310620129107, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.18805397097021342, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.17413335867226123, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.15901790909469127, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.14880848489701748, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.14093783773481847, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.13209781363606454, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.1256062452867627, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.11943694522604346, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.11421133782714606, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.11070830089971423, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.10777160901576281, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.10623070916160941, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03146229550242424, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.031022142386063933, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.030441225385293365, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.029920323062688114, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.029428819175809622, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02878396556712687, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.028104970380663872, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027412258256226778, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02658955128863454, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.025807148162275553, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02511558497324586, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02419444586150348, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02340901667252183, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022418009378015996, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021591867757961153, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02088597053196281, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020088000032119455, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019257727968506513, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018440821734257044, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01776550915557891, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01702452823985368, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016349051436409353, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.015679873232729732, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015038640177808702, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.014440469574183226, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.013804547381587326, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013287813016213477, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.012854614239186048, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.012325647589750587, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011759503372013569, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011306612677872181, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.010925532039254903, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01046279088128358, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010079042771831154, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009659634702838957, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009276829392183572, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008907065321691334, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008571746172383427, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008235129504464566, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007852686510886997, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007586973113939166, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00737388780224137, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.007147190851392224, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.006979970154352486, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0068353983038105075, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.006719017823925242, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0066563291207421575, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.006627292193006724, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.006657312910538167, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.532407522201538, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.4829022884368896, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.404904842376709, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3317911624908447, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2631850242614746, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.174255609512329, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.081869602203369, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.988325834274292, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8782219886779785, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7745405435562134, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.6842272281646729, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.5661003589630127, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.4675811529159546, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.3470839262008667, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.250490665435791, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.1712298393249512, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.0850884914398193, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.9999386072158813, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.9206631779670715, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.8581238985061646, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.7929796576499939, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7363159656524658, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.6830883622169495, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6349813938140869, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.5926672220230103, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5504156947135925, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5185242295265198, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.49371007084846497, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.4651356339454651, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.43754929304122925, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4175749123096466, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.4023706614971161, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.3856012225151062, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.37358179688453674, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.36211079359054565, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.3531755208969116, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3458905816078186, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.340549111366272, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3365028500556946, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3333934545516968, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3319421708583832, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.33132031559944153, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.33142805099487305, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3321634531021118, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3333401679992676, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.33505377173423767, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3368435502052307, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3396039605140686, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.34198257327079773, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2244543650793651, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2361111111111111, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2648809523809524, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3013392857142857, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3325892857142857, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3861607142857143, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.4399801587301587, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.49503968253968256, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5438988095238095, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.578125, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6071428571428571, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6398809523809523, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6634424603174603, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6922123015873016, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7152777777777778, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7378472222222222, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7534722222222222, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7678571428571429, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7847222222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8000992063492064, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8110119047619048, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8211805555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8313492063492064, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8390376984126984, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.847718253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8563988095238095, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8640873015873016, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8680555555555556, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8742559523809523, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8787202380952381, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8814484126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8846726190476191, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8871527777777778, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8903769841269841, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8918650793650794, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8921130952380952, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8923611111111112, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8931051587301587, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8931051587301587, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8933531746031746, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8955853174603174, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8980654761904762, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8975694444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8973214285714286, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8960813492063492, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8950892857142857, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8955853174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.894593253968254, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8955853174603174, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08758591044141988, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0995720796577593, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.12831790015560193, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1623274283018507, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.19161277944411875, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.23793483789617959, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.29094094906480644, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3339800658955694, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.38172616660548914, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.42363325043384525, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.4610463700775562, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.5025066416110987, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5371771187759082, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.583790210305228, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6259533408609558, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6675172677878214, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6910555469964669, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7148975886443989, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7371219790783355, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7592010794350518, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7752542913565005, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7881695663913959, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7991951458024603, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8081599851366873, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8191674433504914, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8280415008798485, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8385871829166276, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8434869452339991, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8507287179116642, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8565719004069026, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8591565458871689, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8626917176974881, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8656572639563505, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8700079118633861, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8716106065863459, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8721394355183372, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8729477909477598, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8737042289701493, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8732410252338662, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8731317495853026, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8757971018774742, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8783899232602966, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8780001893844054, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8769642745208002, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8731112274232145, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8713791897210172, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8722443387948898, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8714234645710659, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8729697550678891, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.14093783773481847, "validation/loss_best": 0.33132031559944153, "validation/acc_best": 0.8980654761904762, "validation/f1_best": 0.8783899232602966} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.8678153374791145, "train/grad": 0.1285819375887513, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.5314535522460937, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.4815911865234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.402610168457031, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.3284848022460936, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.2589378356933594, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.1685951232910154, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.0746951293945313, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.9794714736938477, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.8672609329223633, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.761509141921997, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6692213821411133, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5482730340957642, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.4472886896133423, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3233442014455796, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.2236499327421189, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.141501806974411, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.0519133445620537, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.962970246374607, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8796069395542144, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8138228926062584, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.7447172892093659, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.6842903071641921, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6271119651198387, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5749579414725303, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5285912412405014, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.4813419343531132, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.4450233981013298, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.41604784220457075, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.3817195785790682, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.34710770383477213, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.3206115724146366, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.2993797491490841, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.27434135183691977, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.2547128686308861, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.23418043859302998, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.21626936856657267, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.19930896811187268, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.18494194447994233, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.17099523961544036, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.15587501376867294, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.14557714235037567, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1374960071966052, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.12843332085758447, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.12170760560780763, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.11515545465052128, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.10941289737820625, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.10552891986444593, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.10200226010754704, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0999112974666059, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03133846744894982, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.030923855546861886, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03036763268522918, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02986258052289486, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.029384667854756116, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02875853145495057, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.028100353768095374, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027430530982092024, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026631331238895654, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02587080464698374, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02520106328651309, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024304507402703165, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023541195252910256, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022572634825482964, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02176536834798753, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021076157009229064, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020289035509340466, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019467455754056572, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018655951195396484, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.017978398725390435, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.017233335385099052, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016548954085446894, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01586763383820653, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015213760593906046, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.014603817658498884, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.013952294220216573, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013423233767971396, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.012982247387990356, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.012434491757303476, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011853689700365067, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011383551077451558, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.010990059885662049, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.010504730266984553, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010106115210801363, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00967194715514779, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009273502402938902, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008882061229087412, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008534189010970295, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008185642268508674, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007788051455281675, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007508194134570659, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.007278931371401996, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.007013633374590426, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.006813108436763287, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.006612698261160403, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.006439165483461693, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006327124160015955, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.006238965525990352, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.006193917781347409, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.529982089996338, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.4802961349487305, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.401918411254883, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3284525871276855, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2595269680023193, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.170279026031494, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.0776009559631348, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.983845829963684, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8734453916549683, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.769585132598877, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.67931067943573, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.5611114501953125, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.4627259969711304, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.3423830270767212, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.2459464073181152, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.1668659448623657, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.0810400247573853, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.9961344003677368, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.916918933391571, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.8548307418823242, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.7898368239402771, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7334235906600952, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.680359959602356, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6326666474342346, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.5905677676200867, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5485643744468689, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5168617367744446, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.4922165274620056, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.46385759115219116, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.43640902638435364, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4165107309818268, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.401447594165802, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.38474273681640625, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3728179931640625, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.36132729053497314, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.35238155722618103, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.34510430693626404, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.33999890089035034, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3355976343154907, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3325161933898926, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.33099281787872314, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3303541839122772, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3306063413619995, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3310694694519043, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3324306309223175, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.33424997329711914, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3358971178531647, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3378930389881134, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3397339880466461, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.22495039682539683, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.2361111111111111, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2648809523809524, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3023313492063492, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3333333333333333, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3888888888888889, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.44047619047619047, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4975198412698413, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5456349206349206, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5811011904761905, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6088789682539683, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6413690476190477, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6639384920634921, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6939484126984127, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7170138888888888, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7390873015873016, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7542162698412699, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7683531746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7876984126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.800843253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8132440476190477, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8211805555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.832093253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8400297619047619, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8497023809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8578869047619048, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8630952380952381, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8680555555555556, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8752480158730159, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8794642857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8824404761904762, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8854166666666666, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.888640873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8901289682539683, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8908730158730159, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8928571428571429, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8931051587301587, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8928571428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8931051587301587, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8950892857142857, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8968253968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8970734126984127, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8973214285714286, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8973214285714286, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8978174603174603, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8978174603174603, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8980654761904762, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8980654761904762, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8963293650793651, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08828973157575232, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.09931022931827133, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.12788809261364134, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.16337102543063878, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.19232534406373838, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.240979536936296, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.29105401427067146, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.33670367765634335, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.382830919313153, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.42747830117943814, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.46314553632966515, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.503901799229414, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5378406045857942, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.586773418415785, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.630798276021964, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6684108491645451, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6925775017285336, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7157267817113061, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7404651155349613, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7599380710626464, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7776276947042827, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7876236260362478, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7998480998752493, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8092491046391436, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.821576885637889, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8299414896418073, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8369099842613449, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8440595793365164, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8524573581747816, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.857066040272611, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.860507485183649, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8632391479889, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8678163358737462, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8701339227609766, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8713550732322025, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8728264762507915, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8727543617609131, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8725502918735144, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8723059643661469, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8740519710568836, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8762088024016783, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8761487253913354, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8758782989319817, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8754748013126571, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8761697673727986, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8765694200557403, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8769276853698604, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8768169523377438, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8746231357974207, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.10552891986444593, "validation/loss_best": 0.3358971178531647, "validation/acc_best": 0.8980654761904762, "validation/f1_best": 0.8769276853698604} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.8700792473554612, "train/grad": 0.12975684195756912, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.530867614746094, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.480780029296875, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.4016314697265626, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.3273199462890624, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.257684326171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.16718017578125, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.073187789916992, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.977890625, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.8656513977050782, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.7599637508392334, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6677075910568238, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5468973731994629, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.446168782711029, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3225939804315567, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.223239466547966, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.1415701308846473, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.0524702748656274, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.9641401469707489, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8812719306349754, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.815755880177021, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.7470448324084282, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.6869321323931217, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6299320566654205, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5779439914226532, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5315854258835315, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.48450333818793295, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.44832349494099616, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.41939442604780197, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.3851422099024057, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.35074938379228116, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.32441933907568454, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.3034802643209696, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.27872915148735045, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.2593637170642614, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.23911839436739682, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.22139526009559632, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.20457912642508747, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.19035949617624282, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1765014960616827, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.16135167431086303, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.15104409094899893, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.14297985749319195, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.13376466510817409, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.12690893745049833, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.1201351672783494, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.11416850803419948, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.11002018230035901, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.106219292152673, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.10382157791405916, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03164079166948795, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.031226567029953002, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0306624526809901, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.030149664245545865, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02966383470222354, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.029020260656252503, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.028345517255365847, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027655587075278164, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026832298394292593, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026050601499155165, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025361098432913422, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024446704210713505, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0236667016800493, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022690216703340413, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02187882812693715, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021188973784446716, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020406856313347817, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019593469691462816, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018790213461034, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.018123553600162268, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.017384624932892622, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016705845207907258, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.016026158705353736, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.015375618650577962, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.014761962741613389, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.014110902310349047, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013577991100028157, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.013132825074717403, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.012583635803312064, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011999132335186005, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011529782481957226, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.01113581848097965, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01065285863354802, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010256276335567235, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00981976414564997, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009421410376671702, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009023461157921702, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008671384640038013, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008315545145887882, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007901539164595305, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007611007993109525, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.007374659429769963, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.007099837246350944, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.006895098109962419, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.006683291351655498, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.006495255172485486, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0063634472922421996, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00625155444839038, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.006180611423915252, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.529086112976074, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.479275941848755, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.4007437229156494, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3270933628082275, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2581560611724854, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.1688265800476074, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.075956344604492, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.9821282625198364, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.871692419052124, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7678009271621704, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.6774872541427612, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.559310793876648, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.4608848094940186, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.3406214714050293, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.2443220615386963, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.1651359796524048, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.0794601440429688, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.9946684241294861, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.9155918955802917, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.8534850478172302, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.7886902093887329, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7324411273002625, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.6794533729553223, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6317254900932312, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.5897108912467957, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5477481484413147, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5162320137023926, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.49152612686157227, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.4632134735584259, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.4358289837837219, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.41596171259880066, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.4008435308933258, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.38426312804222107, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3721046447753906, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.3606504201889038, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.3517720699310303, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3444182872772217, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.33903613686561584, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.33485037088394165, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.33145514130592346, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.32995134592056274, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3292918801307678, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3291154205799103, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.32973530888557434, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.33068791031837463, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.33194342255592346, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.3333112895488739, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.33498257398605347, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3367060124874115, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.22495039682539683, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.23685515873015872, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.26512896825396826, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3023313492063492, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3353174603174603, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3898809523809524, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.44146825396825395, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.49851190476190477, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5456349206349206, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.582093253968254, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6096230158730159, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6418650793650794, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6649305555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6949404761904762, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7182539682539683, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7403273809523809, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7544642857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7698412698412699, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7867063492063492, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8005952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8125, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8209325396825397, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8315972222222222, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8405257936507936, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8499503968253969, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8583829365079365, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8625992063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8683035714285714, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8747519841269841, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8792162698412699, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8819444444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8856646825396826, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8888888888888888, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8903769841269841, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8911210317460317, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.890625, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8936011904761905, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8933531746031746, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.8950892857142857, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8965773809523809, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8978174603174603, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8988095238095238, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8998015873015873, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8988095238095238, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8988095238095238, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8970734126984127, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8973214285714286, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8973214285714286, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8968253968253969, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08827434046462124, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.10026370696772605, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.12807075193129358, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.16302306817976225, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.1944504247945432, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.24142094003588668, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.29217845616568766, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.33769481110741945, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3829312936091227, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.42948094117798385, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.4645729500501975, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.5044252915096886, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5387537349753428, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5892113230947046, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.632961212102094, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6700165457556363, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6923225636426971, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7179917340966322, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.739445845824943, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.7592888480570223, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7767661037765544, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.787895360649247, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7991736321099429, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8094800411889002, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8216029687372226, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8303398805342488, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8363570477362531, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.844315069588112, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8524272633327441, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8565815680269177, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8596013027648299, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8631628640584041, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8677317001856757, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8702924971928975, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8714129377724004, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8703707927290247, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8734214509527509, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8731268882504268, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8750932453080464, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8767123286135527, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.878353870155456, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.879484429380613, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8803089883930048, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8792082395196597, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8781441952881289, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8754163830682815, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8757065961965205, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8756783144173426, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8750841811281537, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.13376466510817409, "validation/loss_best": 0.3291154205799103, "validation/acc_best": 0.8998015873015873, "validation/f1_best": 0.8803089883930048} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.8575706762075425, "train/grad": 0.12723551604896785, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.5207894897460936, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.4701834106445313, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.390445556640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.3156533813476563, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.2455731201171876, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.1545103454589842, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.0601009368896483, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.9645265197753907, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.8519416427612305, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.7460797500610352, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6537067461013795, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5328750252723693, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.4320548820495604, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3085136634111405, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.2092915344238282, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.1275460267066955, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.0386388832330704, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.9503727719187737, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8675882241129875, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.8022909131646156, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.733821551501751, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.6738862973451615, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6170893602073193, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5652956110239029, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.5190497905015945, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.47212384417653086, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.43599904611706736, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.40702251687645913, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.3727861687541008, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.3382844883948565, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.3119117923825979, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.29081850275397303, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.26604075483977796, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.2466256284713745, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.22642137803137302, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.2088501753285527, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.1921514854207635, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.1781130386143923, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.16441811878234147, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.14952020850032569, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.13939006950706243, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.13147265702486038, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.12235511142760515, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.11557142075151206, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.10893985452130436, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.103099344689399, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.09903199233114719, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.09529906170442701, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0928921801969409, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.031201209546998142, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.030800083912909032, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.030252569699659945, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02975039539858699, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.029271601950749754, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.028639802373945714, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.027976589882746337, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02729866176843643, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02648915191181004, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.025716775618493556, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025034288754686714, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024124603169038892, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02334517707116902, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022364379400387407, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02154684434644878, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020847506765276195, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020057860612869263, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019231676682829858, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018416535523720086, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.017742771240882575, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.016995645561255514, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016310769515112043, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.015623221402056514, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.014968846780247987, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.014350231876596809, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0136877419706434, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.013152578845620155, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.012703708461485804, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.012149847773835064, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011561786511447281, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011086377315223217, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.010693654550705105, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.010204846519045532, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.009802623586729169, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009366327144671232, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008967888744082301, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00856925726402551, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008217884874902666, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007857136139646172, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007448166131507605, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0071555429417639975, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.006912706133443862, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006629898510873318, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.006410915858577937, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00619494759128429, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00599550768150948, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0058597260620445015, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.005731921091210097, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0056491122441366315, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.528964042663574, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.4791243076324463, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.400538921356201, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3269436359405518, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.257939577102661, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.1685731410980225, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.0757014751434326, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.9819114208221436, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8714207410812378, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7675961256027222, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.677222728729248, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.5590544939041138, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.4606364965438843, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.3403840065002441, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.2440228462219238, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.1649307012557983, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.0792382955551147, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.9944841861724854, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.915370523929596, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.8533384203910828, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.7884896993637085, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7323300838470459, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.6793226003646851, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.6316128969192505, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.5895892977714539, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5476067066192627, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5160989165306091, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.49145904183387756, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.46300646662712097, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.43568700551986694, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.41587990522384644, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.40071046352386475, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.3841671645641327, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3721429705619812, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.3607640266418457, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.35172924399375916, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3443821668624878, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.339073121547699, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3347906768321991, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.331377238035202, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3297959864139557, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.32916954159736633, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.32918018102645874, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.32972970604896545, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3307202458381653, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.3318799138069153, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.33310210704803467, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3349452018737793, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.3361821174621582, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.22495039682539683, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.23685515873015872, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.26537698412698413, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3023313492063492, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3353174603174603, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.39037698412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.44146825396825395, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.49875992063492064, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5451388888888888, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5825892857142857, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6098710317460317, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6423611111111112, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6649305555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6949404761904762, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7185019841269841, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7403273809523809, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.7544642857142857, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7698412698412699, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7872023809523809, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.800843253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8129960317460317, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8211805555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.832093253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8402777777777778, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8499503968253969, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8576388888888888, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8625992063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8680555555555556, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.875, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8794642857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8821924603174603, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8854166666666666, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.888640873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8903769841269841, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8908730158730159, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8913690476190477, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8938492063492064, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8936011904761905, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.894593253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.8963293650793651, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.8978174603174603, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8988095238095238, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8993055555555556, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8990575396825397, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.8983134920634921, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8975694444444444, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8978174603174603, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8973214285714286, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8975694444444444, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08825301402218273, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.10025126352765676, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.1285603687418772, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.16301654710986102, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.19436794305383712, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.24162571026373478, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2921570468072873, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3380478927673172, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.38259922645182426, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.42993256146005365, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.46472130011673407, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.5050293805591511, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5386523672378143, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5883435733113754, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6327966345694697, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.669975621426812, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.6923842143386536, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.7177513066971447, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7398796853427644, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.759583301251371, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7779228795534798, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7880101623194816, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7997282596788715, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8093095815934906, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8216413688413688, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8296020263297915, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8363573993678334, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8440754212191263, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8526297064421161, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8568046165768577, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8601278031867687, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.862987398439752, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8675880751049043, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.870275087971781, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.8714268940238076, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.87118283318579, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8737464591391444, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8733084272885063, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8747866262916388, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8761488470942034, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8776785019209395, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8788202718872149, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8793307180531877, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.879517705444036, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8771432359750009, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8756415254512934, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8759371261911107, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8753050153599718, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8758493249814359, "id_best": 42, "lr_best": 0.005699999999999999, "wd_best": 0.05, "train/loss_best": 0.12235511142760515, "validation/loss_best": 0.32918018102645874, "validation/acc_best": 0.8993055555555556, "validation/f1_best": 0.8793307180531877} diff --git a/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/config.yaml b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58291479dd4d4508cc003972d4869ff86ce7caac --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..1c11bdd25c3ccfa5c81a3ce93292a1415b0b4fea --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 14, "eval/id_best": 18, "eval/lr_best": 0.00011399999999999999, "eval/wd_best": 0.05, "eval/train/loss": 2.1285789012908936, "eval/train/acc": 0.3583084913488429, "eval/train/acc_std": 0.0023367576562800625, "eval/train/f1": 0.2959530054782943, "eval/train/f1_std": 0.002313338538781961, "eval/validation/loss": 2.357365131378174, "eval/validation/acc": 0.2956810631229236, "eval/validation/acc_std": 0.0052654320225261324, "eval/validation/f1": 0.2247032904624946, "eval/validation/f1_std": 0.004916472245126382, "eval/test/loss": 2.2639319896698, "eval/test/acc": 0.31280148423005566, "eval/test/acc_std": 0.005589979402797768, "eval/test/f1": 0.24265113928551085, "eval/test/f1_std": 0.00559821247692473, "eval/testid/loss": 2.268148183822632, "eval/testid/acc": 0.3100057836899942, "eval/testid/acc_std": 0.005635298013433535, "eval/testid/f1": 0.2474330251065113, "eval/testid/f1_std": 0.0053608807689259986} diff --git a/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..4afd780ad05cee9f427bb8b1164d267e2294122e --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 14, "eval/best/id_best": 18, "eval/best/lr_best": 0.00011399999999999999, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.1285789012908936, "eval/best/train/acc": 0.3583084913488429, "eval/best/train/acc_std": 0.0023367576562800625, "eval/best/train/f1": 0.2959530054782943, "eval/best/train/f1_std": 0.002313338538781961, "eval/best/validation/loss": 2.357365131378174, "eval/best/validation/acc": 0.2956810631229236, "eval/best/validation/acc_std": 0.0052654320225261324, "eval/best/validation/f1": 0.2247032904624946, "eval/best/validation/f1_std": 0.004916472245126382, "eval/best/test/loss": 2.2639319896698, "eval/best/test/acc": 0.31280148423005566, "eval/best/test/acc_std": 0.005589979402797768, "eval/best/test/f1": 0.24265113928551085, "eval/best/test/f1_std": 0.00559821247692473, "eval/best/testid/loss": 2.268148183822632, "eval/best/testid/acc": 0.3100057836899942, "eval/best/testid/acc_std": 0.005635298013433535, "eval/best/testid/f1": 0.2474330251065113, "eval/best/testid/f1_std": 0.0053608807689259986} diff --git a/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..e2e43166555cf523275aeeca74cb29f71973b0d0 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 20, "eval/last/lr_best": 0.000156, "eval/last/wd_best": 0.05, "eval/last/train/loss": 1.9967973232269287, "eval/last/train/acc": 0.3990903223823719, "eval/last/train/acc_std": 0.0024563123329222883, "eval/last/train/f1": 0.34505924075543565, "eval/last/train/f1_std": 0.002654195176959497, "eval/last/validation/loss": 2.3674747943878174, "eval/last/validation/acc": 0.29494278331487633, "eval/last/validation/acc_std": 0.005328307553277219, "eval/last/validation/f1": 0.22846606487678567, "eval/last/validation/f1_std": 0.004971585089561768, "eval/last/test/loss": 2.2610065937042236, "eval/last/test/acc": 0.3109461966604824, "eval/last/test/acc_std": 0.005620048166909631, "eval/last/test/f1": 0.2484623397453091, "eval/last/test/f1_std": 0.005792482906884165, "eval/last/testid/loss": 2.210700750350952, "eval/last/testid/acc": 0.33314054366685947, "eval/last/testid/acc_std": 0.005766031728825074, "eval/last/testid/f1": 0.27705110809591144, "eval/last/testid/f1_std": 0.005817829769766895} diff --git a/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..f39f9394b3882dc42aa4a4571447ec8f732f2eca --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,14,0.00011399999999999999,0.05,18,"[0.38, 1.0]",train,2.1285789012908936,0.3583084913488429,0.0023367576562800625,0.2959530054782943,0.002313338538781961 +flat_mae,patch,attn,nsd_cococlip,best,14,0.00011399999999999999,0.05,18,"[0.38, 1.0]",validation,2.357365131378174,0.2956810631229236,0.0052654320225261324,0.2247032904624946,0.004916472245126382 +flat_mae,patch,attn,nsd_cococlip,best,14,0.00011399999999999999,0.05,18,"[0.38, 1.0]",test,2.2639319896698,0.31280148423005566,0.005589979402797768,0.24265113928551085,0.00559821247692473 +flat_mae,patch,attn,nsd_cococlip,best,14,0.00011399999999999999,0.05,18,"[0.38, 1.0]",testid,2.268148183822632,0.3100057836899942,0.005635298013433535,0.2474330251065113,0.0053608807689259986 diff --git a/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..f39f9394b3882dc42aa4a4571447ec8f732f2eca --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,14,0.00011399999999999999,0.05,18,"[0.38, 1.0]",train,2.1285789012908936,0.3583084913488429,0.0023367576562800625,0.2959530054782943,0.002313338538781961 +flat_mae,patch,attn,nsd_cococlip,best,14,0.00011399999999999999,0.05,18,"[0.38, 1.0]",validation,2.357365131378174,0.2956810631229236,0.0052654320225261324,0.2247032904624946,0.004916472245126382 +flat_mae,patch,attn,nsd_cococlip,best,14,0.00011399999999999999,0.05,18,"[0.38, 1.0]",test,2.2639319896698,0.31280148423005566,0.005589979402797768,0.24265113928551085,0.00559821247692473 +flat_mae,patch,attn,nsd_cococlip,best,14,0.00011399999999999999,0.05,18,"[0.38, 1.0]",testid,2.268148183822632,0.3100057836899942,0.005635298013433535,0.2474330251065113,0.0053608807689259986 diff --git a/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..c779d3d6606ac0eef42c1a618f9a4bdc47aa3d49 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,0.000156,0.05,20,"[0.52, 1.0]",train,1.9967973232269287,0.3990903223823719,0.0024563123329222883,0.34505924075543565,0.002654195176959497 +flat_mae,patch,attn,nsd_cococlip,last,19,0.000156,0.05,20,"[0.52, 1.0]",validation,2.3674747943878174,0.29494278331487633,0.005328307553277219,0.22846606487678567,0.004971585089561768 +flat_mae,patch,attn,nsd_cococlip,last,19,0.000156,0.05,20,"[0.52, 1.0]",test,2.2610065937042236,0.3109461966604824,0.005620048166909631,0.2484623397453091,0.005792482906884165 +flat_mae,patch,attn,nsd_cococlip,last,19,0.000156,0.05,20,"[0.52, 1.0]",testid,2.210700750350952,0.33314054366685947,0.005766031728825074,0.27705110809591144,0.005817829769766895 diff --git a/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/config.yaml b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..392e92193307ee935d199ff305214557f0b9cbd6 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/decoders/output +name_prefix: eval_probe +remote_root: null +notes: decoder ablations cross_reg1_pep4; eval v2 (nsd_cococlip patch linear) +model_kwargs: + ckpt_path: experiments/decoders/output/decoders/cross_reg1_pep4/pretrain/checkpoint-last.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: false + norm: false +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__linear +model: flat_mae +representation: patch +classifier: linear +dataset: nsd_cococlip +distributed: false +output_dir: experiments/decoders/output/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__linear +remote_dir: null diff --git a/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log_best.json b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..fd248a3352673e1bb04502846d844e972e309783 --- /dev/null +++ b/decoders/cross_reg1_pep4/eval_v2/nsd_cococlip__patch__linear/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 12, "eval/best/id_best": 46, "eval/best/lr_best": 0.010799999999999999, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.9561731815338135, "eval/best/train/acc": 0.13442330741571654, "eval/best/train/acc_std": 0.0016399786261972085, "eval/best/train/f1": 0.07360312856380823, "eval/best/train/f1_std": 0.0011894892872814158, "eval/best/validation/loss": 3.050022840499878, "eval/best/validation/acc": 0.11517165005537099, "eval/best/validation/acc_std": 0.0037073851707885345, "eval/best/validation/f1": 0.0568927381373181, "eval/best/validation/f1_std": 0.0025098501705171287, "eval/best/test/loss": 3.0346412658691406, "eval/best/test/acc": 0.11706864564007421, "eval/best/test/acc_std": 0.0036309939339685808, "eval/best/test/f1": 0.05047890849878861, "eval/best/test/f1_std": 0.0020235796960419747, "eval/best/testid/loss": 3.0452094078063965, "eval/best/testid/acc": 0.10776942355889724, "eval/best/testid/acc_std": 0.0037217302058737472, "eval/best/testid/f1": 0.05534874186236579, "eval/best/testid/f1_std": 0.0025537881427831616}